Re #1954: Add WebRTC to third party component

* Add build config for GNU build systems git-svn-id: http://svn.pjsip.org/repos/pjproject/trunk@5428 74dad513-b988-da41-8d7b-12977e46ad98
author: Liong Sauw Ming <ming@teluu.com> 2016-08-25 01:36:33 +0000
committer: Liong Sauw Ming <ming@teluu.com> 2016-08-25 01:36:33 +0000
commit: 78d67a9205358ec4f5c38fa4191f7042d3983047 (patch)
tree: 8af5992cbaec463de76ed49c8538fc7a69ff398a /third_party/webrtc/src/webrtc
parent: e9fc0d90805002cbca667c7c1d8c275adc458bc6 (diff)
117 files changed, 36706 insertions, 0 deletions
diff --git a/third_party/webrtc/src/webrtc/common_audio/fft4g.c b/third_party/webrtc/src/webrtc/common_audio/fft4g.c
new file mode 100644
index 00000000..9cf7b9f6
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/fft4g.c
@@ -0,0 +1,1332 @@
+/*
+ * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ * Copyright Takuya OOURA, 1996-2001
+ *
+ * You may use, copy, modify and distribute this code for any purpose (include
+ * commercial use) and without fee. Please refer to this package when you modify
+ * this code.
+ *
+ * Changes:
+ * Trivial type modifications by the WebRTC authors.
+ */
+
+/*
+Fast Fourier/Cosine/Sine Transform
+    dimension   :one
+    data length :power of 2
+    decimation  :frequency
+    radix       :4, 2
+    data        :inplace
+    table       :use
+functions
+    cdft: Complex Discrete Fourier Transform
+    rdft: Real Discrete Fourier Transform
+    ddct: Discrete Cosine Transform
+    ddst: Discrete Sine Transform
+    dfct: Cosine Transform of RDFT (Real Symmetric DFT)
+    dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
+function prototypes
+    void cdft(int, int, float *, int *, float *);
+    void rdft(size_t, int, float *, size_t *, float *);
+    void ddct(int, int, float *, int *, float *);
+    void ddst(int, int, float *, int *, float *);
+    void dfct(int, float *, float *, int *, float *);
+    void dfst(int, float *, float *, int *, float *);
+
+
+-------- Complex DFT (Discrete Fourier Transform) --------
+    [definition]
+        <case1>
+            X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
+        <case2>
+            X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
+        (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            cdft(2*n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            cdft(2*n, -1, a, ip, w);
+    [parameters]
+        2*n            :data length (int)
+                        n >= 1, n = power of 2
+        a[0...2*n-1]   :input/output data (float *)
+                        input data
+                            a[2*j] = Re(x[j]),
+                            a[2*j+1] = Im(x[j]), 0<=j<n
+                        output data
+                            a[2*k] = Re(X[k]),
+                            a[2*k+1] = Im(X[k]), 0<=k<n
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n/2-1]   :cos/sin table (float *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            cdft(2*n, -1, a, ip, w);
+        is
+            cdft(2*n, 1, a, ip, w);
+            for (j = 0; j <= 2 * n - 1; j++) {
+                a[j] *= 1.0 / n;
+            }
+        .
+
+
+-------- Real DFT / Inverse of Real DFT --------
+    [definition]
+        <case1> RDFT
+            R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
+            I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
+        <case2> IRDFT (excluding scale)
+            a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
+                   sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
+                   sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            rdft(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            rdft(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (size_t)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (float *)
+                        <case1>
+                            output data
+                                a[2*k] = R[k], 0<=k<n/2
+                                a[2*k+1] = I[k], 0<k<n/2
+                                a[1] = R[n/2]
+                        <case2>
+                            input data
+                                a[2*j] = R[j], 0<=j<n/2
+                                a[2*j+1] = I[j], 0<j<n/2
+                                a[1] = R[n/2]
+        ip[0...*]      :work area for bit reversal (size_t *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n/2-1]   :cos/sin table (float *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            rdft(n, 1, a, ip, w);
+        is
+            rdft(n, -1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
+    [definition]
+        <case1> IDCT (excluding scale)
+            C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
+        <case2> DCT
+            C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            ddct(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            ddct(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (float *)
+                        output data
+                            a[k] = C[k], 0<=k<n
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/4-1] :cos/sin table (float *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            ddct(n, -1, a, ip, w);
+        is
+            a[0] *= 0.5;
+            ddct(n, 1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- DST (Discrete Sine Transform) / Inverse of DST --------
+    [definition]
+        <case1> IDST (excluding scale)
+            S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
+        <case2> DST
+            S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            ddst(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            ddst(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (float *)
+                        <case1>
+                            input data
+                                a[j] = A[j], 0<j<n
+                                a[0] = A[n]
+                            output data
+                                a[k] = S[k], 0<=k<n
+                        <case2>
+                            output data
+                                a[k] = S[k], 0<k<n
+                                a[0] = S[n]
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/4-1] :cos/sin table (float *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            ddst(n, -1, a, ip, w);
+        is
+            a[0] *= 0.5;
+            ddst(n, 1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
+    [definition]
+        C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
+    [usage]
+        ip[0] = 0; // first time only
+        dfct(n, a, t, ip, w);
+    [parameters]
+        n              :data length - 1 (int)
+                        n >= 2, n = power of 2
+        a[0...n]       :input/output data (float *)
+                        output data
+                            a[k] = C[k], 0<=k<=n
+        t[0...n/2]     :work area (float *)
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/4)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/8-1] :cos/sin table (float *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            a[0] *= 0.5;
+            a[n] *= 0.5;
+            dfct(n, a, t, ip, w);
+        is
+            a[0] *= 0.5;
+            a[n] *= 0.5;
+            dfct(n, a, t, ip, w);
+            for (j = 0; j <= n; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
+    [definition]
+        S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
+    [usage]
+        ip[0] = 0; // first time only
+        dfst(n, a, t, ip, w);
+    [parameters]
+        n              :data length + 1 (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (float *)
+                        output data
+                            a[k] = S[k], 0<k<n
+                        (a[0] is used for work area)
+        t[0...n/2-1]   :work area (float *)
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/4)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/8-1] :cos/sin table (float *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            dfst(n, a, t, ip, w);
+        is
+            dfst(n, a, t, ip, w);
+            for (j = 1; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+Appendix :
+    The cos/sin table is recalculated when the larger table required.
+    w[] and ip[] are compatible with all routines.
+*/
+
+#include <stddef.h>
+
+static void makewt(size_t nw, size_t *ip, float *w);
+static void makect(size_t nc, size_t *ip, float *c);
+static void bitrv2(size_t n, size_t *ip, float *a);
+#if 0  // Not used.
+static void bitrv2conj(int n, int *ip, float *a);
+#endif
+static void cftfsub(size_t n, float *a, float *w);
+static void cftbsub(size_t n, float *a, float *w);
+static void cft1st(size_t n, float *a, float *w);
+static void cftmdl(size_t n, size_t l, float *a, float *w);
+static void rftfsub(size_t n, float *a, size_t nc, float *c);
+static void rftbsub(size_t n, float *a, size_t nc, float *c);
+#if 0  // Not used.
+static void dctsub(int n, float *a, int nc, float *c)
+static void dstsub(int n, float *a, int nc, float *c)
+#endif
+
+
+#if 0  // Not used.
+void WebRtc_cdft(int n, int isgn, float *a, int *ip, float *w)
+{
+    if (n > (ip[0] << 2)) {
+        makewt(n >> 2, ip, w);
+    }
+    if (n > 4) {
+        if (isgn >= 0) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+        } else {
+            bitrv2conj(n, ip + 2, a);
+            cftbsub(n, a, w);
+        }
+    } else if (n == 4) {
+        cftfsub(n, a, w);
+    }
+}
+#endif
+
+
+void WebRtc_rdft(size_t n, int isgn, float *a, size_t *ip, float *w)
+{
+    size_t nw, nc;
+    float xi;
+
+    nw = ip[0];
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 2)) {
+        nc = n >> 2;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn >= 0) {
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        xi = a[0] - a[1];
+        a[0] += a[1];
+        a[1] = xi;
+    } else {
+        a[1] = 0.5f * (a[0] - a[1]);
+        a[0] -= a[1];
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+    }
+}
+
+#if 0  // Not used.
+static void ddct(int n, int isgn, float *a, int *ip, float *w)
+{
+    int j, nw, nc;
+    float xr;
+
+    nw = ip[0];
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > nc) {
+        nc = n;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn < 0) {
+        xr = a[n - 1];
+        for (j = n - 2; j >= 2; j -= 2) {
+            a[j + 1] = a[j] - a[j - 1];
+            a[j] += a[j - 1];
+        }
+        a[1] = a[0] - xr;
+        a[0] += xr;
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+    }
+    dctsub(n, a, nc, w + nw);
+    if (isgn >= 0) {
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        xr = a[0] - a[1];
+        a[0] += a[1];
+        for (j = 2; j < n; j += 2) {
+            a[j - 1] = a[j] - a[j + 1];
+            a[j] += a[j + 1];
+        }
+        a[n - 1] = xr;
+    }
+}
+
+
+static void ddst(int n, int isgn, float *a, int *ip, float *w)
+{
+    int j, nw, nc;
+    float xr;
+
+    nw = ip[0];
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > nc) {
+        nc = n;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn < 0) {
+        xr = a[n - 1];
+        for (j = n - 2; j >= 2; j -= 2) {
+            a[j + 1] = -a[j] - a[j - 1];
+            a[j] -= a[j - 1];
+        }
+        a[1] = a[0] + xr;
+        a[0] -= xr;
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+    }
+    dstsub(n, a, nc, w + nw);
+    if (isgn >= 0) {
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        xr = a[0] - a[1];
+        a[0] += a[1];
+        for (j = 2; j < n; j += 2) {
+            a[j - 1] = -a[j] - a[j + 1];
+            a[j] -= a[j + 1];
+        }
+        a[n - 1] = -xr;
+    }
+}
+
+
+static void dfct(int n, float *a, float *t, int *ip, float *w)
+{
+    int j, k, l, m, mh, nw, nc;
+    float xr, xi, yr, yi;
+
+    nw = ip[0];
+    if (n > (nw << 3)) {
+        nw = n >> 3;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 1)) {
+        nc = n >> 1;
+        makect(nc, ip, w + nw);
+    }
+    m = n >> 1;
+    yi = a[m];
+    xi = a[0] + a[n];
+    a[0] -= a[n];
+    t[0] = xi - yi;
+    t[m] = xi + yi;
+    if (n > 2) {
+        mh = m >> 1;
+        for (j = 1; j < mh; j++) {
+            k = m - j;
+            xr = a[j] - a[n - j];
+            xi = a[j] + a[n - j];
+            yr = a[k] - a[n - k];
+            yi = a[k] + a[n - k];
+            a[j] = xr;
+            a[k] = yr;
+            t[j] = xi - yi;
+            t[k] = xi + yi;
+        }
+        t[mh] = a[mh] + a[n - mh];
+        a[mh] -= a[n - mh];
+        dctsub(m, a, nc, w + nw);
+        if (m > 4) {
+            bitrv2(m, ip + 2, a);
+            cftfsub(m, a, w);
+            rftfsub(m, a, nc, w + nw);
+        } else if (m == 4) {
+            cftfsub(m, a, w);
+        }
+        a[n - 1] = a[0] - a[1];
+        a[1] = a[0] + a[1];
+        for (j = m - 2; j >= 2; j -= 2) {
+            a[2 * j + 1] = a[j] + a[j + 1];
+            a[2 * j - 1] = a[j] - a[j + 1];
+        }
+        l = 2;
+        m = mh;
+        while (m >= 2) {
+            dctsub(m, t, nc, w + nw);
+            if (m > 4) {
+                bitrv2(m, ip + 2, t);
+                cftfsub(m, t, w);
+                rftfsub(m, t, nc, w + nw);
+            } else if (m == 4) {
+                cftfsub(m, t, w);
+            }
+            a[n - l] = t[0] - t[1];
+            a[l] = t[0] + t[1];
+            k = 0;
+            for (j = 2; j < m; j += 2) {
+                k += l << 2;
+                a[k - l] = t[j] - t[j + 1];
+                a[k + l] = t[j] + t[j + 1];
+            }
+            l <<= 1;
+            mh = m >> 1;
+            for (j = 0; j < mh; j++) {
+                k = m - j;
+                t[j] = t[m + k] - t[m + j];
+                t[k] = t[m + k] + t[m + j];
+            }
+            t[mh] = t[m + mh];
+            m = mh;
+        }
+        a[l] = t[0];
+        a[n] = t[2] - t[1];
+        a[0] = t[2] + t[1];
+    } else {
+        a[1] = a[0];
+        a[2] = t[0];
+        a[0] = t[1];
+    }
+}
+
+static void dfst(int n, float *a, float *t, int *ip, float *w)
+{
+    int j, k, l, m, mh, nw, nc;
+    float xr, xi, yr, yi;
+
+    nw = ip[0];
+    if (n > (nw << 3)) {
+        nw = n >> 3;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 1)) {
+        nc = n >> 1;
+        makect(nc, ip, w + nw);
+    }
+    if (n > 2) {
+        m = n >> 1;
+        mh = m >> 1;
+        for (j = 1; j < mh; j++) {
+            k = m - j;
+            xr = a[j] + a[n - j];
+            xi = a[j] - a[n - j];
+            yr = a[k] + a[n - k];
+            yi = a[k] - a[n - k];
+            a[j] = xr;
+            a[k] = yr;
+            t[j] = xi + yi;
+            t[k] = xi - yi;
+        }
+        t[0] = a[mh] - a[n - mh];
+        a[mh] += a[n - mh];
+        a[0] = a[m];
+        dstsub(m, a, nc, w + nw);
+        if (m > 4) {
+            bitrv2(m, ip + 2, a);
+            cftfsub(m, a, w);
+            rftfsub(m, a, nc, w + nw);
+        } else if (m == 4) {
+            cftfsub(m, a, w);
+        }
+        a[n - 1] = a[1] - a[0];
+        a[1] = a[0] + a[1];
+        for (j = m - 2; j >= 2; j -= 2) {
+            a[2 * j + 1] = a[j] - a[j + 1];
+            a[2 * j - 1] = -a[j] - a[j + 1];
+        }
+        l = 2;
+        m = mh;
+        while (m >= 2) {
+            dstsub(m, t, nc, w + nw);
+            if (m > 4) {
+                bitrv2(m, ip + 2, t);
+                cftfsub(m, t, w);
+                rftfsub(m, t, nc, w + nw);
+            } else if (m == 4) {
+                cftfsub(m, t, w);
+            }
+            a[n - l] = t[1] - t[0];
+            a[l] = t[0] + t[1];
+            k = 0;
+            for (j = 2; j < m; j += 2) {
+                k += l << 2;
+                a[k - l] = -t[j] - t[j + 1];
+                a[k + l] = t[j] - t[j + 1];
+            }
+            l <<= 1;
+            mh = m >> 1;
+            for (j = 1; j < mh; j++) {
+                k = m - j;
+                t[j] = t[m + k] + t[m + j];
+                t[k] = t[m + k] - t[m + j];
+            }
+            t[0] = t[m + mh];
+            m = mh;
+        }
+        a[l] = t[0];
+    }
+    a[0] = 0;
+}
+#endif  // Not used.
+
+
+/* -------- initializing routines -------- */
+
+
+#include <math.h>
+
+static void makewt(size_t nw, size_t *ip, float *w)
+{
+    size_t j, nwh;
+    float delta, x, y;
+
+    ip[0] = nw;
+    ip[1] = 1;
+    if (nw > 2) {
+        nwh = nw >> 1;
+        delta = atanf(1.0f) / nwh;
+        w[0] = 1;
+        w[1] = 0;
+        w[nwh] = (float)cos(delta * nwh);
+        w[nwh + 1] = w[nwh];
+        if (nwh > 2) {
+            for (j = 2; j < nwh; j += 2) {
+                x = (float)cos(delta * j);
+                y = (float)sin(delta * j);
+                w[j] = x;
+                w[j + 1] = y;
+                w[nw - j] = y;
+                w[nw - j + 1] = x;
+            }
+            bitrv2(nw, ip + 2, w);
+        }
+    }
+}
+
+
+static void makect(size_t nc, size_t *ip, float *c)
+{
+    size_t j, nch;
+    float delta;
+
+    ip[1] = nc;
+    if (nc > 1) {
+        nch = nc >> 1;
+        delta = atanf(1.0f) / nch;
+        c[0] = (float)cos(delta * nch);
+        c[nch] = 0.5f * c[0];
+        for (j = 1; j < nch; j++) {
+            c[j] = 0.5f * (float)cos(delta * j);
+            c[nc - j] = 0.5f * (float)sin(delta * j);
+        }
+    }
+}
+
+
+/* -------- child routines -------- */
+
+
+static void bitrv2(size_t n, size_t *ip, float *a)
+{
+    size_t j, j1, k, k1, l, m, m2;
+    float xr, xi, yr, yi;
+
+    ip[0] = 0;
+    l = n;
+    m = 1;
+    while ((m << 3) < l) {
+        l >>= 1;
+        for (j = 0; j < m; j++) {
+            ip[m + j] = ip[j] + l;
+        }
+        m <<= 1;
+    }
+    m2 = 2 * m;
+    if ((m << 3) == l) {
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 -= m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            j1 = 2 * k + m2 + ip[k];
+            k1 = j1 + m2;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+        }
+    } else {
+        for (k = 1; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+        }
+    }
+}
+
+#if 0  // Not used.
+static void bitrv2conj(int n, int *ip, float *a)
+{
+    int j, j1, k, k1, l, m, m2;
+    float xr, xi, yr, yi;
+
+    ip[0] = 0;
+    l = n;
+    m = 1;
+    while ((m << 3) < l) {
+        l >>= 1;
+        for (j = 0; j < m; j++) {
+            ip[m + j] = ip[j] + l;
+        }
+        m <<= 1;
+    }
+    m2 = 2 * m;
+    if ((m << 3) == l) {
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 -= m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 2 * k + ip[k];
+            a[k1 + 1] = -a[k1 + 1];
+            j1 = k1 + m2;
+            k1 = j1 + m2;
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            k1 += m2;
+            a[k1 + 1] = -a[k1 + 1];
+        }
+    } else {
+        a[1] = -a[1];
+        a[m2 + 1] = -a[m2 + 1];
+        for (k = 1; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 2 * k + ip[k];
+            a[k1 + 1] = -a[k1 + 1];
+            a[k1 + m2 + 1] = -a[k1 + m2 + 1];
+        }
+    }
+}
+#endif
+
+static void cftfsub(size_t n, float *a, float *w)
+{
+    size_t j, j1, j2, j3, l;
+    float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    l = 2;
+    if (n > 8) {
+        cft1st(n, a, w);
+        l = 8;
+        while ((l << 2) < n) {
+            cftmdl(n, l, a, w);
+            l <<= 2;
+        }
+    }
+    if ((l << 2) == n) {
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            a[j2] = x0r - x2r;
+            a[j2 + 1] = x0i - x2i;
+            a[j1] = x1r - x3i;
+            a[j1 + 1] = x1i + x3r;
+            a[j3] = x1r + x3i;
+            a[j3 + 1] = x1i - x3r;
+        }
+    } else {
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            x0r = a[j] - a[j1];
+            x0i = a[j + 1] - a[j1 + 1];
+            a[j] += a[j1];
+            a[j + 1] += a[j1 + 1];
+            a[j1] = x0r;
+            a[j1 + 1] = x0i;
+        }
+    }
+}
+
+
+static void cftbsub(size_t n, float *a, float *w)
+{
+    size_t j, j1, j2, j3, l;
+    float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    l = 2;
+    if (n > 8) {
+        cft1st(n, a, w);
+        l = 8;
+        while ((l << 2) < n) {
+            cftmdl(n, l, a, w);
+            l <<= 2;
+        }
+    }
+    if ((l << 2) == n) {
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = -a[j + 1] - a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = -a[j + 1] + a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i - x2i;
+            a[j2] = x0r - x2r;
+            a[j2 + 1] = x0i + x2i;
+            a[j1] = x1r - x3i;
+            a[j1 + 1] = x1i - x3r;
+            a[j3] = x1r + x3i;
+            a[j3 + 1] = x1i + x3r;
+        }
+    } else {
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            x0r = a[j] - a[j1];
+            x0i = -a[j + 1] + a[j1 + 1];
+            a[j] += a[j1];
+            a[j + 1] = -a[j + 1] - a[j1 + 1];
+            a[j1] = x0r;
+            a[j1 + 1] = x0i;
+        }
+    }
+}
+
+
+static void cft1st(size_t n, float *a, float *w)
+{
+    size_t j, k1, k2;
+    float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+    float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    x0r = a[0] + a[2];
+    x0i = a[1] + a[3];
+    x1r = a[0] - a[2];
+    x1i = a[1] - a[3];
+    x2r = a[4] + a[6];
+    x2i = a[5] + a[7];
+    x3r = a[4] - a[6];
+    x3i = a[5] - a[7];
+    a[0] = x0r + x2r;
+    a[1] = x0i + x2i;
+    a[4] = x0r - x2r;
+    a[5] = x0i - x2i;
+    a[2] = x1r - x3i;
+    a[3] = x1i + x3r;
+    a[6] = x1r + x3i;
+    a[7] = x1i - x3r;
+    wk1r = w[2];
+    x0r = a[8] + a[10];
+    x0i = a[9] + a[11];
+    x1r = a[8] - a[10];
+    x1i = a[9] - a[11];
+    x2r = a[12] + a[14];
+    x2i = a[13] + a[15];
+    x3r = a[12] - a[14];
+    x3i = a[13] - a[15];
+    a[8] = x0r + x2r;
+    a[9] = x0i + x2i;
+    a[12] = x2i - x0i;
+    a[13] = x0r - x2r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[10] = wk1r * (x0r - x0i);
+    a[11] = wk1r * (x0r + x0i);
+    x0r = x3i + x1r;
+    x0i = x3r - x1i;
+    a[14] = wk1r * (x0i - x0r);
+    a[15] = wk1r * (x0i + x0r);
+    k1 = 0;
+    for (j = 16; j < n; j += 16) {
+        k1 += 2;
+        k2 = 2 * k1;
+        wk2r = w[k1];
+        wk2i = w[k1 + 1];
+        wk1r = w[k2];
+        wk1i = w[k2 + 1];
+        wk3r = wk1r - 2 * wk2i * wk1i;
+        wk3i = 2 * wk2i * wk1r - wk1i;
+        x0r = a[j] + a[j + 2];
+        x0i = a[j + 1] + a[j + 3];
+        x1r = a[j] - a[j + 2];
+        x1i = a[j + 1] - a[j + 3];
+        x2r = a[j + 4] + a[j + 6];
+        x2i = a[j + 5] + a[j + 7];
+        x3r = a[j + 4] - a[j + 6];
+        x3i = a[j + 5] - a[j + 7];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        x0r -= x2r;
+        x0i -= x2i;
+        a[j + 4] = wk2r * x0r - wk2i * x0i;
+        a[j + 5] = wk2r * x0i + wk2i * x0r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j + 2] = wk1r * x0r - wk1i * x0i;
+        a[j + 3] = wk1r * x0i + wk1i * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j + 6] = wk3r * x0r - wk3i * x0i;
+        a[j + 7] = wk3r * x0i + wk3i * x0r;
+        wk1r = w[k2 + 2];
+        wk1i = w[k2 + 3];
+        wk3r = wk1r - 2 * wk2r * wk1i;
+        wk3i = 2 * wk2r * wk1r - wk1i;
+        x0r = a[j + 8] + a[j + 10];
+        x0i = a[j + 9] + a[j + 11];
+        x1r = a[j + 8] - a[j + 10];
+        x1i = a[j + 9] - a[j + 11];
+        x2r = a[j + 12] + a[j + 14];
+        x2i = a[j + 13] + a[j + 15];
+        x3r = a[j + 12] - a[j + 14];
+        x3i = a[j + 13] - a[j + 15];
+        a[j + 8] = x0r + x2r;
+        a[j + 9] = x0i + x2i;
+        x0r -= x2r;
+        x0i -= x2i;
+        a[j + 12] = -wk2i * x0r - wk2r * x0i;
+        a[j + 13] = -wk2i * x0i + wk2r * x0r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j + 10] = wk1r * x0r - wk1i * x0i;
+        a[j + 11] = wk1r * x0i + wk1i * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j + 14] = wk3r * x0r - wk3i * x0i;
+        a[j + 15] = wk3r * x0i + wk3i * x0r;
+    }
+}
+
+
+static void cftmdl(size_t n, size_t l, float *a, float *w)
+{
+    size_t j, j1, j2, j3, k, k1, k2, m, m2;
+    float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+    float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    m = l << 2;
+    for (j = 0; j < l; j += 2) {
+        j1 = j + l;
+        j2 = j1 + l;
+        j3 = j2 + l;
+        x0r = a[j] + a[j1];
+        x0i = a[j + 1] + a[j1 + 1];
+        x1r = a[j] - a[j1];
+        x1i = a[j + 1] - a[j1 + 1];
+        x2r = a[j2] + a[j3];
+        x2i = a[j2 + 1] + a[j3 + 1];
+        x3r = a[j2] - a[j3];
+        x3i = a[j2 + 1] - a[j3 + 1];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        a[j2] = x0r - x2r;
+        a[j2 + 1] = x0i - x2i;
+        a[j1] = x1r - x3i;
+        a[j1 + 1] = x1i + x3r;
+        a[j3] = x1r + x3i;
+        a[j3 + 1] = x1i - x3r;
+    }
+    wk1r = w[2];
+    for (j = m; j < l + m; j += 2) {
+        j1 = j + l;
+        j2 = j1 + l;
+        j3 = j2 + l;
+        x0r = a[j] + a[j1];
+        x0i = a[j + 1] + a[j1 + 1];
+        x1r = a[j] - a[j1];
+        x1i = a[j + 1] - a[j1 + 1];
+        x2r = a[j2] + a[j3];
+        x2i = a[j2 + 1] + a[j3 + 1];
+        x3r = a[j2] - a[j3];
+        x3i = a[j2 + 1] - a[j3 + 1];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        a[j2] = x2i - x0i;
+        a[j2 + 1] = x0r - x2r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j1] = wk1r * (x0r - x0i);
+        a[j1 + 1] = wk1r * (x0r + x0i);
+        x0r = x3i + x1r;
+        x0i = x3r - x1i;
+        a[j3] = wk1r * (x0i - x0r);
+        a[j3 + 1] = wk1r * (x0i + x0r);
+    }
+    k1 = 0;
+    m2 = 2 * m;
+    for (k = m2; k < n; k += m2) {
+        k1 += 2;
+        k2 = 2 * k1;
+        wk2r = w[k1];
+        wk2i = w[k1 + 1];
+        wk1r = w[k2];
+        wk1i = w[k2 + 1];
+        wk3r = wk1r - 2 * wk2i * wk1i;
+        wk3i = 2 * wk2i * wk1r - wk1i;
+        for (j = k; j < l + k; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            x0r -= x2r;
+            x0i -= x2i;
+            a[j2] = wk2r * x0r - wk2i * x0i;
+            a[j2 + 1] = wk2r * x0i + wk2i * x0r;
+            x0r = x1r - x3i;
+            x0i = x1i + x3r;
+            a[j1] = wk1r * x0r - wk1i * x0i;
+            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+            x0r = x1r + x3i;
+            x0i = x1i - x3r;
+            a[j3] = wk3r * x0r - wk3i * x0i;
+            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+        }
+        wk1r = w[k2 + 2];
+        wk1i = w[k2 + 3];
+        wk3r = wk1r - 2 * wk2r * wk1i;
+        wk3i = 2 * wk2r * wk1r - wk1i;
+        for (j = k + m; j < l + (k + m); j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            x0r -= x2r;
+            x0i -= x2i;
+            a[j2] = -wk2i * x0r - wk2r * x0i;
+            a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
+            x0r = x1r - x3i;
+            x0i = x1i + x3r;
+            a[j1] = wk1r * x0r - wk1i * x0i;
+            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+            x0r = x1r + x3i;
+            x0i = x1i - x3r;
+            a[j3] = wk3r * x0r - wk3i * x0i;
+            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+        }
+    }
+}
+
+
+static void rftfsub(size_t n, float *a, size_t nc, float *c)
+{
+    size_t j, k, kk, ks, m;
+    float wkr, wki, xr, xi, yr, yi;
+
+    m = n >> 1;
+    ks = 2 * nc / m;
+    kk = 0;
+    for (j = 2; j < m; j += 2) {
+        k = n - j;
+        kk += ks;
+        wkr = 0.5f - c[nc - kk];
+        wki = c[kk];
+        xr = a[j] - a[k];
+        xi = a[j + 1] + a[k + 1];
+        yr = wkr * xr - wki * xi;
+        yi = wkr * xi + wki * xr;
+        a[j] -= yr;
+        a[j + 1] -= yi;
+        a[k] += yr;
+        a[k + 1] -= yi;
+    }
+}
+
+
+static void rftbsub(size_t n, float *a, size_t nc, float *c)
+{
+    size_t j, k, kk, ks, m;
+    float wkr, wki, xr, xi, yr, yi;
+
+    a[1] = -a[1];
+    m = n >> 1;
+    ks = 2 * nc / m;
+    kk = 0;
+    for (j = 2; j < m; j += 2) {
+        k = n - j;
+        kk += ks;
+        wkr = 0.5f - c[nc - kk];
+        wki = c[kk];
+        xr = a[j] - a[k];
+        xi = a[j + 1] + a[k + 1];
+        yr = wkr * xr + wki * xi;
+        yi = wkr * xi - wki * xr;
+        a[j] -= yr;
+        a[j + 1] = yi - a[j + 1];
+        a[k] += yr;
+        a[k + 1] = yi - a[k + 1];
+    }
+    a[m + 1] = -a[m + 1];
+}
+
+#if 0  // Not used.
+static void dctsub(int n, float *a, int nc, float *c)
+{
+    int j, k, kk, ks, m;
+    float wkr, wki, xr;
+
+    m = n >> 1;
+    ks = nc / n;
+    kk = 0;
+    for (j = 1; j < m; j++) {
+        k = n - j;
+        kk += ks;
+        wkr = c[kk] - c[nc - kk];
+        wki = c[kk] + c[nc - kk];
+        xr = wki * a[j] - wkr * a[k];
+        a[j] = wkr * a[j] + wki * a[k];
+        a[k] = xr;
+    }
+    a[m] *= c[0];
+}
+
+
+static void dstsub(int n, float *a, int nc, float *c)
+{
+    int j, k, kk, ks, m;
+    float wkr, wki, xr;
+
+    m = n >> 1;
+    ks = nc / n;
+    kk = 0;
+    for (j = 1; j < m; j++) {
+        k = n - j;
+        kk += ks;
+        wkr = c[kk] - c[nc - kk];
+        wki = c[kk] + c[nc - kk];
+        xr = wki * a[k] - wkr * a[j];
+        a[k] = wkr * a[k] + wki * a[j];
+        a[j] = xr;
+    }
+    a[m] *= c[0];
+}
+#endif  // Not used.
diff --git a/third_party/webrtc/src/webrtc/common_audio/fft4g.h b/third_party/webrtc/src/webrtc/common_audio/fft4g.h
new file mode 100644
index 00000000..6dd792f6
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/fft4g.h
@@ -0,0 +1,25 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_FFT4G_H_
+#define WEBRTC_COMMON_AUDIO_FFT4G_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+// Refer to fft4g.c for documentation.
+void WebRtc_rdft(size_t n, int isgn, float *a, size_t *ip, float *w);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif  // WEBRTC_COMMON_AUDIO_FFT4G_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/ring_buffer.c b/third_party/webrtc/src/webrtc/common_audio/ring_buffer.c
new file mode 100644
index 00000000..60fb5dff
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/ring_buffer.c
@@ -0,0 +1,247 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// A ring buffer to hold arbitrary data. Provides no thread safety. Unless
+// otherwise specified, functions return 0 on success and -1 on error.
+
+#include "webrtc/common_audio/ring_buffer.h"
+
+#include <stddef.h>  // size_t
+#include <stdlib.h>
+#include <string.h>
+
+enum Wrap {
+  SAME_WRAP,
+  DIFF_WRAP
+};
+
+struct RingBuffer {
+  size_t read_pos;
+  size_t write_pos;
+  size_t element_count;
+  size_t element_size;
+  enum Wrap rw_wrap;
+  char* data;
+};
+
+// Get address of region(s) from which we can read data.
+// If the region is contiguous, |data_ptr_bytes_2| will be zero.
+// If non-contiguous, |data_ptr_bytes_2| will be the size in bytes of the second
+// region. Returns room available to be read or |element_count|, whichever is
+// smaller.
+static size_t GetBufferReadRegions(RingBuffer* buf,
+                                   size_t element_count,
+                                   void** data_ptr_1,
+                                   size_t* data_ptr_bytes_1,
+                                   void** data_ptr_2,
+                                   size_t* data_ptr_bytes_2) {
+
+  const size_t readable_elements = WebRtc_available_read(buf);
+  const size_t read_elements = (readable_elements < element_count ?
+      readable_elements : element_count);
+  const size_t margin = buf->element_count - buf->read_pos;
+
+  // Check to see if read is not contiguous.
+  if (read_elements > margin) {
+    // Write data in two blocks that wrap the buffer.
+    *data_ptr_1 = buf->data + buf->read_pos * buf->element_size;
+    *data_ptr_bytes_1 = margin * buf->element_size;
+    *data_ptr_2 = buf->data;
+    *data_ptr_bytes_2 = (read_elements - margin) * buf->element_size;
+  } else {
+    *data_ptr_1 = buf->data + buf->read_pos * buf->element_size;
+    *data_ptr_bytes_1 = read_elements * buf->element_size;
+    *data_ptr_2 = NULL;
+    *data_ptr_bytes_2 = 0;
+  }
+
+  return read_elements;
+}
+
+RingBuffer* WebRtc_CreateBuffer(size_t element_count, size_t element_size) {
+  RingBuffer* self = NULL;
+  if (element_count == 0 || element_size == 0) {
+    return NULL;
+  }
+
+  self = malloc(sizeof(RingBuffer));
+  if (!self) {
+    return NULL;
+  }
+
+  self->data = malloc(element_count * element_size);
+  if (!self->data) {
+    free(self);
+    self = NULL;
+    return NULL;
+  }
+
+  self->element_count = element_count;
+  self->element_size = element_size;
+  WebRtc_InitBuffer(self);
+
+  return self;
+}
+
+void WebRtc_InitBuffer(RingBuffer* self) {
+  self->read_pos = 0;
+  self->write_pos = 0;
+  self->rw_wrap = SAME_WRAP;
+
+  // Initialize buffer to zeros
+  memset(self->data, 0, self->element_count * self->element_size);
+}
+
+void WebRtc_FreeBuffer(void* handle) {
+  RingBuffer* self = (RingBuffer*)handle;
+  if (!self) {
+    return;
+  }
+
+  free(self->data);
+  free(self);
+}
+
+size_t WebRtc_ReadBuffer(RingBuffer* self,
+                         void** data_ptr,
+                         void* data,
+                         size_t element_count) {
+
+  if (self == NULL) {
+    return 0;
+  }
+  if (data == NULL) {
+    return 0;
+  }
+
+  {
+    void* buf_ptr_1 = NULL;
+    void* buf_ptr_2 = NULL;
+    size_t buf_ptr_bytes_1 = 0;
+    size_t buf_ptr_bytes_2 = 0;
+    const size_t read_count = GetBufferReadRegions(self,
+                                                   element_count,
+                                                   &buf_ptr_1,
+                                                   &buf_ptr_bytes_1,
+                                                   &buf_ptr_2,
+                                                   &buf_ptr_bytes_2);
+
+    if (buf_ptr_bytes_2 > 0) {
+      // We have a wrap around when reading the buffer. Copy the buffer data to
+      // |data| and point to it.
+      memcpy(data, buf_ptr_1, buf_ptr_bytes_1);
+      memcpy(((char*) data) + buf_ptr_bytes_1, buf_ptr_2, buf_ptr_bytes_2);
+      buf_ptr_1 = data;
+    } else if (!data_ptr) {
+      // No wrap, but a memcpy was requested.
+      memcpy(data, buf_ptr_1, buf_ptr_bytes_1);
+    }
+    if (data_ptr) {
+      // |buf_ptr_1| == |data| in the case of a wrap.
+      *data_ptr = buf_ptr_1;
+    }
+
+    // Update read position
+    WebRtc_MoveReadPtr(self, (int) read_count);
+
+    return read_count;
+  }
+}
+
+size_t WebRtc_WriteBuffer(RingBuffer* self,
+                          const void* data,
+                          size_t element_count) {
+  if (!self) {
+    return 0;
+  }
+  if (!data) {
+    return 0;
+  }
+
+  {
+    const size_t free_elements = WebRtc_available_write(self);
+    const size_t write_elements = (free_elements < element_count ? free_elements
+        : element_count);
+    size_t n = write_elements;
+    const size_t margin = self->element_count - self->write_pos;
+
+    if (write_elements > margin) {
+      // Buffer wrap around when writing.
+      memcpy(self->data + self->write_pos * self->element_size,
+             data, margin * self->element_size);
+      self->write_pos = 0;
+      n -= margin;
+      self->rw_wrap = DIFF_WRAP;
+    }
+    memcpy(self->data + self->write_pos * self->element_size,
+           ((const char*) data) + ((write_elements - n) * self->element_size),
+           n * self->element_size);
+    self->write_pos += n;
+
+    return write_elements;
+  }
+}
+
+int WebRtc_MoveReadPtr(RingBuffer* self, int element_count) {
+  if (!self) {
+    return 0;
+  }
+
+  {
+    // We need to be able to take care of negative changes, hence use "int"
+    // instead of "size_t".
+    const int free_elements = (int) WebRtc_available_write(self);
+    const int readable_elements = (int) WebRtc_available_read(self);
+    int read_pos = (int) self->read_pos;
+
+    if (element_count > readable_elements) {
+      element_count = readable_elements;
+    }
+    if (element_count < -free_elements) {
+      element_count = -free_elements;
+    }
+
+    read_pos += element_count;
+    if (read_pos > (int) self->element_count) {
+      // Buffer wrap around. Restart read position and wrap indicator.
+      read_pos -= (int) self->element_count;
+      self->rw_wrap = SAME_WRAP;
+    }
+    if (read_pos < 0) {
+      // Buffer wrap around. Restart read position and wrap indicator.
+      read_pos += (int) self->element_count;
+      self->rw_wrap = DIFF_WRAP;
+    }
+
+    self->read_pos = (size_t) read_pos;
+
+    return element_count;
+  }
+}
+
+size_t WebRtc_available_read(const RingBuffer* self) {
+  if (!self) {
+    return 0;
+  }
+
+  if (self->rw_wrap == SAME_WRAP) {
+    return self->write_pos - self->read_pos;
+  } else {
+    return self->element_count - self->read_pos + self->write_pos;
+  }
+}
+
+size_t WebRtc_available_write(const RingBuffer* self) {
+  if (!self) {
+    return 0;
+  }
+
+  return self->element_count - WebRtc_available_read(self);
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/ring_buffer.h b/third_party/webrtc/src/webrtc/common_audio/ring_buffer.h
new file mode 100644
index 00000000..4125c48d
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/ring_buffer.h
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// A ring buffer to hold arbitrary data. Provides no thread safety. Unless
+// otherwise specified, functions return 0 on success and -1 on error.
+
+#ifndef WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
+#define WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>  // size_t
+
+typedef struct RingBuffer RingBuffer;
+
+// Creates and initializes the buffer. Returns NULL on failure.
+RingBuffer* WebRtc_CreateBuffer(size_t element_count, size_t element_size);
+void WebRtc_InitBuffer(RingBuffer* handle);
+void WebRtc_FreeBuffer(void* handle);
+
+// Reads data from the buffer. The |data_ptr| will point to the address where
+// it is located. If all |element_count| data are feasible to read without
+// buffer wrap around |data_ptr| will point to the location in the buffer.
+// Otherwise, the data will be copied to |data| (memory allocation done by the
+// user) and |data_ptr| points to the address of |data|. |data_ptr| is only
+// guaranteed to be valid until the next call to WebRtc_WriteBuffer().
+//
+// To force a copying to |data|, pass a NULL |data_ptr|.
+//
+// Returns number of elements read.
+size_t WebRtc_ReadBuffer(RingBuffer* handle,
+                         void** data_ptr,
+                         void* data,
+                         size_t element_count);
+
+// Writes |data| to buffer and returns the number of elements written.
+size_t WebRtc_WriteBuffer(RingBuffer* handle, const void* data,
+                          size_t element_count);
+
+// Moves the buffer read position and returns the number of elements moved.
+// Positive |element_count| moves the read position towards the write position,
+// that is, flushing the buffer. Negative |element_count| moves the read
+// position away from the the write position, that is, stuffing the buffer.
+// Returns number of elements moved.
+int WebRtc_MoveReadPtr(RingBuffer* handle, int element_count);
+
+// Returns number of available elements to read.
+size_t WebRtc_available_read(const RingBuffer* handle);
+
+// Returns number of available elements for write.
+size_t WebRtc_available_write(const RingBuffer* handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c
new file mode 100644
index 00000000..f99dd62b
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c
@@ -0,0 +1,103 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_AutoCorrToReflCoef().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_AutoCorrToReflCoef(const int32_t *R, int use_order, int16_t *K)
+{
+    int i, n;
+    int16_t tmp;
+    const int32_t *rptr;
+    int32_t L_num, L_den;
+    int16_t *acfptr, *pptr, *wptr, *p1ptr, *w1ptr, ACF[WEBRTC_SPL_MAX_LPC_ORDER],
+            P[WEBRTC_SPL_MAX_LPC_ORDER], W[WEBRTC_SPL_MAX_LPC_ORDER];
+
+    // Initialize loop and pointers.
+    acfptr = ACF;
+    rptr = R;
+    pptr = P;
+    p1ptr = &P[1];
+    w1ptr = &W[1];
+    wptr = w1ptr;
+
+    // First loop; n=0. Determine shifting.
+    tmp = WebRtcSpl_NormW32(*R);
+    *acfptr = (int16_t)((*rptr++ << tmp) >> 16);
+    *pptr++ = *acfptr++;
+
+    // Initialize ACF, P and W.
+    for (i = 1; i <= use_order; i++)
+    {
+        *acfptr = (int16_t)((*rptr++ << tmp) >> 16);
+        *wptr++ = *acfptr;
+        *pptr++ = *acfptr++;
+    }
+
+    // Compute reflection coefficients.
+    for (n = 1; n <= use_order; n++, K++)
+    {
+        tmp = WEBRTC_SPL_ABS_W16(*p1ptr);
+        if (*P < tmp)
+        {
+            for (i = n; i <= use_order; i++)
+                *K++ = 0;
+
+            return;
+        }
+
+        // Division: WebRtcSpl_div(tmp, *P)
+        *K = 0;
+        if (tmp != 0)
+        {
+            L_num = tmp;
+            L_den = *P;
+            i = 15;
+            while (i--)
+            {
+                (*K) <<= 1;
+                L_num <<= 1;
+                if (L_num >= L_den)
+                {
+                    L_num -= L_den;
+                    (*K)++;
+                }
+            }
+            if (*p1ptr > 0)
+                *K = -*K;
+        }
+
+        // Last iteration; don't do Schur recursion.
+        if (n == use_order)
+            return;
+
+        // Schur recursion.
+        pptr = P;
+        wptr = w1ptr;
+        tmp = (int16_t)(((int32_t)*p1ptr * (int32_t)*K + 16384) >> 15);
+        *pptr = WebRtcSpl_AddSatW16(*pptr, tmp);
+        pptr++;
+        for (i = 1; i <= use_order - n; i++)
+        {
+            tmp = (int16_t)(((int32_t)*wptr * (int32_t)*K + 16384) >> 15);
+            *pptr = WebRtcSpl_AddSatW16(*(pptr + 1), tmp);
+            pptr++;
+            tmp = (int16_t)(((int32_t)*pptr * (int32_t)*K + 16384) >> 15);
+            *wptr = WebRtcSpl_AddSatW16(*wptr, tmp);
+            wptr++;
+        }
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_correlation.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_correlation.c
new file mode 100644
index 00000000..fda4fffe
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_correlation.c
@@ -0,0 +1,65 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <assert.h>
+
+size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector,
+                                 size_t in_vector_length,
+                                 size_t order,
+                                 int32_t* result,
+                                 int* scale) {
+  int32_t sum = 0;
+  size_t i = 0, j = 0;
+  int16_t smax = 0;
+  int scaling = 0;
+
+  assert(order <= in_vector_length);
+
+  // Find the maximum absolute value of the samples.
+  smax = WebRtcSpl_MaxAbsValueW16(in_vector, in_vector_length);
+
+  // In order to avoid overflow when computing the sum we should scale the
+  // samples so that (in_vector_length * smax * smax) will not overflow.
+  if (smax == 0) {
+    scaling = 0;
+  } else {
+    // Number of bits in the sum loop.
+    int nbits = WebRtcSpl_GetSizeInBits((uint32_t)in_vector_length);
+    // Number of bits to normalize smax.
+    int t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));
+
+    if (t > nbits) {
+      scaling = 0;
+    } else {
+      scaling = nbits - t;
+    }
+  }
+
+  // Perform the actual correlation calculation.
+  for (i = 0; i < order + 1; i++) {
+    sum = 0;
+    /* Unroll the loop to improve performance. */
+    for (j = 0; i + j + 3 < in_vector_length; j += 4) {
+      sum += (in_vector[j + 0] * in_vector[i + j + 0]) >> scaling;
+      sum += (in_vector[j + 1] * in_vector[i + j + 1]) >> scaling;
+      sum += (in_vector[j + 2] * in_vector[i + j + 2]) >> scaling;
+      sum += (in_vector[j + 3] * in_vector[i + j + 3]) >> scaling;
+    }
+    for (; j < in_vector_length - i; j++) {
+      sum += (in_vector[j] * in_vector[i + j]) >> scaling;
+    }
+    *result++ = sum;
+  }
+
+  *scale = scaling;
+  return order + 1;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c
new file mode 100644
index 00000000..c8bd2dc4
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c
@@ -0,0 +1,108 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* Tables for data buffer indexes that are bit reversed and thus need to be
+ * swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap
+ * operations, while index_7[{1, 3, 5, ...}] are for the right side of the
+ * operation. Same for index_8.
+ */
+
+/* Indexes for the case of stages == 7. */
+static const int16_t index_7[112] = {
+  1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104,
+  12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52,
+  23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98,
+  37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70,
+  51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69,
+  81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125,
+  103, 115, 111, 123
+};
+
+/* Indexes for the case of stages == 8. */
+static const int16_t index_8[240] = {
+  1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80,
+  11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20,
+  40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184,
+  30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41,
+  148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76,
+  51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62,
+  124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82,
+  75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87,
+  234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101,
+  166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142,
+  115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131,
+  193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201,
+  149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171,
+  213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227,
+  203, 211, 207, 243, 215, 235, 223, 251, 239, 247
+};
+
+void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) {
+  /* For any specific value of stages, we know exactly the indexes that are
+   * bit reversed. Currently (Feb. 2012) in WebRTC the only possible values of
+   * stages are 7 and 8, so we use tables to save unnecessary iterations and
+   * calculations for these two cases.
+   */
+  if (stages == 7 || stages == 8) {
+    int m = 0;
+    int length = 112;
+    const int16_t* index = index_7;
+
+    if (stages == 8) {
+      length = 240;
+      index = index_8;
+    }
+
+    /* Decimation in time. Swap the elements with bit-reversed indexes. */
+    for (m = 0; m < length; m += 2) {
+      /* We declare a int32_t* type pointer, to load both the 16-bit real
+       * and imaginary elements from complex_data in one instruction, reducing
+       * complexity.
+       */
+      int32_t* complex_data_ptr = (int32_t*)complex_data;
+      int32_t temp = 0;
+
+      temp = complex_data_ptr[index[m]];  /* Real and imaginary */
+      complex_data_ptr[index[m]] = complex_data_ptr[index[m + 1]];
+      complex_data_ptr[index[m + 1]] = temp;
+    }
+  }
+  else {
+    int m = 0, mr = 0, l = 0;
+    int n = 1 << stages;
+    int nn = n - 1;
+
+    /* Decimation in time - re-order data */
+    for (m = 1; m <= nn; ++m) {
+      int32_t* complex_data_ptr = (int32_t*)complex_data;
+      int32_t temp = 0;
+
+      /* Find out indexes that are bit-reversed. */
+      l = n;
+      do {
+        l >>= 1;
+      } while (l > nn - mr);
+      mr = (mr & (l - 1)) + l;
+
+      if (mr <= m) {
+        continue;
+      }
+
+      /* Swap the elements with bit-reversed indexes.
+       * This is similar to the loop in the stages == 7 or 8 cases.
+       */
+      temp = complex_data_ptr[m];  /* Real and imaginary */
+      complex_data_ptr[m] = complex_data_ptr[mr];
+      complex_data_ptr[mr] = temp;
+    }
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S
new file mode 100644
index 00000000..e7f8a819
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S
@@ -0,0 +1,119 @@
+@
+@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+@
+@ Use of this source code is governed by a BSD-style license
+@ that can be found in the LICENSE file in the root of the source
+@ tree. An additional intellectual property rights grant can be found
+@ in the file PATENTS.  All contributing project authors may
+@ be found in the AUTHORS file in the root of the source tree.
+@
+
+@ This file contains the function WebRtcSpl_ComplexBitReverse(), optimized
+@ for ARMv5 platforms.
+@ Reference C code is in file complex_bit_reverse.c. Bit-exact.
+
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+
+GLOBAL_FUNCTION WebRtcSpl_ComplexBitReverse
+.align  2
+DEFINE_FUNCTION WebRtcSpl_ComplexBitReverse
+  push {r4-r7}
+
+  cmp r1, #7
+  adr r3, index_7                 @ Table pointer.
+  mov r4, #112                    @ Number of interations.
+  beq PRE_LOOP_STAGES_7_OR_8
+
+  cmp r1, #8
+  adr r3, index_8                 @ Table pointer.
+  mov r4, #240                    @ Number of interations.
+  beq PRE_LOOP_STAGES_7_OR_8
+
+  mov r3, #1                      @ Initialize m.
+  mov r1, r3, asl r1              @ n = 1 << stages;
+  subs r6, r1, #1                 @ nn = n - 1;
+  ble END
+
+  mov r5, r0                      @ &complex_data
+  mov r4, #0                      @ ml
+
+LOOP_GENERIC:
+  rsb r12, r4, r6                 @ l > nn - mr
+  mov r2, r1                      @ n
+
+LOOP_SHIFT:
+  asr r2, #1                      @ l >>= 1;
+  cmp r2, r12
+  bgt LOOP_SHIFT
+
+  sub r12, r2, #1
+  and r4, r12, r4
+  add r4, r2                      @ mr = (mr & (l - 1)) + l;
+  cmp r4, r3                      @ mr <= m ?
+  ble UPDATE_REGISTERS
+
+  mov r12, r4, asl #2
+  ldr r7, [r5, #4]                @ complex_data[2 * m, 2 * m + 1].
+                                  @   Offset 4 due to m incrementing from 1.
+  ldr r2, [r0, r12]               @ complex_data[2 * mr, 2 * mr + 1].
+  str r7, [r0, r12]
+  str r2, [r5, #4]
+
+UPDATE_REGISTERS:
+  add r3, r3, #1
+  add r5, #4
+  cmp r3, r1
+  bne LOOP_GENERIC
+
+  b END
+
+PRE_LOOP_STAGES_7_OR_8:
+  add r4, r3, r4, asl #1
+
+LOOP_STAGES_7_OR_8:
+  ldrsh r2, [r3], #2              @ index[m]
+  ldrsh r5, [r3], #2              @ index[m + 1]
+  ldr r1, [r0, r2]                @ complex_data[index[m], index[m] + 1]
+  ldr r12, [r0, r5]               @ complex_data[index[m + 1], index[m + 1] + 1]
+  cmp r3, r4
+  str r1, [r0, r5]
+  str r12, [r0, r2]
+  bne LOOP_STAGES_7_OR_8
+
+END:
+  pop {r4-r7}
+  bx lr
+
+@ The index tables. Note the values are doubles of the actual indexes for 16-bit
+@ elements, different from the generic C code. It actually provides byte offsets
+@ for the indexes.
+
+.align  2
+index_7:  @ Indexes for stages == 7.
+  .short 4, 256, 8, 128, 12, 384, 16, 64, 20, 320, 24, 192, 28, 448, 36, 288
+  .short 40, 160, 44, 416, 48, 96, 52, 352, 56, 224, 60, 480, 68, 272, 72, 144
+  .short 76, 400, 84, 336, 88, 208, 92, 464, 100, 304, 104, 176, 108, 432, 116
+  .short 368, 120, 240, 124, 496, 132, 264, 140, 392, 148, 328, 152, 200, 156
+  .short 456, 164, 296, 172, 424, 180, 360, 184, 232, 188, 488, 196, 280, 204
+  .short 408, 212, 344, 220, 472, 228, 312, 236, 440, 244, 376, 252, 504, 268
+  .short 388, 276, 324, 284, 452, 300, 420, 308, 356, 316, 484, 332, 404, 348
+  .short 468, 364, 436, 380, 500, 412, 460, 444, 492
+
+index_8:  @ Indexes for stages == 8.
+  .short 4, 512, 8, 256, 12, 768, 16, 128, 20, 640, 24, 384, 28, 896, 32, 64
+  .short 36, 576, 40, 320, 44, 832, 48, 192, 52, 704, 56, 448, 60, 960, 68, 544
+  .short 72, 288, 76, 800, 80, 160, 84, 672, 88, 416, 92, 928, 100, 608, 104
+  .short 352, 108, 864, 112, 224, 116, 736, 120, 480, 124, 992, 132, 528, 136
+  .short 272, 140, 784, 148, 656, 152, 400, 156, 912, 164, 592, 168, 336, 172
+  .short 848, 176, 208, 180, 720, 184, 464, 188, 976, 196, 560, 200, 304, 204
+  .short 816, 212, 688, 216, 432, 220, 944, 228, 624, 232, 368, 236, 880, 244
+  .short 752, 248, 496, 252, 1008, 260, 520, 268, 776, 276, 648, 280, 392, 284
+  .short 904, 292, 584, 296, 328, 300, 840, 308, 712, 312, 456, 316, 968, 324
+  .short 552, 332, 808, 340, 680, 344, 424, 348, 936, 356, 616, 364, 872, 372
+  .short 744, 376, 488, 380, 1000, 388, 536, 396, 792, 404, 664, 412, 920, 420
+  .short 600, 428, 856, 436, 728, 440, 472, 444, 984, 452, 568, 460, 824, 468
+  .short 696, 476, 952, 484, 632, 492, 888, 500, 760, 508, 1016, 524, 772, 532
+  .short 644, 540, 900, 548, 580, 556, 836, 564, 708, 572, 964, 588, 804, 596
+  .short 676, 604, 932, 620, 868, 628, 740, 636, 996, 652, 788, 668, 916, 684
+  .short 852, 692, 724, 700, 980, 716, 820, 732, 948, 748, 884, 764, 1012, 796
+  .short 908, 812, 844, 828, 972, 860, 940, 892, 1004, 956, 988
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_mips.c
new file mode 100644
index 00000000..583fe4f6
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_mips.c
@@ -0,0 +1,176 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+static int16_t coefTable_7[] = {
+    4, 256,   8, 128,  12, 384,  16,  64,
+   20, 320,  24, 192,  28, 448,  36, 288,
+   40, 160,  44, 416,  48,  96,  52, 352,
+   56, 224,  60, 480,  68, 272,  72, 144,
+   76, 400,  84, 336,  88, 208,  92, 464,
+  100, 304, 104, 176, 108, 432, 116, 368,
+  120, 240, 124, 496, 132, 264, 140, 392,
+  148, 328, 152, 200, 156, 456, 164, 296,
+  172, 424, 180, 360, 184, 232, 188, 488,
+  196, 280, 204, 408, 212, 344, 220, 472,
+  228, 312, 236, 440, 244, 376, 252, 504,
+  268, 388, 276, 324, 284, 452, 300, 420,
+  308, 356, 316, 484, 332, 404, 348, 468,
+  364, 436, 380, 500, 412, 460, 444, 492
+};
+
+static int16_t coefTable_8[] = {
+    4,  512,    8,  256,   12,  768,   16,  128,
+   20,  640,   24,  384,   28,  896,   32,   64,
+   36,  576,   40,  320,   44,  832,   48,  192,
+   52,  704,   56,  448,   60,  960,   68,  544,
+   72,  288,   76,  800,   80,  160,   84,  672,
+   88,  416,   92,  928,  100,  608,  104,  352,
+  108,  864,  112,  224,  116,  736,  120,  480,
+  124,  992,  132,  528,  136,  272,  140,  784,
+  148,  656,  152,  400,  156,  912,  164,  592,
+  168,  336,  172,  848,  176,  208,  180,  720,
+  184,  464,  188,  976,  196,  560,  200,  304,
+  204,  816,  212,  688,  216,  432,  220,  944,
+  228,  624,  232,  368,  236,  880,  244,  752,
+  248,  496,  252, 1008,  260,  520,  268,  776,
+  276,  648,  280,  392,  284,  904,  292,  584,
+  296,  328,  300,  840,  308,  712,  312,  456,
+  316,  968,  324,  552,  332,  808,  340,  680,
+  344,  424,  348,  936,  356,  616,  364,  872,
+  372,  744,  376,  488,  380, 1000,  388,  536,
+  396,  792,  404,  664,  412,  920,  420,  600,
+  428,  856,  436,  728,  440,  472,  444,  984,
+  452,  568,  460,  824,  468,  696,  476,  952,
+  484,  632,  492,  888,  500,  760,  508, 1016,
+  524,  772,  532,  644,  540,  900,  548,  580,
+  556,  836,  564,  708,  572,  964,  588,  804,
+  596,  676,  604,  932,  620,  868,  628,  740,
+  636,  996,  652,  788,  668,  916,  684,  852,
+  692,  724,  700,  980,  716,  820,  732,  948,
+  748,  884,  764, 1012,  796,  908,  812,  844,
+  828,  972,  860,  940,  892, 1004,  956,  988
+};
+
+void WebRtcSpl_ComplexBitReverse(int16_t frfi[], int stages) {
+  int l;
+  int16_t tr, ti;
+  int32_t tmp1, tmp2, tmp3, tmp4;
+  int32_t* ptr_i;
+  int32_t* ptr_j;
+
+  if (stages == 8) {
+    int16_t* pcoeftable_8 = coefTable_8;
+
+    __asm __volatile (
+      ".set         push                                             \n\t"
+      ".set         noreorder                                        \n\t"
+      "addiu        %[l],            $zero,               120        \n\t"
+     "1:                                                             \n\t"
+      "addiu        %[l],            %[l],                -4         \n\t"
+      "lh           %[tr],           0(%[pcoeftable_8])              \n\t"
+      "lh           %[ti],           2(%[pcoeftable_8])              \n\t"
+      "lh           %[tmp3],         4(%[pcoeftable_8])              \n\t"
+      "lh           %[tmp4],         6(%[pcoeftable_8])              \n\t"
+      "addu         %[ptr_i],        %[frfi],             %[tr]      \n\t"
+      "addu         %[ptr_j],        %[frfi],             %[ti]      \n\t"
+      "addu         %[tr],           %[frfi],             %[tmp3]    \n\t"
+      "addu         %[ti],           %[frfi],             %[tmp4]    \n\t"
+      "ulw          %[tmp1],         0(%[ptr_i])                     \n\t"
+      "ulw          %[tmp2],         0(%[ptr_j])                     \n\t"
+      "ulw          %[tmp3],         0(%[tr])                        \n\t"
+      "ulw          %[tmp4],         0(%[ti])                        \n\t"
+      "usw          %[tmp1],         0(%[ptr_j])                     \n\t"
+      "usw          %[tmp2],         0(%[ptr_i])                     \n\t"
+      "usw          %[tmp4],         0(%[tr])                        \n\t"
+      "usw          %[tmp3],         0(%[ti])                        \n\t"
+      "lh           %[tmp1],         8(%[pcoeftable_8])              \n\t"
+      "lh           %[tmp2],         10(%[pcoeftable_8])             \n\t"
+      "lh           %[tr],           12(%[pcoeftable_8])             \n\t"
+      "lh           %[ti],           14(%[pcoeftable_8])             \n\t"
+      "addu         %[ptr_i],        %[frfi],             %[tmp1]    \n\t"
+      "addu         %[ptr_j],        %[frfi],             %[tmp2]    \n\t"
+      "addu         %[tr],           %[frfi],             %[tr]      \n\t"
+      "addu         %[ti],           %[frfi],             %[ti]      \n\t"
+      "ulw          %[tmp1],         0(%[ptr_i])                     \n\t"
+      "ulw          %[tmp2],         0(%[ptr_j])                     \n\t"
+      "ulw          %[tmp3],         0(%[tr])                        \n\t"
+      "ulw          %[tmp4],         0(%[ti])                        \n\t"
+      "usw          %[tmp1],         0(%[ptr_j])                     \n\t"
+      "usw          %[tmp2],         0(%[ptr_i])                     \n\t"
+      "usw          %[tmp4],         0(%[tr])                        \n\t"
+      "usw          %[tmp3],         0(%[ti])                        \n\t"
+      "bgtz         %[l],            1b                              \n\t"
+      " addiu       %[pcoeftable_8], %[pcoeftable_8],     16         \n\t"
+      ".set         pop                                              \n\t"
+
+      : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i),
+        [ptr_j] "=&r" (ptr_j), [tr] "=&r" (tr), [l] "=&r" (l),
+        [tmp3] "=&r" (tmp3), [pcoeftable_8] "+r" (pcoeftable_8),
+        [ti] "=&r" (ti), [tmp4] "=&r" (tmp4)
+      : [frfi] "r" (frfi)
+      : "memory"
+    );
+  } else if (stages == 7) {
+    int16_t* pcoeftable_7 = coefTable_7;
+
+    __asm __volatile (
+      ".set push                                                     \n\t"
+      ".set noreorder                                                \n\t"
+      "addiu        %[l],            $zero,               56         \n\t"
+     "1:                                                             \n\t"
+      "addiu        %[l],            %[l],                -4         \n\t"
+      "lh           %[tr],           0(%[pcoeftable_7])              \n\t"
+      "lh           %[ti],           2(%[pcoeftable_7])              \n\t"
+      "lh           %[tmp3],         4(%[pcoeftable_7])              \n\t"
+      "lh           %[tmp4],         6(%[pcoeftable_7])              \n\t"
+      "addu         %[ptr_i],        %[frfi],             %[tr]      \n\t"
+      "addu         %[ptr_j],        %[frfi],             %[ti]      \n\t"
+      "addu         %[tr],           %[frfi],             %[tmp3]    \n\t"
+      "addu         %[ti],           %[frfi],             %[tmp4]    \n\t"
+      "ulw          %[tmp1],         0(%[ptr_i])                     \n\t"
+      "ulw          %[tmp2],         0(%[ptr_j])                     \n\t"
+      "ulw          %[tmp3],         0(%[tr])                        \n\t"
+      "ulw          %[tmp4],         0(%[ti])                        \n\t"
+      "usw          %[tmp1],         0(%[ptr_j])                     \n\t"
+      "usw          %[tmp2],         0(%[ptr_i])                     \n\t"
+      "usw          %[tmp4],         0(%[tr])                        \n\t"
+      "usw          %[tmp3],         0(%[ti])                        \n\t"
+      "lh           %[tmp1],         8(%[pcoeftable_7])              \n\t"
+      "lh           %[tmp2],         10(%[pcoeftable_7])             \n\t"
+      "lh           %[tr],           12(%[pcoeftable_7])             \n\t"
+      "lh           %[ti],           14(%[pcoeftable_7])             \n\t"
+      "addu         %[ptr_i],        %[frfi],             %[tmp1]    \n\t"
+      "addu         %[ptr_j],        %[frfi],             %[tmp2]    \n\t"
+      "addu         %[tr],           %[frfi],             %[tr]      \n\t"
+      "addu         %[ti],           %[frfi],             %[ti]      \n\t"
+      "ulw          %[tmp1],         0(%[ptr_i])                     \n\t"
+      "ulw          %[tmp2],         0(%[ptr_j])                     \n\t"
+      "ulw          %[tmp3],         0(%[tr])                        \n\t"
+      "ulw          %[tmp4],         0(%[ti])                        \n\t"
+      "usw          %[tmp1],         0(%[ptr_j])                     \n\t"
+      "usw          %[tmp2],         0(%[ptr_i])                     \n\t"
+      "usw          %[tmp4],         0(%[tr])                        \n\t"
+      "usw          %[tmp3],         0(%[ti])                        \n\t"
+      "bgtz         %[l],            1b                              \n\t"
+      " addiu       %[pcoeftable_7], %[pcoeftable_7],     16         \n\t"
+      ".set pop                                                      \n\t"
+
+      : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i),
+        [ptr_j] "=&r" (ptr_j), [ti] "=&r" (ti), [tr] "=&r" (tr),
+        [l] "=&r" (l), [pcoeftable_7] "+r" (pcoeftable_7),
+        [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
+      : [frfi] "r" (frfi)
+      : "memory"
+    );
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft.c
new file mode 100644
index 00000000..97ebacc4
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft.c
@@ -0,0 +1,298 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_ComplexFFT().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/complex_fft_tables.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#define CFFTSFT 14
+#define CFFTRND 1
+#define CFFTRND2 16384
+
+#define CIFFTSFT 14
+#define CIFFTRND 1
+
+
+int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode)
+{
+    int i, j, l, k, istep, n, m;
+    int16_t wr, wi;
+    int32_t tr32, ti32, qr32, qi32;
+
+    /* The 1024-value is a constant given from the size of kSinTable1024[],
+     * and should not be changed depending on the input parameter 'stages'
+     */
+    n = 1 << stages;
+    if (n > 1024)
+        return -1;
+
+    l = 1;
+    k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
+         depending on the input parameter 'stages' */
+
+    if (mode == 0)
+    {
+        // mode==0: Low-complexity and Low-accuracy mode
+        while (l < n)
+        {
+            istep = l << 1;
+
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = -kSinTable1024[j];
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+                    tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
+
+                    ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
+
+                    qr32 = (int32_t)frfi[2 * i];
+                    qi32 = (int32_t)frfi[2 * i + 1];
+                    frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1);
+                    frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1);
+                    frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1);
+                    frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1);
+                }
+            }
+
+            --k;
+            l = istep;
+
+        }
+
+    } else
+    {
+        // mode==1: High-complexity and High-accuracy mode
+        while (l < n)
+        {
+            istep = l << 1;
+
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = -kSinTable1024[j];
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                int32_t wri = 0;
+                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
+                    "r"((int32_t)wr), "r"((int32_t)wi));
+#endif
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                    register int32_t frfi_r;
+                    __asm __volatile(
+                        "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd],"
+                        " lsl #16\n\t"
+                        "smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
+                        "smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
+                        :[frfi_r]"=&r"(frfi_r),
+                         [tr32]"=&r"(tr32),
+                         [ti32]"=r"(ti32)
+                        :[frfi_even]"r"((int32_t)frfi[2*j]),
+                         [frfi_odd]"r"((int32_t)frfi[2*j +1]),
+                         [wri]"r"(wri),
+                         [cfftrnd]"r"(CFFTRND));
+#else
+                    tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND;
+
+                    ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND;
+#endif
+
+                    tr32 >>= 15 - CFFTSFT;
+                    ti32 >>= 15 - CFFTSFT;
+
+                    qr32 = ((int32_t)frfi[2 * i]) << CFFTSFT;
+                    qi32 = ((int32_t)frfi[2 * i + 1]) << CFFTSFT;
+
+                    frfi[2 * j] = (int16_t)(
+                        (qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * j + 1] = (int16_t)(
+                        (qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * i] = (int16_t)(
+                        (qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * i + 1] = (int16_t)(
+                        (qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT));
+                }
+            }
+
+            --k;
+            l = istep;
+        }
+    }
+    return 0;
+}
+
+int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode)
+{
+    size_t i, j, l, istep, n, m;
+    int k, scale, shift;
+    int16_t wr, wi;
+    int32_t tr32, ti32, qr32, qi32;
+    int32_t tmp32, round2;
+
+    /* The 1024-value is a constant given from the size of kSinTable1024[],
+     * and should not be changed depending on the input parameter 'stages'
+     */
+    n = 1 << stages;
+    if (n > 1024)
+        return -1;
+
+    scale = 0;
+
+    l = 1;
+    k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
+         depending on the input parameter 'stages' */
+
+    while (l < n)
+    {
+        // variable scaling, depending upon data
+        shift = 0;
+        round2 = 8192;
+
+        tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n);
+        if (tmp32 > 13573)
+        {
+            shift++;
+            scale++;
+            round2 <<= 1;
+        }
+        if (tmp32 > 27146)
+        {
+            shift++;
+            scale++;
+            round2 <<= 1;
+        }
+
+        istep = l << 1;
+
+        if (mode == 0)
+        {
+            // mode==0: Low-complexity and Low-accuracy mode
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = kSinTable1024[j];
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+                    tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
+
+                    ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
+
+                    qr32 = (int32_t)frfi[2 * i];
+                    qi32 = (int32_t)frfi[2 * i + 1];
+                    frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift);
+                    frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift);
+                    frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift);
+                    frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift);
+                }
+            }
+        } else
+        {
+            // mode==1: High-complexity and High-accuracy mode
+
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = kSinTable1024[j];
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                int32_t wri = 0;
+                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
+                    "r"((int32_t)wr), "r"((int32_t)wi));
+#endif
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                    register int32_t frfi_r;
+                    __asm __volatile(
+                      "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t"
+                      "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
+                      "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
+                      :[frfi_r]"=&r"(frfi_r),
+                       [tr32]"=&r"(tr32),
+                       [ti32]"=r"(ti32)
+                      :[frfi_even]"r"((int32_t)frfi[2*j]),
+                       [frfi_odd]"r"((int32_t)frfi[2*j +1]),
+                       [wri]"r"(wri),
+                       [cifftrnd]"r"(CIFFTRND)
+                    );
+#else
+
+                    tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND;
+
+                    ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND;
+#endif
+                    tr32 >>= 15 - CIFFTSFT;
+                    ti32 >>= 15 - CIFFTSFT;
+
+                    qr32 = ((int32_t)frfi[2 * i]) << CIFFTSFT;
+                    qi32 = ((int32_t)frfi[2 * i + 1]) << CIFFTSFT;
+
+                    frfi[2 * j] = (int16_t)(
+                        (qr32 - tr32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * j + 1] = (int16_t)(
+                        (qi32 - ti32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * i] = (int16_t)(
+                        (qr32 + tr32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * i + 1] = (int16_t)(
+                        (qi32 + ti32 + round2) >> (shift + CIFFTSFT));
+                }
+            }
+
+        }
+        --k;
+        l = istep;
+    }
+    return scale;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_mips.c
new file mode 100644
index 00000000..34c4f232
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_mips.c
@@ -0,0 +1,328 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "webrtc/common_audio/signal_processing/complex_fft_tables.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#define CFFTSFT 14
+#define CFFTRND 1
+#define CFFTRND2 16384
+
+#define CIFFTSFT 14
+#define CIFFTRND 1
+
+int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) {
+  int i = 0;
+  int l = 0;
+  int k = 0;
+  int istep = 0;
+  int n = 0;
+  int m = 0;
+  int32_t wr = 0, wi = 0;
+  int32_t tmp1 = 0;
+  int32_t tmp2 = 0;
+  int32_t tmp3 = 0;
+  int32_t tmp4 = 0;
+  int32_t tmp5 = 0;
+  int32_t tmp6 = 0;
+  int32_t tmp = 0;
+  int16_t* ptr_j = NULL;
+  int16_t* ptr_i = NULL;
+
+  n = 1 << stages;
+  if (n > 1024) {
+    return -1;
+  }
+
+  __asm __volatile (
+    ".set push                                                         \n\t"
+    ".set noreorder                                                    \n\t"
+
+    "addiu      %[k],           $zero,            10                   \n\t"
+    "addiu      %[l],           $zero,            1                    \n\t"
+   "3:                                                                 \n\t"
+    "sll        %[istep],       %[l],             1                    \n\t"
+    "move       %[m],           $zero                                  \n\t"
+    "sll        %[tmp],         %[l],             2                    \n\t"
+    "move       %[i],           $zero                                  \n\t"
+   "2:                                                                 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "sllv       %[tmp3],        %[m],             %[k]                 \n\t"
+    "addiu      %[tmp2],        %[tmp3],          512                  \n\t"
+    "addiu      %[m],           %[m],             1                    \n\t"
+    "lhx        %[wi],          %[tmp3](%[kSinTable1024])              \n\t"
+    "lhx        %[wr],          %[tmp2](%[kSinTable1024])              \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "sllv       %[tmp3],        %[m],             %[k]                 \n\t"
+    "addu       %[ptr_j],       %[tmp3],          %[kSinTable1024]     \n\t"
+    "addiu      %[ptr_i],       %[ptr_j],         512                  \n\t"
+    "addiu      %[m],           %[m],             1                    \n\t"
+    "lh         %[wi],          0(%[ptr_j])                            \n\t"
+    "lh         %[wr],          0(%[ptr_i])                            \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+   "1:                                                                 \n\t"
+    "sll        %[tmp1],        %[i],             2                    \n\t"
+    "addu       %[ptr_i],       %[frfi],          %[tmp1]              \n\t"
+    "addu       %[ptr_j],       %[ptr_i],         %[tmp]               \n\t"
+    "lh         %[tmp6],        0(%[ptr_i])                            \n\t"
+    "lh         %[tmp5],        2(%[ptr_i])                            \n\t"
+    "lh         %[tmp3],        0(%[ptr_j])                            \n\t"
+    "lh         %[tmp4],        2(%[ptr_j])                            \n\t"
+    "addu       %[i],           %[i],             %[istep]             \n\t"
+#if defined(MIPS_DSP_R2_LE)
+    "mult       %[wr],          %[tmp3]                                \n\t"
+    "madd       %[wi],          %[tmp4]                                \n\t"
+    "mult       $ac1,           %[wr],            %[tmp4]              \n\t"
+    "msub       $ac1,           %[wi],            %[tmp3]              \n\t"
+    "mflo       %[tmp1]                                                \n\t"
+    "mflo       %[tmp2],        $ac1                                   \n\t"
+    "sll        %[tmp6],        %[tmp6],          14                   \n\t"
+    "sll        %[tmp5],        %[tmp5],          14                   \n\t"
+    "shra_r.w   %[tmp1],        %[tmp1],          1                    \n\t"
+    "shra_r.w   %[tmp2],        %[tmp2],          1                    \n\t"
+    "subu       %[tmp4],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp1],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp6],        %[tmp5],          %[tmp2]              \n\t"
+    "subu       %[tmp5],        %[tmp5],          %[tmp2]              \n\t"
+    "shra_r.w   %[tmp1],        %[tmp1],          15                   \n\t"
+    "shra_r.w   %[tmp6],        %[tmp6],          15                   \n\t"
+    "shra_r.w   %[tmp4],        %[tmp4],          15                   \n\t"
+    "shra_r.w   %[tmp5],        %[tmp5],          15                   \n\t"
+#else  // #if defined(MIPS_DSP_R2_LE)
+    "mul        %[tmp2],        %[wr],            %[tmp4]              \n\t"
+    "mul        %[tmp1],        %[wr],            %[tmp3]              \n\t"
+    "mul        %[tmp4],        %[wi],            %[tmp4]              \n\t"
+    "mul        %[tmp3],        %[wi],            %[tmp3]              \n\t"
+    "sll        %[tmp6],        %[tmp6],          14                   \n\t"
+    "sll        %[tmp5],        %[tmp5],          14                   \n\t"
+    "addiu      %[tmp6],        %[tmp6],          16384                \n\t"
+    "addiu      %[tmp5],        %[tmp5],          16384                \n\t"
+    "addu       %[tmp1],        %[tmp1],          %[tmp4]              \n\t"
+    "subu       %[tmp2],        %[tmp2],          %[tmp3]              \n\t"
+    "addiu      %[tmp1],        %[tmp1],          1                    \n\t"
+    "addiu      %[tmp2],        %[tmp2],          1                    \n\t"
+    "sra        %[tmp1],        %[tmp1],          1                    \n\t"
+    "sra        %[tmp2],        %[tmp2],          1                    \n\t"
+    "subu       %[tmp4],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp1],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp6],        %[tmp5],          %[tmp2]              \n\t"
+    "subu       %[tmp5],        %[tmp5],          %[tmp2]              \n\t"
+    "sra        %[tmp4],        %[tmp4],          15                   \n\t"
+    "sra        %[tmp1],        %[tmp1],          15                   \n\t"
+    "sra        %[tmp6],        %[tmp6],          15                   \n\t"
+    "sra        %[tmp5],        %[tmp5],          15                   \n\t"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+    "sh         %[tmp1],        0(%[ptr_i])                            \n\t"
+    "sh         %[tmp6],        2(%[ptr_i])                            \n\t"
+    "sh         %[tmp4],        0(%[ptr_j])                            \n\t"
+    "blt        %[i],           %[n],             1b                   \n\t"
+    " sh        %[tmp5],        2(%[ptr_j])                            \n\t"
+    "blt        %[m],           %[l],             2b                   \n\t"
+    " addu      %[i],           $zero,            %[m]                 \n\t"
+    "move       %[l],           %[istep]                               \n\t"
+    "blt        %[l],           %[n],             3b                   \n\t"
+    " addiu     %[k],           %[k],             -1                   \n\t"
+
+    ".set pop                                                          \n\t"
+
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+      [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+      [ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [wi] "=&r" (wi), [wr] "=&r" (wr),
+      [m] "=&r" (m), [istep] "=&r" (istep), [l] "=&r" (l), [k] "=&r" (k),
+      [ptr_j] "=&r" (ptr_j), [tmp] "=&r" (tmp)
+    : [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024)
+    : "hi", "lo", "memory"
+#if defined(MIPS_DSP_R2_LE)
+    , "$ac1hi", "$ac1lo"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+  );
+
+  return 0;
+}
+
+int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) {
+  int i = 0, l = 0, k = 0;
+  int istep = 0, n = 0, m = 0;
+  int scale = 0, shift = 0;
+  int32_t wr = 0, wi = 0;
+  int32_t tmp1 = 0, tmp2 = 0, tmp3 = 0, tmp4 = 0;
+  int32_t tmp5 = 0, tmp6 = 0, tmp = 0, tempMax = 0, round2 = 0;
+  int16_t* ptr_j = NULL;
+  int16_t* ptr_i = NULL;
+
+  n = 1 << stages;
+  if (n > 1024) {
+    return -1;
+  }
+
+  __asm __volatile (
+    ".set push                                                         \n\t"
+    ".set noreorder                                                    \n\t"
+
+    "addiu      %[k],           $zero,            10                   \n\t"
+    "addiu      %[l],           $zero,            1                    \n\t"
+    "move       %[scale],       $zero                                  \n\t"
+   "3:                                                                 \n\t"
+    "addiu      %[shift],       $zero,            14                   \n\t"
+    "addiu      %[round2],      $zero,            8192                 \n\t"
+    "move       %[ptr_i],       %[frfi]                                \n\t"
+    "move       %[tempMax],     $zero                                  \n\t"
+    "addu       %[i],           %[n],             %[n]                 \n\t"
+   "5:                                                                 \n\t"
+    "lh         %[tmp1],        0(%[ptr_i])                            \n\t"
+    "lh         %[tmp2],        2(%[ptr_i])                            \n\t"
+    "lh         %[tmp3],        4(%[ptr_i])                            \n\t"
+    "lh         %[tmp4],        6(%[ptr_i])                            \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "absq_s.w   %[tmp1],        %[tmp1]                                \n\t"
+    "absq_s.w   %[tmp2],        %[tmp2]                                \n\t"
+    "absq_s.w   %[tmp3],        %[tmp3]                                \n\t"
+    "absq_s.w   %[tmp4],        %[tmp4]                                \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "slt        %[tmp5],        %[tmp1],          $zero                \n\t"
+    "subu       %[tmp6],        $zero,            %[tmp1]              \n\t"
+    "movn       %[tmp1],        %[tmp6],          %[tmp5]              \n\t"
+    "slt        %[tmp5],        %[tmp2],          $zero                \n\t"
+    "subu       %[tmp6],        $zero,            %[tmp2]              \n\t"
+    "movn       %[tmp2],        %[tmp6],          %[tmp5]              \n\t"
+    "slt        %[tmp5],        %[tmp3],          $zero                \n\t"
+    "subu       %[tmp6],        $zero,            %[tmp3]              \n\t"
+    "movn       %[tmp3],        %[tmp6],          %[tmp5]              \n\t"
+    "slt        %[tmp5],        %[tmp4],          $zero                \n\t"
+    "subu       %[tmp6],        $zero,            %[tmp4]              \n\t"
+    "movn       %[tmp4],        %[tmp6],          %[tmp5]              \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "slt        %[tmp5],        %[tempMax],       %[tmp1]              \n\t"
+    "movn       %[tempMax],     %[tmp1],          %[tmp5]              \n\t"
+    "addiu      %[i],           %[i],             -4                   \n\t"
+    "slt        %[tmp5],        %[tempMax],       %[tmp2]              \n\t"
+    "movn       %[tempMax],     %[tmp2],          %[tmp5]              \n\t"
+    "slt        %[tmp5],        %[tempMax],       %[tmp3]              \n\t"
+    "movn       %[tempMax],     %[tmp3],          %[tmp5]              \n\t"
+    "slt        %[tmp5],        %[tempMax],       %[tmp4]              \n\t"
+    "movn       %[tempMax],     %[tmp4],          %[tmp5]              \n\t"
+    "bgtz       %[i],                             5b                   \n\t"
+    " addiu     %[ptr_i],       %[ptr_i],         8                    \n\t"
+    "addiu      %[tmp1],        $zero,            13573                \n\t"
+    "addiu      %[tmp2],        $zero,            27146                \n\t"
+#if !defined(MIPS32_R2_LE)
+    "sll        %[tempMax],     %[tempMax],       16                   \n\t"
+    "sra        %[tempMax],     %[tempMax],       16                   \n\t"
+#else  // #if !defined(MIPS32_R2_LE)
+    "seh        %[tempMax]                                             \n\t"
+#endif  // #if !defined(MIPS32_R2_LE)
+    "slt        %[tmp1],        %[tmp1],          %[tempMax]           \n\t"
+    "slt        %[tmp2],        %[tmp2],          %[tempMax]           \n\t"
+    "addu       %[tmp1],        %[tmp1],          %[tmp2]              \n\t"
+    "addu       %[shift],       %[shift],         %[tmp1]              \n\t"
+    "addu       %[scale],       %[scale],         %[tmp1]              \n\t"
+    "sllv       %[round2],      %[round2],        %[tmp1]              \n\t"
+    "sll        %[istep],       %[l],             1                    \n\t"
+    "move       %[m],           $zero                                  \n\t"
+    "sll        %[tmp],         %[l],             2                    \n\t"
+   "2:                                                                 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "sllv       %[tmp3],        %[m],             %[k]                 \n\t"
+    "addiu      %[tmp2],        %[tmp3],          512                  \n\t"
+    "addiu      %[m],           %[m],             1                    \n\t"
+    "lhx        %[wi],          %[tmp3](%[kSinTable1024])              \n\t"
+    "lhx        %[wr],          %[tmp2](%[kSinTable1024])              \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "sllv       %[tmp3],        %[m],             %[k]                 \n\t"
+    "addu       %[ptr_j],       %[tmp3],          %[kSinTable1024]     \n\t"
+    "addiu      %[ptr_i],       %[ptr_j],         512                  \n\t"
+    "addiu      %[m],           %[m],             1                    \n\t"
+    "lh         %[wi],          0(%[ptr_j])                            \n\t"
+    "lh         %[wr],          0(%[ptr_i])                            \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+   "1:                                                                 \n\t"
+    "sll        %[tmp1],        %[i],             2                    \n\t"
+    "addu       %[ptr_i],       %[frfi],          %[tmp1]              \n\t"
+    "addu       %[ptr_j],       %[ptr_i],         %[tmp]               \n\t"
+    "lh         %[tmp3],        0(%[ptr_j])                            \n\t"
+    "lh         %[tmp4],        2(%[ptr_j])                            \n\t"
+    "lh         %[tmp6],        0(%[ptr_i])                            \n\t"
+    "lh         %[tmp5],        2(%[ptr_i])                            \n\t"
+    "addu       %[i],           %[i],             %[istep]             \n\t"
+#if defined(MIPS_DSP_R2_LE)
+    "mult       %[wr],          %[tmp3]                                \n\t"
+    "msub       %[wi],          %[tmp4]                                \n\t"
+    "mult       $ac1,           %[wr],            %[tmp4]              \n\t"
+    "madd       $ac1,           %[wi],            %[tmp3]              \n\t"
+    "mflo       %[tmp1]                                                \n\t"
+    "mflo       %[tmp2],        $ac1                                   \n\t"
+    "sll        %[tmp6],        %[tmp6],          14                   \n\t"
+    "sll        %[tmp5],        %[tmp5],          14                   \n\t"
+    "shra_r.w   %[tmp1],        %[tmp1],          1                    \n\t"
+    "shra_r.w   %[tmp2],        %[tmp2],          1                    \n\t"
+    "addu       %[tmp6],        %[tmp6],          %[round2]            \n\t"
+    "addu       %[tmp5],        %[tmp5],          %[round2]            \n\t"
+    "subu       %[tmp4],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp1],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp6],        %[tmp5],          %[tmp2]              \n\t"
+    "subu       %[tmp5],        %[tmp5],          %[tmp2]              \n\t"
+    "srav       %[tmp4],        %[tmp4],          %[shift]             \n\t"
+    "srav       %[tmp1],        %[tmp1],          %[shift]             \n\t"
+    "srav       %[tmp6],        %[tmp6],          %[shift]             \n\t"
+    "srav       %[tmp5],        %[tmp5],          %[shift]             \n\t"
+#else  // #if defined(MIPS_DSP_R2_LE)
+    "mul        %[tmp1],        %[wr],            %[tmp3]              \n\t"
+    "mul        %[tmp2],        %[wr],            %[tmp4]              \n\t"
+    "mul        %[tmp4],        %[wi],            %[tmp4]              \n\t"
+    "mul        %[tmp3],        %[wi],            %[tmp3]              \n\t"
+    "sll        %[tmp6],        %[tmp6],          14                   \n\t"
+    "sll        %[tmp5],        %[tmp5],          14                   \n\t"
+    "sub        %[tmp1],        %[tmp1],          %[tmp4]              \n\t"
+    "addu       %[tmp2],        %[tmp2],          %[tmp3]              \n\t"
+    "addiu      %[tmp1],        %[tmp1],          1                    \n\t"
+    "addiu      %[tmp2],        %[tmp2],          1                    \n\t"
+    "sra        %[tmp2],        %[tmp2],          1                    \n\t"
+    "sra        %[tmp1],        %[tmp1],          1                    \n\t"
+    "addu       %[tmp6],        %[tmp6],          %[round2]            \n\t"
+    "addu       %[tmp5],        %[tmp5],          %[round2]            \n\t"
+    "subu       %[tmp4],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp1],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp6],        %[tmp5],          %[tmp2]              \n\t"
+    "subu       %[tmp5],        %[tmp5],          %[tmp2]              \n\t"
+    "sra        %[tmp4],        %[tmp4],          %[shift]             \n\t"
+    "sra        %[tmp1],        %[tmp1],          %[shift]             \n\t"
+    "sra        %[tmp6],        %[tmp6],          %[shift]             \n\t"
+    "sra        %[tmp5],        %[tmp5],          %[shift]             \n\t"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+    "sh         %[tmp1],         0(%[ptr_i])                           \n\t"
+    "sh         %[tmp6],         2(%[ptr_i])                           \n\t"
+    "sh         %[tmp4],         0(%[ptr_j])                           \n\t"
+    "blt        %[i],            %[n],            1b                   \n\t"
+    " sh        %[tmp5],         2(%[ptr_j])                           \n\t"
+    "blt        %[m],            %[l],            2b                   \n\t"
+    " addu      %[i],            $zero,           %[m]                 \n\t"
+    "move       %[l],            %[istep]                              \n\t"
+    "blt        %[l],            %[n],            3b                   \n\t"
+    " addiu     %[k],            %[k],            -1                   \n\t"
+
+    ".set pop                                                          \n\t"
+
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+      [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+      [ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [m] "=&r" (m), [tmp] "=&r" (tmp),
+      [istep] "=&r" (istep), [wi] "=&r" (wi), [wr] "=&r" (wr), [l] "=&r" (l),
+      [k] "=&r" (k), [round2] "=&r" (round2), [ptr_j] "=&r" (ptr_j),
+      [shift] "=&r" (shift), [scale] "=&r" (scale), [tempMax] "=&r" (tempMax)
+    : [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024)
+    : "hi", "lo", "memory"
+#if defined(MIPS_DSP_R2_LE)
+    , "$ac1hi", "$ac1lo"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+  );
+
+  return scale;
+
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_tables.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_tables.h
new file mode 100644
index 00000000..ca7b7fe3
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_tables.h
@@ -0,0 +1,148 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
+#define WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
+
+#include "webrtc/typedefs.h"
+
+static const int16_t kSinTable1024[] = {
+       0,    201,    402,    603,    804,   1005,   1206,   1406,
+    1607,   1808,   2009,   2209,   2410,   2610,   2811,   3011,
+    3211,   3411,   3611,   3811,   4011,   4210,   4409,   4608,
+    4807,   5006,   5205,   5403,   5601,   5799,   5997,   6195,
+    6392,   6589,   6786,   6982,   7179,   7375,   7571,   7766,
+    7961,   8156,   8351,   8545,   8739,   8932,   9126,   9319,
+    9511,   9703,   9895,  10087,  10278,  10469,  10659,  10849,
+   11038,  11227,  11416,  11604,  11792,  11980,  12166,  12353,
+   12539,  12724,  12909,  13094,  13278,  13462,  13645,  13827,
+   14009,  14191,  14372,  14552,  14732,  14911,  15090,  15268,
+   15446,  15623,  15799,  15975,  16150,  16325,  16499,  16672,
+   16845,  17017,  17189,  17360,  17530,  17699,  17868,  18036,
+   18204,  18371,  18537,  18702,  18867,  19031,  19194,  19357,
+   19519,  19680,  19840,  20000,  20159,  20317,  20474,  20631,
+   20787,  20942,  21096,  21249,  21402,  21554,  21705,  21855,
+   22004,  22153,  22301,  22448,  22594,  22739,  22883,  23027,
+   23169,  23311,  23452,  23592,  23731,  23869,  24006,  24143,
+   24278,  24413,  24546,  24679,  24811,  24942,  25072,  25201,
+   25329,  25456,  25582,  25707,  25831,  25954,  26077,  26198,
+   26318,  26437,  26556,  26673,  26789,  26905,  27019,  27132,
+   27244,  27355,  27466,  27575,  27683,  27790,  27896,  28001,
+   28105,  28208,  28309,  28410,  28510,  28608,  28706,  28802,
+   28897,  28992,  29085,  29177,  29268,  29358,  29446,  29534,
+   29621,  29706,  29790,  29873,  29955,  30036,  30116,  30195,
+   30272,  30349,  30424,  30498,  30571,  30643,  30713,  30783,
+   30851,  30918,  30984,  31049,  31113,  31175,  31236,  31297,
+   31356,  31413,  31470,  31525,  31580,  31633,  31684,  31735,
+   31785,  31833,  31880,  31926,  31970,  32014,  32056,  32097,
+   32137,  32176,  32213,  32249,  32284,  32318,  32350,  32382,
+   32412,  32441,  32468,  32495,  32520,  32544,  32567,  32588,
+   32609,  32628,  32646,  32662,  32678,  32692,  32705,  32717,
+   32727,  32736,  32744,  32751,  32757,  32761,  32764,  32766,
+   32767,  32766,  32764,  32761,  32757,  32751,  32744,  32736,
+   32727,  32717,  32705,  32692,  32678,  32662,  32646,  32628,
+   32609,  32588,  32567,  32544,  32520,  32495,  32468,  32441,
+   32412,  32382,  32350,  32318,  32284,  32249,  32213,  32176,
+   32137,  32097,  32056,  32014,  31970,  31926,  31880,  31833,
+   31785,  31735,  31684,  31633,  31580,  31525,  31470,  31413,
+   31356,  31297,  31236,  31175,  31113,  31049,  30984,  30918,
+   30851,  30783,  30713,  30643,  30571,  30498,  30424,  30349,
+   30272,  30195,  30116,  30036,  29955,  29873,  29790,  29706,
+   29621,  29534,  29446,  29358,  29268,  29177,  29085,  28992,
+   28897,  28802,  28706,  28608,  28510,  28410,  28309,  28208,
+   28105,  28001,  27896,  27790,  27683,  27575,  27466,  27355,
+   27244,  27132,  27019,  26905,  26789,  26673,  26556,  26437,
+   26318,  26198,  26077,  25954,  25831,  25707,  25582,  25456,
+   25329,  25201,  25072,  24942,  24811,  24679,  24546,  24413,
+   24278,  24143,  24006,  23869,  23731,  23592,  23452,  23311,
+   23169,  23027,  22883,  22739,  22594,  22448,  22301,  22153,
+   22004,  21855,  21705,  21554,  21402,  21249,  21096,  20942,
+   20787,  20631,  20474,  20317,  20159,  20000,  19840,  19680,
+   19519,  19357,  19194,  19031,  18867,  18702,  18537,  18371,
+   18204,  18036,  17868,  17699,  17530,  17360,  17189,  17017,
+   16845,  16672,  16499,  16325,  16150,  15975,  15799,  15623,
+   15446,  15268,  15090,  14911,  14732,  14552,  14372,  14191,
+   14009,  13827,  13645,  13462,  13278,  13094,  12909,  12724,
+   12539,  12353,  12166,  11980,  11792,  11604,  11416,  11227,
+   11038,  10849,  10659,  10469,  10278,  10087,   9895,   9703,
+    9511,   9319,   9126,   8932,   8739,   8545,   8351,   8156,
+    7961,   7766,   7571,   7375,   7179,   6982,   6786,   6589,
+    6392,   6195,   5997,   5799,   5601,   5403,   5205,   5006,
+    4807,   4608,   4409,   4210,   4011,   3811,   3611,   3411,
+    3211,   3011,   2811,   2610,   2410,   2209,   2009,   1808,
+    1607,   1406,   1206,   1005,    804,    603,    402,    201,
+       0,   -201,   -402,   -603,   -804,  -1005,  -1206,  -1406,
+   -1607,  -1808,  -2009,  -2209,  -2410,  -2610,  -2811,  -3011,
+   -3211,  -3411,  -3611,  -3811,  -4011,  -4210,  -4409,  -4608,
+   -4807,  -5006,  -5205,  -5403,  -5601,  -5799,  -5997,  -6195,
+   -6392,  -6589,  -6786,  -6982,  -7179,  -7375,  -7571,  -7766,
+   -7961,  -8156,  -8351,  -8545,  -8739,  -8932,  -9126,  -9319,
+   -9511,  -9703,  -9895, -10087, -10278, -10469, -10659, -10849,
+  -11038, -11227, -11416, -11604, -11792, -11980, -12166, -12353,
+  -12539, -12724, -12909, -13094, -13278, -13462, -13645, -13827,
+  -14009, -14191, -14372, -14552, -14732, -14911, -15090, -15268,
+  -15446, -15623, -15799, -15975, -16150, -16325, -16499, -16672,
+  -16845, -17017, -17189, -17360, -17530, -17699, -17868, -18036,
+  -18204, -18371, -18537, -18702, -18867, -19031, -19194, -19357,
+  -19519, -19680, -19840, -20000, -20159, -20317, -20474, -20631,
+  -20787, -20942, -21096, -21249, -21402, -21554, -21705, -21855,
+  -22004, -22153, -22301, -22448, -22594, -22739, -22883, -23027,
+  -23169, -23311, -23452, -23592, -23731, -23869, -24006, -24143,
+  -24278, -24413, -24546, -24679, -24811, -24942, -25072, -25201,
+  -25329, -25456, -25582, -25707, -25831, -25954, -26077, -26198,
+  -26318, -26437, -26556, -26673, -26789, -26905, -27019, -27132,
+  -27244, -27355, -27466, -27575, -27683, -27790, -27896, -28001,
+  -28105, -28208, -28309, -28410, -28510, -28608, -28706, -28802,
+  -28897, -28992, -29085, -29177, -29268, -29358, -29446, -29534,
+  -29621, -29706, -29790, -29873, -29955, -30036, -30116, -30195,
+  -30272, -30349, -30424, -30498, -30571, -30643, -30713, -30783,
+  -30851, -30918, -30984, -31049, -31113, -31175, -31236, -31297,
+  -31356, -31413, -31470, -31525, -31580, -31633, -31684, -31735,
+  -31785, -31833, -31880, -31926, -31970, -32014, -32056, -32097,
+  -32137, -32176, -32213, -32249, -32284, -32318, -32350, -32382,
+  -32412, -32441, -32468, -32495, -32520, -32544, -32567, -32588,
+  -32609, -32628, -32646, -32662, -32678, -32692, -32705, -32717,
+  -32727, -32736, -32744, -32751, -32757, -32761, -32764, -32766,
+  -32767, -32766, -32764, -32761, -32757, -32751, -32744, -32736,
+  -32727, -32717, -32705, -32692, -32678, -32662, -32646, -32628,
+  -32609, -32588, -32567, -32544, -32520, -32495, -32468, -32441,
+  -32412, -32382, -32350, -32318, -32284, -32249, -32213, -32176,
+  -32137, -32097, -32056, -32014, -31970, -31926, -31880, -31833,
+  -31785, -31735, -31684, -31633, -31580, -31525, -31470, -31413,
+  -31356, -31297, -31236, -31175, -31113, -31049, -30984, -30918,
+  -30851, -30783, -30713, -30643, -30571, -30498, -30424, -30349,
+  -30272, -30195, -30116, -30036, -29955, -29873, -29790, -29706,
+  -29621, -29534, -29446, -29358, -29268, -29177, -29085, -28992,
+  -28897, -28802, -28706, -28608, -28510, -28410, -28309, -28208,
+  -28105, -28001, -27896, -27790, -27683, -27575, -27466, -27355,
+  -27244, -27132, -27019, -26905, -26789, -26673, -26556, -26437,
+  -26318, -26198, -26077, -25954, -25831, -25707, -25582, -25456,
+  -25329, -25201, -25072, -24942, -24811, -24679, -24546, -24413,
+  -24278, -24143, -24006, -23869, -23731, -23592, -23452, -23311,
+  -23169, -23027, -22883, -22739, -22594, -22448, -22301, -22153,
+  -22004, -21855, -21705, -21554, -21402, -21249, -21096, -20942,
+  -20787, -20631, -20474, -20317, -20159, -20000, -19840, -19680,
+  -19519, -19357, -19194, -19031, -18867, -18702, -18537, -18371,
+  -18204, -18036, -17868, -17699, -17530, -17360, -17189, -17017,
+  -16845, -16672, -16499, -16325, -16150, -15975, -15799, -15623,
+  -15446, -15268, -15090, -14911, -14732, -14552, -14372, -14191,
+  -14009, -13827, -13645, -13462, -13278, -13094, -12909, -12724,
+  -12539, -12353, -12166, -11980, -11792, -11604, -11416, -11227,
+  -11038, -10849, -10659, -10469, -10278, -10087,  -9895,  -9703,
+   -9511,  -9319,  -9126,  -8932,  -8739,  -8545,  -8351,  -8156,
+   -7961,  -7766,  -7571,  -7375,  -7179,  -6982,  -6786,  -6589,
+   -6392,  -6195,  -5997,  -5799,  -5601,  -5403,  -5205,  -5006,
+   -4807,  -4608,  -4409,  -4210,  -4011,  -3811,  -3611,  -3411,
+   -3211,  -3011,  -2811,  -2610,  -2410,  -2209,  -2009,  -1808,
+   -1607,  -1406,  -1206,  -1005,   -804,   -603,   -402,   -201
+};
+
+#endif  // WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/copy_set_operations.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/copy_set_operations.c
new file mode 100644
index 00000000..9d7cf47e
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/copy_set_operations.c
@@ -0,0 +1,82 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the implementation of functions
+ * WebRtcSpl_MemSetW16()
+ * WebRtcSpl_MemSetW32()
+ * WebRtcSpl_MemCpyReversedOrder()
+ * WebRtcSpl_CopyFromEndW16()
+ * WebRtcSpl_ZerosArrayW16()
+ * WebRtcSpl_ZerosArrayW32()
+ *
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include <string.h>
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+
+void WebRtcSpl_MemSetW16(int16_t *ptr, int16_t set_value, size_t length)
+{
+    size_t j;
+    int16_t *arrptr = ptr;
+
+    for (j = length; j > 0; j--)
+    {
+        *arrptr++ = set_value;
+    }
+}
+
+void WebRtcSpl_MemSetW32(int32_t *ptr, int32_t set_value, size_t length)
+{
+    size_t j;
+    int32_t *arrptr = ptr;
+
+    for (j = length; j > 0; j--)
+    {
+        *arrptr++ = set_value;
+    }
+}
+
+void WebRtcSpl_MemCpyReversedOrder(int16_t* dest,
+                                   int16_t* source,
+                                   size_t length)
+{
+    size_t j;
+    int16_t* destPtr = dest;
+    int16_t* sourcePtr = source;
+
+    for (j = 0; j < length; j++)
+    {
+        *destPtr-- = *sourcePtr++;
+    }
+}
+
+void WebRtcSpl_CopyFromEndW16(const int16_t *vector_in,
+                              size_t length,
+                              size_t samples,
+                              int16_t *vector_out)
+{
+    // Copy the last <samples> of the input vector to vector_out
+    WEBRTC_SPL_MEMCPY_W16(vector_out, &vector_in[length - samples], samples);
+}
+
+void WebRtcSpl_ZerosArrayW16(int16_t *vector, size_t length)
+{
+    WebRtcSpl_MemSetW16(vector, 0, length);
+}
+
+void WebRtcSpl_ZerosArrayW32(int32_t *vector, size_t length)
+{
+    WebRtcSpl_MemSetW32(vector, 0, length);
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation.c
new file mode 100644
index 00000000..d7c9f2b9
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation.c
@@ -0,0 +1,30 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */
+void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
+                                 const int16_t* seq1,
+                                 const int16_t* seq2,
+                                 size_t dim_seq,
+                                 size_t dim_cross_correlation,
+                                 int right_shifts,
+                                 int step_seq2) {
+  size_t i = 0, j = 0;
+
+  for (i = 0; i < dim_cross_correlation; i++) {
+    int32_t corr = 0;
+    for (j = 0; j < dim_seq; j++)
+      corr += (seq1[j] * seq2[j]) >> right_shifts;
+    seq2 += step_seq2;
+    *cross_correlation++ = corr;
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_mips.c
new file mode 100644
index 00000000..b2364026
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_mips.c
@@ -0,0 +1,104 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation,
+                                     const int16_t* seq1,
+                                     const int16_t* seq2,
+                                     size_t dim_seq,
+                                     size_t dim_cross_correlation,
+                                     int right_shifts,
+                                     int step_seq2) {
+
+  int32_t t0 = 0, t1 = 0, t2 = 0, t3 = 0, sum = 0;
+  int16_t *pseq2 = NULL;
+  int16_t *pseq1 = NULL;
+  int16_t *pseq1_0 = (int16_t*)&seq1[0];
+  int16_t *pseq2_0 = (int16_t*)&seq2[0];
+  int k = 0;
+
+  __asm __volatile (
+    ".set        push                                           \n\t"
+    ".set        noreorder                                      \n\t"
+    "sll         %[step_seq2], %[step_seq2],   1                \n\t"
+    "andi        %[t0],        %[dim_seq],     1                \n\t"
+    "bgtz        %[t0],        3f                               \n\t"
+    " nop                                                       \n\t"
+   "1:                                                          \n\t"
+    "move        %[pseq1],     %[pseq1_0]                       \n\t"
+    "move        %[pseq2],     %[pseq2_0]                       \n\t"
+    "sra         %[k],         %[dim_seq],     1                \n\t"
+    "addiu       %[dim_cc],    %[dim_cc],      -1               \n\t"
+    "xor         %[sum],       %[sum],         %[sum]           \n\t"
+   "2:                                                          \n\t"
+    "lh          %[t0],        0(%[pseq1])                      \n\t"
+    "lh          %[t1],        0(%[pseq2])                      \n\t"
+    "lh          %[t2],        2(%[pseq1])                      \n\t"
+    "lh          %[t3],        2(%[pseq2])                      \n\t"
+    "mul         %[t0],        %[t0],          %[t1]            \n\t"
+    "addiu       %[k],         %[k],           -1               \n\t"
+    "mul         %[t2],        %[t2],          %[t3]            \n\t"
+    "addiu       %[pseq1],     %[pseq1],       4                \n\t"
+    "addiu       %[pseq2],     %[pseq2],       4                \n\t"
+    "srav        %[t0],        %[t0],          %[right_shifts]  \n\t"
+    "addu        %[sum],       %[sum],         %[t0]            \n\t"
+    "srav        %[t2],        %[t2],          %[right_shifts]  \n\t"
+    "bgtz        %[k],         2b                               \n\t"
+    " addu       %[sum],       %[sum],         %[t2]            \n\t"
+    "addu        %[pseq2_0],   %[pseq2_0],     %[step_seq2]     \n\t"
+    "sw          %[sum],       0(%[cc])                         \n\t"
+    "bgtz        %[dim_cc],    1b                               \n\t"
+    " addiu      %[cc],        %[cc],          4                \n\t"
+    "b           6f                                             \n\t"
+    " nop                                                       \n\t"
+   "3:                                                          \n\t"
+    "move        %[pseq1],     %[pseq1_0]                       \n\t"
+    "move        %[pseq2],     %[pseq2_0]                       \n\t"
+    "sra         %[k],         %[dim_seq],     1                \n\t"
+    "addiu       %[dim_cc],    %[dim_cc],      -1               \n\t"
+    "beqz        %[k],         5f                               \n\t"
+    " xor        %[sum],       %[sum],         %[sum]           \n\t"
+   "4:                                                          \n\t"
+    "lh          %[t0],        0(%[pseq1])                      \n\t"
+    "lh          %[t1],        0(%[pseq2])                      \n\t"
+    "lh          %[t2],        2(%[pseq1])                      \n\t"
+    "lh          %[t3],        2(%[pseq2])                      \n\t"
+    "mul         %[t0],        %[t0],          %[t1]            \n\t"
+    "addiu       %[k],         %[k],           -1               \n\t"
+    "mul         %[t2],        %[t2],          %[t3]            \n\t"
+    "addiu       %[pseq1],     %[pseq1],       4                \n\t"
+    "addiu       %[pseq2],     %[pseq2],       4                \n\t"
+    "srav        %[t0],        %[t0],          %[right_shifts]  \n\t"
+    "addu        %[sum],       %[sum],         %[t0]            \n\t"
+    "srav        %[t2],        %[t2],          %[right_shifts]  \n\t"
+    "bgtz        %[k],         4b                               \n\t"
+    " addu       %[sum],       %[sum],         %[t2]            \n\t"
+   "5:                                                          \n\t"
+    "lh          %[t0],        0(%[pseq1])                      \n\t"
+    "lh          %[t1],        0(%[pseq2])                      \n\t"
+    "mul         %[t0],        %[t0],          %[t1]            \n\t"
+    "srav        %[t0],        %[t0],          %[right_shifts]  \n\t"
+    "addu        %[sum],       %[sum],         %[t0]            \n\t"
+    "addu        %[pseq2_0],   %[pseq2_0],     %[step_seq2]     \n\t"
+    "sw          %[sum],       0(%[cc])                         \n\t"
+    "bgtz        %[dim_cc],    3b                               \n\t"
+    " addiu      %[cc],        %[cc],          4                \n\t"
+   "6:                                                          \n\t"
+    ".set        pop                                            \n\t"
+    : [step_seq2] "+r" (step_seq2), [t0] "=&r" (t0), [t1] "=&r" (t1),
+      [t2] "=&r" (t2), [t3] "=&r" (t3), [pseq1] "=&r" (pseq1),
+      [pseq2] "=&r" (pseq2), [pseq1_0] "+r" (pseq1_0), [pseq2_0] "+r" (pseq2_0),
+      [k] "=&r" (k), [dim_cc] "+r" (dim_cross_correlation), [sum] "=&r" (sum),
+      [cc] "+r" (cross_correlation)
+    : [dim_seq] "r" (dim_seq), [right_shifts] "r" (right_shifts)
+    : "hi", "lo", "memory"
+  );
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c
new file mode 100644
index 00000000..918b6715
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c
@@ -0,0 +1,87 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <arm_neon.h>
+
+static inline void DotProductWithScaleNeon(int32_t* cross_correlation,
+                                           const int16_t* vector1,
+                                           const int16_t* vector2,
+                                           size_t length,
+                                           int scaling) {
+  size_t i = 0;
+  size_t len1 = length >> 3;
+  size_t len2 = length & 7;
+  int64x2_t sum0 = vdupq_n_s64(0);
+  int64x2_t sum1 = vdupq_n_s64(0);
+
+  for (i = len1; i > 0; i -= 1) {
+    int16x8_t seq1_16x8 = vld1q_s16(vector1);
+    int16x8_t seq2_16x8 = vld1q_s16(vector2);
+#if defined(WEBRTC_ARCH_ARM64)
+    int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8),
+                               vget_low_s16(seq2_16x8));
+    int32x4_t tmp1 = vmull_high_s16(seq1_16x8, seq2_16x8);
+#else
+    int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8),
+                               vget_low_s16(seq2_16x8));
+    int32x4_t tmp1 = vmull_s16(vget_high_s16(seq1_16x8),
+                               vget_high_s16(seq2_16x8));
+#endif
+    sum0 = vpadalq_s32(sum0, tmp0);
+    sum1 = vpadalq_s32(sum1, tmp1);
+    vector1 += 8;
+    vector2 += 8;
+  }
+
+  // Calculate the rest of the samples.
+  int64_t sum_res = 0;
+  for (i = len2; i > 0; i -= 1) {
+    sum_res += WEBRTC_SPL_MUL_16_16(*vector1, *vector2);
+    vector1++;
+    vector2++;
+  }
+
+  sum0 = vaddq_s64(sum0, sum1);
+#if defined(WEBRTC_ARCH_ARM64)
+  int64_t sum2 = vaddvq_s64(sum0);
+  *cross_correlation = (int32_t)((sum2 + sum_res) >> scaling);
+#else
+  int64x1_t shift = vdup_n_s64(-scaling);
+  int64x1_t sum2 = vadd_s64(vget_low_s64(sum0), vget_high_s64(sum0));
+  sum2 = vadd_s64(sum2, vdup_n_s64(sum_res));
+  sum2 = vshl_s64(sum2, shift);
+  vst1_lane_s32(cross_correlation, vreinterpret_s32_s64(sum2), 0);
+#endif
+}
+
+/* NEON version of WebRtcSpl_CrossCorrelation() for ARM32/64 platforms. */
+void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
+                                    const int16_t* seq1,
+                                    const int16_t* seq2,
+                                    size_t dim_seq,
+                                    size_t dim_cross_correlation,
+                                    int right_shifts,
+                                    int step_seq2) {
+  size_t i = 0;
+
+  for (i = 0; i < dim_cross_correlation; i++) {
+    const int16_t* seq1_ptr = seq1;
+    const int16_t* seq2_ptr = seq2 + (step_seq2 * i);
+
+    DotProductWithScaleNeon(cross_correlation,
+                            seq1_ptr,
+                            seq2_ptr,
+                            dim_seq,
+                            right_shifts);
+    cross_correlation++;
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/division_operations.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/division_operations.c
new file mode 100644
index 00000000..eaa06a1f
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/division_operations.c
@@ -0,0 +1,138 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the divisions
+ * WebRtcSpl_DivU32U16()
+ * WebRtcSpl_DivW32W16()
+ * WebRtcSpl_DivW32W16ResW16()
+ * WebRtcSpl_DivResultInQ31()
+ * WebRtcSpl_DivW32HiLow()
+ *
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den)
+{
+    // Guard against division with 0
+    if (den != 0)
+    {
+        return (uint32_t)(num / den);
+    } else
+    {
+        return (uint32_t)0xFFFFFFFF;
+    }
+}
+
+int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den)
+{
+    // Guard against division with 0
+    if (den != 0)
+    {
+        return (int32_t)(num / den);
+    } else
+    {
+        return (int32_t)0x7FFFFFFF;
+    }
+}
+
+int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den)
+{
+    // Guard against division with 0
+    if (den != 0)
+    {
+        return (int16_t)(num / den);
+    } else
+    {
+        return (int16_t)0x7FFF;
+    }
+}
+
+int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den)
+{
+    int32_t L_num = num;
+    int32_t L_den = den;
+    int32_t div = 0;
+    int k = 31;
+    int change_sign = 0;
+
+    if (num == 0)
+        return 0;
+
+    if (num < 0)
+    {
+        change_sign++;
+        L_num = -num;
+    }
+    if (den < 0)
+    {
+        change_sign++;
+        L_den = -den;
+    }
+    while (k--)
+    {
+        div <<= 1;
+        L_num <<= 1;
+        if (L_num >= L_den)
+        {
+            L_num -= L_den;
+            div++;
+        }
+    }
+    if (change_sign == 1)
+    {
+        div = -div;
+    }
+    return div;
+}
+
+int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low)
+{
+    int16_t approx, tmp_hi, tmp_low, num_hi, num_low;
+    int32_t tmpW32;
+
+    approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi);
+    // result in Q14 (Note: 3FFFFFFF = 0.5 in Q30)
+
+    // tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30)
+    tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1);
+    // tmpW32 = den * approx
+
+    tmpW32 = (int32_t)0x7fffffffL - tmpW32; // result in Q30 (tmpW32 = 2.0-(den*approx))
+
+    // Store tmpW32 in hi and low format
+    tmp_hi = (int16_t)(tmpW32 >> 16);
+    tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
+
+    // tmpW32 = 1/den in Q29
+    tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1;
+
+    // 1/den in hi and low format
+    tmp_hi = (int16_t)(tmpW32 >> 16);
+    tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
+
+    // Store num in hi and low format
+    num_hi = (int16_t)(num >> 16);
+    num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1);
+
+    // num * (1/den) by 32 bit multiplication (result in Q28)
+
+    tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) +
+        (num_low * tmp_hi >> 15);
+
+    // Put result in Q31 (convert from Q28)
+    tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3);
+
+    return tmpW32;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/dot_product_with_scale.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/dot_product_with_scale.c
new file mode 100644
index 00000000..1302d625
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/dot_product_with_scale.c
@@ -0,0 +1,32 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
+                                      const int16_t* vector2,
+                                      size_t length,
+                                      int scaling) {
+  int32_t sum = 0;
+  size_t i = 0;
+
+  /* Unroll the loop to improve performance. */
+  for (i = 0; i + 3 < length; i += 4) {
+    sum += (vector1[i + 0] * vector2[i + 0]) >> scaling;
+    sum += (vector1[i + 1] * vector2[i + 1]) >> scaling;
+    sum += (vector1[i + 2] * vector2[i + 2]) >> scaling;
+    sum += (vector1[i + 3] * vector2[i + 3]) >> scaling;
+  }
+  for (; i < length; i++) {
+    sum += (vector1[i] * vector2[i]) >> scaling;
+  }
+
+  return sum;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast.c
new file mode 100644
index 00000000..726a8881
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast.c
@@ -0,0 +1,48 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// TODO(Bjornv): Change the function parameter order to WebRTC code style.
+// C version of WebRtcSpl_DownsampleFast() for generic platforms.
+int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
+                              size_t data_in_length,
+                              int16_t* data_out,
+                              size_t data_out_length,
+                              const int16_t* __restrict coefficients,
+                              size_t coefficients_length,
+                              int factor,
+                              size_t delay) {
+  size_t i = 0;
+  size_t j = 0;
+  int32_t out_s32 = 0;
+  size_t endpos = delay + factor * (data_out_length - 1) + 1;
+
+  // Return error if any of the running conditions doesn't meet.
+  if (data_out_length == 0 || coefficients_length == 0
+                           || data_in_length < endpos) {
+    return -1;
+  }
+
+  for (i = delay; i < endpos; i += factor) {
+    out_s32 = 2048;  // Round value, 0.5 in Q12.
+
+    for (j = 0; j < coefficients_length; j++) {
+      out_s32 += coefficients[j] * data_in[i - j];  // Q12.
+    }
+
+    out_s32 >>= 12;  // Q0.
+
+    // Saturate and store the output.
+    *data_out++ = WebRtcSpl_SatW32ToW16(out_s32);
+  }
+
+  return 0;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_mips.c
new file mode 100644
index 00000000..ac39401a
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_mips.c
@@ -0,0 +1,169 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Version of WebRtcSpl_DownsampleFast() for MIPS platforms.
+int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in,
+                                  size_t data_in_length,
+                                  int16_t* data_out,
+                                  size_t data_out_length,
+                                  const int16_t* __restrict coefficients,
+                                  size_t coefficients_length,
+                                  int factor,
+                                  size_t delay) {
+  int i;
+  int j;
+  int k;
+  int32_t out_s32 = 0;
+  size_t endpos = delay + factor * (data_out_length - 1) + 1;
+
+  int32_t  tmp1, tmp2, tmp3, tmp4, factor_2;
+  int16_t* p_coefficients;
+  int16_t* p_data_in;
+  int16_t* p_data_in_0 = (int16_t*)&data_in[delay];
+  int16_t* p_coefficients_0 = (int16_t*)&coefficients[0];
+#if !defined(MIPS_DSP_R1_LE)
+  int32_t max_16 = 0x7FFF;
+  int32_t min_16 = 0xFFFF8000;
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+
+  // Return error if any of the running conditions doesn't meet.
+  if (data_out_length == 0 || coefficients_length == 0
+                           || data_in_length < endpos) {
+    return -1;
+  }
+#if defined(MIPS_DSP_R2_LE)
+  __asm __volatile (
+    ".set        push                                                \n\t"
+    ".set        noreorder                                           \n\t"
+    "subu        %[i],            %[endpos],       %[delay]          \n\t"
+    "sll         %[factor_2],     %[factor],       1                 \n\t"
+   "1:                                                               \n\t"
+    "move        %[p_data_in],    %[p_data_in_0]                     \n\t"
+    "mult        $zero,           $zero                              \n\t"
+    "move        %[p_coefs],      %[p_coefs_0]                       \n\t"
+    "sra         %[j],            %[coef_length],  2                 \n\t"
+    "beq         %[j],            $zero,           3f                \n\t"
+    " andi       %[k],            %[coef_length],  3                 \n\t"
+   "2:                                                               \n\t"
+    "lwl         %[tmp1],         1(%[p_data_in])                    \n\t"
+    "lwl         %[tmp2],         3(%[p_coefs])                      \n\t"
+    "lwl         %[tmp3],         -3(%[p_data_in])                   \n\t"
+    "lwl         %[tmp4],         7(%[p_coefs])                      \n\t"
+    "lwr         %[tmp1],         -2(%[p_data_in])                   \n\t"
+    "lwr         %[tmp2],         0(%[p_coefs])                      \n\t"
+    "lwr         %[tmp3],         -6(%[p_data_in])                   \n\t"
+    "lwr         %[tmp4],         4(%[p_coefs])                      \n\t"
+    "packrl.ph   %[tmp1],         %[tmp1],         %[tmp1]           \n\t"
+    "packrl.ph   %[tmp3],         %[tmp3],         %[tmp3]           \n\t"
+    "dpa.w.ph    $ac0,            %[tmp1],         %[tmp2]           \n\t"
+    "dpa.w.ph    $ac0,            %[tmp3],         %[tmp4]           \n\t"
+    "addiu       %[j],            %[j],            -1                \n\t"
+    "addiu       %[p_data_in],    %[p_data_in],    -8                \n\t"
+    "bgtz        %[j],            2b                                 \n\t"
+    " addiu      %[p_coefs],      %[p_coefs],      8                 \n\t"
+   "3:                                                               \n\t"
+    "beq         %[k],            $zero,           5f                \n\t"
+    " nop                                                            \n\t"
+   "4:                                                               \n\t"
+    "lhu         %[tmp1],         0(%[p_data_in])                    \n\t"
+    "lhu         %[tmp2],         0(%[p_coefs])                      \n\t"
+    "addiu       %[p_data_in],    %[p_data_in],    -2                \n\t"
+    "addiu       %[k],            %[k],            -1                \n\t"
+    "dpa.w.ph    $ac0,            %[tmp1],         %[tmp2]           \n\t"
+    "bgtz        %[k],            4b                                 \n\t"
+    " addiu      %[p_coefs],      %[p_coefs],      2                 \n\t"
+   "5:                                                               \n\t"
+    "extr_r.w    %[out_s32],      $ac0,            12                \n\t"
+    "addu        %[p_data_in_0],  %[p_data_in_0],  %[factor_2]       \n\t"
+    "subu        %[i],            %[i],            %[factor]         \n\t"
+    "shll_s.w    %[out_s32],      %[out_s32],      16                \n\t"
+    "sra         %[out_s32],      %[out_s32],      16                \n\t"
+    "sh          %[out_s32],      0(%[data_out])                     \n\t"
+    "bgtz        %[i],            1b                                 \n\t"
+    " addiu      %[data_out],     %[data_out],     2                 \n\t"
+    ".set        pop                                                 \n\t"
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+      [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in),
+      [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
+      [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
+      [i] "=&r" (i), [k] "=&r" (k)
+    : [coef_length] "r" (coefficients_length), [data_out] "r" (data_out),
+      [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
+      [delay] "r" (delay), [factor] "r" (factor)
+    : "memory", "hi", "lo"
+ );
+#else  // #if defined(MIPS_DSP_R2_LE)
+  __asm __volatile (
+    ".set        push                                                \n\t"
+    ".set        noreorder                                           \n\t"
+    "sll         %[factor_2],     %[factor],       1                 \n\t"
+    "subu        %[i],            %[endpos],       %[delay]          \n\t"
+   "1:                                                               \n\t"
+    "move        %[p_data_in],    %[p_data_in_0]                     \n\t"
+    "addiu       %[out_s32],      $zero,           2048              \n\t"
+    "move        %[p_coefs],      %[p_coefs_0]                       \n\t"
+    "sra         %[j],            %[coef_length],  1                 \n\t"
+    "beq         %[j],            $zero,           3f                \n\t"
+    " andi       %[k],            %[coef_length],  1                 \n\t"
+   "2:                                                               \n\t"
+    "lh          %[tmp1],         0(%[p_data_in])                    \n\t"
+    "lh          %[tmp2],         0(%[p_coefs])                      \n\t"
+    "lh          %[tmp3],         -2(%[p_data_in])                   \n\t"
+    "lh          %[tmp4],         2(%[p_coefs])                      \n\t"
+    "mul         %[tmp1],         %[tmp1],         %[tmp2]           \n\t"
+    "addiu       %[p_coefs],      %[p_coefs],      4                 \n\t"
+    "mul         %[tmp3],         %[tmp3],         %[tmp4]           \n\t"
+    "addiu       %[j],            %[j],            -1                \n\t"
+    "addiu       %[p_data_in],    %[p_data_in],    -4                \n\t"
+    "addu        %[tmp1],         %[tmp1],         %[tmp3]           \n\t"
+    "bgtz        %[j],            2b                                 \n\t"
+    " addu       %[out_s32],      %[out_s32],      %[tmp1]           \n\t"
+   "3:                                                               \n\t"
+    "beq         %[k],            $zero,           4f                \n\t"
+    " nop                                                            \n\t"
+    "lh          %[tmp1],         0(%[p_data_in])                    \n\t"
+    "lh          %[tmp2],         0(%[p_coefs])                      \n\t"
+    "mul         %[tmp1],         %[tmp1],         %[tmp2]           \n\t"
+    "addu        %[out_s32],      %[out_s32],      %[tmp1]           \n\t"
+   "4:                                                               \n\t"
+    "sra         %[out_s32],      %[out_s32],      12                \n\t"
+    "addu        %[p_data_in_0],  %[p_data_in_0],  %[factor_2]       \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shll_s.w    %[out_s32],      %[out_s32],      16                \n\t"
+    "sra         %[out_s32],      %[out_s32],      16                \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "slt         %[tmp1],         %[max_16],       %[out_s32]        \n\t"
+    "movn        %[out_s32],      %[max_16],       %[tmp1]           \n\t"
+    "slt         %[tmp1],         %[out_s32],      %[min_16]         \n\t"
+    "movn        %[out_s32],      %[min_16],       %[tmp1]           \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "subu        %[i],            %[i],            %[factor]         \n\t"
+    "sh          %[out_s32],      0(%[data_out])                     \n\t"
+    "bgtz        %[i],            1b                                 \n\t"
+    " addiu      %[data_out],     %[data_out],     2                 \n\t"
+    ".set        pop                                                 \n\t"
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+      [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in), [k] "=&r" (k),
+      [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
+      [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
+      [i] "=&r" (i)
+    : [coef_length] "r" (coefficients_length), [data_out] "r" (data_out),
+      [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
+#if !defined(MIPS_DSP_R1_LE)
+      [max_16] "r" (max_16), [min_16] "r" (min_16),
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+      [delay] "r" (delay), [factor] "r" (factor)
+    : "memory", "hi", "lo"
+  );
+#endif  // #if defined(MIPS_DSP_R2_LE)
+  return 0;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_neon.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_neon.c
new file mode 100644
index 00000000..58732dab
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_neon.c
@@ -0,0 +1,217 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <arm_neon.h>
+
+// NEON intrinsics version of WebRtcSpl_DownsampleFast()
+// for ARM 32-bit/64-bit platforms.
+int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
+                                 size_t data_in_length,
+                                 int16_t* data_out,
+                                 size_t data_out_length,
+                                 const int16_t* __restrict coefficients,
+                                 size_t coefficients_length,
+                                 int factor,
+                                 size_t delay) {
+  size_t i = 0;
+  size_t j = 0;
+  int32_t out_s32 = 0;
+  size_t endpos = delay + factor * (data_out_length - 1) + 1;
+  size_t res = data_out_length & 0x7;
+  size_t endpos1 = endpos - factor * res;
+
+  // Return error if any of the running conditions doesn't meet.
+  if (data_out_length == 0 || coefficients_length == 0
+                           || data_in_length < endpos) {
+    return -1;
+  }
+
+  // First part, unroll the loop 8 times, with 3 subcases
+  // (factor == 2, 4, others).
+  switch (factor) {
+    case 2: {
+      for (i = delay; i < endpos1; i += 16) {
+        // Round value, 0.5 in Q12.
+        int32x4_t out32x4_0 = vdupq_n_s32(2048);
+        int32x4_t out32x4_1 = vdupq_n_s32(2048);
+
+#if defined(WEBRTC_ARCH_ARM64)
+        // Unroll the loop 2 times.
+        for (j = 0; j < coefficients_length - 1; j += 2) {
+          int32x2_t coeff32 = vld1_dup_s32((int32_t*)&coefficients[j]);
+          int16x4_t coeff16x4 = vreinterpret_s16_s32(coeff32);
+          int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j - 1]);
+
+          // Mul and accumulate low 64-bit data.
+          int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
+          int16x4_t in16x4_1 = vget_low_s16(in16x8x2.val[1]);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 1);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_1, coeff16x4, 0);
+
+          // Mul and accumulate high 64-bit data.
+          // TODO: vget_high_s16 need extra cost on ARM64. This could be
+          // replaced by vmlal_high_lane_s16. But for the interface of
+          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
+          // This issue need to be tracked in the future.
+          int16x4_t in16x4_2 = vget_high_s16(in16x8x2.val[0]);
+          int16x4_t in16x4_3 = vget_high_s16(in16x8x2.val[1]);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_2, coeff16x4, 1);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 0);
+        }
+
+        for (; j < coefficients_length; j++) {
+          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
+          int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]);
+
+          // Mul and accumulate low 64-bit data.
+          int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
+
+          // Mul and accumulate high 64-bit data.
+          // TODO: vget_high_s16 need extra cost on ARM64. This could be
+          // replaced by vmlal_high_lane_s16. But for the interface of
+          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
+          // This issue need to be tracked in the future.
+          int16x4_t in16x4_1 = vget_high_s16(in16x8x2.val[0]);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
+        }
+#else
+        // On ARMv7, the loop unrolling 2 times results in performance
+        // regression.
+        for (j = 0; j < coefficients_length; j++) {
+          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
+          int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]);
+
+          // Mul and accumulate.
+          int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
+          int16x4_t in16x4_1 = vget_high_s16(in16x8x2.val[0]);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
+        }
+#endif
+
+        // Saturate and store the output.
+        int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
+        int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
+        vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
+        data_out += 8;
+      }
+      break;
+    }
+    case 4: {
+      for (i = delay; i < endpos1; i += 32) {
+        // Round value, 0.5 in Q12.
+        int32x4_t out32x4_0 = vdupq_n_s32(2048);
+        int32x4_t out32x4_1 = vdupq_n_s32(2048);
+
+        // Unroll the loop 4 times.
+        for (j = 0; j < coefficients_length - 3; j += 4) {
+          int16x4_t coeff16x4 = vld1_s16(&coefficients[j]);
+          int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j - 3]);
+
+          // Mul and accumulate low 64-bit data.
+          int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]);
+          int16x4_t in16x4_2 = vget_low_s16(in16x8x4.val[1]);
+          int16x4_t in16x4_4 = vget_low_s16(in16x8x4.val[2]);
+          int16x4_t in16x4_6 = vget_low_s16(in16x8x4.val[3]);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 3);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_2, coeff16x4, 2);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_4, coeff16x4, 1);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_6, coeff16x4, 0);
+
+          // Mul and accumulate high 64-bit data.
+          // TODO: vget_high_s16 need extra cost on ARM64. This could be
+          // replaced by vmlal_high_lane_s16. But for the interface of
+          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
+          // This issue need to be tracked in the future.
+          int16x4_t in16x4_1 = vget_high_s16(in16x8x4.val[0]);
+          int16x4_t in16x4_3 = vget_high_s16(in16x8x4.val[1]);
+          int16x4_t in16x4_5 = vget_high_s16(in16x8x4.val[2]);
+          int16x4_t in16x4_7 = vget_high_s16(in16x8x4.val[3]);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 3);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 2);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_5, coeff16x4, 1);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_7, coeff16x4, 0);
+        }
+
+        for (; j < coefficients_length; j++) {
+          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
+          int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j]);
+
+          // Mul and accumulate low 64-bit data.
+          int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
+
+          // Mul and accumulate high 64-bit data.
+          // TODO: vget_high_s16 need extra cost on ARM64. This could be
+          // replaced by vmlal_high_lane_s16. But for the interface of
+          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
+          // This issue need to be tracked in the future.
+          int16x4_t in16x4_1 = vget_high_s16(in16x8x4.val[0]);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
+        }
+
+        // Saturate and store the output.
+        int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
+        int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
+        vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
+        data_out += 8;
+      }
+      break;
+    }
+    default: {
+      for (i = delay; i < endpos1; i += factor * 8) {
+        // Round value, 0.5 in Q12.
+        int32x4_t out32x4_0 = vdupq_n_s32(2048);
+        int32x4_t out32x4_1 = vdupq_n_s32(2048);
+
+        for (j = 0; j < coefficients_length; j++) {
+          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
+          int16x4_t in16x4_0 = vld1_dup_s16(&data_in[i - j]);
+          in16x4_0 = vld1_lane_s16(&data_in[i + factor - j], in16x4_0, 1);
+          in16x4_0 = vld1_lane_s16(&data_in[i + factor * 2 - j], in16x4_0, 2);
+          in16x4_0 = vld1_lane_s16(&data_in[i + factor * 3 - j], in16x4_0, 3);
+          int16x4_t in16x4_1 = vld1_dup_s16(&data_in[i + factor * 4 - j]);
+          in16x4_1 = vld1_lane_s16(&data_in[i + factor * 5 - j], in16x4_1, 1);
+          in16x4_1 = vld1_lane_s16(&data_in[i + factor * 6 - j], in16x4_1, 2);
+          in16x4_1 = vld1_lane_s16(&data_in[i + factor * 7 - j], in16x4_1, 3);
+
+          // Mul and accumulate.
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
+        }
+
+        // Saturate and store the output.
+        int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
+        int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
+        vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
+        data_out += 8;
+      }
+      break;
+    }
+  }
+
+  // Second part, do the rest iterations (if any).
+  for (; i < endpos; i += factor) {
+    out_s32 = 2048;  // Round value, 0.5 in Q12.
+
+    for (j = 0; j < coefficients_length; j++) {
+      out_s32 = WebRtc_MulAccumW16(coefficients[j], data_in[i - j], out_s32);
+    }
+
+    // Saturate and store the output.
+    out_s32 >>= 12;
+    *data_out++ = WebRtcSpl_SatW32ToW16(out_s32);
+  }
+
+  return 0;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/energy.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/energy.c
new file mode 100644
index 00000000..e83f1a69
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/energy.c
@@ -0,0 +1,39 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_Energy().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+int32_t WebRtcSpl_Energy(int16_t* vector,
+                         size_t vector_length,
+                         int* scale_factor)
+{
+    int32_t en = 0;
+    size_t i;
+    int scaling =
+        WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length);
+    size_t looptimes = vector_length;
+    int16_t *vectorptr = vector;
+
+    for (i = 0; i < looptimes; i++)
+    {
+      en += (*vectorptr * *vectorptr) >> scaling;
+      vectorptr++;
+    }
+    *scale_factor = scaling;
+
+    return en;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar.c
new file mode 100644
index 00000000..dfbc4c2f
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar.c
@@ -0,0 +1,89 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_FilterAR().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+size_t WebRtcSpl_FilterAR(const int16_t* a,
+                          size_t a_length,
+                          const int16_t* x,
+                          size_t x_length,
+                          int16_t* state,
+                          size_t state_length,
+                          int16_t* state_low,
+                          size_t state_low_length,
+                          int16_t* filtered,
+                          int16_t* filtered_low,
+                          size_t filtered_low_length)
+{
+    int32_t o;
+    int32_t oLOW;
+    size_t i, j, stop;
+    const int16_t* x_ptr = &x[0];
+    int16_t* filteredFINAL_ptr = filtered;
+    int16_t* filteredFINAL_LOW_ptr = filtered_low;
+
+    for (i = 0; i < x_length; i++)
+    {
+        // Calculate filtered[i] and filtered_low[i]
+        const int16_t* a_ptr = &a[1];
+        int16_t* filtered_ptr = &filtered[i - 1];
+        int16_t* filtered_low_ptr = &filtered_low[i - 1];
+        int16_t* state_ptr = &state[state_length - 1];
+        int16_t* state_low_ptr = &state_low[state_length - 1];
+
+        o = (int32_t)(*x_ptr++) << 12;
+        oLOW = (int32_t)0;
+
+        stop = (i < a_length) ? i + 1 : a_length;
+        for (j = 1; j < stop; j++)
+        {
+          o -= *a_ptr * *filtered_ptr--;
+          oLOW -= *a_ptr++ * *filtered_low_ptr--;
+        }
+        for (j = i + 1; j < a_length; j++)
+        {
+          o -= *a_ptr * *state_ptr--;
+          oLOW -= *a_ptr++ * *state_low_ptr--;
+        }
+
+        o += (oLOW >> 12);
+        *filteredFINAL_ptr = (int16_t)((o + (int32_t)2048) >> 12);
+        *filteredFINAL_LOW_ptr++ = (int16_t)(o - ((int32_t)(*filteredFINAL_ptr++)
+                << 12));
+    }
+
+    // Save the filter state
+    if (x_length >= state_length)
+    {
+        WebRtcSpl_CopyFromEndW16(filtered, x_length, a_length - 1, state);
+        WebRtcSpl_CopyFromEndW16(filtered_low, x_length, a_length - 1, state_low);
+    } else
+    {
+        for (i = 0; i < state_length - x_length; i++)
+        {
+            state[i] = state[i + x_length];
+            state_low[i] = state_low[i + x_length];
+        }
+        for (i = 0; i < x_length; i++)
+        {
+            state[state_length - x_length + i] = filtered[i];
+            state[state_length - x_length + i] = filtered_low[i];
+        }
+    }
+
+    return x_length;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12.c
new file mode 100644
index 00000000..70001a08
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12.c
@@ -0,0 +1,42 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <assert.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// TODO(bjornv): Change the return type to report errors.
+
+void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
+                               int16_t* data_out,
+                               const int16_t* __restrict coefficients,
+                               size_t coefficients_length,
+                               size_t data_length) {
+  size_t i = 0;
+  size_t j = 0;
+
+  assert(data_length > 0);
+  assert(coefficients_length > 1);
+
+  for (i = 0; i < data_length; i++) {
+    int32_t output = 0;
+    int32_t sum = 0;
+
+    for (j = coefficients_length - 1; j > 0; j--) {
+      sum += coefficients[j] * data_out[i - j];
+    }
+
+    output = coefficients[0] * data_in[i];
+    output -= sum;
+
+    // Saturate and store the output.
+    output = WEBRTC_SPL_SAT(134215679, output, -134217728);
+    data_out[i] = (int16_t)((output + 2048) >> 12);
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S
new file mode 100644
index 00000000..76c8eee7
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S
@@ -0,0 +1,218 @@
+@
+@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+@
+@ Use of this source code is governed by a BSD-style license
+@ that can be found in the LICENSE file in the root of the source
+@ tree. An additional intellectual property rights grant can be found
+@ in the file PATENTS.  All contributing project authors may
+@ be found in the AUTHORS file in the root of the source tree.
+@
+
+@ This file contains the function WebRtcSpl_FilterARFastQ12(), optimized for
+@ ARMv7  platform. The description header can be found in
+@ signal_processing_library.h
+@
+@ Output is bit-exact with the generic C code as in filter_ar_fast_q12.c, and
+@ the reference C code at end of this file.
+
+@ Assumptions:
+@ (1) data_length > 0
+@ (2) coefficients_length > 1
+
+@ Register usage:
+@
+@ r0:  &data_in[i]
+@ r1:  &data_out[i], for result ouput
+@ r2:  &coefficients[0]
+@ r3:  coefficients_length
+@ r4:  Iteration counter for the outer loop.
+@ r5:  data_out[j] as multiplication inputs
+@ r6:  Calculated value for output data_out[]; interation counter for inner loop
+@ r7:  Partial sum of a filtering multiplication results
+@ r8:  Partial sum of a filtering multiplication results
+@ r9:  &data_out[], for filtering input; data_in[i]
+@ r10: coefficients[j]
+@ r11: Scratch
+@ r12: &coefficients[j]
+
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+
+GLOBAL_FUNCTION WebRtcSpl_FilterARFastQ12
+.align  2
+DEFINE_FUNCTION WebRtcSpl_FilterARFastQ12
+  push {r4-r11}
+
+  ldrsh r12, [sp, #32]         @ data_length
+  subs r4, r12, #1
+  beq ODD_LENGTH               @ jump if data_length == 1
+
+LOOP_LENGTH:
+  add r12, r2, r3, lsl #1
+  sub r12, #4                  @ &coefficients[coefficients_length - 2]
+  sub r9, r1, r3, lsl #1
+  add r9, #2                   @ &data_out[i - coefficients_length + 1]
+  ldr r5, [r9], #4             @ data_out[i - coefficients_length + {1,2}]
+
+  mov r7, #0                   @ sum1
+  mov r8, #0                   @ sum2
+  subs r6, r3, #3              @ Iteration counter for inner loop.
+  beq ODD_A_LENGTH             @ branch if coefficients_length == 3
+  blt POST_LOOP_A_LENGTH       @ branch if coefficients_length == 2
+
+LOOP_A_LENGTH:
+  ldr r10, [r12], #-4          @ coefficients[j - 1], coefficients[j]
+  subs r6, #2
+  smlatt r8, r10, r5, r8       @ sum2 += coefficients[j] * data_out[i - j + 1];
+  smlatb r7, r10, r5, r7       @ sum1 += coefficients[j] * data_out[i - j];
+  smlabt r7, r10, r5, r7       @ coefficients[j - 1] * data_out[i - j + 1];
+  ldr r5, [r9], #4             @ data_out[i - j + 2],  data_out[i - j + 3]
+  smlabb r8, r10, r5, r8       @ coefficients[j - 1] * data_out[i - j + 2];
+  bgt LOOP_A_LENGTH
+  blt POST_LOOP_A_LENGTH
+
+ODD_A_LENGTH:
+  ldrsh r10, [r12, #2]         @ Filter coefficients coefficients[2]
+  sub r12, #2                  @ &coefficients[0]
+  smlabb r7, r10, r5, r7       @ sum1 += coefficients[2] * data_out[i - 2];
+  smlabt r8, r10, r5, r8       @ sum2 += coefficients[2] * data_out[i - 1];
+  ldr r5, [r9, #-2]            @ data_out[i - 1],  data_out[i]
+
+POST_LOOP_A_LENGTH:
+  ldr r10, [r12]               @ coefficients[0], coefficients[1]
+  smlatb r7, r10, r5, r7       @ sum1 += coefficients[1] * data_out[i - 1];
+
+  ldr r9, [r0], #4             @ data_in[i], data_in[i + 1]
+  smulbb r6, r10, r9           @ output1 = coefficients[0] * data_in[i];
+  sub r6, r7                   @ output1 -= sum1;
+
+  sbfx r11, r6, #12, #16
+  ssat r7, #16, r6, asr #12
+  cmp r7, r11
+  addeq r6, r6, #2048
+  ssat r6, #16, r6, asr #12
+  strh r6, [r1], #2            @ Store data_out[i]
+
+  smlatb r8, r10, r6, r8       @ sum2 += coefficients[1] * data_out[i];
+  smulbt r6, r10, r9           @ output2 = coefficients[0] * data_in[i + 1];
+  sub r6, r8                   @ output1 -= sum1;
+
+  sbfx r11, r6, #12, #16
+  ssat r7, #16, r6, asr #12
+  cmp r7, r11
+  addeq r6, r6, #2048
+  ssat r6, #16, r6, asr #12
+  strh r6, [r1], #2            @ Store data_out[i + 1]
+
+  subs r4, #2
+  bgt LOOP_LENGTH
+  blt END                      @ For even data_length, it's done. Jump to END.
+
+@ Process i = data_length -1, for the case of an odd length.
+ODD_LENGTH:
+  add r12, r2, r3, lsl #1
+  sub r12, #4                  @ &coefficients[coefficients_length - 2]
+  sub r9, r1, r3, lsl #1
+  add r9, #2                   @ &data_out[i - coefficients_length + 1]
+  mov r7, #0                   @ sum1
+  mov r8, #0                   @ sum1
+  subs r6, r3, #2              @ inner loop counter
+  beq EVEN_A_LENGTH            @ branch if coefficients_length == 2
+
+LOOP2_A_LENGTH:
+  ldr r10, [r12], #-4          @ coefficients[j - 1], coefficients[j]
+  ldr r5, [r9], #4             @ data_out[i - j],  data_out[i - j + 1]
+  subs r6, #2
+  smlatb r7, r10, r5, r7       @ sum1 += coefficients[j] * data_out[i - j];
+  smlabt r8, r10, r5, r8       @ coefficients[j - 1] * data_out[i - j + 1];
+  bgt LOOP2_A_LENGTH
+  addlt r12, #2
+  blt POST_LOOP2_A_LENGTH
+
+EVEN_A_LENGTH:
+  ldrsh r10, [r12, #2]         @ Filter coefficients coefficients[1]
+  ldrsh r5, [r9]               @ data_out[i - 1]
+  smlabb r7, r10, r5, r7       @ sum1 += coefficients[1] * data_out[i - 1];
+
+POST_LOOP2_A_LENGTH:
+  ldrsh r10, [r12]             @ Filter coefficients coefficients[0]
+  ldrsh r9, [r0]               @ data_in[i]
+  smulbb r6, r10, r9           @ output1 = coefficients[0] * data_in[i];
+  sub r6, r7                   @ output1 -= sum1;
+  sub r6, r8                   @ output1 -= sum1;
+  sbfx r8, r6, #12, #16
+  ssat r7, #16, r6, asr #12
+  cmp r7, r8
+  addeq r6, r6, #2048
+  ssat r6, #16, r6, asr #12
+  strh r6, [r1]                @ Store the data_out[i]
+
+END:
+  pop {r4-r11}
+  bx  lr
+
+@Reference C code:
+@
+@void WebRtcSpl_FilterARFastQ12(int16_t* data_in,
+@                               int16_t* data_out,
+@                               int16_t* __restrict coefficients,
+@                               size_t coefficients_length,
+@                               size_t data_length) {
+@  size_t i = 0;
+@  size_t j = 0;
+@
+@  assert(data_length > 0);
+@  assert(coefficients_length > 1);
+@
+@  for (i = 0; i < data_length - 1; i += 2) {
+@    int32_t output1 = 0;
+@    int32_t sum1 = 0;
+@    int32_t output2 = 0;
+@    int32_t sum2 = 0;
+@
+@    for (j = coefficients_length - 1; j > 2; j -= 2) {
+@      sum1 += coefficients[j]      * data_out[i - j];
+@      sum1 += coefficients[j - 1]  * data_out[i - j + 1];
+@      sum2 += coefficients[j]     * data_out[i - j + 1];
+@      sum2 += coefficients[j - 1] * data_out[i - j + 2];
+@    }
+@
+@    if (j == 2) {
+@      sum1 += coefficients[2] * data_out[i - 2];
+@      sum2 += coefficients[2] * data_out[i - 1];
+@    }
+@
+@    sum1 += coefficients[1] * data_out[i - 1];
+@    output1 = coefficients[0] * data_in[i];
+@    output1 -= sum1;
+@    // Saturate and store the output.
+@    output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728);
+@    data_out[i] = (int16_t)((output1 + 2048) >> 12);
+@
+@    sum2 += coefficients[1] * data_out[i];
+@    output2 = coefficients[0] * data_in[i + 1];
+@    output2 -= sum2;
+@    // Saturate and store the output.
+@    output2 = WEBRTC_SPL_SAT(134215679, output2, -134217728);
+@    data_out[i + 1] = (int16_t)((output2 + 2048) >> 12);
+@  }
+@
+@  if (i == data_length - 1) {
+@    int32_t output1 = 0;
+@    int32_t sum1 = 0;
+@
+@    for (j = coefficients_length - 1; j > 1; j -= 2) {
+@      sum1 += coefficients[j]      * data_out[i - j];
+@      sum1 += coefficients[j - 1]  * data_out[i - j + 1];
+@    }
+@
+@    if (j == 1) {
+@      sum1 += coefficients[1] * data_out[i - 1];
+@    }
+@
+@    output1 = coefficients[0] * data_in[i];
+@    output1 -= sum1;
+@    // Saturate and store the output.
+@    output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728);
+@    data_out[i] = (int16_t)((output1 + 2048) >> 12);
+@  }
+@}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_mips.c
new file mode 100644
index 00000000..03847018
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_mips.c
@@ -0,0 +1,140 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <assert.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
+                               int16_t* data_out,
+                               const int16_t* __restrict coefficients,
+                               size_t coefficients_length,
+                               size_t data_length) {
+  int r0, r1, r2, r3;
+  int coef0, offset;
+  int i, j, k;
+  int coefptr, outptr, tmpout, inptr;
+#if !defined(MIPS_DSP_R1_LE)
+  int max16 = 0x7FFF;
+  int min16 = 0xFFFF8000;
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+
+  assert(data_length > 0);
+  assert(coefficients_length > 1);
+
+  __asm __volatile (
+    ".set       push                                             \n\t"
+    ".set       noreorder                                        \n\t"
+    "addiu      %[i],       %[data_length],          0           \n\t"
+    "lh         %[coef0],   0(%[coefficients])                   \n\t"
+    "addiu      %[j],       %[coefficients_length],  -1          \n\t"
+    "andi       %[k],       %[j],                    1           \n\t"
+    "sll        %[offset],  %[j],                    1           \n\t"
+    "subu       %[outptr],  %[data_out],             %[offset]   \n\t"
+    "addiu      %[inptr],   %[data_in],              0           \n\t"
+    "bgtz       %[k],       3f                                   \n\t"
+    " addu      %[coefptr], %[coefficients],         %[offset]   \n\t"
+   "1:                                                           \n\t"
+    "lh         %[r0],      0(%[inptr])                          \n\t"
+    "addiu      %[i],       %[i],                    -1          \n\t"
+    "addiu      %[tmpout],  %[outptr],               0           \n\t"
+    "mult       %[r0],      %[coef0]                             \n\t"
+   "2:                                                           \n\t"
+    "lh         %[r0],      0(%[tmpout])                         \n\t"
+    "lh         %[r1],      0(%[coefptr])                        \n\t"
+    "lh         %[r2],      2(%[tmpout])                         \n\t"
+    "lh         %[r3],      -2(%[coefptr])                       \n\t"
+    "addiu      %[tmpout],  %[tmpout],               4           \n\t"
+    "msub       %[r0],      %[r1]                                \n\t"
+    "msub       %[r2],      %[r3]                                \n\t"
+    "addiu      %[j],       %[j],                    -2          \n\t"
+    "bgtz       %[j],       2b                                   \n\t"
+    " addiu     %[coefptr], %[coefptr],              -4          \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "extr_r.w   %[r0],      $ac0,                    12          \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "mflo       %[r0]                                            \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu       %[coefptr], %[coefficients],         %[offset]   \n\t"
+    "addiu      %[inptr],   %[inptr],                2           \n\t"
+    "addiu      %[j],       %[coefficients_length],  -1          \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shll_s.w   %[r0],      %[r0],                   16          \n\t"
+    "sra        %[r0],      %[r0],                   16          \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu      %[r0],      %[r0],                   2048        \n\t"
+    "sra        %[r0],      %[r0],                   12          \n\t"
+    "slt        %[r1],      %[max16],                %[r0]       \n\t"
+    "movn       %[r0],      %[max16],                %[r1]       \n\t"
+    "slt        %[r1],      %[r0],                   %[min16]    \n\t"
+    "movn       %[r0],      %[min16],                %[r1]       \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sh         %[r0],      0(%[tmpout])                         \n\t"
+    "bgtz       %[i],       1b                                   \n\t"
+    " addiu     %[outptr],  %[outptr],               2           \n\t"
+    "b          5f                                               \n\t"
+    " nop                                                        \n\t"
+   "3:                                                           \n\t"
+    "lh         %[r0],      0(%[inptr])                          \n\t"
+    "addiu      %[i],       %[i],                    -1          \n\t"
+    "addiu      %[tmpout],  %[outptr],               0           \n\t"
+    "mult       %[r0],      %[coef0]                             \n\t"
+   "4:                                                           \n\t"
+    "lh         %[r0],      0(%[tmpout])                         \n\t"
+    "lh         %[r1],      0(%[coefptr])                        \n\t"
+    "lh         %[r2],      2(%[tmpout])                         \n\t"
+    "lh         %[r3],      -2(%[coefptr])                       \n\t"
+    "addiu      %[tmpout],  %[tmpout],               4           \n\t"
+    "msub       %[r0],      %[r1]                                \n\t"
+    "msub       %[r2],      %[r3]                                \n\t"
+    "addiu      %[j],       %[j],                    -2          \n\t"
+    "bgtz       %[j],       4b                                   \n\t"
+    " addiu     %[coefptr], %[coefptr],              -4          \n\t"
+    "lh         %[r0],      0(%[tmpout])                         \n\t"
+    "lh         %[r1],      0(%[coefptr])                        \n\t"
+    "msub       %[r0],      %[r1]                                \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "extr_r.w   %[r0],      $ac0,                    12          \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "mflo       %[r0]                                            \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu       %[coefptr], %[coefficients],         %[offset]   \n\t"
+    "addiu      %[inptr],   %[inptr],                2           \n\t"
+    "addiu      %[j],       %[coefficients_length],  -1          \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shll_s.w   %[r0],      %[r0],                   16          \n\t"
+    "sra        %[r0],      %[r0],                   16          \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu      %[r0],      %[r0],                   2048        \n\t"
+    "sra        %[r0],      %[r0],                   12          \n\t"
+    "slt        %[r1],      %[max16],                %[r0]       \n\t"
+    "movn       %[r0],      %[max16],                %[r1]       \n\t"
+    "slt        %[r1],      %[r0],                   %[min16]    \n\t"
+    "movn       %[r0],      %[min16],                %[r1]       \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sh         %[r0],      2(%[tmpout])                         \n\t"
+    "bgtz       %[i],       3b                                   \n\t"
+    " addiu     %[outptr],  %[outptr],               2           \n\t"
+   "5:                                                           \n\t"
+    ".set       pop                                              \n\t"
+    : [i] "=&r" (i), [j] "=&r" (j), [k] "=&r" (k), [r0] "=&r" (r0),
+      [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
+      [coef0] "=&r" (coef0), [offset] "=&r" (offset),
+      [outptr] "=&r" (outptr), [inptr] "=&r" (inptr),
+      [coefptr] "=&r" (coefptr), [tmpout] "=&r" (tmpout)
+    : [coefficients] "r" (coefficients), [data_length] "r" (data_length),
+      [coefficients_length] "r" (coefficients_length),
+#if !defined(MIPS_DSP_R1_LE)
+      [max16] "r" (max16), [min16] "r" (min16),
+#endif
+      [data_out] "r" (data_out), [data_in] "r" (data_in)
+    : "hi", "lo", "memory"
+  );
+}
+
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ma_fast_q12.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ma_fast_q12.c
new file mode 100644
index 00000000..f4d9a3d3
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ma_fast_q12.c
@@ -0,0 +1,45 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_FilterMAFastQ12().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_FilterMAFastQ12(const int16_t* in_ptr,
+                               int16_t* out_ptr,
+                               const int16_t* B,
+                               size_t B_length,
+                               size_t length)
+{
+    size_t i, j;
+    for (i = 0; i < length; i++)
+    {
+        int32_t o = 0;
+
+        for (j = 0; j < B_length; j++)
+        {
+          o += B[j] * in_ptr[i - j];
+        }
+
+        // If output is higher than 32768, saturate it. Same with negative side
+        // 2^27 = 134217728, which corresponds to 32768 in Q12
+
+        // Saturate the output
+        o = WEBRTC_SPL_SAT((int32_t)134215679, o, (int32_t)-134217728);
+
+        *out_ptr++ = (int16_t)((o + (int32_t)2048) >> 12);
+    }
+    return;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_hanning_window.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_hanning_window.c
new file mode 100644
index 00000000..d83ac216
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_hanning_window.c
@@ -0,0 +1,77 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_GetHanningWindow().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Hanning table with 256 entries
+static const int16_t kHanningTable[] = {
+    1,      2,      6,     10,     15,     22,     30,     39,
+   50,     62,     75,     89,    104,    121,    138,    157,
+  178,    199,    222,    246,    271,    297,    324,    353,
+  383,    413,    446,    479,    513,    549,    586,    624,
+  663,    703,    744,    787,    830,    875,    920,    967,
+ 1015,   1064,   1114,   1165,   1218,   1271,   1325,   1381,
+ 1437,   1494,   1553,   1612,   1673,   1734,   1796,   1859,
+ 1924,   1989,   2055,   2122,   2190,   2259,   2329,   2399,
+ 2471,   2543,   2617,   2691,   2765,   2841,   2918,   2995,
+ 3073,   3152,   3232,   3312,   3393,   3475,   3558,   3641,
+ 3725,   3809,   3895,   3980,   4067,   4154,   4242,   4330,
+ 4419,   4509,   4599,   4689,   4781,   4872,   4964,   5057,
+ 5150,   5244,   5338,   5432,   5527,   5622,   5718,   5814,
+ 5910,   6007,   6104,   6202,   6299,   6397,   6495,   6594,
+ 6693,   6791,   6891,   6990,   7090,   7189,   7289,   7389,
+ 7489,   7589,   7690,   7790,   7890,   7991,   8091,   8192,
+ 8293,   8393,   8494,   8594,   8694,   8795,   8895,   8995,
+ 9095,   9195,   9294,   9394,   9493,   9593,   9691,   9790,
+ 9889,   9987,  10085,  10182,  10280,  10377,  10474,  10570,
+10666,  10762,  10857,  10952,  11046,  11140,  11234,  11327,
+11420,  11512,  11603,  11695,  11785,  11875,  11965,  12054,
+12142,  12230,  12317,  12404,  12489,  12575,  12659,  12743,
+12826,  12909,  12991,  13072,  13152,  13232,  13311,  13389,
+13466,  13543,  13619,  13693,  13767,  13841,  13913,  13985,
+14055,  14125,  14194,  14262,  14329,  14395,  14460,  14525,
+14588,  14650,  14711,  14772,  14831,  14890,  14947,  15003,
+15059,  15113,  15166,  15219,  15270,  15320,  15369,  15417,
+15464,  15509,  15554,  15597,  15640,  15681,  15721,  15760,
+15798,  15835,  15871,  15905,  15938,  15971,  16001,  16031,
+16060,  16087,  16113,  16138,  16162,  16185,  16206,  16227,
+16246,  16263,  16280,  16295,  16309,  16322,  16334,  16345,
+16354,  16362,  16369,  16374,  16378,  16382,  16383,  16384
+};
+
+void WebRtcSpl_GetHanningWindow(int16_t *v, size_t size)
+{
+    size_t jj;
+    int16_t *vptr1;
+
+    int32_t index;
+    int32_t factor = ((int32_t)0x40000000);
+
+    factor = WebRtcSpl_DivW32W16(factor, (int16_t)size);
+    if (size < 513)
+        index = (int32_t)-0x200000;
+    else
+        index = (int32_t)-0x100000;
+    vptr1 = v;
+
+    for (jj = 0; jj < size; jj++)
+    {
+        index += factor;
+        (*vptr1++) = kHanningTable[index >> 22];
+    }
+
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_scaling_square.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_scaling_square.c
new file mode 100644
index 00000000..82e3c8b0
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_scaling_square.c
@@ -0,0 +1,46 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_GetScalingSquare().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
+                                   size_t in_vector_length,
+                                   size_t times)
+{
+    int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times);
+    size_t i;
+    int16_t smax = -1;
+    int16_t sabs;
+    int16_t *sptr = in_vector;
+    int16_t t;
+    size_t looptimes = in_vector_length;
+
+    for (i = looptimes; i > 0; i--)
+    {
+        sabs = (*sptr > 0 ? *sptr++ : -*sptr++);
+        smax = (sabs > smax ? sabs : smax);
+    }
+    t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));
+
+    if (smax == 0)
+    {
+        return 0; // Since norm(0) returns 0
+    } else
+    {
+        return (t > nbits) ? 0 : nbits - t;
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/ilbc_specific_functions.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/ilbc_specific_functions.c
new file mode 100644
index 00000000..301a922d
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/ilbc_specific_functions.c
@@ -0,0 +1,90 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the iLBC specific functions
+ * WebRtcSpl_ReverseOrderMultArrayElements()
+ * WebRtcSpl_ElementwiseVectorMult()
+ * WebRtcSpl_AddVectorsAndShift()
+ * WebRtcSpl_AddAffineVectorToVector()
+ * WebRtcSpl_AffineTransformVector()
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_ReverseOrderMultArrayElements(int16_t *out, const int16_t *in,
+                                             const int16_t *win,
+                                             size_t vector_length,
+                                             int16_t right_shifts)
+{
+    size_t i;
+    int16_t *outptr = out;
+    const int16_t *inptr = in;
+    const int16_t *winptr = win;
+    for (i = 0; i < vector_length; i++)
+    {
+      *outptr++ = (int16_t)((*inptr++ * *winptr--) >> right_shifts);
+    }
+}
+
+void WebRtcSpl_ElementwiseVectorMult(int16_t *out, const int16_t *in,
+                                     const int16_t *win, size_t vector_length,
+                                     int16_t right_shifts)
+{
+    size_t i;
+    int16_t *outptr = out;
+    const int16_t *inptr = in;
+    const int16_t *winptr = win;
+    for (i = 0; i < vector_length; i++)
+    {
+      *outptr++ = (int16_t)((*inptr++ * *winptr++) >> right_shifts);
+    }
+}
+
+void WebRtcSpl_AddVectorsAndShift(int16_t *out, const int16_t *in1,
+                                  const int16_t *in2, size_t vector_length,
+                                  int16_t right_shifts)
+{
+    size_t i;
+    int16_t *outptr = out;
+    const int16_t *in1ptr = in1;
+    const int16_t *in2ptr = in2;
+    for (i = vector_length; i > 0; i--)
+    {
+        (*outptr++) = (int16_t)(((*in1ptr++) + (*in2ptr++)) >> right_shifts);
+    }
+}
+
+void WebRtcSpl_AddAffineVectorToVector(int16_t *out, int16_t *in,
+                                       int16_t gain, int32_t add_constant,
+                                       int16_t right_shifts,
+                                       size_t vector_length)
+{
+    size_t i;
+
+    for (i = 0; i < vector_length; i++)
+    {
+      out[i] += (int16_t)((in[i] * gain + add_constant) >> right_shifts);
+    }
+}
+
+void WebRtcSpl_AffineTransformVector(int16_t *out, int16_t *in,
+                                     int16_t gain, int32_t add_constant,
+                                     int16_t right_shifts, size_t vector_length)
+{
+    size_t i;
+
+    for (i = 0; i < vector_length; i++)
+    {
+      out[i] = (int16_t)((in[i] * gain + add_constant) >> right_shifts);
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/real_fft.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/real_fft.h
new file mode 100644
index 00000000..e7942f04
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/real_fft.h
@@ -0,0 +1,97 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
+#define WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
+
+#include "webrtc/typedefs.h"
+
+// For ComplexFFT(), the maximum fft order is 10;
+// for OpenMax FFT in ARM, it is 12;
+// WebRTC APM uses orders of only 7 and 8.
+enum {kMaxFFTOrder = 10};
+
+struct RealFFT;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct RealFFT* WebRtcSpl_CreateRealFFT(int order);
+void WebRtcSpl_FreeRealFFT(struct RealFFT* self);
+
+// Compute an FFT for a real-valued signal of length of 2^order,
+// where 1 < order <= MAX_FFT_ORDER. Transform length is determined by the
+// specification structure, which must be initialized prior to calling the FFT
+// function with WebRtcSpl_CreateRealFFT().
+// The relationship between the input and output sequences can
+// be expressed in terms of the DFT, i.e.:
+//     x[n] = (2^(-scalefactor)/N)  . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
+//     n=0,1,2,...N-1
+//     N=2^order.
+// The conjugate-symmetric output sequence is represented using a CCS vector,
+// which is of length N+2, and is organized as follows:
+//     Index:      0  1  2  3  4  5   . . .   N-2       N-1       N       N+1
+//     Component:  R0 0  R1 I1 R2 I2  . . .   R[N/2-1]  I[N/2-1]  R[N/2]  0
+// where R[n] and I[n], respectively, denote the real and imaginary components
+// for FFT bin 'n'. Bins  are numbered from 0 to N/2, where N is the FFT length.
+// Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to
+// the foldover frequency.
+//
+// Input Arguments:
+//   self - pointer to preallocated and initialized FFT specification structure.
+//   real_data_in - the input signal. For an ARM Neon platform, it must be
+//                  aligned on a 32-byte boundary.
+//
+// Output Arguments:
+//   complex_data_out - the output complex signal with (2^order + 2) 16-bit
+//                      elements. For an ARM Neon platform, it must be different
+//                      from real_data_in, and aligned on a 32-byte boundary.
+//
+// Return Value:
+//   0  - FFT calculation is successful.
+//   -1 - Error with bad arguments (NULL pointers).
+int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
+                             const int16_t* real_data_in,
+                             int16_t* complex_data_out);
+
+// Compute the inverse FFT for a conjugate-symmetric input sequence of length of
+// 2^order, where 1 < order <= MAX_FFT_ORDER. Transform length is determined by
+// the specification structure, which must be initialized prior to calling the
+// FFT function with WebRtcSpl_CreateRealFFT().
+// For a transform of length M, the input sequence is represented using a packed
+// CCS vector of length M+2, which is explained in the comments for
+// WebRtcSpl_RealForwardFFTC above.
+//
+// Input Arguments:
+//   self - pointer to preallocated and initialized FFT specification structure.
+//   complex_data_in - the input complex signal with (2^order + 2) 16-bit
+//                     elements. For an ARM Neon platform, it must be aligned on
+//                     a 32-byte boundary.
+//
+// Output Arguments:
+//   real_data_out - the output real signal. For an ARM Neon platform, it must
+//                   be different to complex_data_in, and aligned on a 32-byte
+//                   boundary.
+//
+// Return Value:
+//   0 or a positive number - a value that the elements in the |real_data_out|
+//                            should be shifted left with in order to get
+//                            correct physical values.
+//   -1 - Error with bad arguments (NULL pointers).
+int WebRtcSpl_RealInverseFFT(struct RealFFT* self,
+                             const int16_t* complex_data_in,
+                             int16_t* real_data_out);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/signal_processing_library.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/signal_processing_library.h
new file mode 100644
index 00000000..2e96883e
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/signal_processing_library.h
@@ -0,0 +1,1645 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file includes all of the fix point signal processing library (SPL) function
+ * descriptions and declarations.
+ * For specific function calls, see bottom of file.
+ */
+
+#ifndef WEBRTC_SPL_SIGNAL_PROCESSING_LIBRARY_H_
+#define WEBRTC_SPL_SIGNAL_PROCESSING_LIBRARY_H_
+
+#include <string.h>
+#include "webrtc/typedefs.h"
+
+// Macros specific for the fixed point implementation
+#define WEBRTC_SPL_WORD16_MAX       32767
+#define WEBRTC_SPL_WORD16_MIN       -32768
+#define WEBRTC_SPL_WORD32_MAX       (int32_t)0x7fffffff
+#define WEBRTC_SPL_WORD32_MIN       (int32_t)0x80000000
+#define WEBRTC_SPL_MAX_LPC_ORDER    14
+#define WEBRTC_SPL_MIN(A, B)        (A < B ? A : B)  // Get min value
+#define WEBRTC_SPL_MAX(A, B)        (A > B ? A : B)  // Get max value
+// TODO(kma/bjorn): For the next two macros, investigate how to correct the code
+// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN.
+#define WEBRTC_SPL_ABS_W16(a) \
+    (((int16_t)a >= 0) ? ((int16_t)a) : -((int16_t)a))
+#define WEBRTC_SPL_ABS_W32(a) \
+    (((int32_t)a >= 0) ? ((int32_t)a) : -((int32_t)a))
+
+#define WEBRTC_SPL_MUL(a, b) \
+    ((int32_t) ((int32_t)(a) * (int32_t)(b)))
+#define WEBRTC_SPL_UMUL(a, b) \
+    ((uint32_t) ((uint32_t)(a) * (uint32_t)(b)))
+#define WEBRTC_SPL_UMUL_32_16(a, b) \
+    ((uint32_t) ((uint32_t)(a) * (uint16_t)(b)))
+#define WEBRTC_SPL_MUL_16_U16(a, b) \
+    ((int32_t)(int16_t)(a) * (uint16_t)(b))
+
+#ifndef WEBRTC_ARCH_ARM_V7
+// For ARMv7 platforms, these are inline functions in spl_inl_armv7.h
+#ifndef MIPS32_LE
+// For MIPS platforms, these are inline functions in spl_inl_mips.h
+#define WEBRTC_SPL_MUL_16_16(a, b) \
+    ((int32_t) (((int16_t)(a)) * ((int16_t)(b))))
+#define WEBRTC_SPL_MUL_16_32_RSFT16(a, b) \
+    (WEBRTC_SPL_MUL_16_16(a, b >> 16) \
+     + ((WEBRTC_SPL_MUL_16_16(a, (b & 0xffff) >> 1) + 0x4000) >> 15))
+#endif
+#endif
+
+#define WEBRTC_SPL_MUL_16_32_RSFT11(a, b) \
+    ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) << 5) \
+    + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x0200) >> 10))
+#define WEBRTC_SPL_MUL_16_32_RSFT14(a, b) \
+    ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) << 2) \
+    + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x1000) >> 13))
+#define WEBRTC_SPL_MUL_16_32_RSFT15(a, b) \
+    ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) << 1) \
+    + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x2000) >> 14))
+
+#define WEBRTC_SPL_MUL_16_16_RSFT(a, b, c) \
+    (WEBRTC_SPL_MUL_16_16(a, b) >> (c))
+
+#define WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(a, b, c) \
+    ((WEBRTC_SPL_MUL_16_16(a, b) + ((int32_t) \
+                                  (((int32_t)1) << ((c) - 1)))) >> (c))
+
+// C + the 32 most significant bits of A * B
+#define WEBRTC_SPL_SCALEDIFF32(A, B, C) \
+    (C + (B >> 16) * A + (((uint32_t)(0x0000FFFF & B) * A) >> 16))
+
+#define WEBRTC_SPL_SAT(a, b, c)         (b > a ? a : b < c ? c : b)
+
+// Shifting with negative numbers allowed
+// Positive means left shift
+#define WEBRTC_SPL_SHIFT_W32(x, c) \
+    (((c) >= 0) ? ((x) << (c)) : ((x) >> (-(c))))
+
+// Shifting with negative numbers not allowed
+// We cannot do casting here due to signed/unsigned problem
+#define WEBRTC_SPL_LSHIFT_W32(x, c)     ((x) << (c))
+
+#define WEBRTC_SPL_RSHIFT_U32(x, c)     ((uint32_t)(x) >> (c))
+
+#define WEBRTC_SPL_RAND(a) \
+    ((int16_t)((((int16_t)a * 18816) >> 7) & 0x00007fff))
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define WEBRTC_SPL_MEMCPY_W16(v1, v2, length) \
+  memcpy(v1, v2, (length) * sizeof(int16_t))
+
+// inline functions:
+#include "webrtc/common_audio/signal_processing/include/spl_inl.h"
+
+// Initialize SPL. Currently it contains only function pointer initialization.
+// If the underlying platform is known to be ARM-Neon (WEBRTC_HAS_NEON defined),
+// the pointers will be assigned to code optimized for Neon; otherwise
+// if run-time Neon detection (WEBRTC_DETECT_NEON) is enabled, the pointers
+// will be assigned to either Neon code or generic C code; otherwise, generic C
+// code will be assigned.
+// Note that this function MUST be called in any application that uses SPL
+// functions.
+void WebRtcSpl_Init();
+
+int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
+                                   size_t in_vector_length,
+                                   size_t times);
+
+// Copy and set operations. Implementation in copy_set_operations.c.
+// Descriptions at bottom of file.
+void WebRtcSpl_MemSetW16(int16_t* vector,
+                         int16_t set_value,
+                         size_t vector_length);
+void WebRtcSpl_MemSetW32(int32_t* vector,
+                         int32_t set_value,
+                         size_t vector_length);
+void WebRtcSpl_MemCpyReversedOrder(int16_t* out_vector,
+                                   int16_t* in_vector,
+                                   size_t vector_length);
+void WebRtcSpl_CopyFromEndW16(const int16_t* in_vector,
+                              size_t in_vector_length,
+                              size_t samples,
+                              int16_t* out_vector);
+void WebRtcSpl_ZerosArrayW16(int16_t* vector,
+                             size_t vector_length);
+void WebRtcSpl_ZerosArrayW32(int32_t* vector,
+                             size_t vector_length);
+// End: Copy and set operations.
+
+
+// Minimum and maximum operation functions and their pointers.
+// Implementation in min_max_operations.c.
+
+// Returns the largest absolute value in a signed 16-bit vector.
+//
+// Input:
+//      - vector : 16-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Maximum absolute value in vector.
+typedef int16_t (*MaxAbsValueW16)(const int16_t* vector, size_t length);
+extern MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
+int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length);
+#endif
+#if defined(MIPS32_LE)
+int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length);
+#endif
+
+// Returns the largest absolute value in a signed 32-bit vector.
+//
+// Input:
+//      - vector : 32-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Maximum absolute value in vector.
+typedef int32_t (*MaxAbsValueW32)(const int32_t* vector, size_t length);
+extern MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
+int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length);
+#endif
+#if defined(MIPS_DSP_R1_LE)
+int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length);
+#endif
+
+// Returns the maximum value of a 16-bit vector.
+//
+// Input:
+//      - vector : 16-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Maximum sample value in |vector|.
+typedef int16_t (*MaxValueW16)(const int16_t* vector, size_t length);
+extern MaxValueW16 WebRtcSpl_MaxValueW16;
+int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length);
+#endif
+#if defined(MIPS32_LE)
+int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length);
+#endif
+
+// Returns the maximum value of a 32-bit vector.
+//
+// Input:
+//      - vector : 32-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Maximum sample value in |vector|.
+typedef int32_t (*MaxValueW32)(const int32_t* vector, size_t length);
+extern MaxValueW32 WebRtcSpl_MaxValueW32;
+int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length);
+#endif
+#if defined(MIPS32_LE)
+int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length);
+#endif
+
+// Returns the minimum value of a 16-bit vector.
+//
+// Input:
+//      - vector : 16-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Minimum sample value in |vector|.
+typedef int16_t (*MinValueW16)(const int16_t* vector, size_t length);
+extern MinValueW16 WebRtcSpl_MinValueW16;
+int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length);
+#endif
+#if defined(MIPS32_LE)
+int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length);
+#endif
+
+// Returns the minimum value of a 32-bit vector.
+//
+// Input:
+//      - vector : 32-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Minimum sample value in |vector|.
+typedef int32_t (*MinValueW32)(const int32_t* vector, size_t length);
+extern MinValueW32 WebRtcSpl_MinValueW32;
+int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length);
+#endif
+#if defined(MIPS32_LE)
+int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length);
+#endif
+
+// Returns the vector index to the largest absolute value of a 16-bit vector.
+//
+// Input:
+//      - vector : 16-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Index to the maximum absolute value in vector.
+//                 If there are multiple equal maxima, return the index of the
+//                 first. -32768 will always have precedence over 32767 (despite
+//                 -32768 presenting an int16 absolute value of 32767).
+size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length);
+
+// Returns the vector index to the maximum sample value of a 16-bit vector.
+//
+// Input:
+//      - vector : 16-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Index to the maximum value in vector (if multiple
+//                 indexes have the maximum, return the first).
+size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length);
+
+// Returns the vector index to the maximum sample value of a 32-bit vector.
+//
+// Input:
+//      - vector : 32-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Index to the maximum value in vector (if multiple
+//                 indexes have the maximum, return the first).
+size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length);
+
+// Returns the vector index to the minimum sample value of a 16-bit vector.
+//
+// Input:
+//      - vector : 16-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Index to the mimimum value in vector  (if multiple
+//                 indexes have the minimum, return the first).
+size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length);
+
+// Returns the vector index to the minimum sample value of a 32-bit vector.
+//
+// Input:
+//      - vector : 32-bit input vector.
+//      - length : Number of samples in vector.
+//
+// Return value  : Index to the mimimum value in vector  (if multiple
+//                 indexes have the minimum, return the first).
+size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length);
+
+// End: Minimum and maximum operations.
+
+
+// Vector scaling operations. Implementation in vector_scaling_operations.c.
+// Description at bottom of file.
+void WebRtcSpl_VectorBitShiftW16(int16_t* out_vector,
+                                 size_t vector_length,
+                                 const int16_t* in_vector,
+                                 int16_t right_shifts);
+void WebRtcSpl_VectorBitShiftW32(int32_t* out_vector,
+                                 size_t vector_length,
+                                 const int32_t* in_vector,
+                                 int16_t right_shifts);
+void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out_vector,
+                                      size_t vector_length,
+                                      const int32_t* in_vector,
+                                      int right_shifts);
+void WebRtcSpl_ScaleVector(const int16_t* in_vector,
+                           int16_t* out_vector,
+                           int16_t gain,
+                           size_t vector_length,
+                           int16_t right_shifts);
+void WebRtcSpl_ScaleVectorWithSat(const int16_t* in_vector,
+                                  int16_t* out_vector,
+                                  int16_t gain,
+                                  size_t vector_length,
+                                  int16_t right_shifts);
+void WebRtcSpl_ScaleAndAddVectors(const int16_t* in_vector1,
+                                  int16_t gain1, int right_shifts1,
+                                  const int16_t* in_vector2,
+                                  int16_t gain2, int right_shifts2,
+                                  int16_t* out_vector,
+                                  size_t vector_length);
+
+// The functions (with related pointer) perform the vector operation:
+//   out_vector[k] = ((scale1 * in_vector1[k]) + (scale2 * in_vector2[k])
+//        + round_value) >> right_shifts,
+//   where  round_value = (1 << right_shifts) >> 1.
+//
+// Input:
+//      - in_vector1       : Input vector 1
+//      - in_vector1_scale : Gain to be used for vector 1
+//      - in_vector2       : Input vector 2
+//      - in_vector2_scale : Gain to be used for vector 2
+//      - right_shifts     : Number of right bit shifts to be applied
+//      - length           : Number of elements in the input vectors
+//
+// Output:
+//      - out_vector       : Output vector
+// Return value            : 0 if OK, -1 if (in_vector1 == NULL
+//                           || in_vector2 == NULL || out_vector == NULL
+//                           || length <= 0 || right_shift < 0).
+typedef int (*ScaleAndAddVectorsWithRound)(const int16_t* in_vector1,
+                                           int16_t in_vector1_scale,
+                                           const int16_t* in_vector2,
+                                           int16_t in_vector2_scale,
+                                           int right_shifts,
+                                           int16_t* out_vector,
+                                           size_t length);
+extern ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
+int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
+                                           int16_t in_vector1_scale,
+                                           const int16_t* in_vector2,
+                                           int16_t in_vector2_scale,
+                                           int right_shifts,
+                                           int16_t* out_vector,
+                                           size_t length);
+#if defined(MIPS_DSP_R1_LE)
+int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1,
+                                               int16_t in_vector1_scale,
+                                               const int16_t* in_vector2,
+                                               int16_t in_vector2_scale,
+                                               int right_shifts,
+                                               int16_t* out_vector,
+                                               size_t length);
+#endif
+// End: Vector scaling operations.
+
+// iLBC specific functions. Implementations in ilbc_specific_functions.c.
+// Description at bottom of file.
+void WebRtcSpl_ReverseOrderMultArrayElements(int16_t* out_vector,
+                                             const int16_t* in_vector,
+                                             const int16_t* window,
+                                             size_t vector_length,
+                                             int16_t right_shifts);
+void WebRtcSpl_ElementwiseVectorMult(int16_t* out_vector,
+                                     const int16_t* in_vector,
+                                     const int16_t* window,
+                                     size_t vector_length,
+                                     int16_t right_shifts);
+void WebRtcSpl_AddVectorsAndShift(int16_t* out_vector,
+                                  const int16_t* in_vector1,
+                                  const int16_t* in_vector2,
+                                  size_t vector_length,
+                                  int16_t right_shifts);
+void WebRtcSpl_AddAffineVectorToVector(int16_t* out_vector,
+                                       int16_t* in_vector,
+                                       int16_t gain,
+                                       int32_t add_constant,
+                                       int16_t right_shifts,
+                                       size_t vector_length);
+void WebRtcSpl_AffineTransformVector(int16_t* out_vector,
+                                     int16_t* in_vector,
+                                     int16_t gain,
+                                     int32_t add_constant,
+                                     int16_t right_shifts,
+                                     size_t vector_length);
+// End: iLBC specific functions.
+
+// Signal processing operations.
+
+// A 32-bit fix-point implementation of auto-correlation computation
+//
+// Input:
+//      - in_vector        : Vector to calculate autocorrelation upon
+//      - in_vector_length : Length (in samples) of |vector|
+//      - order            : The order up to which the autocorrelation should be
+//                           calculated
+//
+// Output:
+//      - result           : auto-correlation values (values should be seen
+//                           relative to each other since the absolute values
+//                           might have been down shifted to avoid overflow)
+//
+//      - scale            : The number of left shifts required to obtain the
+//                           auto-correlation in Q0
+//
+// Return value            : Number of samples in |result|, i.e. (order+1)
+size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector,
+                                 size_t in_vector_length,
+                                 size_t order,
+                                 int32_t* result,
+                                 int* scale);
+
+// A 32-bit fix-point implementation of the Levinson-Durbin algorithm that
+// does NOT use the 64 bit class
+//
+// Input:
+//      - auto_corr : Vector with autocorrelation values of length >= |order|+1
+//      - order     : The LPC filter order (support up to order 20)
+//
+// Output:
+//      - lpc_coef  : lpc_coef[0..order] LPC coefficients in Q12
+//      - refl_coef : refl_coef[0...order-1]| Reflection coefficients in Q15
+//
+// Return value     : 1 for stable 0 for unstable
+int16_t WebRtcSpl_LevinsonDurbin(const int32_t* auto_corr,
+                                 int16_t* lpc_coef,
+                                 int16_t* refl_coef,
+                                 size_t order);
+
+// Converts reflection coefficients |refl_coef| to LPC coefficients |lpc_coef|.
+// This version is a 16 bit operation.
+//
+// NOTE: The 16 bit refl_coef -> lpc_coef conversion might result in a
+// "slightly unstable" filter (i.e., a pole just outside the unit circle) in
+// "rare" cases even if the reflection coefficients are stable.
+//
+// Input:
+//      - refl_coef : Reflection coefficients in Q15 that should be converted
+//                    to LPC coefficients
+//      - use_order : Number of coefficients in |refl_coef|
+//
+// Output:
+//      - lpc_coef  : LPC coefficients in Q12
+void WebRtcSpl_ReflCoefToLpc(const int16_t* refl_coef,
+                             int use_order,
+                             int16_t* lpc_coef);
+
+// Converts LPC coefficients |lpc_coef| to reflection coefficients |refl_coef|.
+// This version is a 16 bit operation.
+// The conversion is implemented by the step-down algorithm.
+//
+// Input:
+//      - lpc_coef  : LPC coefficients in Q12, that should be converted to
+//                    reflection coefficients
+//      - use_order : Number of coefficients in |lpc_coef|
+//
+// Output:
+//      - refl_coef : Reflection coefficients in Q15.
+void WebRtcSpl_LpcToReflCoef(int16_t* lpc_coef,
+                             int use_order,
+                             int16_t* refl_coef);
+
+// Calculates reflection coefficients (16 bit) from auto-correlation values
+//
+// Input:
+//      - auto_corr : Auto-correlation values
+//      - use_order : Number of coefficients wanted be calculated
+//
+// Output:
+//      - refl_coef : Reflection coefficients in Q15.
+void WebRtcSpl_AutoCorrToReflCoef(const int32_t* auto_corr,
+                                  int use_order,
+                                  int16_t* refl_coef);
+
+// The functions (with related pointer) calculate the cross-correlation between
+// two sequences |seq1| and |seq2|.
+// |seq1| is fixed and |seq2| slides as the pointer is increased with the
+// amount |step_seq2|. Note the arguments should obey the relationship:
+// |dim_seq| - 1 + |step_seq2| * (|dim_cross_correlation| - 1) <
+//      buffer size of |seq2|
+//
+// Input:
+//      - seq1           : First sequence (fixed throughout the correlation)
+//      - seq2           : Second sequence (slides |step_vector2| for each
+//                            new correlation)
+//      - dim_seq        : Number of samples to use in the cross-correlation
+//      - dim_cross_correlation : Number of cross-correlations to calculate (the
+//                            start position for |vector2| is updated for each
+//                            new one)
+//      - right_shifts   : Number of right bit shifts to use. This will
+//                            become the output Q-domain.
+//      - step_seq2      : How many (positive or negative) steps the
+//                            |vector2| pointer should be updated for each new
+//                            cross-correlation value.
+//
+// Output:
+//      - cross_correlation : The cross-correlation in Q(-right_shifts)
+typedef void (*CrossCorrelation)(int32_t* cross_correlation,
+                                 const int16_t* seq1,
+                                 const int16_t* seq2,
+                                 size_t dim_seq,
+                                 size_t dim_cross_correlation,
+                                 int right_shifts,
+                                 int step_seq2);
+extern CrossCorrelation WebRtcSpl_CrossCorrelation;
+void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
+                                 const int16_t* seq1,
+                                 const int16_t* seq2,
+                                 size_t dim_seq,
+                                 size_t dim_cross_correlation,
+                                 int right_shifts,
+                                 int step_seq2);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
+                                    const int16_t* seq1,
+                                    const int16_t* seq2,
+                                    size_t dim_seq,
+                                    size_t dim_cross_correlation,
+                                    int right_shifts,
+                                    int step_seq2);
+#endif
+#if defined(MIPS32_LE)
+void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation,
+                                     const int16_t* seq1,
+                                     const int16_t* seq2,
+                                     size_t dim_seq,
+                                     size_t dim_cross_correlation,
+                                     int right_shifts,
+                                     int step_seq2);
+#endif
+
+// Creates (the first half of) a Hanning window. Size must be at least 1 and
+// at most 512.
+//
+// Input:
+//      - size      : Length of the requested Hanning window (1 to 512)
+//
+// Output:
+//      - window    : Hanning vector in Q14.
+void WebRtcSpl_GetHanningWindow(int16_t* window, size_t size);
+
+// Calculates y[k] = sqrt(1 - x[k]^2) for each element of the input vector
+// |in_vector|. Input and output values are in Q15.
+//
+// Inputs:
+//      - in_vector     : Values to calculate sqrt(1 - x^2) of
+//      - vector_length : Length of vector |in_vector|
+//
+// Output:
+//      - out_vector    : Output values in Q15
+void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t* in_vector,
+                                      size_t vector_length,
+                                      int16_t* out_vector);
+// End: Signal processing operations.
+
+// Randomization functions. Implementations collected in
+// randomization_functions.c and descriptions at bottom of this file.
+int16_t WebRtcSpl_RandU(uint32_t* seed);
+int16_t WebRtcSpl_RandN(uint32_t* seed);
+int16_t WebRtcSpl_RandUArray(int16_t* vector,
+                             int16_t vector_length,
+                             uint32_t* seed);
+// End: Randomization functions.
+
+// Math functions
+int32_t WebRtcSpl_Sqrt(int32_t value);
+int32_t WebRtcSpl_SqrtFloor(int32_t value);
+
+// Divisions. Implementations collected in division_operations.c and
+// descriptions at bottom of this file.
+uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den);
+int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den);
+int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den);
+int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den);
+int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low);
+// End: Divisions.
+
+int32_t WebRtcSpl_Energy(int16_t* vector,
+                         size_t vector_length,
+                         int* scale_factor);
+
+// Calculates the dot product between two (int16_t) vectors.
+//
+// Input:
+//      - vector1       : Vector 1
+//      - vector2       : Vector 2
+//      - vector_length : Number of samples used in the dot product
+//      - scaling       : The number of right bit shifts to apply on each term
+//                        during calculation to avoid overflow, i.e., the
+//                        output will be in Q(-|scaling|)
+//
+// Return value         : The dot product in Q(-scaling)
+int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
+                                      const int16_t* vector2,
+                                      size_t length,
+                                      int scaling);
+
+// Filter operations.
+size_t WebRtcSpl_FilterAR(const int16_t* ar_coef,
+                          size_t ar_coef_length,
+                          const int16_t* in_vector,
+                          size_t in_vector_length,
+                          int16_t* filter_state,
+                          size_t filter_state_length,
+                          int16_t* filter_state_low,
+                          size_t filter_state_low_length,
+                          int16_t* out_vector,
+                          int16_t* out_vector_low,
+                          size_t out_vector_low_length);
+
+// WebRtcSpl_FilterMAFastQ12(...)
+//
+// Performs a MA filtering on a vector in Q12
+//
+// Input:
+//      - in_vector         : Input samples (state in positions
+//                            in_vector[-order] .. in_vector[-1])
+//      - ma_coef           : Filter coefficients (in Q12)
+//      - ma_coef_length    : Number of B coefficients (order+1)
+//      - vector_length     : Number of samples to be filtered
+//
+// Output:
+//      - out_vector        : Filtered samples
+//
+void WebRtcSpl_FilterMAFastQ12(const int16_t* in_vector,
+                               int16_t* out_vector,
+                               const int16_t* ma_coef,
+                               size_t ma_coef_length,
+                               size_t vector_length);
+
+// Performs a AR filtering on a vector in Q12
+// Input:
+//      - data_in            : Input samples
+//      - data_out           : State information in positions
+//                               data_out[-order] .. data_out[-1]
+//      - coefficients       : Filter coefficients (in Q12)
+//      - coefficients_length: Number of coefficients (order+1)
+//      - data_length        : Number of samples to be filtered
+// Output:
+//      - data_out           : Filtered samples
+void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
+                               int16_t* data_out,
+                               const int16_t* __restrict coefficients,
+                               size_t coefficients_length,
+                               size_t data_length);
+
+// The functions (with related pointer) perform a MA down sampling filter
+// on a vector.
+// Input:
+//      - data_in            : Input samples (state in positions
+//                               data_in[-order] .. data_in[-1])
+//      - data_in_length     : Number of samples in |data_in| to be filtered.
+//                               This must be at least
+//                               |delay| + |factor|*(|out_vector_length|-1) + 1)
+//      - data_out_length    : Number of down sampled samples desired
+//      - coefficients       : Filter coefficients (in Q12)
+//      - coefficients_length: Number of coefficients (order+1)
+//      - factor             : Decimation factor
+//      - delay              : Delay of filter (compensated for in out_vector)
+// Output:
+//      - data_out           : Filtered samples
+// Return value              : 0 if OK, -1 if |in_vector| is too short
+typedef int (*DownsampleFast)(const int16_t* data_in,
+                              size_t data_in_length,
+                              int16_t* data_out,
+                              size_t data_out_length,
+                              const int16_t* __restrict coefficients,
+                              size_t coefficients_length,
+                              int factor,
+                              size_t delay);
+extern DownsampleFast WebRtcSpl_DownsampleFast;
+int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
+                              size_t data_in_length,
+                              int16_t* data_out,
+                              size_t data_out_length,
+                              const int16_t* __restrict coefficients,
+                              size_t coefficients_length,
+                              int factor,
+                              size_t delay);
+#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
+int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
+                                 size_t data_in_length,
+                                 int16_t* data_out,
+                                 size_t data_out_length,
+                                 const int16_t* __restrict coefficients,
+                                 size_t coefficients_length,
+                                 int factor,
+                                 size_t delay);
+#endif
+#if defined(MIPS32_LE)
+int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in,
+                                  size_t data_in_length,
+                                  int16_t* data_out,
+                                  size_t data_out_length,
+                                  const int16_t* __restrict coefficients,
+                                  size_t coefficients_length,
+                                  int factor,
+                                  size_t delay);
+#endif
+
+// End: Filter operations.
+
+// FFT operations
+
+int WebRtcSpl_ComplexFFT(int16_t vector[], int stages, int mode);
+int WebRtcSpl_ComplexIFFT(int16_t vector[], int stages, int mode);
+
+// Treat a 16-bit complex data buffer |complex_data| as an array of 32-bit
+// values, and swap elements whose indexes are bit-reverses of each other.
+//
+// Input:
+//      - complex_data  : Complex data buffer containing 2^|stages| real
+//                        elements interleaved with 2^|stages| imaginary
+//                        elements: [Re Im Re Im Re Im....]
+//      - stages        : Number of FFT stages. Must be at least 3 and at most
+//                        10, since the table WebRtcSpl_kSinTable1024[] is 1024
+//                        elements long.
+//
+// Output:
+//      - complex_data  : The complex data buffer.
+
+void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages);
+
+// End: FFT operations
+
+/************************************************************
+ *
+ * RESAMPLING FUNCTIONS AND THEIR STRUCTS ARE DEFINED BELOW
+ *
+ ************************************************************/
+
+/*******************************************************************
+ * resample.c
+ *
+ * Includes the following resampling combinations
+ * 22 kHz -> 16 kHz
+ * 16 kHz -> 22 kHz
+ * 22 kHz ->  8 kHz
+ *  8 kHz -> 22 kHz
+ *
+ ******************************************************************/
+
+// state structure for 22 -> 16 resampler
+typedef struct {
+  int32_t S_22_44[8];
+  int32_t S_44_32[8];
+  int32_t S_32_16[8];
+} WebRtcSpl_State22khzTo16khz;
+
+void WebRtcSpl_Resample22khzTo16khz(const int16_t* in,
+                                    int16_t* out,
+                                    WebRtcSpl_State22khzTo16khz* state,
+                                    int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state);
+
+// state structure for 16 -> 22 resampler
+typedef struct {
+  int32_t S_16_32[8];
+  int32_t S_32_22[8];
+} WebRtcSpl_State16khzTo22khz;
+
+void WebRtcSpl_Resample16khzTo22khz(const int16_t* in,
+                                    int16_t* out,
+                                    WebRtcSpl_State16khzTo22khz* state,
+                                    int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state);
+
+// state structure for 22 -> 8 resampler
+typedef struct {
+  int32_t S_22_22[16];
+  int32_t S_22_16[8];
+  int32_t S_16_8[8];
+} WebRtcSpl_State22khzTo8khz;
+
+void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State22khzTo8khz* state,
+                                   int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state);
+
+// state structure for 8 -> 22 resampler
+typedef struct {
+  int32_t S_8_16[8];
+  int32_t S_16_11[8];
+  int32_t S_11_22[8];
+} WebRtcSpl_State8khzTo22khz;
+
+void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State8khzTo22khz* state,
+                                   int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state);
+
+/*******************************************************************
+ * resample_fractional.c
+ * Functions for internal use in the other resample functions
+ *
+ * Includes the following resampling combinations
+ * 48 kHz -> 32 kHz
+ * 32 kHz -> 24 kHz
+ * 44 kHz -> 32 kHz
+ *
+ ******************************************************************/
+
+void WebRtcSpl_Resample48khzTo32khz(const int32_t* In, int32_t* Out, size_t K);
+
+void WebRtcSpl_Resample32khzTo24khz(const int32_t* In, int32_t* Out, size_t K);
+
+void WebRtcSpl_Resample44khzTo32khz(const int32_t* In, int32_t* Out, size_t K);
+
+/*******************************************************************
+ * resample_48khz.c
+ *
+ * Includes the following resampling combinations
+ * 48 kHz -> 16 kHz
+ * 16 kHz -> 48 kHz
+ * 48 kHz ->  8 kHz
+ *  8 kHz -> 48 kHz
+ *
+ ******************************************************************/
+
+typedef struct {
+  int32_t S_48_48[16];
+  int32_t S_48_32[8];
+  int32_t S_32_16[8];
+} WebRtcSpl_State48khzTo16khz;
+
+void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
+                                    WebRtcSpl_State48khzTo16khz* state,
+                                    int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state);
+
+typedef struct {
+  int32_t S_16_32[8];
+  int32_t S_32_24[8];
+  int32_t S_24_48[8];
+} WebRtcSpl_State16khzTo48khz;
+
+void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
+                                    WebRtcSpl_State16khzTo48khz* state,
+                                    int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state);
+
+typedef struct {
+  int32_t S_48_24[8];
+  int32_t S_24_24[16];
+  int32_t S_24_16[8];
+  int32_t S_16_8[8];
+} WebRtcSpl_State48khzTo8khz;
+
+void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State48khzTo8khz* state,
+                                   int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state);
+
+typedef struct {
+  int32_t S_8_16[8];
+  int32_t S_16_12[8];
+  int32_t S_12_24[8];
+  int32_t S_24_48[8];
+} WebRtcSpl_State8khzTo48khz;
+
+void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State8khzTo48khz* state,
+                                   int32_t* tmpmem);
+
+void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state);
+
+/*******************************************************************
+ * resample_by_2.c
+ *
+ * Includes down and up sampling by a factor of two.
+ *
+ ******************************************************************/
+
+void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len,
+                             int16_t* out, int32_t* filtState);
+
+void WebRtcSpl_UpsampleBy2(const int16_t* in, size_t len,
+                           int16_t* out, int32_t* filtState);
+
+/************************************************************
+ * END OF RESAMPLING FUNCTIONS
+ ************************************************************/
+void WebRtcSpl_AnalysisQMF(const int16_t* in_data,
+                           size_t in_data_length,
+                           int16_t* low_band,
+                           int16_t* high_band,
+                           int32_t* filter_state1,
+                           int32_t* filter_state2);
+void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
+                            const int16_t* high_band,
+                            size_t band_length,
+                            int16_t* out_data,
+                            int32_t* filter_state1,
+                            int32_t* filter_state2);
+
+#ifdef __cplusplus
+}
+#endif  // __cplusplus
+#endif  // WEBRTC_SPL_SIGNAL_PROCESSING_LIBRARY_H_
+
+//
+// WebRtcSpl_AddSatW16(...)
+// WebRtcSpl_AddSatW32(...)
+//
+// Returns the result of a saturated 16-bit, respectively 32-bit, addition of
+// the numbers specified by the |var1| and |var2| parameters.
+//
+// Input:
+//      - var1      : Input variable 1
+//      - var2      : Input variable 2
+//
+// Return value     : Added and saturated value
+//
+
+//
+// WebRtcSpl_SubSatW16(...)
+// WebRtcSpl_SubSatW32(...)
+//
+// Returns the result of a saturated 16-bit, respectively 32-bit, subtraction
+// of the numbers specified by the |var1| and |var2| parameters.
+//
+// Input:
+//      - var1      : Input variable 1
+//      - var2      : Input variable 2
+//
+// Returned value   : Subtracted and saturated value
+//
+
+//
+// WebRtcSpl_GetSizeInBits(...)
+//
+// Returns the # of bits that are needed at the most to represent the number
+// specified by the |value| parameter.
+//
+// Input:
+//      - value     : Input value
+//
+// Return value     : Number of bits needed to represent |value|
+//
+
+//
+// WebRtcSpl_NormW32(...)
+//
+// Norm returns the # of left shifts required to 32-bit normalize the 32-bit
+// signed number specified by the |value| parameter.
+//
+// Input:
+//      - value     : Input value
+//
+// Return value     : Number of bit shifts needed to 32-bit normalize |value|
+//
+
+//
+// WebRtcSpl_NormW16(...)
+//
+// Norm returns the # of left shifts required to 16-bit normalize the 16-bit
+// signed number specified by the |value| parameter.
+//
+// Input:
+//      - value     : Input value
+//
+// Return value     : Number of bit shifts needed to 32-bit normalize |value|
+//
+
+//
+// WebRtcSpl_NormU32(...)
+//
+// Norm returns the # of left shifts required to 32-bit normalize the unsigned
+// 32-bit number specified by the |value| parameter.
+//
+// Input:
+//      - value     : Input value
+//
+// Return value     : Number of bit shifts needed to 32-bit normalize |value|
+//
+
+//
+// WebRtcSpl_GetScalingSquare(...)
+//
+// Returns the # of bits required to scale the samples specified in the
+// |in_vector| parameter so that, if the squares of the samples are added the
+// # of times specified by the |times| parameter, the 32-bit addition will not
+// overflow (result in int32_t).
+//
+// Input:
+//      - in_vector         : Input vector to check scaling on
+//      - in_vector_length  : Samples in |in_vector|
+//      - times             : Number of additions to be performed
+//
+// Return value             : Number of right bit shifts needed to avoid
+//                            overflow in the addition calculation
+//
+
+//
+// WebRtcSpl_MemSetW16(...)
+//
+// Sets all the values in the int16_t vector |vector| of length
+// |vector_length| to the specified value |set_value|
+//
+// Input:
+//      - vector        : Pointer to the int16_t vector
+//      - set_value     : Value specified
+//      - vector_length : Length of vector
+//
+
+//
+// WebRtcSpl_MemSetW32(...)
+//
+// Sets all the values in the int32_t vector |vector| of length
+// |vector_length| to the specified value |set_value|
+//
+// Input:
+//      - vector        : Pointer to the int16_t vector
+//      - set_value     : Value specified
+//      - vector_length : Length of vector
+//
+
+//
+// WebRtcSpl_MemCpyReversedOrder(...)
+//
+// Copies all the values from the source int16_t vector |in_vector| to a
+// destination int16_t vector |out_vector|. It is done in reversed order,
+// meaning that the first sample of |in_vector| is copied to the last sample of
+// the |out_vector|. The procedure continues until the last sample of
+// |in_vector| has been copied to the first sample of |out_vector|. This
+// creates a reversed vector. Used in e.g. prediction in iLBC.
+//
+// Input:
+//      - in_vector     : Pointer to the first sample in a int16_t vector
+//                        of length |length|
+//      - vector_length : Number of elements to copy
+//
+// Output:
+//      - out_vector    : Pointer to the last sample in a int16_t vector
+//                        of length |length|
+//
+
+//
+// WebRtcSpl_CopyFromEndW16(...)
+//
+// Copies the rightmost |samples| of |in_vector| (of length |in_vector_length|)
+// to the vector |out_vector|.
+//
+// Input:
+//      - in_vector         : Input vector
+//      - in_vector_length  : Number of samples in |in_vector|
+//      - samples           : Number of samples to extract (from right side)
+//                            from |in_vector|
+//
+// Output:
+//      - out_vector        : Vector with the requested samples
+//
+
+//
+// WebRtcSpl_ZerosArrayW16(...)
+// WebRtcSpl_ZerosArrayW32(...)
+//
+// Inserts the value "zero" in all positions of a w16 and a w32 vector
+// respectively.
+//
+// Input:
+//      - vector_length : Number of samples in vector
+//
+// Output:
+//      - vector        : Vector containing all zeros
+//
+
+//
+// WebRtcSpl_VectorBitShiftW16(...)
+// WebRtcSpl_VectorBitShiftW32(...)
+//
+// Bit shifts all the values in a vector up or downwards. Different calls for
+// int16_t and int32_t vectors respectively.
+//
+// Input:
+//      - vector_length : Length of vector
+//      - in_vector     : Pointer to the vector that should be bit shifted
+//      - right_shifts  : Number of right bit shifts (negative value gives left
+//                        shifts)
+//
+// Output:
+//      - out_vector    : Pointer to the result vector (can be the same as
+//                        |in_vector|)
+//
+
+//
+// WebRtcSpl_VectorBitShiftW32ToW16(...)
+//
+// Bit shifts all the values in a int32_t vector up or downwards and
+// stores the result as an int16_t vector. The function will saturate the
+// signal if needed, before storing in the output vector.
+//
+// Input:
+//      - vector_length : Length of vector
+//      - in_vector     : Pointer to the vector that should be bit shifted
+//      - right_shifts  : Number of right bit shifts (negative value gives left
+//                        shifts)
+//
+// Output:
+//      - out_vector    : Pointer to the result vector (can be the same as
+//                        |in_vector|)
+//
+
+//
+// WebRtcSpl_ScaleVector(...)
+//
+// Performs the vector operation:
+//  out_vector[k] = (gain*in_vector[k])>>right_shifts
+//
+// Input:
+//      - in_vector     : Input vector
+//      - gain          : Scaling gain
+//      - vector_length : Elements in the |in_vector|
+//      - right_shifts  : Number of right bit shifts applied
+//
+// Output:
+//      - out_vector    : Output vector (can be the same as |in_vector|)
+//
+
+//
+// WebRtcSpl_ScaleVectorWithSat(...)
+//
+// Performs the vector operation:
+//  out_vector[k] = SATURATE( (gain*in_vector[k])>>right_shifts )
+//
+// Input:
+//      - in_vector     : Input vector
+//      - gain          : Scaling gain
+//      - vector_length : Elements in the |in_vector|
+//      - right_shifts  : Number of right bit shifts applied
+//
+// Output:
+//      - out_vector    : Output vector (can be the same as |in_vector|)
+//
+
+//
+// WebRtcSpl_ScaleAndAddVectors(...)
+//
+// Performs the vector operation:
+//  out_vector[k] = (gain1*in_vector1[k])>>right_shifts1
+//                  + (gain2*in_vector2[k])>>right_shifts2
+//
+// Input:
+//      - in_vector1    : Input vector 1
+//      - gain1         : Gain to be used for vector 1
+//      - right_shifts1 : Right bit shift to be used for vector 1
+//      - in_vector2    : Input vector 2
+//      - gain2         : Gain to be used for vector 2
+//      - right_shifts2 : Right bit shift to be used for vector 2
+//      - vector_length : Elements in the input vectors
+//
+// Output:
+//      - out_vector    : Output vector
+//
+
+//
+// WebRtcSpl_ReverseOrderMultArrayElements(...)
+//
+// Performs the vector operation:
+//  out_vector[n] = (in_vector[n]*window[-n])>>right_shifts
+//
+// Input:
+//      - in_vector     : Input vector
+//      - window        : Window vector (should be reversed). The pointer
+//                        should be set to the last value in the vector
+//      - right_shifts  : Number of right bit shift to be applied after the
+//                        multiplication
+//      - vector_length : Number of elements in |in_vector|
+//
+// Output:
+//      - out_vector    : Output vector (can be same as |in_vector|)
+//
+
+//
+// WebRtcSpl_ElementwiseVectorMult(...)
+//
+// Performs the vector operation:
+//  out_vector[n] = (in_vector[n]*window[n])>>right_shifts
+//
+// Input:
+//      - in_vector     : Input vector
+//      - window        : Window vector.
+//      - right_shifts  : Number of right bit shift to be applied after the
+//                        multiplication
+//      - vector_length : Number of elements in |in_vector|
+//
+// Output:
+//      - out_vector    : Output vector (can be same as |in_vector|)
+//
+
+//
+// WebRtcSpl_AddVectorsAndShift(...)
+//
+// Performs the vector operation:
+//  out_vector[k] = (in_vector1[k] + in_vector2[k])>>right_shifts
+//
+// Input:
+//      - in_vector1    : Input vector 1
+//      - in_vector2    : Input vector 2
+//      - right_shifts  : Number of right bit shift to be applied after the
+//                        multiplication
+//      - vector_length : Number of elements in |in_vector1| and |in_vector2|
+//
+// Output:
+//      - out_vector    : Output vector (can be same as |in_vector1|)
+//
+
+//
+// WebRtcSpl_AddAffineVectorToVector(...)
+//
+// Adds an affine transformed vector to another vector |out_vector|, i.e,
+// performs
+//  out_vector[k] += (in_vector[k]*gain+add_constant)>>right_shifts
+//
+// Input:
+//      - in_vector     : Input vector
+//      - gain          : Gain value, used to multiply the in vector with
+//      - add_constant  : Constant value to add (usually 1<<(right_shifts-1),
+//                        but others can be used as well
+//      - right_shifts  : Number of right bit shifts (0-16)
+//      - vector_length : Number of samples in |in_vector| and |out_vector|
+//
+// Output:
+//      - out_vector    : Vector with the output
+//
+
+//
+// WebRtcSpl_AffineTransformVector(...)
+//
+// Affine transforms a vector, i.e, performs
+//  out_vector[k] = (in_vector[k]*gain+add_constant)>>right_shifts
+//
+// Input:
+//      - in_vector     : Input vector
+//      - gain          : Gain value, used to multiply the in vector with
+//      - add_constant  : Constant value to add (usually 1<<(right_shifts-1),
+//                        but others can be used as well
+//      - right_shifts  : Number of right bit shifts (0-16)
+//      - vector_length : Number of samples in |in_vector| and |out_vector|
+//
+// Output:
+//      - out_vector    : Vector with the output
+//
+
+//
+// WebRtcSpl_IncreaseSeed(...)
+//
+// Increases the seed (and returns the new value)
+//
+// Input:
+//      - seed      : Seed for random calculation
+//
+// Output:
+//      - seed      : Updated seed value
+//
+// Return value     : The new seed value
+//
+
+//
+// WebRtcSpl_RandU(...)
+//
+// Produces a uniformly distributed value in the int16_t range
+//
+// Input:
+//      - seed      : Seed for random calculation
+//
+// Output:
+//      - seed      : Updated seed value
+//
+// Return value     : Uniformly distributed value in the range
+//                    [Word16_MIN...Word16_MAX]
+//
+
+//
+// WebRtcSpl_RandN(...)
+//
+// Produces a normal distributed value in the int16_t range
+//
+// Input:
+//      - seed      : Seed for random calculation
+//
+// Output:
+//      - seed      : Updated seed value
+//
+// Return value     : N(0,1) value in the Q13 domain
+//
+
+//
+// WebRtcSpl_RandUArray(...)
+//
+// Produces a uniformly distributed vector with elements in the int16_t
+// range
+//
+// Input:
+//      - vector_length : Samples wanted in the vector
+//      - seed          : Seed for random calculation
+//
+// Output:
+//      - vector        : Vector with the uniform values
+//      - seed          : Updated seed value
+//
+// Return value         : Number of samples in vector, i.e., |vector_length|
+//
+
+//
+// WebRtcSpl_Sqrt(...)
+//
+// Returns the square root of the input value |value|. The precision of this
+// function is integer precision, i.e., sqrt(8) gives 2 as answer.
+// If |value| is a negative number then 0 is returned.
+//
+// Algorithm:
+//
+// A sixth order Taylor Series expansion is used here to compute the square
+// root of a number y^0.5 = (1+x)^0.5
+// where
+// x = y-1
+//   = 1+(x/2)-0.5*((x/2)^2+0.5*((x/2)^3-0.625*((x/2)^4+0.875*((x/2)^5)
+// 0.5 <= x < 1
+//
+// Input:
+//      - value     : Value to calculate sqrt of
+//
+// Return value     : Result of the sqrt calculation
+//
+
+//
+// WebRtcSpl_SqrtFloor(...)
+//
+// Returns the square root of the input value |value|. The precision of this
+// function is rounding down integer precision, i.e., sqrt(8) gives 2 as answer.
+// If |value| is a negative number then 0 is returned.
+//
+// Algorithm:
+//
+// An iterative 4 cylce/bit routine
+//
+// Input:
+//      - value     : Value to calculate sqrt of
+//
+// Return value     : Result of the sqrt calculation
+//
+
+//
+// WebRtcSpl_DivU32U16(...)
+//
+// Divides a uint32_t |num| by a uint16_t |den|.
+//
+// If |den|==0, (uint32_t)0xFFFFFFFF is returned.
+//
+// Input:
+//      - num       : Numerator
+//      - den       : Denominator
+//
+// Return value     : Result of the division (as a uint32_t), i.e., the
+//                    integer part of num/den.
+//
+
+//
+// WebRtcSpl_DivW32W16(...)
+//
+// Divides a int32_t |num| by a int16_t |den|.
+//
+// If |den|==0, (int32_t)0x7FFFFFFF is returned.
+//
+// Input:
+//      - num       : Numerator
+//      - den       : Denominator
+//
+// Return value     : Result of the division (as a int32_t), i.e., the
+//                    integer part of num/den.
+//
+
+//
+// WebRtcSpl_DivW32W16ResW16(...)
+//
+// Divides a int32_t |num| by a int16_t |den|, assuming that the
+// result is less than 32768, otherwise an unpredictable result will occur.
+//
+// If |den|==0, (int16_t)0x7FFF is returned.
+//
+// Input:
+//      - num       : Numerator
+//      - den       : Denominator
+//
+// Return value     : Result of the division (as a int16_t), i.e., the
+//                    integer part of num/den.
+//
+
+//
+// WebRtcSpl_DivResultInQ31(...)
+//
+// Divides a int32_t |num| by a int16_t |den|, assuming that the
+// absolute value of the denominator is larger than the numerator, otherwise
+// an unpredictable result will occur.
+//
+// Input:
+//      - num       : Numerator
+//      - den       : Denominator
+//
+// Return value     : Result of the division in Q31.
+//
+
+//
+// WebRtcSpl_DivW32HiLow(...)
+//
+// Divides a int32_t |num| by a denominator in hi, low format. The
+// absolute value of the denominator has to be larger (or equal to) the
+// numerator.
+//
+// Input:
+//      - num       : Numerator
+//      - den_hi    : High part of denominator
+//      - den_low   : Low part of denominator
+//
+// Return value     : Divided value in Q31
+//
+
+//
+// WebRtcSpl_Energy(...)
+//
+// Calculates the energy of a vector
+//
+// Input:
+//      - vector        : Vector which the energy should be calculated on
+//      - vector_length : Number of samples in vector
+//
+// Output:
+//      - scale_factor  : Number of left bit shifts needed to get the physical
+//                        energy value, i.e, to get the Q0 value
+//
+// Return value         : Energy value in Q(-|scale_factor|)
+//
+
+//
+// WebRtcSpl_FilterAR(...)
+//
+// Performs a 32-bit AR filtering on a vector in Q12
+//
+// Input:
+//  - ar_coef                   : AR-coefficient vector (values in Q12),
+//                                ar_coef[0] must be 4096.
+//  - ar_coef_length            : Number of coefficients in |ar_coef|.
+//  - in_vector                 : Vector to be filtered.
+//  - in_vector_length          : Number of samples in |in_vector|.
+//  - filter_state              : Current state (higher part) of the filter.
+//  - filter_state_length       : Length (in samples) of |filter_state|.
+//  - filter_state_low          : Current state (lower part) of the filter.
+//  - filter_state_low_length   : Length (in samples) of |filter_state_low|.
+//  - out_vector_low_length     : Maximum length (in samples) of
+//                                |out_vector_low|.
+//
+// Output:
+//  - filter_state              : Updated state (upper part) vector.
+//  - filter_state_low          : Updated state (lower part) vector.
+//  - out_vector                : Vector containing the upper part of the
+//                                filtered values.
+//  - out_vector_low            : Vector containing the lower part of the
+//                                filtered values.
+//
+// Return value                 : Number of samples in the |out_vector|.
+//
+
+//
+// WebRtcSpl_ComplexIFFT(...)
+//
+// Complex Inverse FFT
+//
+// Computes an inverse complex 2^|stages|-point FFT on the input vector, which
+// is in bit-reversed order. The original content of the vector is destroyed in
+// the process, since the input is overwritten by the output, normal-ordered,
+// FFT vector. With X as the input complex vector, y as the output complex
+// vector and with M = 2^|stages|, the following is computed:
+//
+//        M-1
+// y(k) = sum[X(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]]
+//        i=0
+//
+// The implementations are optimized for speed, not for code size. It uses the
+// decimation-in-time algorithm with radix-2 butterfly technique.
+//
+// Input:
+//      - vector    : In pointer to complex vector containing 2^|stages|
+//                    real elements interleaved with 2^|stages| imaginary
+//                    elements.
+//                    [ReImReImReIm....]
+//                    The elements are in Q(-scale) domain, see more on Return
+//                    Value below.
+//
+//      - stages    : Number of FFT stages. Must be at least 3 and at most 10,
+//                    since the table WebRtcSpl_kSinTable1024[] is 1024
+//                    elements long.
+//
+//      - mode      : This parameter gives the user to choose how the FFT
+//                    should work.
+//                    mode==0: Low-complexity and Low-accuracy mode
+//                    mode==1: High-complexity and High-accuracy mode
+//
+// Output:
+//      - vector    : Out pointer to the FFT vector (the same as input).
+//
+// Return Value     : The scale value that tells the number of left bit shifts
+//                    that the elements in the |vector| should be shifted with
+//                    in order to get Q0 values, i.e. the physically correct
+//                    values. The scale parameter is always 0 or positive,
+//                    except if N>1024 (|stages|>10), which returns a scale
+//                    value of -1, indicating error.
+//
+
+//
+// WebRtcSpl_ComplexFFT(...)
+//
+// Complex FFT
+//
+// Computes a complex 2^|stages|-point FFT on the input vector, which is in
+// bit-reversed order. The original content of the vector is destroyed in
+// the process, since the input is overwritten by the output, normal-ordered,
+// FFT vector. With x as the input complex vector, Y as the output complex
+// vector and with M = 2^|stages|, the following is computed:
+//
+//              M-1
+// Y(k) = 1/M * sum[x(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]]
+//              i=0
+//
+// The implementations are optimized for speed, not for code size. It uses the
+// decimation-in-time algorithm with radix-2 butterfly technique.
+//
+// This routine prevents overflow by scaling by 2 before each FFT stage. This is
+// a fixed scaling, for proper normalization - there will be log2(n) passes, so
+// this results in an overall factor of 1/n, distributed to maximize arithmetic
+// accuracy.
+//
+// Input:
+//      - vector    : In pointer to complex vector containing 2^|stages| real
+//                    elements interleaved with 2^|stages| imaginary elements.
+//                    [ReImReImReIm....]
+//                    The output is in the Q0 domain.
+//
+//      - stages    : Number of FFT stages. Must be at least 3 and at most 10,
+//                    since the table WebRtcSpl_kSinTable1024[] is 1024
+//                    elements long.
+//
+//      - mode      : This parameter gives the user to choose how the FFT
+//                    should work.
+//                    mode==0: Low-complexity and Low-accuracy mode
+//                    mode==1: High-complexity and High-accuracy mode
+//
+// Output:
+//      - vector    : The output FFT vector is in the Q0 domain.
+//
+// Return value     : The scale parameter is always 0, except if N>1024,
+//                    which returns a scale value of -1, indicating error.
+//
+
+//
+// WebRtcSpl_AnalysisQMF(...)
+//
+// Splits a 0-2*F Hz signal into two sub bands: 0-F Hz and F-2*F Hz. The
+// current version has F = 8000, therefore, a super-wideband audio signal is
+// split to lower-band 0-8 kHz and upper-band 8-16 kHz.
+//
+// Input:
+//      - in_data       : Wide band speech signal, 320 samples (10 ms)
+//
+// Input & Output:
+//      - filter_state1 : Filter state for first All-pass filter
+//      - filter_state2 : Filter state for second All-pass filter
+//
+// Output:
+//      - low_band      : Lower-band signal 0-8 kHz band, 160 samples (10 ms)
+//      - high_band     : Upper-band signal 8-16 kHz band (flipped in frequency
+//                        domain), 160 samples (10 ms)
+//
+
+//
+// WebRtcSpl_SynthesisQMF(...)
+//
+// Combines the two sub bands (0-F and F-2*F Hz) into a signal of 0-2*F
+// Hz, (current version has F = 8000 Hz). So the filter combines lower-band
+// (0-8 kHz) and upper-band (8-16 kHz) channels to obtain super-wideband 0-16
+// kHz audio.
+//
+// Input:
+//      - low_band      : The signal with the 0-8 kHz band, 160 samples (10 ms)
+//      - high_band     : The signal with the 8-16 kHz band, 160 samples (10 ms)
+//
+// Input & Output:
+//      - filter_state1 : Filter state for first All-pass filter
+//      - filter_state2 : Filter state for second All-pass filter
+//
+// Output:
+//      - out_data      : Super-wideband speech signal, 0-16 kHz
+//
+
+// int16_t WebRtcSpl_SatW32ToW16(...)
+//
+// This function saturates a 32-bit word into a 16-bit word.
+//
+// Input:
+//      - value32   : The value of a 32-bit word.
+//
+// Output:
+//      - out16     : the saturated 16-bit word.
+//
+
+// int32_t WebRtc_MulAccumW16(...)
+//
+// This function multiply a 16-bit word by a 16-bit word, and accumulate this
+// value to a 32-bit integer.
+//
+// Input:
+//      - a    : The value of the first 16-bit word.
+//      - b    : The value of the second 16-bit word.
+//      - c    : The value of an 32-bit integer.
+//
+// Return Value: The value of a * b + c.
+//
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl.h
new file mode 100644
index 00000000..d3cc6dee
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl.h
@@ -0,0 +1,173 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+// This header file includes the inline functions in
+// the fix point signal processing library.
+
+#ifndef WEBRTC_SPL_SPL_INL_H_
+#define WEBRTC_SPL_SPL_INL_H_
+
+#ifdef WEBRTC_ARCH_ARM_V7
+#include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h"
+#else
+
+#if defined(MIPS32_LE)
+#include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h"
+#endif
+
+#if !defined(MIPS_DSP_R1_LE)
+static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
+  int16_t out16 = (int16_t) value32;
+
+  if (value32 > 32767)
+    out16 = 32767;
+  else if (value32 < -32768)
+    out16 = -32768;
+
+  return out16;
+}
+
+static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
+  int32_t l_sum;
+
+  // Perform long addition
+  l_sum = l_var1 + l_var2;
+
+  if (l_var1 < 0) {  // Check for underflow.
+    if ((l_var2 < 0) && (l_sum >= 0)) {
+        l_sum = (int32_t)0x80000000;
+    }
+  } else {  // Check for overflow.
+    if ((l_var2 > 0) && (l_sum < 0)) {
+        l_sum = (int32_t)0x7FFFFFFF;
+    }
+  }
+
+  return l_sum;
+}
+
+static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
+  int32_t l_diff;
+
+  // Perform subtraction.
+  l_diff = l_var1 - l_var2;
+
+  if (l_var1 < 0) {  // Check for underflow.
+    if ((l_var2 > 0) && (l_diff > 0)) {
+      l_diff = (int32_t)0x80000000;
+    }
+  } else {  // Check for overflow.
+    if ((l_var2 < 0) && (l_diff < 0)) {
+      l_diff = (int32_t)0x7FFFFFFF;
+    }
+  }
+
+  return l_diff;
+}
+
+static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
+  return WebRtcSpl_SatW32ToW16((int32_t) a + (int32_t) b);
+}
+
+static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
+  return WebRtcSpl_SatW32ToW16((int32_t) var1 - (int32_t) var2);
+}
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+
+#if !defined(MIPS32_LE)
+static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
+  int16_t bits;
+
+  if (0xFFFF0000 & n) {
+    bits = 16;
+  } else {
+    bits = 0;
+  }
+  if (0x0000FF00 & (n >> bits)) bits += 8;
+  if (0x000000F0 & (n >> bits)) bits += 4;
+  if (0x0000000C & (n >> bits)) bits += 2;
+  if (0x00000002 & (n >> bits)) bits += 1;
+  if (0x00000001 & (n >> bits)) bits += 1;
+
+  return bits;
+}
+
+static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
+  int16_t zeros;
+
+  if (a == 0) {
+    return 0;
+  }
+  else if (a < 0) {
+    a = ~a;
+  }
+
+  if (!(0xFFFF8000 & a)) {
+    zeros = 16;
+  } else {
+    zeros = 0;
+  }
+  if (!(0xFF800000 & (a << zeros))) zeros += 8;
+  if (!(0xF8000000 & (a << zeros))) zeros += 4;
+  if (!(0xE0000000 & (a << zeros))) zeros += 2;
+  if (!(0xC0000000 & (a << zeros))) zeros += 1;
+
+  return zeros;
+}
+
+static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
+  int16_t zeros;
+
+  if (a == 0) return 0;
+
+  if (!(0xFFFF0000 & a)) {
+    zeros = 16;
+  } else {
+    zeros = 0;
+  }
+  if (!(0xFF000000 & (a << zeros))) zeros += 8;
+  if (!(0xF0000000 & (a << zeros))) zeros += 4;
+  if (!(0xC0000000 & (a << zeros))) zeros += 2;
+  if (!(0x80000000 & (a << zeros))) zeros += 1;
+
+  return zeros;
+}
+
+static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
+  int16_t zeros;
+
+  if (a == 0) {
+    return 0;
+  }
+  else if (a < 0) {
+    a = ~a;
+  }
+
+  if (!(0xFF80 & a)) {
+    zeros = 8;
+  } else {
+    zeros = 0;
+  }
+  if (!(0xF800 & (a << zeros))) zeros += 4;
+  if (!(0xE000 & (a << zeros))) zeros += 2;
+  if (!(0xC000 & (a << zeros))) zeros += 1;
+
+  return zeros;
+}
+
+static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
+  return (a * b + c);
+}
+#endif  // #if !defined(MIPS32_LE)
+
+#endif  // WEBRTC_ARCH_ARM_V7
+
+#endif  // WEBRTC_SPL_SPL_INL_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h
new file mode 100644
index 00000000..27188011
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h
@@ -0,0 +1,136 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/* This header file includes the inline functions for ARM processors in
+ * the fix point signal processing library.
+ */
+
+#ifndef WEBRTC_SPL_SPL_INL_ARMV7_H_
+#define WEBRTC_SPL_SPL_INL_ARMV7_H_
+
+/* TODO(kma): Replace some assembly code with GCC intrinsics
+ * (e.g. __builtin_clz).
+ */
+
+/* This function produces result that is not bit exact with that by the generic
+ * C version in some cases, although the former is at least as accurate as the
+ * later.
+ */
+static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a, int32_t b) {
+  int32_t tmp = 0;
+  __asm __volatile ("smulwb %0, %1, %2":"=r"(tmp):"r"(b), "r"(a));
+  return tmp;
+}
+
+static __inline int32_t WEBRTC_SPL_MUL_16_16(int16_t a, int16_t b) {
+  int32_t tmp = 0;
+  __asm __volatile ("smulbb %0, %1, %2":"=r"(tmp):"r"(a), "r"(b));
+  return tmp;
+}
+
+// TODO(kma): add unit test.
+static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
+  int32_t tmp = 0;
+  __asm __volatile ("smlabb %0, %1, %2, %3":"=r"(tmp):"r"(a), "r"(b), "r"(c));
+  return tmp;
+}
+
+static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
+  int32_t s_sum = 0;
+
+  __asm __volatile ("qadd16 %0, %1, %2":"=r"(s_sum):"r"(a), "r"(b));
+
+  return (int16_t) s_sum;
+}
+
+static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
+  int32_t l_sum = 0;
+
+  __asm __volatile ("qadd %0, %1, %2":"=r"(l_sum):"r"(l_var1), "r"(l_var2));
+
+  return l_sum;
+}
+
+static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
+  int32_t l_sub = 0;
+
+  __asm __volatile ("qsub %0, %1, %2":"=r"(l_sub):"r"(l_var1), "r"(l_var2));
+
+  return l_sub;
+}
+
+static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
+  int32_t s_sub = 0;
+
+  __asm __volatile ("qsub16 %0, %1, %2":"=r"(s_sub):"r"(var1), "r"(var2));
+
+  return (int16_t)s_sub;
+}
+
+static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
+  int32_t tmp = 0;
+
+  __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(n));
+
+  return (int16_t)(32 - tmp);
+}
+
+static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
+  int32_t tmp = 0;
+
+  if (a == 0) {
+    return 0;
+  }
+  else if (a < 0) {
+    a ^= 0xFFFFFFFF;
+  }
+
+  __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a));
+
+  return (int16_t)(tmp - 1);
+}
+
+static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
+  int tmp = 0;
+
+  if (a == 0) return 0;
+
+  __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a));
+
+  return (int16_t)tmp;
+}
+
+static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
+  int32_t tmp = 0;
+  int32_t a_32 = a;
+
+  if (a_32 == 0) {
+    return 0;
+  }
+  else if (a_32 < 0) {
+    a_32 ^= 0xFFFFFFFF;
+  }
+
+  __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a_32));
+
+  return (int16_t)(tmp - 17);
+}
+
+// TODO(kma): add unit test.
+static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
+  int32_t out = 0;
+
+  __asm __volatile ("ssat %0, #16, %1" : "=r"(out) : "r"(value32));
+
+  return (int16_t)out;
+}
+
+#endif  // WEBRTC_SPL_SPL_INL_ARMV7_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_mips.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_mips.h
new file mode 100644
index 00000000..cd04bddc
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_mips.h
@@ -0,0 +1,225 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+// This header file includes the inline functions in
+// the fix point signal processing library.
+
+#ifndef WEBRTC_SPL_SPL_INL_MIPS_H_
+#define WEBRTC_SPL_SPL_INL_MIPS_H_
+
+static __inline int32_t WEBRTC_SPL_MUL_16_16(int32_t a,
+                                             int32_t b) {
+  int32_t value32 = 0;
+  int32_t a1 = 0, b1 = 0;
+
+  __asm __volatile(
+#if defined(MIPS32_R2_LE)
+    "seh    %[a1],          %[a]                \n\t"
+    "seh    %[b1],          %[b]                \n\t"
+#else
+    "sll    %[a1],          %[a],         16    \n\t"
+    "sll    %[b1],          %[b],         16    \n\t"
+    "sra    %[a1],          %[a1],        16    \n\t"
+    "sra    %[b1],          %[b1],        16    \n\t"
+#endif
+    "mul    %[value32],     %[a1],  %[b1]       \n\t"
+    : [value32] "=r" (value32), [a1] "=&r" (a1), [b1] "=&r" (b1)
+    : [a] "r" (a), [b] "r" (b)
+    : "hi", "lo"
+  );
+  return value32;
+}
+
+static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a,
+                                                    int32_t b) {
+  int32_t value32 = 0, b1 = 0, b2 = 0;
+  int32_t a1 = 0;
+
+  __asm __volatile(
+#if defined(MIPS32_R2_LE)
+    "seh    %[a1],          %[a]                        \n\t"
+#else
+    "sll    %[a1],          %[a],           16          \n\t"
+    "sra    %[a1],          %[a1],          16          \n\t"
+#endif
+    "andi   %[b2],          %[b],           0xFFFF      \n\t"
+    "sra    %[b1],          %[b],           16          \n\t"
+    "sra    %[b2],          %[b2],          1           \n\t"
+    "mul    %[value32],     %[a1],          %[b1]       \n\t"
+    "mul    %[b2],          %[a1],          %[b2]       \n\t"
+    "addiu  %[b2],          %[b2],          0x4000      \n\t"
+    "sra    %[b2],          %[b2],          15          \n\t"
+    "addu   %[value32],     %[value32],     %[b2]       \n\t"
+    : [value32] "=&r" (value32), [b1] "=&r" (b1), [b2] "=&r" (b2),
+      [a1] "=&r" (a1)
+    : [a] "r" (a), [b] "r" (b)
+    : "hi", "lo"
+  );
+  return value32;
+}
+
+#if defined(MIPS_DSP_R1_LE)
+static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
+  __asm __volatile(
+    "shll_s.w   %[value32], %[value32], 16      \n\t"
+    "sra        %[value32], %[value32], 16      \n\t"
+    : [value32] "+r" (value32)
+    :
+  );
+  int16_t out16 = (int16_t)value32;
+  return out16;
+}
+
+static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
+  int32_t value32 = 0;
+
+  __asm __volatile(
+    "addq_s.ph      %[value32],     %[a],   %[b]    \n\t"
+    : [value32] "=r" (value32)
+    : [a] "r" (a), [b] "r" (b)
+  );
+  return (int16_t)value32;
+}
+
+static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
+  int32_t l_sum;
+
+  __asm __volatile(
+    "addq_s.w   %[l_sum],       %[l_var1],      %[l_var2]    \n\t"
+    : [l_sum] "=r" (l_sum)
+    : [l_var1] "r" (l_var1), [l_var2] "r" (l_var2)
+  );
+
+  return l_sum;
+}
+
+static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
+  int32_t value32;
+
+  __asm __volatile(
+    "subq_s.ph  %[value32], %[var1],    %[var2]     \n\t"
+    : [value32] "=r" (value32)
+    : [var1] "r" (var1), [var2] "r" (var2)
+  );
+
+  return (int16_t)value32;
+}
+
+static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
+  int32_t l_diff;
+
+  __asm __volatile(
+    "subq_s.w   %[l_diff],      %[l_var1],      %[l_var2]    \n\t"
+    : [l_diff] "=r" (l_diff)
+    : [l_var1] "r" (l_var1), [l_var2] "r" (l_var2)
+  );
+
+  return l_diff;
+}
+#endif
+
+static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
+  int bits = 0;
+  int i32 = 32;
+
+  __asm __volatile(
+    "clz    %[bits],    %[n]                    \n\t"
+    "subu   %[bits],    %[i32],     %[bits]     \n\t"
+    : [bits] "=&r" (bits)
+    : [n] "r" (n), [i32] "r" (i32)
+  );
+
+  return (int16_t)bits;
+}
+
+static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
+  int zeros = 0;
+
+  __asm __volatile(
+    ".set       push                                \n\t"
+    ".set       noreorder                           \n\t"
+    "bnez       %[a],       1f                      \n\t"
+    " sra       %[zeros],   %[a],       31          \n\t"
+    "b          2f                                  \n\t"
+    " move      %[zeros],   $zero                   \n\t"
+   "1:                                              \n\t"
+    "xor        %[zeros],   %[a],       %[zeros]    \n\t"
+    "clz        %[zeros],   %[zeros]                \n\t"
+    "addiu      %[zeros],   %[zeros],   -1          \n\t"
+   "2:                                              \n\t"
+    ".set       pop                                 \n\t"
+    : [zeros]"=&r"(zeros)
+    : [a] "r" (a)
+  );
+
+  return (int16_t)zeros;
+}
+
+static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
+  int zeros = 0;
+
+  __asm __volatile(
+    "clz    %[zeros],   %[a]    \n\t"
+    : [zeros] "=r" (zeros)
+    : [a] "r" (a)
+  );
+
+  return (int16_t)(zeros & 0x1f);
+}
+
+static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
+  int zeros = 0;
+  int a0 = a << 16;
+
+  __asm __volatile(
+    ".set       push                                \n\t"
+    ".set       noreorder                           \n\t"
+    "bnez       %[a0],      1f                      \n\t"
+    " sra       %[zeros],   %[a0],      31          \n\t"
+    "b          2f                                  \n\t"
+    " move      %[zeros],   $zero                   \n\t"
+   "1:                                              \n\t"
+    "xor        %[zeros],   %[a0],      %[zeros]    \n\t"
+    "clz        %[zeros],   %[zeros]                \n\t"
+    "addiu      %[zeros],   %[zeros],   -1          \n\t"
+   "2:                                              \n\t"
+    ".set       pop                                 \n\t"
+    : [zeros]"=&r"(zeros)
+    : [a0] "r" (a0)
+  );
+
+  return (int16_t)zeros;
+}
+
+static __inline int32_t WebRtc_MulAccumW16(int16_t a,
+                                           int16_t b,
+                                           int32_t c) {
+  int32_t res = 0, c1 = 0;
+  __asm __volatile(
+#if defined(MIPS32_R2_LE)
+    "seh    %[a],       %[a]            \n\t"
+    "seh    %[b],       %[b]            \n\t"
+#else
+    "sll    %[a],       %[a],   16      \n\t"
+    "sll    %[b],       %[b],   16      \n\t"
+    "sra    %[a],       %[a],   16      \n\t"
+    "sra    %[b],       %[b],   16      \n\t"
+#endif
+    "mul    %[res],     %[a],   %[b]    \n\t"
+    "addu   %[c1],      %[c],   %[res]  \n\t"
+    : [c1] "=r" (c1), [res] "=&r" (res)
+    : [a] "r" (a), [b] "r" (b), [c] "r" (c)
+    : "hi", "lo"
+  );
+  return (c1);
+}
+
+#endif  // WEBRTC_SPL_SPL_INL_MIPS_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/levinson_durbin.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/levinson_durbin.c
new file mode 100644
index 00000000..d46e5513
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/levinson_durbin.c
@@ -0,0 +1,246 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_LevinsonDurbin().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#define SPL_LEVINSON_MAXORDER 20
+
+int16_t WebRtcSpl_LevinsonDurbin(const int32_t* R, int16_t* A, int16_t* K,
+                                 size_t order)
+{
+    size_t i, j;
+    // Auto-correlation coefficients in high precision
+    int16_t R_hi[SPL_LEVINSON_MAXORDER + 1], R_low[SPL_LEVINSON_MAXORDER + 1];
+    // LPC coefficients in high precision
+    int16_t A_hi[SPL_LEVINSON_MAXORDER + 1], A_low[SPL_LEVINSON_MAXORDER + 1];
+    // LPC coefficients for next iteration
+    int16_t A_upd_hi[SPL_LEVINSON_MAXORDER + 1], A_upd_low[SPL_LEVINSON_MAXORDER + 1];
+    // Reflection coefficient in high precision
+    int16_t K_hi, K_low;
+    // Prediction gain Alpha in high precision and with scale factor
+    int16_t Alpha_hi, Alpha_low, Alpha_exp;
+    int16_t tmp_hi, tmp_low;
+    int32_t temp1W32, temp2W32, temp3W32;
+    int16_t norm;
+
+    // Normalize the autocorrelation R[0]...R[order+1]
+
+    norm = WebRtcSpl_NormW32(R[0]);
+
+    for (i = 0; i <= order; ++i)
+    {
+        temp1W32 = WEBRTC_SPL_LSHIFT_W32(R[i], norm);
+        // Put R in hi and low format
+        R_hi[i] = (int16_t)(temp1W32 >> 16);
+        R_low[i] = (int16_t)((temp1W32 - ((int32_t)R_hi[i] << 16)) >> 1);
+    }
+
+    // K = A[1] = -R[1] / R[0]
+
+    temp2W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)R_hi[1],16)
+            + WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[1],1); // R[1] in Q31
+    temp3W32 = WEBRTC_SPL_ABS_W32(temp2W32); // abs R[1]
+    temp1W32 = WebRtcSpl_DivW32HiLow(temp3W32, R_hi[0], R_low[0]); // abs(R[1])/R[0] in Q31
+    // Put back the sign on R[1]
+    if (temp2W32 > 0)
+    {
+        temp1W32 = -temp1W32;
+    }
+
+    // Put K in hi and low format
+    K_hi = (int16_t)(temp1W32 >> 16);
+    K_low = (int16_t)((temp1W32 - ((int32_t)K_hi << 16)) >> 1);
+
+    // Store first reflection coefficient
+    K[0] = K_hi;
+
+    temp1W32 >>= 4;  // A[1] in Q27.
+
+    // Put A[1] in hi and low format
+    A_hi[1] = (int16_t)(temp1W32 >> 16);
+    A_low[1] = (int16_t)((temp1W32 - ((int32_t)A_hi[1] << 16)) >> 1);
+
+    // Alpha = R[0] * (1-K^2)
+
+    temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) << 1;  // = k^2 in Q31
+
+    temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0
+    temp1W32 = (int32_t)0x7fffffffL - temp1W32; // temp1W32 = (1 - K[0]*K[0]) in Q31
+
+    // Store temp1W32 = 1 - K[0]*K[0] on hi and low format
+    tmp_hi = (int16_t)(temp1W32 >> 16);
+    tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1);
+
+    // Calculate Alpha in Q31
+    temp1W32 = (R_hi[0] * tmp_hi + (R_hi[0] * tmp_low >> 15) +
+        (R_low[0] * tmp_hi >> 15)) << 1;
+
+    // Normalize Alpha and put it in hi and low format
+
+    Alpha_exp = WebRtcSpl_NormW32(temp1W32);
+    temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, Alpha_exp);
+    Alpha_hi = (int16_t)(temp1W32 >> 16);
+    Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1);
+
+    // Perform the iterative calculations in the Levinson-Durbin algorithm
+
+    for (i = 2; i <= order; i++)
+    {
+        /*                    ----
+         temp1W32 =  R[i] + > R[j]*A[i-j]
+         /
+         ----
+         j=1..i-1
+         */
+
+        temp1W32 = 0;
+
+        for (j = 1; j < i; j++)
+        {
+          // temp1W32 is in Q31
+          temp1W32 += (R_hi[j] * A_hi[i - j] << 1) +
+              (((R_hi[j] * A_low[i - j] >> 15) +
+              (R_low[j] * A_hi[i - j] >> 15)) << 1);
+        }
+
+        temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, 4);
+        temp1W32 += (WEBRTC_SPL_LSHIFT_W32((int32_t)R_hi[i], 16)
+                + WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[i], 1));
+
+        // K = -temp1W32 / Alpha
+        temp2W32 = WEBRTC_SPL_ABS_W32(temp1W32); // abs(temp1W32)
+        temp3W32 = WebRtcSpl_DivW32HiLow(temp2W32, Alpha_hi, Alpha_low); // abs(temp1W32)/Alpha
+
+        // Put the sign of temp1W32 back again
+        if (temp1W32 > 0)
+        {
+            temp3W32 = -temp3W32;
+        }
+
+        // Use the Alpha shifts from earlier to de-normalize
+        norm = WebRtcSpl_NormW32(temp3W32);
+        if ((Alpha_exp <= norm) || (temp3W32 == 0))
+        {
+            temp3W32 = WEBRTC_SPL_LSHIFT_W32(temp3W32, Alpha_exp);
+        } else
+        {
+            if (temp3W32 > 0)
+            {
+                temp3W32 = (int32_t)0x7fffffffL;
+            } else
+            {
+                temp3W32 = (int32_t)0x80000000L;
+            }
+        }
+
+        // Put K on hi and low format
+        K_hi = (int16_t)(temp3W32 >> 16);
+        K_low = (int16_t)((temp3W32 - ((int32_t)K_hi << 16)) >> 1);
+
+        // Store Reflection coefficient in Q15
+        K[i - 1] = K_hi;
+
+        // Test for unstable filter.
+        // If unstable return 0 and let the user decide what to do in that case
+
+        if ((int32_t)WEBRTC_SPL_ABS_W16(K_hi) > (int32_t)32750)
+        {
+            return 0; // Unstable filter
+        }
+
+        /*
+         Compute updated LPC coefficient: Anew[i]
+         Anew[j]= A[j] + K*A[i-j]   for j=1..i-1
+         Anew[i]= K
+         */
+
+        for (j = 1; j < i; j++)
+        {
+            // temp1W32 = A[j] in Q27
+            temp1W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)A_hi[j],16)
+                    + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[j],1);
+
+            // temp1W32 += K*A[i-j] in Q27
+            temp1W32 += (K_hi * A_hi[i - j] + (K_hi * A_low[i - j] >> 15) +
+                (K_low * A_hi[i - j] >> 15)) << 1;
+
+            // Put Anew in hi and low format
+            A_upd_hi[j] = (int16_t)(temp1W32 >> 16);
+            A_upd_low[j] = (int16_t)(
+                (temp1W32 - ((int32_t)A_upd_hi[j] << 16)) >> 1);
+        }
+
+        // temp3W32 = K in Q27 (Convert from Q31 to Q27)
+        temp3W32 >>= 4;
+
+        // Store Anew in hi and low format
+        A_upd_hi[i] = (int16_t)(temp3W32 >> 16);
+        A_upd_low[i] = (int16_t)(
+            (temp3W32 - ((int32_t)A_upd_hi[i] << 16)) >> 1);
+
+        // Alpha = Alpha * (1-K^2)
+
+        temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) << 1;  // K*K in Q31
+
+        temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0
+        temp1W32 = (int32_t)0x7fffffffL - temp1W32; // 1 - K*K  in Q31
+
+        // Convert 1- K^2 in hi and low format
+        tmp_hi = (int16_t)(temp1W32 >> 16);
+        tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1);
+
+        // Calculate Alpha = Alpha * (1-K^2) in Q31
+        temp1W32 = (Alpha_hi * tmp_hi + (Alpha_hi * tmp_low >> 15) +
+            (Alpha_low * tmp_hi >> 15)) << 1;
+
+        // Normalize Alpha and store it on hi and low format
+
+        norm = WebRtcSpl_NormW32(temp1W32);
+        temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, norm);
+
+        Alpha_hi = (int16_t)(temp1W32 >> 16);
+        Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1);
+
+        // Update the total normalization of Alpha
+        Alpha_exp = Alpha_exp + norm;
+
+        // Update A[]
+
+        for (j = 1; j <= i; j++)
+        {
+            A_hi[j] = A_upd_hi[j];
+            A_low[j] = A_upd_low[j];
+        }
+    }
+
+    /*
+     Set A[0] to 1.0 and store the A[i] i=1...order in Q12
+     (Convert from Q27 and use rounding)
+     */
+
+    A[0] = 4096;
+
+    for (i = 1; i <= order; i++)
+    {
+        // temp1W32 in Q27
+        temp1W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)A_hi[i], 16)
+                + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[i], 1);
+        // Round and store upper word
+        A[i] = (int16_t)(((temp1W32 << 1) + 32768) >> 16);
+    }
+    return 1; // Stable filters
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/lpc_to_refl_coef.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/lpc_to_refl_coef.c
new file mode 100644
index 00000000..edcebd4e
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/lpc_to_refl_coef.c
@@ -0,0 +1,56 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_LpcToReflCoef().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#define SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER 50
+
+void WebRtcSpl_LpcToReflCoef(int16_t* a16, int use_order, int16_t* k16)
+{
+    int m, k;
+    int32_t tmp32[SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER];
+    int32_t tmp_inv_denom32;
+    int16_t tmp_inv_denom16;
+
+    k16[use_order - 1] = a16[use_order] << 3;  // Q12<<3 => Q15
+    for (m = use_order - 1; m > 0; m--)
+    {
+        // (1 - k^2) in Q30
+        tmp_inv_denom32 = 1073741823 - k16[m] * k16[m];
+        // (1 - k^2) in Q15
+        tmp_inv_denom16 = (int16_t)(tmp_inv_denom32 >> 15);
+
+        for (k = 1; k <= m; k++)
+        {
+            // tmp[k] = (a[k] - RC[m] * a[m-k+1]) / (1.0 - RC[m]*RC[m]);
+
+            // [Q12<<16 - (Q15*Q12)<<1] = [Q28 - Q28] = Q28
+            tmp32[k] = (a16[k] << 16) - (k16[m] * a16[m - k + 1] << 1);
+
+            tmp32[k] = WebRtcSpl_DivW32W16(tmp32[k], tmp_inv_denom16); //Q28/Q15 = Q13
+        }
+
+        for (k = 1; k < m; k++)
+        {
+            a16[k] = (int16_t)(tmp32[k] >> 1);  // Q13>>1 => Q12
+        }
+
+        tmp32[m] = WEBRTC_SPL_SAT(8191, tmp32[m], -8191);
+        k16[m - 1] = (int16_t)WEBRTC_SPL_LSHIFT_W32(tmp32[m], 2); //Q13<<2 => Q15
+    }
+    return;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations.c
new file mode 100644
index 00000000..4a962f86
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations.c
@@ -0,0 +1,224 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This file contains the implementation of functions
+ * WebRtcSpl_MaxAbsValueW16C()
+ * WebRtcSpl_MaxAbsValueW32C()
+ * WebRtcSpl_MaxValueW16C()
+ * WebRtcSpl_MaxValueW32C()
+ * WebRtcSpl_MinValueW16C()
+ * WebRtcSpl_MinValueW32C()
+ * WebRtcSpl_MaxAbsIndexW16()
+ * WebRtcSpl_MaxIndexW16()
+ * WebRtcSpl_MaxIndexW32()
+ * WebRtcSpl_MinIndexW16()
+ * WebRtcSpl_MinIndexW32()
+ *
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
+//   WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.)
+// TODO(kma): Move the next six functions into min_max_operations_c.c.
+
+// Maximum absolute value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) {
+  size_t i = 0;
+  int absolute = 0, maximum = 0;
+
+  assert(length > 0);
+
+  for (i = 0; i < length; i++) {
+    absolute = abs((int)vector[i]);
+
+    if (absolute > maximum) {
+      maximum = absolute;
+    }
+  }
+
+  // Guard the case for abs(-32768).
+  if (maximum > WEBRTC_SPL_WORD16_MAX) {
+    maximum = WEBRTC_SPL_WORD16_MAX;
+  }
+
+  return (int16_t)maximum;
+}
+
+// Maximum absolute value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) {
+  // Use uint32_t for the local variables, to accommodate the return value
+  // of abs(0x80000000), which is 0x80000000.
+
+  uint32_t absolute = 0, maximum = 0;
+  size_t i = 0;
+
+  assert(length > 0);
+
+  for (i = 0; i < length; i++) {
+    absolute = abs((int)vector[i]);
+    if (absolute > maximum) {
+      maximum = absolute;
+    }
+  }
+
+  maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
+
+  return (int32_t)maximum;
+}
+
+// Maximum value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) {
+  int16_t maximum = WEBRTC_SPL_WORD16_MIN;
+  size_t i = 0;
+
+  assert(length > 0);
+
+  for (i = 0; i < length; i++) {
+    if (vector[i] > maximum)
+      maximum = vector[i];
+  }
+  return maximum;
+}
+
+// Maximum value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) {
+  int32_t maximum = WEBRTC_SPL_WORD32_MIN;
+  size_t i = 0;
+
+  assert(length > 0);
+
+  for (i = 0; i < length; i++) {
+    if (vector[i] > maximum)
+      maximum = vector[i];
+  }
+  return maximum;
+}
+
+// Minimum value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) {
+  int16_t minimum = WEBRTC_SPL_WORD16_MAX;
+  size_t i = 0;
+
+  assert(length > 0);
+
+  for (i = 0; i < length; i++) {
+    if (vector[i] < minimum)
+      minimum = vector[i];
+  }
+  return minimum;
+}
+
+// Minimum value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) {
+  int32_t minimum = WEBRTC_SPL_WORD32_MAX;
+  size_t i = 0;
+
+  assert(length > 0);
+
+  for (i = 0; i < length; i++) {
+    if (vector[i] < minimum)
+      minimum = vector[i];
+  }
+  return minimum;
+}
+
+// Index of maximum absolute value in a word16 vector.
+size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) {
+  // Use type int for local variables, to accomodate the value of abs(-32768).
+
+  size_t i = 0, index = 0;
+  int absolute = 0, maximum = 0;
+
+  assert(length > 0);
+
+  for (i = 0; i < length; i++) {
+    absolute = abs((int)vector[i]);
+
+    if (absolute > maximum) {
+      maximum = absolute;
+      index = i;
+    }
+  }
+
+  return index;
+}
+
+// Index of maximum value in a word16 vector.
+size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) {
+  size_t i = 0, index = 0;
+  int16_t maximum = WEBRTC_SPL_WORD16_MIN;
+
+  assert(length > 0);
+
+  for (i = 0; i < length; i++) {
+    if (vector[i] > maximum) {
+      maximum = vector[i];
+      index = i;
+    }
+  }
+
+  return index;
+}
+
+// Index of maximum value in a word32 vector.
+size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) {
+  size_t i = 0, index = 0;
+  int32_t maximum = WEBRTC_SPL_WORD32_MIN;
+
+  assert(length > 0);
+
+  for (i = 0; i < length; i++) {
+    if (vector[i] > maximum) {
+      maximum = vector[i];
+      index = i;
+    }
+  }
+
+  return index;
+}
+
+// Index of minimum value in a word16 vector.
+size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) {
+  size_t i = 0, index = 0;
+  int16_t minimum = WEBRTC_SPL_WORD16_MAX;
+
+  assert(length > 0);
+
+  for (i = 0; i < length; i++) {
+    if (vector[i] < minimum) {
+      minimum = vector[i];
+      index = i;
+    }
+  }
+
+  return index;
+}
+
+// Index of minimum value in a word32 vector.
+size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) {
+  size_t i = 0, index = 0;
+  int32_t minimum = WEBRTC_SPL_WORD32_MAX;
+
+  assert(length > 0);
+
+  for (i = 0; i < length; i++) {
+    if (vector[i] < minimum) {
+      minimum = vector[i];
+      index = i;
+    }
+  }
+
+  return index;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_mips.c
new file mode 100644
index 00000000..28de45b3
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_mips.c
@@ -0,0 +1,376 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This file contains the implementation of function
+ * WebRtcSpl_MaxAbsValueW16()
+ *
+ * The description header can be found in signal_processing_library.h.
+ *
+ */
+
+#include <assert.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Maximum absolute value of word16 vector.
+int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length) {
+  int32_t totMax = 0;
+  int32_t tmp32_0, tmp32_1, tmp32_2, tmp32_3;
+  size_t i, loop_size;
+
+  assert(length > 0);
+
+#if defined(MIPS_DSP_R1)
+  const int32_t* tmpvec32 = (int32_t*)vector;
+  loop_size = length >> 4;
+
+  for (i = 0; i < loop_size; i++) {
+    __asm__ volatile (
+      "lw         %[tmp32_0],     0(%[tmpvec32])              \n\t"
+      "lw         %[tmp32_1],     4(%[tmpvec32])              \n\t"
+      "lw         %[tmp32_2],     8(%[tmpvec32])              \n\t"
+      "lw         %[tmp32_3],     12(%[tmpvec32])             \n\t"
+
+      "absq_s.ph  %[tmp32_0],     %[tmp32_0]                  \n\t"
+      "absq_s.ph  %[tmp32_1],     %[tmp32_1]                  \n\t"
+      "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t"
+
+      "lw         %[tmp32_0],     16(%[tmpvec32])             \n\t"
+      "absq_s.ph  %[tmp32_2],     %[tmp32_2]                  \n\t"
+      "cmp.lt.ph  %[totMax],      %[tmp32_1]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_1],     %[totMax]   \n\t"
+
+      "lw         %[tmp32_1],     20(%[tmpvec32])             \n\t"
+      "absq_s.ph  %[tmp32_3],     %[tmp32_3]                  \n\t"
+      "cmp.lt.ph  %[totMax],      %[tmp32_2]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_2],     %[totMax]   \n\t"
+
+      "lw         %[tmp32_2],     24(%[tmpvec32])             \n\t"
+      "cmp.lt.ph  %[totMax],      %[tmp32_3]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_3],     %[totMax]   \n\t"
+
+      "lw         %[tmp32_3],     28(%[tmpvec32])             \n\t"
+      "absq_s.ph  %[tmp32_0],     %[tmp32_0]                  \n\t"
+      "absq_s.ph  %[tmp32_1],     %[tmp32_1]                  \n\t"
+      "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t"
+
+      "absq_s.ph  %[tmp32_2],     %[tmp32_2]                  \n\t"
+      "cmp.lt.ph  %[totMax],      %[tmp32_1]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_1],     %[totMax]   \n\t"
+      "absq_s.ph  %[tmp32_3],     %[tmp32_3]                  \n\t"
+      "cmp.lt.ph  %[totMax],      %[tmp32_2]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_2],     %[totMax]   \n\t"
+
+      "cmp.lt.ph  %[totMax],      %[tmp32_3]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_3],     %[totMax]   \n\t"
+
+      "addiu      %[tmpvec32],    %[tmpvec32],    32          \n\t"
+      : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
+        [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
+        [totMax] "+r" (totMax), [tmpvec32] "+r" (tmpvec32)
+      :
+      : "memory"
+    );
+  }
+  __asm__ volatile (
+    "rotr       %[tmp32_0],     %[totMax],      16          \n\t"
+    "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t"
+    "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t"
+    "packrl.ph  %[totMax],      $0,             %[totMax]   \n\t"
+    : [tmp32_0] "=&r" (tmp32_0), [totMax] "+r" (totMax)
+    :
+  );
+  loop_size = length & 0xf;
+  for (i = 0; i < loop_size; i++) {
+    __asm__ volatile (
+      "lh         %[tmp32_0],     0(%[tmpvec32])              \n\t"
+      "addiu      %[tmpvec32],    %[tmpvec32],     2          \n\t"
+      "absq_s.w   %[tmp32_0],     %[tmp32_0]                  \n\t"
+      "slt        %[tmp32_1],     %[totMax],       %[tmp32_0] \n\t"
+      "movn       %[totMax],      %[tmp32_0],      %[tmp32_1] \n\t"
+      : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
+        [tmpvec32] "+r" (tmpvec32), [totMax] "+r" (totMax)
+      :
+      : "memory"
+    );
+  }
+#else  // #if defined(MIPS_DSP_R1)
+  int32_t v16MaxMax = WEBRTC_SPL_WORD16_MAX;
+  int32_t r, r1, r2, r3;
+  const int16_t* tmpvector = vector;
+  loop_size = length >> 4;
+  for (i = 0; i < loop_size; i++) {
+    __asm__ volatile (
+      "lh     %[tmp32_0],     0(%[tmpvector])                 \n\t"
+      "lh     %[tmp32_1],     2(%[tmpvector])                 \n\t"
+      "lh     %[tmp32_2],     4(%[tmpvector])                 \n\t"
+      "lh     %[tmp32_3],     6(%[tmpvector])                 \n\t"
+
+      "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
+      "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
+      "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
+      "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
+
+      "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
+      "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
+      "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
+      "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
+      "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
+      "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
+      "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
+      "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
+
+      "lh     %[tmp32_0],     8(%[tmpvector])                 \n\t"
+      "lh     %[tmp32_1],     10(%[tmpvector])                \n\t"
+      "lh     %[tmp32_2],     12(%[tmpvector])                \n\t"
+      "lh     %[tmp32_3],     14(%[tmpvector])                \n\t"
+
+      "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
+      "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
+      "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
+      "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
+
+      "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
+      "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
+      "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
+      "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
+      "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
+      "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
+      "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
+      "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
+
+      "lh     %[tmp32_0],     16(%[tmpvector])                \n\t"
+      "lh     %[tmp32_1],     18(%[tmpvector])                \n\t"
+      "lh     %[tmp32_2],     20(%[tmpvector])                \n\t"
+      "lh     %[tmp32_3],     22(%[tmpvector])                \n\t"
+
+      "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
+      "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
+      "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
+      "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
+
+      "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
+      "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
+      "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
+      "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
+      "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
+      "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
+      "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
+      "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
+
+      "lh     %[tmp32_0],     24(%[tmpvector])                \n\t"
+      "lh     %[tmp32_1],     26(%[tmpvector])                \n\t"
+      "lh     %[tmp32_2],     28(%[tmpvector])                \n\t"
+      "lh     %[tmp32_3],     30(%[tmpvector])                \n\t"
+
+      "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
+      "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
+      "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
+      "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
+
+      "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
+      "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
+      "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
+      "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
+      "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
+      "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
+      "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
+      "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
+
+      "addiu  %[tmpvector],   %[tmpvector],   32              \n\t"
+      : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
+        [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
+        [totMax] "+r" (totMax), [r] "=&r" (r), [tmpvector] "+r" (tmpvector),
+        [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3)
+      :
+      : "memory"
+    );
+  }
+  loop_size = length & 0xf;
+  for (i = 0; i < loop_size; i++) {
+    __asm__ volatile (
+      "lh         %[tmp32_0],     0(%[tmpvector])             \n\t"
+      "addiu      %[tmpvector],   %[tmpvector],    2          \n\t"
+      "abs        %[tmp32_0],     %[tmp32_0]                  \n\t"
+      "slt        %[tmp32_1],     %[totMax],       %[tmp32_0] \n\t"
+      "movn       %[totMax],      %[tmp32_0],      %[tmp32_1] \n\t"
+      : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
+        [tmpvector] "+r" (tmpvector), [totMax] "+r" (totMax)
+      :
+      : "memory"
+    );
+  }
+
+  __asm__ volatile (
+    "slt    %[r],       %[v16MaxMax],   %[totMax]   \n\t"
+    "movn   %[totMax],  %[v16MaxMax],   %[r]        \n\t"
+    : [totMax] "+r" (totMax), [r] "=&r" (r)
+    : [v16MaxMax] "r" (v16MaxMax)
+  );
+#endif  // #if defined(MIPS_DSP_R1)
+  return (int16_t)totMax;
+}
+
+#if defined(MIPS_DSP_R1_LE)
+// Maximum absolute value of word32 vector. Version for MIPS platform.
+int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length) {
+  // Use uint32_t for the local variables, to accommodate the return value
+  // of abs(0x80000000), which is 0x80000000.
+
+  uint32_t absolute = 0, maximum = 0;
+  int tmp1 = 0, max_value = 0x7fffffff;
+
+  assert(length > 0);
+
+  __asm__ volatile (
+    ".set push                                                        \n\t"
+    ".set noreorder                                                   \n\t"
+
+   "1:                                                                \n\t"
+    "lw         %[absolute],      0(%[vector])                        \n\t"
+    "absq_s.w   %[absolute],      %[absolute]                         \n\t"
+    "addiu      %[length],        %[length],          -1              \n\t"
+    "slt        %[tmp1],          %[maximum],         %[absolute]     \n\t"
+    "movn       %[maximum],       %[absolute],        %[tmp1]         \n\t"
+    "bgtz       %[length],        1b                                  \n\t"
+    " addiu     %[vector],        %[vector],          4               \n\t"
+    "slt        %[tmp1],          %[max_value],       %[maximum]      \n\t"
+    "movn       %[maximum],       %[max_value],       %[tmp1]         \n\t"
+
+    ".set pop                                                         \n\t"
+
+    : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [absolute] "+r" (absolute)
+    : [vector] "r" (vector), [length] "r" (length), [max_value] "r" (max_value)
+    : "memory"
+  );
+
+  return (int32_t)maximum;
+}
+#endif  // #if defined(MIPS_DSP_R1_LE)
+
+// Maximum value of word16 vector. Version for MIPS platform.
+int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length) {
+  int16_t maximum = WEBRTC_SPL_WORD16_MIN;
+  int tmp1;
+  int16_t value;
+
+  assert(length > 0);
+
+  __asm__ volatile (
+    ".set push                                                        \n\t"
+    ".set noreorder                                                   \n\t"
+
+   "1:                                                                \n\t"
+    "lh         %[value],         0(%[vector])                        \n\t"
+    "addiu      %[length],        %[length],          -1              \n\t"
+    "slt        %[tmp1],          %[maximum],         %[value]        \n\t"
+    "movn       %[maximum],       %[value],           %[tmp1]         \n\t"
+    "bgtz       %[length],        1b                                  \n\t"
+    " addiu     %[vector],        %[vector],          2               \n\t"
+    ".set pop                                                         \n\t"
+
+    : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value)
+    : [vector] "r" (vector), [length] "r" (length)
+    : "memory"
+  );
+
+  return maximum;
+}
+
+// Maximum value of word32 vector. Version for MIPS platform.
+int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length) {
+  int32_t maximum = WEBRTC_SPL_WORD32_MIN;
+  int tmp1, value;
+
+  assert(length > 0);
+
+  __asm__ volatile (
+    ".set push                                                        \n\t"
+    ".set noreorder                                                   \n\t"
+
+   "1:                                                                \n\t"
+    "lw         %[value],         0(%[vector])                        \n\t"
+    "addiu      %[length],        %[length],          -1              \n\t"
+    "slt        %[tmp1],          %[maximum],         %[value]        \n\t"
+    "movn       %[maximum],       %[value],           %[tmp1]         \n\t"
+    "bgtz       %[length],        1b                                  \n\t"
+    " addiu     %[vector],        %[vector],          4               \n\t"
+
+    ".set pop                                                         \n\t"
+
+    : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value)
+    : [vector] "r" (vector), [length] "r" (length)
+    : "memory"
+  );
+
+  return maximum;
+}
+
+// Minimum value of word16 vector. Version for MIPS platform.
+int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length) {
+  int16_t minimum = WEBRTC_SPL_WORD16_MAX;
+  int tmp1;
+  int16_t value;
+
+  assert(length > 0);
+
+  __asm__ volatile (
+    ".set push                                                        \n\t"
+    ".set noreorder                                                   \n\t"
+
+   "1:                                                                \n\t"
+    "lh         %[value],         0(%[vector])                        \n\t"
+    "addiu      %[length],        %[length],          -1              \n\t"
+    "slt        %[tmp1],          %[value],           %[minimum]      \n\t"
+    "movn       %[minimum],       %[value],           %[tmp1]         \n\t"
+    "bgtz       %[length],        1b                                  \n\t"
+    " addiu     %[vector],        %[vector],          2               \n\t"
+
+    ".set pop                                                         \n\t"
+
+    : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value)
+    : [vector] "r" (vector), [length] "r" (length)
+    : "memory"
+  );
+
+  return minimum;
+}
+
+// Minimum value of word32 vector. Version for MIPS platform.
+int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length) {
+  int32_t minimum = WEBRTC_SPL_WORD32_MAX;
+  int tmp1, value;
+
+  assert(length > 0);
+
+  __asm__ volatile (
+    ".set push                                                        \n\t"
+    ".set noreorder                                                   \n\t"
+
+   "1:                                                                \n\t"
+    "lw         %[value],         0(%[vector])                        \n\t"
+    "addiu      %[length],        %[length],          -1              \n\t"
+    "slt        %[tmp1],          %[value],           %[minimum]      \n\t"
+    "movn       %[minimum],       %[value],           %[tmp1]         \n\t"
+    "bgtz       %[length],        1b                                  \n\t"
+    " addiu     %[vector],        %[vector],          4               \n\t"
+
+    ".set pop                                                         \n\t"
+
+    : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value)
+    : [vector] "r" (vector), [length] "r" (length)
+    : "memory"
+  );
+
+  return minimum;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_neon.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_neon.c
new file mode 100644
index 00000000..6fbbf94e
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_neon.c
@@ -0,0 +1,283 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Maximum absolute value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) {
+  int absolute = 0, maximum = 0;
+
+  assert(length > 0);
+
+  const int16_t* p_start = vector;
+  size_t rest = length & 7;
+  const int16_t* p_end = vector + length - rest;
+
+  int16x8_t v;
+  uint16x8_t max_qv;
+  max_qv = vdupq_n_u16(0);
+
+  while (p_start < p_end) {
+    v = vld1q_s16(p_start);
+    // Note vabs doesn't change the value of -32768.
+    v = vabsq_s16(v);
+    // Use u16 so we don't lose the value -32768.
+    max_qv = vmaxq_u16(max_qv, vreinterpretq_u16_s16(v));
+    p_start += 8;
+  }
+
+#ifdef WEBRTC_ARCH_ARM64
+  maximum = (int)vmaxvq_u16(max_qv);
+#else
+  uint16x4_t max_dv;
+  max_dv = vmax_u16(vget_low_u16(max_qv), vget_high_u16(max_qv));
+  max_dv = vpmax_u16(max_dv, max_dv);
+  max_dv = vpmax_u16(max_dv, max_dv);
+
+  maximum = (int)vget_lane_u16(max_dv, 0);
+#endif
+
+  p_end = vector + length;
+  while (p_start < p_end) {
+    absolute = abs((int)(*p_start));
+
+    if (absolute > maximum) {
+      maximum = absolute;
+    }
+    p_start++;
+  }
+
+  // Guard the case for abs(-32768).
+  if (maximum > WEBRTC_SPL_WORD16_MAX) {
+    maximum = WEBRTC_SPL_WORD16_MAX;
+  }
+
+  return (int16_t)maximum;
+}
+
+// Maximum absolute value of word32 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) {
+  // Use uint32_t for the local variables, to accommodate the return value
+  // of abs(0x80000000), which is 0x80000000.
+
+  uint32_t absolute = 0, maximum = 0;
+  size_t i = 0;
+  size_t residual = length & 0x7;
+
+  assert(length > 0);
+
+  const int32_t* p_start = vector;
+  uint32x4_t max32x4_0 = vdupq_n_u32(0);
+  uint32x4_t max32x4_1 = vdupq_n_u32(0);
+
+  // First part, unroll the loop 8 times.
+  for (i = 0; i < length - residual; i += 8) {
+    int32x4_t in32x4_0 = vld1q_s32(p_start);
+    p_start += 4;
+    int32x4_t in32x4_1 = vld1q_s32(p_start);
+    p_start += 4;
+    in32x4_0 = vabsq_s32(in32x4_0);
+    in32x4_1 = vabsq_s32(in32x4_1);
+    // vabs doesn't change the value of 0x80000000.
+    // Use u32 so we don't lose the value 0x80000000.
+    max32x4_0 = vmaxq_u32(max32x4_0, vreinterpretq_u32_s32(in32x4_0));
+    max32x4_1 = vmaxq_u32(max32x4_1, vreinterpretq_u32_s32(in32x4_1));
+  }
+
+  uint32x4_t max32x4 = vmaxq_u32(max32x4_0, max32x4_1);
+#if defined(WEBRTC_ARCH_ARM64)
+  maximum = vmaxvq_u32(max32x4);
+#else
+  uint32x2_t max32x2 = vmax_u32(vget_low_u32(max32x4), vget_high_u32(max32x4));
+  max32x2 = vpmax_u32(max32x2, max32x2);
+
+  maximum = vget_lane_u32(max32x2, 0);
+#endif
+
+  // Second part, do the remaining iterations (if any).
+  for (i = residual; i > 0; i--) {
+    absolute = abs((int)(*p_start));
+    if (absolute > maximum) {
+      maximum = absolute;
+    }
+    p_start++;
+  }
+
+  // Guard against the case for 0x80000000.
+  maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
+
+  return (int32_t)maximum;
+}
+
+// Maximum value of word16 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) {
+  int16_t maximum = WEBRTC_SPL_WORD16_MIN;
+  size_t i = 0;
+  size_t residual = length & 0x7;
+
+  assert(length > 0);
+
+  const int16_t* p_start = vector;
+  int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN);
+
+  // First part, unroll the loop 8 times.
+  for (i = 0; i < length - residual; i += 8) {
+    int16x8_t in16x8 = vld1q_s16(p_start);
+    max16x8 = vmaxq_s16(max16x8, in16x8);
+    p_start += 8;
+  }
+
+#if defined(WEBRTC_ARCH_ARM64)
+  maximum = vmaxvq_s16(max16x8);
+#else
+  int16x4_t max16x4 = vmax_s16(vget_low_s16(max16x8), vget_high_s16(max16x8));
+  max16x4 = vpmax_s16(max16x4, max16x4);
+  max16x4 = vpmax_s16(max16x4, max16x4);
+
+  maximum = vget_lane_s16(max16x4, 0);
+#endif
+
+  // Second part, do the remaining iterations (if any).
+  for (i = residual; i > 0; i--) {
+    if (*p_start > maximum)
+      maximum = *p_start;
+    p_start++;
+  }
+  return maximum;
+}
+
+// Maximum value of word32 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) {
+  int32_t maximum = WEBRTC_SPL_WORD32_MIN;
+  size_t i = 0;
+  size_t residual = length & 0x7;
+
+  assert(length > 0);
+
+  const int32_t* p_start = vector;
+  int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN);
+  int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN);
+
+  // First part, unroll the loop 8 times.
+  for (i = 0; i < length - residual; i += 8) {
+    int32x4_t in32x4_0 = vld1q_s32(p_start);
+    p_start += 4;
+    int32x4_t in32x4_1 = vld1q_s32(p_start);
+    p_start += 4;
+    max32x4_0 = vmaxq_s32(max32x4_0, in32x4_0);
+    max32x4_1 = vmaxq_s32(max32x4_1, in32x4_1);
+  }
+
+  int32x4_t max32x4 = vmaxq_s32(max32x4_0, max32x4_1);
+#if defined(WEBRTC_ARCH_ARM64)
+  maximum = vmaxvq_s32(max32x4);
+#else
+  int32x2_t max32x2 = vmax_s32(vget_low_s32(max32x4), vget_high_s32(max32x4));
+  max32x2 = vpmax_s32(max32x2, max32x2);
+
+  maximum = vget_lane_s32(max32x2, 0);
+#endif
+
+  // Second part, do the remaining iterations (if any).
+  for (i = residual; i > 0; i--) {
+    if (*p_start > maximum)
+      maximum = *p_start;
+    p_start++;
+  }
+  return maximum;
+}
+
+// Minimum value of word16 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) {
+  int16_t minimum = WEBRTC_SPL_WORD16_MAX;
+  size_t i = 0;
+  size_t residual = length & 0x7;
+
+  assert(length > 0);
+
+  const int16_t* p_start = vector;
+  int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX);
+
+  // First part, unroll the loop 8 times.
+  for (i = 0; i < length - residual; i += 8) {
+    int16x8_t in16x8 = vld1q_s16(p_start);
+    min16x8 = vminq_s16(min16x8, in16x8);
+    p_start += 8;
+  }
+
+#if defined(WEBRTC_ARCH_ARM64)
+  minimum = vminvq_s16(min16x8);
+#else
+  int16x4_t min16x4 = vmin_s16(vget_low_s16(min16x8), vget_high_s16(min16x8));
+  min16x4 = vpmin_s16(min16x4, min16x4);
+  min16x4 = vpmin_s16(min16x4, min16x4);
+
+  minimum = vget_lane_s16(min16x4, 0);
+#endif
+
+  // Second part, do the remaining iterations (if any).
+  for (i = residual; i > 0; i--) {
+    if (*p_start < minimum)
+      minimum = *p_start;
+    p_start++;
+  }
+  return minimum;
+}
+
+// Minimum value of word32 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) {
+  int32_t minimum = WEBRTC_SPL_WORD32_MAX;
+  size_t i = 0;
+  size_t residual = length & 0x7;
+
+  assert(length > 0);
+
+  const int32_t* p_start = vector;
+  int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX);
+  int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX);
+
+  // First part, unroll the loop 8 times.
+  for (i = 0; i < length - residual; i += 8) {
+    int32x4_t in32x4_0 = vld1q_s32(p_start);
+    p_start += 4;
+    int32x4_t in32x4_1 = vld1q_s32(p_start);
+    p_start += 4;
+    min32x4_0 = vminq_s32(min32x4_0, in32x4_0);
+    min32x4_1 = vminq_s32(min32x4_1, in32x4_1);
+  }
+
+  int32x4_t min32x4 = vminq_s32(min32x4_0, min32x4_1);
+#if defined(WEBRTC_ARCH_ARM64)
+  minimum = vminvq_s32(min32x4);
+#else
+  int32x2_t min32x2 = vmin_s32(vget_low_s32(min32x4), vget_high_s32(min32x4));
+  min32x2 = vpmin_s32(min32x2, min32x2);
+
+  minimum = vget_lane_s32(min32x2, 0);
+#endif
+
+  // Second part, do the remaining iterations (if any).
+  for (i = residual; i > 0; i--) {
+    if (*p_start < minimum)
+      minimum = *p_start;
+    p_start++;
+  }
+  return minimum;
+}
+
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/randomization_functions.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/randomization_functions.c
new file mode 100644
index 00000000..73f24093
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/randomization_functions.c
@@ -0,0 +1,115 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the randomization functions
+ * WebRtcSpl_RandU()
+ * WebRtcSpl_RandN()
+ * WebRtcSpl_RandUArray()
+ *
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+static const uint32_t kMaxSeedUsed = 0x80000000;
+
+static const int16_t kRandNTable[] = {
+    9178,    -7260,       40,    10189,     4894,    -3531,   -13779,    14764,
+   -4008,    -8884,    -8990,     1008,     7368,     5184,     3251,    -5817,
+   -9786,     5963,     1770,     8066,    -7135,    10772,    -2298,     1361,
+    6484,     2241,    -8633,      792,      199,    -3344,     6553,   -10079,
+  -15040,       95,    11608,   -12469,    14161,    -4176,     2476,     6403,
+   13685,   -16005,     6646,     2239,    10916,    -3004,     -602,    -3141,
+    2142,    14144,    -5829,     5305,     8209,     4713,     2697,    -5112,
+   16092,    -1210,    -2891,    -6631,    -5360,   -11878,    -6781,    -2739,
+   -6392,      536,    10923,    10872,     5059,    -4748,    -7770,     5477,
+      38,    -1025,    -2892,     1638,     6304,    14375,   -11028,     1553,
+   -1565,    10762,     -393,     4040,     5257,    12310,     6554,    -4799,
+    4899,    -6354,     1603,    -1048,    -2220,     8247,     -186,    -8944,
+  -12004,     2332,     4801,    -4933,     6371,      131,     8614,    -5927,
+   -8287,   -22760,     4033,   -15162,     3385,     3246,     3153,    -5250,
+    3766,      784,     6494,      -62,     3531,    -1582,    15572,      662,
+   -3952,     -330,    -3196,      669,     7236,    -2678,    -6569,    23319,
+   -8645,     -741,    14830,   -15976,     4903,      315,   -11342,    10311,
+    1858,    -7777,     2145,     5436,     5677,     -113,   -10033,      826,
+   -1353,    17210,     7768,      986,    -1471,     8291,    -4982,     8207,
+  -14911,    -6255,    -2449,   -11881,    -7059,   -11703,    -4338,     8025,
+    7538,    -2823,   -12490,     9470,    -1613,    -2529,   -10092,    -7807,
+    9480,     6970,   -12844,     5123,     3532,     4816,     4803,    -8455,
+   -5045,    14032,    -4378,    -1643,     5756,   -11041,    -2732,   -16618,
+   -6430,   -18375,    -3320,     6098,     5131,    -4269,    -8840,     2482,
+   -7048,     1547,   -21890,    -6505,    -7414,     -424,   -11722,     7955,
+    1653,   -17299,     1823,      473,    -9232,     3337,     1111,      873,
+    4018,    -8982,     9889,     3531,   -11763,    -3799,     7373,    -4539,
+    3231,     7054,    -8537,     7616,     6244,    16635,      447,    -2915,
+   13967,      705,    -2669,    -1520,    -1771,   -16188,     5956,     5117,
+    6371,    -9936,    -1448,     2480,     5128,     7550,    -8130,     5236,
+    8213,    -6443,     7707,    -1950,   -13811,     7218,     7031,    -3883,
+      67,     5731,    -2874,    13480,    -3743,     9298,    -3280,     3552,
+   -4425,      -18,    -3785,    -9988,    -5357,     5477,   -11794,     2117,
+    1416,    -9935,     3376,      802,    -5079,    -8243,    12652,       66,
+    3653,    -2368,     6781,   -21895,    -7227,     2487,     7839,     -385,
+    6646,    -7016,    -4658,     5531,    -1705,      834,      129,     3694,
+   -1343,     2238,   -22640,    -6417,   -11139,    11301,    -2945,    -3494,
+   -5626,      185,    -3615,    -2041,    -7972,    -3106,      -60,   -23497,
+   -1566,    17064,     3519,     2518,      304,    -6805,   -10269,     2105,
+    1936,     -426,     -736,    -8122,    -1467,     4238,    -6939,   -13309,
+     360,     7402,    -7970,    12576,     3287,    12194,    -6289,   -16006,
+    9171,     4042,    -9193,     9123,    -2512,     6388,    -4734,    -8739,
+    1028,    -5406,    -1696,     5889,     -666,    -4736,     4971,     3565,
+    9362,    -6292,     3876,    -3652,   -19666,     7523,    -4061,      391,
+  -11773,     7502,    -3763,     4929,    -9478,    13278,     2805,     4496,
+    7814,    16419,    12455,   -14773,     2127,    -2746,     3763,     4847,
+    3698,     6978,     4751,    -6957,    -3581,      -45,     6252,     1513,
+   -4797,    -7925,    11270,    16188,    -2359,    -5269,     9376,   -10777,
+    7262,    20031,    -6515,    -2208,    -5353,     8085,    -1341,    -1303,
+    7333,     5576,     3625,     5763,    -7931,     9833,    -3371,   -10305,
+    6534,   -13539,    -9971,      997,     8464,    -4064,    -1495,     1857,
+   13624,     5458,     9490,   -11086,    -4524,    12022,     -550,     -198,
+     408,    -8455,    -7068,    10289,     9712,    -3366,     9028,    -7621,
+   -5243,     2362,     6909,     4672,    -4933,    -1799,     4709,    -4563,
+     -62,     -566,     1624,    -7010,    14730,   -17791,    -3697,    -2344,
+   -1741,     7099,    -9509,    -6855,    -1989,     3495,    -2289,     2031,
+   12784,      891,    14189,    -3963,    -5683,      421,   -12575,     1724,
+  -12682,    -5970,    -8169,     3143,    -1824,    -5488,    -5130,     8536,
+   12799,      794,     5738,     3459,   -11689,     -258,    -3738,    -3775,
+   -8742,     2333,     8312,    -9383,    10331,    13119,     8398,    10644,
+  -19433,    -6446,   -16277,   -11793,    16284,     9345,    15222,    15834,
+    2009,    -7349,      130,   -14547,      338,    -5998,     3337,    21492,
+    2406,     7703,     -951,    11196,     -564,     3406,     2217,     4806,
+    2374,    -5797,    11839,     8940,   -11874,    18213,     2855,    10492
+};
+
+static uint32_t IncreaseSeed(uint32_t* seed) {
+  seed[0] = (seed[0] * ((int32_t)69069) + 1) & (kMaxSeedUsed - 1);
+  return seed[0];
+}
+
+int16_t WebRtcSpl_RandU(uint32_t* seed) {
+  return (int16_t)(IncreaseSeed(seed) >> 16);
+}
+
+int16_t WebRtcSpl_RandN(uint32_t* seed) {
+  return kRandNTable[IncreaseSeed(seed) >> 23];
+}
+
+// Creates an array of uniformly distributed variables.
+int16_t WebRtcSpl_RandUArray(int16_t* vector,
+                             int16_t vector_length,
+                             uint32_t* seed) {
+  int i;
+  for (i = 0; i < vector_length; i++) {
+    vector[i] = WebRtcSpl_RandU(seed);
+  }
+  return vector_length;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft.c
new file mode 100644
index 00000000..92daae4d
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft.c
@@ -0,0 +1,102 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+
+#include <stdlib.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+struct RealFFT {
+  int order;
+};
+
+struct RealFFT* WebRtcSpl_CreateRealFFT(int order) {
+  struct RealFFT* self = NULL;
+
+  if (order > kMaxFFTOrder || order < 0) {
+    return NULL;
+  }
+
+  self = malloc(sizeof(struct RealFFT));
+  if (self == NULL) {
+    return NULL;
+  }
+  self->order = order;
+
+  return self;
+}
+
+void WebRtcSpl_FreeRealFFT(struct RealFFT* self) {
+  if (self != NULL) {
+    free(self);
+  }
+}
+
+// The C version FFT functions (i.e. WebRtcSpl_RealForwardFFT and
+// WebRtcSpl_RealInverseFFT) are real-valued FFT wrappers for complex-valued
+// FFT implementation in SPL.
+
+int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
+                             const int16_t* real_data_in,
+                             int16_t* complex_data_out) {
+  int i = 0;
+  int j = 0;
+  int result = 0;
+  int n = 1 << self->order;
+  // The complex-value FFT implementation needs a buffer to hold 2^order
+  // 16-bit COMPLEX numbers, for both time and frequency data.
+  int16_t complex_buffer[2 << kMaxFFTOrder];
+
+  // Insert zeros to the imaginary parts for complex forward FFT input.
+  for (i = 0, j = 0; i < n; i += 1, j += 2) {
+    complex_buffer[j] = real_data_in[i];
+    complex_buffer[j + 1] = 0;
+  };
+
+  WebRtcSpl_ComplexBitReverse(complex_buffer, self->order);
+  result = WebRtcSpl_ComplexFFT(complex_buffer, self->order, 1);
+
+  // For real FFT output, use only the first N + 2 elements from
+  // complex forward FFT.
+  memcpy(complex_data_out, complex_buffer, sizeof(int16_t) * (n + 2));
+
+  return result;
+}
+
+int WebRtcSpl_RealInverseFFT(struct RealFFT* self,
+                             const int16_t* complex_data_in,
+                             int16_t* real_data_out) {
+  int i = 0;
+  int j = 0;
+  int result = 0;
+  int n = 1 << self->order;
+  // Create the buffer specific to complex-valued FFT implementation.
+  int16_t complex_buffer[2 << kMaxFFTOrder];
+
+  // For n-point FFT, first copy the first n + 2 elements into complex
+  // FFT, then construct the remaining n - 2 elements by real FFT's
+  // conjugate-symmetric properties.
+  memcpy(complex_buffer, complex_data_in, sizeof(int16_t) * (n + 2));
+  for (i = n + 2; i < 2 * n; i += 2) {
+    complex_buffer[i] = complex_data_in[2 * n - i];
+    complex_buffer[i + 1] = -complex_data_in[2 * n - i + 1];
+  }
+
+  WebRtcSpl_ComplexBitReverse(complex_buffer, self->order);
+  result = WebRtcSpl_ComplexIFFT(complex_buffer, self->order, 1);
+
+  // Strip out the imaginary parts of the complex inverse FFT output.
+  for (i = 0, j = 0; i < n; i += 1, j += 2) {
+    real_data_out[i] = complex_buffer[j];
+  }
+
+  return result;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft_unittest.cc b/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft_unittest.cc
new file mode 100644
index 00000000..9bd35cd6
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft_unittest.cc
@@ -0,0 +1,108 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/test/testsupport/gtest_disable.h"
+#include "webrtc/typedefs.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace webrtc {
+namespace {
+
+// FFT order.
+const int kOrder = 5;
+// Lengths for real FFT's time and frequency bufffers.
+// For N-point FFT, the length requirements from API are N and N+2 respectively.
+const int kTimeDataLength = 1 << kOrder;
+const int kFreqDataLength = (1 << kOrder) + 2;
+// For complex FFT's time and freq buffer. The implementation requires
+// 2*N 16-bit words.
+const int kComplexFftDataLength = 2 << kOrder;
+// Reference data for time signal.
+const int16_t kRefData[kTimeDataLength] = {
+  11739, 6848, -8688, 31980, -30295, 25242, 27085, 19410,
+  -26299, 15607, -10791, 11778, -23819, 14498, -25772, 10076,
+  1173, 6848, -8688, 31980, -30295, 2522, 27085, 19410,
+  -2629, 5607, -3, 1178, -23819, 1498, -25772, 10076
+};
+
+class RealFFTTest : public ::testing::Test {
+ protected:
+   RealFFTTest() {
+     WebRtcSpl_Init();
+   }
+};
+
+TEST_F(RealFFTTest, CreateFailsOnBadInput) {
+  RealFFT* fft = WebRtcSpl_CreateRealFFT(11);
+  EXPECT_TRUE(fft == NULL);
+  fft = WebRtcSpl_CreateRealFFT(-1);
+  EXPECT_TRUE(fft == NULL);
+}
+
+TEST_F(RealFFTTest, RealAndComplexMatch) {
+  int i = 0;
+  int j = 0;
+  int16_t real_fft_time[kTimeDataLength] = {0};
+  int16_t real_fft_freq[kFreqDataLength] = {0};
+  // One common buffer for complex FFT's time and frequency data.
+  int16_t complex_fft_buff[kComplexFftDataLength] = {0};
+
+  // Prepare the inputs to forward FFT's.
+  memcpy(real_fft_time, kRefData, sizeof(kRefData));
+  for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) {
+    complex_fft_buff[j] = kRefData[i];
+    complex_fft_buff[j + 1] = 0;  // Insert zero's to imaginary parts.
+  };
+
+  // Create and run real forward FFT.
+  RealFFT* fft = WebRtcSpl_CreateRealFFT(kOrder);
+  EXPECT_TRUE(fft != NULL);
+  EXPECT_EQ(0, WebRtcSpl_RealForwardFFT(fft, real_fft_time, real_fft_freq));
+
+  // Run complex forward FFT.
+  WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder);
+  EXPECT_EQ(0, WebRtcSpl_ComplexFFT(complex_fft_buff, kOrder, 1));
+
+  // Verify the results between complex and real forward FFT.
+  for (i = 0; i < kFreqDataLength; i++) {
+    EXPECT_EQ(real_fft_freq[i], complex_fft_buff[i]);
+  }
+
+  // Prepare the inputs to inverse real FFT.
+  // We use whatever data in complex_fft_buff[] since we don't care
+  // about data contents. Only kFreqDataLength 16-bit words are copied
+  // from complex_fft_buff to real_fft_freq since remaining words (2nd half)
+  // are conjugate-symmetric to the first half in theory.
+  memcpy(real_fft_freq, complex_fft_buff, sizeof(real_fft_freq));
+
+  // Run real inverse FFT.
+  int real_scale = WebRtcSpl_RealInverseFFT(fft, real_fft_freq, real_fft_time);
+  EXPECT_GE(real_scale, 0);
+
+  // Run complex inverse FFT.
+  WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder);
+  int complex_scale = WebRtcSpl_ComplexIFFT(complex_fft_buff, kOrder, 1);
+
+  // Verify the results between complex and real inverse FFT.
+  // They are not bit-exact, since complex IFFT doesn't produce
+  // exactly conjugate-symmetric data (between first and second half).
+  EXPECT_EQ(real_scale, complex_scale);
+  for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) {
+    EXPECT_LE(abs(real_fft_time[i] - complex_fft_buff[j]), 1);
+  }
+
+  WebRtcSpl_FreeRealFFT(fft);
+}
+
+}  // namespace
+}  // namespace webrtc
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/refl_coef_to_lpc.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/refl_coef_to_lpc.c
new file mode 100644
index 00000000..06a29b66
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/refl_coef_to_lpc.c
@@ -0,0 +1,59 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_ReflCoefToLpc().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_ReflCoefToLpc(const int16_t *k, int use_order, int16_t *a)
+{
+    int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1];
+    int16_t *aptr, *aptr2, *anyptr;
+    const int16_t *kptr;
+    int m, i;
+
+    kptr = k;
+    *a = 4096; // i.e., (Word16_MAX >> 3)+1.
+    *any = *a;
+    a[1] = *k >> 3;
+
+    for (m = 1; m < use_order; m++)
+    {
+        kptr++;
+        aptr = a;
+        aptr++;
+        aptr2 = &a[m];
+        anyptr = any;
+        anyptr++;
+
+        any[m + 1] = *kptr >> 3;
+        for (i = 0; i < m; i++)
+        {
+            *anyptr = *aptr + (int16_t)((*aptr2 * *kptr) >> 15);
+            anyptr++;
+            aptr++;
+            aptr2--;
+        }
+
+        aptr = a;
+        anyptr = any;
+        for (i = 0; i < (m + 2); i++)
+        {
+            *aptr = *anyptr;
+            aptr++;
+            anyptr++;
+        }
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample.c
new file mode 100644
index 00000000..45fe52aa
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample.c
@@ -0,0 +1,505 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the resampling functions for 22 kHz.
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
+
+// Declaration of internally used functions
+static void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In, int16_t *Out,
+                                             int32_t K);
+
+void WebRtcSpl_32khzTo22khzIntToInt(const int32_t *In, int32_t *Out,
+                                    int32_t K);
+
+// interpolation coefficients
+static const int16_t kCoefficients32To22[5][9] = {
+        {127, -712,  2359, -6333, 23456, 16775, -3695,  945, -154},
+        {-39,  230,  -830,  2785, 32366, -2324,   760, -218,   38},
+        {117, -663,  2222, -6133, 26634, 13070, -3174,  831, -137},
+        {-77,  457, -1677,  5958, 31175, -4136,  1405, -408,   71},
+        { 98, -560,  1900, -5406, 29240,  9423, -2480,  663, -110}
+};
+
+//////////////////////
+// 22 kHz -> 16 kHz //
+//////////////////////
+
+// number of subblocks; options: 1, 2, 4, 5, 10
+#define SUB_BLOCKS_22_16    5
+
+// 22 -> 16 resampler
+void WebRtcSpl_Resample22khzTo16khz(const int16_t* in, int16_t* out,
+                                    WebRtcSpl_State22khzTo16khz* state, int32_t* tmpmem)
+{
+    int k;
+
+    // process two blocks of 10/SUB_BLOCKS_22_16 ms (to reduce temp buffer size)
+    for (k = 0; k < SUB_BLOCKS_22_16; k++)
+    {
+        ///// 22 --> 44 /////
+        // int16_t  in[220/SUB_BLOCKS_22_16]
+        // int32_t out[440/SUB_BLOCKS_22_16]
+        /////
+        WebRtcSpl_UpBy2ShortToInt(in, 220 / SUB_BLOCKS_22_16, tmpmem + 16, state->S_22_44);
+
+        ///// 44 --> 32 /////
+        // int32_t  in[440/SUB_BLOCKS_22_16]
+        // int32_t out[320/SUB_BLOCKS_22_16]
+        /////
+        // copy state to and from input array
+        tmpmem[8] = state->S_44_32[0];
+        tmpmem[9] = state->S_44_32[1];
+        tmpmem[10] = state->S_44_32[2];
+        tmpmem[11] = state->S_44_32[3];
+        tmpmem[12] = state->S_44_32[4];
+        tmpmem[13] = state->S_44_32[5];
+        tmpmem[14] = state->S_44_32[6];
+        tmpmem[15] = state->S_44_32[7];
+        state->S_44_32[0] = tmpmem[440 / SUB_BLOCKS_22_16 + 8];
+        state->S_44_32[1] = tmpmem[440 / SUB_BLOCKS_22_16 + 9];
+        state->S_44_32[2] = tmpmem[440 / SUB_BLOCKS_22_16 + 10];
+        state->S_44_32[3] = tmpmem[440 / SUB_BLOCKS_22_16 + 11];
+        state->S_44_32[4] = tmpmem[440 / SUB_BLOCKS_22_16 + 12];
+        state->S_44_32[5] = tmpmem[440 / SUB_BLOCKS_22_16 + 13];
+        state->S_44_32[6] = tmpmem[440 / SUB_BLOCKS_22_16 + 14];
+        state->S_44_32[7] = tmpmem[440 / SUB_BLOCKS_22_16 + 15];
+
+        WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 40 / SUB_BLOCKS_22_16);
+
+        ///// 32 --> 16 /////
+        // int32_t  in[320/SUB_BLOCKS_22_16]
+        // int32_t out[160/SUB_BLOCKS_22_16]
+        /////
+        WebRtcSpl_DownBy2IntToShort(tmpmem, 320 / SUB_BLOCKS_22_16, out, state->S_32_16);
+
+        // move input/output pointers 10/SUB_BLOCKS_22_16 ms seconds ahead
+        in += 220 / SUB_BLOCKS_22_16;
+        out += 160 / SUB_BLOCKS_22_16;
+    }
+}
+
+// initialize state of 22 -> 16 resampler
+void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state)
+{
+    int k;
+    for (k = 0; k < 8; k++)
+    {
+        state->S_22_44[k] = 0;
+        state->S_44_32[k] = 0;
+        state->S_32_16[k] = 0;
+    }
+}
+
+//////////////////////
+// 16 kHz -> 22 kHz //
+//////////////////////
+
+// number of subblocks; options: 1, 2, 4, 5, 10
+#define SUB_BLOCKS_16_22    4
+
+// 16 -> 22 resampler
+void WebRtcSpl_Resample16khzTo22khz(const int16_t* in, int16_t* out,
+                                    WebRtcSpl_State16khzTo22khz* state, int32_t* tmpmem)
+{
+    int k;
+
+    // process two blocks of 10/SUB_BLOCKS_16_22 ms (to reduce temp buffer size)
+    for (k = 0; k < SUB_BLOCKS_16_22; k++)
+    {
+        ///// 16 --> 32 /////
+        // int16_t  in[160/SUB_BLOCKS_16_22]
+        // int32_t out[320/SUB_BLOCKS_16_22]
+        /////
+        WebRtcSpl_UpBy2ShortToInt(in, 160 / SUB_BLOCKS_16_22, tmpmem + 8, state->S_16_32);
+
+        ///// 32 --> 22 /////
+        // int32_t  in[320/SUB_BLOCKS_16_22]
+        // int32_t out[220/SUB_BLOCKS_16_22]
+        /////
+        // copy state to and from input array
+        tmpmem[0] = state->S_32_22[0];
+        tmpmem[1] = state->S_32_22[1];
+        tmpmem[2] = state->S_32_22[2];
+        tmpmem[3] = state->S_32_22[3];
+        tmpmem[4] = state->S_32_22[4];
+        tmpmem[5] = state->S_32_22[5];
+        tmpmem[6] = state->S_32_22[6];
+        tmpmem[7] = state->S_32_22[7];
+        state->S_32_22[0] = tmpmem[320 / SUB_BLOCKS_16_22];
+        state->S_32_22[1] = tmpmem[320 / SUB_BLOCKS_16_22 + 1];
+        state->S_32_22[2] = tmpmem[320 / SUB_BLOCKS_16_22 + 2];
+        state->S_32_22[3] = tmpmem[320 / SUB_BLOCKS_16_22 + 3];
+        state->S_32_22[4] = tmpmem[320 / SUB_BLOCKS_16_22 + 4];
+        state->S_32_22[5] = tmpmem[320 / SUB_BLOCKS_16_22 + 5];
+        state->S_32_22[6] = tmpmem[320 / SUB_BLOCKS_16_22 + 6];
+        state->S_32_22[7] = tmpmem[320 / SUB_BLOCKS_16_22 + 7];
+
+        WebRtcSpl_32khzTo22khzIntToShort(tmpmem, out, 20 / SUB_BLOCKS_16_22);
+
+        // move input/output pointers 10/SUB_BLOCKS_16_22 ms seconds ahead
+        in += 160 / SUB_BLOCKS_16_22;
+        out += 220 / SUB_BLOCKS_16_22;
+    }
+}
+
+// initialize state of 16 -> 22 resampler
+void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state)
+{
+    int k;
+    for (k = 0; k < 8; k++)
+    {
+        state->S_16_32[k] = 0;
+        state->S_32_22[k] = 0;
+    }
+}
+
+//////////////////////
+// 22 kHz ->  8 kHz //
+//////////////////////
+
+// number of subblocks; options: 1, 2, 5, 10
+#define SUB_BLOCKS_22_8     2
+
+// 22 -> 8 resampler
+void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State22khzTo8khz* state, int32_t* tmpmem)
+{
+    int k;
+
+    // process two blocks of 10/SUB_BLOCKS_22_8 ms (to reduce temp buffer size)
+    for (k = 0; k < SUB_BLOCKS_22_8; k++)
+    {
+        ///// 22 --> 22 lowpass /////
+        // int16_t  in[220/SUB_BLOCKS_22_8]
+        // int32_t out[220/SUB_BLOCKS_22_8]
+        /////
+        WebRtcSpl_LPBy2ShortToInt(in, 220 / SUB_BLOCKS_22_8, tmpmem + 16, state->S_22_22);
+
+        ///// 22 --> 16 /////
+        // int32_t  in[220/SUB_BLOCKS_22_8]
+        // int32_t out[160/SUB_BLOCKS_22_8]
+        /////
+        // copy state to and from input array
+        tmpmem[8] = state->S_22_16[0];
+        tmpmem[9] = state->S_22_16[1];
+        tmpmem[10] = state->S_22_16[2];
+        tmpmem[11] = state->S_22_16[3];
+        tmpmem[12] = state->S_22_16[4];
+        tmpmem[13] = state->S_22_16[5];
+        tmpmem[14] = state->S_22_16[6];
+        tmpmem[15] = state->S_22_16[7];
+        state->S_22_16[0] = tmpmem[220 / SUB_BLOCKS_22_8 + 8];
+        state->S_22_16[1] = tmpmem[220 / SUB_BLOCKS_22_8 + 9];
+        state->S_22_16[2] = tmpmem[220 / SUB_BLOCKS_22_8 + 10];
+        state->S_22_16[3] = tmpmem[220 / SUB_BLOCKS_22_8 + 11];
+        state->S_22_16[4] = tmpmem[220 / SUB_BLOCKS_22_8 + 12];
+        state->S_22_16[5] = tmpmem[220 / SUB_BLOCKS_22_8 + 13];
+        state->S_22_16[6] = tmpmem[220 / SUB_BLOCKS_22_8 + 14];
+        state->S_22_16[7] = tmpmem[220 / SUB_BLOCKS_22_8 + 15];
+
+        WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 20 / SUB_BLOCKS_22_8);
+
+        ///// 16 --> 8 /////
+        // int32_t in[160/SUB_BLOCKS_22_8]
+        // int32_t out[80/SUB_BLOCKS_22_8]
+        /////
+        WebRtcSpl_DownBy2IntToShort(tmpmem, 160 / SUB_BLOCKS_22_8, out, state->S_16_8);
+
+        // move input/output pointers 10/SUB_BLOCKS_22_8 ms seconds ahead
+        in += 220 / SUB_BLOCKS_22_8;
+        out += 80 / SUB_BLOCKS_22_8;
+    }
+}
+
+// initialize state of 22 -> 8 resampler
+void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state)
+{
+    int k;
+    for (k = 0; k < 8; k++)
+    {
+        state->S_22_22[k] = 0;
+        state->S_22_22[k + 8] = 0;
+        state->S_22_16[k] = 0;
+        state->S_16_8[k] = 0;
+    }
+}
+
+//////////////////////
+//  8 kHz -> 22 kHz //
+//////////////////////
+
+// number of subblocks; options: 1, 2, 5, 10
+#define SUB_BLOCKS_8_22     2
+
+// 8 -> 22 resampler
+void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State8khzTo22khz* state, int32_t* tmpmem)
+{
+    int k;
+
+    // process two blocks of 10/SUB_BLOCKS_8_22 ms (to reduce temp buffer size)
+    for (k = 0; k < SUB_BLOCKS_8_22; k++)
+    {
+        ///// 8 --> 16 /////
+        // int16_t  in[80/SUB_BLOCKS_8_22]
+        // int32_t out[160/SUB_BLOCKS_8_22]
+        /////
+        WebRtcSpl_UpBy2ShortToInt(in, 80 / SUB_BLOCKS_8_22, tmpmem + 18, state->S_8_16);
+
+        ///// 16 --> 11 /////
+        // int32_t  in[160/SUB_BLOCKS_8_22]
+        // int32_t out[110/SUB_BLOCKS_8_22]
+        /////
+        // copy state to and from input array
+        tmpmem[10] = state->S_16_11[0];
+        tmpmem[11] = state->S_16_11[1];
+        tmpmem[12] = state->S_16_11[2];
+        tmpmem[13] = state->S_16_11[3];
+        tmpmem[14] = state->S_16_11[4];
+        tmpmem[15] = state->S_16_11[5];
+        tmpmem[16] = state->S_16_11[6];
+        tmpmem[17] = state->S_16_11[7];
+        state->S_16_11[0] = tmpmem[160 / SUB_BLOCKS_8_22 + 10];
+        state->S_16_11[1] = tmpmem[160 / SUB_BLOCKS_8_22 + 11];
+        state->S_16_11[2] = tmpmem[160 / SUB_BLOCKS_8_22 + 12];
+        state->S_16_11[3] = tmpmem[160 / SUB_BLOCKS_8_22 + 13];
+        state->S_16_11[4] = tmpmem[160 / SUB_BLOCKS_8_22 + 14];
+        state->S_16_11[5] = tmpmem[160 / SUB_BLOCKS_8_22 + 15];
+        state->S_16_11[6] = tmpmem[160 / SUB_BLOCKS_8_22 + 16];
+        state->S_16_11[7] = tmpmem[160 / SUB_BLOCKS_8_22 + 17];
+
+        WebRtcSpl_32khzTo22khzIntToInt(tmpmem + 10, tmpmem, 10 / SUB_BLOCKS_8_22);
+
+        ///// 11 --> 22 /////
+        // int32_t  in[110/SUB_BLOCKS_8_22]
+        // int16_t out[220/SUB_BLOCKS_8_22]
+        /////
+        WebRtcSpl_UpBy2IntToShort(tmpmem, 110 / SUB_BLOCKS_8_22, out, state->S_11_22);
+
+        // move input/output pointers 10/SUB_BLOCKS_8_22 ms seconds ahead
+        in += 80 / SUB_BLOCKS_8_22;
+        out += 220 / SUB_BLOCKS_8_22;
+    }
+}
+
+// initialize state of 8 -> 22 resampler
+void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state)
+{
+    int k;
+    for (k = 0; k < 8; k++)
+    {
+        state->S_8_16[k] = 0;
+        state->S_16_11[k] = 0;
+        state->S_11_22[k] = 0;
+    }
+}
+
+// compute two inner-products and store them to output array
+static void WebRtcSpl_DotProdIntToInt(const int32_t* in1, const int32_t* in2,
+                                      const int16_t* coef_ptr, int32_t* out1,
+                                      int32_t* out2)
+{
+    int32_t tmp1 = 16384;
+    int32_t tmp2 = 16384;
+    int16_t coef;
+
+    coef = coef_ptr[0];
+    tmp1 += coef * in1[0];
+    tmp2 += coef * in2[-0];
+
+    coef = coef_ptr[1];
+    tmp1 += coef * in1[1];
+    tmp2 += coef * in2[-1];
+
+    coef = coef_ptr[2];
+    tmp1 += coef * in1[2];
+    tmp2 += coef * in2[-2];
+
+    coef = coef_ptr[3];
+    tmp1 += coef * in1[3];
+    tmp2 += coef * in2[-3];
+
+    coef = coef_ptr[4];
+    tmp1 += coef * in1[4];
+    tmp2 += coef * in2[-4];
+
+    coef = coef_ptr[5];
+    tmp1 += coef * in1[5];
+    tmp2 += coef * in2[-5];
+
+    coef = coef_ptr[6];
+    tmp1 += coef * in1[6];
+    tmp2 += coef * in2[-6];
+
+    coef = coef_ptr[7];
+    tmp1 += coef * in1[7];
+    tmp2 += coef * in2[-7];
+
+    coef = coef_ptr[8];
+    *out1 = tmp1 + coef * in1[8];
+    *out2 = tmp2 + coef * in2[-8];
+}
+
+// compute two inner-products and store them to output array
+static void WebRtcSpl_DotProdIntToShort(const int32_t* in1, const int32_t* in2,
+                                        const int16_t* coef_ptr, int16_t* out1,
+                                        int16_t* out2)
+{
+    int32_t tmp1 = 16384;
+    int32_t tmp2 = 16384;
+    int16_t coef;
+
+    coef = coef_ptr[0];
+    tmp1 += coef * in1[0];
+    tmp2 += coef * in2[-0];
+
+    coef = coef_ptr[1];
+    tmp1 += coef * in1[1];
+    tmp2 += coef * in2[-1];
+
+    coef = coef_ptr[2];
+    tmp1 += coef * in1[2];
+    tmp2 += coef * in2[-2];
+
+    coef = coef_ptr[3];
+    tmp1 += coef * in1[3];
+    tmp2 += coef * in2[-3];
+
+    coef = coef_ptr[4];
+    tmp1 += coef * in1[4];
+    tmp2 += coef * in2[-4];
+
+    coef = coef_ptr[5];
+    tmp1 += coef * in1[5];
+    tmp2 += coef * in2[-5];
+
+    coef = coef_ptr[6];
+    tmp1 += coef * in1[6];
+    tmp2 += coef * in2[-6];
+
+    coef = coef_ptr[7];
+    tmp1 += coef * in1[7];
+    tmp2 += coef * in2[-7];
+
+    coef = coef_ptr[8];
+    tmp1 += coef * in1[8];
+    tmp2 += coef * in2[-8];
+
+    // scale down, round and saturate
+    tmp1 >>= 15;
+    if (tmp1 > (int32_t)0x00007FFF)
+        tmp1 = 0x00007FFF;
+    if (tmp1 < (int32_t)0xFFFF8000)
+        tmp1 = 0xFFFF8000;
+    tmp2 >>= 15;
+    if (tmp2 > (int32_t)0x00007FFF)
+        tmp2 = 0x00007FFF;
+    if (tmp2 < (int32_t)0xFFFF8000)
+        tmp2 = 0xFFFF8000;
+    *out1 = (int16_t)tmp1;
+    *out2 = (int16_t)tmp2;
+}
+
+//   Resampling ratio: 11/16
+// input:  int32_t (normalized, not saturated) :: size 16 * K
+// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 11 * K
+//      K: Number of blocks
+
+void WebRtcSpl_32khzTo22khzIntToInt(const int32_t* In,
+                                    int32_t* Out,
+                                    int32_t K)
+{
+    /////////////////////////////////////////////////////////////
+    // Filter operation:
+    //
+    // Perform resampling (16 input samples -> 11 output samples);
+    // process in sub blocks of size 16 samples.
+    int32_t m;
+
+    for (m = 0; m < K; m++)
+    {
+        // first output sample
+        Out[0] = ((int32_t)In[3] << 15) + (1 << 14);
+
+        // sum and accumulate filter coefficients and input samples
+        WebRtcSpl_DotProdIntToInt(&In[0], &In[22], kCoefficients32To22[0], &Out[1], &Out[10]);
+
+        // sum and accumulate filter coefficients and input samples
+        WebRtcSpl_DotProdIntToInt(&In[2], &In[20], kCoefficients32To22[1], &Out[2], &Out[9]);
+
+        // sum and accumulate filter coefficients and input samples
+        WebRtcSpl_DotProdIntToInt(&In[3], &In[19], kCoefficients32To22[2], &Out[3], &Out[8]);
+
+        // sum and accumulate filter coefficients and input samples
+        WebRtcSpl_DotProdIntToInt(&In[5], &In[17], kCoefficients32To22[3], &Out[4], &Out[7]);
+
+        // sum and accumulate filter coefficients and input samples
+        WebRtcSpl_DotProdIntToInt(&In[6], &In[16], kCoefficients32To22[4], &Out[5], &Out[6]);
+
+        // update pointers
+        In += 16;
+        Out += 11;
+    }
+}
+
+//   Resampling ratio: 11/16
+// input:  int32_t (normalized, not saturated) :: size 16 * K
+// output: int16_t (saturated) :: size 11 * K
+//      K: Number of blocks
+
+void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In,
+                                      int16_t *Out,
+                                      int32_t K)
+{
+    /////////////////////////////////////////////////////////////
+    // Filter operation:
+    //
+    // Perform resampling (16 input samples -> 11 output samples);
+    // process in sub blocks of size 16 samples.
+    int32_t tmp;
+    int32_t m;
+
+    for (m = 0; m < K; m++)
+    {
+        // first output sample
+        tmp = In[3];
+        if (tmp > (int32_t)0x00007FFF)
+            tmp = 0x00007FFF;
+        if (tmp < (int32_t)0xFFFF8000)
+            tmp = 0xFFFF8000;
+        Out[0] = (int16_t)tmp;
+
+        // sum and accumulate filter coefficients and input samples
+        WebRtcSpl_DotProdIntToShort(&In[0], &In[22], kCoefficients32To22[0], &Out[1], &Out[10]);
+
+        // sum and accumulate filter coefficients and input samples
+        WebRtcSpl_DotProdIntToShort(&In[2], &In[20], kCoefficients32To22[1], &Out[2], &Out[9]);
+
+        // sum and accumulate filter coefficients and input samples
+        WebRtcSpl_DotProdIntToShort(&In[3], &In[19], kCoefficients32To22[2], &Out[3], &Out[8]);
+
+        // sum and accumulate filter coefficients and input samples
+        WebRtcSpl_DotProdIntToShort(&In[5], &In[17], kCoefficients32To22[3], &Out[4], &Out[7]);
+
+        // sum and accumulate filter coefficients and input samples
+        WebRtcSpl_DotProdIntToShort(&In[6], &In[16], kCoefficients32To22[4], &Out[5], &Out[6]);
+
+        // update pointers
+        In += 16;
+        Out += 11;
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_48khz.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_48khz.c
new file mode 100644
index 00000000..2220cc33
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_48khz.c
@@ -0,0 +1,186 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains resampling functions between 48 kHz and nb/wb.
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include <string.h>
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
+
+////////////////////////////
+///// 48 kHz -> 16 kHz /////
+////////////////////////////
+
+// 48 -> 16 resampler
+void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
+                                    WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem)
+{
+    ///// 48 --> 48(LP) /////
+    // int16_t  in[480]
+    // int32_t out[480]
+    /////
+    WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48);
+
+    ///// 48 --> 32 /////
+    // int32_t  in[480]
+    // int32_t out[320]
+    /////
+    // copy state to and from input array
+    memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t));
+    memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t));
+    WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160);
+
+    ///// 32 --> 16 /////
+    // int32_t  in[320]
+    // int16_t out[160]
+    /////
+    WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16);
+}
+
+// initialize state of 48 -> 16 resampler
+void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state)
+{
+    memset(state->S_48_48, 0, 16 * sizeof(int32_t));
+    memset(state->S_48_32, 0, 8 * sizeof(int32_t));
+    memset(state->S_32_16, 0, 8 * sizeof(int32_t));
+}
+
+////////////////////////////
+///// 16 kHz -> 48 kHz /////
+////////////////////////////
+
+// 16 -> 48 resampler
+void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
+                                    WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem)
+{
+    ///// 16 --> 32 /////
+    // int16_t  in[160]
+    // int32_t out[320]
+    /////
+    WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32);
+
+    ///// 32 --> 24 /////
+    // int32_t  in[320]
+    // int32_t out[240]
+    // copy state to and from input array
+    /////
+    memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t));
+    memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t));
+    WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80);
+
+    ///// 24 --> 48 /////
+    // int32_t  in[240]
+    // int16_t out[480]
+    /////
+    WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
+}
+
+// initialize state of 16 -> 48 resampler
+void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state)
+{
+    memset(state->S_16_32, 0, 8 * sizeof(int32_t));
+    memset(state->S_32_24, 0, 8 * sizeof(int32_t));
+    memset(state->S_24_48, 0, 8 * sizeof(int32_t));
+}
+
+////////////////////////////
+///// 48 kHz ->  8 kHz /////
+////////////////////////////
+
+// 48 -> 8 resampler
+void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem)
+{
+    ///// 48 --> 24 /////
+    // int16_t  in[480]
+    // int32_t out[240]
+    /////
+    WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24);
+
+    ///// 24 --> 24(LP) /////
+    // int32_t  in[240]
+    // int32_t out[240]
+    /////
+    WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24);
+
+    ///// 24 --> 16 /////
+    // int32_t  in[240]
+    // int32_t out[160]
+    /////
+    // copy state to and from input array
+    memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t));
+    memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t));
+    WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80);
+
+    ///// 16 --> 8 /////
+    // int32_t  in[160]
+    // int16_t out[80]
+    /////
+    WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8);
+}
+
+// initialize state of 48 -> 8 resampler
+void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state)
+{
+    memset(state->S_48_24, 0, 8 * sizeof(int32_t));
+    memset(state->S_24_24, 0, 16 * sizeof(int32_t));
+    memset(state->S_24_16, 0, 8 * sizeof(int32_t));
+    memset(state->S_16_8, 0, 8 * sizeof(int32_t));
+}
+
+////////////////////////////
+/////  8 kHz -> 48 kHz /////
+////////////////////////////
+
+// 8 -> 48 resampler
+void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem)
+{
+    ///// 8 --> 16 /////
+    // int16_t  in[80]
+    // int32_t out[160]
+    /////
+    WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16);
+
+    ///// 16 --> 12 /////
+    // int32_t  in[160]
+    // int32_t out[120]
+    /////
+    // copy state to and from input array
+    memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t));
+    memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t));
+    WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40);
+
+    ///// 12 --> 24 /////
+    // int32_t  in[120]
+    // int16_t out[240]
+    /////
+    WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24);
+
+    ///// 24 --> 48 /////
+    // int32_t  in[240]
+    // int16_t out[480]
+    /////
+    WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
+}
+
+// initialize state of 8 -> 48 resampler
+void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state)
+{
+    memset(state->S_8_16, 0, 8 * sizeof(int32_t));
+    memset(state->S_16_12, 0, 8 * sizeof(int32_t));
+    memset(state->S_12_24, 0, 8 * sizeof(int32_t));
+    memset(state->S_24_48, 0, 8 * sizeof(int32_t));
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2.c
new file mode 100644
index 00000000..dcba82e3
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2.c
@@ -0,0 +1,183 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the resampling by two functions.
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#ifdef WEBRTC_ARCH_ARM_V7
+
+// allpass filter coefficients.
+static const uint32_t kResampleAllpass1[3] = {3284, 24441, 49528 << 15};
+static const uint32_t kResampleAllpass2[3] =
+  {12199, 37471 << 15, 60255 << 15};
+
+// Multiply two 32-bit values and accumulate to another input value.
+// Return: state + ((diff * tbl_value) >> 16)
+
+static __inline int32_t MUL_ACCUM_1(int32_t tbl_value,
+                                    int32_t diff,
+                                    int32_t state) {
+  int32_t result;
+  __asm __volatile ("smlawb %0, %1, %2, %3": "=r"(result): "r"(diff),
+                                   "r"(tbl_value), "r"(state));
+  return result;
+}
+
+// Multiply two 32-bit values and accumulate to another input value.
+// Return: Return: state + (((diff << 1) * tbl_value) >> 32)
+//
+// The reason to introduce this function is that, in case we can't use smlawb
+// instruction (in MUL_ACCUM_1) due to input value range, we can still use 
+// smmla to save some cycles.
+
+static __inline int32_t MUL_ACCUM_2(int32_t tbl_value,
+                                    int32_t diff,
+                                    int32_t state) {
+  int32_t result;
+  __asm __volatile ("smmla %0, %1, %2, %3": "=r"(result): "r"(diff << 1),
+                                  "r"(tbl_value), "r"(state));
+  return result;
+}
+
+#else
+
+// allpass filter coefficients.
+static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528};
+static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255};
+
+// Multiply a 32-bit value with a 16-bit value and accumulate to another input:
+#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
+#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
+
+#endif  // WEBRTC_ARCH_ARM_V7
+
+
+// decimator
+#if !defined(MIPS32_LE)
+void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len,
+                             int16_t* out, int32_t* filtState) {
+  int32_t tmp1, tmp2, diff, in32, out32;
+  size_t i;
+
+  register int32_t state0 = filtState[0];
+  register int32_t state1 = filtState[1];
+  register int32_t state2 = filtState[2];
+  register int32_t state3 = filtState[3];
+  register int32_t state4 = filtState[4];
+  register int32_t state5 = filtState[5];
+  register int32_t state6 = filtState[6];
+  register int32_t state7 = filtState[7];
+
+  for (i = (len >> 1); i > 0; i--) {
+    // lower allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state1;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+    state0 = in32;
+    diff = tmp1 - state2;
+    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+    state1 = tmp1;
+    diff = tmp2 - state3;
+    state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+    state2 = tmp2;
+
+    // upper allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state5;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+    state4 = in32;
+    diff = tmp1 - state6;
+    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+    state5 = tmp1;
+    diff = tmp2 - state7;
+    state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+    state6 = tmp2;
+
+    // add two allpass outputs, divide by two and round
+    out32 = (state3 + state7 + 1024) >> 11;
+
+    // limit amplitude to prevent wrap-around, and write to output array
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+  }
+
+  filtState[0] = state0;
+  filtState[1] = state1;
+  filtState[2] = state2;
+  filtState[3] = state3;
+  filtState[4] = state4;
+  filtState[5] = state5;
+  filtState[6] = state6;
+  filtState[7] = state7;
+}
+#endif  // #if defined(MIPS32_LE)
+
+
+void WebRtcSpl_UpsampleBy2(const int16_t* in, size_t len,
+                           int16_t* out, int32_t* filtState) {
+  int32_t tmp1, tmp2, diff, in32, out32;
+  size_t i;
+
+  register int32_t state0 = filtState[0];
+  register int32_t state1 = filtState[1];
+  register int32_t state2 = filtState[2];
+  register int32_t state3 = filtState[3];
+  register int32_t state4 = filtState[4];
+  register int32_t state5 = filtState[5];
+  register int32_t state6 = filtState[6];
+  register int32_t state7 = filtState[7];
+
+  for (i = len; i > 0; i--) {
+    // lower allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state1;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state0);
+    state0 = in32;
+    diff = tmp1 - state2;
+    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state1);
+    state1 = tmp1;
+    diff = tmp2 - state3;
+    state3 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state2);
+    state2 = tmp2;
+
+    // round; limit amplitude to prevent wrap-around; write to output array
+    out32 = (state3 + 512) >> 10;
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+
+    // upper allpass filter
+    diff = in32 - state5;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state4);
+    state4 = in32;
+    diff = tmp1 - state6;
+    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state5);
+    state5 = tmp1;
+    diff = tmp2 - state7;
+    state7 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state6);
+    state6 = tmp2;
+
+    // round; limit amplitude to prevent wrap-around; write to output array
+    out32 = (state7 + 512) >> 10;
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+  }
+
+  filtState[0] = state0;
+  filtState[1] = state1;
+  filtState[2] = state2;
+  filtState[3] = state3;
+  filtState[4] = state4;
+  filtState[5] = state5;
+  filtState[6] = state6;
+  filtState[7] = state7;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c
new file mode 100644
index 00000000..085069c8
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c
@@ -0,0 +1,679 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file contains some internal resampling functions.
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
+
+// allpass filter coefficients.
+static const int16_t kResampleAllpass[2][3] = {
+        {821, 6110, 12382},
+        {3050, 9368, 15063}
+};
+
+//
+//   decimator
+// input:  int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN!
+// output: int16_t (saturated) (of length len/2)
+// state:  filter state array; length = 8
+
+void WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out,
+                                 int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    len >>= 1;
+
+    // lower allpass filter (operates on even input samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // divide by two and store temporarily
+        in[i << 1] = (state[3] >> 1);
+    }
+
+    in++;
+
+    // upper allpass filter (operates on odd input samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and round
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // divide by two and store temporarily
+        in[i << 1] = (state[7] >> 1);
+    }
+
+    in--;
+
+    // combine allpass outputs
+    for (i = 0; i < len; i += 2)
+    {
+        // divide by two, add both allpass outputs and round
+        tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15;
+        tmp1 = (in[(i << 1) + 2] + in[(i << 1) + 3]) >> 15;
+        if (tmp0 > (int32_t)0x00007FFF)
+            tmp0 = 0x00007FFF;
+        if (tmp0 < (int32_t)0xFFFF8000)
+            tmp0 = 0xFFFF8000;
+        out[i] = (int16_t)tmp0;
+        if (tmp1 > (int32_t)0x00007FFF)
+            tmp1 = 0x00007FFF;
+        if (tmp1 < (int32_t)0xFFFF8000)
+            tmp1 = 0xFFFF8000;
+        out[i + 1] = (int16_t)tmp1;
+    }
+}
+
+//
+//   decimator
+// input:  int16_t
+// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2)
+// state:  filter state array; length = 8
+
+void WebRtcSpl_DownBy2ShortToInt(const int16_t *in,
+                                  int32_t len,
+                                  int32_t *out,
+                                  int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    len >>= 1;
+
+    // lower allpass filter (operates on even input samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // divide by two and store temporarily
+        out[i] = (state[3] >> 1);
+    }
+
+    in++;
+
+    // upper allpass filter (operates on odd input samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and round
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // divide by two and store temporarily
+        out[i] += (state[7] >> 1);
+    }
+
+    in--;
+}
+
+//
+//   interpolator
+// input:  int16_t
+// output: int32_t (normalized, not saturated) (of length len*2)
+// state:  filter state array; length = 8
+void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, int32_t *out,
+                               int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    // upper allpass filter (generates odd output samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i] << 15) + (1 << 14);
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // scale down, round and store
+        out[i << 1] = state[7] >> 15;
+    }
+
+    out++;
+
+    // lower allpass filter (generates even output samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i] << 15) + (1 << 14);
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // scale down, round and store
+        out[i << 1] = state[3] >> 15;
+    }
+}
+
+//
+//   interpolator
+// input:  int32_t (shifted 15 positions to the left, + offset 16384)
+// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len*2)
+// state:  filter state array; length = 8
+void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out,
+                             int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    // upper allpass filter (generates odd output samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i];
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // scale down, round and store
+        out[i << 1] = state[7];
+    }
+
+    out++;
+
+    // lower allpass filter (generates even output samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i];
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // scale down, round and store
+        out[i << 1] = state[3];
+    }
+}
+
+//
+//   interpolator
+// input:  int32_t (shifted 15 positions to the left, + offset 16384)
+// output: int16_t (saturated) (of length len*2)
+// state:  filter state array; length = 8
+void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, int16_t *out,
+                               int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    // upper allpass filter (generates odd output samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i];
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and round
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // scale down, saturate and store
+        tmp1 = state[7] >> 15;
+        if (tmp1 > (int32_t)0x00007FFF)
+            tmp1 = 0x00007FFF;
+        if (tmp1 < (int32_t)0xFFFF8000)
+            tmp1 = 0xFFFF8000;
+        out[i << 1] = (int16_t)tmp1;
+    }
+
+    out++;
+
+    // lower allpass filter (generates even output samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i];
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // scale down, saturate and store
+        tmp1 = state[3] >> 15;
+        if (tmp1 > (int32_t)0x00007FFF)
+            tmp1 = 0x00007FFF;
+        if (tmp1 < (int32_t)0xFFFF8000)
+            tmp1 = 0xFFFF8000;
+        out[i << 1] = (int16_t)tmp1;
+    }
+}
+
+//   lowpass filter
+// input:  int16_t
+// output: int32_t (normalized, not saturated)
+// state:  filter state array; length = 8
+void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out,
+                               int32_t* state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    len >>= 1;
+
+    // lower allpass filter: odd input -> even output samples
+    in++;
+    // initial state of polyphase delay element
+    tmp0 = state[12];
+    for (i = 0; i < len; i++)
+    {
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // scale down, round and store
+        out[i << 1] = state[3] >> 1;
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+    }
+    in--;
+
+    // upper allpass filter: even input -> even output samples
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and round
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // average the two allpass outputs, scale down and store
+        out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
+    }
+
+    // switch to odd output samples
+    out++;
+
+    // lower allpass filter: even input -> odd output samples
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+        diff = tmp0 - state[9];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[8] + diff * kResampleAllpass[1][0];
+        state[8] = tmp0;
+        diff = tmp1 - state[10];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[9] + diff * kResampleAllpass[1][1];
+        state[9] = tmp1;
+        diff = tmp0 - state[11];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[11] = state[10] + diff * kResampleAllpass[1][2];
+        state[10] = tmp0;
+
+        // scale down, round and store
+        out[i << 1] = state[11] >> 1;
+    }
+
+    // upper allpass filter: odd input -> odd output samples
+    in++;
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+        diff = tmp0 - state[13];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[12] + diff * kResampleAllpass[0][0];
+        state[12] = tmp0;
+        diff = tmp1 - state[14];
+        // scale down and round
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[13] + diff * kResampleAllpass[0][1];
+        state[13] = tmp1;
+        diff = tmp0 - state[15];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[15] = state[14] + diff * kResampleAllpass[0][2];
+        state[14] = tmp0;
+
+        // average the two allpass outputs, scale down and store
+        out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
+    }
+}
+
+//   lowpass filter
+// input:  int32_t (shifted 15 positions to the left, + offset 16384)
+// output: int32_t (normalized, not saturated)
+// state:  filter state array; length = 8
+void WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
+                             int32_t* state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    len >>= 1;
+
+    // lower allpass filter: odd input -> even output samples
+    in++;
+    // initial state of polyphase delay element
+    tmp0 = state[12];
+    for (i = 0; i < len; i++)
+    {
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // scale down, round and store
+        out[i << 1] = state[3] >> 1;
+        tmp0 = in[i << 1];
+    }
+    in--;
+
+    // upper allpass filter: even input -> even output samples
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and round
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // average the two allpass outputs, scale down and store
+        out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
+    }
+
+    // switch to odd output samples
+    out++;
+
+    // lower allpass filter: even input -> odd output samples
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[9];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[8] + diff * kResampleAllpass[1][0];
+        state[8] = tmp0;
+        diff = tmp1 - state[10];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[9] + diff * kResampleAllpass[1][1];
+        state[9] = tmp1;
+        diff = tmp0 - state[11];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[11] = state[10] + diff * kResampleAllpass[1][2];
+        state[10] = tmp0;
+
+        // scale down, round and store
+        out[i << 1] = state[11] >> 1;
+    }
+
+    // upper allpass filter: odd input -> odd output samples
+    in++;
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[13];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[12] + diff * kResampleAllpass[0][0];
+        state[12] = tmp0;
+        diff = tmp1 - state[14];
+        // scale down and round
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[13] + diff * kResampleAllpass[0][1];
+        state[13] = tmp1;
+        diff = tmp0 - state[15];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[15] = state[14] + diff * kResampleAllpass[0][2];
+        state[14] = tmp0;
+
+        // average the two allpass outputs, scale down and store
+        out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.h
new file mode 100644
index 00000000..5c9533ee
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.h
@@ -0,0 +1,47 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file contains some internal resampling functions.
+ *
+ */
+
+#ifndef WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_
+#define WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_
+
+#include "webrtc/typedefs.h"
+
+/*******************************************************************
+ * resample_by_2_fast.c
+ * Functions for internal use in the other resample functions
+ ******************************************************************/
+void WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out,
+                                 int32_t *state);
+
+void WebRtcSpl_DownBy2ShortToInt(const int16_t *in, int32_t len,
+                                 int32_t *out, int32_t *state);
+
+void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len,
+                               int32_t *out, int32_t *state);
+
+void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out,
+                             int32_t *state);
+
+void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len,
+                               int16_t *out, int32_t *state);
+
+void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len,
+                               int32_t* out, int32_t* state);
+
+void WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
+                             int32_t* state);
+
+#endif // WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_mips.c
new file mode 100644
index 00000000..ec5fc8b3
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_mips.c
@@ -0,0 +1,290 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the resampling by two functions.
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#if defined(MIPS32_LE)
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// allpass filter coefficients.
+static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528};
+static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255};
+
+// Multiply a 32-bit value with a 16-bit value and accumulate to another input:
+#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
+#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
+
+// decimator
+void WebRtcSpl_DownsampleBy2(const int16_t* in,
+                             size_t len,
+                             int16_t* out,
+                             int32_t* filtState) {
+  int32_t out32;
+  size_t i, len1;
+
+  register int32_t state0 = filtState[0];
+  register int32_t state1 = filtState[1];
+  register int32_t state2 = filtState[2];
+  register int32_t state3 = filtState[3];
+  register int32_t state4 = filtState[4];
+  register int32_t state5 = filtState[5];
+  register int32_t state6 = filtState[6];
+  register int32_t state7 = filtState[7];
+
+#if defined(MIPS_DSP_R2_LE)
+  int32_t k1Res0, k1Res1, k1Res2, k2Res0, k2Res1, k2Res2;
+
+  k1Res0= 3284;
+  k1Res1= 24441;
+  k1Res2= 49528;
+  k2Res0= 12199;
+  k2Res1= 37471;
+  k2Res2= 60255;
+  len1 = (len >> 1);
+
+  const int32_t* inw = (int32_t*)in;
+  int32_t tmp11, tmp12, tmp21, tmp22;
+  int32_t in322, in321;
+  int32_t diff1, diff2;
+  for (i = len1; i > 0; i--) {
+    __asm__ volatile (
+      "lh         %[in321],    0(%[inw])                  \n\t"
+      "lh         %[in322],    2(%[inw])                  \n\t"
+
+      "sll        %[in321],    %[in321],      10          \n\t"
+      "sll        %[in322],    %[in322],      10          \n\t"
+
+      "addiu      %[inw],      %[inw],        4           \n\t"
+
+      "subu       %[diff1],    %[in321],      %[state1]   \n\t"
+      "subu       %[diff2],    %[in322],      %[state5]   \n\t"
+
+      : [in322] "=&r" (in322), [in321] "=&r" (in321),
+        [diff1] "=&r" (diff1), [diff2] "=r" (diff2), [inw] "+r" (inw)
+      : [state1] "r" (state1), [state5] "r" (state5)
+      : "memory"
+    );
+
+    __asm__ volatile (
+      "mult       $ac0,       %[diff1],       %[k2Res0]   \n\t"
+      "mult       $ac1,       %[diff2],       %[k1Res0]   \n\t"
+
+      "extr.w     %[tmp11],   $ac0,           16          \n\t"
+      "extr.w     %[tmp12],   $ac1,           16          \n\t"
+
+      "addu       %[tmp11],   %[state0],      %[tmp11]    \n\t"
+      "addu       %[tmp12],   %[state4],      %[tmp12]    \n\t"
+
+      "addiu      %[state0],  %[in321],       0           \n\t"
+      "addiu      %[state4],  %[in322],       0           \n\t"
+
+      "subu       %[diff1],   %[tmp11],       %[state2]   \n\t"
+      "subu       %[diff2],   %[tmp12],       %[state6]   \n\t"
+
+      "mult       $ac0,       %[diff1],       %[k2Res1]   \n\t"
+      "mult       $ac1,       %[diff2],       %[k1Res1]   \n\t"
+
+      "extr.w     %[tmp21],   $ac0,           16          \n\t"
+      "extr.w     %[tmp22],   $ac1,           16          \n\t"
+
+      "addu       %[tmp21],   %[state1],      %[tmp21]    \n\t"
+      "addu       %[tmp22],   %[state5],      %[tmp22]    \n\t"
+
+      "addiu      %[state1],  %[tmp11],       0           \n\t"
+      "addiu      %[state5],  %[tmp12],       0           \n\t"
+      : [tmp22] "=r" (tmp22), [tmp21] "=&r" (tmp21),
+        [tmp11] "=&r" (tmp11), [state0] "+r" (state0),
+        [state1] "+r" (state1),
+        [state2] "+r" (state2),
+        [state4] "+r" (state4), [tmp12] "=&r" (tmp12),
+        [state6] "+r" (state6), [state5] "+r" (state5)
+      : [k1Res1] "r" (k1Res1), [k2Res1] "r" (k2Res1), [k2Res0] "r" (k2Res0),
+        [diff2] "r" (diff2), [diff1] "r" (diff1), [in322] "r" (in322),
+        [in321] "r" (in321), [k1Res0] "r" (k1Res0)
+      : "hi", "lo", "$ac1hi", "$ac1lo"
+    );
+
+    // upper allpass filter
+    __asm__ volatile (
+      "subu       %[diff1],   %[tmp21],       %[state3]   \n\t"
+      "subu       %[diff2],   %[tmp22],       %[state7]   \n\t"
+
+      "mult       $ac0,       %[diff1],       %[k2Res2]   \n\t"
+      "mult       $ac1,       %[diff2],       %[k1Res2]   \n\t"
+      "extr.w     %[state3],  $ac0,           16          \n\t"
+      "extr.w     %[state7],  $ac1,           16          \n\t"
+      "addu       %[state3],  %[state2],      %[state3]   \n\t"
+      "addu       %[state7],  %[state6],      %[state7]   \n\t"
+
+      "addiu      %[state2],  %[tmp21],       0           \n\t"
+      "addiu      %[state6],  %[tmp22],       0           \n\t"
+
+      // add two allpass outputs, divide by two and round
+      "addu       %[out32],   %[state3],      %[state7]   \n\t"
+      "addiu      %[out32],   %[out32],       1024        \n\t"
+      "sra        %[out32],   %[out32],       11          \n\t"
+      : [state3] "+r" (state3), [state6] "+r" (state6),
+        [state2] "+r" (state2), [diff2] "=&r" (diff2),
+        [out32] "=r" (out32), [diff1] "=&r" (diff1), [state7] "+r" (state7)
+      : [tmp22] "r" (tmp22), [tmp21] "r" (tmp21),
+        [k1Res2] "r" (k1Res2), [k2Res2] "r" (k2Res2)
+      : "hi", "lo", "$ac1hi", "$ac1lo"
+    );
+
+    // limit amplitude to prevent wrap-around, and write to output array
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+  }
+#else  // #if defined(MIPS_DSP_R2_LE)
+  int32_t tmp1, tmp2, diff;
+  int32_t in32;
+  len1 = (len >> 1)/4;
+  for (i = len1; i > 0; i--) {
+    // lower allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state1;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+    state0 = in32;
+    diff = tmp1 - state2;
+    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+    state1 = tmp1;
+    diff = tmp2 - state3;
+    state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+    state2 = tmp2;
+
+    // upper allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state5;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+    state4 = in32;
+    diff = tmp1 - state6;
+    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+    state5 = tmp1;
+    diff = tmp2 - state7;
+    state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+    state6 = tmp2;
+
+    // add two allpass outputs, divide by two and round
+    out32 = (state3 + state7 + 1024) >> 11;
+
+    // limit amplitude to prevent wrap-around, and write to output array
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+    // lower allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state1;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+    state0 = in32;
+    diff = tmp1 - state2;
+    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+    state1 = tmp1;
+    diff = tmp2 - state3;
+    state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+    state2 = tmp2;
+
+    // upper allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state5;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+    state4 = in32;
+    diff = tmp1 - state6;
+    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+    state5 = tmp1;
+    diff = tmp2 - state7;
+    state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+    state6 = tmp2;
+
+    // add two allpass outputs, divide by two and round
+    out32 = (state3 + state7 + 1024) >> 11;
+
+    // limit amplitude to prevent wrap-around, and write to output array
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+    // lower allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state1;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+    state0 = in32;
+    diff = tmp1 - state2;
+    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+    state1 = tmp1;
+    diff = tmp2 - state3;
+    state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+    state2 = tmp2;
+
+    // upper allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state5;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+    state4 = in32;
+    diff = tmp1 - state6;
+    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+    state5 = tmp1;
+    diff = tmp2 - state7;
+    state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+    state6 = tmp2;
+
+    // add two allpass outputs, divide by two and round
+    out32 = (state3 + state7 + 1024) >> 11;
+
+    // limit amplitude to prevent wrap-around, and write to output array
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+    // lower allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state1;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+    state0 = in32;
+    diff = tmp1 - state2;
+    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+    state1 = tmp1;
+    diff = tmp2 - state3;
+    state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+    state2 = tmp2;
+
+    // upper allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state5;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+    state4 = in32;
+    diff = tmp1 - state6;
+    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+    state5 = tmp1;
+    diff = tmp2 - state7;
+    state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+    state6 = tmp2;
+
+    // add two allpass outputs, divide by two and round
+    out32 = (state3 + state7 + 1024) >> 11;
+
+    // limit amplitude to prevent wrap-around, and write to output array
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+  }
+#endif  // #if defined(MIPS_DSP_R2_LE)
+  __asm__ volatile (
+    "sw       %[state0],      0(%[filtState])     \n\t"
+    "sw       %[state1],      4(%[filtState])     \n\t"
+    "sw       %[state2],      8(%[filtState])     \n\t"
+    "sw       %[state3],      12(%[filtState])    \n\t"
+    "sw       %[state4],      16(%[filtState])    \n\t"
+    "sw       %[state5],      20(%[filtState])    \n\t"
+    "sw       %[state6],      24(%[filtState])    \n\t"
+    "sw       %[state7],      28(%[filtState])    \n\t"
+    :
+    : [state0] "r" (state0), [state1] "r" (state1), [state2] "r" (state2),
+      [state3] "r" (state3), [state4] "r" (state4), [state5] "r" (state5),
+      [state6] "r" (state6), [state7] "r" (state7), [filtState] "r" (filtState)
+    : "memory"
+  );
+}
+
+#endif  // #if defined(MIPS32_LE)
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_fractional.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_fractional.c
new file mode 100644
index 00000000..6409fbac
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_fractional.c
@@ -0,0 +1,239 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the resampling functions between 48, 44, 32 and 24 kHz.
+ * The description headers can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// interpolation coefficients
+static const int16_t kCoefficients48To32[2][8] = {
+        {778, -2050, 1087, 23285, 12903, -3783, 441, 222},
+        {222, 441, -3783, 12903, 23285, 1087, -2050, 778}
+};
+
+static const int16_t kCoefficients32To24[3][8] = {
+        {767, -2362, 2434, 24406, 10620, -3838, 721, 90},
+        {386, -381, -2646, 19062, 19062, -2646, -381, 386},
+        {90, 721, -3838, 10620, 24406, 2434, -2362, 767}
+};
+
+static const int16_t kCoefficients44To32[4][9] = {
+        {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138},
+        {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91},
+        {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53},
+        {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126}
+};
+
+//   Resampling ratio: 2/3
+// input:  int32_t (normalized, not saturated) :: size 3 * K
+// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K
+//      K: number of blocks
+
+void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
+{
+    /////////////////////////////////////////////////////////////
+    // Filter operation:
+    //
+    // Perform resampling (3 input samples -> 2 output samples);
+    // process in sub blocks of size 3 samples.
+    int32_t tmp;
+    size_t m;
+
+    for (m = 0; m < K; m++)
+    {
+        tmp = 1 << 14;
+        tmp += kCoefficients48To32[0][0] * In[0];
+        tmp += kCoefficients48To32[0][1] * In[1];
+        tmp += kCoefficients48To32[0][2] * In[2];
+        tmp += kCoefficients48To32[0][3] * In[3];
+        tmp += kCoefficients48To32[0][4] * In[4];
+        tmp += kCoefficients48To32[0][5] * In[5];
+        tmp += kCoefficients48To32[0][6] * In[6];
+        tmp += kCoefficients48To32[0][7] * In[7];
+        Out[0] = tmp;
+
+        tmp = 1 << 14;
+        tmp += kCoefficients48To32[1][0] * In[1];
+        tmp += kCoefficients48To32[1][1] * In[2];
+        tmp += kCoefficients48To32[1][2] * In[3];
+        tmp += kCoefficients48To32[1][3] * In[4];
+        tmp += kCoefficients48To32[1][4] * In[5];
+        tmp += kCoefficients48To32[1][5] * In[6];
+        tmp += kCoefficients48To32[1][6] * In[7];
+        tmp += kCoefficients48To32[1][7] * In[8];
+        Out[1] = tmp;
+
+        // update pointers
+        In += 3;
+        Out += 2;
+    }
+}
+
+//   Resampling ratio: 3/4
+// input:  int32_t (normalized, not saturated) :: size 4 * K
+// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K
+//      K: number of blocks
+
+void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K)
+{
+    /////////////////////////////////////////////////////////////
+    // Filter operation:
+    //
+    // Perform resampling (4 input samples -> 3 output samples);
+    // process in sub blocks of size 4 samples.
+    size_t m;
+    int32_t tmp;
+
+    for (m = 0; m < K; m++)
+    {
+        tmp = 1 << 14;
+        tmp += kCoefficients32To24[0][0] * In[0];
+        tmp += kCoefficients32To24[0][1] * In[1];
+        tmp += kCoefficients32To24[0][2] * In[2];
+        tmp += kCoefficients32To24[0][3] * In[3];
+        tmp += kCoefficients32To24[0][4] * In[4];
+        tmp += kCoefficients32To24[0][5] * In[5];
+        tmp += kCoefficients32To24[0][6] * In[6];
+        tmp += kCoefficients32To24[0][7] * In[7];
+        Out[0] = tmp;
+
+        tmp = 1 << 14;
+        tmp += kCoefficients32To24[1][0] * In[1];
+        tmp += kCoefficients32To24[1][1] * In[2];
+        tmp += kCoefficients32To24[1][2] * In[3];
+        tmp += kCoefficients32To24[1][3] * In[4];
+        tmp += kCoefficients32To24[1][4] * In[5];
+        tmp += kCoefficients32To24[1][5] * In[6];
+        tmp += kCoefficients32To24[1][6] * In[7];
+        tmp += kCoefficients32To24[1][7] * In[8];
+        Out[1] = tmp;
+
+        tmp = 1 << 14;
+        tmp += kCoefficients32To24[2][0] * In[2];
+        tmp += kCoefficients32To24[2][1] * In[3];
+        tmp += kCoefficients32To24[2][2] * In[4];
+        tmp += kCoefficients32To24[2][3] * In[5];
+        tmp += kCoefficients32To24[2][4] * In[6];
+        tmp += kCoefficients32To24[2][5] * In[7];
+        tmp += kCoefficients32To24[2][6] * In[8];
+        tmp += kCoefficients32To24[2][7] * In[9];
+        Out[2] = tmp;
+
+        // update pointers
+        In += 4;
+        Out += 3;
+    }
+}
+
+//
+// fractional resampling filters
+//   Fout = 11/16 * Fin
+//   Fout =  8/11 * Fin
+//
+
+// compute two inner-products and store them to output array
+static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2,
+                                       const int16_t *coef_ptr, int32_t *out1,
+                                       int32_t *out2)
+{
+    int32_t tmp1 = 16384;
+    int32_t tmp2 = 16384;
+    int16_t coef;
+
+    coef = coef_ptr[0];
+    tmp1 += coef * in1[0];
+    tmp2 += coef * in2[-0];
+
+    coef = coef_ptr[1];
+    tmp1 += coef * in1[1];
+    tmp2 += coef * in2[-1];
+
+    coef = coef_ptr[2];
+    tmp1 += coef * in1[2];
+    tmp2 += coef * in2[-2];
+
+    coef = coef_ptr[3];
+    tmp1 += coef * in1[3];
+    tmp2 += coef * in2[-3];
+
+    coef = coef_ptr[4];
+    tmp1 += coef * in1[4];
+    tmp2 += coef * in2[-4];
+
+    coef = coef_ptr[5];
+    tmp1 += coef * in1[5];
+    tmp2 += coef * in2[-5];
+
+    coef = coef_ptr[6];
+    tmp1 += coef * in1[6];
+    tmp2 += coef * in2[-6];
+
+    coef = coef_ptr[7];
+    tmp1 += coef * in1[7];
+    tmp2 += coef * in2[-7];
+
+    coef = coef_ptr[8];
+    *out1 = tmp1 + coef * in1[8];
+    *out2 = tmp2 + coef * in2[-8];
+}
+
+//   Resampling ratio: 8/11
+// input:  int32_t (normalized, not saturated) :: size 11 * K
+// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size  8 * K
+//      K: number of blocks
+
+void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
+{
+    /////////////////////////////////////////////////////////////
+    // Filter operation:
+    //
+    // Perform resampling (11 input samples -> 8 output samples);
+    // process in sub blocks of size 11 samples.
+    int32_t tmp;
+    size_t m;
+
+    for (m = 0; m < K; m++)
+    {
+        tmp = 1 << 14;
+
+        // first output sample
+        Out[0] = ((int32_t)In[3] << 15) + tmp;
+
+        // sum and accumulate filter coefficients and input samples
+        tmp += kCoefficients44To32[3][0] * In[5];
+        tmp += kCoefficients44To32[3][1] * In[6];
+        tmp += kCoefficients44To32[3][2] * In[7];
+        tmp += kCoefficients44To32[3][3] * In[8];
+        tmp += kCoefficients44To32[3][4] * In[9];
+        tmp += kCoefficients44To32[3][5] * In[10];
+        tmp += kCoefficients44To32[3][6] * In[11];
+        tmp += kCoefficients44To32[3][7] * In[12];
+        tmp += kCoefficients44To32[3][8] * In[13];
+        Out[4] = tmp;
+
+        // sum and accumulate filter coefficients and input samples
+        WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], &Out[7]);
+
+        // sum and accumulate filter coefficients and input samples
+        WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], &Out[6]);
+
+        // sum and accumulate filter coefficients and input samples
+        WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], &Out[5]);
+
+        // update pointers
+        In += 11;
+        Out += 8;
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/signal_processing_unittest.cc b/third_party/webrtc/src/webrtc/common_audio/signal_processing/signal_processing_unittest.cc
new file mode 100644
index 00000000..108f459c
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/signal_processing_unittest.cc
@@ -0,0 +1,579 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+static const size_t kVector16Size = 9;
+static const int16_t vector16[kVector16Size] = {1, -15511, 4323, 1963,
+  WEBRTC_SPL_WORD16_MAX, 0, WEBRTC_SPL_WORD16_MIN + 5, -3333, 345};
+
+class SplTest : public testing::Test {
+ protected:
+   SplTest() {
+     WebRtcSpl_Init();
+   }
+   virtual ~SplTest() {
+   }
+};
+
+TEST_F(SplTest, MacroTest) {
+    // Macros with inputs.
+    int A = 10;
+    int B = 21;
+    int a = -3;
+    int b = WEBRTC_SPL_WORD32_MAX;
+
+    EXPECT_EQ(10, WEBRTC_SPL_MIN(A, B));
+    EXPECT_EQ(21, WEBRTC_SPL_MAX(A, B));
+
+    EXPECT_EQ(3, WEBRTC_SPL_ABS_W16(a));
+    EXPECT_EQ(3, WEBRTC_SPL_ABS_W32(a));
+
+    EXPECT_EQ(-63, WEBRTC_SPL_MUL(a, B));
+    EXPECT_EQ(-2147483645, WEBRTC_SPL_MUL(a, b));
+    EXPECT_EQ(2147483651u, WEBRTC_SPL_UMUL(a, b));
+    b = WEBRTC_SPL_WORD16_MAX >> 1;
+    EXPECT_EQ(4294918147u, WEBRTC_SPL_UMUL_32_16(a, b));
+    EXPECT_EQ(-49149, WEBRTC_SPL_MUL_16_U16(a, b));
+
+    a = b;
+    b = -3;
+
+    EXPECT_EQ(-1, WEBRTC_SPL_MUL_16_32_RSFT16(a, b));
+    EXPECT_EQ(-1, WEBRTC_SPL_MUL_16_32_RSFT15(a, b));
+    EXPECT_EQ(-3, WEBRTC_SPL_MUL_16_32_RSFT14(a, b));
+    EXPECT_EQ(-24, WEBRTC_SPL_MUL_16_32_RSFT11(a, b));
+
+    EXPECT_EQ(-12288, WEBRTC_SPL_MUL_16_16_RSFT(a, b, 2));
+    EXPECT_EQ(-12287, WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(a, b, 2));
+
+    EXPECT_EQ(21, WEBRTC_SPL_SAT(a, A, B));
+    EXPECT_EQ(21, WEBRTC_SPL_SAT(a, B, A));
+
+    // Shifting with negative numbers allowed
+    int shift_amount = 1;  // Workaround compiler warning using variable here.
+    // Positive means left shift
+    EXPECT_EQ(32766, WEBRTC_SPL_SHIFT_W32(a, shift_amount));
+
+    // Shifting with negative numbers not allowed
+    // We cannot do casting here due to signed/unsigned problem
+    EXPECT_EQ(32766, WEBRTC_SPL_LSHIFT_W32(a, 1));
+
+    EXPECT_EQ(8191u, WEBRTC_SPL_RSHIFT_U32(a, 1));
+
+    EXPECT_EQ(1470, WEBRTC_SPL_RAND(A));
+
+    EXPECT_EQ(-49149, WEBRTC_SPL_MUL_16_16(a, b));
+    EXPECT_EQ(1073676289, WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX,
+                                               WEBRTC_SPL_WORD16_MAX));
+    EXPECT_EQ(1073709055, WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MAX,
+                                                      WEBRTC_SPL_WORD32_MAX));
+    EXPECT_EQ(1073741824, WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN,
+                                                      WEBRTC_SPL_WORD32_MIN));
+#ifdef WEBRTC_ARCH_ARM_V7
+    EXPECT_EQ(-1073741824,
+              WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN,
+                                          WEBRTC_SPL_WORD32_MAX));
+#else
+    EXPECT_EQ(-1073741823,
+              WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN,
+                                          WEBRTC_SPL_WORD32_MAX));
+#endif
+}
+
+TEST_F(SplTest, InlineTest) {
+    int16_t a16 = 121;
+    int16_t b16 = -17;
+    int32_t a32 = 111121;
+    int32_t b32 = -1711;
+
+    EXPECT_EQ(17, WebRtcSpl_GetSizeInBits(a32));
+
+    EXPECT_EQ(0, WebRtcSpl_NormW32(0));
+    EXPECT_EQ(31, WebRtcSpl_NormW32(-1));
+    EXPECT_EQ(0, WebRtcSpl_NormW32(WEBRTC_SPL_WORD32_MIN));
+    EXPECT_EQ(14, WebRtcSpl_NormW32(a32));
+
+    EXPECT_EQ(0, WebRtcSpl_NormW16(0));
+    EXPECT_EQ(15, WebRtcSpl_NormW16(-1));
+    EXPECT_EQ(0, WebRtcSpl_NormW16(WEBRTC_SPL_WORD16_MIN));
+    EXPECT_EQ(4, WebRtcSpl_NormW16(b32));
+    for (int ii = 0; ii < 15; ++ii) {
+      int16_t value = 1 << ii;
+      EXPECT_EQ(14 - ii, WebRtcSpl_NormW16(value));
+      EXPECT_EQ(15 - ii, WebRtcSpl_NormW16(-value));
+    }
+
+    EXPECT_EQ(0, WebRtcSpl_NormU32(0u));
+    EXPECT_EQ(0, WebRtcSpl_NormU32(0xffffffff));
+    EXPECT_EQ(15, WebRtcSpl_NormU32(static_cast<uint32_t>(a32)));
+
+    EXPECT_EQ(104, WebRtcSpl_AddSatW16(a16, b16));
+    EXPECT_EQ(138, WebRtcSpl_SubSatW16(a16, b16));
+
+    EXPECT_EQ(109410, WebRtcSpl_AddSatW32(a32, b32));
+    EXPECT_EQ(112832, WebRtcSpl_SubSatW32(a32, b32));
+
+    a32 = 0x80000000;
+    b32 = 0x80000000;
+    // Cast to signed int to avoid compiler complaint on gtest.h.
+    EXPECT_EQ(static_cast<int>(0x80000000), WebRtcSpl_AddSatW32(a32, b32));
+    a32 = 0x7fffffff;
+    b32 = 0x7fffffff;
+    EXPECT_EQ(0x7fffffff, WebRtcSpl_AddSatW32(a32, b32));
+    a32 = 0;
+    b32 = 0x80000000;
+    EXPECT_EQ(0x7fffffff, WebRtcSpl_SubSatW32(a32, b32));
+    a32 = 0x7fffffff;
+    b32 = 0x80000000;
+    EXPECT_EQ(0x7fffffff, WebRtcSpl_SubSatW32(a32, b32));
+    a32 = 0x80000000;
+    b32 = 0x7fffffff;
+    EXPECT_EQ(static_cast<int>(0x80000000), WebRtcSpl_SubSatW32(a32, b32));
+}
+
+TEST_F(SplTest, MathOperationsTest) {
+    int A = 1134567892;
+    int32_t num = 117;
+    int32_t den = -5;
+    uint16_t denU = 5;
+    EXPECT_EQ(33700, WebRtcSpl_Sqrt(A));
+    EXPECT_EQ(33683, WebRtcSpl_SqrtFloor(A));
+
+
+    EXPECT_EQ(-91772805, WebRtcSpl_DivResultInQ31(den, num));
+    EXPECT_EQ(-23, WebRtcSpl_DivW32W16ResW16(num, (int16_t)den));
+    EXPECT_EQ(-23, WebRtcSpl_DivW32W16(num, (int16_t)den));
+    EXPECT_EQ(23u, WebRtcSpl_DivU32U16(num, denU));
+    EXPECT_EQ(0, WebRtcSpl_DivW32HiLow(128, 0, 256));
+}
+
+TEST_F(SplTest, BasicArrayOperationsTest) {
+    const size_t kVectorSize = 4;
+    int B[] = {4, 12, 133, 1100};
+    int16_t b16[kVectorSize];
+    int32_t b32[kVectorSize];
+
+    int16_t bTmp16[kVectorSize];
+    int32_t bTmp32[kVectorSize];
+
+    WebRtcSpl_MemSetW16(b16, 3, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(3, b16[kk]);
+    }
+    WebRtcSpl_ZerosArrayW16(b16, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(0, b16[kk]);
+    }
+    WebRtcSpl_MemSetW32(b32, 3, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(3, b32[kk]);
+    }
+    WebRtcSpl_ZerosArrayW32(b32, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(0, b32[kk]);
+    }
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        bTmp16[kk] = (int16_t)kk;
+        bTmp32[kk] = (int32_t)kk;
+    }
+    WEBRTC_SPL_MEMCPY_W16(b16, bTmp16, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(b16[kk], bTmp16[kk]);
+    }
+//    WEBRTC_SPL_MEMCPY_W32(b32, bTmp32, kVectorSize);
+//    for (int kk = 0; kk < kVectorSize; ++kk) {
+//        EXPECT_EQ(b32[kk], bTmp32[kk]);
+//    }
+    WebRtcSpl_CopyFromEndW16(b16, kVectorSize, 2, bTmp16);
+    for (size_t kk = 0; kk < 2; ++kk) {
+        EXPECT_EQ(static_cast<int16_t>(kk+2), bTmp16[kk]);
+    }
+
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        b32[kk] = B[kk];
+        b16[kk] = (int16_t)B[kk];
+    }
+    WebRtcSpl_VectorBitShiftW32ToW16(bTmp16, kVectorSize, b32, 1);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((B[kk]>>1), bTmp16[kk]);
+    }
+    WebRtcSpl_VectorBitShiftW16(bTmp16, kVectorSize, b16, 1);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((B[kk]>>1), bTmp16[kk]);
+    }
+    WebRtcSpl_VectorBitShiftW32(bTmp32, kVectorSize, b32, 1);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((B[kk]>>1), bTmp32[kk]);
+    }
+
+    WebRtcSpl_MemCpyReversedOrder(&bTmp16[3], b16, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(b16[3-kk], bTmp16[kk]);
+    }
+}
+
+TEST_F(SplTest, MinMaxOperationsTest) {
+  const size_t kVectorSize = 17;
+
+  // Vectors to test the cases where minimum values have to be caught
+  // outside of the unrolled loops in ARM-Neon.
+  int16_t vector16[kVectorSize] = {-1, 7485, 0, 3333,
+      -18283, 0, 12334, -29871, 988, -3333,
+      345, -456, 222, 999,  888, 8774, WEBRTC_SPL_WORD16_MIN};
+  int32_t vector32[kVectorSize] = {-1, 0, 283211, 3333,
+      8712345, 0, -3333, 89345, -374585456, 222, 999, 122345334,
+      -12389756, -987329871, 888, -2, WEBRTC_SPL_WORD32_MIN};
+
+  EXPECT_EQ(WEBRTC_SPL_WORD16_MIN,
+            WebRtcSpl_MinValueW16(vector16, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD32_MIN,
+            WebRtcSpl_MinValueW32(vector32, kVectorSize));
+  EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MinIndexW16(vector16, kVectorSize));
+  EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MinIndexW32(vector32, kVectorSize));
+
+  // Test the cases where maximum values have to be caught
+  // outside of the unrolled loops in ARM-Neon.
+  vector16[kVectorSize - 1] = WEBRTC_SPL_WORD16_MAX;
+  vector32[kVectorSize - 1] = WEBRTC_SPL_WORD32_MAX;
+
+  EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
+            WebRtcSpl_MaxAbsValueW16(vector16, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
+            WebRtcSpl_MaxValueW16(vector16, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
+            WebRtcSpl_MaxAbsValueW32(vector32, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
+            WebRtcSpl_MaxValueW32(vector32, kVectorSize));
+  EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxAbsIndexW16(vector16, kVectorSize));
+  EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxIndexW16(vector16, kVectorSize));
+  EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxIndexW32(vector32, kVectorSize));
+
+  // Test the cases where multiple maximum and minimum values are present.
+  vector16[1] = WEBRTC_SPL_WORD16_MAX;
+  vector16[6] = WEBRTC_SPL_WORD16_MIN;
+  vector16[11] = WEBRTC_SPL_WORD16_MIN;
+  vector32[1] = WEBRTC_SPL_WORD32_MAX;
+  vector32[6] = WEBRTC_SPL_WORD32_MIN;
+  vector32[11] = WEBRTC_SPL_WORD32_MIN;
+
+  EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
+            WebRtcSpl_MaxAbsValueW16(vector16, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
+            WebRtcSpl_MaxValueW16(vector16, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD16_MIN,
+            WebRtcSpl_MinValueW16(vector16, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
+            WebRtcSpl_MaxAbsValueW32(vector32, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
+            WebRtcSpl_MaxValueW32(vector32, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD32_MIN,
+            WebRtcSpl_MinValueW32(vector32, kVectorSize));
+  EXPECT_EQ(6u, WebRtcSpl_MaxAbsIndexW16(vector16, kVectorSize));
+  EXPECT_EQ(1u, WebRtcSpl_MaxIndexW16(vector16, kVectorSize));
+  EXPECT_EQ(1u, WebRtcSpl_MaxIndexW32(vector32, kVectorSize));
+  EXPECT_EQ(6u, WebRtcSpl_MinIndexW16(vector16, kVectorSize));
+  EXPECT_EQ(6u, WebRtcSpl_MinIndexW32(vector32, kVectorSize));
+}
+
+TEST_F(SplTest, VectorOperationsTest) {
+    const size_t kVectorSize = 4;
+    int B[] = {4, 12, 133, 1100};
+    int16_t a16[kVectorSize];
+    int16_t b16[kVectorSize];
+    int16_t bTmp16[kVectorSize];
+
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        a16[kk] = B[kk];
+        b16[kk] = B[kk];
+    }
+
+    WebRtcSpl_AffineTransformVector(bTmp16, b16, 3, 7, 2, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((B[kk]*3+7)>>2, bTmp16[kk]);
+    }
+    WebRtcSpl_ScaleAndAddVectorsWithRound(b16, 3, b16, 2, 2, bTmp16, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((B[kk]*3+B[kk]*2+2)>>2, bTmp16[kk]);
+    }
+
+    WebRtcSpl_AddAffineVectorToVector(bTmp16, b16, 3, 7, 2, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(((B[kk]*3+B[kk]*2+2)>>2)+((b16[kk]*3+7)>>2), bTmp16[kk]);
+    }
+
+    WebRtcSpl_ScaleVector(b16, bTmp16, 13, kVectorSize, 2);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((b16[kk]*13)>>2, bTmp16[kk]);
+    }
+    WebRtcSpl_ScaleVectorWithSat(b16, bTmp16, 13, kVectorSize, 2);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((b16[kk]*13)>>2, bTmp16[kk]);
+    }
+    WebRtcSpl_ScaleAndAddVectors(a16, 13, 2, b16, 7, 2, bTmp16, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(((a16[kk]*13)>>2)+((b16[kk]*7)>>2), bTmp16[kk]);
+    }
+
+    WebRtcSpl_AddVectorsAndShift(bTmp16, a16, b16, kVectorSize, 2);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(B[kk] >> 1, bTmp16[kk]);
+    }
+    WebRtcSpl_ReverseOrderMultArrayElements(bTmp16, a16, &b16[3], kVectorSize, 2);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((a16[kk]*b16[3-kk])>>2, bTmp16[kk]);
+    }
+    WebRtcSpl_ElementwiseVectorMult(bTmp16, a16, b16, kVectorSize, 6);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((a16[kk]*b16[kk])>>6, bTmp16[kk]);
+    }
+
+    WebRtcSpl_SqrtOfOneMinusXSquared(b16, kVectorSize, bTmp16);
+    for (size_t kk = 0; kk < kVectorSize - 1; ++kk) {
+        EXPECT_EQ(32767, bTmp16[kk]);
+    }
+    EXPECT_EQ(32749, bTmp16[kVectorSize - 1]);
+
+    EXPECT_EQ(0, WebRtcSpl_GetScalingSquare(b16, kVectorSize, 1));
+}
+
+TEST_F(SplTest, EstimatorsTest) {
+  const size_t kOrder = 2;
+  const int32_t unstable_filter[] = { 4, 12, 133, 1100 };
+  const int32_t stable_filter[] = { 1100, 133, 12, 4 };
+  int16_t lpc[kOrder + 2] = { 0 };
+  int16_t refl[kOrder + 2] = { 0 };
+  int16_t lpc_result[] = { 4096, -497, 15, 0 };
+  int16_t refl_result[] = { -3962, 123, 0, 0 };
+
+  EXPECT_EQ(0, WebRtcSpl_LevinsonDurbin(unstable_filter, lpc, refl, kOrder));
+  EXPECT_EQ(1, WebRtcSpl_LevinsonDurbin(stable_filter, lpc, refl, kOrder));
+  for (size_t i = 0; i < kOrder + 2; ++i) {
+    EXPECT_EQ(lpc_result[i], lpc[i]);
+    EXPECT_EQ(refl_result[i], refl[i]);
+  }
+}
+
+TEST_F(SplTest, FilterTest) {
+    const size_t kVectorSize = 4;
+    const size_t kFilterOrder = 3;
+    int16_t A[] = {1, 2, 33, 100};
+    int16_t A5[] = {1, 2, 33, 100, -5};
+    int16_t B[] = {4, 12, 133, 110};
+    int16_t data_in[kVectorSize];
+    int16_t data_out[kVectorSize];
+    int16_t bTmp16Low[kVectorSize];
+    int16_t bState[kVectorSize];
+    int16_t bStateLow[kVectorSize];
+
+    WebRtcSpl_ZerosArrayW16(bState, kVectorSize);
+    WebRtcSpl_ZerosArrayW16(bStateLow, kVectorSize);
+
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        data_in[kk] = A[kk];
+        data_out[kk] = 0;
+    }
+
+    // MA filters.
+    // Note that the input data has |kFilterOrder| states before the actual
+    // data (one sample).
+    WebRtcSpl_FilterMAFastQ12(&data_in[kFilterOrder], data_out, B,
+                              kFilterOrder + 1, 1);
+    EXPECT_EQ(0, data_out[0]);
+    // AR filters.
+    // Note that the output data has |kFilterOrder| states before the actual
+    // data (one sample).
+    WebRtcSpl_FilterARFastQ12(data_in, &data_out[kFilterOrder], A,
+                              kFilterOrder + 1, 1);
+    EXPECT_EQ(0, data_out[kFilterOrder]);
+
+    EXPECT_EQ(kVectorSize, WebRtcSpl_FilterAR(A5,
+                                              5,
+                                              data_in,
+                                              kVectorSize,
+                                              bState,
+                                              kVectorSize,
+                                              bStateLow,
+                                              kVectorSize,
+                                              data_out,
+                                              bTmp16Low,
+                                              kVectorSize));
+}
+
+TEST_F(SplTest, RandTest) {
+    const int kVectorSize = 4;
+    int16_t BU[] = {3653, 12446, 8525, 30691};
+    int16_t b16[kVectorSize];
+    uint32_t bSeed = 100000;
+
+    EXPECT_EQ(7086, WebRtcSpl_RandU(&bSeed));
+    EXPECT_EQ(31565, WebRtcSpl_RandU(&bSeed));
+    EXPECT_EQ(-9786, WebRtcSpl_RandN(&bSeed));
+    EXPECT_EQ(kVectorSize, WebRtcSpl_RandUArray(b16, kVectorSize, &bSeed));
+    for (int kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(BU[kk], b16[kk]);
+    }
+}
+
+TEST_F(SplTest, DotProductWithScaleTest) {
+  EXPECT_EQ(605362796, WebRtcSpl_DotProductWithScale(vector16,
+      vector16, kVector16Size, 2));
+}
+
+TEST_F(SplTest, CrossCorrelationTest) {
+  // Note the function arguments relation specificed by API.
+  const size_t kCrossCorrelationDimension = 3;
+  const int kShift = 2;
+  const int kStep = 1;
+  const size_t kSeqDimension = 6;
+
+  const int16_t kVector16[kVector16Size] = {1, 4323, 1963,
+    WEBRTC_SPL_WORD16_MAX, WEBRTC_SPL_WORD16_MIN + 5, -3333, -876, 8483, 142};
+  int32_t vector32[kCrossCorrelationDimension] = {0};
+
+  WebRtcSpl_CrossCorrelation(vector32, vector16, kVector16, kSeqDimension,
+                             kCrossCorrelationDimension, kShift, kStep);
+
+  // WebRtcSpl_CrossCorrelationC() and WebRtcSpl_CrossCorrelationNeon()
+  // are not bit-exact.
+  const int32_t kExpected[kCrossCorrelationDimension] =
+      {-266947903, -15579555, -171282001};
+  const int32_t* expected = kExpected;
+#if !defined(MIPS32_LE)
+  const int32_t kExpectedNeon[kCrossCorrelationDimension] =
+      {-266947901, -15579553, -171281999};
+  if (WebRtcSpl_CrossCorrelation != WebRtcSpl_CrossCorrelationC) {
+    expected = kExpectedNeon;
+  }
+#endif
+  for (size_t i = 0; i < kCrossCorrelationDimension; ++i) {
+    EXPECT_EQ(expected[i], vector32[i]);
+  }
+}
+
+TEST_F(SplTest, AutoCorrelationTest) {
+  int scale = 0;
+  int32_t vector32[kVector16Size];
+  const int32_t expected[kVector16Size] = {302681398, 14223410, -121705063,
+    -85221647, -17104971, 61806945, 6644603, -669329, 43};
+
+  EXPECT_EQ(kVector16Size,
+            WebRtcSpl_AutoCorrelation(vector16, kVector16Size,
+                                      kVector16Size - 1, vector32, &scale));
+  EXPECT_EQ(3, scale);
+  for (size_t i = 0; i < kVector16Size; ++i) {
+    EXPECT_EQ(expected[i], vector32[i]);
+  }
+}
+
+TEST_F(SplTest, SignalProcessingTest) {
+    const size_t kVectorSize = 4;
+    int A[] = {1, 2, 33, 100};
+    const int16_t kHanning[4] = { 2399, 8192, 13985, 16384 };
+    int16_t b16[kVectorSize];
+
+    int16_t bTmp16[kVectorSize];
+
+    int bScale = 0;
+
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        b16[kk] = A[kk];
+    }
+
+    // TODO(bjornv): Activate the Reflection Coefficient tests when refactoring.
+//    WebRtcSpl_ReflCoefToLpc(b16, kVectorSize, bTmp16);
+////    for (int kk = 0; kk < kVectorSize; ++kk) {
+////        EXPECT_EQ(aTmp16[kk], bTmp16[kk]);
+////    }
+//    WebRtcSpl_LpcToReflCoef(bTmp16, kVectorSize, b16);
+////    for (int kk = 0; kk < kVectorSize; ++kk) {
+////        EXPECT_EQ(a16[kk], b16[kk]);
+////    }
+//    WebRtcSpl_AutoCorrToReflCoef(b32, kVectorSize, bTmp16);
+////    for (int kk = 0; kk < kVectorSize; ++kk) {
+////        EXPECT_EQ(aTmp16[kk], bTmp16[kk]);
+////    }
+
+    WebRtcSpl_GetHanningWindow(bTmp16, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(kHanning[kk], bTmp16[kk]);
+    }
+
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        b16[kk] = A[kk];
+    }
+    EXPECT_EQ(11094 , WebRtcSpl_Energy(b16, kVectorSize, &bScale));
+    EXPECT_EQ(0, bScale);
+}
+
+TEST_F(SplTest, FFTTest) {
+    int16_t B[] = {1, 2, 33, 100,
+            2, 3, 34, 101,
+            3, 4, 35, 102,
+            4, 5, 36, 103};
+
+    EXPECT_EQ(0, WebRtcSpl_ComplexFFT(B, 3, 1));
+//    for (int kk = 0; kk < 16; ++kk) {
+//        EXPECT_EQ(A[kk], B[kk]);
+//    }
+    EXPECT_EQ(0, WebRtcSpl_ComplexIFFT(B, 3, 1));
+//    for (int kk = 0; kk < 16; ++kk) {
+//        EXPECT_EQ(A[kk], B[kk]);
+//    }
+    WebRtcSpl_ComplexBitReverse(B, 3);
+    for (int kk = 0; kk < 16; ++kk) {
+        //EXPECT_EQ(A[kk], B[kk]);
+    }
+}
+
+TEST_F(SplTest, Resample48WithSaturationTest) {
+  // The test resamples 3*kBlockSize number of samples to 2*kBlockSize number
+  // of samples.
+  const size_t kBlockSize = 16;
+
+  // Saturated input vector of 48 samples.
+  const int32_t kVectorSaturated[3 * kBlockSize + 7] = {
+     -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768,
+     -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768,
+     -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768,
+     32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+     32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+     32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+     32767, 32767, 32767, 32767, 32767, 32767, 32767
+  };
+
+  // All values in |out_vector| should be |kRefValue32kHz|.
+  const int32_t kRefValue32kHz1 = -1077493760;
+  const int32_t kRefValue32kHz2 = 1077493645;
+
+  // After bit shift with saturation, |out_vector_w16| is saturated.
+
+  const int16_t kRefValue16kHz1 = -32768;
+  const int16_t kRefValue16kHz2 = 32767;
+  // Vector for storing output.
+  int32_t out_vector[2 * kBlockSize];
+  int16_t out_vector_w16[2 * kBlockSize];
+
+  WebRtcSpl_Resample48khzTo32khz(kVectorSaturated, out_vector, kBlockSize);
+  WebRtcSpl_VectorBitShiftW32ToW16(out_vector_w16, 2 * kBlockSize, out_vector,
+                                   15);
+
+  // Comparing output values against references. The values at position
+  // 12-15 are skipped to account for the filter lag.
+  for (size_t i = 0; i < 12; ++i) {
+    EXPECT_EQ(kRefValue32kHz1, out_vector[i]);
+    EXPECT_EQ(kRefValue16kHz1, out_vector_w16[i]);
+  }
+  for (size_t i = 16; i < 2 * kBlockSize; ++i) {
+    EXPECT_EQ(kRefValue32kHz2, out_vector[i]);
+    EXPECT_EQ(kRefValue16kHz2, out_vector_w16[i]);
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_init.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_init.c
new file mode 100644
index 00000000..73c2039e
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_init.c
@@ -0,0 +1,140 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* The global function contained in this file initializes SPL function
+ * pointers, currently only for ARM platforms.
+ *
+ * Some code came from common/rtcd.c in the WebM project.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+
+/* Declare function pointers. */
+MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
+MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
+MaxValueW16 WebRtcSpl_MaxValueW16;
+MaxValueW32 WebRtcSpl_MaxValueW32;
+MinValueW16 WebRtcSpl_MinValueW16;
+MinValueW32 WebRtcSpl_MinValueW32;
+CrossCorrelation WebRtcSpl_CrossCorrelation;
+DownsampleFast WebRtcSpl_DownsampleFast;
+ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
+
+#if (defined(WEBRTC_DETECT_NEON) || !defined(WEBRTC_HAS_NEON)) && \
+    !defined(MIPS32_LE)
+/* Initialize function pointers to the generic C version. */
+static void InitPointersToC() {
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C;
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C;
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+}
+#endif
+
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+/* Initialize function pointers to the Neon version. */
+static void InitPointersToNeon() {
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon;
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon;
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+}
+#endif
+
+#if defined(MIPS32_LE)
+/* Initialize function pointers to the MIPS version. */
+static void InitPointersToMIPS() {
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips;
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;
+#if defined(MIPS_DSP_R1_LE)
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRound_mips;
+#else
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+#endif
+}
+#endif
+
+static void InitFunctionPointers(void) {
+#if defined(WEBRTC_DETECT_NEON)
+  if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
+    InitPointersToNeon();
+  } else {
+    InitPointersToC();
+  }
+#elif defined(WEBRTC_HAS_NEON)
+  InitPointersToNeon();
+#elif defined(MIPS32_LE)
+  InitPointersToMIPS();
+#else
+  InitPointersToC();
+#endif  /* WEBRTC_DETECT_NEON */
+}
+
+#if defined(WEBRTC_POSIX)
+#include <pthread.h>
+
+static void once(void (*func)(void)) {
+  static pthread_once_t lock = PTHREAD_ONCE_INIT;
+  pthread_once(&lock, func);
+}
+
+#elif defined(_WIN32)
+#include <windows.h>
+
+static void once(void (*func)(void)) {
+  /* Didn't use InitializeCriticalSection() since there's no race-free context
+   * in which to execute it.
+   *
+   * TODO(kma): Change to different implementation (e.g.
+   * InterlockedCompareExchangePointer) to avoid issues similar to
+   * http://code.google.com/p/webm/issues/detail?id=467.
+   */
+  static CRITICAL_SECTION lock = {(void *)((size_t)-1), -1, 0, 0, 0, 0};
+  static int done = 0;
+
+  EnterCriticalSection(&lock);
+  if (!done) {
+    func();
+    done = 1;
+  }
+  LeaveCriticalSection(&lock);
+}
+
+/* There's no fallback version as an #else block here to ensure thread safety.
+ * In case of neither pthread for WEBRTC_POSIX nor _WIN32 is present, build
+ * system should pick it up.
+ */
+#endif  /* WEBRTC_POSIX */
+
+void WebRtcSpl_Init() {
+  once(InitFunctionPointers);
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt.c
new file mode 100644
index 00000000..24db4f82
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt.c
@@ -0,0 +1,184 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_Sqrt().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <assert.h>
+
+int32_t WebRtcSpl_SqrtLocal(int32_t in);
+
+int32_t WebRtcSpl_SqrtLocal(int32_t in)
+{
+
+    int16_t x_half, t16;
+    int32_t A, B, x2;
+
+    /* The following block performs:
+     y=in/2
+     x=y-2^30
+     x_half=x/2^31
+     t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
+         + 0.875*((x_half)^5)
+     */
+
+    B = in / 2;
+
+    B = B - ((int32_t)0x40000000); // B = in/2 - 1/2
+    x_half = (int16_t)(B >> 16);  // x_half = x/2 = (in-1)/2
+    B = B + ((int32_t)0x40000000); // B = 1 + x/2
+    B = B + ((int32_t)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31)
+
+    x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2; // A = (x/2)^2
+    A = -x2; // A = -(x/2)^2
+    B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2
+
+    A >>= 16;
+    A = A * A * 2; // A = (x/2)^4
+    t16 = (int16_t)(A >> 16);
+    B += -20480 * t16 * 2;  // B = B - 0.625*A
+    // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4
+
+    A = x_half * t16 * 2;  // A = (x/2)^5
+    t16 = (int16_t)(A >> 16);
+    B += 28672 * t16 * 2;  // B = B + 0.875*A
+    // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5
+
+    t16 = (int16_t)(x2 >> 16);
+    A = x_half * t16 * 2;  // A = x/2^3
+
+    B = B + (A >> 1); // B = B + 0.5*A
+    // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5
+
+    B = B + ((int32_t)32768); // Round off bit
+
+    return B;
+}
+
+int32_t WebRtcSpl_Sqrt(int32_t value)
+{
+    /*
+     Algorithm:
+
+     Six term Taylor Series is used here to compute the square root of a number
+     y^0.5 = (1+x)^0.5 where x = y-1
+     = 1+(x/2)-0.5*((x/2)^2+0.5*((x/2)^3-0.625*((x/2)^4+0.875*((x/2)^5)
+     0.5 <= x < 1
+
+     Example of how the algorithm works, with ut=sqrt(in), and
+     with in=73632 and ut=271 (even shift value case):
+
+     in=73632
+     y= in/131072
+     x=y-1
+     t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
+     ut=t*(1/sqrt(2))*512
+
+     or:
+
+     in=73632
+     in2=73632*2^14
+     y= in2/2^31
+     x=y-1
+     t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
+     ut=t*(1/sqrt(2))
+     ut2=ut*2^9
+
+     which gives:
+
+     in  = 73632
+     in2 = 1206386688
+     y   = 0.56176757812500
+     x   = -0.43823242187500
+     t   = 0.74973506527313
+     ut  = 0.53014274874797
+     ut2 = 2.714330873589594e+002
+
+     or:
+
+     in=73632
+     in2=73632*2^14
+     y=in2/2
+     x=y-2^30
+     x_half=x/2^31
+     t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
+         + 0.875*((x_half)^5)
+     ut=t*(1/sqrt(2))
+     ut2=ut*2^9
+
+     which gives:
+
+     in  = 73632
+     in2 = 1206386688
+     y   = 603193344
+     x   = -470548480
+     x_half =  -0.21911621093750
+     t   = 0.74973506527313
+     ut  = 0.53014274874797
+     ut2 = 2.714330873589594e+002
+
+     */
+
+    int16_t x_norm, nshift, t16, sh;
+    int32_t A;
+
+    int16_t k_sqrt_2 = 23170; // 1/sqrt2 (==5a82)
+
+    A = value;
+
+    if (A == 0)
+        return (int32_t)0; // sqrt(0) = 0
+
+    sh = WebRtcSpl_NormW32(A); // # shifts to normalize A
+    A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A
+    if (A < (WEBRTC_SPL_WORD32_MAX - 32767))
+    {
+        A = A + ((int32_t)32768); // Round off bit
+    } else
+    {
+        A = WEBRTC_SPL_WORD32_MAX;
+    }
+
+    x_norm = (int16_t)(A >> 16);  // x_norm = AH
+
+    nshift = (sh / 2);
+    assert(nshift >= 0);
+
+    A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16);
+    A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16)
+    A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A)
+
+    if (2 * nshift == sh) {
+        // Even shift value case
+
+        t16 = (int16_t)(A >> 16);  // t16 = AH
+
+        A = k_sqrt_2 * t16 * 2;  // A = 1/sqrt(2)*t16
+        A = A + ((int32_t)32768); // Round off
+        A = A & ((int32_t)0x7fff0000); // Round off
+
+        A >>= 15;  // A = A>>16
+
+    } else
+    {
+        A >>= 16;  // A = A>>16
+    }
+
+    A = A & ((int32_t)0x0000ffff);
+    A >>= nshift;  // De-normalize the result.
+
+    return A;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor.c
new file mode 100644
index 00000000..370307a0
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor.c
@@ -0,0 +1,77 @@
+/*
+ * Written by Wilco Dijkstra, 1996. The following email exchange establishes the
+ * license.
+ *
+ * From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
+ * Date: Fri, Jun 24, 2011 at 3:20 AM
+ * Subject: Re: sqrt routine
+ * To: Kevin Ma <kma@google.com>
+ * Hi Kevin,
+ * Thanks for asking. Those routines are public domain (originally posted to
+ * comp.sys.arm a long time ago), so you can use them freely for any purpose.
+ * Cheers,
+ * Wilco
+ *
+ * ----- Original Message -----
+ * From: "Kevin Ma" <kma@google.com>
+ * To: <Wilco.Dijkstra@ntlworld.com>
+ * Sent: Thursday, June 23, 2011 11:44 PM
+ * Subject: Fwd: sqrt routine
+ * Hi Wilco,
+ * I saw your sqrt routine from several web sites, including
+ * http://www.finesse.demon.co.uk/steven/sqrt.html.
+ * Just wonder if there's any copyright information with your Successive
+ * approximation routines, or if I can freely use it for any purpose.
+ * Thanks.
+ * Kevin
+ */
+
+// Minor modifications in code style for WebRTC, 2012.
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/*
+ * Algorithm:
+ * Successive approximation of the equation (root + delta) ^ 2 = N
+ * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
+ * Use delta = 2^i for i = 15 .. 0.
+ *
+ * Output precision is 16 bits. Note for large input values (close to
+ * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
+ * contains the MSB information (a non-sign value). Do with caution
+ * if you need to cast the output to int16_t type.
+ *
+ * If the input value is negative, it returns 0.
+ */
+
+#define WEBRTC_SPL_SQRT_ITER(N)                 \
+  try1 = root + (1 << (N));                     \
+  if (value >= try1 << (N))                     \
+  {                                             \
+    value -= try1 << (N);                       \
+    root |= 2 << (N);                           \
+  }
+
+int32_t WebRtcSpl_SqrtFloor(int32_t value)
+{
+  int32_t root = 0, try1;
+
+  WEBRTC_SPL_SQRT_ITER (15);
+  WEBRTC_SPL_SQRT_ITER (14);
+  WEBRTC_SPL_SQRT_ITER (13);
+  WEBRTC_SPL_SQRT_ITER (12);
+  WEBRTC_SPL_SQRT_ITER (11);
+  WEBRTC_SPL_SQRT_ITER (10);
+  WEBRTC_SPL_SQRT_ITER ( 9);
+  WEBRTC_SPL_SQRT_ITER ( 8);
+  WEBRTC_SPL_SQRT_ITER ( 7);
+  WEBRTC_SPL_SQRT_ITER ( 6);
+  WEBRTC_SPL_SQRT_ITER ( 5);
+  WEBRTC_SPL_SQRT_ITER ( 4);
+  WEBRTC_SPL_SQRT_ITER ( 3);
+  WEBRTC_SPL_SQRT_ITER ( 2);
+  WEBRTC_SPL_SQRT_ITER ( 1);
+  WEBRTC_SPL_SQRT_ITER ( 0);
+
+  return root >> 1;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S
new file mode 100644
index 00000000..f44ddd46
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S
@@ -0,0 +1,110 @@
+@
+@ Written by Wilco Dijkstra, 1996. The following email exchange establishes the
+@ license.
+@
+@ From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
+@ Date: Fri, Jun 24, 2011 at 3:20 AM
+@ Subject: Re: sqrt routine
+@ To: Kevin Ma <kma@google.com>
+@ Hi Kevin,
+@ Thanks for asking. Those routines are public domain (originally posted to
+@ comp.sys.arm a long time ago), so you can use them freely for any purpose.
+@ Cheers,
+@ Wilco
+@
+@ ----- Original Message -----
+@ From: "Kevin Ma" <kma@google.com>
+@ To: <Wilco.Dijkstra@ntlworld.com>
+@ Sent: Thursday, June 23, 2011 11:44 PM
+@ Subject: Fwd: sqrt routine
+@ Hi Wilco,
+@ I saw your sqrt routine from several web sites, including
+@ http://www.finesse.demon.co.uk/steven/sqrt.html.
+@ Just wonder if there's any copyright information with your Successive
+@ approximation routines, or if I can freely use it for any purpose.
+@ Thanks.
+@ Kevin
+
+@ Minor modifications in code style for WebRTC, 2012.
+@ Output is bit-exact with the reference C code in spl_sqrt_floor.c.
+
+@ Input :             r0 32 bit unsigned integer
+@ Output:             r0 = INT (SQRT (r0)), precision is 16 bits
+@ Registers touched:  r1, r2
+
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+
+GLOBAL_FUNCTION WebRtcSpl_SqrtFloor
+.align  2
+DEFINE_FUNCTION WebRtcSpl_SqrtFloor
+  mov    r1, #3 << 30
+  mov    r2, #1 << 30
+
+  @ unroll for i = 0 .. 15
+
+  cmp    r0, r2, ror #2 * 0
+  subhs  r0, r0, r2, ror #2 * 0
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 1
+  subhs  r0, r0, r2, ror #2 * 1
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 2
+  subhs  r0, r0, r2, ror #2 * 2
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 3
+  subhs  r0, r0, r2, ror #2 * 3
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 4
+  subhs  r0, r0, r2, ror #2 * 4
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 5
+  subhs  r0, r0, r2, ror #2 * 5
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 6
+  subhs  r0, r0, r2, ror #2 * 6
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 7
+  subhs  r0, r0, r2, ror #2 * 7
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 8
+  subhs  r0, r0, r2, ror #2 * 8
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 9
+  subhs  r0, r0, r2, ror #2 * 9
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 10
+  subhs  r0, r0, r2, ror #2 * 10
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 11
+  subhs  r0, r0, r2, ror #2 * 11
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 12
+  subhs  r0, r0, r2, ror #2 * 12
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 13
+  subhs  r0, r0, r2, ror #2 * 13
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 14
+  subhs  r0, r0, r2, ror #2 * 14
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 15
+  subhs  r0, r0, r2, ror #2 * 15
+  adc    r2, r1, r2, lsl #1
+
+  bic    r0, r2, #3 << 30  @ for rounding add: cmp r0, r2  adc r2, #1
+  bx lr
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c
new file mode 100644
index 00000000..8716459b
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c
@@ -0,0 +1,207 @@
+/*
+ * Written by Wilco Dijkstra, 1996. The following email exchange establishes the
+ * license.
+ *
+ * From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
+ * Date: Fri, Jun 24, 2011 at 3:20 AM
+ * Subject: Re: sqrt routine
+ * To: Kevin Ma <kma@google.com>
+ * Hi Kevin,
+ * Thanks for asking. Those routines are public domain (originally posted to
+ * comp.sys.arm a long time ago), so you can use them freely for any purpose.
+ * Cheers,
+ * Wilco
+ *
+ * ----- Original Message -----
+ * From: "Kevin Ma" <kma@google.com>
+ * To: <Wilco.Dijkstra@ntlworld.com>
+ * Sent: Thursday, June 23, 2011 11:44 PM
+ * Subject: Fwd: sqrt routine
+ * Hi Wilco,
+ * I saw your sqrt routine from several web sites, including
+ * http://www.finesse.demon.co.uk/steven/sqrt.html.
+ * Just wonder if there's any copyright information with your Successive
+ * approximation routines, or if I can freely use it for any purpose.
+ * Thanks.
+ * Kevin
+ */
+
+// Minor modifications in code style for WebRTC, 2012.
+// Code optimizations for MIPS, 2013.
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/*
+ * Algorithm:
+ * Successive approximation of the equation (root + delta) ^ 2 = N
+ * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
+ * Use delta = 2^i for i = 15 .. 0.
+ *
+ * Output precision is 16 bits. Note for large input values (close to
+ * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
+ * contains the MSB information (a non-sign value). Do with caution
+ * if you need to cast the output to int16_t type.
+ *
+ * If the input value is negative, it returns 0.
+ */
+
+
+int32_t WebRtcSpl_SqrtFloor(int32_t value)
+{
+  int32_t root = 0, tmp1, tmp2, tmp3, tmp4;
+
+  __asm __volatile(
+    ".set   push                                       \n\t"
+    ".set   noreorder                                  \n\t"
+
+    "lui    %[tmp1],      0x4000                       \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "sub    %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "lui    %[tmp1],      0x1                          \n\t"
+    "or     %[tmp4],      %[root],      %[tmp1]        \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x4000         \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      14                           \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x8000         \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x2000         \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      13                           \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x4000         \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x1000         \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      12                           \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x2000         \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x800          \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      11                           \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x1000         \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x400          \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      10                           \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x800          \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x200          \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      9                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],       0x400         \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x100          \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      8                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x200          \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x80           \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      7                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x100          \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x40           \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      6                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x80           \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x20           \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      5                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x40           \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x10           \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      4                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x20           \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x8            \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      3                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x10           \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x4            \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      2                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x8            \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x2            \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      1                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x4            \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    "addiu  %[tmp1],      $0,           0x1            \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x2            \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    ".set   pop                                        \n\t"
+
+    : [root] "+r" (root), [value] "+r" (value),
+      [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
+      [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
+    :
+  );
+
+  return root >> 1;
+}
+
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/splitting_filter.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/splitting_filter.c
new file mode 100644
index 00000000..36fcf355
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/splitting_filter.c
@@ -0,0 +1,208 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This file contains the splitting filter functions.
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <assert.h>
+
+// Maximum number of samples in a low/high-band frame.
+enum
+{
+    kMaxBandFrameLength = 320  // 10 ms at 64 kHz.
+};
+
+// QMF filter coefficients in Q16.
+static const uint16_t WebRtcSpl_kAllPassFilter1[3] = {6418, 36982, 57261};
+static const uint16_t WebRtcSpl_kAllPassFilter2[3] = {21333, 49062, 63010};
+
+///////////////////////////////////////////////////////////////////////////////////////////////
+// WebRtcSpl_AllPassQMF(...)
+//
+// Allpass filter used by the analysis and synthesis parts of the QMF filter.
+//
+// Input:
+//    - in_data             : Input data sequence (Q10)
+//    - data_length         : Length of data sequence (>2)
+//    - filter_coefficients : Filter coefficients (length 3, Q16)
+//
+// Input & Output:
+//    - filter_state        : Filter state (length 6, Q10).
+//
+// Output:
+//    - out_data            : Output data sequence (Q10), length equal to
+//                            |data_length|
+//
+
+void WebRtcSpl_AllPassQMF(int32_t* in_data, size_t data_length,
+                          int32_t* out_data, const uint16_t* filter_coefficients,
+                          int32_t* filter_state)
+{
+    // The procedure is to filter the input with three first order all pass filters
+    // (cascade operations).
+    //
+    //         a_3 + q^-1    a_2 + q^-1    a_1 + q^-1
+    // y[n] =  -----------   -----------   -----------   x[n]
+    //         1 + a_3q^-1   1 + a_2q^-1   1 + a_1q^-1
+    //
+    // The input vector |filter_coefficients| includes these three filter coefficients.
+    // The filter state contains the in_data state, in_data[-1], followed by
+    // the out_data state, out_data[-1]. This is repeated for each cascade.
+    // The first cascade filter will filter the |in_data| and store the output in
+    // |out_data|. The second will the take the |out_data| as input and make an
+    // intermediate storage in |in_data|, to save memory. The third, and final, cascade
+    // filter operation takes the |in_data| (which is the output from the previous cascade
+    // filter) and store the output in |out_data|.
+    // Note that the input vector values are changed during the process.
+    size_t k;
+    int32_t diff;
+    // First all-pass cascade; filter from in_data to out_data.
+
+    // Let y_i[n] indicate the output of cascade filter i (with filter coefficient a_i) at
+    // vector position n. Then the final output will be y[n] = y_3[n]
+
+    // First loop, use the states stored in memory.
+    // "diff" should be safe from wrap around since max values are 2^25
+    // diff = (x[0] - y_1[-1])
+    diff = WebRtcSpl_SubSatW32(in_data[0], filter_state[1]);
+    // y_1[0] =  x[-1] + a_1 * (x[0] - y_1[-1])
+    out_data[0] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[0], diff, filter_state[0]);
+
+    // For the remaining loops, use previous values.
+    for (k = 1; k < data_length; k++)
+    {
+        // diff = (x[n] - y_1[n-1])
+        diff = WebRtcSpl_SubSatW32(in_data[k], out_data[k - 1]);
+        // y_1[n] =  x[n-1] + a_1 * (x[n] - y_1[n-1])
+        out_data[k] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[0], diff, in_data[k - 1]);
+    }
+
+    // Update states.
+    filter_state[0] = in_data[data_length - 1]; // x[N-1], becomes x[-1] next time
+    filter_state[1] = out_data[data_length - 1]; // y_1[N-1], becomes y_1[-1] next time
+
+    // Second all-pass cascade; filter from out_data to in_data.
+    // diff = (y_1[0] - y_2[-1])
+    diff = WebRtcSpl_SubSatW32(out_data[0], filter_state[3]);
+    // y_2[0] =  y_1[-1] + a_2 * (y_1[0] - y_2[-1])
+    in_data[0] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[1], diff, filter_state[2]);
+    for (k = 1; k < data_length; k++)
+    {
+        // diff = (y_1[n] - y_2[n-1])
+        diff = WebRtcSpl_SubSatW32(out_data[k], in_data[k - 1]);
+        // y_2[0] =  y_1[-1] + a_2 * (y_1[0] - y_2[-1])
+        in_data[k] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[1], diff, out_data[k-1]);
+    }
+
+    filter_state[2] = out_data[data_length - 1]; // y_1[N-1], becomes y_1[-1] next time
+    filter_state[3] = in_data[data_length - 1]; // y_2[N-1], becomes y_2[-1] next time
+
+    // Third all-pass cascade; filter from in_data to out_data.
+    // diff = (y_2[0] - y[-1])
+    diff = WebRtcSpl_SubSatW32(in_data[0], filter_state[5]);
+    // y[0] =  y_2[-1] + a_3 * (y_2[0] - y[-1])
+    out_data[0] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[2], diff, filter_state[4]);
+    for (k = 1; k < data_length; k++)
+    {
+        // diff = (y_2[n] - y[n-1])
+        diff = WebRtcSpl_SubSatW32(in_data[k], out_data[k - 1]);
+        // y[n] =  y_2[n-1] + a_3 * (y_2[n] - y[n-1])
+        out_data[k] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[2], diff, in_data[k-1]);
+    }
+    filter_state[4] = in_data[data_length - 1]; // y_2[N-1], becomes y_2[-1] next time
+    filter_state[5] = out_data[data_length - 1]; // y[N-1], becomes y[-1] next time
+}
+
+void WebRtcSpl_AnalysisQMF(const int16_t* in_data, size_t in_data_length,
+                           int16_t* low_band, int16_t* high_band,
+                           int32_t* filter_state1, int32_t* filter_state2)
+{
+    size_t i;
+    int16_t k;
+    int32_t tmp;
+    int32_t half_in1[kMaxBandFrameLength];
+    int32_t half_in2[kMaxBandFrameLength];
+    int32_t filter1[kMaxBandFrameLength];
+    int32_t filter2[kMaxBandFrameLength];
+    const size_t band_length = in_data_length / 2;
+    assert(in_data_length % 2 == 0);
+    assert(band_length <= kMaxBandFrameLength);
+
+    // Split even and odd samples. Also shift them to Q10.
+    for (i = 0, k = 0; i < band_length; i++, k += 2)
+    {
+        half_in2[i] = WEBRTC_SPL_LSHIFT_W32((int32_t)in_data[k], 10);
+        half_in1[i] = WEBRTC_SPL_LSHIFT_W32((int32_t)in_data[k + 1], 10);
+    }
+
+    // All pass filter even and odd samples, independently.
+    WebRtcSpl_AllPassQMF(half_in1, band_length, filter1,
+                         WebRtcSpl_kAllPassFilter1, filter_state1);
+    WebRtcSpl_AllPassQMF(half_in2, band_length, filter2,
+                         WebRtcSpl_kAllPassFilter2, filter_state2);
+
+    // Take the sum and difference of filtered version of odd and even
+    // branches to get upper & lower band.
+    for (i = 0; i < band_length; i++)
+    {
+        tmp = (filter1[i] + filter2[i] + 1024) >> 11;
+        low_band[i] = WebRtcSpl_SatW32ToW16(tmp);
+
+        tmp = (filter1[i] - filter2[i] + 1024) >> 11;
+        high_band[i] = WebRtcSpl_SatW32ToW16(tmp);
+    }
+}
+
+void WebRtcSpl_SynthesisQMF(const int16_t* low_band, const int16_t* high_band,
+                            size_t band_length, int16_t* out_data,
+                            int32_t* filter_state1, int32_t* filter_state2)
+{
+    int32_t tmp;
+    int32_t half_in1[kMaxBandFrameLength];
+    int32_t half_in2[kMaxBandFrameLength];
+    int32_t filter1[kMaxBandFrameLength];
+    int32_t filter2[kMaxBandFrameLength];
+    size_t i;
+    int16_t k;
+    assert(band_length <= kMaxBandFrameLength);
+
+    // Obtain the sum and difference channels out of upper and lower-band channels.
+    // Also shift to Q10 domain.
+    for (i = 0; i < band_length; i++)
+    {
+        tmp = (int32_t)low_band[i] + (int32_t)high_band[i];
+        half_in1[i] = WEBRTC_SPL_LSHIFT_W32(tmp, 10);
+        tmp = (int32_t)low_band[i] - (int32_t)high_band[i];
+        half_in2[i] = WEBRTC_SPL_LSHIFT_W32(tmp, 10);
+    }
+
+    // all-pass filter the sum and difference channels
+    WebRtcSpl_AllPassQMF(half_in1, band_length, filter1,
+                         WebRtcSpl_kAllPassFilter2, filter_state1);
+    WebRtcSpl_AllPassQMF(half_in2, band_length, filter2,
+                         WebRtcSpl_kAllPassFilter1, filter_state2);
+
+    // The filtered signals are even and odd samples of the output. Combine
+    // them. The signals are Q10 should shift them back to Q0 and take care of
+    // saturation.
+    for (i = 0, k = 0; i < band_length; i++)
+    {
+        tmp = (filter2[i] + 512) >> 10;
+        out_data[k++] = WebRtcSpl_SatW32ToW16(tmp);
+
+        tmp = (filter1[i] + 512) >> 10;
+        out_data[k++] = WebRtcSpl_SatW32ToW16(tmp);
+    }
+
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c
new file mode 100644
index 00000000..ff78b522
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c
@@ -0,0 +1,35 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_SqrtOfOneMinusXSquared().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t *xQ15, size_t vector_length,
+                                      int16_t *yQ15)
+{
+    int32_t sq;
+    size_t m;
+    int16_t tmp;
+
+    for (m = 0; m < vector_length; m++)
+    {
+        tmp = xQ15[m];
+        sq = tmp * tmp;  // x^2 in Q30
+        sq = 1073741823 - sq; // 1-x^2, where 1 ~= 0.99999999906 is 1073741823 in Q30
+        sq = WebRtcSpl_Sqrt(sq); // sqrt(1-x^2) in Q15
+        yQ15[m] = (int16_t)sq;
+    }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c
new file mode 100644
index 00000000..fdefd067
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c
@@ -0,0 +1,165 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the functions
+ * WebRtcSpl_VectorBitShiftW16()
+ * WebRtcSpl_VectorBitShiftW32()
+ * WebRtcSpl_VectorBitShiftW32ToW16()
+ * WebRtcSpl_ScaleVector()
+ * WebRtcSpl_ScaleVectorWithSat()
+ * WebRtcSpl_ScaleAndAddVectors()
+ * WebRtcSpl_ScaleAndAddVectorsWithRoundC()
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length,
+                                 const int16_t *in, int16_t right_shifts)
+{
+    size_t i;
+
+    if (right_shifts > 0)
+    {
+        for (i = length; i > 0; i--)
+        {
+            (*res++) = ((*in++) >> right_shifts);
+        }
+    } else
+    {
+        for (i = length; i > 0; i--)
+        {
+            (*res++) = ((*in++) << (-right_shifts));
+        }
+    }
+}
+
+void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector,
+                                 size_t vector_length,
+                                 const int32_t *in_vector,
+                                 int16_t right_shifts)
+{
+    size_t i;
+
+    if (right_shifts > 0)
+    {
+        for (i = vector_length; i > 0; i--)
+        {
+            (*out_vector++) = ((*in_vector++) >> right_shifts);
+        }
+    } else
+    {
+        for (i = vector_length; i > 0; i--)
+        {
+            (*out_vector++) = ((*in_vector++) << (-right_shifts));
+        }
+    }
+}
+
+void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length,
+                                      const int32_t* in, int right_shifts) {
+  size_t i;
+  int32_t tmp_w32;
+
+  if (right_shifts >= 0) {
+    for (i = length; i > 0; i--) {
+      tmp_w32 = (*in++) >> right_shifts;
+      (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32);
+    }
+  } else {
+    int left_shifts = -right_shifts;
+    for (i = length; i > 0; i--) {
+      tmp_w32 = (*in++) << left_shifts;
+      (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32);
+    }
+  }
+}
+
+void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector,
+                           int16_t gain, size_t in_vector_length,
+                           int16_t right_shifts)
+{
+    // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts
+    size_t i;
+    const int16_t *inptr;
+    int16_t *outptr;
+
+    inptr = in_vector;
+    outptr = out_vector;
+
+    for (i = 0; i < in_vector_length; i++)
+    {
+      *outptr++ = (int16_t)((*inptr++ * gain) >> right_shifts);
+    }
+}
+
+void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector,
+                                 int16_t gain, size_t in_vector_length,
+                                 int16_t right_shifts)
+{
+    // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts
+    size_t i;
+    const int16_t *inptr;
+    int16_t *outptr;
+
+    inptr = in_vector;
+    outptr = out_vector;
+
+    for (i = 0; i < in_vector_length; i++) {
+      *outptr++ = WebRtcSpl_SatW32ToW16((*inptr++ * gain) >> right_shifts);
+    }
+}
+
+void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1,
+                                  const int16_t *in2, int16_t gain2, int shift2,
+                                  int16_t *out, size_t vector_length)
+{
+    // Performs vector operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2
+    size_t i;
+    const int16_t *in1ptr;
+    const int16_t *in2ptr;
+    int16_t *outptr;
+
+    in1ptr = in1;
+    in2ptr = in2;
+    outptr = out;
+
+    for (i = 0; i < vector_length; i++)
+    {
+      *outptr++ = (int16_t)((gain1 * *in1ptr++) >> shift1) +
+          (int16_t)((gain2 * *in2ptr++) >> shift2);
+    }
+}
+
+// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms.
+int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
+                                           int16_t in_vector1_scale,
+                                           const int16_t* in_vector2,
+                                           int16_t in_vector2_scale,
+                                           int right_shifts,
+                                           int16_t* out_vector,
+                                           size_t length) {
+  size_t i = 0;
+  int round_value = (1 << right_shifts) >> 1;
+
+  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
+      length == 0 || right_shifts < 0) {
+    return -1;
+  }
+
+  for (i = 0; i < length; i++) {
+    out_vector[i] = (int16_t)((
+        in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale +
+        round_value) >> right_shifts);
+  }
+
+  return 0;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c
new file mode 100644
index 00000000..dd73eeae
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c
@@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the functions
+ * WebRtcSpl_ScaleAndAddVectorsWithRound_mips()
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1,
+                                               int16_t in_vector1_scale,
+                                               const int16_t* in_vector2,
+                                               int16_t in_vector2_scale,
+                                               int right_shifts,
+                                               int16_t* out_vector,
+                                               size_t length) {
+  int16_t r0 = 0, r1 = 0;
+  int16_t *in1 = (int16_t*)in_vector1;
+  int16_t *in2 = (int16_t*)in_vector2;
+  int16_t *out = out_vector;
+  size_t i = 0;
+  int value32 = 0;
+
+  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
+      length == 0 || right_shifts < 0) {
+    return -1;
+  }
+  for (i = 0; i < length; i++) {
+    __asm __volatile (
+      "lh         %[r0],          0(%[in1])                               \n\t"
+      "lh         %[r1],          0(%[in2])                               \n\t"
+      "mult       %[r0],          %[in_vector1_scale]                     \n\t"
+      "madd       %[r1],          %[in_vector2_scale]                     \n\t"
+      "extrv_r.w  %[value32],     $ac0,               %[right_shifts]     \n\t"
+      "addiu      %[in1],         %[in1],             2                   \n\t"
+      "addiu      %[in2],         %[in2],             2                   \n\t"
+      "sh         %[value32],     0(%[out])                               \n\t"
+      "addiu      %[out],         %[out],             2                   \n\t"
+      : [value32] "=&r" (value32), [out] "+r" (out), [in1] "+r" (in1),
+        [in2] "+r" (in2), [r0] "=&r" (r0), [r1] "=&r" (r1)
+      : [in_vector1_scale] "r" (in_vector1_scale),
+        [in_vector2_scale] "r" (in_vector2_scale),
+        [right_shifts] "r" (right_shifts)
+      : "hi", "lo", "memory"
+    );
+  }
+  return 0;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/wav_file.h b/third_party/webrtc/src/webrtc/common_audio/wav_file.h
new file mode 100644
index 00000000..2eadd3f7
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/wav_file.h
@@ -0,0 +1,115 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_WAV_FILE_H_
+#define WEBRTC_COMMON_AUDIO_WAV_FILE_H_
+
+#ifdef __cplusplus
+
+#include <stdint.h>
+#include <cstddef>
+#include <string>
+
+#include "webrtc/base/constructormagic.h"
+
+namespace webrtc {
+
+// Interface to provide access to WAV file parameters.
+class WavFile {
+ public:
+  virtual ~WavFile() {}
+
+  virtual int sample_rate() const = 0;
+  virtual int num_channels() const = 0;
+  virtual uint32_t num_samples() const = 0;
+};
+
+// Simple C++ class for writing 16-bit PCM WAV files. All error handling is
+// by calls to RTC_CHECK(), making it unsuitable for anything but debug code.
+class WavWriter final : public WavFile {
+ public:
+  // Open a new WAV file for writing.
+  WavWriter(const std::string& filename, int sample_rate, int num_channels);
+
+  // Close the WAV file, after writing its header.
+  ~WavWriter();
+
+  // Write additional samples to the file. Each sample is in the range
+  // [-32768,32767], and there must be the previously specified number of
+  // interleaved channels.
+  void WriteSamples(const float* samples, size_t num_samples);
+  void WriteSamples(const int16_t* samples, size_t num_samples);
+
+  int sample_rate() const override { return sample_rate_; }
+  int num_channels() const override { return num_channels_; }
+  uint32_t num_samples() const override { return num_samples_; }
+
+ private:
+  void Close();
+  const int sample_rate_;
+  const int num_channels_;
+  uint32_t num_samples_;  // Total number of samples written to file.
+  FILE* file_handle_;  // Output file, owned by this class
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(WavWriter);
+};
+
+// Follows the conventions of WavWriter.
+class WavReader final : public WavFile {
+ public:
+  // Opens an existing WAV file for reading.
+  explicit WavReader(const std::string& filename);
+
+  // Close the WAV file.
+  ~WavReader();
+
+  // Returns the number of samples read. If this is less than requested,
+  // verifies that the end of the file was reached.
+  size_t ReadSamples(size_t num_samples, float* samples);
+  size_t ReadSamples(size_t num_samples, int16_t* samples);
+
+  int sample_rate() const override { return sample_rate_; }
+  int num_channels() const override { return num_channels_; }
+  uint32_t num_samples() const override { return num_samples_; }
+
+ private:
+  void Close();
+  int sample_rate_;
+  int num_channels_;
+  uint32_t num_samples_;  // Total number of samples in the file.
+  uint32_t num_samples_remaining_;
+  FILE* file_handle_;  // Input file, owned by this class.
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(WavReader);
+};
+
+}  // namespace webrtc
+
+extern "C" {
+#endif  // __cplusplus
+
+// C wrappers for the WavWriter class.
+typedef struct rtc_WavWriter rtc_WavWriter;
+rtc_WavWriter* rtc_WavOpen(const char* filename,
+                           int sample_rate,
+                           int num_channels);
+void rtc_WavClose(rtc_WavWriter* wf);
+void rtc_WavWriteSamples(rtc_WavWriter* wf,
+                         const float* samples,
+                         size_t num_samples);
+int rtc_WavSampleRate(const rtc_WavWriter* wf);
+int rtc_WavNumChannels(const rtc_WavWriter* wf);
+uint32_t rtc_WavNumSamples(const rtc_WavWriter* wf);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // WEBRTC_COMMON_AUDIO_WAV_FILE_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_common.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_common.h
new file mode 100644
index 00000000..1e24ca99
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_common.h
@@ -0,0 +1,32 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
+
+#include "webrtc/typedefs.h"
+
+#ifdef _MSC_VER /* visual c++ */
+#define ALIGN16_BEG __declspec(align(16))
+#define ALIGN16_END
+#else /* gcc or icc */
+#define ALIGN16_BEG
+#define ALIGN16_END __attribute__((aligned(16)))
+#endif
+
+extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65];
+extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65];
+extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65];
+extern const float WebRtcAec_kExtendedSmoothingCoefficients[2][2];
+extern const float WebRtcAec_kNormalSmoothingCoefficients[2][2];
+extern const float WebRtcAec_kMinFarendPSD;
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.c
new file mode 100644
index 00000000..b2162ac0
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.c
@@ -0,0 +1,1929 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The core AEC algorithm, which is presented with time-aligned signals.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+#include <stdio.h>
+#endif
+
+#include <assert.h>
+#include <math.h>
+#include <stddef.h>  // size_t
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+#include "webrtc/modules/audio_processing/logging/aec_logging.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
+
+
+// Buffer size (samples)
+static const size_t kBufSizePartitions = 250;  // 1 second of audio in 16 kHz.
+
+// Metrics
+static const int subCountLen = 4;
+static const int countLen = 50;
+static const int kDelayMetricsAggregationWindow = 1250;  // 5 seconds at 16 kHz.
+
+// Quantities to control H band scaling for SWB input
+static const int flagHbandCn = 1;  // flag for adding comfort noise in H band
+static const float cnScaleHband =
+    (float)0.4;  // scale for comfort noise in H band
+// Initial bin for averaging nlp gain in low band
+static const int freqAvgIc = PART_LEN / 2;
+
+// Matlab code to produce table:
+// win = sqrt(hanning(63)); win = [0 ; win(1:32)];
+// fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
+ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = {
+    0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f,
+    0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f,
+    0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
+    0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f,
+    0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f,
+    0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
+    0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f,
+    0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f,
+    0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
+    0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f,
+    0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f,
+    0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
+    0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f,
+    0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f,
+    0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
+    0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f,
+    1.00000000000000f};
+
+// Matlab code to produce table:
+// weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
+// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
+ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65] = {
+    0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f, 0.1845f, 0.1926f,
+    0.2000f, 0.2069f, 0.2134f, 0.2195f, 0.2254f, 0.2309f, 0.2363f, 0.2414f,
+    0.2464f, 0.2512f, 0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f,
+    0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f, 0.3035f, 0.3070f,
+    0.3104f, 0.3138f, 0.3171f, 0.3204f, 0.3236f, 0.3268f, 0.3299f, 0.3330f,
+    0.3360f, 0.3390f, 0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f,
+    0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f, 0.3752f, 0.3777f,
+    0.3803f, 0.3828f, 0.3854f, 0.3878f, 0.3903f, 0.3928f, 0.3952f, 0.3976f,
+    0.4000f};
+
+// Matlab code to produce table:
+// overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
+// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
+ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65] = {
+    1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f, 1.3062f, 1.3307f,
+    1.3536f, 1.3750f, 1.3953f, 1.4146f, 1.4330f, 1.4507f, 1.4677f, 1.4841f,
+    1.5000f, 1.5154f, 1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f,
+    1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f, 1.6847f, 1.6960f,
+    1.7071f, 1.7181f, 1.7289f, 1.7395f, 1.7500f, 1.7603f, 1.7706f, 1.7806f,
+    1.7906f, 1.8004f, 1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f,
+    1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f, 1.9186f, 1.9270f,
+    1.9354f, 1.9437f, 1.9520f, 1.9601f, 1.9682f, 1.9763f, 1.9843f, 1.9922f,
+    2.0000f};
+
+// Delay Agnostic AEC parameters, still under development and may change.
+static const float kDelayQualityThresholdMax = 0.07f;
+static const float kDelayQualityThresholdMin = 0.01f;
+static const int kInitialShiftOffset = 5;
+#if !defined(WEBRTC_ANDROID)
+static const int kDelayCorrectionStart = 1500;  // 10 ms chunks
+#endif
+
+// Target suppression levels for nlp modes.
+// log{0.001, 0.00001, 0.00000001}
+static const float kTargetSupp[3] = {-6.9f, -11.5f, -18.4f};
+
+// Two sets of parameters, one for the extended filter mode.
+static const float kExtendedMinOverDrive[3] = {3.0f, 6.0f, 15.0f};
+static const float kNormalMinOverDrive[3] = {1.0f, 2.0f, 5.0f};
+const float WebRtcAec_kExtendedSmoothingCoefficients[2][2] = {{0.9f, 0.1f},
+                                                              {0.92f, 0.08f}};
+const float WebRtcAec_kNormalSmoothingCoefficients[2][2] = {{0.9f, 0.1f},
+                                                            {0.93f, 0.07f}};
+
+// Number of partitions forming the NLP's "preferred" bands.
+enum {
+  kPrefBandSize = 24
+};
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+extern int webrtc_aec_instance_count;
+#endif
+
+WebRtcAecFilterFar WebRtcAec_FilterFar;
+WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
+WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
+WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
+WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
+WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
+
+__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
+  return aRe * bRe - aIm * bIm;
+}
+
+__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
+  return aRe * bIm + aIm * bRe;
+}
+
+static int CmpFloat(const void* a, const void* b) {
+  const float* da = (const float*)a;
+  const float* db = (const float*)b;
+
+  return (*da > *db) - (*da < *db);
+}
+
+static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) {
+  int i;
+  for (i = 0; i < aec->num_partitions; i++) {
+    int j;
+    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int pos = i * PART_LEN1;
+    // Check for wrap
+    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
+      xPos -= aec->num_partitions * (PART_LEN1);
+    }
+
+    for (j = 0; j < PART_LEN1; j++) {
+      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
+                        aec->xfBuf[1][xPos + j],
+                        aec->wfBuf[0][pos + j],
+                        aec->wfBuf[1][pos + j]);
+      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
+                        aec->xfBuf[1][xPos + j],
+                        aec->wfBuf[0][pos + j],
+                        aec->wfBuf[1][pos + j]);
+    }
+  }
+}
+
+static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) {
+  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
+  const float error_threshold = aec->extended_filter_enabled
+                                    ? kExtendedErrorThreshold
+                                    : aec->normal_error_threshold;
+  int i;
+  float abs_ef;
+  for (i = 0; i < (PART_LEN1); i++) {
+    ef[0][i] /= (aec->xPow[i] + 1e-10f);
+    ef[1][i] /= (aec->xPow[i] + 1e-10f);
+    abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
+
+    if (abs_ef > error_threshold) {
+      abs_ef = error_threshold / (abs_ef + 1e-10f);
+      ef[0][i] *= abs_ef;
+      ef[1][i] *= abs_ef;
+    }
+
+    // Stepsize factor
+    ef[0][i] *= mu;
+    ef[1][i] *= mu;
+  }
+}
+
+// Time-unconstrined filter adaptation.
+// TODO(andrew): consider for a low-complexity mode.
+// static void FilterAdaptationUnconstrained(AecCore* aec, float *fft,
+//                                          float ef[2][PART_LEN1]) {
+//  int i, j;
+//  for (i = 0; i < aec->num_partitions; i++) {
+//    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
+//    int pos;
+//    // Check for wrap
+//    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
+//      xPos -= aec->num_partitions * PART_LEN1;
+//    }
+//
+//    pos = i * PART_LEN1;
+//
+//    for (j = 0; j < PART_LEN1; j++) {
+//      aec->wfBuf[0][pos + j] += MulRe(aec->xfBuf[0][xPos + j],
+//                                      -aec->xfBuf[1][xPos + j],
+//                                      ef[0][j], ef[1][j]);
+//      aec->wfBuf[1][pos + j] += MulIm(aec->xfBuf[0][xPos + j],
+//                                      -aec->xfBuf[1][xPos + j],
+//                                      ef[0][j], ef[1][j]);
+//    }
+//  }
+//}
+
+static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) {
+  int i, j;
+  for (i = 0; i < aec->num_partitions; i++) {
+    int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
+    int pos;
+    // Check for wrap
+    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
+      xPos -= aec->num_partitions * PART_LEN1;
+    }
+
+    pos = i * PART_LEN1;
+
+    for (j = 0; j < PART_LEN; j++) {
+
+      fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
+                         -aec->xfBuf[1][xPos + j],
+                         ef[0][j],
+                         ef[1][j]);
+      fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j],
+                             -aec->xfBuf[1][xPos + j],
+                             ef[0][j],
+                             ef[1][j]);
+    }
+    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
+                   -aec->xfBuf[1][xPos + PART_LEN],
+                   ef[0][PART_LEN],
+                   ef[1][PART_LEN]);
+
+    aec_rdft_inverse_128(fft);
+    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
+
+    // fft scaling
+    {
+      float scale = 2.0f / PART_LEN2;
+      for (j = 0; j < PART_LEN; j++) {
+        fft[j] *= scale;
+      }
+    }
+    aec_rdft_forward_128(fft);
+
+    aec->wfBuf[0][pos] += fft[0];
+    aec->wfBuf[0][pos + PART_LEN] += fft[1];
+
+    for (j = 1; j < PART_LEN; j++) {
+      aec->wfBuf[0][pos + j] += fft[2 * j];
+      aec->wfBuf[1][pos + j] += fft[2 * j + 1];
+    }
+  }
+}
+
+static void OverdriveAndSuppress(AecCore* aec,
+                                 float hNl[PART_LEN1],
+                                 const float hNlFb,
+                                 float efw[2][PART_LEN1]) {
+  int i;
+  for (i = 0; i < PART_LEN1; i++) {
+    // Weight subbands
+    if (hNl[i] > hNlFb) {
+      hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
+               (1 - WebRtcAec_weightCurve[i]) * hNl[i];
+    }
+    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
+
+    // Suppress error signal
+    efw[0][i] *= hNl[i];
+    efw[1][i] *= hNl[i];
+
+    // Ooura fft returns incorrect sign on imaginary component. It matters here
+    // because we are making an additive change with comfort noise.
+    efw[1][i] *= -1;
+  }
+}
+
+static int PartitionDelay(const AecCore* aec) {
+  // Measures the energy in each filter partition and returns the partition with
+  // highest energy.
+  // TODO(bjornv): Spread computational cost by computing one partition per
+  // block?
+  float wfEnMax = 0;
+  int i;
+  int delay = 0;
+
+  for (i = 0; i < aec->num_partitions; i++) {
+    int j;
+    int pos = i * PART_LEN1;
+    float wfEn = 0;
+    for (j = 0; j < PART_LEN1; j++) {
+      wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
+          aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
+    }
+
+    if (wfEn > wfEnMax) {
+      wfEnMax = wfEn;
+      delay = i;
+    }
+  }
+  return delay;
+}
+
+// Threshold to protect against the ill-effects of a zero far-end.
+const float WebRtcAec_kMinFarendPSD = 15;
+
+// Updates the following smoothed  Power Spectral Densities (PSD):
+//  - sd  : near-end
+//  - se  : residual echo
+//  - sx  : far-end
+//  - sde : cross-PSD of near-end and residual echo
+//  - sxd : cross-PSD of near-end and far-end
+//
+// In addition to updating the PSDs, also the filter diverge state is determined
+// upon actions are taken.
+static void SmoothedPSD(AecCore* aec,
+                        float efw[2][PART_LEN1],
+                        float dfw[2][PART_LEN1],
+                        float xfw[2][PART_LEN1]) {
+  // Power estimate smoothing coefficients.
+  const float* ptrGCoh = aec->extended_filter_enabled
+      ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
+      : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
+  int i;
+  float sdSum = 0, seSum = 0;
+
+  for (i = 0; i < PART_LEN1; i++) {
+    aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
+                 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
+    aec->se[i] = ptrGCoh[0] * aec->se[i] +
+                 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
+    // We threshold here to protect against the ill-effects of a zero farend.
+    // The threshold is not arbitrarily chosen, but balances protection and
+    // adverse interaction with the algorithm's tuning.
+    // TODO(bjornv): investigate further why this is so sensitive.
+    aec->sx[i] =
+        ptrGCoh[0] * aec->sx[i] +
+        ptrGCoh[1] * WEBRTC_SPL_MAX(
+            xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
+            WebRtcAec_kMinFarendPSD);
+
+    aec->sde[i][0] =
+        ptrGCoh[0] * aec->sde[i][0] +
+        ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
+    aec->sde[i][1] =
+        ptrGCoh[0] * aec->sde[i][1] +
+        ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
+
+    aec->sxd[i][0] =
+        ptrGCoh[0] * aec->sxd[i][0] +
+        ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
+    aec->sxd[i][1] =
+        ptrGCoh[0] * aec->sxd[i][1] +
+        ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
+
+    sdSum += aec->sd[i];
+    seSum += aec->se[i];
+  }
+
+  // Divergent filter safeguard.
+  aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
+
+  if (aec->divergeState)
+    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
+
+  // Reset if error is significantly larger than nearend (13 dB).
+  if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
+    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
+}
+
+// Window time domain data to be used by the fft.
+__inline static void WindowData(float* x_windowed, const float* x) {
+  int i;
+  for (i = 0; i < PART_LEN; i++) {
+    x_windowed[i] = x[i] * WebRtcAec_sqrtHanning[i];
+    x_windowed[PART_LEN + i] =
+        x[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i];
+  }
+}
+
+// Puts fft output data into a complex valued array.
+__inline static void StoreAsComplex(const float* data,
+                                    float data_complex[2][PART_LEN1]) {
+  int i;
+  data_complex[0][0] = data[0];
+  data_complex[1][0] = 0;
+  for (i = 1; i < PART_LEN; i++) {
+    data_complex[0][i] = data[2 * i];
+    data_complex[1][i] = data[2 * i + 1];
+  }
+  data_complex[0][PART_LEN] = data[1];
+  data_complex[1][PART_LEN] = 0;
+}
+
+static void SubbandCoherence(AecCore* aec,
+                             float efw[2][PART_LEN1],
+                             float xfw[2][PART_LEN1],
+                             float* fft,
+                             float* cohde,
+                             float* cohxd) {
+  float dfw[2][PART_LEN1];
+  int i;
+
+  if (aec->delayEstCtr == 0)
+    aec->delayIdx = PartitionDelay(aec);
+
+  // Use delayed far.
+  memcpy(xfw,
+         aec->xfwBuf + aec->delayIdx * PART_LEN1,
+         sizeof(xfw[0][0]) * 2 * PART_LEN1);
+
+  // Windowed near fft
+  WindowData(fft, aec->dBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, dfw);
+
+  // Windowed error fft
+  WindowData(fft, aec->eBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, efw);
+
+  SmoothedPSD(aec, efw, dfw, xfw);
+
+  // Subband coherence
+  for (i = 0; i < PART_LEN1; i++) {
+    cohde[i] =
+        (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
+        (aec->sd[i] * aec->se[i] + 1e-10f);
+    cohxd[i] =
+        (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
+        (aec->sx[i] * aec->sd[i] + 1e-10f);
+  }
+}
+
+static void GetHighbandGain(const float* lambda, float* nlpGainHband) {
+  int i;
+
+  nlpGainHband[0] = (float)0.0;
+  for (i = freqAvgIc; i < PART_LEN1 - 1; i++) {
+    nlpGainHband[0] += lambda[i];
+  }
+  nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc);
+}
+
+static void ComfortNoise(AecCore* aec,
+                         float efw[2][PART_LEN1],
+                         complex_t* comfortNoiseHband,
+                         const float* noisePow,
+                         const float* lambda) {
+  int i, num;
+  float rand[PART_LEN];
+  float noise, noiseAvg, tmp, tmpAvg;
+  int16_t randW16[PART_LEN];
+  complex_t u[PART_LEN1];
+
+  const float pi2 = 6.28318530717959f;
+
+  // Generate a uniform random array on [0 1]
+  WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
+  for (i = 0; i < PART_LEN; i++) {
+    rand[i] = ((float)randW16[i]) / 32768;
+  }
+
+  // Reject LF noise
+  u[0][0] = 0;
+  u[0][1] = 0;
+  for (i = 1; i < PART_LEN1; i++) {
+    tmp = pi2 * rand[i - 1];
+
+    noise = sqrtf(noisePow[i]);
+    u[i][0] = noise * cosf(tmp);
+    u[i][1] = -noise * sinf(tmp);
+  }
+  u[PART_LEN][1] = 0;
+
+  for (i = 0; i < PART_LEN1; i++) {
+    // This is the proper weighting to match the background noise power
+    tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
+    // tmp = 1 - lambda[i];
+    efw[0][i] += tmp * u[i][0];
+    efw[1][i] += tmp * u[i][1];
+  }
+
+  // For H band comfort noise
+  // TODO: don't compute noise and "tmp" twice. Use the previous results.
+  noiseAvg = 0.0;
+  tmpAvg = 0.0;
+  num = 0;
+  if (aec->num_bands > 1 && flagHbandCn == 1) {
+
+    // average noise scale
+    // average over second half of freq spectrum (i.e., 4->8khz)
+    // TODO: we shouldn't need num. We know how many elements we're summing.
+    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
+      num++;
+      noiseAvg += sqrtf(noisePow[i]);
+    }
+    noiseAvg /= (float)num;
+
+    // average nlp scale
+    // average over second half of freq spectrum (i.e., 4->8khz)
+    // TODO: we shouldn't need num. We know how many elements we're summing.
+    num = 0;
+    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
+      num++;
+      tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
+    }
+    tmpAvg /= (float)num;
+
+    // Use average noise for H band
+    // TODO: we should probably have a new random vector here.
+    // Reject LF noise
+    u[0][0] = 0;
+    u[0][1] = 0;
+    for (i = 1; i < PART_LEN1; i++) {
+      tmp = pi2 * rand[i - 1];
+
+      // Use average noise for H band
+      u[i][0] = noiseAvg * (float)cos(tmp);
+      u[i][1] = -noiseAvg * (float)sin(tmp);
+    }
+    u[PART_LEN][1] = 0;
+
+    for (i = 0; i < PART_LEN1; i++) {
+      // Use average NLP weight for H band
+      comfortNoiseHband[i][0] = tmpAvg * u[i][0];
+      comfortNoiseHband[i][1] = tmpAvg * u[i][1];
+    }
+  }
+}
+
+static void InitLevel(PowerLevel* level) {
+  const float kBigFloat = 1E17f;
+
+  level->averagelevel = 0;
+  level->framelevel = 0;
+  level->minlevel = kBigFloat;
+  level->frsum = 0;
+  level->sfrsum = 0;
+  level->frcounter = 0;
+  level->sfrcounter = 0;
+}
+
+static void InitStats(Stats* stats) {
+  stats->instant = kOffsetLevel;
+  stats->average = kOffsetLevel;
+  stats->max = kOffsetLevel;
+  stats->min = kOffsetLevel * (-1);
+  stats->sum = 0;
+  stats->hisum = 0;
+  stats->himean = kOffsetLevel;
+  stats->counter = 0;
+  stats->hicounter = 0;
+}
+
+static void InitMetrics(AecCore* self) {
+  self->stateCounter = 0;
+  InitLevel(&self->farlevel);
+  InitLevel(&self->nearlevel);
+  InitLevel(&self->linoutlevel);
+  InitLevel(&self->nlpoutlevel);
+
+  InitStats(&self->erl);
+  InitStats(&self->erle);
+  InitStats(&self->aNlp);
+  InitStats(&self->rerl);
+}
+
+static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]) {
+  // Do the energy calculation in the frequency domain. The FFT is performed on
+  // a segment of PART_LEN2 samples due to overlap, but we only want the energy
+  // of half that data (the last PART_LEN samples). Parseval's relation states
+  // that the energy is preserved according to
+  //
+  // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2
+  //                           = ENERGY,
+  //
+  // where N = PART_LEN2. Since we are only interested in calculating the energy
+  // for the last PART_LEN samples we approximate by calculating ENERGY and
+  // divide by 2,
+  //
+  // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2
+  //
+  // Since we deal with real valued time domain signals we only store frequency
+  // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we
+  // need to add the contribution from the missing part in
+  // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical
+  // with the values in [1, PART_LEN-1], hence multiply those values by 2. This
+  // is the values in the for loop below, but multiplication by 2 and division
+  // by 2 cancel.
+
+  // TODO(bjornv): Investigate reusing energy calculations performed at other
+  // places in the code.
+  int k = 1;
+  // Imaginary parts are zero at end points and left out of the calculation.
+  float energy = (in[0][0] * in[0][0]) / 2;
+  energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2;
+
+  for (k = 1; k < PART_LEN; k++) {
+    energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]);
+  }
+  energy /= PART_LEN2;
+
+  level->sfrsum += energy;
+  level->sfrcounter++;
+
+  if (level->sfrcounter > subCountLen) {
+    level->framelevel = level->sfrsum / (subCountLen * PART_LEN);
+    level->sfrsum = 0;
+    level->sfrcounter = 0;
+    if (level->framelevel > 0) {
+      if (level->framelevel < level->minlevel) {
+        level->minlevel = level->framelevel;  // New minimum.
+      } else {
+        level->minlevel *= (1 + 0.001f);  // Small increase.
+      }
+    }
+    level->frcounter++;
+    level->frsum += level->framelevel;
+    if (level->frcounter > countLen) {
+      level->averagelevel = level->frsum / countLen;
+      level->frsum = 0;
+      level->frcounter = 0;
+    }
+  }
+}
+
+static void UpdateMetrics(AecCore* aec) {
+  float dtmp, dtmp2;
+
+  const float actThresholdNoisy = 8.0f;
+  const float actThresholdClean = 40.0f;
+  const float safety = 0.99995f;
+  const float noisyPower = 300000.0f;
+
+  float actThreshold;
+  float echo, suppressedEcho;
+
+  if (aec->echoState) {  // Check if echo is likely present
+    aec->stateCounter++;
+  }
+
+  if (aec->farlevel.frcounter == 0) {
+
+    if (aec->farlevel.minlevel < noisyPower) {
+      actThreshold = actThresholdClean;
+    } else {
+      actThreshold = actThresholdNoisy;
+    }
+
+    if ((aec->stateCounter > (0.5f * countLen * subCountLen)) &&
+        (aec->farlevel.sfrcounter == 0)
+
+        // Estimate in active far-end segments only
+        &&
+        (aec->farlevel.averagelevel >
+         (actThreshold * aec->farlevel.minlevel))) {
+
+      // Subtract noise power
+      echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel;
+
+      // ERL
+      dtmp = 10 * (float)log10(aec->farlevel.averagelevel /
+                                   aec->nearlevel.averagelevel +
+                               1e-10f);
+      dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f);
+
+      aec->erl.instant = dtmp;
+      if (dtmp > aec->erl.max) {
+        aec->erl.max = dtmp;
+      }
+
+      if (dtmp < aec->erl.min) {
+        aec->erl.min = dtmp;
+      }
+
+      aec->erl.counter++;
+      aec->erl.sum += dtmp;
+      aec->erl.average = aec->erl.sum / aec->erl.counter;
+
+      // Upper mean
+      if (dtmp > aec->erl.average) {
+        aec->erl.hicounter++;
+        aec->erl.hisum += dtmp;
+        aec->erl.himean = aec->erl.hisum / aec->erl.hicounter;
+      }
+
+      // A_NLP
+      dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
+                                   (2 * aec->linoutlevel.averagelevel) +
+                               1e-10f);
+
+      // subtract noise power
+      suppressedEcho = 2 * (aec->linoutlevel.averagelevel -
+                            safety * aec->linoutlevel.minlevel);
+
+      dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
+
+      aec->aNlp.instant = dtmp2;
+      if (dtmp > aec->aNlp.max) {
+        aec->aNlp.max = dtmp;
+      }
+
+      if (dtmp < aec->aNlp.min) {
+        aec->aNlp.min = dtmp;
+      }
+
+      aec->aNlp.counter++;
+      aec->aNlp.sum += dtmp;
+      aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter;
+
+      // Upper mean
+      if (dtmp > aec->aNlp.average) {
+        aec->aNlp.hicounter++;
+        aec->aNlp.hisum += dtmp;
+        aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter;
+      }
+
+      // ERLE
+
+      // subtract noise power
+      suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel -
+                            safety * aec->nlpoutlevel.minlevel);
+
+      dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
+                                   (2 * aec->nlpoutlevel.averagelevel) +
+                               1e-10f);
+      dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
+
+      dtmp = dtmp2;
+      aec->erle.instant = dtmp;
+      if (dtmp > aec->erle.max) {
+        aec->erle.max = dtmp;
+      }
+
+      if (dtmp < aec->erle.min) {
+        aec->erle.min = dtmp;
+      }
+
+      aec->erle.counter++;
+      aec->erle.sum += dtmp;
+      aec->erle.average = aec->erle.sum / aec->erle.counter;
+
+      // Upper mean
+      if (dtmp > aec->erle.average) {
+        aec->erle.hicounter++;
+        aec->erle.hisum += dtmp;
+        aec->erle.himean = aec->erle.hisum / aec->erle.hicounter;
+      }
+    }
+
+    aec->stateCounter = 0;
+  }
+}
+
+static void UpdateDelayMetrics(AecCore* self) {
+  int i = 0;
+  int delay_values = 0;
+  int median = 0;
+  int lookahead = WebRtc_lookahead(self->delay_estimator);
+  const int kMsPerBlock = PART_LEN / (self->mult * 8);
+  int64_t l1_norm = 0;
+
+  if (self->num_delay_values == 0) {
+    // We have no new delay value data. Even though -1 is a valid |median| in
+    // the sense that we allow negative values, it will practically never be
+    // used since multiples of |kMsPerBlock| will always be returned.
+    // We therefore use -1 to indicate in the logs that the delay estimator was
+    // not able to estimate the delay.
+    self->delay_median = -1;
+    self->delay_std = -1;
+    self->fraction_poor_delays = -1;
+    return;
+  }
+
+  // Start value for median count down.
+  delay_values = self->num_delay_values >> 1;
+  // Get median of delay values since last update.
+  for (i = 0; i < kHistorySizeBlocks; i++) {
+    delay_values -= self->delay_histogram[i];
+    if (delay_values < 0) {
+      median = i;
+      break;
+    }
+  }
+  // Account for lookahead.
+  self->delay_median = (median - lookahead) * kMsPerBlock;
+
+  // Calculate the L1 norm, with median value as central moment.
+  for (i = 0; i < kHistorySizeBlocks; i++) {
+    l1_norm += abs(i - median) * self->delay_histogram[i];
+  }
+  self->delay_std = (int)((l1_norm + self->num_delay_values / 2) /
+      self->num_delay_values) * kMsPerBlock;
+
+  // Determine fraction of delays that are out of bounds, that is, either
+  // negative (anti-causal system) or larger than the AEC filter length.
+  {
+    int num_delays_out_of_bounds = self->num_delay_values;
+    const int histogram_length = sizeof(self->delay_histogram) /
+      sizeof(self->delay_histogram[0]);
+    for (i = lookahead; i < lookahead + self->num_partitions; ++i) {
+      if (i < histogram_length)
+        num_delays_out_of_bounds -= self->delay_histogram[i];
+    }
+    self->fraction_poor_delays = (float)num_delays_out_of_bounds /
+        self->num_delay_values;
+  }
+
+  // Reset histogram.
+  memset(self->delay_histogram, 0, sizeof(self->delay_histogram));
+  self->num_delay_values = 0;
+
+  return;
+}
+
+static void TimeToFrequency(float time_data[PART_LEN2],
+                            float freq_data[2][PART_LEN1],
+                            int window) {
+  int i = 0;
+
+  // TODO(bjornv): Should we have a different function/wrapper for windowed FFT?
+  if (window) {
+    for (i = 0; i < PART_LEN; i++) {
+      time_data[i] *= WebRtcAec_sqrtHanning[i];
+      time_data[PART_LEN + i] *= WebRtcAec_sqrtHanning[PART_LEN - i];
+    }
+  }
+
+  aec_rdft_forward_128(time_data);
+  // Reorder.
+  freq_data[1][0] = 0;
+  freq_data[1][PART_LEN] = 0;
+  freq_data[0][0] = time_data[0];
+  freq_data[0][PART_LEN] = time_data[1];
+  for (i = 1; i < PART_LEN; i++) {
+    freq_data[0][i] = time_data[2 * i];
+    freq_data[1][i] = time_data[2 * i + 1];
+  }
+}
+
+static int MoveFarReadPtrWithoutSystemDelayUpdate(AecCore* self, int elements) {
+  WebRtc_MoveReadPtr(self->far_buf_windowed, elements);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  WebRtc_MoveReadPtr(self->far_time_buf, elements);
+#endif
+  return WebRtc_MoveReadPtr(self->far_buf, elements);
+}
+
+static int SignalBasedDelayCorrection(AecCore* self) {
+  int delay_correction = 0;
+  int last_delay = -2;
+  assert(self != NULL);
+#if !defined(WEBRTC_ANDROID)
+  // On desktops, turn on correction after |kDelayCorrectionStart| frames.  This
+  // is to let the delay estimation get a chance to converge.  Also, if the
+  // playout audio volume is low (or even muted) the delay estimation can return
+  // a very large delay, which will break the AEC if it is applied.
+  if (self->frame_count < kDelayCorrectionStart) {
+    return 0;
+  }
+#endif
+
+  // 1. Check for non-negative delay estimate.  Note that the estimates we get
+  //    from the delay estimation are not compensated for lookahead.  Hence, a
+  //    negative |last_delay| is an invalid one.
+  // 2. Verify that there is a delay change.  In addition, only allow a change
+  //    if the delay is outside a certain region taking the AEC filter length
+  //    into account.
+  // TODO(bjornv): Investigate if we can remove the non-zero delay change check.
+  // 3. Only allow delay correction if the delay estimation quality exceeds
+  //    |delay_quality_threshold|.
+  // 4. Finally, verify that the proposed |delay_correction| is feasible by
+  //    comparing with the size of the far-end buffer.
+  last_delay = WebRtc_last_delay(self->delay_estimator);
+  if ((last_delay >= 0) &&
+      (last_delay != self->previous_delay) &&
+      (WebRtc_last_delay_quality(self->delay_estimator) >
+           self->delay_quality_threshold)) {
+    int delay = last_delay - WebRtc_lookahead(self->delay_estimator);
+    // Allow for a slack in the actual delay, defined by a |lower_bound| and an
+    // |upper_bound|.  The adaptive echo cancellation filter is currently
+    // |num_partitions| (of 64 samples) long.  If the delay estimate is negative
+    // or at least 3/4 of the filter length we open up for correction.
+    const int lower_bound = 0;
+    const int upper_bound = self->num_partitions * 3 / 4;
+    const int do_correction = delay <= lower_bound || delay > upper_bound;
+    if (do_correction == 1) {
+      int available_read = (int)WebRtc_available_read(self->far_buf);
+      // With |shift_offset| we gradually rely on the delay estimates.  For
+      // positive delays we reduce the correction by |shift_offset| to lower the
+      // risk of pushing the AEC into a non causal state.  For negative delays
+      // we rely on the values up to a rounding error, hence compensate by 1
+      // element to make sure to push the delay into the causal region.
+      delay_correction = -delay;
+      delay_correction += delay > self->shift_offset ? self->shift_offset : 1;
+      self->shift_offset--;
+      self->shift_offset = (self->shift_offset <= 1 ? 1 : self->shift_offset);
+      if (delay_correction > available_read - self->mult - 1) {
+        // There is not enough data in the buffer to perform this shift.  Hence,
+        // we do not rely on the delay estimate and do nothing.
+        delay_correction = 0;
+      } else {
+        self->previous_delay = last_delay;
+        ++self->delay_correction_count;
+      }
+    }
+  }
+  // Update the |delay_quality_threshold| once we have our first delay
+  // correction.
+  if (self->delay_correction_count > 0) {
+    float delay_quality = WebRtc_last_delay_quality(self->delay_estimator);
+    delay_quality = (delay_quality > kDelayQualityThresholdMax ?
+        kDelayQualityThresholdMax : delay_quality);
+    self->delay_quality_threshold =
+        (delay_quality > self->delay_quality_threshold ? delay_quality :
+            self->delay_quality_threshold);
+  }
+  return delay_correction;
+}
+
+static void NonLinearProcessing(AecCore* aec,
+                                float* output,
+                                float* const* outputH) {
+  float efw[2][PART_LEN1], xfw[2][PART_LEN1];
+  complex_t comfortNoiseHband[PART_LEN1];
+  float fft[PART_LEN2];
+  float scale, dtmp;
+  float nlpGainHband;
+  int i;
+  size_t j;
+
+  // Coherence and non-linear filter
+  float cohde[PART_LEN1], cohxd[PART_LEN1];
+  float hNlDeAvg, hNlXdAvg;
+  float hNl[PART_LEN1];
+  float hNlPref[kPrefBandSize];
+  float hNlFb = 0, hNlFbLow = 0;
+  const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f;
+  const int prefBandSize = kPrefBandSize / aec->mult;
+  const int minPrefBand = 4 / aec->mult;
+  // Power estimate smoothing coefficients.
+  const float* min_overdrive = aec->extended_filter_enabled
+                                   ? kExtendedMinOverDrive
+                                   : kNormalMinOverDrive;
+
+  // Filter energy
+  const int delayEstInterval = 10 * aec->mult;
+
+  float* xfw_ptr = NULL;
+
+  aec->delayEstCtr++;
+  if (aec->delayEstCtr == delayEstInterval) {
+    aec->delayEstCtr = 0;
+  }
+
+  // initialize comfort noise for H band
+  memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband));
+  nlpGainHband = (float)0.0;
+  dtmp = (float)0.0;
+
+  // We should always have at least one element stored in |far_buf|.
+  assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
+  // NLP
+  WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1);
+
+  // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
+  // |xfwBuf|.
+  // Buffer far.
+  memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);
+
+  WebRtcAec_SubbandCoherence(aec, efw, xfw, fft, cohde, cohxd);
+
+  hNlXdAvg = 0;
+  for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
+    hNlXdAvg += cohxd[i];
+  }
+  hNlXdAvg /= prefBandSize;
+  hNlXdAvg = 1 - hNlXdAvg;
+
+  hNlDeAvg = 0;
+  for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
+    hNlDeAvg += cohde[i];
+  }
+  hNlDeAvg /= prefBandSize;
+
+  if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) {
+    aec->hNlXdAvgMin = hNlXdAvg;
+  }
+
+  if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) {
+    aec->stNearState = 1;
+  } else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) {
+    aec->stNearState = 0;
+  }
+
+  if (aec->hNlXdAvgMin == 1) {
+    aec->echoState = 0;
+    aec->overDrive = min_overdrive[aec->nlp_mode];
+
+    if (aec->stNearState == 1) {
+      memcpy(hNl, cohde, sizeof(hNl));
+      hNlFb = hNlDeAvg;
+      hNlFbLow = hNlDeAvg;
+    } else {
+      for (i = 0; i < PART_LEN1; i++) {
+        hNl[i] = 1 - cohxd[i];
+      }
+      hNlFb = hNlXdAvg;
+      hNlFbLow = hNlXdAvg;
+    }
+  } else {
+
+    if (aec->stNearState == 1) {
+      aec->echoState = 0;
+      memcpy(hNl, cohde, sizeof(hNl));
+      hNlFb = hNlDeAvg;
+      hNlFbLow = hNlDeAvg;
+    } else {
+      aec->echoState = 1;
+      for (i = 0; i < PART_LEN1; i++) {
+        hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]);
+      }
+
+      // Select an order statistic from the preferred bands.
+      // TODO: Using quicksort now, but a selection algorithm may be preferred.
+      memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize);
+      qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat);
+      hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))];
+      hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))];
+    }
+  }
+
+  // Track the local filter minimum to determine suppression overdrive.
+  if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) {
+    aec->hNlFbLocalMin = hNlFbLow;
+    aec->hNlFbMin = hNlFbLow;
+    aec->hNlNewMin = 1;
+    aec->hNlMinCtr = 0;
+  }
+  aec->hNlFbLocalMin =
+      WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1);
+  aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1);
+
+  if (aec->hNlNewMin == 1) {
+    aec->hNlMinCtr++;
+  }
+  if (aec->hNlMinCtr == 2) {
+    aec->hNlNewMin = 0;
+    aec->hNlMinCtr = 0;
+    aec->overDrive =
+        WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] /
+                           ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f),
+                       min_overdrive[aec->nlp_mode]);
+  }
+
+  // Smooth the overdrive.
+  if (aec->overDrive < aec->overDriveSm) {
+    aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;
+  } else {
+    aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;
+  }
+
+  WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw);
+
+  // Add comfort noise.
+  WebRtcAec_ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);
+
+  // TODO(bjornv): Investigate how to take the windowing below into account if
+  // needed.
+  if (aec->metricsMode == 1) {
+    // Note that we have a scaling by two in the time domain |eBuf|.
+    // In addition the time domain signal is windowed before transformation,
+    // losing half the energy on the average. We take care of the first
+    // scaling only in UpdateMetrics().
+    UpdateLevel(&aec->nlpoutlevel, efw);
+  }
+  // Inverse error fft.
+  fft[0] = efw[0][0];
+  fft[1] = efw[0][PART_LEN];
+  for (i = 1; i < PART_LEN; i++) {
+    fft[2 * i] = efw[0][i];
+    // Sign change required by Ooura fft.
+    fft[2 * i + 1] = -efw[1][i];
+  }
+  aec_rdft_inverse_128(fft);
+
+  // Overlap and add to obtain output.
+  scale = 2.0f / PART_LEN2;
+  for (i = 0; i < PART_LEN; i++) {
+    fft[i] *= scale;  // fft scaling
+    fft[i] = fft[i] * WebRtcAec_sqrtHanning[i] + aec->outBuf[i];
+
+    fft[PART_LEN + i] *= scale;  // fft scaling
+    aec->outBuf[i] = fft[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i];
+
+    // Saturate output to keep it in the allowed range.
+    output[i] = WEBRTC_SPL_SAT(
+        WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN);
+  }
+
+  // For H band
+  if (aec->num_bands > 1) {
+
+    // H band gain
+    // average nlp over low band: average over second half of freq spectrum
+    // (4->8khz)
+    GetHighbandGain(hNl, &nlpGainHband);
+
+    // Inverse comfort_noise
+    if (flagHbandCn == 1) {
+      fft[0] = comfortNoiseHband[0][0];
+      fft[1] = comfortNoiseHband[PART_LEN][0];
+      for (i = 1; i < PART_LEN; i++) {
+        fft[2 * i] = comfortNoiseHband[i][0];
+        fft[2 * i + 1] = comfortNoiseHband[i][1];
+      }
+      aec_rdft_inverse_128(fft);
+      scale = 2.0f / PART_LEN2;
+    }
+
+    // compute gain factor
+    for (j = 0; j < aec->num_bands - 1; ++j) {
+      for (i = 0; i < PART_LEN; i++) {
+        dtmp = aec->dBufH[j][i];
+        dtmp = dtmp * nlpGainHband;  // for variable gain
+
+        // add some comfort noise where Hband is attenuated
+        if (flagHbandCn == 1 && j == 0) {
+          fft[i] *= scale;  // fft scaling
+          dtmp += cnScaleHband * fft[i];
+        }
+
+        // Saturate output to keep it in the allowed range.
+        outputH[j][i] = WEBRTC_SPL_SAT(
+            WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN);
+      }
+    }
+  }
+
+  // Copy the current block to the old position.
+  memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);
+  memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);
+
+  // Copy the current block to the old position for H band
+  for (j = 0; j < aec->num_bands - 1; ++j) {
+    memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN);
+  }
+
+  memmove(aec->xfwBuf + PART_LEN1,
+          aec->xfwBuf,
+          sizeof(aec->xfwBuf) - sizeof(complex_t) * PART_LEN1);
+}
+
+static void ProcessBlock(AecCore* aec) {
+  size_t i;
+  float y[PART_LEN], e[PART_LEN];
+  float scale;
+
+  float fft[PART_LEN2];
+  float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
+  float df[2][PART_LEN1];
+  float far_spectrum = 0.0f;
+  float near_spectrum = 0.0f;
+  float abs_far_spectrum[PART_LEN1];
+  float abs_near_spectrum[PART_LEN1];
+
+  const float gPow[2] = {0.9f, 0.1f};
+
+  // Noise estimate constants.
+  const int noiseInitBlocks = 500 * aec->mult;
+  const float step = 0.1f;
+  const float ramp = 1.0002f;
+  const float gInitNoise[2] = {0.999f, 0.001f};
+
+  float nearend[PART_LEN];
+  float* nearend_ptr = NULL;
+  float output[PART_LEN];
+  float outputH[NUM_HIGH_BANDS_MAX][PART_LEN];
+  float* outputH_ptr[NUM_HIGH_BANDS_MAX];
+  for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
+    outputH_ptr[i] = outputH[i];
+  }
+
+  float* xf_ptr = NULL;
+
+  // Concatenate old and new nearend blocks.
+  for (i = 0; i < aec->num_bands - 1; ++i) {
+    WebRtc_ReadBuffer(aec->nearFrBufH[i],
+                      (void**)&nearend_ptr,
+                      nearend,
+                      PART_LEN);
+    memcpy(aec->dBufH[i] + PART_LEN, nearend_ptr, sizeof(nearend));
+  }
+  WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN);
+  memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend));
+
+  // ---------- Ooura fft ----------
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  {
+    float farend[PART_LEN];
+    float* farend_ptr = NULL;
+    WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1);
+    RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, farend_ptr, PART_LEN);
+    RTC_AEC_DEBUG_WAV_WRITE(aec->nearFile, nearend_ptr, PART_LEN);
+  }
+#endif
+
+  // We should always have at least one element stored in |far_buf|.
+  assert(WebRtc_available_read(aec->far_buf) > 0);
+  WebRtc_ReadBuffer(aec->far_buf, (void**)&xf_ptr, &xf[0][0], 1);
+
+  // Near fft
+  memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
+  TimeToFrequency(fft, df, 0);
+
+  // Power smoothing
+  for (i = 0; i < PART_LEN1; i++) {
+    far_spectrum = (xf_ptr[i] * xf_ptr[i]) +
+                   (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);
+    aec->xPow[i] =
+        gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum;
+    // Calculate absolute spectra
+    abs_far_spectrum[i] = sqrtf(far_spectrum);
+
+    near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];
+    aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum;
+    // Calculate absolute spectra
+    abs_near_spectrum[i] = sqrtf(near_spectrum);
+  }
+
+  // Estimate noise power. Wait until dPow is more stable.
+  if (aec->noiseEstCtr > 50) {
+    for (i = 0; i < PART_LEN1; i++) {
+      if (aec->dPow[i] < aec->dMinPow[i]) {
+        aec->dMinPow[i] =
+            (aec->dPow[i] + step * (aec->dMinPow[i] - aec->dPow[i])) * ramp;
+      } else {
+        aec->dMinPow[i] *= ramp;
+      }
+    }
+  }
+
+  // Smooth increasing noise power from zero at the start,
+  // to avoid a sudden burst of comfort noise.
+  if (aec->noiseEstCtr < noiseInitBlocks) {
+    aec->noiseEstCtr++;
+    for (i = 0; i < PART_LEN1; i++) {
+      if (aec->dMinPow[i] > aec->dInitMinPow[i]) {
+        aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] +
+                              gInitNoise[1] * aec->dMinPow[i];
+      } else {
+        aec->dInitMinPow[i] = aec->dMinPow[i];
+      }
+    }
+    aec->noisePow = aec->dInitMinPow;
+  } else {
+    aec->noisePow = aec->dMinPow;
+  }
+
+  // Block wise delay estimation used for logging
+  if (aec->delay_logging_enabled) {
+    if (WebRtc_AddFarSpectrumFloat(
+            aec->delay_estimator_farend, abs_far_spectrum, PART_LEN1) == 0) {
+      int delay_estimate = WebRtc_DelayEstimatorProcessFloat(
+          aec->delay_estimator, abs_near_spectrum, PART_LEN1);
+      if (delay_estimate >= 0) {
+        // Update delay estimate buffer.
+        aec->delay_histogram[delay_estimate]++;
+        aec->num_delay_values++;
+      }
+      if (aec->delay_metrics_delivered == 1 &&
+          aec->num_delay_values >= kDelayMetricsAggregationWindow) {
+        UpdateDelayMetrics(aec);
+      }
+    }
+  }
+
+  // Update the xfBuf block position.
+  aec->xfBufBlockPos--;
+  if (aec->xfBufBlockPos == -1) {
+    aec->xfBufBlockPos = aec->num_partitions - 1;
+  }
+
+  // Buffer xf
+  memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1,
+         xf_ptr,
+         sizeof(float) * PART_LEN1);
+  memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1,
+         &xf_ptr[PART_LEN1],
+         sizeof(float) * PART_LEN1);
+
+  memset(yf, 0, sizeof(yf));
+
+  // Filter far
+  WebRtcAec_FilterFar(aec, yf);
+
+  // Inverse fft to obtain echo estimate and error.
+  fft[0] = yf[0][0];
+  fft[1] = yf[0][PART_LEN];
+  for (i = 1; i < PART_LEN; i++) {
+    fft[2 * i] = yf[0][i];
+    fft[2 * i + 1] = yf[1][i];
+  }
+  aec_rdft_inverse_128(fft);
+
+  scale = 2.0f / PART_LEN2;
+  for (i = 0; i < PART_LEN; i++) {
+    y[i] = fft[PART_LEN + i] * scale;  // fft scaling
+  }
+
+  for (i = 0; i < PART_LEN; i++) {
+    e[i] = nearend_ptr[i] - y[i];
+  }
+
+  // Error fft
+  memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
+  memset(fft, 0, sizeof(float) * PART_LEN);
+  memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
+  // TODO(bjornv): Change to use TimeToFrequency().
+  aec_rdft_forward_128(fft);
+
+  ef[1][0] = 0;
+  ef[1][PART_LEN] = 0;
+  ef[0][0] = fft[0];
+  ef[0][PART_LEN] = fft[1];
+  for (i = 1; i < PART_LEN; i++) {
+    ef[0][i] = fft[2 * i];
+    ef[1][i] = fft[2 * i + 1];
+  }
+
+  RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,
+                          &ef[0][0],
+                          sizeof(ef[0][0]) * PART_LEN1 * 2);
+
+  if (aec->metricsMode == 1) {
+    // Note that the first PART_LEN samples in fft (before transformation) are
+    // zero. Hence, the scaling by two in UpdateLevel() should not be
+    // performed. That scaling is taken care of in UpdateMetrics() instead.
+    UpdateLevel(&aec->linoutlevel, ef);
+  }
+
+  // Scale error signal inversely with far power.
+  WebRtcAec_ScaleErrorSignal(aec, ef);
+  WebRtcAec_FilterAdaptation(aec, fft, ef);
+  NonLinearProcessing(aec, output, outputH_ptr);
+
+  if (aec->metricsMode == 1) {
+    // Update power levels and echo metrics
+    UpdateLevel(&aec->farlevel, (float(*)[PART_LEN1])xf_ptr);
+    UpdateLevel(&aec->nearlevel, df);
+    UpdateMetrics(aec);
+  }
+
+  // Store the output block.
+  WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);
+  // For high bands
+  for (i = 0; i < aec->num_bands - 1; ++i) {
+    WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN);
+  }
+
+  RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, e, PART_LEN);
+  RTC_AEC_DEBUG_WAV_WRITE(aec->outFile, output, PART_LEN);
+}
+
+AecCore* WebRtcAec_CreateAec() {
+  int i;
+  AecCore* aec = malloc(sizeof(AecCore));
+  if (!aec) {
+    return NULL;
+  }
+
+  aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
+  if (!aec->nearFrBuf) {
+    WebRtcAec_FreeAec(aec);
+    return NULL;
+  }
+
+  aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
+  if (!aec->outFrBuf) {
+    WebRtcAec_FreeAec(aec);
+    return NULL;
+  }
+
+  for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
+    aec->nearFrBufH[i] = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
+                                             sizeof(float));
+    if (!aec->nearFrBufH[i]) {
+      WebRtcAec_FreeAec(aec);
+      return NULL;
+    }
+    aec->outFrBufH[i] = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
+                                            sizeof(float));
+    if (!aec->outFrBufH[i]) {
+      WebRtcAec_FreeAec(aec);
+      return NULL;
+    }
+  }
+
+  // Create far-end buffers.
+  aec->far_buf =
+      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1);
+  if (!aec->far_buf) {
+    WebRtcAec_FreeAec(aec);
+    return NULL;
+  }
+  aec->far_buf_windowed =
+      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1);
+  if (!aec->far_buf_windowed) {
+    WebRtcAec_FreeAec(aec);
+    return NULL;
+  }
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  aec->instance_index = webrtc_aec_instance_count;
+  aec->far_time_buf =
+      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN);
+  if (!aec->far_time_buf) {
+    WebRtcAec_FreeAec(aec);
+    return NULL;
+  }
+  aec->farFile = aec->nearFile = aec->outFile = aec->outLinearFile = NULL;
+  aec->debug_dump_count = 0;
+#endif
+  aec->delay_estimator_farend =
+      WebRtc_CreateDelayEstimatorFarend(PART_LEN1, kHistorySizeBlocks);
+  if (aec->delay_estimator_farend == NULL) {
+    WebRtcAec_FreeAec(aec);
+    return NULL;
+  }
+  // We create the delay_estimator with the same amount of maximum lookahead as
+  // the delay history size (kHistorySizeBlocks) for symmetry reasons.
+  aec->delay_estimator = WebRtc_CreateDelayEstimator(
+      aec->delay_estimator_farend, kHistorySizeBlocks);
+  if (aec->delay_estimator == NULL) {
+    WebRtcAec_FreeAec(aec);
+    return NULL;
+  }
+#ifdef WEBRTC_ANDROID
+  aec->delay_agnostic_enabled = 1;  // DA-AEC enabled by default.
+  // DA-AEC assumes the system is causal from the beginning and will self adjust
+  // the lookahead when shifting is required.
+  WebRtc_set_lookahead(aec->delay_estimator, 0);
+#else
+  aec->delay_agnostic_enabled = 0;
+  WebRtc_set_lookahead(aec->delay_estimator, kLookaheadBlocks);
+#endif
+  aec->extended_filter_enabled = 0;
+
+  // Assembly optimization
+  WebRtcAec_FilterFar = FilterFar;
+  WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
+  WebRtcAec_FilterAdaptation = FilterAdaptation;
+  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
+  WebRtcAec_ComfortNoise = ComfortNoise;
+  WebRtcAec_SubbandCoherence = SubbandCoherence;
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+  if (WebRtc_GetCPUInfo(kSSE2)) {
+    WebRtcAec_InitAec_SSE2();
+  }
+#endif
+
+#if defined(MIPS_FPU_LE)
+  WebRtcAec_InitAec_mips();
+#endif
+
+#if defined(WEBRTC_HAS_NEON)
+  WebRtcAec_InitAec_neon();
+#elif defined(WEBRTC_DETECT_NEON)
+  if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
+    WebRtcAec_InitAec_neon();
+  }
+#endif
+
+  aec_rdft_init();
+
+  return aec;
+}
+
+void WebRtcAec_FreeAec(AecCore* aec) {
+  int i;
+  if (aec == NULL) {
+    return;
+  }
+
+  WebRtc_FreeBuffer(aec->nearFrBuf);
+  WebRtc_FreeBuffer(aec->outFrBuf);
+
+  for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
+    WebRtc_FreeBuffer(aec->nearFrBufH[i]);
+    WebRtc_FreeBuffer(aec->outFrBufH[i]);
+  }
+
+  WebRtc_FreeBuffer(aec->far_buf);
+  WebRtc_FreeBuffer(aec->far_buf_windowed);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  WebRtc_FreeBuffer(aec->far_time_buf);
+#endif
+  RTC_AEC_DEBUG_WAV_CLOSE(aec->farFile);
+  RTC_AEC_DEBUG_WAV_CLOSE(aec->nearFile);
+  RTC_AEC_DEBUG_WAV_CLOSE(aec->outFile);
+  RTC_AEC_DEBUG_WAV_CLOSE(aec->outLinearFile);
+  RTC_AEC_DEBUG_RAW_CLOSE(aec->e_fft_file);
+
+  WebRtc_FreeDelayEstimator(aec->delay_estimator);
+  WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend);
+
+  free(aec);
+}
+
+int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
+  int i;
+
+  aec->sampFreq = sampFreq;
+
+  if (sampFreq == 8000) {
+    aec->normal_mu = 0.6f;
+    aec->normal_error_threshold = 2e-6f;
+    aec->num_bands = 1;
+  } else {
+    aec->normal_mu = 0.5f;
+    aec->normal_error_threshold = 1.5e-6f;
+    aec->num_bands = (size_t)(sampFreq / 16000);
+  }
+
+  WebRtc_InitBuffer(aec->nearFrBuf);
+  WebRtc_InitBuffer(aec->outFrBuf);
+  for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
+    WebRtc_InitBuffer(aec->nearFrBufH[i]);
+    WebRtc_InitBuffer(aec->outFrBufH[i]);
+  }
+
+  // Initialize far-end buffers.
+  WebRtc_InitBuffer(aec->far_buf);
+  WebRtc_InitBuffer(aec->far_buf_windowed);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  WebRtc_InitBuffer(aec->far_time_buf);
+  {
+    int process_rate = sampFreq > 16000 ? 16000 : sampFreq;
+    RTC_AEC_DEBUG_WAV_REOPEN("aec_far", aec->instance_index,
+                             aec->debug_dump_count, process_rate,
+                             &aec->farFile );
+    RTC_AEC_DEBUG_WAV_REOPEN("aec_near", aec->instance_index,
+                             aec->debug_dump_count, process_rate,
+                             &aec->nearFile);
+    RTC_AEC_DEBUG_WAV_REOPEN("aec_out", aec->instance_index,
+                             aec->debug_dump_count, process_rate,
+                             &aec->outFile );
+    RTC_AEC_DEBUG_WAV_REOPEN("aec_out_linear", aec->instance_index,
+                             aec->debug_dump_count, process_rate,
+                             &aec->outLinearFile);
+  }
+
+  RTC_AEC_DEBUG_RAW_OPEN("aec_e_fft",
+                         aec->debug_dump_count,
+                         &aec->e_fft_file);
+
+  ++aec->debug_dump_count;
+#endif
+  aec->system_delay = 0;
+
+  if (WebRtc_InitDelayEstimatorFarend(aec->delay_estimator_farend) != 0) {
+    return -1;
+  }
+  if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) {
+    return -1;
+  }
+  aec->delay_logging_enabled = 0;
+  aec->delay_metrics_delivered = 0;
+  memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));
+  aec->num_delay_values = 0;
+  aec->delay_median = -1;
+  aec->delay_std = -1;
+  aec->fraction_poor_delays = -1.0f;
+
+  aec->signal_delay_correction = 0;
+  aec->previous_delay = -2;  // (-2): Uninitialized.
+  aec->delay_correction_count = 0;
+  aec->shift_offset = kInitialShiftOffset;
+  aec->delay_quality_threshold = kDelayQualityThresholdMin;
+
+  aec->num_partitions = kNormalNumPartitions;
+
+  // Update the delay estimator with filter length.  We use half the
+  // |num_partitions| to take the echo path into account.  In practice we say
+  // that the echo has a duration of maximum half |num_partitions|, which is not
+  // true, but serves as a crude measure.
+  WebRtc_set_allowed_offset(aec->delay_estimator, aec->num_partitions / 2);
+  // TODO(bjornv): I currently hard coded the enable.  Once we've established
+  // that AECM has no performance regression, robust_validation will be enabled
+  // all the time and the APIs to turn it on/off will be removed.  Hence, remove
+  // this line then.
+  WebRtc_enable_robust_validation(aec->delay_estimator, 1);
+  aec->frame_count = 0;
+
+  // Default target suppression mode.
+  aec->nlp_mode = 1;
+
+  // Sampling frequency multiplier w.r.t. 8 kHz.
+  // In case of multiple bands we process the lower band in 16 kHz, hence the
+  // multiplier is always 2.
+  if (aec->num_bands > 1) {
+    aec->mult = 2;
+  } else {
+    aec->mult = (short)aec->sampFreq / 8000;
+  }
+
+  aec->farBufWritePos = 0;
+  aec->farBufReadPos = 0;
+
+  aec->inSamples = 0;
+  aec->outSamples = 0;
+  aec->knownDelay = 0;
+
+  // Initialize buffers
+  memset(aec->dBuf, 0, sizeof(aec->dBuf));
+  memset(aec->eBuf, 0, sizeof(aec->eBuf));
+  // For H bands
+  for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
+    memset(aec->dBufH[i], 0, sizeof(aec->dBufH[i]));
+  }
+
+  memset(aec->xPow, 0, sizeof(aec->xPow));
+  memset(aec->dPow, 0, sizeof(aec->dPow));
+  memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow));
+  aec->noisePow = aec->dInitMinPow;
+  aec->noiseEstCtr = 0;
+
+  // Initial comfort noise power
+  for (i = 0; i < PART_LEN1; i++) {
+    aec->dMinPow[i] = 1.0e6f;
+  }
+
+  // Holds the last block written to
+  aec->xfBufBlockPos = 0;
+  // TODO: Investigate need for these initializations. Deleting them doesn't
+  //       change the output at all and yields 0.4% overall speedup.
+  memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
+  memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
+  memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1);
+  memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1);
+  memset(
+      aec->xfwBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
+  memset(aec->se, 0, sizeof(float) * PART_LEN1);
+
+  // To prevent numerical instability in the first block.
+  for (i = 0; i < PART_LEN1; i++) {
+    aec->sd[i] = 1;
+  }
+  for (i = 0; i < PART_LEN1; i++) {
+    aec->sx[i] = 1;
+  }
+
+  memset(aec->hNs, 0, sizeof(aec->hNs));
+  memset(aec->outBuf, 0, sizeof(float) * PART_LEN);
+
+  aec->hNlFbMin = 1;
+  aec->hNlFbLocalMin = 1;
+  aec->hNlXdAvgMin = 1;
+  aec->hNlNewMin = 0;
+  aec->hNlMinCtr = 0;
+  aec->overDrive = 2;
+  aec->overDriveSm = 2;
+  aec->delayIdx = 0;
+  aec->stNearState = 0;
+  aec->echoState = 0;
+  aec->divergeState = 0;
+
+  aec->seed = 777;
+  aec->delayEstCtr = 0;
+
+  // Metrics disabled by default
+  aec->metricsMode = 0;
+  InitMetrics(aec);
+
+  return 0;
+}
+
+void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) {
+  float fft[PART_LEN2];
+  float xf[2][PART_LEN1];
+
+  // Check if the buffer is full, and in that case flush the oldest data.
+  if (WebRtc_available_write(aec->far_buf) < 1) {
+    WebRtcAec_MoveFarReadPtr(aec, 1);
+  }
+  // Convert far-end partition to the frequency domain without windowing.
+  memcpy(fft, farend, sizeof(float) * PART_LEN2);
+  TimeToFrequency(fft, xf, 0);
+  WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1);
+
+  // Convert far-end partition to the frequency domain with windowing.
+  memcpy(fft, farend, sizeof(float) * PART_LEN2);
+  TimeToFrequency(fft, xf, 1);
+  WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1);
+}
+
+int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) {
+  int elements_moved = MoveFarReadPtrWithoutSystemDelayUpdate(aec, elements);
+  aec->system_delay -= elements_moved * PART_LEN;
+  return elements_moved;
+}
+
+void WebRtcAec_ProcessFrames(AecCore* aec,
+                             const float* const* nearend,
+                             size_t num_bands,
+                             size_t num_samples,
+                             int knownDelay,
+                             float* const* out) {
+  size_t i, j;
+  int out_elements = 0;
+
+  aec->frame_count++;
+  // For each frame the process is as follows:
+  // 1) If the system_delay indicates on being too small for processing a
+  //    frame we stuff the buffer with enough data for 10 ms.
+  // 2 a) Adjust the buffer to the system delay, by moving the read pointer.
+  //   b) Apply signal based delay correction, if we have detected poor AEC
+  //    performance.
+  // 3) TODO(bjornv): Investigate if we need to add this:
+  //    If we can't move read pointer due to buffer size limitations we
+  //    flush/stuff the buffer.
+  // 4) Process as many partitions as possible.
+  // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN
+  //    samples. Even though we will have data left to process (we work with
+  //    partitions) we consider updating a whole frame, since that's the
+  //    amount of data we input and output in audio_processing.
+  // 6) Update the outputs.
+
+  // The AEC has two different delay estimation algorithms built in.  The
+  // first relies on delay input values from the user and the amount of
+  // shifted buffer elements is controlled by |knownDelay|.  This delay will
+  // give a guess on how much we need to shift far-end buffers to align with
+  // the near-end signal.  The other delay estimation algorithm uses the
+  // far- and near-end signals to find the offset between them.  This one
+  // (called "signal delay") is then used to fine tune the alignment, or
+  // simply compensate for errors in the system based one.
+  // Note that the two algorithms operate independently.  Currently, we only
+  // allow one algorithm to be turned on.
+
+  assert(aec->num_bands == num_bands);
+
+  for (j = 0; j < num_samples; j+= FRAME_LEN) {
+    // TODO(bjornv): Change the near-end buffer handling to be the same as for
+    // far-end, that is, with a near_pre_buf.
+    // Buffer the near-end frame.
+    WebRtc_WriteBuffer(aec->nearFrBuf, &nearend[0][j], FRAME_LEN);
+    // For H band
+    for (i = 1; i < num_bands; ++i) {
+      WebRtc_WriteBuffer(aec->nearFrBufH[i - 1], &nearend[i][j], FRAME_LEN);
+    }
+
+    // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
+    // have enough far-end data for that by stuffing the buffer if the
+    // |system_delay| indicates others.
+    if (aec->system_delay < FRAME_LEN) {
+      // We don't have enough data so we rewind 10 ms.
+      WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1));
+    }
+
+    if (!aec->delay_agnostic_enabled) {
+      // 2 a) Compensate for a possible change in the system delay.
+
+      // TODO(bjornv): Investigate how we should round the delay difference;
+      // right now we know that incoming |knownDelay| is underestimated when
+      // it's less than |aec->knownDelay|. We therefore, round (-32) in that
+      // direction. In the other direction, we don't have this situation, but
+      // might flush one partition too little. This can cause non-causality,
+      // which should be investigated. Maybe, allow for a non-symmetric
+      // rounding, like -16.
+      int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
+      int moved_elements =
+          MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements);
+      aec->knownDelay -= moved_elements * PART_LEN;
+    } else {
+      // 2 b) Apply signal based delay correction.
+      int move_elements = SignalBasedDelayCorrection(aec);
+      int moved_elements =
+          MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements);
+      int far_near_buffer_diff = WebRtc_available_read(aec->far_buf) -
+          WebRtc_available_read(aec->nearFrBuf) / PART_LEN;
+      WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements);
+      WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend,
+                                           moved_elements);
+      aec->signal_delay_correction += moved_elements;
+      // If we rely on reported system delay values only, a buffer underrun here
+      // can never occur since we've taken care of that in 1) above.  Here, we
+      // apply signal based delay correction and can therefore end up with
+      // buffer underruns since the delay estimation can be wrong.  We therefore
+      // stuff the buffer with enough elements if needed.
+      if (far_near_buffer_diff < 0) {
+        WebRtcAec_MoveFarReadPtr(aec, far_near_buffer_diff);
+      }
+    }
+
+    // 4) Process as many blocks as possible.
+    while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
+      ProcessBlock(aec);
+    }
+
+    // 5) Update system delay with respect to the entire frame.
+    aec->system_delay -= FRAME_LEN;
+
+    // 6) Update output frame.
+    // Stuff the out buffer if we have less than a frame to output.
+    // This should only happen for the first frame.
+    out_elements = (int)WebRtc_available_read(aec->outFrBuf);
+    if (out_elements < FRAME_LEN) {
+      WebRtc_MoveReadPtr(aec->outFrBuf, out_elements - FRAME_LEN);
+      for (i = 0; i < num_bands - 1; ++i) {
+        WebRtc_MoveReadPtr(aec->outFrBufH[i], out_elements - FRAME_LEN);
+      }
+    }
+    // Obtain an output frame.
+    WebRtc_ReadBuffer(aec->outFrBuf, NULL, &out[0][j], FRAME_LEN);
+    // For H bands.
+    for (i = 1; i < num_bands; ++i) {
+      WebRtc_ReadBuffer(aec->outFrBufH[i - 1], NULL, &out[i][j], FRAME_LEN);
+    }
+  }
+}
+
+int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std,
+                                  float* fraction_poor_delays) {
+  assert(self != NULL);
+  assert(median != NULL);
+  assert(std != NULL);
+
+  if (self->delay_logging_enabled == 0) {
+    // Logging disabled.
+    return -1;
+  }
+
+  if (self->delay_metrics_delivered == 0) {
+    UpdateDelayMetrics(self);
+    self->delay_metrics_delivered = 1;
+  }
+  *median = self->delay_median;
+  *std = self->delay_std;
+  *fraction_poor_delays = self->fraction_poor_delays;
+
+  return 0;
+}
+
+int WebRtcAec_echo_state(AecCore* self) { return self->echoState; }
+
+void WebRtcAec_GetEchoStats(AecCore* self,
+                            Stats* erl,
+                            Stats* erle,
+                            Stats* a_nlp) {
+  assert(erl != NULL);
+  assert(erle != NULL);
+  assert(a_nlp != NULL);
+  *erl = self->erl;
+  *erle = self->erle;
+  *a_nlp = self->aNlp;
+}
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+void* WebRtcAec_far_time_buf(AecCore* self) { return self->far_time_buf; }
+#endif
+
+void WebRtcAec_SetConfigCore(AecCore* self,
+                             int nlp_mode,
+                             int metrics_mode,
+                             int delay_logging) {
+  assert(nlp_mode >= 0 && nlp_mode < 3);
+  self->nlp_mode = nlp_mode;
+  self->metricsMode = metrics_mode;
+  if (self->metricsMode) {
+    InitMetrics(self);
+  }
+  // Turn on delay logging if it is either set explicitly or if delay agnostic
+  // AEC is enabled (which requires delay estimates).
+  self->delay_logging_enabled = delay_logging || self->delay_agnostic_enabled;
+  if (self->delay_logging_enabled) {
+    memset(self->delay_histogram, 0, sizeof(self->delay_histogram));
+  }
+}
+
+void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable) {
+  self->delay_agnostic_enabled = enable;
+}
+
+int WebRtcAec_delay_agnostic_enabled(AecCore* self) {
+  return self->delay_agnostic_enabled;
+}
+
+void WebRtcAec_enable_extended_filter(AecCore* self, int enable) {
+  self->extended_filter_enabled = enable;
+  self->num_partitions = enable ? kExtendedNumPartitions : kNormalNumPartitions;
+  // Update the delay estimator with filter length.  See InitAEC() for details.
+  WebRtc_set_allowed_offset(self->delay_estimator, self->num_partitions / 2);
+}
+
+int WebRtcAec_extended_filter_enabled(AecCore* self) {
+  return self->extended_filter_enabled;
+}
+
+int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; }
+
+void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {
+  assert(delay >= 0);
+  self->system_delay = delay;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.h
new file mode 100644
index 00000000..241f0775
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.h
@@ -0,0 +1,129 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * Specifies the interface for the AEC core.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
+
+#include <stddef.h>
+
+#include "webrtc/typedefs.h"
+
+#define FRAME_LEN 80
+#define PART_LEN 64               // Length of partition
+#define PART_LEN1 (PART_LEN + 1)  // Unique fft coefficients
+#define PART_LEN2 (PART_LEN * 2)  // Length of partition * 2
+#define NUM_HIGH_BANDS_MAX  2     // Max number of high bands
+
+typedef float complex_t[2];
+// For performance reasons, some arrays of complex numbers are replaced by twice
+// as long arrays of float, all the real parts followed by all the imaginary
+// ones (complex_t[SIZE] -> float[2][SIZE]). This allows SIMD optimizations and
+// is better than two arrays (one for the real parts and one for the imaginary
+// parts) as this other way would require two pointers instead of one and cause
+// extra register spilling. This also allows the offsets to be calculated at
+// compile time.
+
+// Metrics
+enum {
+  kOffsetLevel = -100
+};
+
+typedef struct Stats {
+  float instant;
+  float average;
+  float min;
+  float max;
+  float sum;
+  float hisum;
+  float himean;
+  int counter;
+  int hicounter;
+} Stats;
+
+typedef struct AecCore AecCore;
+
+AecCore* WebRtcAec_CreateAec();  // Returns NULL on error.
+void WebRtcAec_FreeAec(AecCore* aec);
+int WebRtcAec_InitAec(AecCore* aec, int sampFreq);
+void WebRtcAec_InitAec_SSE2(void);
+#if defined(MIPS_FPU_LE)
+void WebRtcAec_InitAec_mips(void);
+#endif
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+void WebRtcAec_InitAec_neon(void);
+#endif
+
+void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend);
+void WebRtcAec_ProcessFrames(AecCore* aec,
+                             const float* const* nearend,
+                             size_t num_bands,
+                             size_t num_samples,
+                             int knownDelay,
+                             float* const* out);
+
+// A helper function to call WebRtc_MoveReadPtr() for all far-end buffers.
+// Returns the number of elements moved, and adjusts |system_delay| by the
+// corresponding amount in ms.
+int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements);
+
+// Calculates the median, standard deviation and amount of poor values among the
+// delay estimates aggregated up to the first call to the function. After that
+// first call the metrics are aggregated and updated every second. With poor
+// values we mean values that most likely will cause the AEC to perform poorly.
+// TODO(bjornv): Consider changing tests and tools to handle constant
+// constant aggregation window throughout the session instead.
+int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std,
+                                  float* fraction_poor_delays);
+
+// Returns the echo state (1: echo, 0: no echo).
+int WebRtcAec_echo_state(AecCore* self);
+
+// Gets statistics of the echo metrics ERL, ERLE, A_NLP.
+void WebRtcAec_GetEchoStats(AecCore* self,
+                            Stats* erl,
+                            Stats* erle,
+                            Stats* a_nlp);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+void* WebRtcAec_far_time_buf(AecCore* self);
+#endif
+
+// Sets local configuration modes.
+void WebRtcAec_SetConfigCore(AecCore* self,
+                             int nlp_mode,
+                             int metrics_mode,
+                             int delay_logging);
+
+// Non-zero enables, zero disables.
+void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable);
+
+// Returns non-zero if delay agnostic (i.e., signal based delay estimation) is
+// enabled and zero if disabled.
+int WebRtcAec_delay_agnostic_enabled(AecCore* self);
+
+// Enables or disables extended filter mode. Non-zero enables, zero disables.
+void WebRtcAec_enable_extended_filter(AecCore* self, int enable);
+
+// Returns non-zero if extended filter mode is enabled and zero if disabled.
+int WebRtcAec_extended_filter_enabled(AecCore* self);
+
+// Returns the current |system_delay|, i.e., the buffered difference between
+// far-end and near-end.
+int WebRtcAec_system_delay(AecCore* self);
+
+// Sets the |system_delay| to |value|.  Note that if the value is changed
+// improperly, there can be a performance regression.  So it should be used with
+// care.
+void WebRtcAec_SetSystemDelay(AecCore* self, int delay);
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_internal.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_internal.h
new file mode 100644
index 00000000..2de02837
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_internal.h
@@ -0,0 +1,202 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/wav_file.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+#include "webrtc/typedefs.h"
+
+// Number of partitions for the extended filter mode. The first one is an enum
+// to be used in array declarations, as it represents the maximum filter length.
+enum {
+  kExtendedNumPartitions = 32
+};
+static const int kNormalNumPartitions = 12;
+
+// Delay estimator constants, used for logging and delay compensation if
+// if reported delays are disabled.
+enum {
+  kLookaheadBlocks = 15
+};
+enum {
+  // 500 ms for 16 kHz which is equivalent with the limit of reported delays.
+  kHistorySizeBlocks = 125
+};
+
+// Extended filter adaptation parameters.
+// TODO(ajm): No narrowband tuning yet.
+static const float kExtendedMu = 0.4f;
+static const float kExtendedErrorThreshold = 1.0e-6f;
+
+typedef struct PowerLevel {
+  float sfrsum;
+  int sfrcounter;
+  float framelevel;
+  float frsum;
+  int frcounter;
+  float minlevel;
+  float averagelevel;
+} PowerLevel;
+
+struct AecCore {
+  int farBufWritePos, farBufReadPos;
+
+  int knownDelay;
+  int inSamples, outSamples;
+  int delayEstCtr;
+
+  RingBuffer* nearFrBuf;
+  RingBuffer* outFrBuf;
+
+  RingBuffer* nearFrBufH[NUM_HIGH_BANDS_MAX];
+  RingBuffer* outFrBufH[NUM_HIGH_BANDS_MAX];
+
+  float dBuf[PART_LEN2];  // nearend
+  float eBuf[PART_LEN2];  // error
+
+  float dBufH[NUM_HIGH_BANDS_MAX][PART_LEN2];  // nearend
+
+  float xPow[PART_LEN1];
+  float dPow[PART_LEN1];
+  float dMinPow[PART_LEN1];
+  float dInitMinPow[PART_LEN1];
+  float* noisePow;
+
+  float xfBuf[2][kExtendedNumPartitions * PART_LEN1];  // farend fft buffer
+  float wfBuf[2][kExtendedNumPartitions * PART_LEN1];  // filter fft
+  complex_t sde[PART_LEN1];  // cross-psd of nearend and error
+  complex_t sxd[PART_LEN1];  // cross-psd of farend and nearend
+  // Farend windowed fft buffer.
+  complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1];
+
+  float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1];  // far, near, error psd
+  float hNs[PART_LEN1];
+  float hNlFbMin, hNlFbLocalMin;
+  float hNlXdAvgMin;
+  int hNlNewMin, hNlMinCtr;
+  float overDrive, overDriveSm;
+  int nlp_mode;
+  float outBuf[PART_LEN];
+  int delayIdx;
+
+  short stNearState, echoState;
+  short divergeState;
+
+  int xfBufBlockPos;
+
+  RingBuffer* far_buf;
+  RingBuffer* far_buf_windowed;
+  int system_delay;  // Current system delay buffered in AEC.
+
+  int mult;  // sampling frequency multiple
+  int sampFreq;
+  size_t num_bands;
+  uint32_t seed;
+
+  float normal_mu;               // stepsize
+  float normal_error_threshold;  // error threshold
+
+  int noiseEstCtr;
+
+  PowerLevel farlevel;
+  PowerLevel nearlevel;
+  PowerLevel linoutlevel;
+  PowerLevel nlpoutlevel;
+
+  int metricsMode;
+  int stateCounter;
+  Stats erl;
+  Stats erle;
+  Stats aNlp;
+  Stats rerl;
+
+  // Quantities to control H band scaling for SWB input
+  int freq_avg_ic;       // initial bin for averaging nlp gain
+  int flag_Hband_cn;     // for comfort noise
+  float cn_scale_Hband;  // scale for comfort noise in H band
+
+  int delay_metrics_delivered;
+  int delay_histogram[kHistorySizeBlocks];
+  int num_delay_values;
+  int delay_median;
+  int delay_std;
+  float fraction_poor_delays;
+  int delay_logging_enabled;
+  void* delay_estimator_farend;
+  void* delay_estimator;
+  // Variables associated with delay correction through signal based delay
+  // estimation feedback.
+  int signal_delay_correction;
+  int previous_delay;
+  int delay_correction_count;
+  int shift_offset;
+  float delay_quality_threshold;
+  int frame_count;
+
+  // 0 = delay agnostic mode (signal based delay correction) disabled.
+  // Otherwise enabled.
+  int delay_agnostic_enabled;
+  // 1 = extended filter mode enabled, 0 = disabled.
+  int extended_filter_enabled;
+  // Runtime selection of number of filter partitions.
+  int num_partitions;
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  // Sequence number of this AEC instance, so that different instances can
+  // choose different dump file names.
+  int instance_index;
+
+  // Number of times we've restarted dumping; used to pick new dump file names
+  // each time.
+  int debug_dump_count;
+
+  RingBuffer* far_time_buf;
+  rtc_WavWriter* farFile;
+  rtc_WavWriter* nearFile;
+  rtc_WavWriter* outFile;
+  rtc_WavWriter* outLinearFile;
+  FILE* e_fft_file;
+#endif
+};
+
+typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]);
+extern WebRtcAecFilterFar WebRtcAec_FilterFar;
+typedef void (*WebRtcAecScaleErrorSignal)(AecCore* aec, float ef[2][PART_LEN1]);
+extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
+typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec,
+                                          float* fft,
+                                          float ef[2][PART_LEN1]);
+extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
+typedef void (*WebRtcAecOverdriveAndSuppress)(AecCore* aec,
+                                              float hNl[PART_LEN1],
+                                              const float hNlFb,
+                                              float efw[2][PART_LEN1]);
+extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
+
+typedef void (*WebRtcAecComfortNoise)(AecCore* aec,
+                                      float efw[2][PART_LEN1],
+                                      complex_t* comfortNoiseHband,
+                                      const float* noisePow,
+                                      const float* lambda);
+extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
+
+typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec,
+                                          float efw[2][PART_LEN1],
+                                          float xfw[2][PART_LEN1],
+                                          float* fft,
+                                          float* cohde,
+                                          float* cohxd);
+extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_mips.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_mips.c
new file mode 100644
index 00000000..bb33087a
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_mips.c
@@ -0,0 +1,774 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The core AEC algorithm, which is presented with time-aligned signals.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+#include <math.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+static const int flagHbandCn = 1; // flag for adding comfort noise in H band
+extern const float WebRtcAec_weightCurve[65];
+extern const float WebRtcAec_overDriveCurve[65];
+
+void WebRtcAec_ComfortNoise_mips(AecCore* aec,
+                                 float efw[2][PART_LEN1],
+                                 complex_t* comfortNoiseHband,
+                                 const float* noisePow,
+                                 const float* lambda) {
+  int i, num;
+  float rand[PART_LEN];
+  float noise, noiseAvg, tmp, tmpAvg;
+  int16_t randW16[PART_LEN];
+  complex_t u[PART_LEN1];
+
+  const float pi2 = 6.28318530717959f;
+  const float pi2t = pi2 / 32768;
+
+  // Generate a uniform random array on [0 1]
+  WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
+
+  int16_t* randWptr = randW16;
+  float randTemp, randTemp2, randTemp3, randTemp4;
+  int32_t tmp1s, tmp2s, tmp3s, tmp4s;
+
+  for (i = 0; i < PART_LEN; i+=4) {
+    __asm __volatile (
+      ".set     push                                           \n\t"
+      ".set     noreorder                                      \n\t"
+      "lh       %[tmp1s],       0(%[randWptr])                 \n\t"
+      "lh       %[tmp2s],       2(%[randWptr])                 \n\t"
+      "lh       %[tmp3s],       4(%[randWptr])                 \n\t"
+      "lh       %[tmp4s],       6(%[randWptr])                 \n\t"
+      "mtc1     %[tmp1s],       %[randTemp]                    \n\t"
+      "mtc1     %[tmp2s],       %[randTemp2]                   \n\t"
+      "mtc1     %[tmp3s],       %[randTemp3]                   \n\t"
+      "mtc1     %[tmp4s],       %[randTemp4]                   \n\t"
+      "cvt.s.w  %[randTemp],    %[randTemp]                    \n\t"
+      "cvt.s.w  %[randTemp2],   %[randTemp2]                   \n\t"
+      "cvt.s.w  %[randTemp3],   %[randTemp3]                   \n\t"
+      "cvt.s.w  %[randTemp4],   %[randTemp4]                   \n\t"
+      "addiu    %[randWptr],    %[randWptr],      8            \n\t"
+      "mul.s    %[randTemp],    %[randTemp],      %[pi2t]      \n\t"
+      "mul.s    %[randTemp2],   %[randTemp2],     %[pi2t]      \n\t"
+      "mul.s    %[randTemp3],   %[randTemp3],     %[pi2t]      \n\t"
+      "mul.s    %[randTemp4],   %[randTemp4],     %[pi2t]      \n\t"
+      ".set     pop                                            \n\t"
+      : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp),
+        [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3),
+        [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s),
+        [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s),
+        [tmp4s] "=&r" (tmp4s)
+      : [pi2t] "f" (pi2t)
+      : "memory"
+    );
+
+    u[i+1][0] = cosf(randTemp);
+    u[i+1][1] = sinf(randTemp);
+    u[i+2][0] = cosf(randTemp2);
+    u[i+2][1] = sinf(randTemp2);
+    u[i+3][0] = cosf(randTemp3);
+    u[i+3][1] = sinf(randTemp3);
+    u[i+4][0] = cosf(randTemp4);
+    u[i+4][1] = sinf(randTemp4);
+  }
+
+  // Reject LF noise
+  float* u_ptr = &u[1][0];
+  float noise2, noise3, noise4;
+  float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f;
+
+  u[0][0] = 0;
+  u[0][1] = 0;
+  for (i = 1; i < PART_LEN1; i+=4) {
+    __asm __volatile (
+      ".set     push                                            \n\t"
+      ".set     noreorder                                       \n\t"
+      "lwc1     %[noise],       4(%[noisePow])                  \n\t"
+      "lwc1     %[noise2],      8(%[noisePow])                  \n\t"
+      "lwc1     %[noise3],      12(%[noisePow])                 \n\t"
+      "lwc1     %[noise4],      16(%[noisePow])                 \n\t"
+      "sqrt.s   %[noise],       %[noise]                        \n\t"
+      "sqrt.s   %[noise2],      %[noise2]                       \n\t"
+      "sqrt.s   %[noise3],      %[noise3]                       \n\t"
+      "sqrt.s   %[noise4],      %[noise4]                       \n\t"
+      "lwc1     %[tmp1f],       0(%[u_ptr])                     \n\t"
+      "lwc1     %[tmp2f],       4(%[u_ptr])                     \n\t"
+      "lwc1     %[tmp3f],       8(%[u_ptr])                     \n\t"
+      "lwc1     %[tmp4f],       12(%[u_ptr])                    \n\t"
+      "lwc1     %[tmp5f],       16(%[u_ptr])                    \n\t"
+      "lwc1     %[tmp6f],       20(%[u_ptr])                    \n\t"
+      "lwc1     %[tmp7f],       24(%[u_ptr])                    \n\t"
+      "lwc1     %[tmp8f],       28(%[u_ptr])                    \n\t"
+      "addiu    %[noisePow],    %[noisePow],      16            \n\t"
+      "mul.s    %[tmp1f],       %[tmp1f],         %[noise]      \n\t"
+      "mul.s    %[tmp2f],       %[tmp2f],         %[noise]      \n\t"
+      "mul.s    %[tmp3f],       %[tmp3f],         %[noise2]     \n\t"
+      "mul.s    %[tmp4f],       %[tmp4f],         %[noise2]     \n\t"
+      "mul.s    %[tmp5f],       %[tmp5f],         %[noise3]     \n\t"
+      "mul.s    %[tmp6f],       %[tmp6f],         %[noise3]     \n\t"
+      "swc1     %[tmp1f],       0(%[u_ptr])                     \n\t"
+      "swc1     %[tmp3f],       8(%[u_ptr])                     \n\t"
+      "mul.s    %[tmp8f],       %[tmp8f],         %[noise4]     \n\t"
+      "mul.s    %[tmp7f],       %[tmp7f],         %[noise4]     \n\t"
+      "neg.s    %[tmp2f]                                        \n\t"
+      "neg.s    %[tmp4f]                                        \n\t"
+      "neg.s    %[tmp6f]                                        \n\t"
+      "neg.s    %[tmp8f]                                        \n\t"
+      "swc1     %[tmp5f],       16(%[u_ptr])                    \n\t"
+      "swc1     %[tmp7f],       24(%[u_ptr])                    \n\t"
+      "swc1     %[tmp2f],       4(%[u_ptr])                     \n\t"
+      "swc1     %[tmp4f],       12(%[u_ptr])                    \n\t"
+      "swc1     %[tmp6f],       20(%[u_ptr])                    \n\t"
+      "swc1     %[tmp8f],       28(%[u_ptr])                    \n\t"
+      "addiu    %[u_ptr],       %[u_ptr],         32            \n\t"
+      ".set     pop                                             \n\t"
+      : [u_ptr] "+r" (u_ptr),  [noisePow] "+r" (noisePow),
+        [noise] "=&f" (noise), [noise2] "=&f" (noise2),
+        [noise3] "=&f" (noise3), [noise4] "=&f" (noise4),
+        [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f),
+        [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f),
+        [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f),
+        [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f)
+      :
+      : "memory"
+    );
+  }
+  u[PART_LEN][1] = 0;
+  noisePow -= PART_LEN;
+
+  u_ptr = &u[0][0];
+  float* u_ptr_end = &u[PART_LEN][0];
+  float* efw_ptr_0 = &efw[0][0];
+  float* efw_ptr_1 = &efw[1][0];
+  float tmp9f, tmp10f;
+  const float tmp1c = 1.0;
+
+  __asm __volatile (
+    ".set     push                                                        \n\t"
+    ".set     noreorder                                                   \n\t"
+   "1:                                                                    \n\t"
+    "lwc1     %[tmp1f],       0(%[lambda])                                \n\t"
+    "lwc1     %[tmp6f],       4(%[lambda])                                \n\t"
+    "addiu    %[lambda],      %[lambda],        8                         \n\t"
+    "c.lt.s   %[tmp1f],       %[tmp1c]                                    \n\t"
+    "bc1f     4f                                                          \n\t"
+    " nop                                                                 \n\t"
+    "c.lt.s   %[tmp6f],       %[tmp1c]                                    \n\t"
+    "bc1f     3f                                                          \n\t"
+    " nop                                                                 \n\t"
+   "2:                                                                    \n\t"
+    "mul.s    %[tmp1f],       %[tmp1f],         %[tmp1f]                  \n\t"
+    "mul.s    %[tmp6f],       %[tmp6f],         %[tmp6f]                  \n\t"
+    "sub.s    %[tmp1f],       %[tmp1c],         %[tmp1f]                  \n\t"
+    "sub.s    %[tmp6f],       %[tmp1c],         %[tmp6f]                  \n\t"
+    "sqrt.s   %[tmp1f],       %[tmp1f]                                    \n\t"
+    "sqrt.s   %[tmp6f],       %[tmp6f]                                    \n\t"
+    "lwc1     %[tmp2f],       0(%[efw_ptr_0])                             \n\t"
+    "lwc1     %[tmp3f],       0(%[u_ptr])                                 \n\t"
+    "lwc1     %[tmp7f],       4(%[efw_ptr_0])                             \n\t"
+    "lwc1     %[tmp8f],       8(%[u_ptr])                                 \n\t"
+    "lwc1     %[tmp4f],       0(%[efw_ptr_1])                             \n\t"
+    "lwc1     %[tmp5f],       4(%[u_ptr])                                 \n\t"
+    "lwc1     %[tmp9f],       4(%[efw_ptr_1])                             \n\t"
+    "lwc1     %[tmp10f],      12(%[u_ptr])                                \n\t"
+#if !defined(MIPS32_R2_LE)
+    "mul.s    %[tmp3f],       %[tmp1f],         %[tmp3f]                  \n\t"
+    "add.s    %[tmp2f],       %[tmp2f],         %[tmp3f]                  \n\t"
+    "mul.s    %[tmp3f],       %[tmp1f],         %[tmp5f]                  \n\t"
+    "add.s    %[tmp4f],       %[tmp4f],         %[tmp3f]                  \n\t"
+    "mul.s    %[tmp3f],       %[tmp6f],         %[tmp8f]                  \n\t"
+    "add.s    %[tmp7f],       %[tmp7f],         %[tmp3f]                  \n\t"
+    "mul.s    %[tmp3f],       %[tmp6f],         %[tmp10f]                 \n\t"
+    "add.s    %[tmp9f],       %[tmp9f],         %[tmp3f]                  \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+    "madd.s   %[tmp2f],       %[tmp2f],         %[tmp1f],     %[tmp3f]    \n\t"
+    "madd.s   %[tmp4f],       %[tmp4f],         %[tmp1f],     %[tmp5f]    \n\t"
+    "madd.s   %[tmp7f],       %[tmp7f],         %[tmp6f],     %[tmp8f]    \n\t"
+    "madd.s   %[tmp9f],       %[tmp9f],         %[tmp6f],     %[tmp10f]   \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+    "swc1     %[tmp2f],       0(%[efw_ptr_0])                             \n\t"
+    "swc1     %[tmp4f],       0(%[efw_ptr_1])                             \n\t"
+    "swc1     %[tmp7f],       4(%[efw_ptr_0])                             \n\t"
+    "b        5f                                                          \n\t"
+    " swc1    %[tmp9f],       4(%[efw_ptr_1])                             \n\t"
+   "3:                                                                    \n\t"
+    "mul.s    %[tmp1f],       %[tmp1f],         %[tmp1f]                  \n\t"
+    "sub.s    %[tmp1f],       %[tmp1c],         %[tmp1f]                  \n\t"
+    "sqrt.s   %[tmp1f],       %[tmp1f]                                    \n\t"
+    "lwc1     %[tmp2f],       0(%[efw_ptr_0])                             \n\t"
+    "lwc1     %[tmp3f],       0(%[u_ptr])                                 \n\t"
+    "lwc1     %[tmp4f],       0(%[efw_ptr_1])                             \n\t"
+    "lwc1     %[tmp5f],       4(%[u_ptr])                                 \n\t"
+#if !defined(MIPS32_R2_LE)
+    "mul.s    %[tmp3f],       %[tmp1f],         %[tmp3f]                  \n\t"
+    "add.s    %[tmp2f],       %[tmp2f],         %[tmp3f]                  \n\t"
+    "mul.s    %[tmp3f],       %[tmp1f],         %[tmp5f]                  \n\t"
+    "add.s    %[tmp4f],       %[tmp4f],         %[tmp3f]                  \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+    "madd.s   %[tmp2f],       %[tmp2f],         %[tmp1f],     %[tmp3f]    \n\t"
+    "madd.s   %[tmp4f],       %[tmp4f],         %[tmp1f],     %[tmp5f]    \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+    "swc1     %[tmp2f],       0(%[efw_ptr_0])                             \n\t"
+    "b        5f                                                          \n\t"
+    " swc1    %[tmp4f],       0(%[efw_ptr_1])                             \n\t"
+   "4:                                                                    \n\t"
+    "c.lt.s   %[tmp6f],       %[tmp1c]                                    \n\t"
+    "bc1f     5f                                                          \n\t"
+    " nop                                                                 \n\t"
+    "mul.s    %[tmp6f],       %[tmp6f],         %[tmp6f]                  \n\t"
+    "sub.s    %[tmp6f],       %[tmp1c],         %[tmp6f]                  \n\t"
+    "sqrt.s   %[tmp6f],       %[tmp6f]                                    \n\t"
+    "lwc1     %[tmp7f],       4(%[efw_ptr_0])                             \n\t"
+    "lwc1     %[tmp8f],       8(%[u_ptr])                                 \n\t"
+    "lwc1     %[tmp9f],       4(%[efw_ptr_1])                             \n\t"
+    "lwc1     %[tmp10f],      12(%[u_ptr])                                \n\t"
+#if !defined(MIPS32_R2_LE)
+    "mul.s    %[tmp3f],       %[tmp6f],         %[tmp8f]                  \n\t"
+    "add.s    %[tmp7f],       %[tmp7f],         %[tmp3f]                  \n\t"
+    "mul.s    %[tmp3f],       %[tmp6f],         %[tmp10f]                 \n\t"
+    "add.s    %[tmp9f],       %[tmp9f],         %[tmp3f]                  \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+    "madd.s   %[tmp7f],       %[tmp7f],         %[tmp6f],     %[tmp8f]    \n\t"
+    "madd.s   %[tmp9f],       %[tmp9f],         %[tmp6f],     %[tmp10f]   \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+    "swc1     %[tmp7f],       4(%[efw_ptr_0])                             \n\t"
+    "swc1     %[tmp9f],       4(%[efw_ptr_1])                             \n\t"
+   "5:                                                                    \n\t"
+    "addiu    %[u_ptr],       %[u_ptr],         16                        \n\t"
+    "addiu    %[efw_ptr_0],   %[efw_ptr_0],     8                         \n\t"
+    "bne      %[u_ptr],       %[u_ptr_end],     1b                        \n\t"
+    " addiu   %[efw_ptr_1],   %[efw_ptr_1],     8                         \n\t"
+    ".set     pop                                                         \n\t"
+    : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr),
+      [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1),
+      [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f),
+      [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f),
+      [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f),
+      [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f)
+    : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end)
+    : "memory"
+  );
+
+  lambda -= PART_LEN;
+  tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0));
+  //tmp = 1 - lambda[i];
+  efw[0][PART_LEN] += tmp * u[PART_LEN][0];
+  efw[1][PART_LEN] += tmp * u[PART_LEN][1];
+
+  // For H band comfort noise
+  // TODO: don't compute noise and "tmp" twice. Use the previous results.
+  noiseAvg = 0.0;
+  tmpAvg = 0.0;
+  num = 0;
+  if ((aec->sampFreq == 32000 || aec->sampFreq == 48000) && flagHbandCn == 1) {
+    for (i = 0; i < PART_LEN; i++) {
+      rand[i] = ((float)randW16[i]) / 32768;
+    }
+
+    // average noise scale
+    // average over second half of freq spectrum (i.e., 4->8khz)
+    // TODO: we shouldn't need num. We know how many elements we're summing.
+    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
+      num++;
+      noiseAvg += sqrtf(noisePow[i]);
+    }
+    noiseAvg /= (float)num;
+
+    // average nlp scale
+    // average over second half of freq spectrum (i.e., 4->8khz)
+    // TODO: we shouldn't need num. We know how many elements we're summing.
+    num = 0;
+    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
+      num++;
+      tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
+    }
+    tmpAvg /= (float)num;
+
+    // Use average noise for H band
+    // TODO: we should probably have a new random vector here.
+    // Reject LF noise
+    u[0][0] = 0;
+    u[0][1] = 0;
+    for (i = 1; i < PART_LEN1; i++) {
+      tmp = pi2 * rand[i - 1];
+
+      // Use average noise for H band
+      u[i][0] = noiseAvg * (float)cos(tmp);
+      u[i][1] = -noiseAvg * (float)sin(tmp);
+    }
+    u[PART_LEN][1] = 0;
+
+    for (i = 0; i < PART_LEN1; i++) {
+      // Use average NLP weight for H band
+      comfortNoiseHband[i][0] = tmpAvg * u[i][0];
+      comfortNoiseHband[i][1] = tmpAvg * u[i][1];
+    }
+  }
+}
+
+void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) {
+  int i;
+  for (i = 0; i < aec->num_partitions; i++) {
+    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int pos = i * PART_LEN1;
+    // Check for wrap
+    if (i + aec->xfBufBlockPos >=  aec->num_partitions) {
+      xPos -=  aec->num_partitions * (PART_LEN1);
+    }
+    float* yf0 = yf[0];
+    float* yf1 = yf[1];
+    float* aRe = aec->xfBuf[0] + xPos;
+    float* aIm = aec->xfBuf[1] + xPos;
+    float* bRe = aec->wfBuf[0] + pos;
+    float* bIm = aec->wfBuf[1] + pos;
+    float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
+    int len = PART_LEN1 >> 1;
+
+    __asm __volatile (
+      ".set       push                                                \n\t"
+      ".set       noreorder                                           \n\t"
+     "1:                                                              \n\t"
+      "lwc1       %[f0],      0(%[aRe])                               \n\t"
+      "lwc1       %[f1],      0(%[bRe])                               \n\t"
+      "lwc1       %[f2],      0(%[bIm])                               \n\t"
+      "lwc1       %[f3],      0(%[aIm])                               \n\t"
+      "lwc1       %[f4],      4(%[aRe])                               \n\t"
+      "lwc1       %[f5],      4(%[bRe])                               \n\t"
+      "lwc1       %[f6],      4(%[bIm])                               \n\t"
+      "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
+      "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
+      "mul.s      %[f9],      %[f4],          %[f5]                   \n\t"
+      "mul.s      %[f4],      %[f4],          %[f6]                   \n\t"
+      "lwc1       %[f7],      4(%[aIm])                               \n\t"
+#if !defined(MIPS32_R2_LE)
+      "mul.s      %[f12],     %[f2],          %[f3]                   \n\t"
+      "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
+      "mul.s      %[f11],     %[f6],          %[f7]                   \n\t"
+      "addiu      %[aRe],     %[aRe],         8                       \n\t"
+      "addiu      %[aIm],     %[aIm],         8                       \n\t"
+      "addiu      %[len],     %[len],         -1                      \n\t"
+      "sub.s      %[f8],      %[f8],          %[f12]                  \n\t"
+      "mul.s      %[f12],     %[f7],          %[f5]                   \n\t"
+      "lwc1       %[f2],      0(%[yf0])                               \n\t"
+      "add.s      %[f1],      %[f0],          %[f1]                   \n\t"
+      "lwc1       %[f3],      0(%[yf1])                               \n\t"
+      "sub.s      %[f9],      %[f9],          %[f11]                  \n\t"
+      "lwc1       %[f6],      4(%[yf0])                               \n\t"
+      "add.s      %[f4],      %[f4],          %[f12]                  \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+      "addiu      %[aRe],     %[aRe],         8                       \n\t"
+      "addiu      %[aIm],     %[aIm],         8                       \n\t"
+      "addiu      %[len],     %[len],         -1                      \n\t"
+      "nmsub.s    %[f8],      %[f8],          %[f2],      %[f3]       \n\t"
+      "lwc1       %[f2],      0(%[yf0])                               \n\t"
+      "madd.s     %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
+      "lwc1       %[f3],      0(%[yf1])                               \n\t"
+      "nmsub.s    %[f9],      %[f9],          %[f6],      %[f7]       \n\t"
+      "lwc1       %[f6],      4(%[yf0])                               \n\t"
+      "madd.s     %[f4],      %[f4],          %[f7],      %[f5]       \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+      "lwc1       %[f5],      4(%[yf1])                               \n\t"
+      "add.s      %[f2],      %[f2],          %[f8]                   \n\t"
+      "addiu      %[bRe],     %[bRe],         8                       \n\t"
+      "addiu      %[bIm],     %[bIm],         8                       \n\t"
+      "add.s      %[f3],      %[f3],          %[f1]                   \n\t"
+      "add.s      %[f6],      %[f6],          %[f9]                   \n\t"
+      "add.s      %[f5],      %[f5],          %[f4]                   \n\t"
+      "swc1       %[f2],      0(%[yf0])                               \n\t"
+      "swc1       %[f3],      0(%[yf1])                               \n\t"
+      "swc1       %[f6],      4(%[yf0])                               \n\t"
+      "swc1       %[f5],      4(%[yf1])                               \n\t"
+      "addiu      %[yf0],     %[yf0],         8                       \n\t"
+      "bgtz       %[len],     1b                                      \n\t"
+      " addiu     %[yf1],     %[yf1],         8                       \n\t"
+      "lwc1       %[f0],      0(%[aRe])                               \n\t"
+      "lwc1       %[f1],      0(%[bRe])                               \n\t"
+      "lwc1       %[f2],      0(%[bIm])                               \n\t"
+      "lwc1       %[f3],      0(%[aIm])                               \n\t"
+      "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
+      "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
+#if !defined(MIPS32_R2_LE)
+      "mul.s      %[f12],     %[f2],          %[f3]                   \n\t"
+      "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
+      "sub.s      %[f8],      %[f8],          %[f12]                  \n\t"
+      "lwc1       %[f2],      0(%[yf0])                               \n\t"
+      "add.s      %[f1],      %[f0],          %[f1]                   \n\t"
+      "lwc1       %[f3],      0(%[yf1])                               \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+      "nmsub.s    %[f8],      %[f8],          %[f2],      %[f3]       \n\t"
+      "lwc1       %[f2],      0(%[yf0])                               \n\t"
+      "madd.s     %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
+      "lwc1       %[f3],      0(%[yf1])                               \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+      "add.s      %[f2],      %[f2],          %[f8]                   \n\t"
+      "add.s      %[f3],      %[f3],          %[f1]                   \n\t"
+      "swc1       %[f2],      0(%[yf0])                               \n\t"
+      "swc1       %[f3],      0(%[yf1])                               \n\t"
+      ".set       pop                                                 \n\t"
+      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
+        [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
+        [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
+        [f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe),
+        [aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm),
+        [yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len)
+      :
+      : "memory"
+    );
+  }
+}
+
+void WebRtcAec_FilterAdaptation_mips(AecCore* aec,
+                                     float* fft,
+                                     float ef[2][PART_LEN1]) {
+  int i;
+  for (i = 0; i < aec->num_partitions; i++) {
+    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
+    int pos;
+    // Check for wrap
+    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
+      xPos -= aec->num_partitions * PART_LEN1;
+    }
+
+    pos = i * PART_LEN1;
+    float* aRe = aec->xfBuf[0] + xPos;
+    float* aIm = aec->xfBuf[1] + xPos;
+    float* bRe = ef[0];
+    float* bIm = ef[1];
+    float* fft_tmp;
+
+    float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12;
+    int len = PART_LEN >> 1;
+
+    __asm __volatile (
+      ".set       push                                                \n\t"
+      ".set       noreorder                                           \n\t"
+      "addiu      %[fft_tmp], %[fft],         0                       \n\t"
+     "1:                                                              \n\t"
+      "lwc1       %[f0],      0(%[aRe])                               \n\t"
+      "lwc1       %[f1],      0(%[bRe])                               \n\t"
+      "lwc1       %[f2],      0(%[bIm])                               \n\t"
+      "lwc1       %[f4],      4(%[aRe])                               \n\t"
+      "lwc1       %[f5],      4(%[bRe])                               \n\t"
+      "lwc1       %[f6],      4(%[bIm])                               \n\t"
+      "addiu      %[aRe],     %[aRe],         8                       \n\t"
+      "addiu      %[bRe],     %[bRe],         8                       \n\t"
+      "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
+      "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
+      "lwc1       %[f3],      0(%[aIm])                               \n\t"
+      "mul.s      %[f9],      %[f4],          %[f5]                   \n\t"
+      "lwc1       %[f7],      4(%[aIm])                               \n\t"
+      "mul.s      %[f4],      %[f4],          %[f6]                   \n\t"
+#if !defined(MIPS32_R2_LE)
+      "mul.s      %[f10],     %[f3],          %[f2]                   \n\t"
+      "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
+      "mul.s      %[f11],     %[f7],          %[f6]                   \n\t"
+      "mul.s      %[f5],      %[f7],          %[f5]                   \n\t"
+      "addiu      %[aIm],     %[aIm],         8                       \n\t"
+      "addiu      %[bIm],     %[bIm],         8                       \n\t"
+      "addiu      %[len],     %[len],         -1                      \n\t"
+      "add.s      %[f8],      %[f8],          %[f10]                  \n\t"
+      "sub.s      %[f1],      %[f0],          %[f1]                   \n\t"
+      "add.s      %[f9],      %[f9],          %[f11]                  \n\t"
+      "sub.s      %[f5],      %[f4],          %[f5]                   \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+      "addiu      %[aIm],     %[aIm],         8                       \n\t"
+      "addiu      %[bIm],     %[bIm],         8                       \n\t"
+      "addiu      %[len],     %[len],         -1                      \n\t"
+      "madd.s     %[f8],      %[f8],          %[f3],      %[f2]       \n\t"
+      "nmsub.s    %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
+      "madd.s     %[f9],      %[f9],          %[f7],      %[f6]       \n\t"
+      "nmsub.s    %[f5],      %[f4],          %[f7],      %[f5]       \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+      "swc1       %[f8],      0(%[fft_tmp])                           \n\t"
+      "swc1       %[f1],      4(%[fft_tmp])                           \n\t"
+      "swc1       %[f9],      8(%[fft_tmp])                           \n\t"
+      "swc1       %[f5],      12(%[fft_tmp])                          \n\t"
+      "bgtz       %[len],     1b                                      \n\t"
+      " addiu     %[fft_tmp], %[fft_tmp],     16                      \n\t"
+      "lwc1       %[f0],      0(%[aRe])                               \n\t"
+      "lwc1       %[f1],      0(%[bRe])                               \n\t"
+      "lwc1       %[f2],      0(%[bIm])                               \n\t"
+      "lwc1       %[f3],      0(%[aIm])                               \n\t"
+      "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
+#if !defined(MIPS32_R2_LE)
+      "mul.s      %[f10],     %[f3],          %[f2]                   \n\t"
+      "add.s      %[f8],      %[f8],          %[f10]                  \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+      "madd.s     %[f8],      %[f8],          %[f3],      %[f2]       \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+      "swc1       %[f8],      4(%[fft])                               \n\t"
+      ".set       pop                                                 \n\t"
+      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
+        [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
+        [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
+        [f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm),
+        [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp),
+        [len] "+r" (len)
+      : [fft] "r" (fft)
+      : "memory"
+    );
+
+    aec_rdft_inverse_128(fft);
+    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
+
+    // fft scaling
+    {
+      float scale = 2.0f / PART_LEN2;
+      __asm __volatile (
+        ".set     push                                    \n\t"
+        ".set     noreorder                               \n\t"
+        "addiu    %[fft_tmp], %[fft],        0            \n\t"
+        "addiu    %[len],     $zero,         8            \n\t"
+       "1:                                                \n\t"
+        "addiu    %[len],     %[len],        -1           \n\t"
+        "lwc1     %[f0],      0(%[fft_tmp])               \n\t"
+        "lwc1     %[f1],      4(%[fft_tmp])               \n\t"
+        "lwc1     %[f2],      8(%[fft_tmp])               \n\t"
+        "lwc1     %[f3],      12(%[fft_tmp])              \n\t"
+        "mul.s    %[f0],      %[f0],         %[scale]     \n\t"
+        "mul.s    %[f1],      %[f1],         %[scale]     \n\t"
+        "mul.s    %[f2],      %[f2],         %[scale]     \n\t"
+        "mul.s    %[f3],      %[f3],         %[scale]     \n\t"
+        "lwc1     %[f4],      16(%[fft_tmp])              \n\t"
+        "lwc1     %[f5],      20(%[fft_tmp])              \n\t"
+        "lwc1     %[f6],      24(%[fft_tmp])              \n\t"
+        "lwc1     %[f7],      28(%[fft_tmp])              \n\t"
+        "mul.s    %[f4],      %[f4],         %[scale]     \n\t"
+        "mul.s    %[f5],      %[f5],         %[scale]     \n\t"
+        "mul.s    %[f6],      %[f6],         %[scale]     \n\t"
+        "mul.s    %[f7],      %[f7],         %[scale]     \n\t"
+        "swc1     %[f0],      0(%[fft_tmp])               \n\t"
+        "swc1     %[f1],      4(%[fft_tmp])               \n\t"
+        "swc1     %[f2],      8(%[fft_tmp])               \n\t"
+        "swc1     %[f3],      12(%[fft_tmp])              \n\t"
+        "swc1     %[f4],      16(%[fft_tmp])              \n\t"
+        "swc1     %[f5],      20(%[fft_tmp])              \n\t"
+        "swc1     %[f6],      24(%[fft_tmp])              \n\t"
+        "swc1     %[f7],      28(%[fft_tmp])              \n\t"
+        "bgtz     %[len],     1b                          \n\t"
+        " addiu   %[fft_tmp], %[fft_tmp],    32           \n\t"
+        ".set     pop                                     \n\t"
+        : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+          [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
+          [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
+          [fft_tmp] "=&r" (fft_tmp)
+        : [scale] "f" (scale), [fft] "r" (fft)
+        : "memory"
+      );
+    }
+    aec_rdft_forward_128(fft);
+    aRe = aec->wfBuf[0] + pos;
+    aIm = aec->wfBuf[1] + pos;
+    __asm __volatile (
+      ".set     push                                    \n\t"
+      ".set     noreorder                               \n\t"
+      "addiu    %[fft_tmp], %[fft],        0            \n\t"
+      "addiu    %[len],     $zero,         31           \n\t"
+      "lwc1     %[f0],      0(%[aRe])                   \n\t"
+      "lwc1     %[f1],      0(%[fft_tmp])               \n\t"
+      "lwc1     %[f2],      256(%[aRe])                 \n\t"
+      "lwc1     %[f3],      4(%[fft_tmp])               \n\t"
+      "lwc1     %[f4],      4(%[aRe])                   \n\t"
+      "lwc1     %[f5],      8(%[fft_tmp])               \n\t"
+      "lwc1     %[f6],      4(%[aIm])                   \n\t"
+      "lwc1     %[f7],      12(%[fft_tmp])              \n\t"
+      "add.s    %[f0],      %[f0],         %[f1]        \n\t"
+      "add.s    %[f2],      %[f2],         %[f3]        \n\t"
+      "add.s    %[f4],      %[f4],         %[f5]        \n\t"
+      "add.s    %[f6],      %[f6],         %[f7]        \n\t"
+      "addiu    %[fft_tmp], %[fft_tmp],    16           \n\t"
+      "swc1     %[f0],      0(%[aRe])                   \n\t"
+      "swc1     %[f2],      256(%[aRe])                 \n\t"
+      "swc1     %[f4],      4(%[aRe])                   \n\t"
+      "addiu    %[aRe],     %[aRe],        8            \n\t"
+      "swc1     %[f6],      4(%[aIm])                   \n\t"
+      "addiu    %[aIm],     %[aIm],        8            \n\t"
+     "1:                                                \n\t"
+      "lwc1     %[f0],      0(%[aRe])                   \n\t"
+      "lwc1     %[f1],      0(%[fft_tmp])               \n\t"
+      "lwc1     %[f2],      0(%[aIm])                   \n\t"
+      "lwc1     %[f3],      4(%[fft_tmp])               \n\t"
+      "lwc1     %[f4],      4(%[aRe])                   \n\t"
+      "lwc1     %[f5],      8(%[fft_tmp])               \n\t"
+      "lwc1     %[f6],      4(%[aIm])                   \n\t"
+      "lwc1     %[f7],      12(%[fft_tmp])              \n\t"
+      "add.s    %[f0],      %[f0],         %[f1]        \n\t"
+      "add.s    %[f2],      %[f2],         %[f3]        \n\t"
+      "add.s    %[f4],      %[f4],         %[f5]        \n\t"
+      "add.s    %[f6],      %[f6],         %[f7]        \n\t"
+      "addiu    %[len],     %[len],        -1           \n\t"
+      "addiu    %[fft_tmp], %[fft_tmp],    16           \n\t"
+      "swc1     %[f0],      0(%[aRe])                   \n\t"
+      "swc1     %[f2],      0(%[aIm])                   \n\t"
+      "swc1     %[f4],      4(%[aRe])                   \n\t"
+      "addiu    %[aRe],     %[aRe],        8            \n\t"
+      "swc1     %[f6],      4(%[aIm])                   \n\t"
+      "bgtz     %[len],     1b                          \n\t"
+      " addiu   %[aIm],     %[aIm],        8            \n\t"
+      ".set     pop                                     \n\t"
+      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
+        [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
+        [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm)
+      : [fft] "r" (fft)
+      : "memory"
+    );
+  }
+}
+
+void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec,
+                                         float hNl[PART_LEN1],
+                                         const float hNlFb,
+                                         float efw[2][PART_LEN1]) {
+  int i;
+  const float one = 1.0;
+  float* p_hNl;
+  float* p_efw0;
+  float* p_efw1;
+  float* p_WebRtcAec_wC;
+  float temp1, temp2, temp3, temp4;
+
+  p_hNl = &hNl[0];
+  p_efw0 = &efw[0][0];
+  p_efw1 = &efw[1][0];
+  p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0];
+
+  for (i = 0; i < PART_LEN1; i++) {
+    // Weight subbands
+    __asm __volatile (
+      ".set      push                                              \n\t"
+      ".set      noreorder                                         \n\t"
+      "lwc1      %[temp1],    0(%[p_hNl])                          \n\t"
+      "lwc1      %[temp2],    0(%[p_wC])                           \n\t"
+      "c.lt.s    %[hNlFb],    %[temp1]                             \n\t"
+      "bc1f      1f                                                \n\t"
+      " mul.s    %[temp3],    %[temp2],     %[hNlFb]               \n\t"
+      "sub.s     %[temp4],    %[one],       %[temp2]               \n\t"
+#if !defined(MIPS32_R2_LE)
+      "mul.s     %[temp1],    %[temp1],     %[temp4]               \n\t"
+      "add.s     %[temp1],    %[temp3],     %[temp1]               \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+      "madd.s    %[temp1],    %[temp3],     %[temp1],   %[temp4]   \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+      "swc1      %[temp1],    0(%[p_hNl])                          \n\t"
+     "1:                                                           \n\t"
+      "addiu     %[p_wC],     %[p_wC],      4                      \n\t"
+      ".set      pop                                               \n\t"
+      : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
+        [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC)
+      : [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl)
+      : "memory"
+    );
+
+    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
+
+    __asm __volatile (
+      "lwc1      %[temp1],    0(%[p_hNl])              \n\t"
+      "lwc1      %[temp3],    0(%[p_efw1])             \n\t"
+      "lwc1      %[temp2],    0(%[p_efw0])             \n\t"
+      "addiu     %[p_hNl],    %[p_hNl],     4          \n\t"
+      "mul.s     %[temp3],    %[temp3],     %[temp1]   \n\t"
+      "mul.s     %[temp2],    %[temp2],     %[temp1]   \n\t"
+      "addiu     %[p_efw0],   %[p_efw0],    4          \n\t"
+      "addiu     %[p_efw1],   %[p_efw1],    4          \n\t"
+      "neg.s     %[temp4],    %[temp3]                 \n\t"
+      "swc1      %[temp2],    -4(%[p_efw0])            \n\t"
+      "swc1      %[temp4],    -4(%[p_efw1])            \n\t"
+      : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
+        [temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1),
+        [p_hNl] "+r" (p_hNl)
+      :
+      : "memory"
+    );
+  }
+}
+
+void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
+  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
+  const float error_threshold = aec->extended_filter_enabled
+                                    ? kExtendedErrorThreshold
+                                    : aec->normal_error_threshold;
+  int len = (PART_LEN1);
+  float* ef0 = ef[0];
+  float* ef1 = ef[1];
+  float* xPow = aec->xPow;
+  float fac1 = 1e-10f;
+  float err_th2 = error_threshold * error_threshold;
+  float f0, f1, f2;
+#if !defined(MIPS32_R2_LE)
+  float f3;
+#endif
+
+  __asm __volatile (
+    ".set       push                                   \n\t"
+    ".set       noreorder                              \n\t"
+   "1:                                                 \n\t"
+    "lwc1       %[f0],     0(%[xPow])                  \n\t"
+    "lwc1       %[f1],     0(%[ef0])                   \n\t"
+    "lwc1       %[f2],     0(%[ef1])                   \n\t"
+    "add.s      %[f0],     %[f0],       %[fac1]        \n\t"
+    "div.s      %[f1],     %[f1],       %[f0]          \n\t"
+    "div.s      %[f2],     %[f2],       %[f0]          \n\t"
+    "mul.s      %[f0],     %[f1],       %[f1]          \n\t"
+#if defined(MIPS32_R2_LE)
+    "madd.s     %[f0],     %[f0],       %[f2],   %[f2] \n\t"
+#else
+    "mul.s      %[f3],     %[f2],       %[f2]          \n\t"
+    "add.s      %[f0],     %[f0],       %[f3]          \n\t"
+#endif
+    "c.le.s     %[f0],     %[err_th2]                  \n\t"
+    "nop                                               \n\t"
+    "bc1t       2f                                     \n\t"
+    " nop                                              \n\t"
+    "sqrt.s     %[f0],     %[f0]                       \n\t"
+    "add.s      %[f0],     %[f0],       %[fac1]        \n\t"
+    "div.s      %[f0],     %[err_th],   %[f0]          \n\t"
+    "mul.s      %[f1],     %[f1],       %[f0]          \n\t"
+    "mul.s      %[f2],     %[f2],       %[f0]          \n\t"
+   "2:                                                 \n\t"
+    "mul.s      %[f1],     %[f1],       %[mu]          \n\t"
+    "mul.s      %[f2],     %[f2],       %[mu]          \n\t"
+    "swc1       %[f1],     0(%[ef0])                   \n\t"
+    "swc1       %[f2],     0(%[ef1])                   \n\t"
+    "addiu      %[len],    %[len],      -1             \n\t"
+    "addiu      %[xPow],   %[xPow],     4              \n\t"
+    "addiu      %[ef0],    %[ef0],      4              \n\t"
+    "bgtz       %[len],    1b                          \n\t"
+    " addiu     %[ef1],    %[ef1],      4              \n\t"
+    ".set       pop                                    \n\t"
+    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+#if !defined(MIPS32_R2_LE)
+      [f3] "=&f" (f3),
+#endif
+      [xPow] "+r" (xPow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
+      [len] "+r" (len)
+    : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),
+      [err_th] "f" (error_threshold)
+    : "memory"
+  );
+}
+
+void WebRtcAec_InitAec_mips(void) {
+  WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
+  WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
+  WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
+  WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
+  WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
+}
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_neon.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_neon.c
new file mode 100644
index 00000000..9a677aaa
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_neon.c
@@ -0,0 +1,736 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The core AEC algorithm, neon version of speed-critical functions.
+ *
+ * Based on aec_core_sse2.c.
+ */
+
+#include <arm_neon.h>
+#include <math.h>
+#include <string.h>  // memset
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+enum { kShiftExponentIntoTopMantissa = 8 };
+enum { kFloatExponentShift = 23 };
+
+__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
+  return aRe * bRe - aIm * bIm;
+}
+
+__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
+  return aRe * bIm + aIm * bRe;
+}
+
+static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) {
+  int i;
+  const int num_partitions = aec->num_partitions;
+  for (i = 0; i < num_partitions; i++) {
+    int j;
+    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int pos = i * PART_LEN1;
+    // Check for wrap
+    if (i + aec->xfBufBlockPos >= num_partitions) {
+      xPos -= num_partitions * PART_LEN1;
+    }
+
+    // vectorized code (four at once)
+    for (j = 0; j + 3 < PART_LEN1; j += 4) {
+      const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
+      const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
+      const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
+      const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
+      const float32x4_t yf_re = vld1q_f32(&yf[0][j]);
+      const float32x4_t yf_im = vld1q_f32(&yf[1][j]);
+      const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re);
+      const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im);
+      const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im);
+      const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re);
+      const float32x4_t g = vaddq_f32(yf_re, e);
+      const float32x4_t h = vaddq_f32(yf_im, f);
+      vst1q_f32(&yf[0][j], g);
+      vst1q_f32(&yf[1][j], h);
+    }
+    // scalar code for the remaining items.
+    for (; j < PART_LEN1; j++) {
+      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
+                        aec->xfBuf[1][xPos + j],
+                        aec->wfBuf[0][pos + j],
+                        aec->wfBuf[1][pos + j]);
+      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
+                        aec->xfBuf[1][xPos + j],
+                        aec->wfBuf[0][pos + j],
+                        aec->wfBuf[1][pos + j]);
+    }
+  }
+}
+
+// ARM64's arm_neon.h has already defined vdivq_f32 vsqrtq_f32.
+#if !defined (WEBRTC_ARCH_ARM64)
+static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) {
+  int i;
+  float32x4_t x = vrecpeq_f32(b);
+  // from arm documentation
+  // The Newton-Raphson iteration:
+  //     x[n+1] = x[n] * (2 - d * x[n])
+  // converges to (1/d) if x0 is the result of VRECPE applied to d.
+  //
+  // Note: The precision did not improve after 2 iterations.
+  for (i = 0; i < 2; i++) {
+    x = vmulq_f32(vrecpsq_f32(b, x), x);
+  }
+  // a/b = a*(1/b)
+  return vmulq_f32(a, x);
+}
+
+static float32x4_t vsqrtq_f32(float32x4_t s) {
+  int i;
+  float32x4_t x = vrsqrteq_f32(s);
+
+  // Code to handle sqrt(0).
+  // If the input to sqrtf() is zero, a zero will be returned.
+  // If the input to vrsqrteq_f32() is zero, positive infinity is returned.
+  const uint32x4_t vec_p_inf = vdupq_n_u32(0x7F800000);
+  // check for divide by zero
+  const uint32x4_t div_by_zero = vceqq_u32(vec_p_inf, vreinterpretq_u32_f32(x));
+  // zero out the positive infinity results
+  x = vreinterpretq_f32_u32(vandq_u32(vmvnq_u32(div_by_zero),
+                                      vreinterpretq_u32_f32(x)));
+  // from arm documentation
+  // The Newton-Raphson iteration:
+  //     x[n+1] = x[n] * (3 - d * (x[n] * x[n])) / 2)
+  // converges to (1/√d) if x0 is the result of VRSQRTE applied to d.
+  //
+  // Note: The precision did not improve after 2 iterations.
+  for (i = 0; i < 2; i++) {
+    x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x);
+  }
+  // sqrt(s) = s * 1/sqrt(s)
+  return vmulq_f32(s, x);;
+}
+#endif  // WEBRTC_ARCH_ARM64
+
+static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
+  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
+  const float error_threshold = aec->extended_filter_enabled ?
+      kExtendedErrorThreshold : aec->normal_error_threshold;
+  const float32x4_t k1e_10f = vdupq_n_f32(1e-10f);
+  const float32x4_t kMu = vmovq_n_f32(mu);
+  const float32x4_t kThresh = vmovq_n_f32(error_threshold);
+  int i;
+  // vectorized code (four at once)
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    const float32x4_t xPow = vld1q_f32(&aec->xPow[i]);
+    const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]);
+    const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]);
+    const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f);
+    float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus);
+    float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus);
+    const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re);
+    const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im);
+    const float32x4_t absEf = vsqrtq_f32(ef_sum2);
+    const uint32x4_t bigger = vcgtq_f32(absEf, kThresh);
+    const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f);
+    const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus);
+    uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv));
+    uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv));
+    uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger),
+                                     vreinterpretq_u32_f32(ef_re));
+    uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger),
+                                     vreinterpretq_u32_f32(ef_im));
+    ef_re_if = vandq_u32(bigger, ef_re_if);
+    ef_im_if = vandq_u32(bigger, ef_im_if);
+    ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if);
+    ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if);
+    ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu);
+    ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu);
+    vst1q_f32(&ef[0][i], ef_re);
+    vst1q_f32(&ef[1][i], ef_im);
+  }
+  // scalar code for the remaining items.
+  for (; i < PART_LEN1; i++) {
+    float abs_ef;
+    ef[0][i] /= (aec->xPow[i] + 1e-10f);
+    ef[1][i] /= (aec->xPow[i] + 1e-10f);
+    abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
+
+    if (abs_ef > error_threshold) {
+      abs_ef = error_threshold / (abs_ef + 1e-10f);
+      ef[0][i] *= abs_ef;
+      ef[1][i] *= abs_ef;
+    }
+
+    // Stepsize factor
+    ef[0][i] *= mu;
+    ef[1][i] *= mu;
+  }
+}
+
+static void FilterAdaptationNEON(AecCore* aec,
+                                 float* fft,
+                                 float ef[2][PART_LEN1]) {
+  int i;
+  const int num_partitions = aec->num_partitions;
+  for (i = 0; i < num_partitions; i++) {
+    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int pos = i * PART_LEN1;
+    int j;
+    // Check for wrap
+    if (i + aec->xfBufBlockPos >= num_partitions) {
+      xPos -= num_partitions * PART_LEN1;
+    }
+
+    // Process the whole array...
+    for (j = 0; j < PART_LEN; j += 4) {
+      // Load xfBuf and ef.
+      const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
+      const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
+      const float32x4_t ef_re = vld1q_f32(&ef[0][j]);
+      const float32x4_t ef_im = vld1q_f32(&ef[1][j]);
+      // Calculate the product of conjugate(xfBuf) by ef.
+      //   re(conjugate(a) * b) = aRe * bRe + aIm * bIm
+      //   im(conjugate(a) * b)=  aRe * bIm - aIm * bRe
+      const float32x4_t a = vmulq_f32(xfBuf_re, ef_re);
+      const float32x4_t e = vmlaq_f32(a, xfBuf_im, ef_im);
+      const float32x4_t c = vmulq_f32(xfBuf_re, ef_im);
+      const float32x4_t f = vmlsq_f32(c, xfBuf_im, ef_re);
+      // Interleave real and imaginary parts.
+      const float32x4x2_t g_n_h = vzipq_f32(e, f);
+      // Store
+      vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]);
+      vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]);
+    }
+    // ... and fixup the first imaginary entry.
+    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
+                   -aec->xfBuf[1][xPos + PART_LEN],
+                   ef[0][PART_LEN],
+                   ef[1][PART_LEN]);
+
+    aec_rdft_inverse_128(fft);
+    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
+
+    // fft scaling
+    {
+      const float scale = 2.0f / PART_LEN2;
+      const float32x4_t scale_ps = vmovq_n_f32(scale);
+      for (j = 0; j < PART_LEN; j += 4) {
+        const float32x4_t fft_ps = vld1q_f32(&fft[j]);
+        const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps);
+        vst1q_f32(&fft[j], fft_scale);
+      }
+    }
+    aec_rdft_forward_128(fft);
+
+    {
+      const float wt1 = aec->wfBuf[1][pos];
+      aec->wfBuf[0][pos + PART_LEN] += fft[1];
+      for (j = 0; j < PART_LEN; j += 4) {
+        float32x4_t wtBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
+        float32x4_t wtBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
+        const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]);
+        const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]);
+        const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4);
+        wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]);
+        wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]);
+
+        vst1q_f32(&aec->wfBuf[0][pos + j], wtBuf_re);
+        vst1q_f32(&aec->wfBuf[1][pos + j], wtBuf_im);
+      }
+      aec->wfBuf[1][pos] = wt1;
+    }
+  }
+}
+
+static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) {
+  // a^b = exp2(b * log2(a))
+  //   exp2(x) and log2(x) are calculated using polynomial approximations.
+  float32x4_t log2_a, b_log2_a, a_exp_b;
+
+  // Calculate log2(x), x = a.
+  {
+    // To calculate log2(x), we decompose x like this:
+    //   x = y * 2^n
+    //     n is an integer
+    //     y is in the [1.0, 2.0) range
+    //
+    //   log2(x) = log2(y) + n
+    //     n       can be evaluated by playing with float representation.
+    //     log2(y) in a small range can be approximated, this code uses an order
+    //             five polynomial approximation. The coefficients have been
+    //             estimated with the Remez algorithm and the resulting
+    //             polynomial has a maximum relative error of 0.00086%.
+
+    // Compute n.
+    //    This is done by masking the exponent, shifting it into the top bit of
+    //    the mantissa, putting eight into the biased exponent (to shift/
+    //    compensate the fact that the exponent has been shifted in the top/
+    //    fractional part and finally getting rid of the implicit leading one
+    //    from the mantissa by substracting it out.
+    const uint32x4_t vec_float_exponent_mask = vdupq_n_u32(0x7F800000);
+    const uint32x4_t vec_eight_biased_exponent = vdupq_n_u32(0x43800000);
+    const uint32x4_t vec_implicit_leading_one = vdupq_n_u32(0x43BF8000);
+    const uint32x4_t two_n = vandq_u32(vreinterpretq_u32_f32(a),
+                                       vec_float_exponent_mask);
+    const uint32x4_t n_1 = vshrq_n_u32(two_n, kShiftExponentIntoTopMantissa);
+    const uint32x4_t n_0 = vorrq_u32(n_1, vec_eight_biased_exponent);
+    const float32x4_t n =
+        vsubq_f32(vreinterpretq_f32_u32(n_0),
+                  vreinterpretq_f32_u32(vec_implicit_leading_one));
+    // Compute y.
+    const uint32x4_t vec_mantissa_mask = vdupq_n_u32(0x007FFFFF);
+    const uint32x4_t vec_zero_biased_exponent_is_one = vdupq_n_u32(0x3F800000);
+    const uint32x4_t mantissa = vandq_u32(vreinterpretq_u32_f32(a),
+                                          vec_mantissa_mask);
+    const float32x4_t y =
+        vreinterpretq_f32_u32(vorrq_u32(mantissa,
+                                        vec_zero_biased_exponent_is_one));
+    // Approximate log2(y) ~= (y - 1) * pol5(y).
+    //    pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0
+    const float32x4_t C5 = vdupq_n_f32(-3.4436006e-2f);
+    const float32x4_t C4 = vdupq_n_f32(3.1821337e-1f);
+    const float32x4_t C3 = vdupq_n_f32(-1.2315303f);
+    const float32x4_t C2 = vdupq_n_f32(2.5988452f);
+    const float32x4_t C1 = vdupq_n_f32(-3.3241990f);
+    const float32x4_t C0 = vdupq_n_f32(3.1157899f);
+    float32x4_t pol5_y = C5;
+    pol5_y = vmlaq_f32(C4, y, pol5_y);
+    pol5_y = vmlaq_f32(C3, y, pol5_y);
+    pol5_y = vmlaq_f32(C2, y, pol5_y);
+    pol5_y = vmlaq_f32(C1, y, pol5_y);
+    pol5_y = vmlaq_f32(C0, y, pol5_y);
+    const float32x4_t y_minus_one =
+        vsubq_f32(y, vreinterpretq_f32_u32(vec_zero_biased_exponent_is_one));
+    const float32x4_t log2_y = vmulq_f32(y_minus_one, pol5_y);
+
+    // Combine parts.
+    log2_a = vaddq_f32(n, log2_y);
+  }
+
+  // b * log2(a)
+  b_log2_a = vmulq_f32(b, log2_a);
+
+  // Calculate exp2(x), x = b * log2(a).
+  {
+    // To calculate 2^x, we decompose x like this:
+    //   x = n + y
+    //     n is an integer, the value of x - 0.5 rounded down, therefore
+    //     y is in the [0.5, 1.5) range
+    //
+    //   2^x = 2^n * 2^y
+    //     2^n can be evaluated by playing with float representation.
+    //     2^y in a small range can be approximated, this code uses an order two
+    //         polynomial approximation. The coefficients have been estimated
+    //         with the Remez algorithm and the resulting polynomial has a
+    //         maximum relative error of 0.17%.
+    // To avoid over/underflow, we reduce the range of input to ]-127, 129].
+    const float32x4_t max_input = vdupq_n_f32(129.f);
+    const float32x4_t min_input = vdupq_n_f32(-126.99999f);
+    const float32x4_t x_min = vminq_f32(b_log2_a, max_input);
+    const float32x4_t x_max = vmaxq_f32(x_min, min_input);
+    // Compute n.
+    const float32x4_t half = vdupq_n_f32(0.5f);
+    const float32x4_t x_minus_half = vsubq_f32(x_max, half);
+    const int32x4_t x_minus_half_floor = vcvtq_s32_f32(x_minus_half);
+
+    // Compute 2^n.
+    const int32x4_t float_exponent_bias = vdupq_n_s32(127);
+    const int32x4_t two_n_exponent =
+        vaddq_s32(x_minus_half_floor, float_exponent_bias);
+    const float32x4_t two_n =
+        vreinterpretq_f32_s32(vshlq_n_s32(two_n_exponent, kFloatExponentShift));
+    // Compute y.
+    const float32x4_t y = vsubq_f32(x_max, vcvtq_f32_s32(x_minus_half_floor));
+
+    // Approximate 2^y ~= C2 * y^2 + C1 * y + C0.
+    const float32x4_t C2 = vdupq_n_f32(3.3718944e-1f);
+    const float32x4_t C1 = vdupq_n_f32(6.5763628e-1f);
+    const float32x4_t C0 = vdupq_n_f32(1.0017247f);
+    float32x4_t exp2_y = C2;
+    exp2_y = vmlaq_f32(C1, y, exp2_y);
+    exp2_y = vmlaq_f32(C0, y, exp2_y);
+
+    // Combine parts.
+    a_exp_b = vmulq_f32(exp2_y, two_n);
+  }
+
+  return a_exp_b;
+}
+
+static void OverdriveAndSuppressNEON(AecCore* aec,
+                                     float hNl[PART_LEN1],
+                                     const float hNlFb,
+                                     float efw[2][PART_LEN1]) {
+  int i;
+  const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);
+  const float32x4_t vec_one = vdupq_n_f32(1.0f);
+  const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
+  const float32x4_t vec_overDriveSm = vmovq_n_f32(aec->overDriveSm);
+
+  // vectorized code (four at once)
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    // Weight subbands
+    float32x4_t vec_hNl = vld1q_f32(&hNl[i]);
+    const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]);
+    const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb);
+    const float32x4_t vec_weightCurve_hNlFb = vmulq_f32(vec_weightCurve,
+                                                        vec_hNlFb);
+    const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve);
+    const float32x4_t vec_one_weightCurve_hNl = vmulq_f32(vec_one_weightCurve,
+                                                          vec_hNl);
+    const uint32x4_t vec_if0 = vandq_u32(vmvnq_u32(bigger),
+                                         vreinterpretq_u32_f32(vec_hNl));
+    const float32x4_t vec_one_weightCurve_add =
+        vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl);
+    const uint32x4_t vec_if1 =
+        vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add));
+
+    vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));
+
+    {
+      const float32x4_t vec_overDriveCurve =
+          vld1q_f32(&WebRtcAec_overDriveCurve[i]);
+      const float32x4_t vec_overDriveSm_overDriveCurve =
+          vmulq_f32(vec_overDriveSm, vec_overDriveCurve);
+      vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);
+      vst1q_f32(&hNl[i], vec_hNl);
+    }
+
+    // Suppress error signal
+    {
+      float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
+      float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
+      vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
+      vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
+
+      // Ooura fft returns incorrect sign on imaginary component. It matters
+      // here because we are making an additive change with comfort noise.
+      vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
+      vst1q_f32(&efw[0][i], vec_efw_re);
+      vst1q_f32(&efw[1][i], vec_efw_im);
+    }
+  }
+
+  // scalar code for the remaining items.
+  for (; i < PART_LEN1; i++) {
+    // Weight subbands
+    if (hNl[i] > hNlFb) {
+      hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
+               (1 - WebRtcAec_weightCurve[i]) * hNl[i];
+    }
+
+    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
+
+    // Suppress error signal
+    efw[0][i] *= hNl[i];
+    efw[1][i] *= hNl[i];
+
+    // Ooura fft returns incorrect sign on imaginary component. It matters
+    // here because we are making an additive change with comfort noise.
+    efw[1][i] *= -1;
+  }
+}
+
+static int PartitionDelay(const AecCore* aec) {
+  // Measures the energy in each filter partition and returns the partition with
+  // highest energy.
+  // TODO(bjornv): Spread computational cost by computing one partition per
+  // block?
+  float wfEnMax = 0;
+  int i;
+  int delay = 0;
+
+  for (i = 0; i < aec->num_partitions; i++) {
+    int j;
+    int pos = i * PART_LEN1;
+    float wfEn = 0;
+    float32x4_t vec_wfEn = vdupq_n_f32(0.0f);
+    // vectorized code (four at once)
+    for (j = 0; j + 3 < PART_LEN1; j += 4) {
+      const float32x4_t vec_wfBuf0 = vld1q_f32(&aec->wfBuf[0][pos + j]);
+      const float32x4_t vec_wfBuf1 = vld1q_f32(&aec->wfBuf[1][pos + j]);
+      vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0);
+      vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1);
+    }
+    {
+      float32x2_t vec_total;
+      // A B C D
+      vec_total = vpadd_f32(vget_low_f32(vec_wfEn), vget_high_f32(vec_wfEn));
+      // A+B C+D
+      vec_total = vpadd_f32(vec_total, vec_total);
+      // A+B+C+D A+B+C+D
+      wfEn = vget_lane_f32(vec_total, 0);
+    }
+
+    // scalar code for the remaining items.
+    for (; j < PART_LEN1; j++) {
+      wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
+              aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
+    }
+
+    if (wfEn > wfEnMax) {
+      wfEnMax = wfEn;
+      delay = i;
+    }
+  }
+  return delay;
+}
+
+// Updates the following smoothed  Power Spectral Densities (PSD):
+//  - sd  : near-end
+//  - se  : residual echo
+//  - sx  : far-end
+//  - sde : cross-PSD of near-end and residual echo
+//  - sxd : cross-PSD of near-end and far-end
+//
+// In addition to updating the PSDs, also the filter diverge state is determined
+// upon actions are taken.
+static void SmoothedPSD(AecCore* aec,
+                        float efw[2][PART_LEN1],
+                        float dfw[2][PART_LEN1],
+                        float xfw[2][PART_LEN1]) {
+  // Power estimate smoothing coefficients.
+  const float* ptrGCoh = aec->extended_filter_enabled
+      ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
+      : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
+  int i;
+  float sdSum = 0, seSum = 0;
+  const float32x4_t vec_15 =  vdupq_n_f32(WebRtcAec_kMinFarendPSD);
+  float32x4_t vec_sdSum = vdupq_n_f32(0.0f);
+  float32x4_t vec_seSum = vdupq_n_f32(0.0f);
+
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]);
+    const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]);
+    const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]);
+    const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]);
+    const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]);
+    const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]);
+    float32x4_t vec_sd = vmulq_n_f32(vld1q_f32(&aec->sd[i]), ptrGCoh[0]);
+    float32x4_t vec_se = vmulq_n_f32(vld1q_f32(&aec->se[i]), ptrGCoh[0]);
+    float32x4_t vec_sx = vmulq_n_f32(vld1q_f32(&aec->sx[i]), ptrGCoh[0]);
+    float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0);
+    float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0);
+    float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0);
+
+    vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1);
+    vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1);
+    vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1);
+    vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15);
+    vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]);
+    vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]);
+    vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]);
+
+    vst1q_f32(&aec->sd[i], vec_sd);
+    vst1q_f32(&aec->se[i], vec_se);
+    vst1q_f32(&aec->sx[i], vec_sx);
+
+    {
+      float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
+      float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0);
+      float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1);
+      vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]);
+      vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]);
+      vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1);
+      vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0);
+      vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]);
+      vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]);
+      vst2q_f32(&aec->sde[i][0], vec_sde);
+    }
+
+    {
+      float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);
+      float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0);
+      float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1);
+      vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]);
+      vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]);
+      vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1);
+      vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0);
+      vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]);
+      vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]);
+      vst2q_f32(&aec->sxd[i][0], vec_sxd);
+    }
+
+    vec_sdSum = vaddq_f32(vec_sdSum, vec_sd);
+    vec_seSum = vaddq_f32(vec_seSum, vec_se);
+  }
+  {
+    float32x2_t vec_sdSum_total;
+    float32x2_t vec_seSum_total;
+    // A B C D
+    vec_sdSum_total = vpadd_f32(vget_low_f32(vec_sdSum),
+                                vget_high_f32(vec_sdSum));
+    vec_seSum_total = vpadd_f32(vget_low_f32(vec_seSum),
+                                vget_high_f32(vec_seSum));
+    // A+B C+D
+    vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total);
+    vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total);
+    // A+B+C+D A+B+C+D
+    sdSum = vget_lane_f32(vec_sdSum_total, 0);
+    seSum = vget_lane_f32(vec_seSum_total, 0);
+  }
+
+  // scalar code for the remaining items.
+  for (; i < PART_LEN1; i++) {
+    aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
+                 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
+    aec->se[i] = ptrGCoh[0] * aec->se[i] +
+                 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
+    // We threshold here to protect against the ill-effects of a zero farend.
+    // The threshold is not arbitrarily chosen, but balances protection and
+    // adverse interaction with the algorithm's tuning.
+    // TODO(bjornv): investigate further why this is so sensitive.
+    aec->sx[i] =
+        ptrGCoh[0] * aec->sx[i] +
+        ptrGCoh[1] * WEBRTC_SPL_MAX(
+            xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
+            WebRtcAec_kMinFarendPSD);
+
+    aec->sde[i][0] =
+        ptrGCoh[0] * aec->sde[i][0] +
+        ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
+    aec->sde[i][1] =
+        ptrGCoh[0] * aec->sde[i][1] +
+        ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
+
+    aec->sxd[i][0] =
+        ptrGCoh[0] * aec->sxd[i][0] +
+        ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
+    aec->sxd[i][1] =
+        ptrGCoh[0] * aec->sxd[i][1] +
+        ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
+
+    sdSum += aec->sd[i];
+    seSum += aec->se[i];
+  }
+
+  // Divergent filter safeguard.
+  aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
+
+  if (aec->divergeState)
+    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
+
+  // Reset if error is significantly larger than nearend (13 dB).
+  if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
+    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
+}
+
+// Window time domain data to be used by the fft.
+__inline static void WindowData(float* x_windowed, const float* x) {
+  int i;
+  for (i = 0; i < PART_LEN; i += 4) {
+    const float32x4_t vec_Buf1 = vld1q_f32(&x[i]);
+    const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]);
+    const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]);
+    // A B C D
+    float32x4_t vec_sqrtHanning_rev =
+        vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]);
+    // B A D C
+    vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev);
+    // D C B A
+    vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev),
+                                       vget_low_f32(vec_sqrtHanning_rev));
+    vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning));
+    vst1q_f32(&x_windowed[PART_LEN + i],
+            vmulq_f32(vec_Buf2, vec_sqrtHanning_rev));
+  }
+}
+
+// Puts fft output data into a complex valued array.
+__inline static void StoreAsComplex(const float* data,
+                                    float data_complex[2][PART_LEN1]) {
+  int i;
+  for (i = 0; i < PART_LEN; i += 4) {
+    const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]);
+    vst1q_f32(&data_complex[0][i], vec_data.val[0]);
+    vst1q_f32(&data_complex[1][i], vec_data.val[1]);
+  }
+  // fix beginning/end values
+  data_complex[1][0] = 0;
+  data_complex[1][PART_LEN] = 0;
+  data_complex[0][0] = data[0];
+  data_complex[0][PART_LEN] = data[1];
+}
+
+static void SubbandCoherenceNEON(AecCore* aec,
+                                 float efw[2][PART_LEN1],
+                                 float xfw[2][PART_LEN1],
+                                 float* fft,
+                                 float* cohde,
+                                 float* cohxd) {
+  float dfw[2][PART_LEN1];
+  int i;
+
+  if (aec->delayEstCtr == 0)
+    aec->delayIdx = PartitionDelay(aec);
+
+  // Use delayed far.
+  memcpy(xfw,
+         aec->xfwBuf + aec->delayIdx * PART_LEN1,
+         sizeof(xfw[0][0]) * 2 * PART_LEN1);
+
+  // Windowed near fft
+  WindowData(fft, aec->dBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, dfw);
+
+  // Windowed error fft
+  WindowData(fft, aec->eBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, efw);
+
+  SmoothedPSD(aec, efw, dfw, xfw);
+
+  {
+    const float32x4_t vec_1eminus10 =  vdupq_n_f32(1e-10f);
+
+    // Subband coherence
+    for (i = 0; i + 3 < PART_LEN1; i += 4) {
+      const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]);
+      const float32x4_t vec_se = vld1q_f32(&aec->se[i]);
+      const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]);
+      const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se);
+      const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx);
+      float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
+      float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);
+      float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]);
+      float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]);
+      vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]);
+      vec_cohde = vdivq_f32(vec_cohde, vec_sdse);
+      vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]);
+      vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx);
+
+      vst1q_f32(&cohde[i], vec_cohde);
+      vst1q_f32(&cohxd[i], vec_cohxd);
+    }
+  }
+  // scalar code for the remaining items.
+  for (; i < PART_LEN1; i++) {
+    cohde[i] =
+        (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
+        (aec->sd[i] * aec->se[i] + 1e-10f);
+    cohxd[i] =
+        (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
+        (aec->sx[i] * aec->sd[i] + 1e-10f);
+  }
+}
+
+void WebRtcAec_InitAec_neon(void) {
+  WebRtcAec_FilterFar = FilterFarNEON;
+  WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
+  WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
+  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
+  WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
+}
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_sse2.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_sse2.c
new file mode 100644
index 00000000..b1bffcbb
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_sse2.c
@@ -0,0 +1,731 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The core AEC algorithm, SSE2 version of speed-critical functions.
+ */
+
+#include <emmintrin.h>
+#include <math.h>
+#include <string.h>  // memset
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
+  return aRe * bRe - aIm * bIm;
+}
+
+__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
+  return aRe * bIm + aIm * bRe;
+}
+
+static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) {
+  int i;
+  const int num_partitions = aec->num_partitions;
+  for (i = 0; i < num_partitions; i++) {
+    int j;
+    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int pos = i * PART_LEN1;
+    // Check for wrap
+    if (i + aec->xfBufBlockPos >= num_partitions) {
+      xPos -= num_partitions * (PART_LEN1);
+    }
+
+    // vectorized code (four at once)
+    for (j = 0; j + 3 < PART_LEN1; j += 4) {
+      const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);
+      const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);
+      const __m128 wfBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
+      const __m128 wfBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
+      const __m128 yf_re = _mm_loadu_ps(&yf[0][j]);
+      const __m128 yf_im = _mm_loadu_ps(&yf[1][j]);
+      const __m128 a = _mm_mul_ps(xfBuf_re, wfBuf_re);
+      const __m128 b = _mm_mul_ps(xfBuf_im, wfBuf_im);
+      const __m128 c = _mm_mul_ps(xfBuf_re, wfBuf_im);
+      const __m128 d = _mm_mul_ps(xfBuf_im, wfBuf_re);
+      const __m128 e = _mm_sub_ps(a, b);
+      const __m128 f = _mm_add_ps(c, d);
+      const __m128 g = _mm_add_ps(yf_re, e);
+      const __m128 h = _mm_add_ps(yf_im, f);
+      _mm_storeu_ps(&yf[0][j], g);
+      _mm_storeu_ps(&yf[1][j], h);
+    }
+    // scalar code for the remaining items.
+    for (; j < PART_LEN1; j++) {
+      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
+                        aec->xfBuf[1][xPos + j],
+                        aec->wfBuf[0][pos + j],
+                        aec->wfBuf[1][pos + j]);
+      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
+                        aec->xfBuf[1][xPos + j],
+                        aec->wfBuf[0][pos + j],
+                        aec->wfBuf[1][pos + j]);
+    }
+  }
+}
+
+static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
+  const __m128 k1e_10f = _mm_set1_ps(1e-10f);
+  const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
+                                                  : _mm_set1_ps(aec->normal_mu);
+  const __m128 kThresh = aec->extended_filter_enabled
+                             ? _mm_set1_ps(kExtendedErrorThreshold)
+                             : _mm_set1_ps(aec->normal_error_threshold);
+
+  int i;
+  // vectorized code (four at once)
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    const __m128 xPow = _mm_loadu_ps(&aec->xPow[i]);
+    const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]);
+    const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]);
+
+    const __m128 xPowPlus = _mm_add_ps(xPow, k1e_10f);
+    __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus);
+    __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus);
+    const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re);
+    const __m128 ef_im2 = _mm_mul_ps(ef_im, ef_im);
+    const __m128 ef_sum2 = _mm_add_ps(ef_re2, ef_im2);
+    const __m128 absEf = _mm_sqrt_ps(ef_sum2);
+    const __m128 bigger = _mm_cmpgt_ps(absEf, kThresh);
+    __m128 absEfPlus = _mm_add_ps(absEf, k1e_10f);
+    const __m128 absEfInv = _mm_div_ps(kThresh, absEfPlus);
+    __m128 ef_re_if = _mm_mul_ps(ef_re, absEfInv);
+    __m128 ef_im_if = _mm_mul_ps(ef_im, absEfInv);
+    ef_re_if = _mm_and_ps(bigger, ef_re_if);
+    ef_im_if = _mm_and_ps(bigger, ef_im_if);
+    ef_re = _mm_andnot_ps(bigger, ef_re);
+    ef_im = _mm_andnot_ps(bigger, ef_im);
+    ef_re = _mm_or_ps(ef_re, ef_re_if);
+    ef_im = _mm_or_ps(ef_im, ef_im_if);
+    ef_re = _mm_mul_ps(ef_re, kMu);
+    ef_im = _mm_mul_ps(ef_im, kMu);
+
+    _mm_storeu_ps(&ef[0][i], ef_re);
+    _mm_storeu_ps(&ef[1][i], ef_im);
+  }
+  // scalar code for the remaining items.
+  {
+    const float mu =
+        aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
+    const float error_threshold = aec->extended_filter_enabled
+                                      ? kExtendedErrorThreshold
+                                      : aec->normal_error_threshold;
+    for (; i < (PART_LEN1); i++) {
+      float abs_ef;
+      ef[0][i] /= (aec->xPow[i] + 1e-10f);
+      ef[1][i] /= (aec->xPow[i] + 1e-10f);
+      abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
+
+      if (abs_ef > error_threshold) {
+        abs_ef = error_threshold / (abs_ef + 1e-10f);
+        ef[0][i] *= abs_ef;
+        ef[1][i] *= abs_ef;
+      }
+
+      // Stepsize factor
+      ef[0][i] *= mu;
+      ef[1][i] *= mu;
+    }
+  }
+}
+
+static void FilterAdaptationSSE2(AecCore* aec,
+                                 float* fft,
+                                 float ef[2][PART_LEN1]) {
+  int i, j;
+  const int num_partitions = aec->num_partitions;
+  for (i = 0; i < num_partitions; i++) {
+    int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
+    int pos = i * PART_LEN1;
+    // Check for wrap
+    if (i + aec->xfBufBlockPos >= num_partitions) {
+      xPos -= num_partitions * PART_LEN1;
+    }
+
+    // Process the whole array...
+    for (j = 0; j < PART_LEN; j += 4) {
+      // Load xfBuf and ef.
+      const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);
+      const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);
+      const __m128 ef_re = _mm_loadu_ps(&ef[0][j]);
+      const __m128 ef_im = _mm_loadu_ps(&ef[1][j]);
+      // Calculate the product of conjugate(xfBuf) by ef.
+      //   re(conjugate(a) * b) = aRe * bRe + aIm * bIm
+      //   im(conjugate(a) * b)=  aRe * bIm - aIm * bRe
+      const __m128 a = _mm_mul_ps(xfBuf_re, ef_re);
+      const __m128 b = _mm_mul_ps(xfBuf_im, ef_im);
+      const __m128 c = _mm_mul_ps(xfBuf_re, ef_im);
+      const __m128 d = _mm_mul_ps(xfBuf_im, ef_re);
+      const __m128 e = _mm_add_ps(a, b);
+      const __m128 f = _mm_sub_ps(c, d);
+      // Interleave real and imaginary parts.
+      const __m128 g = _mm_unpacklo_ps(e, f);
+      const __m128 h = _mm_unpackhi_ps(e, f);
+      // Store
+      _mm_storeu_ps(&fft[2 * j + 0], g);
+      _mm_storeu_ps(&fft[2 * j + 4], h);
+    }
+    // ... and fixup the first imaginary entry.
+    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
+                   -aec->xfBuf[1][xPos + PART_LEN],
+                   ef[0][PART_LEN],
+                   ef[1][PART_LEN]);
+
+    aec_rdft_inverse_128(fft);
+    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
+
+    // fft scaling
+    {
+      float scale = 2.0f / PART_LEN2;
+      const __m128 scale_ps = _mm_load_ps1(&scale);
+      for (j = 0; j < PART_LEN; j += 4) {
+        const __m128 fft_ps = _mm_loadu_ps(&fft[j]);
+        const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps);
+        _mm_storeu_ps(&fft[j], fft_scale);
+      }
+    }
+    aec_rdft_forward_128(fft);
+
+    {
+      float wt1 = aec->wfBuf[1][pos];
+      aec->wfBuf[0][pos + PART_LEN] += fft[1];
+      for (j = 0; j < PART_LEN; j += 4) {
+        __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
+        __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
+        const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);
+        const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);
+        const __m128 fft_re =
+            _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0));
+        const __m128 fft_im =
+            _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1));
+        wtBuf_re = _mm_add_ps(wtBuf_re, fft_re);
+        wtBuf_im = _mm_add_ps(wtBuf_im, fft_im);
+        _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re);
+        _mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im);
+      }
+      aec->wfBuf[1][pos] = wt1;
+    }
+  }
+}
+
+static __m128 mm_pow_ps(__m128 a, __m128 b) {
+  // a^b = exp2(b * log2(a))
+  //   exp2(x) and log2(x) are calculated using polynomial approximations.
+  __m128 log2_a, b_log2_a, a_exp_b;
+
+  // Calculate log2(x), x = a.
+  {
+    // To calculate log2(x), we decompose x like this:
+    //   x = y * 2^n
+    //     n is an integer
+    //     y is in the [1.0, 2.0) range
+    //
+    //   log2(x) = log2(y) + n
+    //     n       can be evaluated by playing with float representation.
+    //     log2(y) in a small range can be approximated, this code uses an order
+    //             five polynomial approximation. The coefficients have been
+    //             estimated with the Remez algorithm and the resulting
+    //             polynomial has a maximum relative error of 0.00086%.
+
+    // Compute n.
+    //    This is done by masking the exponent, shifting it into the top bit of
+    //    the mantissa, putting eight into the biased exponent (to shift/
+    //    compensate the fact that the exponent has been shifted in the top/
+    //    fractional part and finally getting rid of the implicit leading one
+    //    from the mantissa by substracting it out.
+    static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = {
+        0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};
+    static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = {
+        0x43800000, 0x43800000, 0x43800000, 0x43800000};
+    static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = {
+        0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000};
+    static const int shift_exponent_into_top_mantissa = 8;
+    const __m128 two_n = _mm_and_ps(a, *((__m128*)float_exponent_mask));
+    const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(
+        _mm_castps_si128(two_n), shift_exponent_into_top_mantissa));
+    const __m128 n_0 = _mm_or_ps(n_1, *((__m128*)eight_biased_exponent));
+    const __m128 n = _mm_sub_ps(n_0, *((__m128*)implicit_leading_one));
+
+    // Compute y.
+    static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = {
+        0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF};
+    static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = {
+        0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000};
+    const __m128 mantissa = _mm_and_ps(a, *((__m128*)mantissa_mask));
+    const __m128 y =
+        _mm_or_ps(mantissa, *((__m128*)zero_biased_exponent_is_one));
+
+    // Approximate log2(y) ~= (y - 1) * pol5(y).
+    //    pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0
+    static const ALIGN16_BEG float ALIGN16_END C5[4] = {
+        -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f};
+    static const ALIGN16_BEG float ALIGN16_END
+        C4[4] = {3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f};
+    static const ALIGN16_BEG float ALIGN16_END
+        C3[4] = {-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f};
+    static const ALIGN16_BEG float ALIGN16_END
+        C2[4] = {2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f};
+    static const ALIGN16_BEG float ALIGN16_END
+        C1[4] = {-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f};
+    static const ALIGN16_BEG float ALIGN16_END
+        C0[4] = {3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f};
+    const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128*)C5));
+    const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128*)C4));
+    const __m128 pol5_y_2 = _mm_mul_ps(pol5_y_1, y);
+    const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128*)C3));
+    const __m128 pol5_y_4 = _mm_mul_ps(pol5_y_3, y);
+    const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128*)C2));
+    const __m128 pol5_y_6 = _mm_mul_ps(pol5_y_5, y);
+    const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128*)C1));
+    const __m128 pol5_y_8 = _mm_mul_ps(pol5_y_7, y);
+    const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128*)C0));
+    const __m128 y_minus_one =
+        _mm_sub_ps(y, *((__m128*)zero_biased_exponent_is_one));
+    const __m128 log2_y = _mm_mul_ps(y_minus_one, pol5_y);
+
+    // Combine parts.
+    log2_a = _mm_add_ps(n, log2_y);
+  }
+
+  // b * log2(a)
+  b_log2_a = _mm_mul_ps(b, log2_a);
+
+  // Calculate exp2(x), x = b * log2(a).
+  {
+    // To calculate 2^x, we decompose x like this:
+    //   x = n + y
+    //     n is an integer, the value of x - 0.5 rounded down, therefore
+    //     y is in the [0.5, 1.5) range
+    //
+    //   2^x = 2^n * 2^y
+    //     2^n can be evaluated by playing with float representation.
+    //     2^y in a small range can be approximated, this code uses an order two
+    //         polynomial approximation. The coefficients have been estimated
+    //         with the Remez algorithm and the resulting polynomial has a
+    //         maximum relative error of 0.17%.
+
+    // To avoid over/underflow, we reduce the range of input to ]-127, 129].
+    static const ALIGN16_BEG float max_input[4] ALIGN16_END = {129.f, 129.f,
+                                                               129.f, 129.f};
+    static const ALIGN16_BEG float min_input[4] ALIGN16_END = {
+        -126.99999f, -126.99999f, -126.99999f, -126.99999f};
+    const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128*)max_input));
+    const __m128 x_max = _mm_max_ps(x_min, *((__m128*)min_input));
+    // Compute n.
+    static const ALIGN16_BEG float half[4] ALIGN16_END = {0.5f, 0.5f,
+                                                          0.5f, 0.5f};
+    const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128*)half));
+    const __m128i x_minus_half_floor = _mm_cvtps_epi32(x_minus_half);
+    // Compute 2^n.
+    static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = {
+        127, 127, 127, 127};
+    static const int float_exponent_shift = 23;
+    const __m128i two_n_exponent =
+        _mm_add_epi32(x_minus_half_floor, *((__m128i*)float_exponent_bias));
+    const __m128 two_n =
+        _mm_castsi128_ps(_mm_slli_epi32(two_n_exponent, float_exponent_shift));
+    // Compute y.
+    const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor));
+    // Approximate 2^y ~= C2 * y^2 + C1 * y + C0.
+    static const ALIGN16_BEG float C2[4] ALIGN16_END = {
+        3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f};
+    static const ALIGN16_BEG float C1[4] ALIGN16_END = {
+        6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f};
+    static const ALIGN16_BEG float C0[4] ALIGN16_END = {1.0017247f, 1.0017247f,
+                                                        1.0017247f, 1.0017247f};
+    const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128*)C2));
+    const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128*)C1));
+    const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y);
+    const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128*)C0));
+
+    // Combine parts.
+    a_exp_b = _mm_mul_ps(exp2_y, two_n);
+  }
+  return a_exp_b;
+}
+
+static void OverdriveAndSuppressSSE2(AecCore* aec,
+                                     float hNl[PART_LEN1],
+                                     const float hNlFb,
+                                     float efw[2][PART_LEN1]) {
+  int i;
+  const __m128 vec_hNlFb = _mm_set1_ps(hNlFb);
+  const __m128 vec_one = _mm_set1_ps(1.0f);
+  const __m128 vec_minus_one = _mm_set1_ps(-1.0f);
+  const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm);
+  // vectorized code (four at once)
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    // Weight subbands
+    __m128 vec_hNl = _mm_loadu_ps(&hNl[i]);
+    const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]);
+    const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb);
+    const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb);
+    const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve);
+    const __m128 vec_one_weightCurve_hNl =
+        _mm_mul_ps(vec_one_weightCurve, vec_hNl);
+    const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl);
+    const __m128 vec_if1 = _mm_and_ps(
+        bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl));
+    vec_hNl = _mm_or_ps(vec_if0, vec_if1);
+
+    {
+      const __m128 vec_overDriveCurve =
+          _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);
+      const __m128 vec_overDriveSm_overDriveCurve =
+          _mm_mul_ps(vec_overDriveSm, vec_overDriveCurve);
+      vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);
+      _mm_storeu_ps(&hNl[i], vec_hNl);
+    }
+
+    // Suppress error signal
+    {
+      __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]);
+      __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]);
+      vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl);
+      vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl);
+
+      // Ooura fft returns incorrect sign on imaginary component. It matters
+      // here because we are making an additive change with comfort noise.
+      vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one);
+      _mm_storeu_ps(&efw[0][i], vec_efw_re);
+      _mm_storeu_ps(&efw[1][i], vec_efw_im);
+    }
+  }
+  // scalar code for the remaining items.
+  for (; i < PART_LEN1; i++) {
+    // Weight subbands
+    if (hNl[i] > hNlFb) {
+      hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
+               (1 - WebRtcAec_weightCurve[i]) * hNl[i];
+    }
+    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
+
+    // Suppress error signal
+    efw[0][i] *= hNl[i];
+    efw[1][i] *= hNl[i];
+
+    // Ooura fft returns incorrect sign on imaginary component. It matters
+    // here because we are making an additive change with comfort noise.
+    efw[1][i] *= -1;
+  }
+}
+
+__inline static void _mm_add_ps_4x1(__m128 sum, float *dst) {
+  // A+B C+D
+  sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2)));
+  // A+B+C+D A+B+C+D
+  sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1)));
+  _mm_store_ss(dst, sum);
+}
+static int PartitionDelay(const AecCore* aec) {
+  // Measures the energy in each filter partition and returns the partition with
+  // highest energy.
+  // TODO(bjornv): Spread computational cost by computing one partition per
+  // block?
+  float wfEnMax = 0;
+  int i;
+  int delay = 0;
+
+  for (i = 0; i < aec->num_partitions; i++) {
+    int j;
+    int pos = i * PART_LEN1;
+    float wfEn = 0;
+    __m128 vec_wfEn = _mm_set1_ps(0.0f);
+    // vectorized code (four at once)
+    for (j = 0; j + 3 < PART_LEN1; j += 4) {
+      const __m128 vec_wfBuf0 = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
+      const __m128 vec_wfBuf1 = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
+      vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf0, vec_wfBuf0));
+      vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf1, vec_wfBuf1));
+    }
+    _mm_add_ps_4x1(vec_wfEn, &wfEn);
+
+    // scalar code for the remaining items.
+    for (; j < PART_LEN1; j++) {
+      wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
+              aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
+    }
+
+    if (wfEn > wfEnMax) {
+      wfEnMax = wfEn;
+      delay = i;
+    }
+  }
+  return delay;
+}
+
+// Updates the following smoothed  Power Spectral Densities (PSD):
+//  - sd  : near-end
+//  - se  : residual echo
+//  - sx  : far-end
+//  - sde : cross-PSD of near-end and residual echo
+//  - sxd : cross-PSD of near-end and far-end
+//
+// In addition to updating the PSDs, also the filter diverge state is determined
+// upon actions are taken.
+static void SmoothedPSD(AecCore* aec,
+                        float efw[2][PART_LEN1],
+                        float dfw[2][PART_LEN1],
+                        float xfw[2][PART_LEN1]) {
+  // Power estimate smoothing coefficients.
+  const float* ptrGCoh = aec->extended_filter_enabled
+      ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
+      : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
+  int i;
+  float sdSum = 0, seSum = 0;
+  const __m128 vec_15 =  _mm_set1_ps(WebRtcAec_kMinFarendPSD);
+  const __m128 vec_GCoh0 = _mm_set1_ps(ptrGCoh[0]);
+  const __m128 vec_GCoh1 = _mm_set1_ps(ptrGCoh[1]);
+  __m128 vec_sdSum = _mm_set1_ps(0.0f);
+  __m128 vec_seSum = _mm_set1_ps(0.0f);
+
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    const __m128 vec_dfw0 = _mm_loadu_ps(&dfw[0][i]);
+    const __m128 vec_dfw1 = _mm_loadu_ps(&dfw[1][i]);
+    const __m128 vec_efw0 = _mm_loadu_ps(&efw[0][i]);
+    const __m128 vec_efw1 = _mm_loadu_ps(&efw[1][i]);
+    const __m128 vec_xfw0 = _mm_loadu_ps(&xfw[0][i]);
+    const __m128 vec_xfw1 = _mm_loadu_ps(&xfw[1][i]);
+    __m128 vec_sd = _mm_mul_ps(_mm_loadu_ps(&aec->sd[i]), vec_GCoh0);
+    __m128 vec_se = _mm_mul_ps(_mm_loadu_ps(&aec->se[i]), vec_GCoh0);
+    __m128 vec_sx = _mm_mul_ps(_mm_loadu_ps(&aec->sx[i]), vec_GCoh0);
+    __m128 vec_dfw_sumsq = _mm_mul_ps(vec_dfw0, vec_dfw0);
+    __m128 vec_efw_sumsq = _mm_mul_ps(vec_efw0, vec_efw0);
+    __m128 vec_xfw_sumsq = _mm_mul_ps(vec_xfw0, vec_xfw0);
+    vec_dfw_sumsq = _mm_add_ps(vec_dfw_sumsq, _mm_mul_ps(vec_dfw1, vec_dfw1));
+    vec_efw_sumsq = _mm_add_ps(vec_efw_sumsq, _mm_mul_ps(vec_efw1, vec_efw1));
+    vec_xfw_sumsq = _mm_add_ps(vec_xfw_sumsq, _mm_mul_ps(vec_xfw1, vec_xfw1));
+    vec_xfw_sumsq = _mm_max_ps(vec_xfw_sumsq, vec_15);
+    vec_sd = _mm_add_ps(vec_sd, _mm_mul_ps(vec_dfw_sumsq, vec_GCoh1));
+    vec_se = _mm_add_ps(vec_se, _mm_mul_ps(vec_efw_sumsq, vec_GCoh1));
+    vec_sx = _mm_add_ps(vec_sx, _mm_mul_ps(vec_xfw_sumsq, vec_GCoh1));
+    _mm_storeu_ps(&aec->sd[i], vec_sd);
+    _mm_storeu_ps(&aec->se[i], vec_se);
+    _mm_storeu_ps(&aec->sx[i], vec_sx);
+
+    {
+      const __m128 vec_3210 = _mm_loadu_ps(&aec->sde[i][0]);
+      const __m128 vec_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
+      __m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
+                                    _MM_SHUFFLE(2, 0, 2, 0));
+      __m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
+                                    _MM_SHUFFLE(3, 1, 3, 1));
+      __m128 vec_dfwefw0011 = _mm_mul_ps(vec_dfw0, vec_efw0);
+      __m128 vec_dfwefw0110 = _mm_mul_ps(vec_dfw0, vec_efw1);
+      vec_a = _mm_mul_ps(vec_a, vec_GCoh0);
+      vec_b = _mm_mul_ps(vec_b, vec_GCoh0);
+      vec_dfwefw0011 = _mm_add_ps(vec_dfwefw0011,
+                                  _mm_mul_ps(vec_dfw1, vec_efw1));
+      vec_dfwefw0110 = _mm_sub_ps(vec_dfwefw0110,
+                                  _mm_mul_ps(vec_dfw1, vec_efw0));
+      vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwefw0011, vec_GCoh1));
+      vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwefw0110, vec_GCoh1));
+      _mm_storeu_ps(&aec->sde[i][0], _mm_unpacklo_ps(vec_a, vec_b));
+      _mm_storeu_ps(&aec->sde[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
+    }
+
+    {
+      const __m128 vec_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
+      const __m128 vec_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
+      __m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
+                                    _MM_SHUFFLE(2, 0, 2, 0));
+      __m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
+                                    _MM_SHUFFLE(3, 1, 3, 1));
+      __m128 vec_dfwxfw0011 = _mm_mul_ps(vec_dfw0, vec_xfw0);
+      __m128 vec_dfwxfw0110 = _mm_mul_ps(vec_dfw0, vec_xfw1);
+      vec_a = _mm_mul_ps(vec_a, vec_GCoh0);
+      vec_b = _mm_mul_ps(vec_b, vec_GCoh0);
+      vec_dfwxfw0011 = _mm_add_ps(vec_dfwxfw0011,
+                                  _mm_mul_ps(vec_dfw1, vec_xfw1));
+      vec_dfwxfw0110 = _mm_sub_ps(vec_dfwxfw0110,
+                                  _mm_mul_ps(vec_dfw1, vec_xfw0));
+      vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwxfw0011, vec_GCoh1));
+      vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwxfw0110, vec_GCoh1));
+      _mm_storeu_ps(&aec->sxd[i][0], _mm_unpacklo_ps(vec_a, vec_b));
+      _mm_storeu_ps(&aec->sxd[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
+    }
+
+    vec_sdSum = _mm_add_ps(vec_sdSum, vec_sd);
+    vec_seSum = _mm_add_ps(vec_seSum, vec_se);
+  }
+
+  _mm_add_ps_4x1(vec_sdSum, &sdSum);
+  _mm_add_ps_4x1(vec_seSum, &seSum);
+
+  for (; i < PART_LEN1; i++) {
+    aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
+                 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
+    aec->se[i] = ptrGCoh[0] * aec->se[i] +
+                 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
+    // We threshold here to protect against the ill-effects of a zero farend.
+    // The threshold is not arbitrarily chosen, but balances protection and
+    // adverse interaction with the algorithm's tuning.
+    // TODO(bjornv): investigate further why this is so sensitive.
+    aec->sx[i] =
+        ptrGCoh[0] * aec->sx[i] +
+        ptrGCoh[1] * WEBRTC_SPL_MAX(
+            xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
+            WebRtcAec_kMinFarendPSD);
+
+    aec->sde[i][0] =
+        ptrGCoh[0] * aec->sde[i][0] +
+        ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
+    aec->sde[i][1] =
+        ptrGCoh[0] * aec->sde[i][1] +
+        ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
+
+    aec->sxd[i][0] =
+        ptrGCoh[0] * aec->sxd[i][0] +
+        ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
+    aec->sxd[i][1] =
+        ptrGCoh[0] * aec->sxd[i][1] +
+        ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
+
+    sdSum += aec->sd[i];
+    seSum += aec->se[i];
+  }
+
+  // Divergent filter safeguard.
+  aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
+
+  if (aec->divergeState)
+    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
+
+  // Reset if error is significantly larger than nearend (13 dB).
+  if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
+    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
+}
+
+// Window time domain data to be used by the fft.
+__inline static void WindowData(float* x_windowed, const float* x) {
+  int i;
+  for (i = 0; i < PART_LEN; i += 4) {
+    const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]);
+    const __m128 vec_Buf2 = _mm_loadu_ps(&x[PART_LEN + i]);
+    const __m128 vec_sqrtHanning = _mm_load_ps(&WebRtcAec_sqrtHanning[i]);
+    // A B C D
+    __m128 vec_sqrtHanning_rev =
+        _mm_loadu_ps(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]);
+    // D C B A
+    vec_sqrtHanning_rev =
+        _mm_shuffle_ps(vec_sqrtHanning_rev, vec_sqrtHanning_rev,
+                       _MM_SHUFFLE(0, 1, 2, 3));
+    _mm_storeu_ps(&x_windowed[i], _mm_mul_ps(vec_Buf1, vec_sqrtHanning));
+    _mm_storeu_ps(&x_windowed[PART_LEN + i],
+                  _mm_mul_ps(vec_Buf2, vec_sqrtHanning_rev));
+  }
+}
+
+// Puts fft output data into a complex valued array.
+__inline static void StoreAsComplex(const float* data,
+                                    float data_complex[2][PART_LEN1]) {
+  int i;
+  for (i = 0; i < PART_LEN; i += 4) {
+    const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]);
+    const __m128 vec_fft4 = _mm_loadu_ps(&data[2 * i + 4]);
+    const __m128 vec_a = _mm_shuffle_ps(vec_fft0, vec_fft4,
+                                        _MM_SHUFFLE(2, 0, 2, 0));
+    const __m128 vec_b = _mm_shuffle_ps(vec_fft0, vec_fft4,
+                                        _MM_SHUFFLE(3, 1, 3, 1));
+    _mm_storeu_ps(&data_complex[0][i], vec_a);
+    _mm_storeu_ps(&data_complex[1][i], vec_b);
+  }
+  // fix beginning/end values
+  data_complex[1][0] = 0;
+  data_complex[1][PART_LEN] = 0;
+  data_complex[0][0] = data[0];
+  data_complex[0][PART_LEN] = data[1];
+}
+
+static void SubbandCoherenceSSE2(AecCore* aec,
+                                 float efw[2][PART_LEN1],
+                                 float xfw[2][PART_LEN1],
+                                 float* fft,
+                                 float* cohde,
+                                 float* cohxd) {
+  float dfw[2][PART_LEN1];
+  int i;
+
+  if (aec->delayEstCtr == 0)
+    aec->delayIdx = PartitionDelay(aec);
+
+  // Use delayed far.
+  memcpy(xfw,
+         aec->xfwBuf + aec->delayIdx * PART_LEN1,
+         sizeof(xfw[0][0]) * 2 * PART_LEN1);
+
+  // Windowed near fft
+  WindowData(fft, aec->dBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, dfw);
+
+  // Windowed error fft
+  WindowData(fft, aec->eBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, efw);
+
+  SmoothedPSD(aec, efw, dfw, xfw);
+
+  {
+    const __m128 vec_1eminus10 =  _mm_set1_ps(1e-10f);
+
+    // Subband coherence
+    for (i = 0; i + 3 < PART_LEN1; i += 4) {
+      const __m128 vec_sd = _mm_loadu_ps(&aec->sd[i]);
+      const __m128 vec_se = _mm_loadu_ps(&aec->se[i]);
+      const __m128 vec_sx = _mm_loadu_ps(&aec->sx[i]);
+      const __m128 vec_sdse = _mm_add_ps(vec_1eminus10,
+                                         _mm_mul_ps(vec_sd, vec_se));
+      const __m128 vec_sdsx = _mm_add_ps(vec_1eminus10,
+                                         _mm_mul_ps(vec_sd, vec_sx));
+      const __m128 vec_sde_3210 = _mm_loadu_ps(&aec->sde[i][0]);
+      const __m128 vec_sde_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
+      const __m128 vec_sxd_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
+      const __m128 vec_sxd_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
+      const __m128 vec_sde_0 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654,
+                                              _MM_SHUFFLE(2, 0, 2, 0));
+      const __m128 vec_sde_1 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654,
+                                              _MM_SHUFFLE(3, 1, 3, 1));
+      const __m128 vec_sxd_0 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654,
+                                              _MM_SHUFFLE(2, 0, 2, 0));
+      const __m128 vec_sxd_1 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654,
+                                              _MM_SHUFFLE(3, 1, 3, 1));
+      __m128 vec_cohde = _mm_mul_ps(vec_sde_0, vec_sde_0);
+      __m128 vec_cohxd = _mm_mul_ps(vec_sxd_0, vec_sxd_0);
+      vec_cohde = _mm_add_ps(vec_cohde, _mm_mul_ps(vec_sde_1, vec_sde_1));
+      vec_cohde = _mm_div_ps(vec_cohde, vec_sdse);
+      vec_cohxd = _mm_add_ps(vec_cohxd, _mm_mul_ps(vec_sxd_1, vec_sxd_1));
+      vec_cohxd = _mm_div_ps(vec_cohxd, vec_sdsx);
+      _mm_storeu_ps(&cohde[i], vec_cohde);
+      _mm_storeu_ps(&cohxd[i], vec_cohxd);
+    }
+
+    // scalar code for the remaining items.
+    for (; i < PART_LEN1; i++) {
+      cohde[i] =
+          (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
+          (aec->sd[i] * aec->se[i] + 1e-10f);
+      cohxd[i] =
+          (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
+          (aec->sx[i] * aec->sd[i] + 1e-10f);
+    }
+  }
+}
+
+void WebRtcAec_InitAec_SSE2(void) {
+  WebRtcAec_FilterFar = FilterFarSSE2;
+  WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
+  WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
+  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
+  WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.c
new file mode 100644
index 00000000..2c3cff2d
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.c
@@ -0,0 +1,589 @@
+/*
+ * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ * Copyright Takuya OOURA, 1996-2001
+ *
+ * You may use, copy, modify and distribute this code for any purpose (include
+ * commercial use) and without fee. Please refer to this package when you modify
+ * this code.
+ *
+ * Changes by the WebRTC authors:
+ *    - Trivial type modifications.
+ *    - Minimal code subset to do rdft of length 128.
+ *    - Optimizations because of known length.
+ *
+ *  All changes are covered by the WebRTC license and IP grant:
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+#include <math.h>
+
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
+
+// These tables used to be computed at run-time. For example, refer to:
+// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/aec/aec_rdft.c?r=6564
+// to see the initialization code.
+const float rdft_w[64] = {
+    1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f,
+    0.9238795638f, 0.3826834559f, 0.3826834559f, 0.9238795638f,
+    0.9807852507f, 0.1950903237f, 0.5555702448f, 0.8314695954f,
+    0.8314695954f, 0.5555702448f, 0.1950903237f, 0.9807852507f,
+    0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f,
+    0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f,
+    0.9569403529f, 0.2902846634f, 0.4713967443f, 0.8819212914f,
+    0.7730104327f, 0.6343933344f, 0.0980171412f, 0.9951847196f,
+    0.7071067691f, 0.4993977249f, 0.4975923598f, 0.4945882559f,
+    0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
+    0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f,
+    0.4157347977f, 0.4016037583f, 0.3865052164f, 0.3704755902f,
+    0.3535533845f, 0.3357794881f, 0.3171966672f, 0.2978496552f,
+    0.2777851224f, 0.2570513785f, 0.2356983721f, 0.2137775421f,
+    0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
+    0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
+};
+const float rdft_wk3ri_first[16] = {
+    1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f,
+    0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f,
+    0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f,
+    0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f,
+};
+const float rdft_wk3ri_second[16] = {
+    -0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f,
+    -0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f,
+    -0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f,
+    -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = {
+    1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f,
+    0.923879564f, 0.923879564f, 0.382683456f, 0.382683456f,
+    0.980785251f, 0.980785251f, 0.555570245f, 0.555570245f,
+    0.831469595f, 0.831469595f, 0.195090324f, 0.195090324f,
+    0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f,
+    0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f,
+    0.956940353f, 0.956940353f, 0.471396744f, 0.471396744f,
+    0.773010433f, 0.773010433f, 0.098017141f, 0.098017141f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = {
+    1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f,
+    0.707106769f, 0.707106769f, -0.707106769f, -0.707106769f,
+    0.923879564f, 0.923879564f, -0.382683456f, -0.382683456f,
+    0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
+    0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f,
+    0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f,
+    0.831469595f, 0.831469595f, -0.555570245f, -0.555570245f,
+    0.195090324f, 0.195090324f, -0.980785251f, -0.980785251f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = {
+    1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f,
+    0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
+    0.831469536f, 0.831469536f, -0.980785251f, -0.980785251f,
+    -0.195090353f, -0.195090353f, -0.555570245f, -0.555570245f,
+    0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f,
+    0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f,
+    0.634393334f, 0.634393334f, -0.995184720f, -0.995184720f,
+    -0.471396863f, -0.471396863f, -0.290284693f, -0.290284693f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = {
+    -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
+    -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
+    -0.195090324f, 0.195090324f, -0.831469595f, 0.831469595f,
+    -0.555570245f, 0.555570245f, -0.980785251f, 0.980785251f,
+    -0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f,
+    -0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f,
+    -0.290284663f, 0.290284663f, -0.881921291f, 0.881921291f,
+    -0.634393334f, 0.634393334f, -0.995184720f, 0.995184720f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = {
+    -0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f,
+    -0.707106769f, 0.707106769f, -0.707106769f, 0.707106769f,
+    -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
+    -0.923879564f, 0.923879564f, -0.382683456f, 0.382683456f,
+    -0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f,
+    -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f,
+    -0.555570245f, 0.555570245f, -0.831469595f, 0.831469595f,
+    -0.980785251f, 0.980785251f, -0.195090324f, 0.195090324f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = {
+    -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
+    -0.923879564f, 0.923879564f, 0.382683456f, -0.382683456f,
+    -0.555570245f, 0.555570245f, -0.195090353f, 0.195090353f,
+    -0.980785251f, 0.980785251f, 0.831469536f, -0.831469536f,
+    -0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f,
+    -0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f,
+    -0.773010492f, 0.773010492f, 0.098017156f, -0.098017156f,
+    -0.881921172f, 0.881921172f, 0.956940353f, -0.956940353f,
+};
+ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = {
+    0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f,
+};
+
+static void bitrv2_128_C(float* a) {
+  /*
+      Following things have been attempted but are no faster:
+      (a) Storing the swap indexes in a LUT (index calculations are done
+          for 'free' while waiting on memory/L1).
+      (b) Consolidate the load/store of two consecutive floats by a 64 bit
+          integer (execution is memory/L1 bound).
+      (c) Do a mix of floats and 64 bit integer to maximize register
+          utilization (execution is memory/L1 bound).
+      (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
+      (e) Hard-coding of the offsets to completely eliminates index
+          calculations.
+  */
+
+  unsigned int j, j1, k, k1;
+  float xr, xi, yr, yi;
+
+  static const int ip[4] = {0, 64, 32, 96};
+  for (k = 0; k < 4; k++) {
+    for (j = 0; j < k; j++) {
+      j1 = 2 * j + ip[k];
+      k1 = 2 * k + ip[j];
+      xr = a[j1 + 0];
+      xi = a[j1 + 1];
+      yr = a[k1 + 0];
+      yi = a[k1 + 1];
+      a[j1 + 0] = yr;
+      a[j1 + 1] = yi;
+      a[k1 + 0] = xr;
+      a[k1 + 1] = xi;
+      j1 += 8;
+      k1 += 16;
+      xr = a[j1 + 0];
+      xi = a[j1 + 1];
+      yr = a[k1 + 0];
+      yi = a[k1 + 1];
+      a[j1 + 0] = yr;
+      a[j1 + 1] = yi;
+      a[k1 + 0] = xr;
+      a[k1 + 1] = xi;
+      j1 += 8;
+      k1 -= 8;
+      xr = a[j1 + 0];
+      xi = a[j1 + 1];
+      yr = a[k1 + 0];
+      yi = a[k1 + 1];
+      a[j1 + 0] = yr;
+      a[j1 + 1] = yi;
+      a[k1 + 0] = xr;
+      a[k1 + 1] = xi;
+      j1 += 8;
+      k1 += 16;
+      xr = a[j1 + 0];
+      xi = a[j1 + 1];
+      yr = a[k1 + 0];
+      yi = a[k1 + 1];
+      a[j1 + 0] = yr;
+      a[j1 + 1] = yi;
+      a[k1 + 0] = xr;
+      a[k1 + 1] = xi;
+    }
+    j1 = 2 * k + 8 + ip[k];
+    k1 = j1 + 8;
+    xr = a[j1 + 0];
+    xi = a[j1 + 1];
+    yr = a[k1 + 0];
+    yi = a[k1 + 1];
+    a[j1 + 0] = yr;
+    a[j1 + 1] = yi;
+    a[k1 + 0] = xr;
+    a[k1 + 1] = xi;
+  }
+}
+
+static void cft1st_128_C(float* a) {
+  const int n = 128;
+  int j, k1, k2;
+  float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  // The processing of the first set of elements was simplified in C to avoid
+  // some operations (multiplication by zero or one, addition of two elements
+  // multiplied by the same weight, ...).
+  x0r = a[0] + a[2];
+  x0i = a[1] + a[3];
+  x1r = a[0] - a[2];
+  x1i = a[1] - a[3];
+  x2r = a[4] + a[6];
+  x2i = a[5] + a[7];
+  x3r = a[4] - a[6];
+  x3i = a[5] - a[7];
+  a[0] = x0r + x2r;
+  a[1] = x0i + x2i;
+  a[4] = x0r - x2r;
+  a[5] = x0i - x2i;
+  a[2] = x1r - x3i;
+  a[3] = x1i + x3r;
+  a[6] = x1r + x3i;
+  a[7] = x1i - x3r;
+  wk1r = rdft_w[2];
+  x0r = a[8] + a[10];
+  x0i = a[9] + a[11];
+  x1r = a[8] - a[10];
+  x1i = a[9] - a[11];
+  x2r = a[12] + a[14];
+  x2i = a[13] + a[15];
+  x3r = a[12] - a[14];
+  x3i = a[13] - a[15];
+  a[8] = x0r + x2r;
+  a[9] = x0i + x2i;
+  a[12] = x2i - x0i;
+  a[13] = x0r - x2r;
+  x0r = x1r - x3i;
+  x0i = x1i + x3r;
+  a[10] = wk1r * (x0r - x0i);
+  a[11] = wk1r * (x0r + x0i);
+  x0r = x3i + x1r;
+  x0i = x3r - x1i;
+  a[14] = wk1r * (x0i - x0r);
+  a[15] = wk1r * (x0i + x0r);
+  k1 = 0;
+  for (j = 16; j < n; j += 16) {
+    k1 += 2;
+    k2 = 2 * k1;
+    wk2r = rdft_w[k1 + 0];
+    wk2i = rdft_w[k1 + 1];
+    wk1r = rdft_w[k2 + 0];
+    wk1i = rdft_w[k2 + 1];
+    wk3r = rdft_wk3ri_first[k1 + 0];
+    wk3i = rdft_wk3ri_first[k1 + 1];
+    x0r = a[j + 0] + a[j + 2];
+    x0i = a[j + 1] + a[j + 3];
+    x1r = a[j + 0] - a[j + 2];
+    x1i = a[j + 1] - a[j + 3];
+    x2r = a[j + 4] + a[j + 6];
+    x2i = a[j + 5] + a[j + 7];
+    x3r = a[j + 4] - a[j + 6];
+    x3i = a[j + 5] - a[j + 7];
+    a[j + 0] = x0r + x2r;
+    a[j + 1] = x0i + x2i;
+    x0r -= x2r;
+    x0i -= x2i;
+    a[j + 4] = wk2r * x0r - wk2i * x0i;
+    a[j + 5] = wk2r * x0i + wk2i * x0r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j + 2] = wk1r * x0r - wk1i * x0i;
+    a[j + 3] = wk1r * x0i + wk1i * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j + 6] = wk3r * x0r - wk3i * x0i;
+    a[j + 7] = wk3r * x0i + wk3i * x0r;
+    wk1r = rdft_w[k2 + 2];
+    wk1i = rdft_w[k2 + 3];
+    wk3r = rdft_wk3ri_second[k1 + 0];
+    wk3i = rdft_wk3ri_second[k1 + 1];
+    x0r = a[j + 8] + a[j + 10];
+    x0i = a[j + 9] + a[j + 11];
+    x1r = a[j + 8] - a[j + 10];
+    x1i = a[j + 9] - a[j + 11];
+    x2r = a[j + 12] + a[j + 14];
+    x2i = a[j + 13] + a[j + 15];
+    x3r = a[j + 12] - a[j + 14];
+    x3i = a[j + 13] - a[j + 15];
+    a[j + 8] = x0r + x2r;
+    a[j + 9] = x0i + x2i;
+    x0r -= x2r;
+    x0i -= x2i;
+    a[j + 12] = -wk2i * x0r - wk2r * x0i;
+    a[j + 13] = -wk2i * x0i + wk2r * x0r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j + 10] = wk1r * x0r - wk1i * x0i;
+    a[j + 11] = wk1r * x0i + wk1i * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j + 14] = wk3r * x0r - wk3i * x0i;
+    a[j + 15] = wk3r * x0i + wk3i * x0r;
+  }
+}
+
+static void cftmdl_128_C(float* a) {
+  const int l = 8;
+  const int n = 128;
+  const int m = 32;
+  int j0, j1, j2, j3, k, k1, k2, m2;
+  float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  for (j0 = 0; j0 < l; j0 += 2) {
+    j1 = j0 + 8;
+    j2 = j0 + 16;
+    j3 = j0 + 24;
+    x0r = a[j0 + 0] + a[j1 + 0];
+    x0i = a[j0 + 1] + a[j1 + 1];
+    x1r = a[j0 + 0] - a[j1 + 0];
+    x1i = a[j0 + 1] - a[j1 + 1];
+    x2r = a[j2 + 0] + a[j3 + 0];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2 + 0] - a[j3 + 0];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j0 + 0] = x0r + x2r;
+    a[j0 + 1] = x0i + x2i;
+    a[j2 + 0] = x0r - x2r;
+    a[j2 + 1] = x0i - x2i;
+    a[j1 + 0] = x1r - x3i;
+    a[j1 + 1] = x1i + x3r;
+    a[j3 + 0] = x1r + x3i;
+    a[j3 + 1] = x1i - x3r;
+  }
+  wk1r = rdft_w[2];
+  for (j0 = m; j0 < l + m; j0 += 2) {
+    j1 = j0 + 8;
+    j2 = j0 + 16;
+    j3 = j0 + 24;
+    x0r = a[j0 + 0] + a[j1 + 0];
+    x0i = a[j0 + 1] + a[j1 + 1];
+    x1r = a[j0 + 0] - a[j1 + 0];
+    x1i = a[j0 + 1] - a[j1 + 1];
+    x2r = a[j2 + 0] + a[j3 + 0];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2 + 0] - a[j3 + 0];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j0 + 0] = x0r + x2r;
+    a[j0 + 1] = x0i + x2i;
+    a[j2 + 0] = x2i - x0i;
+    a[j2 + 1] = x0r - x2r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j1 + 0] = wk1r * (x0r - x0i);
+    a[j1 + 1] = wk1r * (x0r + x0i);
+    x0r = x3i + x1r;
+    x0i = x3r - x1i;
+    a[j3 + 0] = wk1r * (x0i - x0r);
+    a[j3 + 1] = wk1r * (x0i + x0r);
+  }
+  k1 = 0;
+  m2 = 2 * m;
+  for (k = m2; k < n; k += m2) {
+    k1 += 2;
+    k2 = 2 * k1;
+    wk2r = rdft_w[k1 + 0];
+    wk2i = rdft_w[k1 + 1];
+    wk1r = rdft_w[k2 + 0];
+    wk1i = rdft_w[k2 + 1];
+    wk3r = rdft_wk3ri_first[k1 + 0];
+    wk3i = rdft_wk3ri_first[k1 + 1];
+    for (j0 = k; j0 < l + k; j0 += 2) {
+      j1 = j0 + 8;
+      j2 = j0 + 16;
+      j3 = j0 + 24;
+      x0r = a[j0 + 0] + a[j1 + 0];
+      x0i = a[j0 + 1] + a[j1 + 1];
+      x1r = a[j0 + 0] - a[j1 + 0];
+      x1i = a[j0 + 1] - a[j1 + 1];
+      x2r = a[j2 + 0] + a[j3 + 0];
+      x2i = a[j2 + 1] + a[j3 + 1];
+      x3r = a[j2 + 0] - a[j3 + 0];
+      x3i = a[j2 + 1] - a[j3 + 1];
+      a[j0 + 0] = x0r + x2r;
+      a[j0 + 1] = x0i + x2i;
+      x0r -= x2r;
+      x0i -= x2i;
+      a[j2 + 0] = wk2r * x0r - wk2i * x0i;
+      a[j2 + 1] = wk2r * x0i + wk2i * x0r;
+      x0r = x1r - x3i;
+      x0i = x1i + x3r;
+      a[j1 + 0] = wk1r * x0r - wk1i * x0i;
+      a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+      x0r = x1r + x3i;
+      x0i = x1i - x3r;
+      a[j3 + 0] = wk3r * x0r - wk3i * x0i;
+      a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+    }
+    wk1r = rdft_w[k2 + 2];
+    wk1i = rdft_w[k2 + 3];
+    wk3r = rdft_wk3ri_second[k1 + 0];
+    wk3i = rdft_wk3ri_second[k1 + 1];
+    for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
+      j1 = j0 + 8;
+      j2 = j0 + 16;
+      j3 = j0 + 24;
+      x0r = a[j0 + 0] + a[j1 + 0];
+      x0i = a[j0 + 1] + a[j1 + 1];
+      x1r = a[j0 + 0] - a[j1 + 0];
+      x1i = a[j0 + 1] - a[j1 + 1];
+      x2r = a[j2 + 0] + a[j3 + 0];
+      x2i = a[j2 + 1] + a[j3 + 1];
+      x3r = a[j2 + 0] - a[j3 + 0];
+      x3i = a[j2 + 1] - a[j3 + 1];
+      a[j0 + 0] = x0r + x2r;
+      a[j0 + 1] = x0i + x2i;
+      x0r -= x2r;
+      x0i -= x2i;
+      a[j2 + 0] = -wk2i * x0r - wk2r * x0i;
+      a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
+      x0r = x1r - x3i;
+      x0i = x1i + x3r;
+      a[j1 + 0] = wk1r * x0r - wk1i * x0i;
+      a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+      x0r = x1r + x3i;
+      x0i = x1i - x3r;
+      a[j3 + 0] = wk3r * x0r - wk3i * x0i;
+      a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+    }
+  }
+}
+
+static void cftfsub_128_C(float* a) {
+  int j, j1, j2, j3, l;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  cft1st_128(a);
+  cftmdl_128(a);
+  l = 32;
+  for (j = 0; j < l; j += 2) {
+    j1 = j + l;
+    j2 = j1 + l;
+    j3 = j2 + l;
+    x0r = a[j] + a[j1];
+    x0i = a[j + 1] + a[j1 + 1];
+    x1r = a[j] - a[j1];
+    x1i = a[j + 1] - a[j1 + 1];
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i + x2i;
+    a[j2] = x0r - x2r;
+    a[j2 + 1] = x0i - x2i;
+    a[j1] = x1r - x3i;
+    a[j1 + 1] = x1i + x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i - x3r;
+  }
+}
+
+static void cftbsub_128_C(float* a) {
+  int j, j1, j2, j3, l;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  cft1st_128(a);
+  cftmdl_128(a);
+  l = 32;
+
+  for (j = 0; j < l; j += 2) {
+    j1 = j + l;
+    j2 = j1 + l;
+    j3 = j2 + l;
+    x0r = a[j] + a[j1];
+    x0i = -a[j + 1] - a[j1 + 1];
+    x1r = a[j] - a[j1];
+    x1i = -a[j + 1] + a[j1 + 1];
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i - x2i;
+    a[j2] = x0r - x2r;
+    a[j2 + 1] = x0i + x2i;
+    a[j1] = x1r - x3i;
+    a[j1 + 1] = x1i - x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i + x3r;
+  }
+}
+
+static void rftfsub_128_C(float* a) {
+  const float* c = rdft_w + 32;
+  int j1, j2, k1, k2;
+  float wkr, wki, xr, xi, yr, yi;
+
+  for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
+    k2 = 128 - j2;
+    k1 = 32 - j1;
+    wkr = 0.5f - c[k1];
+    wki = c[j1];
+    xr = a[j2 + 0] - a[k2 + 0];
+    xi = a[j2 + 1] + a[k2 + 1];
+    yr = wkr * xr - wki * xi;
+    yi = wkr * xi + wki * xr;
+    a[j2 + 0] -= yr;
+    a[j2 + 1] -= yi;
+    a[k2 + 0] += yr;
+    a[k2 + 1] -= yi;
+  }
+}
+
+static void rftbsub_128_C(float* a) {
+  const float* c = rdft_w + 32;
+  int j1, j2, k1, k2;
+  float wkr, wki, xr, xi, yr, yi;
+
+  a[1] = -a[1];
+  for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
+    k2 = 128 - j2;
+    k1 = 32 - j1;
+    wkr = 0.5f - c[k1];
+    wki = c[j1];
+    xr = a[j2 + 0] - a[k2 + 0];
+    xi = a[j2 + 1] + a[k2 + 1];
+    yr = wkr * xr + wki * xi;
+    yi = wkr * xi - wki * xr;
+    a[j2 + 0] = a[j2 + 0] - yr;
+    a[j2 + 1] = yi - a[j2 + 1];
+    a[k2 + 0] = yr + a[k2 + 0];
+    a[k2 + 1] = yi - a[k2 + 1];
+  }
+  a[65] = -a[65];
+}
+
+void aec_rdft_forward_128(float* a) {
+  float xi;
+  bitrv2_128(a);
+  cftfsub_128(a);
+  rftfsub_128(a);
+  xi = a[0] - a[1];
+  a[0] += a[1];
+  a[1] = xi;
+}
+
+void aec_rdft_inverse_128(float* a) {
+  a[1] = 0.5f * (a[0] - a[1]);
+  a[0] -= a[1];
+  rftbsub_128(a);
+  bitrv2_128(a);
+  cftbsub_128(a);
+}
+
+// code path selection
+RftSub128 cft1st_128;
+RftSub128 cftmdl_128;
+RftSub128 rftfsub_128;
+RftSub128 rftbsub_128;
+RftSub128 cftfsub_128;
+RftSub128 cftbsub_128;
+RftSub128 bitrv2_128;
+
+void aec_rdft_init(void) {
+  cft1st_128 = cft1st_128_C;
+  cftmdl_128 = cftmdl_128_C;
+  rftfsub_128 = rftfsub_128_C;
+  rftbsub_128 = rftbsub_128_C;
+  cftfsub_128 = cftfsub_128_C;
+  cftbsub_128 = cftbsub_128_C;
+  bitrv2_128 = bitrv2_128_C;
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+  if (WebRtc_GetCPUInfo(kSSE2)) {
+    aec_rdft_init_sse2();
+  }
+#endif
+#if defined(MIPS_FPU_LE)
+  aec_rdft_init_mips();
+#endif
+#if defined(WEBRTC_HAS_NEON)
+  aec_rdft_init_neon();
+#elif defined(WEBRTC_DETECT_NEON)
+  if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
+    aec_rdft_init_neon();
+  }
+#endif
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.h
new file mode 100644
index 00000000..18eb7a5c
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.h
@@ -0,0 +1,61 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
+
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+
+// These intrinsics were unavailable before VS 2008.
+// TODO(andrew): move to a common file.
+#if defined(_MSC_VER) && _MSC_VER < 1500
+#include <emmintrin.h>
+static __inline __m128 _mm_castsi128_ps(__m128i a) { return *(__m128*)&a; }
+static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; }
+#endif
+
+// Constants shared by all paths (C, SSE2, NEON).
+extern const float rdft_w[64];
+// Constants used by the C path.
+extern const float rdft_wk3ri_first[16];
+extern const float rdft_wk3ri_second[16];
+// Constants used by SSE2 and NEON but initialized in the C path.
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32];
+extern ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4];
+
+// code path selection function pointers
+typedef void (*RftSub128)(float* a);
+extern RftSub128 rftfsub_128;
+extern RftSub128 rftbsub_128;
+extern RftSub128 cft1st_128;
+extern RftSub128 cftmdl_128;
+extern RftSub128 cftfsub_128;
+extern RftSub128 cftbsub_128;
+extern RftSub128 bitrv2_128;
+
+// entry points
+void aec_rdft_init(void);
+void aec_rdft_init_sse2(void);
+void aec_rdft_forward_128(float* a);
+void aec_rdft_inverse_128(float* a);
+
+#if defined(MIPS_FPU_LE)
+void aec_rdft_init_mips(void);
+#endif
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+void aec_rdft_init_neon(void);
+#endif
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_mips.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_mips.c
new file mode 100644
index 00000000..7e64e657
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_mips.c
@@ -0,0 +1,1187 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+#include "webrtc/typedefs.h"
+
+static void bitrv2_128_mips(float* a) {
+  // n is 128
+  float xr, xi, yr, yi;
+
+  xr = a[8];
+  xi = a[9];
+  yr = a[16];
+  yi = a[17];
+  a[8] = yr;
+  a[9] = yi;
+  a[16] = xr;
+  a[17] = xi;
+
+  xr = a[64];
+  xi = a[65];
+  yr = a[2];
+  yi = a[3];
+  a[64] = yr;
+  a[65] = yi;
+  a[2] = xr;
+  a[3] = xi;
+
+  xr = a[72];
+  xi = a[73];
+  yr = a[18];
+  yi = a[19];
+  a[72] = yr;
+  a[73] = yi;
+  a[18] = xr;
+  a[19] = xi;
+
+  xr = a[80];
+  xi = a[81];
+  yr = a[10];
+  yi = a[11];
+  a[80] = yr;
+  a[81] = yi;
+  a[10] = xr;
+  a[11] = xi;
+
+  xr = a[88];
+  xi = a[89];
+  yr = a[26];
+  yi = a[27];
+  a[88] = yr;
+  a[89] = yi;
+  a[26] = xr;
+  a[27] = xi;
+
+  xr = a[74];
+  xi = a[75];
+  yr = a[82];
+  yi = a[83];
+  a[74] = yr;
+  a[75] = yi;
+  a[82] = xr;
+  a[83] = xi;
+
+  xr = a[32];
+  xi = a[33];
+  yr = a[4];
+  yi = a[5];
+  a[32] = yr;
+  a[33] = yi;
+  a[4] = xr;
+  a[5] = xi;
+
+  xr = a[40];
+  xi = a[41];
+  yr = a[20];
+  yi = a[21];
+  a[40] = yr;
+  a[41] = yi;
+  a[20] = xr;
+  a[21] = xi;
+
+  xr = a[48];
+  xi = a[49];
+  yr = a[12];
+  yi = a[13];
+  a[48] = yr;
+  a[49] = yi;
+  a[12] = xr;
+  a[13] = xi;
+
+  xr = a[56];
+  xi = a[57];
+  yr = a[28];
+  yi = a[29];
+  a[56] = yr;
+  a[57] = yi;
+  a[28] = xr;
+  a[29] = xi;
+
+  xr = a[34];
+  xi = a[35];
+  yr = a[68];
+  yi = a[69];
+  a[34] = yr;
+  a[35] = yi;
+  a[68] = xr;
+  a[69] = xi;
+
+  xr = a[42];
+  xi = a[43];
+  yr = a[84];
+  yi = a[85];
+  a[42] = yr;
+  a[43] = yi;
+  a[84] = xr;
+  a[85] = xi;
+
+  xr = a[50];
+  xi = a[51];
+  yr = a[76];
+  yi = a[77];
+  a[50] = yr;
+  a[51] = yi;
+  a[76] = xr;
+  a[77] = xi;
+
+  xr = a[58];
+  xi = a[59];
+  yr = a[92];
+  yi = a[93];
+  a[58] = yr;
+  a[59] = yi;
+  a[92] = xr;
+  a[93] = xi;
+
+  xr = a[44];
+  xi = a[45];
+  yr = a[52];
+  yi = a[53];
+  a[44] = yr;
+  a[45] = yi;
+  a[52] = xr;
+  a[53] = xi;
+
+  xr = a[96];
+  xi = a[97];
+  yr = a[6];
+  yi = a[7];
+  a[96] = yr;
+  a[97] = yi;
+  a[6] = xr;
+  a[7] = xi;
+
+  xr = a[104];
+  xi = a[105];
+  yr = a[22];
+  yi = a[23];
+  a[104] = yr;
+  a[105] = yi;
+  a[22] = xr;
+  a[23] = xi;
+
+  xr = a[112];
+  xi = a[113];
+  yr = a[14];
+  yi = a[15];
+  a[112] = yr;
+  a[113] = yi;
+  a[14] = xr;
+  a[15] = xi;
+
+  xr = a[120];
+  xi = a[121];
+  yr = a[30];
+  yi = a[31];
+  a[120] = yr;
+  a[121] = yi;
+  a[30] = xr;
+  a[31] = xi;
+
+  xr = a[98];
+  xi = a[99];
+  yr = a[70];
+  yi = a[71];
+  a[98] = yr;
+  a[99] = yi;
+  a[70] = xr;
+  a[71] = xi;
+
+  xr = a[106];
+  xi = a[107];
+  yr = a[86];
+  yi = a[87];
+  a[106] = yr;
+  a[107] = yi;
+  a[86] = xr;
+  a[87] = xi;
+
+  xr = a[114];
+  xi = a[115];
+  yr = a[78];
+  yi = a[79];
+  a[114] = yr;
+  a[115] = yi;
+  a[78] = xr;
+  a[79] = xi;
+
+  xr = a[122];
+  xi = a[123];
+  yr = a[94];
+  yi = a[95];
+  a[122] = yr;
+  a[123] = yi;
+  a[94] = xr;
+  a[95] = xi;
+
+  xr = a[100];
+  xi = a[101];
+  yr = a[38];
+  yi = a[39];
+  a[100] = yr;
+  a[101] = yi;
+  a[38] = xr;
+  a[39] = xi;
+
+  xr = a[108];
+  xi = a[109];
+  yr = a[54];
+  yi = a[55];
+  a[108] = yr;
+  a[109] = yi;
+  a[54] = xr;
+  a[55] = xi;
+
+  xr = a[116];
+  xi = a[117];
+  yr = a[46];
+  yi = a[47];
+  a[116] = yr;
+  a[117] = yi;
+  a[46] = xr;
+  a[47] = xi;
+
+  xr = a[124];
+  xi = a[125];
+  yr = a[62];
+  yi = a[63];
+  a[124] = yr;
+  a[125] = yi;
+  a[62] = xr;
+  a[63] = xi;
+
+  xr = a[110];
+  xi = a[111];
+  yr = a[118];
+  yi = a[119];
+  a[110] = yr;
+  a[111] = yi;
+  a[118] = xr;
+  a[119] = xi;
+}
+
+static void cft1st_128_mips(float* a) {
+  float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14;
+  int a_ptr, p1_rdft, p2_rdft, count;
+  const float* first = rdft_wk3ri_first;
+  const float* second = rdft_wk3ri_second;
+
+  __asm __volatile (
+    ".set       push                                                    \n\t"
+    ".set       noreorder                                               \n\t"
+    // first 8
+    "lwc1       %[f0],        0(%[a])                                   \n\t"
+    "lwc1       %[f1],        4(%[a])                                   \n\t"
+    "lwc1       %[f2],        8(%[a])                                   \n\t"
+    "lwc1       %[f3],        12(%[a])                                  \n\t"
+    "lwc1       %[f4],        16(%[a])                                  \n\t"
+    "lwc1       %[f5],        20(%[a])                                  \n\t"
+    "lwc1       %[f6],        24(%[a])                                  \n\t"
+    "lwc1       %[f7],        28(%[a])                                  \n\t"
+    "add.s      %[f8],        %[f0],        %[f2]                       \n\t"
+    "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
+    "add.s      %[f2],        %[f4],        %[f6]                       \n\t"
+    "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
+    "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
+    "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
+    "add.s      %[f3],        %[f5],        %[f7]                       \n\t"
+    "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
+    "add.s      %[f7],        %[f8],        %[f2]                       \n\t"
+    "sub.s      %[f8],        %[f8],        %[f2]                       \n\t"
+    "sub.s      %[f2],        %[f1],        %[f4]                       \n\t"
+    "add.s      %[f1],        %[f1],        %[f4]                       \n\t"
+    "add.s      %[f4],        %[f6],        %[f3]                       \n\t"
+    "sub.s      %[f6],        %[f6],        %[f3]                       \n\t"
+    "sub.s      %[f3],        %[f0],        %[f5]                       \n\t"
+    "add.s      %[f0],        %[f0],        %[f5]                       \n\t"
+    "swc1       %[f7],        0(%[a])                                   \n\t"
+    "swc1       %[f8],        16(%[a])                                  \n\t"
+    "swc1       %[f2],        28(%[a])                                  \n\t"
+    "swc1       %[f1],        12(%[a])                                  \n\t"
+    "swc1       %[f4],        4(%[a])                                   \n\t"
+    "swc1       %[f6],        20(%[a])                                  \n\t"
+    "swc1       %[f3],        8(%[a])                                   \n\t"
+    "swc1       %[f0],        24(%[a])                                  \n\t"
+    // second 8
+    "lwc1       %[f0],        32(%[a])                                  \n\t"
+    "lwc1       %[f1],        36(%[a])                                  \n\t"
+    "lwc1       %[f2],        40(%[a])                                  \n\t"
+    "lwc1       %[f3],        44(%[a])                                  \n\t"
+    "lwc1       %[f4],        48(%[a])                                  \n\t"
+    "lwc1       %[f5],        52(%[a])                                  \n\t"
+    "lwc1       %[f6],        56(%[a])                                  \n\t"
+    "lwc1       %[f7],        60(%[a])                                  \n\t"
+    "add.s      %[f8],        %[f4],        %[f6]                       \n\t"
+    "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
+    "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
+    "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
+    "add.s      %[f3],        %[f0],        %[f2]                       \n\t"
+    "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
+    "add.s      %[f2],        %[f5],        %[f7]                       \n\t"
+    "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
+    "add.s      %[f7],        %[f4],        %[f1]                       \n\t"
+    "sub.s      %[f4],        %[f4],        %[f1]                       \n\t"
+    "add.s      %[f1],        %[f3],        %[f8]                       \n\t"
+    "sub.s      %[f3],        %[f3],        %[f8]                       \n\t"
+    "sub.s      %[f8],        %[f0],        %[f5]                       \n\t"
+    "add.s      %[f0],        %[f0],        %[f5]                       \n\t"
+    "add.s      %[f5],        %[f6],        %[f2]                       \n\t"
+    "sub.s      %[f6],        %[f2],        %[f6]                       \n\t"
+    "lwc1       %[f9],        8(%[rdft_w])                              \n\t"
+    "sub.s      %[f2],        %[f8],        %[f7]                       \n\t"
+    "add.s      %[f8],        %[f8],        %[f7]                       \n\t"
+    "sub.s      %[f7],        %[f4],        %[f0]                       \n\t"
+    "add.s      %[f4],        %[f4],        %[f0]                       \n\t"
+    // prepare for loop
+    "addiu      %[a_ptr],     %[a],         64                          \n\t"
+    "addiu      %[p1_rdft],   %[rdft_w],    8                           \n\t"
+    "addiu      %[p2_rdft],   %[rdft_w],    16                          \n\t"
+    "addiu      %[count],     $zero,        7                           \n\t"
+    // finish second 8
+    "mul.s      %[f2],        %[f9],        %[f2]                       \n\t"
+    "mul.s      %[f8],        %[f9],        %[f8]                       \n\t"
+    "mul.s      %[f7],        %[f9],        %[f7]                       \n\t"
+    "mul.s      %[f4],        %[f9],        %[f4]                       \n\t"
+    "swc1       %[f1],        32(%[a])                                  \n\t"
+    "swc1       %[f3],        52(%[a])                                  \n\t"
+    "swc1       %[f5],        36(%[a])                                  \n\t"
+    "swc1       %[f6],        48(%[a])                                  \n\t"
+    "swc1       %[f2],        40(%[a])                                  \n\t"
+    "swc1       %[f8],        44(%[a])                                  \n\t"
+    "swc1       %[f7],        56(%[a])                                  \n\t"
+    "swc1       %[f4],        60(%[a])                                  \n\t"
+    // loop
+   "1:                                                                  \n\t"
+    "lwc1       %[f0],        0(%[a_ptr])                               \n\t"
+    "lwc1       %[f1],        4(%[a_ptr])                               \n\t"
+    "lwc1       %[f2],        8(%[a_ptr])                               \n\t"
+    "lwc1       %[f3],        12(%[a_ptr])                              \n\t"
+    "lwc1       %[f4],        16(%[a_ptr])                              \n\t"
+    "lwc1       %[f5],        20(%[a_ptr])                              \n\t"
+    "lwc1       %[f6],        24(%[a_ptr])                              \n\t"
+    "lwc1       %[f7],        28(%[a_ptr])                              \n\t"
+    "add.s      %[f8],        %[f0],        %[f2]                       \n\t"
+    "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
+    "add.s      %[f2],        %[f4],        %[f6]                       \n\t"
+    "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
+    "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
+    "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
+    "add.s      %[f3],        %[f5],        %[f7]                       \n\t"
+    "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
+    "lwc1       %[f10],       4(%[p1_rdft])                             \n\t"
+    "lwc1       %[f11],       0(%[p2_rdft])                             \n\t"
+    "lwc1       %[f12],       4(%[p2_rdft])                             \n\t"
+    "lwc1       %[f13],       8(%[first])                               \n\t"
+    "lwc1       %[f14],       12(%[first])                              \n\t"
+    "add.s      %[f7],        %[f8],        %[f2]                       \n\t"
+    "sub.s      %[f8],        %[f8],        %[f2]                       \n\t"
+    "add.s      %[f2],        %[f6],        %[f3]                       \n\t"
+    "sub.s      %[f6],        %[f6],        %[f3]                       \n\t"
+    "add.s      %[f3],        %[f0],        %[f5]                       \n\t"
+    "sub.s      %[f0],        %[f0],        %[f5]                       \n\t"
+    "add.s      %[f5],        %[f1],        %[f4]                       \n\t"
+    "sub.s      %[f1],        %[f1],        %[f4]                       \n\t"
+    "swc1       %[f7],        0(%[a_ptr])                               \n\t"
+    "swc1       %[f2],        4(%[a_ptr])                               \n\t"
+    "mul.s      %[f4],        %[f9],        %[f8]                       \n\t"
+#if defined(MIPS32_R2_LE)
+    "mul.s      %[f8],        %[f10],       %[f8]                       \n\t"
+    "mul.s      %[f7],        %[f11],       %[f0]                       \n\t"
+    "mul.s      %[f0],        %[f12],       %[f0]                       \n\t"
+    "mul.s      %[f2],        %[f13],       %[f3]                       \n\t"
+    "mul.s      %[f3],        %[f14],       %[f3]                       \n\t"
+    "nmsub.s    %[f4],        %[f4],        %[f10],       %[f6]         \n\t"
+    "madd.s     %[f8],        %[f8],        %[f9],        %[f6]         \n\t"
+    "nmsub.s    %[f7],        %[f7],        %[f12],       %[f5]         \n\t"
+    "madd.s     %[f0],        %[f0],        %[f11],       %[f5]         \n\t"
+    "nmsub.s    %[f2],        %[f2],        %[f14],       %[f1]         \n\t"
+    "madd.s     %[f3],        %[f3],        %[f13],       %[f1]         \n\t"
+#else
+    "mul.s      %[f7],        %[f10],       %[f6]                       \n\t"
+    "mul.s      %[f6],        %[f9],        %[f6]                       \n\t"
+    "mul.s      %[f8],        %[f10],       %[f8]                       \n\t"
+    "mul.s      %[f2],        %[f11],       %[f0]                       \n\t"
+    "mul.s      %[f11],       %[f11],       %[f5]                       \n\t"
+    "mul.s      %[f5],        %[f12],       %[f5]                       \n\t"
+    "mul.s      %[f0],        %[f12],       %[f0]                       \n\t"
+    "mul.s      %[f12],       %[f13],       %[f3]                       \n\t"
+    "mul.s      %[f13],       %[f13],       %[f1]                       \n\t"
+    "mul.s      %[f1],        %[f14],       %[f1]                       \n\t"
+    "mul.s      %[f3],        %[f14],       %[f3]                       \n\t"
+    "sub.s      %[f4],        %[f4],        %[f7]                       \n\t"
+    "add.s      %[f8],        %[f6],        %[f8]                       \n\t"
+    "sub.s      %[f7],        %[f2],        %[f5]                       \n\t"
+    "add.s      %[f0],        %[f11],       %[f0]                       \n\t"
+    "sub.s      %[f2],        %[f12],       %[f1]                       \n\t"
+    "add.s      %[f3],        %[f13],       %[f3]                       \n\t"
+#endif
+    "swc1       %[f4],        16(%[a_ptr])                              \n\t"
+    "swc1       %[f8],        20(%[a_ptr])                              \n\t"
+    "swc1       %[f7],        8(%[a_ptr])                               \n\t"
+    "swc1       %[f0],        12(%[a_ptr])                              \n\t"
+    "swc1       %[f2],        24(%[a_ptr])                              \n\t"
+    "swc1       %[f3],        28(%[a_ptr])                              \n\t"
+    "lwc1       %[f0],        32(%[a_ptr])                              \n\t"
+    "lwc1       %[f1],        36(%[a_ptr])                              \n\t"
+    "lwc1       %[f2],        40(%[a_ptr])                              \n\t"
+    "lwc1       %[f3],        44(%[a_ptr])                              \n\t"
+    "lwc1       %[f4],        48(%[a_ptr])                              \n\t"
+    "lwc1       %[f5],        52(%[a_ptr])                              \n\t"
+    "lwc1       %[f6],        56(%[a_ptr])                              \n\t"
+    "lwc1       %[f7],        60(%[a_ptr])                              \n\t"
+    "add.s      %[f8],        %[f0],        %[f2]                       \n\t"
+    "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
+    "add.s      %[f2],        %[f4],        %[f6]                       \n\t"
+    "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
+    "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
+    "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
+    "add.s      %[f3],        %[f5],        %[f7]                       \n\t"
+    "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
+    "lwc1       %[f11],       8(%[p2_rdft])                             \n\t"
+    "lwc1       %[f12],       12(%[p2_rdft])                            \n\t"
+    "lwc1       %[f13],       8(%[second])                              \n\t"
+    "lwc1       %[f14],       12(%[second])                             \n\t"
+    "add.s      %[f7],        %[f8],        %[f2]                       \n\t"
+    "sub.s      %[f8],        %[f2],        %[f8]                       \n\t"
+    "add.s      %[f2],        %[f6],        %[f3]                       \n\t"
+    "sub.s      %[f6],        %[f3],        %[f6]                       \n\t"
+    "add.s      %[f3],        %[f0],        %[f5]                       \n\t"
+    "sub.s      %[f0],        %[f0],        %[f5]                       \n\t"
+    "add.s      %[f5],        %[f1],        %[f4]                       \n\t"
+    "sub.s      %[f1],        %[f1],        %[f4]                       \n\t"
+    "swc1       %[f7],        32(%[a_ptr])                              \n\t"
+    "swc1       %[f2],        36(%[a_ptr])                              \n\t"
+    "mul.s      %[f4],        %[f10],       %[f8]                       \n\t"
+#if defined(MIPS32_R2_LE)
+    "mul.s      %[f10],       %[f10],       %[f6]                       \n\t"
+    "mul.s      %[f7],        %[f11],       %[f0]                       \n\t"
+    "mul.s      %[f11],       %[f11],       %[f5]                       \n\t"
+    "mul.s      %[f2],        %[f13],       %[f3]                       \n\t"
+    "mul.s      %[f13],       %[f13],       %[f1]                       \n\t"
+    "madd.s     %[f4],        %[f4],        %[f9],        %[f6]         \n\t"
+    "nmsub.s    %[f10],       %[f10],       %[f9],        %[f8]         \n\t"
+    "nmsub.s    %[f7],        %[f7],        %[f12],       %[f5]         \n\t"
+    "madd.s     %[f11],       %[f11],       %[f12],       %[f0]         \n\t"
+    "nmsub.s    %[f2],        %[f2],        %[f14],       %[f1]         \n\t"
+    "madd.s     %[f13],       %[f13],       %[f14],       %[f3]         \n\t"
+#else
+    "mul.s      %[f2],        %[f9],        %[f6]                       \n\t"
+    "mul.s      %[f10],       %[f10],       %[f6]                       \n\t"
+    "mul.s      %[f9],        %[f9],        %[f8]                       \n\t"
+    "mul.s      %[f7],        %[f11],       %[f0]                       \n\t"
+    "mul.s      %[f8],        %[f12],       %[f5]                       \n\t"
+    "mul.s      %[f11],       %[f11],       %[f5]                       \n\t"
+    "mul.s      %[f12],       %[f12],       %[f0]                       \n\t"
+    "mul.s      %[f5],        %[f13],       %[f3]                       \n\t"
+    "mul.s      %[f0],        %[f14],       %[f1]                       \n\t"
+    "mul.s      %[f13],       %[f13],       %[f1]                       \n\t"
+    "mul.s      %[f14],       %[f14],       %[f3]                       \n\t"
+    "add.s      %[f4],        %[f4],        %[f2]                       \n\t"
+    "sub.s      %[f10],       %[f10],       %[f9]                       \n\t"
+    "sub.s      %[f7],        %[f7],        %[f8]                       \n\t"
+    "add.s      %[f11],       %[f11],       %[f12]                      \n\t"
+    "sub.s      %[f2],        %[f5],        %[f0]                       \n\t"
+    "add.s      %[f13],       %[f13],       %[f14]                      \n\t"
+#endif
+    "swc1       %[f4],        48(%[a_ptr])                              \n\t"
+    "swc1       %[f10],       52(%[a_ptr])                              \n\t"
+    "swc1       %[f7],        40(%[a_ptr])                              \n\t"
+    "swc1       %[f11],       44(%[a_ptr])                              \n\t"
+    "swc1       %[f2],        56(%[a_ptr])                              \n\t"
+    "swc1       %[f13],       60(%[a_ptr])                              \n\t"
+    "addiu      %[count],     %[count],     -1                          \n\t"
+    "lwc1       %[f9],        8(%[p1_rdft])                             \n\t"
+    "addiu      %[a_ptr],     %[a_ptr],     64                          \n\t"
+    "addiu      %[p1_rdft],   %[p1_rdft],   8                           \n\t"
+    "addiu      %[p2_rdft],   %[p2_rdft],   16                          \n\t"
+    "addiu      %[first],     %[first],     8                           \n\t"
+    "bgtz       %[count],     1b                                        \n\t"
+    " addiu     %[second],    %[second],    8                           \n\t"
+    ".set       pop                                                     \n\t"
+    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+      [f8] "=&f" (f8), [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
+      [f12] "=&f" (f12), [f13] "=&f" (f13), [f14] "=&f" (f14),
+      [a_ptr] "=&r" (a_ptr), [p1_rdft] "=&r" (p1_rdft), [first] "+r" (first),
+      [p2_rdft] "=&r" (p2_rdft), [count] "=&r" (count), [second] "+r" (second)
+    : [a] "r" (a), [rdft_w] "r" (rdft_w)
+    : "memory"
+  );
+}
+
+static void cftmdl_128_mips(float* a) {
+  float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14;
+  int tmp_a, count;
+  __asm __volatile (
+    ".set       push                                      \n\t"
+    ".set       noreorder                                 \n\t"
+    "addiu      %[tmp_a],   %[a],         0               \n\t"
+    "addiu      %[count],   $zero,        4               \n\t"
+   "1:                                                    \n\t"
+    "addiu      %[count],   %[count],     -1              \n\t"
+    "lwc1       %[f0],      0(%[tmp_a])                   \n\t"
+    "lwc1       %[f2],      32(%[tmp_a])                  \n\t"
+    "lwc1       %[f4],      64(%[tmp_a])                  \n\t"
+    "lwc1       %[f6],      96(%[tmp_a])                  \n\t"
+    "lwc1       %[f1],      4(%[tmp_a])                   \n\t"
+    "lwc1       %[f3],      36(%[tmp_a])                  \n\t"
+    "lwc1       %[f5],      68(%[tmp_a])                  \n\t"
+    "lwc1       %[f7],      100(%[tmp_a])                 \n\t"
+    "add.s      %[f8],      %[f0],        %[f2]           \n\t"
+    "sub.s      %[f0],      %[f0],        %[f2]           \n\t"
+    "add.s      %[f2],      %[f4],        %[f6]           \n\t"
+    "sub.s      %[f4],      %[f4],        %[f6]           \n\t"
+    "add.s      %[f6],      %[f1],        %[f3]           \n\t"
+    "sub.s      %[f1],      %[f1],        %[f3]           \n\t"
+    "add.s      %[f3],      %[f5],        %[f7]           \n\t"
+    "sub.s      %[f5],      %[f5],        %[f7]           \n\t"
+    "add.s      %[f7],      %[f8],        %[f2]           \n\t"
+    "sub.s      %[f8],      %[f8],        %[f2]           \n\t"
+    "add.s      %[f2],      %[f1],        %[f4]           \n\t"
+    "sub.s      %[f1],      %[f1],        %[f4]           \n\t"
+    "add.s      %[f4],      %[f6],        %[f3]           \n\t"
+    "sub.s      %[f6],      %[f6],        %[f3]           \n\t"
+    "sub.s      %[f3],      %[f0],        %[f5]           \n\t"
+    "add.s      %[f0],      %[f0],        %[f5]           \n\t"
+    "swc1       %[f7],      0(%[tmp_a])                   \n\t"
+    "swc1       %[f8],      64(%[tmp_a])                  \n\t"
+    "swc1       %[f2],      36(%[tmp_a])                  \n\t"
+    "swc1       %[f1],      100(%[tmp_a])                 \n\t"
+    "swc1       %[f4],      4(%[tmp_a])                   \n\t"
+    "swc1       %[f6],      68(%[tmp_a])                  \n\t"
+    "swc1       %[f3],      32(%[tmp_a])                  \n\t"
+    "swc1       %[f0],      96(%[tmp_a])                  \n\t"
+    "bgtz       %[count],   1b                            \n\t"
+    " addiu     %[tmp_a],   %[tmp_a],     8               \n\t"
+    ".set       pop                                       \n\t"
+    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+    : [a] "r" (a)
+    : "memory"
+  );
+  f9 = rdft_w[2];
+  __asm __volatile (
+    ".set       push                                      \n\t"
+    ".set       noreorder                                 \n\t"
+    "addiu      %[tmp_a],   %[a],         128             \n\t"
+    "addiu      %[count],   $zero,        4               \n\t"
+   "1:                                                    \n\t"
+    "addiu      %[count],   %[count],     -1              \n\t"
+    "lwc1       %[f0],      0(%[tmp_a])                   \n\t"
+    "lwc1       %[f2],      32(%[tmp_a])                  \n\t"
+    "lwc1       %[f5],      68(%[tmp_a])                  \n\t"
+    "lwc1       %[f7],      100(%[tmp_a])                 \n\t"
+    "lwc1       %[f1],      4(%[tmp_a])                   \n\t"
+    "lwc1       %[f3],      36(%[tmp_a])                  \n\t"
+    "lwc1       %[f4],      64(%[tmp_a])                  \n\t"
+    "lwc1       %[f6],      96(%[tmp_a])                  \n\t"
+    "sub.s      %[f8],      %[f0],        %[f2]           \n\t"
+    "add.s      %[f0],      %[f0],        %[f2]           \n\t"
+    "sub.s      %[f2],      %[f5],        %[f7]           \n\t"
+    "add.s      %[f5],      %[f5],        %[f7]           \n\t"
+    "sub.s      %[f7],      %[f1],        %[f3]           \n\t"
+    "add.s      %[f1],      %[f1],        %[f3]           \n\t"
+    "sub.s      %[f3],      %[f4],        %[f6]           \n\t"
+    "add.s      %[f4],      %[f4],        %[f6]           \n\t"
+    "sub.s      %[f6],      %[f8],        %[f2]           \n\t"
+    "add.s      %[f8],      %[f8],        %[f2]           \n\t"
+    "add.s      %[f2],      %[f5],        %[f1]           \n\t"
+    "sub.s      %[f5],      %[f5],        %[f1]           \n\t"
+    "add.s      %[f1],      %[f3],        %[f7]           \n\t"
+    "sub.s      %[f3],      %[f3],        %[f7]           \n\t"
+    "add.s      %[f7],      %[f0],        %[f4]           \n\t"
+    "sub.s      %[f0],      %[f0],        %[f4]           \n\t"
+    "sub.s      %[f4],      %[f6],        %[f1]           \n\t"
+    "add.s      %[f6],      %[f6],        %[f1]           \n\t"
+    "sub.s      %[f1],      %[f3],        %[f8]           \n\t"
+    "add.s      %[f3],      %[f3],        %[f8]           \n\t"
+    "mul.s      %[f4],      %[f4],        %[f9]           \n\t"
+    "mul.s      %[f6],      %[f6],        %[f9]           \n\t"
+    "mul.s      %[f1],      %[f1],        %[f9]           \n\t"
+    "mul.s      %[f3],      %[f3],        %[f9]           \n\t"
+    "swc1       %[f7],      0(%[tmp_a])                   \n\t"
+    "swc1       %[f2],      4(%[tmp_a])                   \n\t"
+    "swc1       %[f5],      64(%[tmp_a])                  \n\t"
+    "swc1       %[f0],      68(%[tmp_a])                  \n\t"
+    "swc1       %[f4],      32(%[tmp_a])                  \n\t"
+    "swc1       %[f6],      36(%[tmp_a])                  \n\t"
+    "swc1       %[f1],      96(%[tmp_a])                  \n\t"
+    "swc1       %[f3],      100(%[tmp_a])                 \n\t"
+    "bgtz       %[count],   1b                            \n\t"
+    " addiu     %[tmp_a],   %[tmp_a],     8               \n\t"
+    ".set       pop                                       \n\t"
+    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+    : [a] "r" (a), [f9] "f" (f9)
+    : "memory"
+  );
+  f10 = rdft_w[3];
+  f11 = rdft_w[4];
+  f12 = rdft_w[5];
+  f13 = rdft_wk3ri_first[2];
+  f14 = rdft_wk3ri_first[3];
+
+  __asm __volatile (
+    ".set       push                                                    \n\t"
+    ".set       noreorder                                               \n\t"
+    "addiu      %[tmp_a],     %[a],         256                         \n\t"
+    "addiu      %[count],     $zero,        4                           \n\t"
+   "1:                                                                  \n\t"
+    "addiu      %[count],     %[count],     -1                          \n\t"
+    "lwc1       %[f0],        0(%[tmp_a])                               \n\t"
+    "lwc1       %[f2],        32(%[tmp_a])                              \n\t"
+    "lwc1       %[f4],        64(%[tmp_a])                              \n\t"
+    "lwc1       %[f6],        96(%[tmp_a])                              \n\t"
+    "lwc1       %[f1],        4(%[tmp_a])                               \n\t"
+    "lwc1       %[f3],        36(%[tmp_a])                              \n\t"
+    "lwc1       %[f5],        68(%[tmp_a])                              \n\t"
+    "lwc1       %[f7],        100(%[tmp_a])                             \n\t"
+    "add.s      %[f8],        %[f0],        %[f2]                       \n\t"
+    "sub.s      %[f0],        %[f0],        %[f2]                       \n\t"
+    "add.s      %[f2],        %[f4],        %[f6]                       \n\t"
+    "sub.s      %[f4],        %[f4],        %[f6]                       \n\t"
+    "add.s      %[f6],        %[f1],        %[f3]                       \n\t"
+    "sub.s      %[f1],        %[f1],        %[f3]                       \n\t"
+    "add.s      %[f3],        %[f5],        %[f7]                       \n\t"
+    "sub.s      %[f5],        %[f5],        %[f7]                       \n\t"
+    "sub.s      %[f7],        %[f8],        %[f2]                       \n\t"
+    "add.s      %[f8],        %[f8],        %[f2]                       \n\t"
+    "add.s      %[f2],        %[f1],        %[f4]                       \n\t"
+    "sub.s      %[f1],        %[f1],        %[f4]                       \n\t"
+    "sub.s      %[f4],        %[f6],        %[f3]                       \n\t"
+    "add.s      %[f6],        %[f6],        %[f3]                       \n\t"
+    "sub.s      %[f3],        %[f0],        %[f5]                       \n\t"
+    "add.s      %[f0],        %[f0],        %[f5]                       \n\t"
+    "swc1       %[f8],        0(%[tmp_a])                               \n\t"
+    "swc1       %[f6],        4(%[tmp_a])                               \n\t"
+    "mul.s      %[f5],        %[f9],        %[f7]                       \n\t"
+#if defined(MIPS32_R2_LE)
+    "mul.s      %[f7],        %[f10],       %[f7]                       \n\t"
+    "mul.s      %[f8],        %[f11],       %[f3]                       \n\t"
+    "mul.s      %[f3],        %[f12],       %[f3]                       \n\t"
+    "mul.s      %[f6],        %[f13],       %[f0]                       \n\t"
+    "mul.s      %[f0],        %[f14],       %[f0]                       \n\t"
+    "nmsub.s    %[f5],        %[f5],        %[f10],       %[f4]         \n\t"
+    "madd.s     %[f7],        %[f7],        %[f9],        %[f4]         \n\t"
+    "nmsub.s    %[f8],        %[f8],        %[f12],       %[f2]         \n\t"
+    "madd.s     %[f3],        %[f3],        %[f11],       %[f2]         \n\t"
+    "nmsub.s    %[f6],        %[f6],        %[f14],       %[f1]         \n\t"
+    "madd.s     %[f0],        %[f0],        %[f13],       %[f1]         \n\t"
+    "swc1       %[f5],        64(%[tmp_a])                              \n\t"
+    "swc1       %[f7],        68(%[tmp_a])                              \n\t"
+#else
+    "mul.s      %[f8],        %[f10],       %[f4]                       \n\t"
+    "mul.s      %[f4],        %[f9],        %[f4]                       \n\t"
+    "mul.s      %[f7],        %[f10],       %[f7]                       \n\t"
+    "mul.s      %[f6],        %[f11],       %[f3]                       \n\t"
+    "mul.s      %[f3],        %[f12],       %[f3]                       \n\t"
+    "sub.s      %[f5],        %[f5],        %[f8]                       \n\t"
+    "mul.s      %[f8],        %[f12],       %[f2]                       \n\t"
+    "mul.s      %[f2],        %[f11],       %[f2]                       \n\t"
+    "add.s      %[f7],        %[f4],        %[f7]                       \n\t"
+    "mul.s      %[f4],        %[f13],       %[f0]                       \n\t"
+    "mul.s      %[f0],        %[f14],       %[f0]                       \n\t"
+    "sub.s      %[f8],        %[f6],        %[f8]                       \n\t"
+    "mul.s      %[f6],        %[f14],       %[f1]                       \n\t"
+    "mul.s      %[f1],        %[f13],       %[f1]                       \n\t"
+    "add.s      %[f3],        %[f2],        %[f3]                       \n\t"
+    "swc1       %[f5],        64(%[tmp_a])                              \n\t"
+    "swc1       %[f7],        68(%[tmp_a])                              \n\t"
+    "sub.s      %[f6],        %[f4],        %[f6]                       \n\t"
+    "add.s      %[f0],        %[f1],        %[f0]                       \n\t"
+#endif
+    "swc1       %[f8],        32(%[tmp_a])                              \n\t"
+    "swc1       %[f3],        36(%[tmp_a])                              \n\t"
+    "swc1       %[f6],        96(%[tmp_a])                              \n\t"
+    "swc1       %[f0],        100(%[tmp_a])                             \n\t"
+    "bgtz       %[count],     1b                                        \n\t"
+    " addiu     %[tmp_a],     %[tmp_a],     8                           \n\t"
+    ".set       pop                                                     \n\t"
+    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+    : [a] "r" (a),  [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11),
+      [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14)
+    : "memory"
+  );
+  f11 = rdft_w[6];
+  f12 = rdft_w[7];
+  f13 = rdft_wk3ri_second[2];
+  f14 = rdft_wk3ri_second[3];
+  __asm __volatile (
+    ".set       push                                                       \n\t"
+    ".set       noreorder                                                  \n\t"
+    "addiu      %[tmp_a],       %[a],           384                        \n\t"
+    "addiu      %[count],       $zero,          4                          \n\t"
+   "1:                                                                     \n\t"
+    "addiu      %[count],       %[count],       -1                         \n\t"
+    "lwc1       %[f0],          0(%[tmp_a])                                \n\t"
+    "lwc1       %[f1],          4(%[tmp_a])                                \n\t"
+    "lwc1       %[f2],          32(%[tmp_a])                               \n\t"
+    "lwc1       %[f3],          36(%[tmp_a])                               \n\t"
+    "lwc1       %[f4],          64(%[tmp_a])                               \n\t"
+    "lwc1       %[f5],          68(%[tmp_a])                               \n\t"
+    "lwc1       %[f6],          96(%[tmp_a])                               \n\t"
+    "lwc1       %[f7],          100(%[tmp_a])                              \n\t"
+    "add.s      %[f8],          %[f0],          %[f2]                      \n\t"
+    "sub.s      %[f0],          %[f0],          %[f2]                      \n\t"
+    "add.s      %[f2],          %[f4],          %[f6]                      \n\t"
+    "sub.s      %[f4],          %[f4],          %[f6]                      \n\t"
+    "add.s      %[f6],          %[f1],          %[f3]                      \n\t"
+    "sub.s      %[f1],          %[f1],          %[f3]                      \n\t"
+    "add.s      %[f3],          %[f5],          %[f7]                      \n\t"
+    "sub.s      %[f5],          %[f5],          %[f7]                      \n\t"
+    "sub.s      %[f7],          %[f2],          %[f8]                      \n\t"
+    "add.s      %[f2],          %[f2],          %[f8]                      \n\t"
+    "add.s      %[f8],          %[f1],          %[f4]                      \n\t"
+    "sub.s      %[f1],          %[f1],          %[f4]                      \n\t"
+    "sub.s      %[f4],          %[f3],          %[f6]                      \n\t"
+    "add.s      %[f3],          %[f3],          %[f6]                      \n\t"
+    "sub.s      %[f6],          %[f0],          %[f5]                      \n\t"
+    "add.s      %[f0],          %[f0],          %[f5]                      \n\t"
+    "swc1       %[f2],          0(%[tmp_a])                                \n\t"
+    "swc1       %[f3],          4(%[tmp_a])                                \n\t"
+    "mul.s      %[f5],          %[f10],         %[f7]                      \n\t"
+#if defined(MIPS32_R2_LE)
+    "mul.s      %[f7],          %[f9],          %[f7]                      \n\t"
+    "mul.s      %[f2],          %[f12],         %[f8]                      \n\t"
+    "mul.s      %[f8],          %[f11],         %[f8]                      \n\t"
+    "mul.s      %[f3],          %[f14],         %[f1]                      \n\t"
+    "mul.s      %[f1],          %[f13],         %[f1]                      \n\t"
+    "madd.s     %[f5],          %[f5],          %[f9],       %[f4]         \n\t"
+    "msub.s     %[f7],          %[f7],          %[f10],      %[f4]         \n\t"
+    "msub.s     %[f2],          %[f2],          %[f11],      %[f6]         \n\t"
+    "madd.s     %[f8],          %[f8],          %[f12],      %[f6]         \n\t"
+    "msub.s     %[f3],          %[f3],          %[f13],      %[f0]         \n\t"
+    "madd.s     %[f1],          %[f1],          %[f14],      %[f0]         \n\t"
+    "swc1       %[f5],          64(%[tmp_a])                               \n\t"
+    "swc1       %[f7],          68(%[tmp_a])                               \n\t"
+#else
+    "mul.s      %[f2],          %[f9],          %[f4]                      \n\t"
+    "mul.s      %[f4],          %[f10],         %[f4]                      \n\t"
+    "mul.s      %[f7],          %[f9],          %[f7]                      \n\t"
+    "mul.s      %[f3],          %[f11],         %[f6]                      \n\t"
+    "mul.s      %[f6],          %[f12],         %[f6]                      \n\t"
+    "add.s      %[f5],          %[f5],          %[f2]                      \n\t"
+    "sub.s      %[f7],          %[f4],          %[f7]                      \n\t"
+    "mul.s      %[f2],          %[f12],         %[f8]                      \n\t"
+    "mul.s      %[f8],          %[f11],         %[f8]                      \n\t"
+    "mul.s      %[f4],          %[f14],         %[f1]                      \n\t"
+    "mul.s      %[f1],          %[f13],         %[f1]                      \n\t"
+    "sub.s      %[f2],          %[f3],          %[f2]                      \n\t"
+    "mul.s      %[f3],          %[f13],         %[f0]                      \n\t"
+    "mul.s      %[f0],          %[f14],         %[f0]                      \n\t"
+    "add.s      %[f8],          %[f8],          %[f6]                      \n\t"
+    "swc1       %[f5],          64(%[tmp_a])                               \n\t"
+    "swc1       %[f7],          68(%[tmp_a])                               \n\t"
+    "sub.s      %[f3],          %[f3],          %[f4]                      \n\t"
+    "add.s      %[f1],          %[f1],          %[f0]                      \n\t"
+#endif
+    "swc1       %[f2],          32(%[tmp_a])                               \n\t"
+    "swc1       %[f8],          36(%[tmp_a])                               \n\t"
+    "swc1       %[f3],          96(%[tmp_a])                               \n\t"
+    "swc1       %[f1],          100(%[tmp_a])                              \n\t"
+    "bgtz       %[count],       1b                                         \n\t"
+    " addiu     %[tmp_a],       %[tmp_a],       8                          \n\t"
+    ".set       pop                                                        \n\t"
+    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+    : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11),
+      [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14)
+    : "memory"
+  );
+}
+
+static void cftfsub_128_mips(float* a) {
+  float f0, f1, f2, f3, f4, f5, f6, f7, f8;
+  int tmp_a, count;
+
+  cft1st_128(a);
+  cftmdl_128(a);
+
+  __asm __volatile (
+    ".set       push                                      \n\t"
+    ".set       noreorder                                 \n\t"
+    "addiu      %[tmp_a],       %[a],         0           \n\t"
+    "addiu      %[count],       $zero,        16          \n\t"
+   "1:                                                    \n\t"
+    "addiu      %[count],       %[count],     -1          \n\t"
+    "lwc1       %[f0],          0(%[tmp_a])               \n\t"
+    "lwc1       %[f2],          128(%[tmp_a])             \n\t"
+    "lwc1       %[f4],          256(%[tmp_a])             \n\t"
+    "lwc1       %[f6],          384(%[tmp_a])             \n\t"
+    "lwc1       %[f1],          4(%[tmp_a])               \n\t"
+    "lwc1       %[f3],          132(%[tmp_a])             \n\t"
+    "lwc1       %[f5],          260(%[tmp_a])             \n\t"
+    "lwc1       %[f7],          388(%[tmp_a])             \n\t"
+    "add.s      %[f8],          %[f0],        %[f2]       \n\t"
+    "sub.s      %[f0],          %[f0],        %[f2]       \n\t"
+    "add.s      %[f2],          %[f4],        %[f6]       \n\t"
+    "sub.s      %[f4],          %[f4],        %[f6]       \n\t"
+    "add.s      %[f6],          %[f1],        %[f3]       \n\t"
+    "sub.s      %[f1],          %[f1],        %[f3]       \n\t"
+    "add.s      %[f3],          %[f5],        %[f7]       \n\t"
+    "sub.s      %[f5],          %[f5],        %[f7]       \n\t"
+    "add.s      %[f7],          %[f8],        %[f2]       \n\t"
+    "sub.s      %[f8],          %[f8],        %[f2]       \n\t"
+    "add.s      %[f2],          %[f1],        %[f4]       \n\t"
+    "sub.s      %[f1],          %[f1],        %[f4]       \n\t"
+    "add.s      %[f4],          %[f6],        %[f3]       \n\t"
+    "sub.s      %[f6],          %[f6],        %[f3]       \n\t"
+    "sub.s      %[f3],          %[f0],        %[f5]       \n\t"
+    "add.s      %[f0],          %[f0],        %[f5]       \n\t"
+    "swc1       %[f7],          0(%[tmp_a])               \n\t"
+    "swc1       %[f8],          256(%[tmp_a])             \n\t"
+    "swc1       %[f2],          132(%[tmp_a])             \n\t"
+    "swc1       %[f1],          388(%[tmp_a])             \n\t"
+    "swc1       %[f4],          4(%[tmp_a])               \n\t"
+    "swc1       %[f6],          260(%[tmp_a])             \n\t"
+    "swc1       %[f3],          128(%[tmp_a])             \n\t"
+    "swc1       %[f0],          384(%[tmp_a])             \n\t"
+    "bgtz       %[count],       1b                        \n\t"
+    " addiu     %[tmp_a],       %[tmp_a],   8             \n\t"
+    ".set       pop                                       \n\t"
+    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a),
+      [count] "=&r" (count)
+    : [a] "r" (a)
+    : "memory"
+  );
+}
+
+static void cftbsub_128_mips(float* a) {
+  float f0, f1, f2, f3, f4, f5, f6, f7, f8;
+  int tmp_a, count;
+
+  cft1st_128(a);
+  cftmdl_128(a);
+
+  __asm __volatile (
+    ".set       push                                        \n\t"
+    ".set       noreorder                                   \n\t"
+    "addiu      %[tmp_a],   %[a],           0               \n\t"
+    "addiu      %[count],   $zero,          16              \n\t"
+   "1:                                                      \n\t"
+    "addiu      %[count],   %[count],       -1              \n\t"
+    "lwc1       %[f0],      0(%[tmp_a])                     \n\t"
+    "lwc1       %[f2],      128(%[tmp_a])                   \n\t"
+    "lwc1       %[f4],      256(%[tmp_a])                   \n\t"
+    "lwc1       %[f6],      384(%[tmp_a])                   \n\t"
+    "lwc1       %[f1],      4(%[tmp_a])                     \n\t"
+    "lwc1       %[f3],      132(%[tmp_a])                   \n\t"
+    "lwc1       %[f5],      260(%[tmp_a])                   \n\t"
+    "lwc1       %[f7],      388(%[tmp_a])                   \n\t"
+    "add.s      %[f8],      %[f0],          %[f2]           \n\t"
+    "sub.s      %[f0],      %[f0],          %[f2]           \n\t"
+    "add.s      %[f2],      %[f4],          %[f6]           \n\t"
+    "sub.s      %[f4],      %[f4],          %[f6]           \n\t"
+    "add.s      %[f6],      %[f1],          %[f3]           \n\t"
+    "sub.s      %[f1],      %[f3],          %[f1]           \n\t"
+    "add.s      %[f3],      %[f5],          %[f7]           \n\t"
+    "sub.s      %[f5],      %[f5],          %[f7]           \n\t"
+    "add.s      %[f7],      %[f8],          %[f2]           \n\t"
+    "sub.s      %[f8],      %[f8],          %[f2]           \n\t"
+    "sub.s      %[f2],      %[f1],          %[f4]           \n\t"
+    "add.s      %[f1],      %[f1],          %[f4]           \n\t"
+    "add.s      %[f4],      %[f3],          %[f6]           \n\t"
+    "sub.s      %[f6],      %[f3],          %[f6]           \n\t"
+    "sub.s      %[f3],      %[f0],          %[f5]           \n\t"
+    "add.s      %[f0],      %[f0],          %[f5]           \n\t"
+    "neg.s      %[f4],      %[f4]                           \n\t"
+    "swc1       %[f7],      0(%[tmp_a])                     \n\t"
+    "swc1       %[f8],      256(%[tmp_a])                   \n\t"
+    "swc1       %[f2],      132(%[tmp_a])                   \n\t"
+    "swc1       %[f1],      388(%[tmp_a])                   \n\t"
+    "swc1       %[f6],      260(%[tmp_a])                   \n\t"
+    "swc1       %[f3],      128(%[tmp_a])                   \n\t"
+    "swc1       %[f0],      384(%[tmp_a])                   \n\t"
+    "swc1       %[f4],       4(%[tmp_a])                     \n\t"
+    "bgtz       %[count],   1b                              \n\t"
+    " addiu     %[tmp_a],   %[tmp_a],       8               \n\t"
+    ".set       pop                                         \n\t"
+    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+    : [a] "r" (a)
+    : "memory"
+  );
+}
+
+static void rftfsub_128_mips(float* a) {
+  const float* c = rdft_w + 32;
+  const float f0 = 0.5f;
+  float* a1 = &a[2];
+  float* a2 = &a[126];
+  const float* c1 = &c[1];
+  const float* c2 = &c[31];
+  float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15;
+  int count;
+
+  __asm __volatile (
+    ".set      push                                             \n\t"
+    ".set      noreorder                                        \n\t"
+    "lwc1      %[f6],       0(%[c2])                            \n\t"
+    "lwc1      %[f1],       0(%[a1])                            \n\t"
+    "lwc1      %[f2],       0(%[a2])                            \n\t"
+    "lwc1      %[f3],       4(%[a1])                            \n\t"
+    "lwc1      %[f4],       4(%[a2])                            \n\t"
+    "lwc1      %[f5],       0(%[c1])                            \n\t"
+    "sub.s     %[f6],       %[f0],        %[f6]                 \n\t"
+    "sub.s     %[f7],       %[f1],        %[f2]                 \n\t"
+    "add.s     %[f8],       %[f3],        %[f4]                 \n\t"
+    "addiu     %[count],    $zero,        15                    \n\t"
+    "mul.s     %[f9],       %[f6],        %[f7]                 \n\t"
+    "mul.s     %[f6],       %[f6],        %[f8]                 \n\t"
+#if !defined(MIPS32_R2_LE)
+    "mul.s     %[f8],       %[f5],        %[f8]                 \n\t"
+    "mul.s     %[f5],       %[f5],        %[f7]                 \n\t"
+    "sub.s     %[f9],       %[f9],        %[f8]                 \n\t"
+    "add.s     %[f6],       %[f6],        %[f5]                 \n\t"
+#else
+    "nmsub.s   %[f9],       %[f9],        %[f5],      %[f8]     \n\t"
+    "madd.s    %[f6],       %[f6],        %[f5],      %[f7]     \n\t"
+#endif
+    "sub.s     %[f1],       %[f1],        %[f9]                 \n\t"
+    "add.s     %[f2],       %[f2],        %[f9]                 \n\t"
+    "sub.s     %[f3],       %[f3],        %[f6]                 \n\t"
+    "sub.s     %[f4],       %[f4],        %[f6]                 \n\t"
+    "swc1      %[f1],       0(%[a1])                            \n\t"
+    "swc1      %[f2],       0(%[a2])                            \n\t"
+    "swc1      %[f3],       4(%[a1])                            \n\t"
+    "swc1      %[f4],       4(%[a2])                            \n\t"
+    "addiu     %[a1],       %[a1],        8                     \n\t"
+    "addiu     %[a2],       %[a2],        -8                    \n\t"
+    "addiu     %[c1],       %[c1],        4                     \n\t"
+    "addiu     %[c2],       %[c2],        -4                    \n\t"
+   "1:                                                          \n\t"
+    "lwc1      %[f6],       0(%[c2])                            \n\t"
+    "lwc1      %[f1],       0(%[a1])                            \n\t"
+    "lwc1      %[f2],       0(%[a2])                            \n\t"
+    "lwc1      %[f3],       4(%[a1])                            \n\t"
+    "lwc1      %[f4],       4(%[a2])                            \n\t"
+    "lwc1      %[f5],       0(%[c1])                            \n\t"
+    "sub.s     %[f6],       %[f0],        %[f6]                 \n\t"
+    "sub.s     %[f7],       %[f1],        %[f2]                 \n\t"
+    "add.s     %[f8],       %[f3],        %[f4]                 \n\t"
+    "lwc1      %[f10],      -4(%[c2])                           \n\t"
+    "lwc1      %[f11],      8(%[a1])                            \n\t"
+    "lwc1      %[f12],      -8(%[a2])                           \n\t"
+    "mul.s     %[f9],       %[f6],        %[f7]                 \n\t"
+    "mul.s     %[f6],       %[f6],        %[f8]                 \n\t"
+#if !defined(MIPS32_R2_LE)
+    "mul.s     %[f8],       %[f5],        %[f8]                 \n\t"
+    "mul.s     %[f5],       %[f5],        %[f7]                 \n\t"
+    "lwc1      %[f13],      12(%[a1])                           \n\t"
+    "lwc1      %[f14],      -4(%[a2])                           \n\t"
+    "lwc1      %[f15],      4(%[c1])                            \n\t"
+    "sub.s     %[f9],       %[f9],        %[f8]                 \n\t"
+    "add.s     %[f6],       %[f6],        %[f5]                 \n\t"
+#else
+    "lwc1      %[f13],      12(%[a1])                           \n\t"
+    "lwc1      %[f14],      -4(%[a2])                           \n\t"
+    "lwc1      %[f15],      4(%[c1])                            \n\t"
+    "nmsub.s   %[f9],       %[f9],        %[f5],      %[f8]     \n\t"
+    "madd.s    %[f6],       %[f6],        %[f5],      %[f7]     \n\t"
+#endif
+    "sub.s     %[f10],      %[f0],        %[f10]                \n\t"
+    "sub.s     %[f5],       %[f11],       %[f12]                \n\t"
+    "add.s     %[f7],       %[f13],       %[f14]                \n\t"
+    "sub.s     %[f1],       %[f1],        %[f9]                 \n\t"
+    "add.s     %[f2],       %[f2],        %[f9]                 \n\t"
+    "sub.s     %[f3],       %[f3],        %[f6]                 \n\t"
+    "mul.s     %[f8],       %[f10],       %[f5]                 \n\t"
+    "mul.s     %[f10],      %[f10],       %[f7]                 \n\t"
+#if !defined(MIPS32_R2_LE)
+    "mul.s     %[f9],       %[f15],       %[f7]                 \n\t"
+    "mul.s     %[f15],      %[f15],       %[f5]                 \n\t"
+    "sub.s     %[f4],       %[f4],        %[f6]                 \n\t"
+    "swc1      %[f1],       0(%[a1])                            \n\t"
+    "swc1      %[f2],       0(%[a2])                            \n\t"
+    "sub.s     %[f8],       %[f8],        %[f9]                 \n\t"
+    "add.s     %[f10],      %[f10],       %[f15]                \n\t"
+#else
+    "swc1      %[f1],       0(%[a1])                            \n\t"
+    "swc1      %[f2],       0(%[a2])                            \n\t"
+    "sub.s     %[f4],       %[f4],        %[f6]                 \n\t"
+    "nmsub.s   %[f8],       %[f8],        %[f15],     %[f7]     \n\t"
+    "madd.s    %[f10],      %[f10],       %[f15],     %[f5]     \n\t"
+#endif
+    "swc1      %[f3],       4(%[a1])                            \n\t"
+    "swc1      %[f4],       4(%[a2])                            \n\t"
+    "sub.s     %[f11],      %[f11],       %[f8]                 \n\t"
+    "add.s     %[f12],      %[f12],       %[f8]                 \n\t"
+    "sub.s     %[f13],      %[f13],       %[f10]                \n\t"
+    "sub.s     %[f14],      %[f14],       %[f10]                \n\t"
+    "addiu     %[c2],       %[c2],        -8                    \n\t"
+    "addiu     %[c1],       %[c1],        8                     \n\t"
+    "swc1      %[f11],      8(%[a1])                            \n\t"
+    "swc1      %[f12],      -8(%[a2])                           \n\t"
+    "swc1      %[f13],      12(%[a1])                           \n\t"
+    "swc1      %[f14],      -4(%[a2])                           \n\t"
+    "addiu     %[a1],       %[a1],        16                    \n\t"
+    "addiu     %[count],    %[count],     -1                    \n\t"
+    "bgtz      %[count],    1b                                  \n\t"
+    " addiu    %[a2],       %[a2],        -16                   \n\t"
+    ".set      pop                                              \n\t"
+    : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2),
+      [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4),
+      [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
+      [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12),
+      [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15),
+      [count] "=&r" (count)
+    : [f0] "f" (f0)
+    : "memory"
+  );
+}
+
+static void rftbsub_128_mips(float* a) {
+  const float *c = rdft_w + 32;
+  const float f0 = 0.5f;
+  float* a1 = &a[2];
+  float* a2 = &a[126];
+  const float* c1 = &c[1];
+  const float* c2 = &c[31];
+  float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15;
+  int count;
+
+  a[1] = -a[1];
+  a[65] = -a[65];
+
+  __asm __volatile (
+    ".set      push                                             \n\t"
+    ".set      noreorder                                        \n\t"
+    "lwc1      %[f6],       0(%[c2])                            \n\t"
+    "lwc1      %[f1],       0(%[a1])                            \n\t"
+    "lwc1      %[f2],       0(%[a2])                            \n\t"
+    "lwc1      %[f3],       4(%[a1])                            \n\t"
+    "lwc1      %[f4],       4(%[a2])                            \n\t"
+    "lwc1      %[f5],       0(%[c1])                            \n\t"
+    "sub.s     %[f6],       %[f0],        %[f6]                 \n\t"
+    "sub.s     %[f7],       %[f1],        %[f2]                 \n\t"
+    "add.s     %[f8],       %[f3],        %[f4]                 \n\t"
+    "addiu     %[count],    $zero,        15                    \n\t"
+    "mul.s     %[f9],       %[f6],        %[f7]                 \n\t"
+    "mul.s     %[f6],       %[f6],        %[f8]                 \n\t"
+#if !defined(MIPS32_R2_LE)
+    "mul.s     %[f8],       %[f5],        %[f8]                 \n\t"
+    "mul.s     %[f5],       %[f5],        %[f7]                 \n\t"
+    "add.s     %[f9],       %[f9],        %[f8]                 \n\t"
+    "sub.s     %[f6],       %[f6],        %[f5]                 \n\t"
+#else
+    "madd.s    %[f9],       %[f9],        %[f5],      %[f8]     \n\t"
+    "nmsub.s   %[f6],       %[f6],        %[f5],      %[f7]     \n\t"
+#endif
+    "sub.s     %[f1],       %[f1],        %[f9]                 \n\t"
+    "add.s     %[f2],       %[f2],        %[f9]                 \n\t"
+    "sub.s     %[f3],       %[f6],        %[f3]                 \n\t"
+    "sub.s     %[f4],       %[f6],        %[f4]                 \n\t"
+    "swc1      %[f1],       0(%[a1])                            \n\t"
+    "swc1      %[f2],       0(%[a2])                            \n\t"
+    "swc1      %[f3],       4(%[a1])                            \n\t"
+    "swc1      %[f4],       4(%[a2])                            \n\t"
+    "addiu     %[a1],       %[a1],        8                     \n\t"
+    "addiu     %[a2],       %[a2],        -8                    \n\t"
+    "addiu     %[c1],       %[c1],        4                     \n\t"
+    "addiu     %[c2],       %[c2],        -4                    \n\t"
+   "1:                                                          \n\t"
+    "lwc1      %[f6],       0(%[c2])                            \n\t"
+    "lwc1      %[f1],       0(%[a1])                            \n\t"
+    "lwc1      %[f2],       0(%[a2])                            \n\t"
+    "lwc1      %[f3],       4(%[a1])                            \n\t"
+    "lwc1      %[f4],       4(%[a2])                            \n\t"
+    "lwc1      %[f5],       0(%[c1])                            \n\t"
+    "sub.s     %[f6],       %[f0],        %[f6]                 \n\t"
+    "sub.s     %[f7],       %[f1],        %[f2]                 \n\t"
+    "add.s     %[f8],       %[f3],        %[f4]                 \n\t"
+    "lwc1      %[f10],      -4(%[c2])                           \n\t"
+    "lwc1      %[f11],      8(%[a1])                            \n\t"
+    "lwc1      %[f12],      -8(%[a2])                           \n\t"
+    "mul.s     %[f9],       %[f6],        %[f7]                 \n\t"
+    "mul.s     %[f6],       %[f6],        %[f8]                 \n\t"
+#if !defined(MIPS32_R2_LE)
+    "mul.s     %[f8],       %[f5],        %[f8]                 \n\t"
+    "mul.s     %[f5],       %[f5],        %[f7]                 \n\t"
+    "lwc1      %[f13],      12(%[a1])                           \n\t"
+    "lwc1      %[f14],      -4(%[a2])                           \n\t"
+    "lwc1      %[f15],      4(%[c1])                            \n\t"
+    "add.s     %[f9],       %[f9],        %[f8]                 \n\t"
+    "sub.s     %[f6],       %[f6],        %[f5]                 \n\t"
+#else
+    "lwc1      %[f13],      12(%[a1])                           \n\t"
+    "lwc1      %[f14],      -4(%[a2])                           \n\t"
+    "lwc1      %[f15],      4(%[c1])                            \n\t"
+    "madd.s    %[f9],       %[f9],        %[f5],      %[f8]     \n\t"
+    "nmsub.s   %[f6],       %[f6],        %[f5],      %[f7]     \n\t"
+#endif
+    "sub.s     %[f10],      %[f0],        %[f10]                \n\t"
+    "sub.s     %[f5],       %[f11],       %[f12]                \n\t"
+    "add.s     %[f7],       %[f13],       %[f14]                \n\t"
+    "sub.s     %[f1],       %[f1],        %[f9]                 \n\t"
+    "add.s     %[f2],       %[f2],        %[f9]                 \n\t"
+    "sub.s     %[f3],       %[f6],        %[f3]                 \n\t"
+    "mul.s     %[f8],       %[f10],       %[f5]                 \n\t"
+    "mul.s     %[f10],      %[f10],       %[f7]                 \n\t"
+#if !defined(MIPS32_R2_LE)
+    "mul.s     %[f9],       %[f15],       %[f7]                 \n\t"
+    "mul.s     %[f15],      %[f15],       %[f5]                 \n\t"
+    "sub.s     %[f4],       %[f6],        %[f4]                 \n\t"
+    "swc1      %[f1],       0(%[a1])                            \n\t"
+    "swc1      %[f2],       0(%[a2])                            \n\t"
+    "add.s     %[f8],       %[f8],        %[f9]                 \n\t"
+    "sub.s     %[f10],      %[f10],       %[f15]                \n\t"
+#else
+    "swc1      %[f1],       0(%[a1])                            \n\t"
+    "swc1      %[f2],       0(%[a2])                            \n\t"
+    "sub.s     %[f4],       %[f6],        %[f4]                 \n\t"
+    "madd.s    %[f8],       %[f8],        %[f15],     %[f7]     \n\t"
+    "nmsub.s   %[f10],      %[f10],       %[f15],     %[f5]     \n\t"
+#endif
+    "swc1      %[f3],       4(%[a1])                            \n\t"
+    "swc1      %[f4],       4(%[a2])                            \n\t"
+    "sub.s     %[f11],      %[f11],       %[f8]                 \n\t"
+    "add.s     %[f12],      %[f12],       %[f8]                 \n\t"
+    "sub.s     %[f13],      %[f10],       %[f13]                \n\t"
+    "sub.s     %[f14],      %[f10],       %[f14]                \n\t"
+    "addiu     %[c2],       %[c2],        -8                    \n\t"
+    "addiu     %[c1],       %[c1],        8                     \n\t"
+    "swc1      %[f11],      8(%[a1])                            \n\t"
+    "swc1      %[f12],      -8(%[a2])                           \n\t"
+    "swc1      %[f13],      12(%[a1])                           \n\t"
+    "swc1      %[f14],      -4(%[a2])                           \n\t"
+    "addiu     %[a1],       %[a1],        16                    \n\t"
+    "addiu     %[count],    %[count],     -1                    \n\t"
+    "bgtz      %[count],    1b                                  \n\t"
+    " addiu    %[a2],       %[a2],        -16                   \n\t"
+    ".set      pop                                              \n\t"
+    : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2),
+      [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4),
+      [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
+      [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12),
+      [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15),
+      [count] "=&r" (count)
+    : [f0] "f" (f0)
+    : "memory"
+  );
+}
+
+void aec_rdft_init_mips(void) {
+  cft1st_128 = cft1st_128_mips;
+  cftmdl_128 = cftmdl_128_mips;
+  rftfsub_128 = rftfsub_128_mips;
+  rftbsub_128 = rftbsub_128_mips;
+  cftfsub_128 = cftfsub_128_mips;
+  cftbsub_128 = cftbsub_128_mips;
+  bitrv2_128 = bitrv2_128_mips;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_neon.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_neon.c
new file mode 100644
index 00000000..43b6a68c
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_neon.c
@@ -0,0 +1,355 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The rdft AEC algorithm, neon version of speed-critical functions.
+ *
+ * Based on the sse2 version.
+ */
+
+
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+#include <arm_neon.h>
+
+static const ALIGN16_BEG float ALIGN16_END
+    k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
+
+static void cft1st_128_neon(float* a) {
+  const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
+  int j, k2;
+
+  for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
+    float32x4_t a00v = vld1q_f32(&a[j + 0]);
+    float32x4_t a04v = vld1q_f32(&a[j + 4]);
+    float32x4_t a08v = vld1q_f32(&a[j + 8]);
+    float32x4_t a12v = vld1q_f32(&a[j + 12]);
+    float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v));
+    float32x4_t a23v = vcombine_f32(vget_high_f32(a00v), vget_high_f32(a08v));
+    float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v));
+    float32x4_t a67v = vcombine_f32(vget_high_f32(a04v), vget_high_f32(a12v));
+    const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2]);
+    const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2]);
+    const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2]);
+    const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2]);
+    const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2]);
+    const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2]);
+    float32x4_t x0v = vaddq_f32(a01v, a23v);
+    const float32x4_t x1v = vsubq_f32(a01v, a23v);
+    const float32x4_t x2v = vaddq_f32(a45v, a67v);
+    const float32x4_t x3v = vsubq_f32(a45v, a67v);
+    const float32x4_t x3w = vrev64q_f32(x3v);
+    float32x4_t x0w;
+    a01v = vaddq_f32(x0v, x2v);
+    x0v = vsubq_f32(x0v, x2v);
+    x0w = vrev64q_f32(x0v);
+    a45v = vmulq_f32(wk2rv, x0v);
+    a45v = vmlaq_f32(a45v, wk2iv, x0w);
+    x0v = vmlaq_f32(x1v, x3w, vec_swap_sign);
+    x0w = vrev64q_f32(x0v);
+    a23v = vmulq_f32(wk1rv, x0v);
+    a23v = vmlaq_f32(a23v, wk1iv, x0w);
+    x0v = vmlsq_f32(x1v, x3w, vec_swap_sign);
+    x0w = vrev64q_f32(x0v);
+    a67v = vmulq_f32(wk3rv, x0v);
+    a67v = vmlaq_f32(a67v, wk3iv, x0w);
+    a00v = vcombine_f32(vget_low_f32(a01v), vget_low_f32(a23v));
+    a04v = vcombine_f32(vget_low_f32(a45v), vget_low_f32(a67v));
+    a08v = vcombine_f32(vget_high_f32(a01v), vget_high_f32(a23v));
+    a12v = vcombine_f32(vget_high_f32(a45v), vget_high_f32(a67v));
+    vst1q_f32(&a[j + 0], a00v);
+    vst1q_f32(&a[j + 4], a04v);
+    vst1q_f32(&a[j + 8], a08v);
+    vst1q_f32(&a[j + 12], a12v);
+  }
+}
+
+static void cftmdl_128_neon(float* a) {
+  int j;
+  const int l = 8;
+  const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
+  float32x4_t wk1rv = vld1q_f32(cftmdl_wk1r);
+
+  for (j = 0; j < l; j += 2) {
+    const float32x2_t a_00 = vld1_f32(&a[j + 0]);
+    const float32x2_t a_08 = vld1_f32(&a[j + 8]);
+    const float32x2_t a_32 = vld1_f32(&a[j + 32]);
+    const float32x2_t a_40 = vld1_f32(&a[j + 40]);
+    const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
+    const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
+    const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
+    const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
+    const float32x2_t a_16 = vld1_f32(&a[j + 16]);
+    const float32x2_t a_24 = vld1_f32(&a[j + 24]);
+    const float32x2_t a_48 = vld1_f32(&a[j + 48]);
+    const float32x2_t a_56 = vld1_f32(&a[j + 56]);
+    const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
+    const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
+    const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
+    const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
+    const float32x4_t xx0 = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+    const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+    const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);
+    const float32x4_t x1_x3_add =
+        vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
+    const float32x4_t x1_x3_sub =
+        vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
+    const float32x2_t yy0_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 0);
+    const float32x2_t yy0_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 0);
+    const float32x4_t yy0_as = vcombine_f32(yy0_a, yy0_s);
+    const float32x2_t yy1_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 1);
+    const float32x2_t yy1_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 1);
+    const float32x4_t yy1_as = vcombine_f32(yy1_a, yy1_s);
+    const float32x4_t yy0 = vmlaq_f32(yy0_as, vec_swap_sign, yy1_as);
+    const float32x4_t yy4 = vmulq_f32(wk1rv, yy0);
+    const float32x4_t xx1_rev = vrev64q_f32(xx1);
+    const float32x4_t yy4_rev = vrev64q_f32(yy4);
+
+    vst1_f32(&a[j + 0], vget_low_f32(xx0));
+    vst1_f32(&a[j + 32], vget_high_f32(xx0));
+    vst1_f32(&a[j + 16], vget_low_f32(xx1));
+    vst1_f32(&a[j + 48], vget_high_f32(xx1_rev));
+
+    a[j + 48] = -a[j + 48];
+
+    vst1_f32(&a[j + 8], vget_low_f32(x1_x3_add));
+    vst1_f32(&a[j + 24], vget_low_f32(x1_x3_sub));
+    vst1_f32(&a[j + 40], vget_low_f32(yy4));
+    vst1_f32(&a[j + 56], vget_high_f32(yy4_rev));
+  }
+
+  {
+    const int k = 64;
+    const int k1 = 2;
+    const int k2 = 2 * k1;
+    const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2 + 0]);
+    const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2 + 0]);
+    const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2 + 0]);
+    const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2 + 0]);
+    const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2 + 0]);
+    wk1rv = vld1q_f32(&rdft_wk1r[k2 + 0]);
+    for (j = k; j < l + k; j += 2) {
+      const float32x2_t a_00 = vld1_f32(&a[j + 0]);
+      const float32x2_t a_08 = vld1_f32(&a[j + 8]);
+      const float32x2_t a_32 = vld1_f32(&a[j + 32]);
+      const float32x2_t a_40 = vld1_f32(&a[j + 40]);
+      const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
+      const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
+      const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
+      const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
+      const float32x2_t a_16 = vld1_f32(&a[j + 16]);
+      const float32x2_t a_24 = vld1_f32(&a[j + 24]);
+      const float32x2_t a_48 = vld1_f32(&a[j + 48]);
+      const float32x2_t a_56 = vld1_f32(&a[j + 56]);
+      const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
+      const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
+      const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
+      const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
+      const float32x4_t xx = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+      const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+      const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);
+      const float32x4_t x1_x3_add =
+          vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
+      const float32x4_t x1_x3_sub =
+          vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
+      float32x4_t xx4 = vmulq_f32(wk2rv, xx1);
+      float32x4_t xx12 = vmulq_f32(wk1rv, x1_x3_add);
+      float32x4_t xx22 = vmulq_f32(wk3rv, x1_x3_sub);
+      xx4 = vmlaq_f32(xx4, wk2iv, vrev64q_f32(xx1));
+      xx12 = vmlaq_f32(xx12, wk1iv, vrev64q_f32(x1_x3_add));
+      xx22 = vmlaq_f32(xx22, wk3iv, vrev64q_f32(x1_x3_sub));
+
+      vst1_f32(&a[j + 0], vget_low_f32(xx));
+      vst1_f32(&a[j + 32], vget_high_f32(xx));
+      vst1_f32(&a[j + 16], vget_low_f32(xx4));
+      vst1_f32(&a[j + 48], vget_high_f32(xx4));
+      vst1_f32(&a[j + 8], vget_low_f32(xx12));
+      vst1_f32(&a[j + 40], vget_high_f32(xx12));
+      vst1_f32(&a[j + 24], vget_low_f32(xx22));
+      vst1_f32(&a[j + 56], vget_high_f32(xx22));
+    }
+  }
+}
+
+__inline static float32x4_t reverse_order_f32x4(float32x4_t in) {
+  // A B C D -> C D A B
+  const float32x4_t rev = vcombine_f32(vget_high_f32(in), vget_low_f32(in));
+  // C D A B -> D C B A
+  return vrev64q_f32(rev);
+}
+
+static void rftfsub_128_neon(float* a) {
+  const float* c = rdft_w + 32;
+  int j1, j2;
+  const float32x4_t mm_half = vdupq_n_f32(0.5f);
+
+  // Vectorized code (four at once).
+  // Note: commented number are indexes for the first iteration of the loop.
+  for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
+    // Load 'wk'.
+    const float32x4_t c_j1 = vld1q_f32(&c[j1]);          //  1,  2,  3,  4,
+    const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]);     // 28, 29, 30, 31,
+    const float32x4_t wkrt = vsubq_f32(mm_half, c_k1);   // 28, 29, 30, 31,
+    const float32x4_t wkr_ = reverse_order_f32x4(wkrt);  // 31, 30, 29, 28,
+    const float32x4_t wki_ = c_j1;                       //  1,  2,  3,  4,
+    // Load and shuffle 'a'.
+    //   2,   4,   6,   8,   3,   5,   7,   9
+    float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
+    // 120, 122, 124, 126, 121, 123, 125, 127,
+    const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
+    // 126, 124, 122, 120
+    const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
+    // 127, 125, 123, 121
+    const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
+    // Calculate 'x'.
+    const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
+    // 2-126, 4-124, 6-122, 8-120,
+    const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
+    // 3-127, 5-125, 7-123, 9-121,
+    // Calculate product into 'y'.
+    //    yr = wkr * xr - wki * xi;
+    //    yi = wkr * xi + wki * xr;
+    const float32x4_t a_ = vmulq_f32(wkr_, xr_);
+    const float32x4_t b_ = vmulq_f32(wki_, xi_);
+    const float32x4_t c_ = vmulq_f32(wkr_, xi_);
+    const float32x4_t d_ = vmulq_f32(wki_, xr_);
+    const float32x4_t yr_ = vsubq_f32(a_, b_);  // 2-126, 4-124, 6-122, 8-120,
+    const float32x4_t yi_ = vaddq_f32(c_, d_);  // 3-127, 5-125, 7-123, 9-121,
+                                                // Update 'a'.
+                                                //    a[j2 + 0] -= yr;
+                                                //    a[j2 + 1] -= yi;
+                                                //    a[k2 + 0] += yr;
+                                                //    a[k2 + 1] -= yi;
+    // 126, 124, 122, 120,
+    const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
+    // 127, 125, 123, 121,
+    const float32x4_t a_k2_p1n = vsubq_f32(a_k2_p1, yi_);
+    // Shuffle in right order and store.
+    const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);
+    const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);
+    // 124, 125, 126, 127, 120, 121, 122, 123
+    const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
+    //   2,   4,   6,   8,
+    a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
+    //   3,   5,   7,   9,
+    a_j2_p.val[1] = vsubq_f32(a_j2_p.val[1], yi_);
+    //   2,   3,   4,   5,   6,   7,   8,   9,
+    vst2q_f32(&a[0 + j2], a_j2_p);
+
+    vst1q_f32(&a[122 - j2], a_k2_n.val[1]);
+    vst1q_f32(&a[126 - j2], a_k2_n.val[0]);
+  }
+
+  // Scalar code for the remaining items.
+  for (; j2 < 64; j1 += 1, j2 += 2) {
+    const int k2 = 128 - j2;
+    const int k1 = 32 - j1;
+    const float wkr = 0.5f - c[k1];
+    const float wki = c[j1];
+    const float xr = a[j2 + 0] - a[k2 + 0];
+    const float xi = a[j2 + 1] + a[k2 + 1];
+    const float yr = wkr * xr - wki * xi;
+    const float yi = wkr * xi + wki * xr;
+    a[j2 + 0] -= yr;
+    a[j2 + 1] -= yi;
+    a[k2 + 0] += yr;
+    a[k2 + 1] -= yi;
+  }
+}
+
+static void rftbsub_128_neon(float* a) {
+  const float* c = rdft_w + 32;
+  int j1, j2;
+  const float32x4_t mm_half = vdupq_n_f32(0.5f);
+
+  a[1] = -a[1];
+  // Vectorized code (four at once).
+  //    Note: commented number are indexes for the first iteration of the loop.
+  for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
+    // Load 'wk'.
+    const float32x4_t c_j1 = vld1q_f32(&c[j1]);         //  1,  2,  3,  4,
+    const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]);    // 28, 29, 30, 31,
+    const float32x4_t wkrt = vsubq_f32(mm_half, c_k1);  // 28, 29, 30, 31,
+    const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28,
+    const float32x4_t wki_ = c_j1;                      //  1,  2,  3,  4,
+    // Load and shuffle 'a'.
+    //   2,   4,   6,   8,   3,   5,   7,   9
+    float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
+    // 120, 122, 124, 126, 121, 123, 125, 127,
+    const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
+    // 126, 124, 122, 120
+    const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
+    // 127, 125, 123, 121
+    const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
+    // Calculate 'x'.
+    const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
+    // 2-126, 4-124, 6-122, 8-120,
+    const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
+    // 3-127, 5-125, 7-123, 9-121,
+    // Calculate product into 'y'.
+    //    yr = wkr * xr - wki * xi;
+    //    yi = wkr * xi + wki * xr;
+    const float32x4_t a_ = vmulq_f32(wkr_, xr_);
+    const float32x4_t b_ = vmulq_f32(wki_, xi_);
+    const float32x4_t c_ = vmulq_f32(wkr_, xi_);
+    const float32x4_t d_ = vmulq_f32(wki_, xr_);
+    const float32x4_t yr_ = vaddq_f32(a_, b_);  // 2-126, 4-124, 6-122, 8-120,
+    const float32x4_t yi_ = vsubq_f32(c_, d_);  // 3-127, 5-125, 7-123, 9-121,
+                                                // Update 'a'.
+                                                //    a[j2 + 0] -= yr;
+                                                //    a[j2 + 1] -= yi;
+                                                //    a[k2 + 0] += yr;
+                                                //    a[k2 + 1] -= yi;
+    // 126, 124, 122, 120,
+    const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
+    // 127, 125, 123, 121,
+    const float32x4_t a_k2_p1n = vsubq_f32(yi_, a_k2_p1);
+    // Shuffle in right order and store.
+    //   2,   3,   4,   5,   6,   7,   8,   9,
+    const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);
+    const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);
+    // 124, 125, 126, 127, 120, 121, 122, 123
+    const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
+    //   2,   4,   6,   8,
+    a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
+    //   3,   5,   7,   9,
+    a_j2_p.val[1] = vsubq_f32(yi_, a_j2_p.val[1]);
+    //   2,   3,   4,   5,   6,   7,   8,   9,
+    vst2q_f32(&a[0 + j2], a_j2_p);
+
+    vst1q_f32(&a[122 - j2], a_k2_n.val[1]);
+    vst1q_f32(&a[126 - j2], a_k2_n.val[0]);
+  }
+
+  // Scalar code for the remaining items.
+  for (; j2 < 64; j1 += 1, j2 += 2) {
+    const int k2 = 128 - j2;
+    const int k1 = 32 - j1;
+    const float wkr = 0.5f - c[k1];
+    const float wki = c[j1];
+    const float xr = a[j2 + 0] - a[k2 + 0];
+    const float xi = a[j2 + 1] + a[k2 + 1];
+    const float yr = wkr * xr + wki * xi;
+    const float yi = wkr * xi - wki * xr;
+    a[j2 + 0] = a[j2 + 0] - yr;
+    a[j2 + 1] = yi - a[j2 + 1];
+    a[k2 + 0] = yr + a[k2 + 0];
+    a[k2 + 1] = yi - a[k2 + 1];
+  }
+  a[65] = -a[65];
+}
+
+void aec_rdft_init_neon(void) {
+  cft1st_128 = cft1st_128_neon;
+  cftmdl_128 = cftmdl_128_neon;
+  rftfsub_128 = rftfsub_128_neon;
+  rftbsub_128 = rftbsub_128_neon;
+}
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c
new file mode 100644
index 00000000..b4e453ff
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c
@@ -0,0 +1,427 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+#include <emmintrin.h>
+
+static const ALIGN16_BEG float ALIGN16_END
+    k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
+
+static void cft1st_128_SSE2(float* a) {
+  const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
+  int j, k2;
+
+  for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
+    __m128 a00v = _mm_loadu_ps(&a[j + 0]);
+    __m128 a04v = _mm_loadu_ps(&a[j + 4]);
+    __m128 a08v = _mm_loadu_ps(&a[j + 8]);
+    __m128 a12v = _mm_loadu_ps(&a[j + 12]);
+    __m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0));
+    __m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2));
+    __m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1, 0));
+    __m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3, 2));
+
+    const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]);
+    const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]);
+    const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]);
+    const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]);
+    const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]);
+    const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]);
+    __m128 x0v = _mm_add_ps(a01v, a23v);
+    const __m128 x1v = _mm_sub_ps(a01v, a23v);
+    const __m128 x2v = _mm_add_ps(a45v, a67v);
+    const __m128 x3v = _mm_sub_ps(a45v, a67v);
+    __m128 x0w;
+    a01v = _mm_add_ps(x0v, x2v);
+    x0v = _mm_sub_ps(x0v, x2v);
+    x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
+    {
+      const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v);
+      const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w);
+      a45v = _mm_add_ps(a45_0v, a45_1v);
+    }
+    {
+      __m128 a23_0v, a23_1v;
+      const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0, 1));
+      const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w);
+      x0v = _mm_add_ps(x1v, x3s);
+      x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
+      a23_0v = _mm_mul_ps(wk1rv, x0v);
+      a23_1v = _mm_mul_ps(wk1iv, x0w);
+      a23v = _mm_add_ps(a23_0v, a23_1v);
+
+      x0v = _mm_sub_ps(x1v, x3s);
+      x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
+    }
+    {
+      const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v);
+      const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w);
+      a67v = _mm_add_ps(a67_0v, a67_1v);
+    }
+
+    a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1, 0));
+    a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1, 0));
+    a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3, 2));
+    a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3, 2));
+    _mm_storeu_ps(&a[j + 0], a00v);
+    _mm_storeu_ps(&a[j + 4], a04v);
+    _mm_storeu_ps(&a[j + 8], a08v);
+    _mm_storeu_ps(&a[j + 12], a12v);
+  }
+}
+
+static void cftmdl_128_SSE2(float* a) {
+  const int l = 8;
+  const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
+  int j0;
+
+  __m128 wk1rv = _mm_load_ps(cftmdl_wk1r);
+  for (j0 = 0; j0 < l; j0 += 2) {
+    const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
+    const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
+    const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
+    const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
+    const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
+                                          _mm_castsi128_ps(a_32),
+                                          _MM_SHUFFLE(1, 0, 1, 0));
+    const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
+                                          _mm_castsi128_ps(a_40),
+                                          _MM_SHUFFLE(1, 0, 1, 0));
+    __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
+    const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
+
+    const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
+    const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
+    const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
+    const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
+    const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
+                                          _mm_castsi128_ps(a_48),
+                                          _MM_SHUFFLE(1, 0, 1, 0));
+    const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
+                                          _mm_castsi128_ps(a_56),
+                                          _MM_SHUFFLE(1, 0, 1, 0));
+    const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
+    const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
+
+    const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+    const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+
+    const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
+        _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
+    const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
+    const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+    const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+
+    const __m128 yy0 =
+        _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(2, 2, 2, 2));
+    const __m128 yy1 =
+        _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(3, 3, 3, 3));
+    const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1);
+    const __m128 yy3 = _mm_add_ps(yy0, yy2);
+    const __m128 yy4 = _mm_mul_ps(wk1rv, yy3);
+
+    _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0));
+    _mm_storel_epi64(
+        (__m128i*)&a[j0 + 32],
+        _mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2)));
+
+    _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1));
+    _mm_storel_epi64(
+        (__m128i*)&a[j0 + 48],
+        _mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3)));
+    a[j0 + 48] = -a[j0 + 48];
+
+    _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add));
+    _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub));
+
+    _mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4));
+    _mm_storel_epi64(
+        (__m128i*)&a[j0 + 56],
+        _mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3)));
+  }
+
+  {
+    int k = 64;
+    int k1 = 2;
+    int k2 = 2 * k1;
+    const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2 + 0]);
+    const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2 + 0]);
+    const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2 + 0]);
+    const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2 + 0]);
+    const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2 + 0]);
+    wk1rv = _mm_load_ps(&rdft_wk1r[k2 + 0]);
+    for (j0 = k; j0 < l + k; j0 += 2) {
+      const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
+      const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
+      const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
+      const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
+      const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
+                                            _mm_castsi128_ps(a_32),
+                                            _MM_SHUFFLE(1, 0, 1, 0));
+      const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
+                                            _mm_castsi128_ps(a_40),
+                                            _MM_SHUFFLE(1, 0, 1, 0));
+      __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
+      const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
+
+      const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
+      const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
+      const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
+      const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
+      const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
+                                            _mm_castsi128_ps(a_48),
+                                            _MM_SHUFFLE(1, 0, 1, 0));
+      const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
+                                            _mm_castsi128_ps(a_56),
+                                            _MM_SHUFFLE(1, 0, 1, 0));
+      const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
+      const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
+
+      const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+      const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+      const __m128 xx2 = _mm_mul_ps(xx1, wk2rv);
+      const __m128 xx3 =
+          _mm_mul_ps(wk2iv,
+                     _mm_castsi128_ps(_mm_shuffle_epi32(
+                         _mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1))));
+      const __m128 xx4 = _mm_add_ps(xx2, xx3);
+
+      const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
+          _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
+      const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
+      const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+      const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+
+      const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
+      const __m128 xx11 = _mm_mul_ps(
+          wk1iv,
+          _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
+                                             _MM_SHUFFLE(2, 3, 0, 1))));
+      const __m128 xx12 = _mm_add_ps(xx10, xx11);
+
+      const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
+      const __m128 xx21 = _mm_mul_ps(
+          wk3iv,
+          _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
+                                             _MM_SHUFFLE(2, 3, 0, 1))));
+      const __m128 xx22 = _mm_add_ps(xx20, xx21);
+
+      _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx));
+      _mm_storel_epi64(
+          (__m128i*)&a[j0 + 32],
+          _mm_shuffle_epi32(_mm_castps_si128(xx), _MM_SHUFFLE(3, 2, 3, 2)));
+
+      _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4));
+      _mm_storel_epi64(
+          (__m128i*)&a[j0 + 48],
+          _mm_shuffle_epi32(_mm_castps_si128(xx4), _MM_SHUFFLE(3, 2, 3, 2)));
+
+      _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12));
+      _mm_storel_epi64(
+          (__m128i*)&a[j0 + 40],
+          _mm_shuffle_epi32(_mm_castps_si128(xx12), _MM_SHUFFLE(3, 2, 3, 2)));
+
+      _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22));
+      _mm_storel_epi64(
+          (__m128i*)&a[j0 + 56],
+          _mm_shuffle_epi32(_mm_castps_si128(xx22), _MM_SHUFFLE(3, 2, 3, 2)));
+    }
+  }
+}
+
+static void rftfsub_128_SSE2(float* a) {
+  const float* c = rdft_w + 32;
+  int j1, j2, k1, k2;
+  float wkr, wki, xr, xi, yr, yi;
+
+  static const ALIGN16_BEG float ALIGN16_END
+      k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
+  const __m128 mm_half = _mm_load_ps(k_half);
+
+  // Vectorized code (four at once).
+  //    Note: commented number are indexes for the first iteration of the loop.
+  for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
+    // Load 'wk'.
+    const __m128 c_j1 = _mm_loadu_ps(&c[j1]);       //  1,  2,  3,  4,
+    const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]);  // 28, 29, 30, 31,
+    const __m128 wkrt = _mm_sub_ps(mm_half, c_k1);  // 28, 29, 30, 31,
+    const __m128 wkr_ =
+        _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3));  // 31, 30, 29, 28,
+    const __m128 wki_ = c_j1;                                 //  1,  2,  3,  4,
+    // Load and shuffle 'a'.
+    const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]);    //   2,   3,   4,   5,
+    const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]);    //   6,   7,   8,   9,
+    const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]);  // 120, 121, 122, 123,
+    const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]);  // 124, 125, 126, 127,
+    const __m128 a_j2_p0 = _mm_shuffle_ps(
+        a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0));  //   2,   4,   6,   8,
+    const __m128 a_j2_p1 = _mm_shuffle_ps(
+        a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1));  //   3,   5,   7,   9,
+    const __m128 a_k2_p0 = _mm_shuffle_ps(
+        a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2));  // 126, 124, 122, 120,
+    const __m128 a_k2_p1 = _mm_shuffle_ps(
+        a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3));  // 127, 125, 123, 121,
+    // Calculate 'x'.
+    const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
+    // 2-126, 4-124, 6-122, 8-120,
+    const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
+    // 3-127, 5-125, 7-123, 9-121,
+    // Calculate product into 'y'.
+    //    yr = wkr * xr - wki * xi;
+    //    yi = wkr * xi + wki * xr;
+    const __m128 a_ = _mm_mul_ps(wkr_, xr_);
+    const __m128 b_ = _mm_mul_ps(wki_, xi_);
+    const __m128 c_ = _mm_mul_ps(wkr_, xi_);
+    const __m128 d_ = _mm_mul_ps(wki_, xr_);
+    const __m128 yr_ = _mm_sub_ps(a_, b_);  // 2-126, 4-124, 6-122, 8-120,
+    const __m128 yi_ = _mm_add_ps(c_, d_);  // 3-127, 5-125, 7-123, 9-121,
+                                            // Update 'a'.
+                                            //    a[j2 + 0] -= yr;
+                                            //    a[j2 + 1] -= yi;
+                                            //    a[k2 + 0] += yr;
+    //    a[k2 + 1] -= yi;
+    const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_);  //   2,   4,   6,   8,
+    const __m128 a_j2_p1n = _mm_sub_ps(a_j2_p1, yi_);  //   3,   5,   7,   9,
+    const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_);  // 126, 124, 122, 120,
+    const __m128 a_k2_p1n = _mm_sub_ps(a_k2_p1, yi_);  // 127, 125, 123, 121,
+    // Shuffle in right order and store.
+    const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
+    //   2,   3,   4,   5,
+    const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
+    //   6,   7,   8,   9,
+    const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
+    // 122, 123, 120, 121,
+    const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
+    // 126, 127, 124, 125,
+    const __m128 a_k2_0n = _mm_shuffle_ps(
+        a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2));  // 120, 121, 122, 123,
+    const __m128 a_k2_4n = _mm_shuffle_ps(
+        a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2));  // 124, 125, 126, 127,
+    _mm_storeu_ps(&a[0 + j2], a_j2_0n);
+    _mm_storeu_ps(&a[4 + j2], a_j2_4n);
+    _mm_storeu_ps(&a[122 - j2], a_k2_0n);
+    _mm_storeu_ps(&a[126 - j2], a_k2_4n);
+  }
+  // Scalar code for the remaining items.
+  for (; j2 < 64; j1 += 1, j2 += 2) {
+    k2 = 128 - j2;
+    k1 = 32 - j1;
+    wkr = 0.5f - c[k1];
+    wki = c[j1];
+    xr = a[j2 + 0] - a[k2 + 0];
+    xi = a[j2 + 1] + a[k2 + 1];
+    yr = wkr * xr - wki * xi;
+    yi = wkr * xi + wki * xr;
+    a[j2 + 0] -= yr;
+    a[j2 + 1] -= yi;
+    a[k2 + 0] += yr;
+    a[k2 + 1] -= yi;
+  }
+}
+
+static void rftbsub_128_SSE2(float* a) {
+  const float* c = rdft_w + 32;
+  int j1, j2, k1, k2;
+  float wkr, wki, xr, xi, yr, yi;
+
+  static const ALIGN16_BEG float ALIGN16_END
+      k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
+  const __m128 mm_half = _mm_load_ps(k_half);
+
+  a[1] = -a[1];
+  // Vectorized code (four at once).
+  //    Note: commented number are indexes for the first iteration of the loop.
+  for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
+    // Load 'wk'.
+    const __m128 c_j1 = _mm_loadu_ps(&c[j1]);       //  1,  2,  3,  4,
+    const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]);  // 28, 29, 30, 31,
+    const __m128 wkrt = _mm_sub_ps(mm_half, c_k1);  // 28, 29, 30, 31,
+    const __m128 wkr_ =
+        _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3));  // 31, 30, 29, 28,
+    const __m128 wki_ = c_j1;                                 //  1,  2,  3,  4,
+    // Load and shuffle 'a'.
+    const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]);    //   2,   3,   4,   5,
+    const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]);    //   6,   7,   8,   9,
+    const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]);  // 120, 121, 122, 123,
+    const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]);  // 124, 125, 126, 127,
+    const __m128 a_j2_p0 = _mm_shuffle_ps(
+        a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0));  //   2,   4,   6,   8,
+    const __m128 a_j2_p1 = _mm_shuffle_ps(
+        a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1));  //   3,   5,   7,   9,
+    const __m128 a_k2_p0 = _mm_shuffle_ps(
+        a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2));  // 126, 124, 122, 120,
+    const __m128 a_k2_p1 = _mm_shuffle_ps(
+        a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3));  // 127, 125, 123, 121,
+    // Calculate 'x'.
+    const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
+    // 2-126, 4-124, 6-122, 8-120,
+    const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
+    // 3-127, 5-125, 7-123, 9-121,
+    // Calculate product into 'y'.
+    //    yr = wkr * xr + wki * xi;
+    //    yi = wkr * xi - wki * xr;
+    const __m128 a_ = _mm_mul_ps(wkr_, xr_);
+    const __m128 b_ = _mm_mul_ps(wki_, xi_);
+    const __m128 c_ = _mm_mul_ps(wkr_, xi_);
+    const __m128 d_ = _mm_mul_ps(wki_, xr_);
+    const __m128 yr_ = _mm_add_ps(a_, b_);  // 2-126, 4-124, 6-122, 8-120,
+    const __m128 yi_ = _mm_sub_ps(c_, d_);  // 3-127, 5-125, 7-123, 9-121,
+                                            // Update 'a'.
+                                            //    a[j2 + 0] = a[j2 + 0] - yr;
+                                            //    a[j2 + 1] = yi - a[j2 + 1];
+                                            //    a[k2 + 0] = yr + a[k2 + 0];
+    //    a[k2 + 1] = yi - a[k2 + 1];
+    const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_);  //   2,   4,   6,   8,
+    const __m128 a_j2_p1n = _mm_sub_ps(yi_, a_j2_p1);  //   3,   5,   7,   9,
+    const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_);  // 126, 124, 122, 120,
+    const __m128 a_k2_p1n = _mm_sub_ps(yi_, a_k2_p1);  // 127, 125, 123, 121,
+    // Shuffle in right order and store.
+    const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
+    //   2,   3,   4,   5,
+    const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
+    //   6,   7,   8,   9,
+    const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
+    // 122, 123, 120, 121,
+    const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
+    // 126, 127, 124, 125,
+    const __m128 a_k2_0n = _mm_shuffle_ps(
+        a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2));  // 120, 121, 122, 123,
+    const __m128 a_k2_4n = _mm_shuffle_ps(
+        a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2));  // 124, 125, 126, 127,
+    _mm_storeu_ps(&a[0 + j2], a_j2_0n);
+    _mm_storeu_ps(&a[4 + j2], a_j2_4n);
+    _mm_storeu_ps(&a[122 - j2], a_k2_0n);
+    _mm_storeu_ps(&a[126 - j2], a_k2_4n);
+  }
+  // Scalar code for the remaining items.
+  for (; j2 < 64; j1 += 1, j2 += 2) {
+    k2 = 128 - j2;
+    k1 = 32 - j1;
+    wkr = 0.5f - c[k1];
+    wki = c[j1];
+    xr = a[j2 + 0] - a[k2 + 0];
+    xi = a[j2 + 1] + a[k2 + 1];
+    yr = wkr * xr + wki * xi;
+    yi = wkr * xi - wki * xr;
+    a[j2 + 0] = a[j2 + 0] - yr;
+    a[j2 + 1] = yi - a[j2 + 1];
+    a[k2 + 0] = yr + a[k2 + 0];
+    a[k2 + 1] = yi - a[k2 + 1];
+  }
+  a[65] = -a[65];
+}
+
+void aec_rdft_init_sse2(void) {
+  cft1st_128 = cft1st_128_SSE2;
+  cftmdl_128 = cftmdl_128_SSE2;
+  rftfsub_128 = rftfsub_128_SSE2;
+  rftbsub_128 = rftbsub_128_SSE2;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.c
new file mode 100644
index 00000000..99c39efa
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.c
@@ -0,0 +1,209 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for
+ * clock skew by resampling the farend signal.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_resampler.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+enum {
+  kEstimateLengthFrames = 400
+};
+
+typedef struct {
+  float buffer[kResamplerBufferSize];
+  float position;
+
+  int deviceSampleRateHz;
+  int skewData[kEstimateLengthFrames];
+  int skewDataIndex;
+  float skewEstimate;
+} AecResampler;
+
+static int EstimateSkew(const int* rawSkew,
+                        int size,
+                        int absLimit,
+                        float* skewEst);
+
+void* WebRtcAec_CreateResampler() {
+  return malloc(sizeof(AecResampler));
+}
+
+int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz) {
+  AecResampler* obj = (AecResampler*)resampInst;
+  memset(obj->buffer, 0, sizeof(obj->buffer));
+  obj->position = 0.0;
+
+  obj->deviceSampleRateHz = deviceSampleRateHz;
+  memset(obj->skewData, 0, sizeof(obj->skewData));
+  obj->skewDataIndex = 0;
+  obj->skewEstimate = 0.0;
+
+  return 0;
+}
+
+void WebRtcAec_FreeResampler(void* resampInst) {
+  AecResampler* obj = (AecResampler*)resampInst;
+  free(obj);
+}
+
+void WebRtcAec_ResampleLinear(void* resampInst,
+                              const float* inspeech,
+                              size_t size,
+                              float skew,
+                              float* outspeech,
+                              size_t* size_out) {
+  AecResampler* obj = (AecResampler*)resampInst;
+
+  float* y;
+  float be, tnew;
+  size_t tn, mm;
+
+  assert(size <= 2 * FRAME_LEN);
+  assert(resampInst != NULL);
+  assert(inspeech != NULL);
+  assert(outspeech != NULL);
+  assert(size_out != NULL);
+
+  // Add new frame data in lookahead
+  memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay],
+         inspeech,
+         size * sizeof(inspeech[0]));
+
+  // Sample rate ratio
+  be = 1 + skew;
+
+  // Loop over input frame
+  mm = 0;
+  y = &obj->buffer[FRAME_LEN];  // Point at current frame
+
+  tnew = be * mm + obj->position;
+  tn = (size_t)tnew;
+
+  while (tn < size) {
+
+    // Interpolation
+    outspeech[mm] = y[tn] + (tnew - tn) * (y[tn + 1] - y[tn]);
+    mm++;
+
+    tnew = be * mm + obj->position;
+    tn = (int)tnew;
+  }
+
+  *size_out = mm;
+  obj->position += (*size_out) * be - size;
+
+  // Shift buffer
+  memmove(obj->buffer,
+          &obj->buffer[size],
+          (kResamplerBufferSize - size) * sizeof(obj->buffer[0]));
+}
+
+int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst) {
+  AecResampler* obj = (AecResampler*)resampInst;
+  int err = 0;
+
+  if (obj->skewDataIndex < kEstimateLengthFrames) {
+    obj->skewData[obj->skewDataIndex] = rawSkew;
+    obj->skewDataIndex++;
+  } else if (obj->skewDataIndex == kEstimateLengthFrames) {
+    err = EstimateSkew(
+        obj->skewData, kEstimateLengthFrames, obj->deviceSampleRateHz, skewEst);
+    obj->skewEstimate = *skewEst;
+    obj->skewDataIndex++;
+  } else {
+    *skewEst = obj->skewEstimate;
+  }
+
+  return err;
+}
+
+int EstimateSkew(const int* rawSkew,
+                 int size,
+                 int deviceSampleRateHz,
+                 float* skewEst) {
+  const int absLimitOuter = (int)(0.04f * deviceSampleRateHz);
+  const int absLimitInner = (int)(0.0025f * deviceSampleRateHz);
+  int i = 0;
+  int n = 0;
+  float rawAvg = 0;
+  float err = 0;
+  float rawAbsDev = 0;
+  int upperLimit = 0;
+  int lowerLimit = 0;
+  float cumSum = 0;
+  float x = 0;
+  float x2 = 0;
+  float y = 0;
+  float xy = 0;
+  float xAvg = 0;
+  float denom = 0;
+  float skew = 0;
+
+  *skewEst = 0;  // Set in case of error below.
+  for (i = 0; i < size; i++) {
+    if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) {
+      n++;
+      rawAvg += rawSkew[i];
+    }
+  }
+
+  if (n == 0) {
+    return -1;
+  }
+  assert(n > 0);
+  rawAvg /= n;
+
+  for (i = 0; i < size; i++) {
+    if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) {
+      err = rawSkew[i] - rawAvg;
+      rawAbsDev += err >= 0 ? err : -err;
+    }
+  }
+  assert(n > 0);
+  rawAbsDev /= n;
+  upperLimit = (int)(rawAvg + 5 * rawAbsDev + 1);  // +1 for ceiling.
+  lowerLimit = (int)(rawAvg - 5 * rawAbsDev - 1);  // -1 for floor.
+
+  n = 0;
+  for (i = 0; i < size; i++) {
+    if ((rawSkew[i] < absLimitInner && rawSkew[i] > -absLimitInner) ||
+        (rawSkew[i] < upperLimit && rawSkew[i] > lowerLimit)) {
+      n++;
+      cumSum += rawSkew[i];
+      x += n;
+      x2 += n * n;
+      y += cumSum;
+      xy += n * cumSum;
+    }
+  }
+
+  if (n == 0) {
+    return -1;
+  }
+  assert(n > 0);
+  xAvg = x / n;
+  denom = x2 - xAvg * x;
+
+  if (denom != 0) {
+    skew = (xy - xAvg * y) / denom;
+  }
+
+  *skewEst = skew;
+  return 0;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.h
new file mode 100644
index 00000000..a5002c15
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.h
@@ -0,0 +1,39 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
+
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+enum {
+  kResamplingDelay = 1
+};
+enum {
+  kResamplerBufferSize = FRAME_LEN * 4
+};
+
+// Unless otherwise specified, functions return 0 on success and -1 on error.
+void* WebRtcAec_CreateResampler();  // Returns NULL on error.
+int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz);
+void WebRtcAec_FreeResampler(void* resampInst);
+
+// Estimates skew from raw measurement.
+int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst);
+
+// Resamples input using linear interpolation.
+void WebRtcAec_ResampleLinear(void* resampInst,
+                              const float* inspeech,
+                              size_t size,
+                              float skew,
+                              float* outspeech,
+                              size_t* size_out);
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation.c
new file mode 100644
index 00000000..0f5cd31d
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation.c
@@ -0,0 +1,923 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * Contains the API functions for the AEC.
+ */
+#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
+
+#include <math.h>
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+#include <stdio.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+#include "webrtc/modules/audio_processing/aec/aec_resampler.h"
+#include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h"
+#include "webrtc/typedefs.h"
+
+// Measured delays [ms]
+// Device                Chrome  GTP
+// MacBook Air           10
+// MacBook Retina        10      100
+// MacPro                30?
+//
+// Win7 Desktop          70      80?
+// Win7 T430s            110
+// Win8 T420s            70
+//
+// Daisy                 50
+// Pixel (w/ preproc?)           240
+// Pixel (w/o preproc?)  110     110
+
+// The extended filter mode gives us the flexibility to ignore the system's
+// reported delays. We do this for platforms which we believe provide results
+// which are incompatible with the AEC's expectations. Based on measurements
+// (some provided above) we set a conservative (i.e. lower than measured)
+// fixed delay.
+//
+// WEBRTC_UNTRUSTED_DELAY will only have an impact when |extended_filter_mode|
+// is enabled. See the note along with |DelayCorrection| in
+// echo_cancellation_impl.h for more details on the mode.
+//
+// Justification:
+// Chromium/Mac: Here, the true latency is so low (~10-20 ms), that it plays
+// havoc with the AEC's buffering. To avoid this, we set a fixed delay of 20 ms
+// and then compensate by rewinding by 10 ms (in wideband) through
+// kDelayDiffOffsetSamples. This trick does not seem to work for larger rewind
+// values, but fortunately this is sufficient.
+//
+// Chromium/Linux(ChromeOS): The values we get on this platform don't correspond
+// well to reality. The variance doesn't match the AEC's buffer changes, and the
+// bulk values tend to be too low. However, the range across different hardware
+// appears to be too large to choose a single value.
+//
+// GTP/Linux(ChromeOS): TBD, but for the moment we will trust the values.
+#if defined(WEBRTC_CHROMIUM_BUILD) && defined(WEBRTC_MAC)
+#define WEBRTC_UNTRUSTED_DELAY
+#endif
+
+#if defined(WEBRTC_UNTRUSTED_DELAY) && defined(WEBRTC_MAC)
+static const int kDelayDiffOffsetSamples = -160;
+#else
+// Not enabled for now.
+static const int kDelayDiffOffsetSamples = 0;
+#endif
+
+#if defined(WEBRTC_MAC)
+static const int kFixedDelayMs = 20;
+#else
+static const int kFixedDelayMs = 50;
+#endif
+#if !defined(WEBRTC_UNTRUSTED_DELAY)
+static const int kMinTrustedDelayMs = 20;
+#endif
+static const int kMaxTrustedDelayMs = 500;
+
+// Maximum length of resampled signal. Must be an integer multiple of frames
+// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN
+// The factor of 2 handles wb, and the + 1 is as a safety margin
+// TODO(bjornv): Replace with kResamplerBufferSize
+#define MAX_RESAMP_LEN (5 * FRAME_LEN)
+
+static const int kMaxBufSizeStart = 62;  // In partitions
+static const int sampMsNb = 8;           // samples per ms in nb
+static const int initCheck = 42;
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+int webrtc_aec_instance_count = 0;
+#endif
+
+// Estimates delay to set the position of the far-end buffer read pointer
+// (controlled by knownDelay)
+static void EstBufDelayNormal(Aec* aecInst);
+static void EstBufDelayExtended(Aec* aecInst);
+static int ProcessNormal(Aec* self,
+                         const float* const* near,
+                         size_t num_bands,
+                         float* const* out,
+                         size_t num_samples,
+                         int16_t reported_delay_ms,
+                         int32_t skew);
+static void ProcessExtended(Aec* self,
+                            const float* const* near,
+                            size_t num_bands,
+                            float* const* out,
+                            size_t num_samples,
+                            int16_t reported_delay_ms,
+                            int32_t skew);
+
+void* WebRtcAec_Create() {
+  Aec* aecpc = malloc(sizeof(Aec));
+
+  if (!aecpc) {
+    return NULL;
+  }
+
+  aecpc->aec = WebRtcAec_CreateAec();
+  if (!aecpc->aec) {
+    WebRtcAec_Free(aecpc);
+    return NULL;
+  }
+  aecpc->resampler = WebRtcAec_CreateResampler();
+  if (!aecpc->resampler) {
+    WebRtcAec_Free(aecpc);
+    return NULL;
+  }
+  // Create far-end pre-buffer. The buffer size has to be large enough for
+  // largest possible drift compensation (kResamplerBufferSize) + "almost" an
+  // FFT buffer (PART_LEN2 - 1).
+  aecpc->far_pre_buf =
+      WebRtc_CreateBuffer(PART_LEN2 + kResamplerBufferSize, sizeof(float));
+  if (!aecpc->far_pre_buf) {
+    WebRtcAec_Free(aecpc);
+    return NULL;
+  }
+
+  aecpc->initFlag = 0;
+  aecpc->lastError = 0;
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  {
+    char filename[64];
+    sprintf(filename, "aec_buf%d.dat", webrtc_aec_instance_count);
+    aecpc->bufFile = fopen(filename, "wb");
+    sprintf(filename, "aec_skew%d.dat", webrtc_aec_instance_count);
+    aecpc->skewFile = fopen(filename, "wb");
+    sprintf(filename, "aec_delay%d.dat", webrtc_aec_instance_count);
+    aecpc->delayFile = fopen(filename, "wb");
+    webrtc_aec_instance_count++;
+  }
+#endif
+
+  return aecpc;
+}
+
+void WebRtcAec_Free(void* aecInst) {
+  Aec* aecpc = aecInst;
+
+  if (aecpc == NULL) {
+    return;
+  }
+
+  WebRtc_FreeBuffer(aecpc->far_pre_buf);
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  fclose(aecpc->bufFile);
+  fclose(aecpc->skewFile);
+  fclose(aecpc->delayFile);
+#endif
+
+  WebRtcAec_FreeAec(aecpc->aec);
+  WebRtcAec_FreeResampler(aecpc->resampler);
+  free(aecpc);
+}
+
+int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) {
+  Aec* aecpc = aecInst;
+  AecConfig aecConfig;
+
+  if (sampFreq != 8000 &&
+      sampFreq != 16000 &&
+      sampFreq != 32000 &&
+      sampFreq != 48000) {
+    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+  aecpc->sampFreq = sampFreq;
+
+  if (scSampFreq < 1 || scSampFreq > 96000) {
+    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+  aecpc->scSampFreq = scSampFreq;
+
+  // Initialize echo canceller core
+  if (WebRtcAec_InitAec(aecpc->aec, aecpc->sampFreq) == -1) {
+    aecpc->lastError = AEC_UNSPECIFIED_ERROR;
+    return -1;
+  }
+
+  if (WebRtcAec_InitResampler(aecpc->resampler, aecpc->scSampFreq) == -1) {
+    aecpc->lastError = AEC_UNSPECIFIED_ERROR;
+    return -1;
+  }
+
+  WebRtc_InitBuffer(aecpc->far_pre_buf);
+  WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN);  // Start overlap.
+
+  aecpc->initFlag = initCheck;  // indicates that initialization has been done
+
+  if (aecpc->sampFreq == 32000 || aecpc->sampFreq == 48000) {
+    aecpc->splitSampFreq = 16000;
+  } else {
+    aecpc->splitSampFreq = sampFreq;
+  }
+
+  aecpc->delayCtr = 0;
+  aecpc->sampFactor = (aecpc->scSampFreq * 1.0f) / aecpc->splitSampFreq;
+  // Sampling frequency multiplier (SWB is processed as 160 frame size).
+  aecpc->rate_factor = aecpc->splitSampFreq / 8000;
+
+  aecpc->sum = 0;
+  aecpc->counter = 0;
+  aecpc->checkBuffSize = 1;
+  aecpc->firstVal = 0;
+
+  // We skip the startup_phase completely (setting to 0) if DA-AEC is enabled,
+  // but not extended_filter mode.
+  aecpc->startup_phase = WebRtcAec_extended_filter_enabled(aecpc->aec) ||
+      !WebRtcAec_delay_agnostic_enabled(aecpc->aec);
+  aecpc->bufSizeStart = 0;
+  aecpc->checkBufSizeCtr = 0;
+  aecpc->msInSndCardBuf = 0;
+  aecpc->filtDelay = -1;  // -1 indicates an initialized state.
+  aecpc->timeForDelayChange = 0;
+  aecpc->knownDelay = 0;
+  aecpc->lastDelayDiff = 0;
+
+  aecpc->skewFrCtr = 0;
+  aecpc->resample = kAecFalse;
+  aecpc->highSkewCtr = 0;
+  aecpc->skew = 0;
+
+  aecpc->farend_started = 0;
+
+  // Default settings.
+  aecConfig.nlpMode = kAecNlpModerate;
+  aecConfig.skewMode = kAecFalse;
+  aecConfig.metricsMode = kAecFalse;
+  aecConfig.delay_logging = kAecFalse;
+
+  if (WebRtcAec_set_config(aecpc, aecConfig) == -1) {
+    aecpc->lastError = AEC_UNSPECIFIED_ERROR;
+    return -1;
+  }
+
+  return 0;
+}
+
+// only buffer L band for farend
+int32_t WebRtcAec_BufferFarend(void* aecInst,
+                               const float* farend,
+                               size_t nrOfSamples) {
+  Aec* aecpc = aecInst;
+  size_t newNrOfSamples = nrOfSamples;
+  float new_farend[MAX_RESAMP_LEN];
+  const float* farend_ptr = farend;
+
+  if (farend == NULL) {
+    aecpc->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+
+  if (aecpc->initFlag != initCheck) {
+    aecpc->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  // number of samples == 160 for SWB input
+  if (nrOfSamples != 80 && nrOfSamples != 160) {
+    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+
+  if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) {
+    // Resample and get a new number of samples
+    WebRtcAec_ResampleLinear(aecpc->resampler,
+                             farend,
+                             nrOfSamples,
+                             aecpc->skew,
+                             new_farend,
+                             &newNrOfSamples);
+    farend_ptr = new_farend;
+  }
+
+  aecpc->farend_started = 1;
+  WebRtcAec_SetSystemDelay(
+      aecpc->aec, WebRtcAec_system_delay(aecpc->aec) + (int)newNrOfSamples);
+
+  // Write the time-domain data to |far_pre_buf|.
+  WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_ptr, newNrOfSamples);
+
+  // Transform to frequency domain if we have enough data.
+  while (WebRtc_available_read(aecpc->far_pre_buf) >= PART_LEN2) {
+    // We have enough data to pass to the FFT, hence read PART_LEN2 samples.
+    {
+      float* ptmp = NULL;
+      float tmp[PART_LEN2];
+      WebRtc_ReadBuffer(aecpc->far_pre_buf, (void**)&ptmp, tmp, PART_LEN2);
+      WebRtcAec_BufferFarendPartition(aecpc->aec, ptmp);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+      WebRtc_WriteBuffer(
+          WebRtcAec_far_time_buf(aecpc->aec), &ptmp[PART_LEN], 1);
+#endif
+    }
+
+    // Rewind |far_pre_buf| PART_LEN samples for overlap before continuing.
+    WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN);
+  }
+
+  return 0;
+}
+
+int32_t WebRtcAec_Process(void* aecInst,
+                          const float* const* nearend,
+                          size_t num_bands,
+                          float* const* out,
+                          size_t nrOfSamples,
+                          int16_t msInSndCardBuf,
+                          int32_t skew) {
+  Aec* aecpc = aecInst;
+  int32_t retVal = 0;
+
+  if (out == NULL) {
+    aecpc->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+
+  if (aecpc->initFlag != initCheck) {
+    aecpc->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  // number of samples == 160 for SWB input
+  if (nrOfSamples != 80 && nrOfSamples != 160) {
+    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+
+  if (msInSndCardBuf < 0) {
+    msInSndCardBuf = 0;
+    aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
+    retVal = -1;
+  } else if (msInSndCardBuf > kMaxTrustedDelayMs) {
+    // The clamping is now done in ProcessExtended/Normal().
+    aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
+    retVal = -1;
+  }
+
+  // This returns the value of aec->extended_filter_enabled.
+  if (WebRtcAec_extended_filter_enabled(aecpc->aec)) {
+    ProcessExtended(aecpc,
+                    nearend,
+                    num_bands,
+                    out,
+                    nrOfSamples,
+                    msInSndCardBuf,
+                    skew);
+  } else {
+    if (ProcessNormal(aecpc,
+                      nearend,
+                      num_bands,
+                      out,
+                      nrOfSamples,
+                      msInSndCardBuf,
+                      skew) != 0) {
+      retVal = -1;
+    }
+  }
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  {
+    int16_t far_buf_size_ms = (int16_t)(WebRtcAec_system_delay(aecpc->aec) /
+                                        (sampMsNb * aecpc->rate_factor));
+    (void)fwrite(&far_buf_size_ms, 2, 1, aecpc->bufFile);
+    (void)fwrite(
+        &aecpc->knownDelay, sizeof(aecpc->knownDelay), 1, aecpc->delayFile);
+  }
+#endif
+
+  return retVal;
+}
+
+int WebRtcAec_set_config(void* handle, AecConfig config) {
+  Aec* self = (Aec*)handle;
+  if (self->initFlag != initCheck) {
+    self->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  if (config.skewMode != kAecFalse && config.skewMode != kAecTrue) {
+    self->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+  self->skewMode = config.skewMode;
+
+  if (config.nlpMode != kAecNlpConservative &&
+      config.nlpMode != kAecNlpModerate &&
+      config.nlpMode != kAecNlpAggressive) {
+    self->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+
+  if (config.metricsMode != kAecFalse && config.metricsMode != kAecTrue) {
+    self->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+
+  if (config.delay_logging != kAecFalse && config.delay_logging != kAecTrue) {
+    self->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+
+  WebRtcAec_SetConfigCore(
+      self->aec, config.nlpMode, config.metricsMode, config.delay_logging);
+  return 0;
+}
+
+int WebRtcAec_get_echo_status(void* handle, int* status) {
+  Aec* self = (Aec*)handle;
+  if (status == NULL) {
+    self->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+  if (self->initFlag != initCheck) {
+    self->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  *status = WebRtcAec_echo_state(self->aec);
+
+  return 0;
+}
+
+int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) {
+  const float kUpWeight = 0.7f;
+  float dtmp;
+  int stmp;
+  Aec* self = (Aec*)handle;
+  Stats erl;
+  Stats erle;
+  Stats a_nlp;
+
+  if (handle == NULL) {
+    return -1;
+  }
+  if (metrics == NULL) {
+    self->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+  if (self->initFlag != initCheck) {
+    self->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  WebRtcAec_GetEchoStats(self->aec, &erl, &erle, &a_nlp);
+
+  // ERL
+  metrics->erl.instant = (int)erl.instant;
+
+  if ((erl.himean > kOffsetLevel) && (erl.average > kOffsetLevel)) {
+    // Use a mix between regular average and upper part average.
+    dtmp = kUpWeight * erl.himean + (1 - kUpWeight) * erl.average;
+    metrics->erl.average = (int)dtmp;
+  } else {
+    metrics->erl.average = kOffsetLevel;
+  }
+
+  metrics->erl.max = (int)erl.max;
+
+  if (erl.min < (kOffsetLevel * (-1))) {
+    metrics->erl.min = (int)erl.min;
+  } else {
+    metrics->erl.min = kOffsetLevel;
+  }
+
+  // ERLE
+  metrics->erle.instant = (int)erle.instant;
+
+  if ((erle.himean > kOffsetLevel) && (erle.average > kOffsetLevel)) {
+    // Use a mix between regular average and upper part average.
+    dtmp = kUpWeight * erle.himean + (1 - kUpWeight) * erle.average;
+    metrics->erle.average = (int)dtmp;
+  } else {
+    metrics->erle.average = kOffsetLevel;
+  }
+
+  metrics->erle.max = (int)erle.max;
+
+  if (erle.min < (kOffsetLevel * (-1))) {
+    metrics->erle.min = (int)erle.min;
+  } else {
+    metrics->erle.min = kOffsetLevel;
+  }
+
+  // RERL
+  if ((metrics->erl.average > kOffsetLevel) &&
+      (metrics->erle.average > kOffsetLevel)) {
+    stmp = metrics->erl.average + metrics->erle.average;
+  } else {
+    stmp = kOffsetLevel;
+  }
+  metrics->rerl.average = stmp;
+
+  // No other statistics needed, but returned for completeness.
+  metrics->rerl.instant = stmp;
+  metrics->rerl.max = stmp;
+  metrics->rerl.min = stmp;
+
+  // A_NLP
+  metrics->aNlp.instant = (int)a_nlp.instant;
+
+  if ((a_nlp.himean > kOffsetLevel) && (a_nlp.average > kOffsetLevel)) {
+    // Use a mix between regular average and upper part average.
+    dtmp = kUpWeight * a_nlp.himean + (1 - kUpWeight) * a_nlp.average;
+    metrics->aNlp.average = (int)dtmp;
+  } else {
+    metrics->aNlp.average = kOffsetLevel;
+  }
+
+  metrics->aNlp.max = (int)a_nlp.max;
+
+  if (a_nlp.min < (kOffsetLevel * (-1))) {
+    metrics->aNlp.min = (int)a_nlp.min;
+  } else {
+    metrics->aNlp.min = kOffsetLevel;
+  }
+
+  return 0;
+}
+
+int WebRtcAec_GetDelayMetrics(void* handle,
+                              int* median,
+                              int* std,
+                              float* fraction_poor_delays) {
+  Aec* self = handle;
+  if (median == NULL) {
+    self->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+  if (std == NULL) {
+    self->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+  if (self->initFlag != initCheck) {
+    self->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+  if (WebRtcAec_GetDelayMetricsCore(self->aec, median, std,
+                                    fraction_poor_delays) ==
+      -1) {
+    // Logging disabled.
+    self->lastError = AEC_UNSUPPORTED_FUNCTION_ERROR;
+    return -1;
+  }
+
+  return 0;
+}
+
+int32_t WebRtcAec_get_error_code(void* aecInst) {
+  Aec* aecpc = aecInst;
+  return aecpc->lastError;
+}
+
+AecCore* WebRtcAec_aec_core(void* handle) {
+  if (!handle) {
+    return NULL;
+  }
+  return ((Aec*)handle)->aec;
+}
+
+static int ProcessNormal(Aec* aecpc,
+                         const float* const* nearend,
+                         size_t num_bands,
+                         float* const* out,
+                         size_t nrOfSamples,
+                         int16_t msInSndCardBuf,
+                         int32_t skew) {
+  int retVal = 0;
+  size_t i;
+  size_t nBlocks10ms;
+  // Limit resampling to doubling/halving of signal
+  const float minSkewEst = -0.5f;
+  const float maxSkewEst = 1.0f;
+
+  msInSndCardBuf =
+      msInSndCardBuf > kMaxTrustedDelayMs ? kMaxTrustedDelayMs : msInSndCardBuf;
+  // TODO(andrew): we need to investigate if this +10 is really wanted.
+  msInSndCardBuf += 10;
+  aecpc->msInSndCardBuf = msInSndCardBuf;
+
+  if (aecpc->skewMode == kAecTrue) {
+    if (aecpc->skewFrCtr < 25) {
+      aecpc->skewFrCtr++;
+    } else {
+      retVal = WebRtcAec_GetSkew(aecpc->resampler, skew, &aecpc->skew);
+      if (retVal == -1) {
+        aecpc->skew = 0;
+        aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
+      }
+
+      aecpc->skew /= aecpc->sampFactor * nrOfSamples;
+
+      if (aecpc->skew < 1.0e-3 && aecpc->skew > -1.0e-3) {
+        aecpc->resample = kAecFalse;
+      } else {
+        aecpc->resample = kAecTrue;
+      }
+
+      if (aecpc->skew < minSkewEst) {
+        aecpc->skew = minSkewEst;
+      } else if (aecpc->skew > maxSkewEst) {
+        aecpc->skew = maxSkewEst;
+      }
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+      (void)fwrite(&aecpc->skew, sizeof(aecpc->skew), 1, aecpc->skewFile);
+#endif
+    }
+  }
+
+  nBlocks10ms = nrOfSamples / (FRAME_LEN * aecpc->rate_factor);
+
+  if (aecpc->startup_phase) {
+    for (i = 0; i < num_bands; ++i) {
+      // Only needed if they don't already point to the same place.
+      if (nearend[i] != out[i]) {
+        memcpy(out[i], nearend[i], sizeof(nearend[i][0]) * nrOfSamples);
+      }
+    }
+
+    // The AEC is in the start up mode
+    // AEC is disabled until the system delay is OK
+
+    // Mechanism to ensure that the system delay is reasonably stable.
+    if (aecpc->checkBuffSize) {
+      aecpc->checkBufSizeCtr++;
+      // Before we fill up the far-end buffer we require the system delay
+      // to be stable (+/-8 ms) compared to the first value. This
+      // comparison is made during the following 6 consecutive 10 ms
+      // blocks. If it seems to be stable then we start to fill up the
+      // far-end buffer.
+      if (aecpc->counter == 0) {
+        aecpc->firstVal = aecpc->msInSndCardBuf;
+        aecpc->sum = 0;
+      }
+
+      if (abs(aecpc->firstVal - aecpc->msInSndCardBuf) <
+          WEBRTC_SPL_MAX(0.2 * aecpc->msInSndCardBuf, sampMsNb)) {
+        aecpc->sum += aecpc->msInSndCardBuf;
+        aecpc->counter++;
+      } else {
+        aecpc->counter = 0;
+      }
+
+      if (aecpc->counter * nBlocks10ms >= 6) {
+        // The far-end buffer size is determined in partitions of
+        // PART_LEN samples. Use 75% of the average value of the system
+        // delay as buffer size to start with.
+        aecpc->bufSizeStart =
+            WEBRTC_SPL_MIN((3 * aecpc->sum * aecpc->rate_factor * 8) /
+                               (4 * aecpc->counter * PART_LEN),
+                           kMaxBufSizeStart);
+        // Buffer size has now been determined.
+        aecpc->checkBuffSize = 0;
+      }
+
+      if (aecpc->checkBufSizeCtr * nBlocks10ms > 50) {
+        // For really bad systems, don't disable the echo canceller for
+        // more than 0.5 sec.
+        aecpc->bufSizeStart = WEBRTC_SPL_MIN(
+            (aecpc->msInSndCardBuf * aecpc->rate_factor * 3) / 40,
+            kMaxBufSizeStart);
+        aecpc->checkBuffSize = 0;
+      }
+    }
+
+    // If |checkBuffSize| changed in the if-statement above.
+    if (!aecpc->checkBuffSize) {
+      // The system delay is now reasonably stable (or has been unstable
+      // for too long). When the far-end buffer is filled with
+      // approximately the same amount of data as reported by the system
+      // we end the startup phase.
+      int overhead_elements =
+          WebRtcAec_system_delay(aecpc->aec) / PART_LEN - aecpc->bufSizeStart;
+      if (overhead_elements == 0) {
+        // Enable the AEC
+        aecpc->startup_phase = 0;
+      } else if (overhead_elements > 0) {
+        // TODO(bjornv): Do we need a check on how much we actually
+        // moved the read pointer? It should always be possible to move
+        // the pointer |overhead_elements| since we have only added data
+        // to the buffer and no delay compensation nor AEC processing
+        // has been done.
+        WebRtcAec_MoveFarReadPtr(aecpc->aec, overhead_elements);
+
+        // Enable the AEC
+        aecpc->startup_phase = 0;
+      }
+    }
+  } else {
+    // AEC is enabled.
+    EstBufDelayNormal(aecpc);
+
+    // Call the AEC.
+    // TODO(bjornv): Re-structure such that we don't have to pass
+    // |aecpc->knownDelay| as input. Change name to something like
+    // |system_buffer_diff|.
+    WebRtcAec_ProcessFrames(aecpc->aec,
+                            nearend,
+                            num_bands,
+                            nrOfSamples,
+                            aecpc->knownDelay,
+                            out);
+  }
+
+  return retVal;
+}
+
+static void ProcessExtended(Aec* self,
+                            const float* const* near,
+                            size_t num_bands,
+                            float* const* out,
+                            size_t num_samples,
+                            int16_t reported_delay_ms,
+                            int32_t skew) {
+  size_t i;
+  const int delay_diff_offset = kDelayDiffOffsetSamples;
+#if defined(WEBRTC_UNTRUSTED_DELAY)
+  reported_delay_ms = kFixedDelayMs;
+#else
+  // This is the usual mode where we trust the reported system delay values.
+  // Due to the longer filter, we no longer add 10 ms to the reported delay
+  // to reduce chance of non-causality. Instead we apply a minimum here to avoid
+  // issues with the read pointer jumping around needlessly.
+  reported_delay_ms = reported_delay_ms < kMinTrustedDelayMs
+                          ? kMinTrustedDelayMs
+                          : reported_delay_ms;
+  // If the reported delay appears to be bogus, we attempt to recover by using
+  // the measured fixed delay values. We use >= here because higher layers
+  // may already clamp to this maximum value, and we would otherwise not
+  // detect it here.
+  reported_delay_ms = reported_delay_ms >= kMaxTrustedDelayMs
+                          ? kFixedDelayMs
+                          : reported_delay_ms;
+#endif
+  self->msInSndCardBuf = reported_delay_ms;
+
+  if (!self->farend_started) {
+    for (i = 0; i < num_bands; ++i) {
+      // Only needed if they don't already point to the same place.
+      if (near[i] != out[i]) {
+        memcpy(out[i], near[i], sizeof(near[i][0]) * num_samples);
+      }
+    }
+    return;
+  }
+  if (self->startup_phase) {
+    // In the extended mode, there isn't a startup "phase", just a special
+    // action on the first frame. In the trusted delay case, we'll take the
+    // current reported delay, unless it's less then our conservative
+    // measurement.
+    int startup_size_ms =
+        reported_delay_ms < kFixedDelayMs ? kFixedDelayMs : reported_delay_ms;
+#if defined(WEBRTC_ANDROID)
+    int target_delay = startup_size_ms * self->rate_factor * 8;
+#else
+    // To avoid putting the AEC in a non-causal state we're being slightly
+    // conservative and scale by 2. On Android we use a fixed delay and
+    // therefore there is no need to scale the target_delay.
+    int target_delay = startup_size_ms * self->rate_factor * 8 / 2;
+#endif
+    int overhead_elements =
+        (WebRtcAec_system_delay(self->aec) - target_delay) / PART_LEN;
+    WebRtcAec_MoveFarReadPtr(self->aec, overhead_elements);
+    self->startup_phase = 0;
+  }
+
+  EstBufDelayExtended(self);
+
+  {
+    // |delay_diff_offset| gives us the option to manually rewind the delay on
+    // very low delay platforms which can't be expressed purely through
+    // |reported_delay_ms|.
+    const int adjusted_known_delay =
+        WEBRTC_SPL_MAX(0, self->knownDelay + delay_diff_offset);
+
+    WebRtcAec_ProcessFrames(self->aec,
+                            near,
+                            num_bands,
+                            num_samples,
+                            adjusted_known_delay,
+                            out);
+  }
+}
+
+static void EstBufDelayNormal(Aec* aecpc) {
+  int nSampSndCard = aecpc->msInSndCardBuf * sampMsNb * aecpc->rate_factor;
+  int current_delay = nSampSndCard - WebRtcAec_system_delay(aecpc->aec);
+  int delay_difference = 0;
+
+  // Before we proceed with the delay estimate filtering we:
+  // 1) Compensate for the frame that will be read.
+  // 2) Compensate for drift resampling.
+  // 3) Compensate for non-causality if needed, since the estimated delay can't
+  //    be negative.
+
+  // 1) Compensating for the frame(s) that will be read/processed.
+  current_delay += FRAME_LEN * aecpc->rate_factor;
+
+  // 2) Account for resampling frame delay.
+  if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) {
+    current_delay -= kResamplingDelay;
+  }
+
+  // 3) Compensate for non-causality, if needed, by flushing one block.
+  if (current_delay < PART_LEN) {
+    current_delay += WebRtcAec_MoveFarReadPtr(aecpc->aec, 1) * PART_LEN;
+  }
+
+  // We use -1 to signal an initialized state in the "extended" implementation;
+  // compensate for that.
+  aecpc->filtDelay = aecpc->filtDelay < 0 ? 0 : aecpc->filtDelay;
+  aecpc->filtDelay =
+      WEBRTC_SPL_MAX(0, (short)(0.8 * aecpc->filtDelay + 0.2 * current_delay));
+
+  delay_difference = aecpc->filtDelay - aecpc->knownDelay;
+  if (delay_difference > 224) {
+    if (aecpc->lastDelayDiff < 96) {
+      aecpc->timeForDelayChange = 0;
+    } else {
+      aecpc->timeForDelayChange++;
+    }
+  } else if (delay_difference < 96 && aecpc->knownDelay > 0) {
+    if (aecpc->lastDelayDiff > 224) {
+      aecpc->timeForDelayChange = 0;
+    } else {
+      aecpc->timeForDelayChange++;
+    }
+  } else {
+    aecpc->timeForDelayChange = 0;
+  }
+  aecpc->lastDelayDiff = delay_difference;
+
+  if (aecpc->timeForDelayChange > 25) {
+    aecpc->knownDelay = WEBRTC_SPL_MAX((int)aecpc->filtDelay - 160, 0);
+  }
+}
+
+static void EstBufDelayExtended(Aec* self) {
+  int reported_delay = self->msInSndCardBuf * sampMsNb * self->rate_factor;
+  int current_delay = reported_delay - WebRtcAec_system_delay(self->aec);
+  int delay_difference = 0;
+
+  // Before we proceed with the delay estimate filtering we:
+  // 1) Compensate for the frame that will be read.
+  // 2) Compensate for drift resampling.
+  // 3) Compensate for non-causality if needed, since the estimated delay can't
+  //    be negative.
+
+  // 1) Compensating for the frame(s) that will be read/processed.
+  current_delay += FRAME_LEN * self->rate_factor;
+
+  // 2) Account for resampling frame delay.
+  if (self->skewMode == kAecTrue && self->resample == kAecTrue) {
+    current_delay -= kResamplingDelay;
+  }
+
+  // 3) Compensate for non-causality, if needed, by flushing two blocks.
+  if (current_delay < PART_LEN) {
+    current_delay += WebRtcAec_MoveFarReadPtr(self->aec, 2) * PART_LEN;
+  }
+
+  if (self->filtDelay == -1) {
+    self->filtDelay = WEBRTC_SPL_MAX(0, 0.5 * current_delay);
+  } else {
+    self->filtDelay = WEBRTC_SPL_MAX(
+        0, (short)(0.95 * self->filtDelay + 0.05 * current_delay));
+  }
+
+  delay_difference = self->filtDelay - self->knownDelay;
+  if (delay_difference > 384) {
+    if (self->lastDelayDiff < 128) {
+      self->timeForDelayChange = 0;
+    } else {
+      self->timeForDelayChange++;
+    }
+  } else if (delay_difference < 128 && self->knownDelay > 0) {
+    if (self->lastDelayDiff > 384) {
+      self->timeForDelayChange = 0;
+    } else {
+      self->timeForDelayChange++;
+    }
+  } else {
+    self->timeForDelayChange = 0;
+  }
+  self->lastDelayDiff = delay_difference;
+
+  if (self->timeForDelayChange > 25) {
+    self->knownDelay = WEBRTC_SPL_MAX((int)self->filtDelay - 256, 0);
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h
new file mode 100644
index 00000000..95a6cf33
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h
@@ -0,0 +1,67 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+typedef struct {
+  int delayCtr;
+  int sampFreq;
+  int splitSampFreq;
+  int scSampFreq;
+  float sampFactor;  // scSampRate / sampFreq
+  short skewMode;
+  int bufSizeStart;
+  int knownDelay;
+  int rate_factor;
+
+  short initFlag;  // indicates if AEC has been initialized
+
+  // Variables used for averaging far end buffer size
+  short counter;
+  int sum;
+  short firstVal;
+  short checkBufSizeCtr;
+
+  // Variables used for delay shifts
+  short msInSndCardBuf;
+  short filtDelay;  // Filtered delay estimate.
+  int timeForDelayChange;
+  int startup_phase;
+  int checkBuffSize;
+  short lastDelayDiff;
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  FILE* bufFile;
+  FILE* delayFile;
+  FILE* skewFile;
+#endif
+
+  // Structures
+  void* resampler;
+
+  int skewFrCtr;
+  int resample;  // if the skew is small enough we don't resample
+  int highSkewCtr;
+  float skew;
+
+  RingBuffer* far_pre_buf;  // Time domain far-end pre-buffer.
+
+  int lastError;
+
+  int farend_started;
+
+  AecCore* aec;
+} Aec;
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc
new file mode 100644
index 00000000..315ac3e9
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc
@@ -0,0 +1,48 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// TODO(bjornv): Make this a comprehensive test.
+
+#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
+
+#include <stdlib.h>
+#include <time.h>
+
+extern "C" {
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+}
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/base/checks.h"
+
+namespace webrtc {
+
+TEST(EchoCancellationTest, CreateAndFreeHasExpectedBehavior) {
+  void* handle = WebRtcAec_Create();
+  ASSERT_TRUE(handle);
+  WebRtcAec_Free(nullptr);
+  WebRtcAec_Free(handle);
+}
+
+TEST(EchoCancellationTest, ApplyAecCoreHandle) {
+  void* handle = WebRtcAec_Create();
+  ASSERT_TRUE(handle);
+  EXPECT_TRUE(WebRtcAec_aec_core(NULL) == NULL);
+  AecCore* aec_core = WebRtcAec_aec_core(handle);
+  EXPECT_TRUE(aec_core != NULL);
+  // A simple test to verify that we can set and get a value from the lower
+  // level |aec_core| handle.
+  int delay = 111;
+  WebRtcAec_SetSystemDelay(aec_core, delay);
+  EXPECT_EQ(delay, WebRtcAec_system_delay(aec_core));
+  WebRtcAec_Free(handle);
+}
+
+}  // namespace webrtc
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/include/echo_cancellation.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
new file mode 100644
index 00000000..a340cf84
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
@@ -0,0 +1,245 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
+
+#include <stddef.h>
+
+#include "webrtc/typedefs.h"
+
+// Errors
+#define AEC_UNSPECIFIED_ERROR 12000
+#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001
+#define AEC_UNINITIALIZED_ERROR 12002
+#define AEC_NULL_POINTER_ERROR 12003
+#define AEC_BAD_PARAMETER_ERROR 12004
+
+// Warnings
+#define AEC_BAD_PARAMETER_WARNING 12050
+
+enum {
+  kAecNlpConservative = 0,
+  kAecNlpModerate,
+  kAecNlpAggressive
+};
+
+enum {
+  kAecFalse = 0,
+  kAecTrue
+};
+
+typedef struct {
+  int16_t nlpMode;      // default kAecNlpModerate
+  int16_t skewMode;     // default kAecFalse
+  int16_t metricsMode;  // default kAecFalse
+  int delay_logging;    // default kAecFalse
+  // float realSkew;
+} AecConfig;
+
+typedef struct {
+  int instant;
+  int average;
+  int max;
+  int min;
+} AecLevel;
+
+typedef struct {
+  AecLevel rerl;
+  AecLevel erl;
+  AecLevel erle;
+  AecLevel aNlp;
+} AecMetrics;
+
+struct AecCore;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Allocates the memory needed by the AEC. The memory needs to be initialized
+ * separately using the WebRtcAec_Init() function. Returns a pointer to the
+ * object or NULL on error.
+ */
+void* WebRtcAec_Create();
+
+/*
+ * This function releases the memory allocated by WebRtcAec_Create().
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*        aecInst         Pointer to the AEC instance
+ */
+void WebRtcAec_Free(void* aecInst);
+
+/*
+ * Initializes an AEC instance.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecInst       Pointer to the AEC instance
+ * int32_t        sampFreq      Sampling frequency of data
+ * int32_t        scSampFreq    Soundcard sampling frequency
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq);
+
+/*
+ * Inserts an 80 or 160 sample block of data into the farend buffer.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecInst       Pointer to the AEC instance
+ * const float*   farend        In buffer containing one frame of
+ *                              farend signal for L band
+ * int16_t        nrOfSamples   Number of samples in farend buffer
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAec_BufferFarend(void* aecInst,
+                               const float* farend,
+                               size_t nrOfSamples);
+
+/*
+ * Runs the echo canceller on an 80 or 160 sample blocks of data.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*         aecInst        Pointer to the AEC instance
+ * float* const* nearend        In buffer containing one frame of
+ *                              nearend+echo signal for each band
+ * int           num_bands      Number of bands in nearend buffer
+ * int16_t       nrOfSamples    Number of samples in nearend buffer
+ * int16_t       msInSndCardBuf Delay estimate for sound card and
+ *                              system buffers
+ * int16_t       skew           Difference between number of samples played
+ *                              and recorded at the soundcard (for clock skew
+ *                              compensation)
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * float* const* out            Out buffer, one frame of processed nearend
+ *                              for each band
+ * int32_t       return         0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAec_Process(void* aecInst,
+                          const float* const* nearend,
+                          size_t num_bands,
+                          float* const* out,
+                          size_t nrOfSamples,
+                          int16_t msInSndCardBuf,
+                          int32_t skew);
+
+/*
+ * This function enables the user to set certain parameters on-the-fly.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          handle        Pointer to the AEC instance
+ * AecConfig      config        Config instance that contains all
+ *                              properties to be set
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int            return         0: OK
+ *                              -1: error
+ */
+int WebRtcAec_set_config(void* handle, AecConfig config);
+
+/*
+ * Gets the current echo status of the nearend signal.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          handle        Pointer to the AEC instance
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int*           status        0: Almost certainly nearend single-talk
+ *                              1: Might not be neared single-talk
+ * int            return         0: OK
+ *                              -1: error
+ */
+int WebRtcAec_get_echo_status(void* handle, int* status);
+
+/*
+ * Gets the current echo metrics for the session.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          handle        Pointer to the AEC instance
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * AecMetrics*    metrics       Struct which will be filled out with the
+ *                              current echo metrics.
+ * int            return         0: OK
+ *                              -1: error
+ */
+int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics);
+
+/*
+ * Gets the current delay metrics for the session.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*   handle               Pointer to the AEC instance
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int*    median               Delay median value.
+ * int*    std                  Delay standard deviation.
+ * float*  fraction_poor_delays Fraction of the delay estimates that may
+ *                              cause the AEC to perform poorly.
+ *
+ * int     return                0: OK
+ *                              -1: error
+ */
+int WebRtcAec_GetDelayMetrics(void* handle,
+                              int* median,
+                              int* std,
+                              float* fraction_poor_delays);
+
+/*
+ * Gets the last error code.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecInst       Pointer to the AEC instance
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        11000-11100: error code
+ */
+int32_t WebRtcAec_get_error_code(void* aecInst);
+
+// Returns a pointer to the low level AEC handle.
+//
+// Input:
+//  - handle                    : Pointer to the AEC instance.
+//
+// Return value:
+//  - AecCore pointer           : NULL for error.
+//
+struct AecCore* WebRtcAec_aec_core(void* handle);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/system_delay_unittest.cc b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
new file mode 100644
index 00000000..07e3cf8a
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
@@ -0,0 +1,602 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "testing/gtest/include/gtest/gtest.h"
+extern "C" {
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+}
+#include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h"
+#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
+#include "webrtc/test/testsupport/gtest_disable.h"
+#include "webrtc/typedefs.h"
+
+namespace {
+
+class SystemDelayTest : public ::testing::Test {
+ protected:
+  SystemDelayTest();
+  virtual void SetUp();
+  virtual void TearDown();
+
+  // Initialization of AEC handle with respect to |sample_rate_hz|. Since the
+  // device sample rate is unimportant we set that value to 48000 Hz.
+  void Init(int sample_rate_hz);
+
+  // Makes one render call and one capture call in that specific order.
+  void RenderAndCapture(int device_buffer_ms);
+
+  // Fills up the far-end buffer with respect to the default device buffer size.
+  size_t BufferFillUp();
+
+  // Runs and verifies the behavior in a stable startup procedure.
+  void RunStableStartup();
+
+  // Maps buffer size in ms into samples, taking the unprocessed frame into
+  // account.
+  int MapBufferSizeToSamples(int size_in_ms, bool extended_filter);
+
+  void* handle_;
+  Aec* self_;
+  size_t samples_per_frame_;
+  // Dummy input/output speech data.
+  static const int kSamplesPerChunk = 160;
+  float far_[kSamplesPerChunk];
+  float near_[kSamplesPerChunk];
+  float out_[kSamplesPerChunk];
+  const float* near_ptr_;
+  float* out_ptr_;
+};
+
+SystemDelayTest::SystemDelayTest()
+    : handle_(NULL), self_(NULL), samples_per_frame_(0) {
+  // Dummy input data are set with more or less arbitrary non-zero values.
+  for (int i = 0; i < kSamplesPerChunk; i++) {
+    far_[i] = 257.0;
+    near_[i] = 514.0;
+  }
+  memset(out_, 0, sizeof(out_));
+  near_ptr_ = near_;
+  out_ptr_ = out_;
+}
+
+void SystemDelayTest::SetUp() {
+  handle_ = WebRtcAec_Create();
+  ASSERT_TRUE(handle_);
+  self_ = reinterpret_cast<Aec*>(handle_);
+}
+
+void SystemDelayTest::TearDown() {
+  // Free AEC
+  WebRtcAec_Free(handle_);
+  handle_ = NULL;
+}
+
+// In SWB mode nothing is added to the buffer handling with respect to
+// functionality compared to WB. We therefore only verify behavior in NB and WB.
+static const int kSampleRateHz[] = {8000, 16000};
+static const size_t kNumSampleRates =
+    sizeof(kSampleRateHz) / sizeof(*kSampleRateHz);
+
+// Default audio device buffer size used.
+static const int kDeviceBufMs = 100;
+
+// Requirement for a stable device convergence time in ms. Should converge in
+// less than |kStableConvergenceMs|.
+static const int kStableConvergenceMs = 100;
+
+// Maximum convergence time in ms. This means that we should leave the startup
+// phase after |kMaxConvergenceMs| independent of device buffer stability
+// conditions.
+static const int kMaxConvergenceMs = 500;
+
+void SystemDelayTest::Init(int sample_rate_hz) {
+  // Initialize AEC
+  EXPECT_EQ(0, WebRtcAec_Init(handle_, sample_rate_hz, 48000));
+  EXPECT_EQ(0, WebRtcAec_system_delay(self_->aec));
+
+  // One frame equals 10 ms of data.
+  samples_per_frame_ = static_cast<size_t>(sample_rate_hz / 100);
+}
+
+void SystemDelayTest::RenderAndCapture(int device_buffer_ms) {
+  EXPECT_EQ(0, WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
+  EXPECT_EQ(0,
+            WebRtcAec_Process(handle_,
+                              &near_ptr_,
+                              1,
+                              &out_ptr_,
+                              samples_per_frame_,
+                              device_buffer_ms,
+                              0));
+}
+
+size_t SystemDelayTest::BufferFillUp() {
+  // To make sure we have a full buffer when we verify stability we first fill
+  // up the far-end buffer with the same amount as we will report in through
+  // Process().
+  size_t buffer_size = 0;
+  for (int i = 0; i < kDeviceBufMs / 10; i++) {
+    EXPECT_EQ(0, WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
+    buffer_size += samples_per_frame_;
+    EXPECT_EQ(static_cast<int>(buffer_size),
+              WebRtcAec_system_delay(self_->aec));
+  }
+  return buffer_size;
+}
+
+void SystemDelayTest::RunStableStartup() {
+  // To make sure we have a full buffer when we verify stability we first fill
+  // up the far-end buffer with the same amount as we will report in through
+  // Process().
+  size_t buffer_size = BufferFillUp();
+
+  if (WebRtcAec_delay_agnostic_enabled(self_->aec) == 1) {
+    // In extended_filter mode we set the buffer size after the first processed
+    // 10 ms chunk. Hence, we don't need to wait for the reported system delay
+    // values to become stable.
+    RenderAndCapture(kDeviceBufMs);
+    buffer_size += samples_per_frame_;
+    EXPECT_EQ(0, self_->startup_phase);
+  } else {
+    // A stable device should be accepted and put in a regular process mode
+    // within |kStableConvergenceMs|.
+    int process_time_ms = 0;
+    for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) {
+      RenderAndCapture(kDeviceBufMs);
+      buffer_size += samples_per_frame_;
+      if (self_->startup_phase == 0) {
+        // We have left the startup phase.
+        break;
+      }
+    }
+    // Verify convergence time.
+    EXPECT_GT(kStableConvergenceMs, process_time_ms);
+  }
+  // Verify that the buffer has been flushed.
+  EXPECT_GE(static_cast<int>(buffer_size),
+            WebRtcAec_system_delay(self_->aec));
+}
+
+  int SystemDelayTest::MapBufferSizeToSamples(int size_in_ms,
+                                              bool extended_filter) {
+  // If extended_filter is disabled we add an extra 10 ms for the unprocessed
+  // frame. That is simply how the algorithm is constructed.
+  return static_cast<int>(
+      (size_in_ms + (extended_filter ? 0 : 10)) * samples_per_frame_ / 10);
+}
+
+// The tests should meet basic requirements and not be adjusted to what is
+// actually implemented. If we don't get good code coverage this way we either
+// lack in tests or have unnecessary code.
+// General requirements:
+// 1) If we add far-end data the system delay should be increased with the same
+//    amount we add.
+// 2) If the far-end buffer is full we should flush the oldest data to make room
+//    for the new. In this case the system delay is unaffected.
+// 3) There should exist a startup phase in which the buffer size is to be
+//    determined. In this phase no cancellation should be performed.
+// 4) Under stable conditions (small variations in device buffer sizes) the AEC
+//    should determine an appropriate local buffer size within
+//    |kStableConvergenceMs| ms.
+// 5) Under unstable conditions the AEC should make a decision within
+//    |kMaxConvergenceMs| ms.
+// 6) If the local buffer runs out of data we should stuff the buffer with older
+//    frames.
+// 7) The system delay should within |kMaxConvergenceMs| ms heal from
+//    disturbances like drift, data glitches, toggling events and outliers.
+// 8) The system delay should never become negative.
+
+TEST_F(SystemDelayTest, CorrectIncreaseWhenBufferFarend) {
+  // When we add data to the AEC buffer the internal system delay should be
+  // incremented with the same amount as the size of data.
+  // This process should be independent of DA-AEC and extended_filter mode.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+      EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        // Loop through a couple of calls to make sure the system delay
+        // increments correctly.
+        for (int j = 1; j <= 5; j++) {
+          EXPECT_EQ(0,
+                    WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
+          EXPECT_EQ(static_cast<int>(j * samples_per_frame_),
+                    WebRtcAec_system_delay(self_->aec));
+        }
+      }
+    }
+  }
+}
+
+// TODO(bjornv): Add a test to verify behavior if the far-end buffer is full
+// when adding new data.
+
+TEST_F(SystemDelayTest, CorrectDelayAfterStableStartup) {
+  // We run the system in a stable startup. After that we verify that the system
+  // delay meets the requirements.
+  // This process should be independent of DA-AEC and extended_filter mode.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+      EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
+
+        // Verify system delay with respect to requirements, i.e., the
+        // |system_delay| is in the interval [75%, 100%] of what's reported on
+        // the average.
+        // In extended_filter mode we target 50% and measure after one processed
+        // 10 ms chunk.
+        int average_reported_delay =
+            static_cast<int>(kDeviceBufMs * samples_per_frame_ / 10);
+        EXPECT_GE(average_reported_delay, WebRtcAec_system_delay(self_->aec));
+        int lower_bound = WebRtcAec_extended_filter_enabled(self_->aec)
+                              ? average_reported_delay / 2 - samples_per_frame_
+                              : average_reported_delay * 3 / 4;
+        EXPECT_LE(lower_bound, WebRtcAec_system_delay(self_->aec));
+      }
+    }
+  }
+}
+
+TEST_F(SystemDelayTest, CorrectDelayAfterUnstableStartup) {
+  // This test does not apply in extended_filter mode, since we only use the
+  // the first 10 ms chunk to determine a reasonable buffer size. Neither does
+  // it apply if DA-AEC is on because that overrides the startup procedure.
+  WebRtcAec_enable_extended_filter(self_->aec, 0);
+  EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(self_->aec));
+  WebRtcAec_enable_delay_agnostic(self_->aec, 0);
+  EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(self_->aec));
+
+  // In an unstable system we would start processing after |kMaxConvergenceMs|.
+  // On the last frame the AEC buffer is adjusted to 60% of the last reported
+  // device buffer size.
+  // We construct an unstable system by altering the device buffer size between
+  // two values |kDeviceBufMs| +- 25 ms.
+  for (size_t i = 0; i < kNumSampleRates; i++) {
+    Init(kSampleRateHz[i]);
+
+    // To make sure we have a full buffer when we verify stability we first fill
+    // up the far-end buffer with the same amount as we will report in on the
+    // average through Process().
+    size_t buffer_size = BufferFillUp();
+
+    int buffer_offset_ms = 25;
+    int reported_delay_ms = 0;
+    int process_time_ms = 0;
+    for (; process_time_ms <= kMaxConvergenceMs; process_time_ms += 10) {
+      reported_delay_ms = kDeviceBufMs + buffer_offset_ms;
+      RenderAndCapture(reported_delay_ms);
+      buffer_size += samples_per_frame_;
+      buffer_offset_ms = -buffer_offset_ms;
+      if (self_->startup_phase == 0) {
+        // We have left the startup phase.
+        break;
+      }
+    }
+    // Verify convergence time.
+    EXPECT_GE(kMaxConvergenceMs, process_time_ms);
+    // Verify that the buffer has been flushed.
+    EXPECT_GE(static_cast<int>(buffer_size),
+              WebRtcAec_system_delay(self_->aec));
+
+    // Verify system delay with respect to requirements, i.e., the
+    // |system_delay| is in the interval [60%, 100%] of what's last reported.
+    EXPECT_GE(static_cast<int>(reported_delay_ms * samples_per_frame_ / 10),
+              WebRtcAec_system_delay(self_->aec));
+    EXPECT_LE(
+        static_cast<int>(reported_delay_ms * samples_per_frame_ / 10 * 3 / 5),
+        WebRtcAec_system_delay(self_->aec));
+  }
+}
+
+TEST_F(SystemDelayTest, CorrectDelayAfterStableBufferBuildUp) {
+  // This test does not apply in extended_filter mode, since we only use the
+  // the first 10 ms chunk to determine a reasonable buffer size. Neither does
+  // it apply if DA-AEC is on because that overrides the startup procedure.
+  WebRtcAec_enable_extended_filter(self_->aec, 0);
+  EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(self_->aec));
+  WebRtcAec_enable_delay_agnostic(self_->aec, 0);
+  EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(self_->aec));
+
+  // In this test we start by establishing the device buffer size during stable
+  // conditions, but with an empty internal far-end buffer. Once that is done we
+  // verify that the system delay is increased correctly until we have reach an
+  // internal buffer size of 75% of what's been reported.
+  for (size_t i = 0; i < kNumSampleRates; i++) {
+    Init(kSampleRateHz[i]);
+
+    // We assume that running |kStableConvergenceMs| calls will put the
+    // algorithm in a state where the device buffer size has been determined. We
+    // can make that assumption since we have a separate stability test.
+    int process_time_ms = 0;
+    for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) {
+      EXPECT_EQ(0,
+                WebRtcAec_Process(handle_,
+                                  &near_ptr_,
+                                  1,
+                                  &out_ptr_,
+                                  samples_per_frame_,
+                                  kDeviceBufMs,
+                                  0));
+    }
+    // Verify that a buffer size has been established.
+    EXPECT_EQ(0, self_->checkBuffSize);
+
+    // We now have established the required buffer size. Let us verify that we
+    // fill up before leaving the startup phase for normal processing.
+    size_t buffer_size = 0;
+    size_t target_buffer_size = kDeviceBufMs * samples_per_frame_ / 10 * 3 / 4;
+    process_time_ms = 0;
+    for (; process_time_ms <= kMaxConvergenceMs; process_time_ms += 10) {
+      RenderAndCapture(kDeviceBufMs);
+      buffer_size += samples_per_frame_;
+      if (self_->startup_phase == 0) {
+        // We have left the startup phase.
+        break;
+      }
+    }
+    // Verify convergence time.
+    EXPECT_GT(kMaxConvergenceMs, process_time_ms);
+    // Verify that the buffer has reached the desired size.
+    EXPECT_LE(static_cast<int>(target_buffer_size),
+              WebRtcAec_system_delay(self_->aec));
+
+    // Verify normal behavior (system delay is kept constant) after startup by
+    // running a couple of calls to BufferFarend() and Process().
+    for (int j = 0; j < 6; j++) {
+      int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
+      RenderAndCapture(kDeviceBufMs);
+      EXPECT_EQ(system_delay_before_calls, WebRtcAec_system_delay(self_->aec));
+    }
+  }
+}
+
+TEST_F(SystemDelayTest, CorrectDelayWhenBufferUnderrun) {
+  // Here we test a buffer under run scenario. If we keep on calling
+  // WebRtcAec_Process() we will finally run out of data, but should
+  // automatically stuff the buffer. We verify this behavior by checking if the
+  // system delay goes negative.
+  // This process should be independent of DA-AEC and extended_filter mode.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+      EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
+
+        // The AEC has now left the Startup phase. We now have at most
+        // |kStableConvergenceMs| in the buffer. Keep on calling Process() until
+        // we run out of data and verify that the system delay is non-negative.
+        for (int j = 0; j <= kStableConvergenceMs; j += 10) {
+          EXPECT_EQ(0, WebRtcAec_Process(handle_, &near_ptr_, 1, &out_ptr_,
+                                         samples_per_frame_, kDeviceBufMs, 0));
+          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+        }
+      }
+    }
+  }
+}
+
+TEST_F(SystemDelayTest, CorrectDelayDuringDrift) {
+  // This drift test should verify that the system delay is never exceeding the
+  // device buffer. The drift is simulated by decreasing the reported device
+  // buffer size by 1 ms every 100 ms. If the device buffer size goes below 30
+  // ms we jump (add) 10 ms to give a repeated pattern.
+
+  // This process should be independent of DA-AEC and extended_filter mode.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+      EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
+
+        // We have left the startup phase and proceed with normal processing.
+        int jump = 0;
+        for (int j = 0; j < 1000; j++) {
+          // Drift = -1 ms per 100 ms of data.
+          int device_buf_ms = kDeviceBufMs - (j / 10) + jump;
+          int device_buf = MapBufferSizeToSamples(device_buf_ms,
+                                                  extended_filter == 1);
+
+          if (device_buf_ms < 30) {
+            // Add 10 ms data, taking affect next frame.
+            jump += 10;
+          }
+          RenderAndCapture(device_buf_ms);
+
+          // Verify that the system delay does not exceed the device buffer.
+          EXPECT_GE(device_buf, WebRtcAec_system_delay(self_->aec));
+
+          // Verify that the system delay is non-negative.
+          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+        }
+      }
+    }
+  }
+}
+
+TEST_F(SystemDelayTest, ShouldRecoverAfterGlitch) {
+  // This glitch test should verify that the system delay recovers if there is
+  // a glitch in data. The data glitch is constructed as 200 ms of buffering
+  // after which the stable procedure continues. The glitch is never reported by
+  // the device.
+  // The system is said to be in a non-causal state if the difference between
+  // the device buffer and system delay is less than a block (64 samples).
+
+  // This process should be independent of DA-AEC and extended_filter mode.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+      EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
+        int device_buf = MapBufferSizeToSamples(kDeviceBufMs,
+                                                extended_filter == 1);
+        // Glitch state.
+        for (int j = 0; j < 20; j++) {
+          EXPECT_EQ(0,
+                    WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
+          // No need to verify system delay, since that is done in a separate
+          // test.
+        }
+        // Verify that we are in a non-causal state, i.e.,
+        // |system_delay| > |device_buf|.
+        EXPECT_LT(device_buf, WebRtcAec_system_delay(self_->aec));
+
+        // Recover state. Should recover at least 4 ms of data per 10 ms, hence
+        // a glitch of 200 ms will take at most 200 * 10 / 4 = 500 ms to recover
+        // from.
+        bool non_causal = true;  // We are currently in a non-causal state.
+        for (int j = 0; j < 50; j++) {
+          int system_delay_before = WebRtcAec_system_delay(self_->aec);
+          RenderAndCapture(kDeviceBufMs);
+          int system_delay_after = WebRtcAec_system_delay(self_->aec);
+          // We have recovered if
+          // |device_buf| - |system_delay_after| >= PART_LEN (1 block).
+          // During recovery, |system_delay_after| < |system_delay_before|,
+          // otherwise they are equal.
+          if (non_causal) {
+            EXPECT_LT(system_delay_after, system_delay_before);
+            if (device_buf - system_delay_after >= PART_LEN) {
+              non_causal = false;
+            }
+          } else {
+            EXPECT_EQ(system_delay_before, system_delay_after);
+          }
+          // Verify that the system delay is non-negative.
+          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+        }
+        // Check that we have recovered.
+        EXPECT_FALSE(non_causal);
+      }
+    }
+  }
+}
+
+TEST_F(SystemDelayTest, UnaffectedWhenSpuriousDeviceBufferValues) {
+  // This test does not apply in extended_filter mode, since we only use the
+  // the first 10 ms chunk to determine a reasonable buffer size.
+  const int extended_filter = 0;
+  WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+  EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+
+  // Should be DA-AEC independent.
+  for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+    WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+    EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+    // This spurious device buffer data test aims at verifying that the system
+    // delay is unaffected by large outliers.
+    // The system is said to be in a non-causal state if the difference between
+    // the device buffer and system delay is less than a block (64 samples).
+    for (size_t i = 0; i < kNumSampleRates; i++) {
+      Init(kSampleRateHz[i]);
+      RunStableStartup();
+      int device_buf = MapBufferSizeToSamples(kDeviceBufMs,
+                                              extended_filter == 1);
+
+      // Normal state. We are currently not in a non-causal state.
+      bool non_causal = false;
+
+      // Run 1 s and replace device buffer size with 500 ms every 100 ms.
+      for (int j = 0; j < 100; j++) {
+        int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
+        int device_buf_ms = j % 10 == 0 ? 500 : kDeviceBufMs;
+        RenderAndCapture(device_buf_ms);
+
+        // Check for non-causality.
+        if (device_buf - WebRtcAec_system_delay(self_->aec) < PART_LEN) {
+          non_causal = true;
+        }
+        EXPECT_FALSE(non_causal);
+        EXPECT_EQ(system_delay_before_calls,
+                  WebRtcAec_system_delay(self_->aec));
+
+        // Verify that the system delay is non-negative.
+        EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+      }
+    }
+  }
+}
+
+TEST_F(SystemDelayTest, CorrectImpactWhenTogglingDeviceBufferValues) {
+  // This test aims at verifying that the system delay is "unaffected" by
+  // toggling values reported by the device.
+  // The test is constructed such that every other device buffer value is zero
+  // and then 2 * |kDeviceBufMs|, hence the size is constant on the average. The
+  // zero values will force us into a non-causal state and thereby lowering the
+  // system delay until we basically run out of data. Once that happens the
+  // buffer will be stuffed.
+  // TODO(bjornv): This test will have a better impact if we verified that the
+  // delay estimate goes up when the system delay goes down to meet the average
+  // device buffer size.
+
+  // This test does not apply if DA-AEC is enabled and extended_filter mode
+  // disabled.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+      EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+      if (extended_filter == 0 && da_aec == 1) {
+        continue;
+      }
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
+        const int device_buf = MapBufferSizeToSamples(kDeviceBufMs,
+                                                      extended_filter == 1);
+
+        // Normal state. We are currently not in a non-causal state.
+        bool non_causal = false;
+
+        // Loop through 100 frames (both render and capture), which equals 1 s
+        // of data. Every odd frame we set the device buffer size to
+        // 2 * |kDeviceBufMs| and even frames we set the device buffer size to
+        // zero.
+        for (int j = 0; j < 100; j++) {
+          int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
+          int device_buf_ms = 2 * (j % 2) * kDeviceBufMs;
+          RenderAndCapture(device_buf_ms);
+
+          // Check for non-causality, compared with the average device buffer
+          // size.
+          non_causal |= (device_buf - WebRtcAec_system_delay(self_->aec) < 64);
+          EXPECT_GE(system_delay_before_calls,
+                    WebRtcAec_system_delay(self_->aec));
+
+          // Verify that the system delay is non-negative.
+          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+        }
+        // Verify we are not in a non-causal state.
+        EXPECT_FALSE(non_causal);
+      }
+    }
+  }
+}
+
+}  // namespace
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.c
new file mode 100644
index 00000000..b801f07a
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.c
@@ -0,0 +1,1233 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+#include "webrtc/system_wrappers/interface/compile_assert_c.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
+
+#ifdef AEC_DEBUG
+FILE *dfile;
+FILE *testfile;
+#endif
+
+const int16_t WebRtcAecm_kCosTable[] = {
+    8192,  8190,  8187,  8180,  8172,  8160,  8147,  8130,  8112,
+    8091,  8067,  8041,  8012,  7982,  7948,  7912,  7874,  7834,
+    7791,  7745,  7697,  7647,  7595,  7540,  7483,  7424,  7362,
+    7299,  7233,  7164,  7094,  7021,  6947,  6870,  6791,  6710,
+    6627,  6542,  6455,  6366,  6275,  6182,  6087,  5991,  5892,
+    5792,  5690,  5586,  5481,  5374,  5265,  5155,  5043,  4930,
+    4815,  4698,  4580,  4461,  4341,  4219,  4096,  3971,  3845,
+    3719,  3591,  3462,  3331,  3200,  3068,  2935,  2801,  2667,
+    2531,  2395,  2258,  2120,  1981,  1842,  1703,  1563,  1422,
+    1281,  1140,   998,   856,   713,   571,   428,   285,   142,
+       0,  -142,  -285,  -428,  -571,  -713,  -856,  -998, -1140,
+   -1281, -1422, -1563, -1703, -1842, -1981, -2120, -2258, -2395,
+   -2531, -2667, -2801, -2935, -3068, -3200, -3331, -3462, -3591,
+   -3719, -3845, -3971, -4095, -4219, -4341, -4461, -4580, -4698,
+   -4815, -4930, -5043, -5155, -5265, -5374, -5481, -5586, -5690,
+   -5792, -5892, -5991, -6087, -6182, -6275, -6366, -6455, -6542,
+   -6627, -6710, -6791, -6870, -6947, -7021, -7094, -7164, -7233,
+   -7299, -7362, -7424, -7483, -7540, -7595, -7647, -7697, -7745,
+   -7791, -7834, -7874, -7912, -7948, -7982, -8012, -8041, -8067,
+   -8091, -8112, -8130, -8147, -8160, -8172, -8180, -8187, -8190,
+   -8191, -8190, -8187, -8180, -8172, -8160, -8147, -8130, -8112,
+   -8091, -8067, -8041, -8012, -7982, -7948, -7912, -7874, -7834,
+   -7791, -7745, -7697, -7647, -7595, -7540, -7483, -7424, -7362,
+   -7299, -7233, -7164, -7094, -7021, -6947, -6870, -6791, -6710,
+   -6627, -6542, -6455, -6366, -6275, -6182, -6087, -5991, -5892,
+   -5792, -5690, -5586, -5481, -5374, -5265, -5155, -5043, -4930,
+   -4815, -4698, -4580, -4461, -4341, -4219, -4096, -3971, -3845,
+   -3719, -3591, -3462, -3331, -3200, -3068, -2935, -2801, -2667,
+   -2531, -2395, -2258, -2120, -1981, -1842, -1703, -1563, -1422,
+   -1281, -1140,  -998,  -856,  -713,  -571,  -428,  -285,  -142,
+       0,   142,   285,   428,   571,   713,   856,   998,  1140,
+    1281,  1422,  1563,  1703,  1842,  1981,  2120,  2258,  2395,
+    2531,  2667,  2801,  2935,  3068,  3200,  3331,  3462,  3591,
+    3719,  3845,  3971,  4095,  4219,  4341,  4461,  4580,  4698,
+    4815,  4930,  5043,  5155,  5265,  5374,  5481,  5586,  5690,
+    5792,  5892,  5991,  6087,  6182,  6275,  6366,  6455,  6542,
+    6627,  6710,  6791,  6870,  6947,  7021,  7094,  7164,  7233,
+    7299,  7362,  7424,  7483,  7540,  7595,  7647,  7697,  7745,
+    7791,  7834,  7874,  7912,  7948,  7982,  8012,  8041,  8067,
+    8091,  8112,  8130,  8147,  8160,  8172,  8180,  8187,  8190
+};
+
+const int16_t WebRtcAecm_kSinTable[] = {
+       0,    142,    285,    428,    571,    713,    856,    998,
+    1140,   1281,   1422,   1563,   1703,   1842,   1981,   2120,
+    2258,   2395,   2531,   2667,   2801,   2935,   3068,   3200,
+    3331,   3462,   3591,   3719,   3845,   3971,   4095,   4219,
+    4341,   4461,   4580,   4698,   4815,   4930,   5043,   5155,
+    5265,   5374,   5481,   5586,   5690,   5792,   5892,   5991,
+    6087,   6182,   6275,   6366,   6455,   6542,   6627,   6710,
+    6791,   6870,   6947,   7021,   7094,   7164,   7233,   7299,
+    7362,   7424,   7483,   7540,   7595,   7647,   7697,   7745,
+    7791,   7834,   7874,   7912,   7948,   7982,   8012,   8041,
+    8067,   8091,   8112,   8130,   8147,   8160,   8172,   8180,
+    8187,   8190,   8191,   8190,   8187,   8180,   8172,   8160,
+    8147,   8130,   8112,   8091,   8067,   8041,   8012,   7982,
+    7948,   7912,   7874,   7834,   7791,   7745,   7697,   7647,
+    7595,   7540,   7483,   7424,   7362,   7299,   7233,   7164,
+    7094,   7021,   6947,   6870,   6791,   6710,   6627,   6542,
+    6455,   6366,   6275,   6182,   6087,   5991,   5892,   5792,
+    5690,   5586,   5481,   5374,   5265,   5155,   5043,   4930,
+    4815,   4698,   4580,   4461,   4341,   4219,   4096,   3971,
+    3845,   3719,   3591,   3462,   3331,   3200,   3068,   2935,
+    2801,   2667,   2531,   2395,   2258,   2120,   1981,   1842,
+    1703,   1563,   1422,   1281,   1140,    998,    856,    713,
+     571,    428,    285,    142,      0,   -142,   -285,   -428,
+    -571,   -713,   -856,   -998,  -1140,  -1281,  -1422,  -1563,
+   -1703,  -1842,  -1981,  -2120,  -2258,  -2395,  -2531,  -2667,
+   -2801,  -2935,  -3068,  -3200,  -3331,  -3462,  -3591,  -3719,
+   -3845,  -3971,  -4095,  -4219,  -4341,  -4461,  -4580,  -4698,
+   -4815,  -4930,  -5043,  -5155,  -5265,  -5374,  -5481,  -5586,
+   -5690,  -5792,  -5892,  -5991,  -6087,  -6182,  -6275,  -6366,
+   -6455,  -6542,  -6627,  -6710,  -6791,  -6870,  -6947,  -7021,
+   -7094,  -7164,  -7233,  -7299,  -7362,  -7424,  -7483,  -7540,
+   -7595,  -7647,  -7697,  -7745,  -7791,  -7834,  -7874,  -7912,
+   -7948,  -7982,  -8012,  -8041,  -8067,  -8091,  -8112,  -8130,
+   -8147,  -8160,  -8172,  -8180,  -8187,  -8190,  -8191,  -8190,
+   -8187,  -8180,  -8172,  -8160,  -8147,  -8130,  -8112,  -8091,
+   -8067,  -8041,  -8012,  -7982,  -7948,  -7912,  -7874,  -7834,
+   -7791,  -7745,  -7697,  -7647,  -7595,  -7540,  -7483,  -7424,
+   -7362,  -7299,  -7233,  -7164,  -7094,  -7021,  -6947,  -6870,
+   -6791,  -6710,  -6627,  -6542,  -6455,  -6366,  -6275,  -6182,
+   -6087,  -5991,  -5892,  -5792,  -5690,  -5586,  -5481,  -5374,
+   -5265,  -5155,  -5043,  -4930,  -4815,  -4698,  -4580,  -4461,
+   -4341,  -4219,  -4096,  -3971,  -3845,  -3719,  -3591,  -3462,
+   -3331,  -3200,  -3068,  -2935,  -2801,  -2667,  -2531,  -2395,
+   -2258,  -2120,  -1981,  -1842,  -1703,  -1563,  -1422,  -1281,
+   -1140,   -998,   -856,   -713,   -571,   -428,   -285,   -142
+};
+
+// Initialization table for echo channel in 8 kHz
+static const int16_t kChannelStored8kHz[PART_LEN1] = {
+    2040,   1815,   1590,   1498,   1405,   1395,   1385,   1418,
+    1451,   1506,   1562,   1644,   1726,   1804,   1882,   1918,
+    1953,   1982,   2010,   2025,   2040,   2034,   2027,   2021,
+    2014,   1997,   1980,   1925,   1869,   1800,   1732,   1683,
+    1635,   1604,   1572,   1545,   1517,   1481,   1444,   1405,
+    1367,   1331,   1294,   1270,   1245,   1239,   1233,   1247,
+    1260,   1282,   1303,   1338,   1373,   1407,   1441,   1470,
+    1499,   1524,   1549,   1565,   1582,   1601,   1621,   1649,
+    1676
+};
+
+// Initialization table for echo channel in 16 kHz
+static const int16_t kChannelStored16kHz[PART_LEN1] = {
+    2040,   1590,   1405,   1385,   1451,   1562,   1726,   1882,
+    1953,   2010,   2040,   2027,   2014,   1980,   1869,   1732,
+    1635,   1572,   1517,   1444,   1367,   1294,   1245,   1233,
+    1260,   1303,   1373,   1441,   1499,   1549,   1582,   1621,
+    1676,   1741,   1802,   1861,   1921,   1983,   2040,   2102,
+    2170,   2265,   2375,   2515,   2651,   2781,   2922,   3075,
+    3253,   3471,   3738,   3976,   4151,   4258,   4308,   4288,
+    4270,   4253,   4237,   4179,   4086,   3947,   3757,   3484,
+    3153
+};
+
+// Moves the pointer to the next entry and inserts |far_spectrum| and
+// corresponding Q-domain in its buffer.
+//
+// Inputs:
+//      - self          : Pointer to the delay estimation instance
+//      - far_spectrum  : Pointer to the far end spectrum
+//      - far_q         : Q-domain of far end spectrum
+//
+void WebRtcAecm_UpdateFarHistory(AecmCore* self,
+                                 uint16_t* far_spectrum,
+                                 int far_q) {
+  // Get new buffer position
+  self->far_history_pos++;
+  if (self->far_history_pos >= MAX_DELAY) {
+    self->far_history_pos = 0;
+  }
+  // Update Q-domain buffer
+  self->far_q_domains[self->far_history_pos] = far_q;
+  // Update far end spectrum buffer
+  memcpy(&(self->far_history[self->far_history_pos * PART_LEN1]),
+         far_spectrum,
+         sizeof(uint16_t) * PART_LEN1);
+}
+
+// Returns a pointer to the far end spectrum aligned to current near end
+// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been
+// called before AlignedFarend(...). Otherwise, you get the pointer to the
+// previous frame. The memory is only valid until the next call of
+// WebRtc_DelayEstimatorProcessFix(...).
+//
+// Inputs:
+//      - self              : Pointer to the AECM instance.
+//      - delay             : Current delay estimate.
+//
+// Output:
+//      - far_q             : The Q-domain of the aligned far end spectrum
+//
+// Return value:
+//      - far_spectrum      : Pointer to the aligned far end spectrum
+//                            NULL - Error
+//
+const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self,
+                                         int* far_q,
+                                         int delay) {
+  int buffer_position = 0;
+  assert(self != NULL);
+  buffer_position = self->far_history_pos - delay;
+
+  // Check buffer position
+  if (buffer_position < 0) {
+    buffer_position += MAX_DELAY;
+  }
+  // Get Q-domain
+  *far_q = self->far_q_domains[buffer_position];
+  // Return far end spectrum
+  return &(self->far_history[buffer_position * PART_LEN1]);
+}
+
+// Declare function pointers.
+CalcLinearEnergies WebRtcAecm_CalcLinearEnergies;
+StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
+ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
+
+AecmCore* WebRtcAecm_CreateCore() {
+    AecmCore* aecm = malloc(sizeof(AecmCore));
+
+    aecm->farFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
+                                            sizeof(int16_t));
+    if (!aecm->farFrameBuf)
+    {
+        WebRtcAecm_FreeCore(aecm);
+        return NULL;
+    }
+
+    aecm->nearNoisyFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
+                                                  sizeof(int16_t));
+    if (!aecm->nearNoisyFrameBuf)
+    {
+        WebRtcAecm_FreeCore(aecm);
+        return NULL;
+    }
+
+    aecm->nearCleanFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
+                                                  sizeof(int16_t));
+    if (!aecm->nearCleanFrameBuf)
+    {
+        WebRtcAecm_FreeCore(aecm);
+        return NULL;
+    }
+
+    aecm->outFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
+                                            sizeof(int16_t));
+    if (!aecm->outFrameBuf)
+    {
+        WebRtcAecm_FreeCore(aecm);
+        return NULL;
+    }
+
+    aecm->delay_estimator_farend = WebRtc_CreateDelayEstimatorFarend(PART_LEN1,
+                                                                     MAX_DELAY);
+    if (aecm->delay_estimator_farend == NULL) {
+      WebRtcAecm_FreeCore(aecm);
+      return NULL;
+    }
+    aecm->delay_estimator =
+        WebRtc_CreateDelayEstimator(aecm->delay_estimator_farend, 0);
+    if (aecm->delay_estimator == NULL) {
+      WebRtcAecm_FreeCore(aecm);
+      return NULL;
+    }
+    // TODO(bjornv): Explicitly disable robust delay validation until no
+    // performance regression has been established.  Then remove the line.
+    WebRtc_enable_robust_validation(aecm->delay_estimator, 0);
+
+    aecm->real_fft = WebRtcSpl_CreateRealFFT(PART_LEN_SHIFT);
+    if (aecm->real_fft == NULL) {
+      WebRtcAecm_FreeCore(aecm);
+      return NULL;
+    }
+
+    // Init some aecm pointers. 16 and 32 byte alignment is only necessary
+    // for Neon code currently.
+    aecm->xBuf = (int16_t*) (((uintptr_t)aecm->xBuf_buf + 31) & ~ 31);
+    aecm->dBufClean = (int16_t*) (((uintptr_t)aecm->dBufClean_buf + 31) & ~ 31);
+    aecm->dBufNoisy = (int16_t*) (((uintptr_t)aecm->dBufNoisy_buf + 31) & ~ 31);
+    aecm->outBuf = (int16_t*) (((uintptr_t)aecm->outBuf_buf + 15) & ~ 15);
+    aecm->channelStored = (int16_t*) (((uintptr_t)
+                                             aecm->channelStored_buf + 15) & ~ 15);
+    aecm->channelAdapt16 = (int16_t*) (((uintptr_t)
+                                              aecm->channelAdapt16_buf + 15) & ~ 15);
+    aecm->channelAdapt32 = (int32_t*) (((uintptr_t)
+                                              aecm->channelAdapt32_buf + 31) & ~ 31);
+
+    return aecm;
+}
+
+void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path) {
+    int i = 0;
+
+    // Reset the stored channel
+    memcpy(aecm->channelStored, echo_path, sizeof(int16_t) * PART_LEN1);
+    // Reset the adapted channels
+    memcpy(aecm->channelAdapt16, echo_path, sizeof(int16_t) * PART_LEN1);
+    for (i = 0; i < PART_LEN1; i++)
+    {
+        aecm->channelAdapt32[i] = (int32_t)aecm->channelAdapt16[i] << 16;
+    }
+
+    // Reset channel storing variables
+    aecm->mseAdaptOld = 1000;
+    aecm->mseStoredOld = 1000;
+    aecm->mseThreshold = WEBRTC_SPL_WORD32_MAX;
+    aecm->mseChannelCount = 0;
+}
+
+static void CalcLinearEnergiesC(AecmCore* aecm,
+                                const uint16_t* far_spectrum,
+                                int32_t* echo_est,
+                                uint32_t* far_energy,
+                                uint32_t* echo_energy_adapt,
+                                uint32_t* echo_energy_stored) {
+    int i;
+
+    // Get energy for the delayed far end signal and estimated
+    // echo using both stored and adapted channels.
+    for (i = 0; i < PART_LEN1; i++)
+    {
+        echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
+                                           far_spectrum[i]);
+        (*far_energy) += (uint32_t)(far_spectrum[i]);
+        *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i];
+        (*echo_energy_stored) += (uint32_t)echo_est[i];
+    }
+}
+
+static void StoreAdaptiveChannelC(AecmCore* aecm,
+                                  const uint16_t* far_spectrum,
+                                  int32_t* echo_est) {
+    int i;
+
+    // During startup we store the channel every block.
+    memcpy(aecm->channelStored, aecm->channelAdapt16, sizeof(int16_t) * PART_LEN1);
+    // Recalculate echo estimate
+    for (i = 0; i < PART_LEN; i += 4)
+    {
+        echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
+                                           far_spectrum[i]);
+        echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1],
+                                           far_spectrum[i + 1]);
+        echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2],
+                                           far_spectrum[i + 2]);
+        echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3],
+                                           far_spectrum[i + 3]);
+    }
+    echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
+                                       far_spectrum[i]);
+}
+
+static void ResetAdaptiveChannelC(AecmCore* aecm) {
+    int i;
+
+    // The stored channel has a significantly lower MSE than the adaptive one for
+    // two consecutive calculations. Reset the adaptive channel.
+    memcpy(aecm->channelAdapt16, aecm->channelStored,
+           sizeof(int16_t) * PART_LEN1);
+    // Restore the W32 channel
+    for (i = 0; i < PART_LEN; i += 4)
+    {
+        aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16;
+        aecm->channelAdapt32[i + 1] = (int32_t)aecm->channelStored[i + 1] << 16;
+        aecm->channelAdapt32[i + 2] = (int32_t)aecm->channelStored[i + 2] << 16;
+        aecm->channelAdapt32[i + 3] = (int32_t)aecm->channelStored[i + 3] << 16;
+    }
+    aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16;
+}
+
+// Initialize function pointers for ARM Neon platform.
+#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON)
+static void WebRtcAecm_InitNeon(void)
+{
+  WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannelNeon;
+  WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannelNeon;
+  WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon;
+}
+#endif
+
+// Initialize function pointers for MIPS platform.
+#if defined(MIPS32_LE)
+static void WebRtcAecm_InitMips(void)
+{
+#if defined(MIPS_DSP_R1_LE)
+  WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannel_mips;
+  WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannel_mips;
+#endif
+  WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergies_mips;
+}
+#endif
+
+// WebRtcAecm_InitCore(...)
+//
+// This function initializes the AECM instant created with WebRtcAecm_CreateCore(...)
+// Input:
+//      - aecm            : Pointer to the Echo Suppression instance
+//      - samplingFreq   : Sampling Frequency
+//
+// Output:
+//      - aecm            : Initialized instance
+//
+// Return value         :  0 - Ok
+//                        -1 - Error
+//
+int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq) {
+    int i = 0;
+    int32_t tmp32 = PART_LEN1 * PART_LEN1;
+    int16_t tmp16 = PART_LEN1;
+
+    if (samplingFreq != 8000 && samplingFreq != 16000)
+    {
+        samplingFreq = 8000;
+        return -1;
+    }
+    // sanity check of sampling frequency
+    aecm->mult = (int16_t)samplingFreq / 8000;
+
+    aecm->farBufWritePos = 0;
+    aecm->farBufReadPos = 0;
+    aecm->knownDelay = 0;
+    aecm->lastKnownDelay = 0;
+
+    WebRtc_InitBuffer(aecm->farFrameBuf);
+    WebRtc_InitBuffer(aecm->nearNoisyFrameBuf);
+    WebRtc_InitBuffer(aecm->nearCleanFrameBuf);
+    WebRtc_InitBuffer(aecm->outFrameBuf);
+
+    memset(aecm->xBuf_buf, 0, sizeof(aecm->xBuf_buf));
+    memset(aecm->dBufClean_buf, 0, sizeof(aecm->dBufClean_buf));
+    memset(aecm->dBufNoisy_buf, 0, sizeof(aecm->dBufNoisy_buf));
+    memset(aecm->outBuf_buf, 0, sizeof(aecm->outBuf_buf));
+
+    aecm->seed = 666;
+    aecm->totCount = 0;
+
+    if (WebRtc_InitDelayEstimatorFarend(aecm->delay_estimator_farend) != 0) {
+      return -1;
+    }
+    if (WebRtc_InitDelayEstimator(aecm->delay_estimator) != 0) {
+      return -1;
+    }
+    // Set far end histories to zero
+    memset(aecm->far_history, 0, sizeof(uint16_t) * PART_LEN1 * MAX_DELAY);
+    memset(aecm->far_q_domains, 0, sizeof(int) * MAX_DELAY);
+    aecm->far_history_pos = MAX_DELAY;
+
+    aecm->nlpFlag = 1;
+    aecm->fixedDelay = -1;
+
+    aecm->dfaCleanQDomain = 0;
+    aecm->dfaCleanQDomainOld = 0;
+    aecm->dfaNoisyQDomain = 0;
+    aecm->dfaNoisyQDomainOld = 0;
+
+    memset(aecm->nearLogEnergy, 0, sizeof(aecm->nearLogEnergy));
+    aecm->farLogEnergy = 0;
+    memset(aecm->echoAdaptLogEnergy, 0, sizeof(aecm->echoAdaptLogEnergy));
+    memset(aecm->echoStoredLogEnergy, 0, sizeof(aecm->echoStoredLogEnergy));
+
+    // Initialize the echo channels with a stored shape.
+    if (samplingFreq == 8000)
+    {
+        WebRtcAecm_InitEchoPathCore(aecm, kChannelStored8kHz);
+    }
+    else
+    {
+        WebRtcAecm_InitEchoPathCore(aecm, kChannelStored16kHz);
+    }
+
+    memset(aecm->echoFilt, 0, sizeof(aecm->echoFilt));
+    memset(aecm->nearFilt, 0, sizeof(aecm->nearFilt));
+    aecm->noiseEstCtr = 0;
+
+    aecm->cngMode = AecmTrue;
+
+    memset(aecm->noiseEstTooLowCtr, 0, sizeof(aecm->noiseEstTooLowCtr));
+    memset(aecm->noiseEstTooHighCtr, 0, sizeof(aecm->noiseEstTooHighCtr));
+    // Shape the initial noise level to an approximate pink noise.
+    for (i = 0; i < (PART_LEN1 >> 1) - 1; i++)
+    {
+        aecm->noiseEst[i] = (tmp32 << 8);
+        tmp16--;
+        tmp32 -= (int32_t)((tmp16 << 1) + 1);
+    }
+    for (; i < PART_LEN1; i++)
+    {
+        aecm->noiseEst[i] = (tmp32 << 8);
+    }
+
+    aecm->farEnergyMin = WEBRTC_SPL_WORD16_MAX;
+    aecm->farEnergyMax = WEBRTC_SPL_WORD16_MIN;
+    aecm->farEnergyMaxMin = 0;
+    aecm->farEnergyVAD = FAR_ENERGY_MIN; // This prevents false speech detection at the
+                                         // beginning.
+    aecm->farEnergyMSE = 0;
+    aecm->currentVADValue = 0;
+    aecm->vadUpdateCount = 0;
+    aecm->firstVAD = 1;
+
+    aecm->startupState = 0;
+    aecm->supGain = SUPGAIN_DEFAULT;
+    aecm->supGainOld = SUPGAIN_DEFAULT;
+
+    aecm->supGainErrParamA = SUPGAIN_ERROR_PARAM_A;
+    aecm->supGainErrParamD = SUPGAIN_ERROR_PARAM_D;
+    aecm->supGainErrParamDiffAB = SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B;
+    aecm->supGainErrParamDiffBD = SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D;
+
+    // Assert a preprocessor definition at compile-time. It's an assumption
+    // used in assembly code, so check the assembly files before any change.
+    COMPILE_ASSERT(PART_LEN % 16 == 0);
+
+    // Initialize function pointers.
+    WebRtcAecm_CalcLinearEnergies = CalcLinearEnergiesC;
+    WebRtcAecm_StoreAdaptiveChannel = StoreAdaptiveChannelC;
+    WebRtcAecm_ResetAdaptiveChannel = ResetAdaptiveChannelC;
+
+#ifdef WEBRTC_DETECT_NEON
+    uint64_t features = WebRtc_GetCPUFeaturesARM();
+    if ((features & kCPUFeatureNEON) != 0)
+    {
+      WebRtcAecm_InitNeon();
+    }
+#elif defined(WEBRTC_HAS_NEON)
+    WebRtcAecm_InitNeon();
+#endif
+
+#if defined(MIPS32_LE)
+    WebRtcAecm_InitMips();
+#endif
+    return 0;
+}
+
+// TODO(bjornv): This function is currently not used. Add support for these
+// parameters from a higher level
+int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag) {
+    aecm->nlpFlag = nlpFlag;
+    aecm->fixedDelay = delay;
+
+    return 0;
+}
+
+void WebRtcAecm_FreeCore(AecmCore* aecm) {
+    if (aecm == NULL) {
+      return;
+    }
+
+    WebRtc_FreeBuffer(aecm->farFrameBuf);
+    WebRtc_FreeBuffer(aecm->nearNoisyFrameBuf);
+    WebRtc_FreeBuffer(aecm->nearCleanFrameBuf);
+    WebRtc_FreeBuffer(aecm->outFrameBuf);
+
+    WebRtc_FreeDelayEstimator(aecm->delay_estimator);
+    WebRtc_FreeDelayEstimatorFarend(aecm->delay_estimator_farend);
+    WebRtcSpl_FreeRealFFT(aecm->real_fft);
+
+    free(aecm);
+}
+
+int WebRtcAecm_ProcessFrame(AecmCore* aecm,
+                            const int16_t* farend,
+                            const int16_t* nearendNoisy,
+                            const int16_t* nearendClean,
+                            int16_t* out) {
+    int16_t outBlock_buf[PART_LEN + 8]; // Align buffer to 8-byte boundary.
+    int16_t* outBlock = (int16_t*) (((uintptr_t) outBlock_buf + 15) & ~ 15);
+
+    int16_t farFrame[FRAME_LEN];
+    const int16_t* out_ptr = NULL;
+    int size = 0;
+
+    // Buffer the current frame.
+    // Fetch an older one corresponding to the delay.
+    WebRtcAecm_BufferFarFrame(aecm, farend, FRAME_LEN);
+    WebRtcAecm_FetchFarFrame(aecm, farFrame, FRAME_LEN, aecm->knownDelay);
+
+    // Buffer the synchronized far and near frames,
+    // to pass the smaller blocks individually.
+    WebRtc_WriteBuffer(aecm->farFrameBuf, farFrame, FRAME_LEN);
+    WebRtc_WriteBuffer(aecm->nearNoisyFrameBuf, nearendNoisy, FRAME_LEN);
+    if (nearendClean != NULL)
+    {
+        WebRtc_WriteBuffer(aecm->nearCleanFrameBuf, nearendClean, FRAME_LEN);
+    }
+
+    // Process as many blocks as possible.
+    while (WebRtc_available_read(aecm->farFrameBuf) >= PART_LEN)
+    {
+        int16_t far_block[PART_LEN];
+        const int16_t* far_block_ptr = NULL;
+        int16_t near_noisy_block[PART_LEN];
+        const int16_t* near_noisy_block_ptr = NULL;
+
+        WebRtc_ReadBuffer(aecm->farFrameBuf, (void**) &far_block_ptr, far_block,
+                          PART_LEN);
+        WebRtc_ReadBuffer(aecm->nearNoisyFrameBuf,
+                          (void**) &near_noisy_block_ptr,
+                          near_noisy_block,
+                          PART_LEN);
+        if (nearendClean != NULL)
+        {
+            int16_t near_clean_block[PART_LEN];
+            const int16_t* near_clean_block_ptr = NULL;
+
+            WebRtc_ReadBuffer(aecm->nearCleanFrameBuf,
+                              (void**) &near_clean_block_ptr,
+                              near_clean_block,
+                              PART_LEN);
+            if (WebRtcAecm_ProcessBlock(aecm,
+                                        far_block_ptr,
+                                        near_noisy_block_ptr,
+                                        near_clean_block_ptr,
+                                        outBlock) == -1)
+            {
+                return -1;
+            }
+        } else
+        {
+            if (WebRtcAecm_ProcessBlock(aecm,
+                                        far_block_ptr,
+                                        near_noisy_block_ptr,
+                                        NULL,
+                                        outBlock) == -1)
+            {
+                return -1;
+            }
+        }
+
+        WebRtc_WriteBuffer(aecm->outFrameBuf, outBlock, PART_LEN);
+    }
+
+    // Stuff the out buffer if we have less than a frame to output.
+    // This should only happen for the first frame.
+    size = (int) WebRtc_available_read(aecm->outFrameBuf);
+    if (size < FRAME_LEN)
+    {
+        WebRtc_MoveReadPtr(aecm->outFrameBuf, size - FRAME_LEN);
+    }
+
+    // Obtain an output frame.
+    WebRtc_ReadBuffer(aecm->outFrameBuf, (void**) &out_ptr, out, FRAME_LEN);
+    if (out_ptr != out) {
+      // ReadBuffer() hasn't copied to |out| in this case.
+      memcpy(out, out_ptr, FRAME_LEN * sizeof(int16_t));
+    }
+
+    return 0;
+}
+
+// WebRtcAecm_AsymFilt(...)
+//
+// Performs asymmetric filtering.
+//
+// Inputs:
+//      - filtOld       : Previous filtered value.
+//      - inVal         : New input value.
+//      - stepSizePos   : Step size when we have a positive contribution.
+//      - stepSizeNeg   : Step size when we have a negative contribution.
+//
+// Output:
+//
+// Return: - Filtered value.
+//
+int16_t WebRtcAecm_AsymFilt(const int16_t filtOld, const int16_t inVal,
+                            const int16_t stepSizePos,
+                            const int16_t stepSizeNeg)
+{
+    int16_t retVal;
+
+    if ((filtOld == WEBRTC_SPL_WORD16_MAX) | (filtOld == WEBRTC_SPL_WORD16_MIN))
+    {
+        return inVal;
+    }
+    retVal = filtOld;
+    if (filtOld > inVal)
+    {
+        retVal -= (filtOld - inVal) >> stepSizeNeg;
+    } else
+    {
+        retVal += (inVal - filtOld) >> stepSizePos;
+    }
+
+    return retVal;
+}
+
+// ExtractFractionPart(a, zeros)
+//
+// returns the fraction part of |a|, with |zeros| number of leading zeros, as an
+// int16_t scaled to Q8. There is no sanity check of |a| in the sense that the
+// number of zeros match.
+static int16_t ExtractFractionPart(uint32_t a, int zeros) {
+  return (int16_t)(((a << zeros) & 0x7FFFFFFF) >> 23);
+}
+
+// Calculates and returns the log of |energy| in Q8. The input |energy| is
+// supposed to be in Q(|q_domain|).
+static int16_t LogOfEnergyInQ8(uint32_t energy, int q_domain) {
+  static const int16_t kLogLowValue = PART_LEN_SHIFT << 7;
+  int16_t log_energy_q8 = kLogLowValue;
+  if (energy > 0) {
+    int zeros = WebRtcSpl_NormU32(energy);
+    int16_t frac = ExtractFractionPart(energy, zeros);
+    // log2 of |energy| in Q8.
+    log_energy_q8 += ((31 - zeros) << 8) + frac - (q_domain << 8);
+  }
+  return log_energy_q8;
+}
+
+// WebRtcAecm_CalcEnergies(...)
+//
+// This function calculates the log of energies for nearend, farend and estimated
+// echoes. There is also an update of energy decision levels, i.e. internal VAD.
+//
+//
+// @param  aecm         [i/o]   Handle of the AECM instance.
+// @param  far_spectrum [in]    Pointer to farend spectrum.
+// @param  far_q        [in]    Q-domain of farend spectrum.
+// @param  nearEner     [in]    Near end energy for current block in
+//                              Q(aecm->dfaQDomain).
+// @param  echoEst      [out]   Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16).
+//
+void WebRtcAecm_CalcEnergies(AecmCore* aecm,
+                             const uint16_t* far_spectrum,
+                             const int16_t far_q,
+                             const uint32_t nearEner,
+                             int32_t* echoEst) {
+    // Local variables
+    uint32_t tmpAdapt = 0;
+    uint32_t tmpStored = 0;
+    uint32_t tmpFar = 0;
+
+    int i;
+
+    int16_t tmp16;
+    int16_t increase_max_shifts = 4;
+    int16_t decrease_max_shifts = 11;
+    int16_t increase_min_shifts = 11;
+    int16_t decrease_min_shifts = 3;
+
+    // Get log of near end energy and store in buffer
+
+    // Shift buffer
+    memmove(aecm->nearLogEnergy + 1, aecm->nearLogEnergy,
+            sizeof(int16_t) * (MAX_BUF_LEN - 1));
+
+    // Logarithm of integrated magnitude spectrum (nearEner)
+    aecm->nearLogEnergy[0] = LogOfEnergyInQ8(nearEner, aecm->dfaNoisyQDomain);
+
+    WebRtcAecm_CalcLinearEnergies(aecm, far_spectrum, echoEst, &tmpFar, &tmpAdapt, &tmpStored);
+
+    // Shift buffers
+    memmove(aecm->echoAdaptLogEnergy + 1, aecm->echoAdaptLogEnergy,
+            sizeof(int16_t) * (MAX_BUF_LEN - 1));
+    memmove(aecm->echoStoredLogEnergy + 1, aecm->echoStoredLogEnergy,
+            sizeof(int16_t) * (MAX_BUF_LEN - 1));
+
+    // Logarithm of delayed far end energy
+    aecm->farLogEnergy = LogOfEnergyInQ8(tmpFar, far_q);
+
+    // Logarithm of estimated echo energy through adapted channel
+    aecm->echoAdaptLogEnergy[0] = LogOfEnergyInQ8(tmpAdapt,
+                                                  RESOLUTION_CHANNEL16 + far_q);
+
+    // Logarithm of estimated echo energy through stored channel
+    aecm->echoStoredLogEnergy[0] =
+        LogOfEnergyInQ8(tmpStored, RESOLUTION_CHANNEL16 + far_q);
+
+    // Update farend energy levels (min, max, vad, mse)
+    if (aecm->farLogEnergy > FAR_ENERGY_MIN)
+    {
+        if (aecm->startupState == 0)
+        {
+            increase_max_shifts = 2;
+            decrease_min_shifts = 2;
+            increase_min_shifts = 8;
+        }
+
+        aecm->farEnergyMin = WebRtcAecm_AsymFilt(aecm->farEnergyMin, aecm->farLogEnergy,
+                                                 increase_min_shifts, decrease_min_shifts);
+        aecm->farEnergyMax = WebRtcAecm_AsymFilt(aecm->farEnergyMax, aecm->farLogEnergy,
+                                                 increase_max_shifts, decrease_max_shifts);
+        aecm->farEnergyMaxMin = (aecm->farEnergyMax - aecm->farEnergyMin);
+
+        // Dynamic VAD region size
+        tmp16 = 2560 - aecm->farEnergyMin;
+        if (tmp16 > 0)
+        {
+          tmp16 = (int16_t)((tmp16 * FAR_ENERGY_VAD_REGION) >> 9);
+        } else
+        {
+            tmp16 = 0;
+        }
+        tmp16 += FAR_ENERGY_VAD_REGION;
+
+        if ((aecm->startupState == 0) | (aecm->vadUpdateCount > 1024))
+        {
+            // In startup phase or VAD update halted
+            aecm->farEnergyVAD = aecm->farEnergyMin + tmp16;
+        } else
+        {
+            if (aecm->farEnergyVAD > aecm->farLogEnergy)
+            {
+                aecm->farEnergyVAD +=
+                    (aecm->farLogEnergy + tmp16 - aecm->farEnergyVAD) >> 6;
+                aecm->vadUpdateCount = 0;
+            } else
+            {
+                aecm->vadUpdateCount++;
+            }
+        }
+        // Put MSE threshold higher than VAD
+        aecm->farEnergyMSE = aecm->farEnergyVAD + (1 << 8);
+    }
+
+    // Update VAD variables
+    if (aecm->farLogEnergy > aecm->farEnergyVAD)
+    {
+        if ((aecm->startupState == 0) | (aecm->farEnergyMaxMin > FAR_ENERGY_DIFF))
+        {
+            // We are in startup or have significant dynamics in input speech level
+            aecm->currentVADValue = 1;
+        }
+    } else
+    {
+        aecm->currentVADValue = 0;
+    }
+    if ((aecm->currentVADValue) && (aecm->firstVAD))
+    {
+        aecm->firstVAD = 0;
+        if (aecm->echoAdaptLogEnergy[0] > aecm->nearLogEnergy[0])
+        {
+            // The estimated echo has higher energy than the near end signal.
+            // This means that the initialization was too aggressive. Scale
+            // down by a factor 8
+            for (i = 0; i < PART_LEN1; i++)
+            {
+                aecm->channelAdapt16[i] >>= 3;
+            }
+            // Compensate the adapted echo energy level accordingly.
+            aecm->echoAdaptLogEnergy[0] -= (3 << 8);
+            aecm->firstVAD = 1;
+        }
+    }
+}
+
+// WebRtcAecm_CalcStepSize(...)
+//
+// This function calculates the step size used in channel estimation
+//
+//
+// @param  aecm  [in]    Handle of the AECM instance.
+// @param  mu    [out]   (Return value) Stepsize in log2(), i.e. number of shifts.
+//
+//
+int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm) {
+    int32_t tmp32;
+    int16_t tmp16;
+    int16_t mu = MU_MAX;
+
+    // Here we calculate the step size mu used in the
+    // following NLMS based Channel estimation algorithm
+    if (!aecm->currentVADValue)
+    {
+        // Far end energy level too low, no channel update
+        mu = 0;
+    } else if (aecm->startupState > 0)
+    {
+        if (aecm->farEnergyMin >= aecm->farEnergyMax)
+        {
+            mu = MU_MIN;
+        } else
+        {
+            tmp16 = (aecm->farLogEnergy - aecm->farEnergyMin);
+            tmp32 = tmp16 * MU_DIFF;
+            tmp32 = WebRtcSpl_DivW32W16(tmp32, aecm->farEnergyMaxMin);
+            mu = MU_MIN - 1 - (int16_t)(tmp32);
+            // The -1 is an alternative to rounding. This way we get a larger
+            // stepsize, so we in some sense compensate for truncation in NLMS
+        }
+        if (mu < MU_MAX)
+        {
+            mu = MU_MAX; // Equivalent with maximum step size of 2^-MU_MAX
+        }
+    }
+
+    return mu;
+}
+
+// WebRtcAecm_UpdateChannel(...)
+//
+// This function performs channel estimation. NLMS and decision on channel storage.
+//
+//
+// @param  aecm         [i/o]   Handle of the AECM instance.
+// @param  far_spectrum [in]    Absolute value of the farend signal in Q(far_q)
+// @param  far_q        [in]    Q-domain of the farend signal
+// @param  dfa          [in]    Absolute value of the nearend signal (Q[aecm->dfaQDomain])
+// @param  mu           [in]    NLMS step size.
+// @param  echoEst      [i/o]   Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
+//
+void WebRtcAecm_UpdateChannel(AecmCore* aecm,
+                              const uint16_t* far_spectrum,
+                              const int16_t far_q,
+                              const uint16_t* const dfa,
+                              const int16_t mu,
+                              int32_t* echoEst) {
+    uint32_t tmpU32no1, tmpU32no2;
+    int32_t tmp32no1, tmp32no2;
+    int32_t mseStored;
+    int32_t mseAdapt;
+
+    int i;
+
+    int16_t zerosFar, zerosNum, zerosCh, zerosDfa;
+    int16_t shiftChFar, shiftNum, shift2ResChan;
+    int16_t tmp16no1;
+    int16_t xfaQ, dfaQ;
+
+    // This is the channel estimation algorithm. It is base on NLMS but has a variable step
+    // length, which was calculated above.
+    if (mu)
+    {
+        for (i = 0; i < PART_LEN1; i++)
+        {
+            // Determine norm of channel and farend to make sure we don't get overflow in
+            // multiplication
+            zerosCh = WebRtcSpl_NormU32(aecm->channelAdapt32[i]);
+            zerosFar = WebRtcSpl_NormU32((uint32_t)far_spectrum[i]);
+            if (zerosCh + zerosFar > 31)
+            {
+                // Multiplication is safe
+                tmpU32no1 = WEBRTC_SPL_UMUL_32_16(aecm->channelAdapt32[i],
+                        far_spectrum[i]);
+                shiftChFar = 0;
+            } else
+            {
+                // We need to shift down before multiplication
+                shiftChFar = 32 - zerosCh - zerosFar;
+                tmpU32no1 = (aecm->channelAdapt32[i] >> shiftChFar) *
+                    far_spectrum[i];
+            }
+            // Determine Q-domain of numerator
+            zerosNum = WebRtcSpl_NormU32(tmpU32no1);
+            if (dfa[i])
+            {
+                zerosDfa = WebRtcSpl_NormU32((uint32_t)dfa[i]);
+            } else
+            {
+                zerosDfa = 32;
+            }
+            tmp16no1 = zerosDfa - 2 + aecm->dfaNoisyQDomain -
+                RESOLUTION_CHANNEL32 - far_q + shiftChFar;
+            if (zerosNum > tmp16no1 + 1)
+            {
+                xfaQ = tmp16no1;
+                dfaQ = zerosDfa - 2;
+            } else
+            {
+                xfaQ = zerosNum - 2;
+                dfaQ = RESOLUTION_CHANNEL32 + far_q - aecm->dfaNoisyQDomain -
+                    shiftChFar + xfaQ;
+            }
+            // Add in the same Q-domain
+            tmpU32no1 = WEBRTC_SPL_SHIFT_W32(tmpU32no1, xfaQ);
+            tmpU32no2 = WEBRTC_SPL_SHIFT_W32((uint32_t)dfa[i], dfaQ);
+            tmp32no1 = (int32_t)tmpU32no2 - (int32_t)tmpU32no1;
+            zerosNum = WebRtcSpl_NormW32(tmp32no1);
+            if ((tmp32no1) && (far_spectrum[i] > (CHANNEL_VAD << far_q)))
+            {
+                //
+                // Update is needed
+                //
+                // This is what we would like to compute
+                //
+                // tmp32no1 = dfa[i] - (aecm->channelAdapt[i] * far_spectrum[i])
+                // tmp32norm = (i + 1)
+                // aecm->channelAdapt[i] += (2^mu) * tmp32no1
+                //                        / (tmp32norm * far_spectrum[i])
+                //
+
+                // Make sure we don't get overflow in multiplication.
+                if (zerosNum + zerosFar > 31)
+                {
+                    if (tmp32no1 > 0)
+                    {
+                        tmp32no2 = (int32_t)WEBRTC_SPL_UMUL_32_16(tmp32no1,
+                                                                        far_spectrum[i]);
+                    } else
+                    {
+                        tmp32no2 = -(int32_t)WEBRTC_SPL_UMUL_32_16(-tmp32no1,
+                                                                         far_spectrum[i]);
+                    }
+                    shiftNum = 0;
+                } else
+                {
+                    shiftNum = 32 - (zerosNum + zerosFar);
+                    if (tmp32no1 > 0)
+                    {
+                        tmp32no2 = (tmp32no1 >> shiftNum) * far_spectrum[i];
+                    } else
+                    {
+                        tmp32no2 = -((-tmp32no1 >> shiftNum) * far_spectrum[i]);
+                    }
+                }
+                // Normalize with respect to frequency bin
+                tmp32no2 = WebRtcSpl_DivW32W16(tmp32no2, i + 1);
+                // Make sure we are in the right Q-domain
+                shift2ResChan = shiftNum + shiftChFar - xfaQ - mu - ((30 - zerosFar) << 1);
+                if (WebRtcSpl_NormW32(tmp32no2) < shift2ResChan)
+                {
+                    tmp32no2 = WEBRTC_SPL_WORD32_MAX;
+                } else
+                {
+                    tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, shift2ResChan);
+                }
+                aecm->channelAdapt32[i] =
+                    WebRtcSpl_AddSatW32(aecm->channelAdapt32[i], tmp32no2);
+                if (aecm->channelAdapt32[i] < 0)
+                {
+                    // We can never have negative channel gain
+                    aecm->channelAdapt32[i] = 0;
+                }
+                aecm->channelAdapt16[i] =
+                    (int16_t)(aecm->channelAdapt32[i] >> 16);
+            }
+        }
+    }
+    // END: Adaptive channel update
+
+    // Determine if we should store or restore the channel
+    if ((aecm->startupState == 0) & (aecm->currentVADValue))
+    {
+        // During startup we store the channel every block,
+        // and we recalculate echo estimate
+        WebRtcAecm_StoreAdaptiveChannel(aecm, far_spectrum, echoEst);
+    } else
+    {
+        if (aecm->farLogEnergy < aecm->farEnergyMSE)
+        {
+            aecm->mseChannelCount = 0;
+        } else
+        {
+            aecm->mseChannelCount++;
+        }
+        // Enough data for validation. Store channel if we can.
+        if (aecm->mseChannelCount >= (MIN_MSE_COUNT + 10))
+        {
+            // We have enough data.
+            // Calculate MSE of "Adapt" and "Stored" versions.
+            // It is actually not MSE, but average absolute error.
+            mseStored = 0;
+            mseAdapt = 0;
+            for (i = 0; i < MIN_MSE_COUNT; i++)
+            {
+                tmp32no1 = ((int32_t)aecm->echoStoredLogEnergy[i]
+                        - (int32_t)aecm->nearLogEnergy[i]);
+                tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1);
+                mseStored += tmp32no2;
+
+                tmp32no1 = ((int32_t)aecm->echoAdaptLogEnergy[i]
+                        - (int32_t)aecm->nearLogEnergy[i]);
+                tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1);
+                mseAdapt += tmp32no2;
+            }
+            if (((mseStored << MSE_RESOLUTION) < (MIN_MSE_DIFF * mseAdapt))
+                    & ((aecm->mseStoredOld << MSE_RESOLUTION) < (MIN_MSE_DIFF
+                            * aecm->mseAdaptOld)))
+            {
+                // The stored channel has a significantly lower MSE than the adaptive one for
+                // two consecutive calculations. Reset the adaptive channel.
+                WebRtcAecm_ResetAdaptiveChannel(aecm);
+            } else if (((MIN_MSE_DIFF * mseStored) > (mseAdapt << MSE_RESOLUTION)) & (mseAdapt
+                    < aecm->mseThreshold) & (aecm->mseAdaptOld < aecm->mseThreshold))
+            {
+                // The adaptive channel has a significantly lower MSE than the stored one.
+                // The MSE for the adaptive channel has also been low for two consecutive
+                // calculations. Store the adaptive channel.
+                WebRtcAecm_StoreAdaptiveChannel(aecm, far_spectrum, echoEst);
+
+                // Update threshold
+                if (aecm->mseThreshold == WEBRTC_SPL_WORD32_MAX)
+                {
+                    aecm->mseThreshold = (mseAdapt + aecm->mseAdaptOld);
+                } else
+                {
+                  int scaled_threshold = aecm->mseThreshold * 5 / 8;
+                  aecm->mseThreshold +=
+                      ((mseAdapt - scaled_threshold) * 205) >> 8;
+                }
+
+            }
+
+            // Reset counter
+            aecm->mseChannelCount = 0;
+
+            // Store the MSE values.
+            aecm->mseStoredOld = mseStored;
+            aecm->mseAdaptOld = mseAdapt;
+        }
+    }
+    // END: Determine if we should store or reset channel estimate.
+}
+
+// CalcSuppressionGain(...)
+//
+// This function calculates the suppression gain that is used in the Wiener filter.
+//
+//
+// @param  aecm     [i/n]   Handle of the AECM instance.
+// @param  supGain  [out]   (Return value) Suppression gain with which to scale the noise
+//                          level (Q14).
+//
+//
+int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm) {
+    int32_t tmp32no1;
+
+    int16_t supGain = SUPGAIN_DEFAULT;
+    int16_t tmp16no1;
+    int16_t dE = 0;
+
+    // Determine suppression gain used in the Wiener filter. The gain is based on a mix of far
+    // end energy and echo estimation error.
+    // Adjust for the far end signal level. A low signal level indicates no far end signal,
+    // hence we set the suppression gain to 0
+    if (!aecm->currentVADValue)
+    {
+        supGain = 0;
+    } else
+    {
+        // Adjust for possible double talk. If we have large variations in estimation error we
+        // likely have double talk (or poor channel).
+        tmp16no1 = (aecm->nearLogEnergy[0] - aecm->echoStoredLogEnergy[0] - ENERGY_DEV_OFFSET);
+        dE = WEBRTC_SPL_ABS_W16(tmp16no1);
+
+        if (dE < ENERGY_DEV_TOL)
+        {
+            // Likely no double talk. The better estimation, the more we can suppress signal.
+            // Update counters
+            if (dE < SUPGAIN_EPC_DT)
+            {
+                tmp32no1 = aecm->supGainErrParamDiffAB * dE;
+                tmp32no1 += (SUPGAIN_EPC_DT >> 1);
+                tmp16no1 = (int16_t)WebRtcSpl_DivW32W16(tmp32no1, SUPGAIN_EPC_DT);
+                supGain = aecm->supGainErrParamA - tmp16no1;
+            } else
+            {
+                tmp32no1 = aecm->supGainErrParamDiffBD * (ENERGY_DEV_TOL - dE);
+                tmp32no1 += ((ENERGY_DEV_TOL - SUPGAIN_EPC_DT) >> 1);
+                tmp16no1 = (int16_t)WebRtcSpl_DivW32W16(tmp32no1, (ENERGY_DEV_TOL
+                        - SUPGAIN_EPC_DT));
+                supGain = aecm->supGainErrParamD + tmp16no1;
+            }
+        } else
+        {
+            // Likely in double talk. Use default value
+            supGain = aecm->supGainErrParamD;
+        }
+    }
+
+    if (supGain > aecm->supGainOld)
+    {
+        tmp16no1 = supGain;
+    } else
+    {
+        tmp16no1 = aecm->supGainOld;
+    }
+    aecm->supGainOld = supGain;
+    if (tmp16no1 < aecm->supGain)
+    {
+        aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4);
+    } else
+    {
+        aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4);
+    }
+
+    // END: Update suppression gain
+
+    return aecm->supGain;
+}
+
+void WebRtcAecm_BufferFarFrame(AecmCore* const aecm,
+                               const int16_t* const farend,
+                               const int farLen) {
+    int writeLen = farLen, writePos = 0;
+
+    // Check if the write position must be wrapped
+    while (aecm->farBufWritePos + writeLen > FAR_BUF_LEN)
+    {
+        // Write to remaining buffer space before wrapping
+        writeLen = FAR_BUF_LEN - aecm->farBufWritePos;
+        memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos,
+               sizeof(int16_t) * writeLen);
+        aecm->farBufWritePos = 0;
+        writePos = writeLen;
+        writeLen = farLen - writeLen;
+    }
+
+    memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos,
+           sizeof(int16_t) * writeLen);
+    aecm->farBufWritePos += writeLen;
+}
+
+void WebRtcAecm_FetchFarFrame(AecmCore* const aecm,
+                              int16_t* const farend,
+                              const int farLen,
+                              const int knownDelay) {
+    int readLen = farLen;
+    int readPos = 0;
+    int delayChange = knownDelay - aecm->lastKnownDelay;
+
+    aecm->farBufReadPos -= delayChange;
+
+    // Check if delay forces a read position wrap
+    while (aecm->farBufReadPos < 0)
+    {
+        aecm->farBufReadPos += FAR_BUF_LEN;
+    }
+    while (aecm->farBufReadPos > FAR_BUF_LEN - 1)
+    {
+        aecm->farBufReadPos -= FAR_BUF_LEN;
+    }
+
+    aecm->lastKnownDelay = knownDelay;
+
+    // Check if read position must be wrapped
+    while (aecm->farBufReadPos + readLen > FAR_BUF_LEN)
+    {
+
+        // Read from remaining buffer space before wrapping
+        readLen = FAR_BUF_LEN - aecm->farBufReadPos;
+        memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos,
+               sizeof(int16_t) * readLen);
+        aecm->farBufReadPos = 0;
+        readPos = readLen;
+        readLen = farLen - readLen;
+    }
+    memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos,
+           sizeof(int16_t) * readLen);
+    aecm->farBufReadPos += readLen;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.h
new file mode 100644
index 00000000..b52bb62d
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.h
@@ -0,0 +1,434 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Performs echo control (suppression) with fft routines in fixed-point.
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aecm/aecm_defines.h"
+#include "webrtc/typedefs.h"
+
+#ifdef _MSC_VER  // visual c++
+#define ALIGN8_BEG __declspec(align(8))
+#define ALIGN8_END
+#else  // gcc or icc
+#define ALIGN8_BEG
+#define ALIGN8_END __attribute__((aligned(8)))
+#endif
+
+typedef struct {
+    int16_t real;
+    int16_t imag;
+} ComplexInt16;
+
+typedef struct {
+    int farBufWritePos;
+    int farBufReadPos;
+    int knownDelay;
+    int lastKnownDelay;
+    int firstVAD;  // Parameter to control poorly initialized channels
+
+    RingBuffer* farFrameBuf;
+    RingBuffer* nearNoisyFrameBuf;
+    RingBuffer* nearCleanFrameBuf;
+    RingBuffer* outFrameBuf;
+
+    int16_t farBuf[FAR_BUF_LEN];
+
+    int16_t mult;
+    uint32_t seed;
+
+    // Delay estimation variables
+    void* delay_estimator_farend;
+    void* delay_estimator;
+    uint16_t currentDelay;
+    // Far end history variables
+    // TODO(bjornv): Replace |far_history| with ring_buffer.
+    uint16_t far_history[PART_LEN1 * MAX_DELAY];
+    int far_history_pos;
+    int far_q_domains[MAX_DELAY];
+
+    int16_t nlpFlag;
+    int16_t fixedDelay;
+
+    uint32_t totCount;
+
+    int16_t dfaCleanQDomain;
+    int16_t dfaCleanQDomainOld;
+    int16_t dfaNoisyQDomain;
+    int16_t dfaNoisyQDomainOld;
+
+    int16_t nearLogEnergy[MAX_BUF_LEN];
+    int16_t farLogEnergy;
+    int16_t echoAdaptLogEnergy[MAX_BUF_LEN];
+    int16_t echoStoredLogEnergy[MAX_BUF_LEN];
+
+    // The extra 16 or 32 bytes in the following buffers are for alignment based
+    // Neon code.
+    // It's designed this way since the current GCC compiler can't align a
+    // buffer in 16 or 32 byte boundaries properly.
+    int16_t channelStored_buf[PART_LEN1 + 8];
+    int16_t channelAdapt16_buf[PART_LEN1 + 8];
+    int32_t channelAdapt32_buf[PART_LEN1 + 8];
+    int16_t xBuf_buf[PART_LEN2 + 16];  // farend
+    int16_t dBufClean_buf[PART_LEN2 + 16];  // nearend
+    int16_t dBufNoisy_buf[PART_LEN2 + 16];  // nearend
+    int16_t outBuf_buf[PART_LEN + 8];
+
+    // Pointers to the above buffers
+    int16_t *channelStored;
+    int16_t *channelAdapt16;
+    int32_t *channelAdapt32;
+    int16_t *xBuf;
+    int16_t *dBufClean;
+    int16_t *dBufNoisy;
+    int16_t *outBuf;
+
+    int32_t echoFilt[PART_LEN1];
+    int16_t nearFilt[PART_LEN1];
+    int32_t noiseEst[PART_LEN1];
+    int           noiseEstTooLowCtr[PART_LEN1];
+    int           noiseEstTooHighCtr[PART_LEN1];
+    int16_t noiseEstCtr;
+    int16_t cngMode;
+
+    int32_t mseAdaptOld;
+    int32_t mseStoredOld;
+    int32_t mseThreshold;
+
+    int16_t farEnergyMin;
+    int16_t farEnergyMax;
+    int16_t farEnergyMaxMin;
+    int16_t farEnergyVAD;
+    int16_t farEnergyMSE;
+    int currentVADValue;
+    int16_t vadUpdateCount;
+
+    int16_t startupState;
+    int16_t mseChannelCount;
+    int16_t supGain;
+    int16_t supGainOld;
+
+    int16_t supGainErrParamA;
+    int16_t supGainErrParamD;
+    int16_t supGainErrParamDiffAB;
+    int16_t supGainErrParamDiffBD;
+
+    struct RealFFT* real_fft;
+
+#ifdef AEC_DEBUG
+    FILE *farFile;
+    FILE *nearFile;
+    FILE *outFile;
+#endif
+} AecmCore;
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CreateCore()
+//
+// Allocates the memory needed by the AECM. The memory needs to be
+// initialized separately using the WebRtcAecm_InitCore() function.
+// Returns a pointer to the instance and a nullptr at failure.
+AecmCore* WebRtcAecm_CreateCore();
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_InitCore(...)
+//
+// This function initializes the AECM instant created with
+// WebRtcAecm_CreateCore()
+// Input:
+//      - aecm          : Pointer to the AECM instance
+//      - samplingFreq  : Sampling Frequency
+//
+// Output:
+//      - aecm          : Initialized instance
+//
+// Return value         :  0 - Ok
+//                        -1 - Error
+//
+int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_FreeCore(...)
+//
+// This function releases the memory allocated by WebRtcAecm_CreateCore()
+// Input:
+//      - aecm          : Pointer to the AECM instance
+//
+void WebRtcAecm_FreeCore(AecmCore* aecm);
+
+int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_InitEchoPathCore(...)
+//
+// This function resets the echo channel adaptation with the specified channel.
+// Input:
+//      - aecm          : Pointer to the AECM instance
+//      - echo_path     : Pointer to the data that should initialize the echo
+//                        path
+//
+// Output:
+//      - aecm          : Initialized instance
+//
+void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_ProcessFrame(...)
+//
+// This function processes frames and sends blocks to
+// WebRtcAecm_ProcessBlock(...)
+//
+// Inputs:
+//      - aecm          : Pointer to the AECM instance
+//      - farend        : In buffer containing one frame of echo signal
+//      - nearendNoisy  : In buffer containing one frame of nearend+echo signal
+//                        without NS
+//      - nearendClean  : In buffer containing one frame of nearend+echo signal
+//                        with NS
+//
+// Output:
+//      - out           : Out buffer, one frame of nearend signal          :
+//
+//
+int WebRtcAecm_ProcessFrame(AecmCore* aecm,
+                            const int16_t* farend,
+                            const int16_t* nearendNoisy,
+                            const int16_t* nearendClean,
+                            int16_t* out);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_ProcessBlock(...)
+//
+// This function is called for every block within one frame
+// This function is called by WebRtcAecm_ProcessFrame(...)
+//
+// Inputs:
+//      - aecm          : Pointer to the AECM instance
+//      - farend        : In buffer containing one block of echo signal
+//      - nearendNoisy  : In buffer containing one frame of nearend+echo signal
+//                        without NS
+//      - nearendClean  : In buffer containing one frame of nearend+echo signal
+//                        with NS
+//
+// Output:
+//      - out           : Out buffer, one block of nearend signal          :
+//
+//
+int WebRtcAecm_ProcessBlock(AecmCore* aecm,
+                            const int16_t* farend,
+                            const int16_t* nearendNoisy,
+                            const int16_t* noisyClean,
+                            int16_t* out);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_BufferFarFrame()
+//
+// Inserts a frame of data into farend buffer.
+//
+// Inputs:
+//      - aecm          : Pointer to the AECM instance
+//      - farend        : In buffer containing one frame of farend signal
+//      - farLen        : Length of frame
+//
+void WebRtcAecm_BufferFarFrame(AecmCore* const aecm,
+                               const int16_t* const farend,
+                               const int farLen);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_FetchFarFrame()
+//
+// Read the farend buffer to account for known delay
+//
+// Inputs:
+//      - aecm          : Pointer to the AECM instance
+//      - farend        : In buffer containing one frame of farend signal
+//      - farLen        : Length of frame
+//      - knownDelay    : known delay
+//
+void WebRtcAecm_FetchFarFrame(AecmCore* const aecm,
+                              int16_t* const farend,
+                              const int farLen,
+                              const int knownDelay);
+
+// All the functions below are intended to be private
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_UpdateFarHistory()
+//
+// Moves the pointer to the next entry and inserts |far_spectrum| and
+// corresponding Q-domain in its buffer.
+//
+// Inputs:
+//      - self          : Pointer to the delay estimation instance
+//      - far_spectrum  : Pointer to the far end spectrum
+//      - far_q         : Q-domain of far end spectrum
+//
+void WebRtcAecm_UpdateFarHistory(AecmCore* self,
+                                 uint16_t* far_spectrum,
+                                 int far_q);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_AlignedFarend()
+//
+// Returns a pointer to the far end spectrum aligned to current near end
+// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been
+// called before AlignedFarend(...). Otherwise, you get the pointer to the
+// previous frame. The memory is only valid until the next call of
+// WebRtc_DelayEstimatorProcessFix(...).
+//
+// Inputs:
+//      - self              : Pointer to the AECM instance.
+//      - delay             : Current delay estimate.
+//
+// Output:
+//      - far_q             : The Q-domain of the aligned far end spectrum
+//
+// Return value:
+//      - far_spectrum      : Pointer to the aligned far end spectrum
+//                            NULL - Error
+//
+const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, int* far_q, int delay);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CalcSuppressionGain()
+//
+// This function calculates the suppression gain that is used in the
+// Wiener filter.
+//
+// Inputs:
+//      - aecm              : Pointer to the AECM instance.
+//
+// Return value:
+//      - supGain           : Suppression gain with which to scale the noise
+//                            level (Q14).
+//
+int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CalcEnergies()
+//
+// This function calculates the log of energies for nearend, farend and
+// estimated echoes. There is also an update of energy decision levels,
+// i.e. internal VAD.
+//
+// Inputs:
+//      - aecm              : Pointer to the AECM instance.
+//      - far_spectrum      : Pointer to farend spectrum.
+//      - far_q             : Q-domain of farend spectrum.
+//      - nearEner          : Near end energy for current block in
+//                            Q(aecm->dfaQDomain).
+//
+// Output:
+//     - echoEst            : Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16).
+//
+void WebRtcAecm_CalcEnergies(AecmCore* aecm,
+                             const uint16_t* far_spectrum,
+                             const int16_t far_q,
+                             const uint32_t nearEner,
+                             int32_t* echoEst);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CalcStepSize()
+//
+// This function calculates the step size used in channel estimation
+//
+// Inputs:
+//      - aecm              : Pointer to the AECM instance.
+//
+// Return value:
+//      - mu                : Stepsize in log2(), i.e. number of shifts.
+//
+int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_UpdateChannel(...)
+//
+// This function performs channel estimation.
+// NLMS and decision on channel storage.
+//
+// Inputs:
+//      - aecm              : Pointer to the AECM instance.
+//      - far_spectrum      : Absolute value of the farend signal in Q(far_q)
+//      - far_q             : Q-domain of the farend signal
+//      - dfa               : Absolute value of the nearend signal
+//                            (Q[aecm->dfaQDomain])
+//      - mu                : NLMS step size.
+// Input/Output:
+//      - echoEst           : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
+//
+void WebRtcAecm_UpdateChannel(AecmCore* aecm,
+                              const uint16_t* far_spectrum,
+                              const int16_t far_q,
+                              const uint16_t* const dfa,
+                              const int16_t mu,
+                              int32_t* echoEst);
+
+extern const int16_t WebRtcAecm_kCosTable[];
+extern const int16_t WebRtcAecm_kSinTable[];
+
+///////////////////////////////////////////////////////////////////////////////
+// Some function pointers, for internal functions shared by ARM NEON and
+// generic C code.
+//
+typedef void (*CalcLinearEnergies)(AecmCore* aecm,
+                                   const uint16_t* far_spectrum,
+                                   int32_t* echoEst,
+                                   uint32_t* far_energy,
+                                   uint32_t* echo_energy_adapt,
+                                   uint32_t* echo_energy_stored);
+extern CalcLinearEnergies WebRtcAecm_CalcLinearEnergies;
+
+typedef void (*StoreAdaptiveChannel)(AecmCore* aecm,
+                                     const uint16_t* far_spectrum,
+                                     int32_t* echo_est);
+extern StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
+
+typedef void (*ResetAdaptiveChannel)(AecmCore* aecm);
+extern ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
+
+// For the above function pointers, functions for generic platforms are declared
+// and defined as static in file aecm_core.c, while those for ARM Neon platforms
+// are declared below and defined in file aecm_core_neon.c.
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm,
+                                       const uint16_t* far_spectrum,
+                                       int32_t* echo_est,
+                                       uint32_t* far_energy,
+                                       uint32_t* echo_energy_adapt,
+                                       uint32_t* echo_energy_stored);
+
+void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm,
+                                         const uint16_t* far_spectrum,
+                                         int32_t* echo_est);
+
+void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm);
+#endif
+
+#if defined(MIPS32_LE)
+void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm,
+                                        const uint16_t* far_spectrum,
+                                        int32_t* echo_est,
+                                        uint32_t* far_energy,
+                                        uint32_t* echo_energy_adapt,
+                                        uint32_t* echo_energy_stored);
+#if defined(MIPS_DSP_R1_LE)
+void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm,
+                                          const uint16_t* far_spectrum,
+                                          int32_t* echo_est);
+
+void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm);
+#endif
+#endif
+
+#endif
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_c.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_c.c
new file mode 100644
index 00000000..eb2bd918
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_c.c
@@ -0,0 +1,771 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+#include "webrtc/system_wrappers/interface/compile_assert_c.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
+
+// Square root of Hanning window in Q14.
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+// Table is defined in an ARM assembly file.
+extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END;
+#else
+static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
+  0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
+  3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
+  6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
+  9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
+  11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
+  13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
+  15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
+  16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
+};
+#endif
+
+#ifdef AECM_WITH_ABS_APPROX
+//Q15 alpha = 0.99439986968132  const Factor for magnitude approximation
+static const uint16_t kAlpha1 = 32584;
+//Q15 beta = 0.12967166976970   const Factor for magnitude approximation
+static const uint16_t kBeta1 = 4249;
+//Q15 alpha = 0.94234827210087  const Factor for magnitude approximation
+static const uint16_t kAlpha2 = 30879;
+//Q15 beta = 0.33787806009150   const Factor for magnitude approximation
+static const uint16_t kBeta2 = 11072;
+//Q15 alpha = 0.82247698684306  const Factor for magnitude approximation
+static const uint16_t kAlpha3 = 26951;
+//Q15 beta = 0.57762063060713   const Factor for magnitude approximation
+static const uint16_t kBeta3 = 18927;
+#endif
+
+static const int16_t kNoiseEstQDomain = 15;
+static const int16_t kNoiseEstIncCount = 5;
+
+static void ComfortNoise(AecmCore* aecm,
+                         const uint16_t* dfa,
+                         ComplexInt16* out,
+                         const int16_t* lambda);
+
+static void WindowAndFFT(AecmCore* aecm,
+                         int16_t* fft,
+                         const int16_t* time_signal,
+                         ComplexInt16* freq_signal,
+                         int time_signal_scaling) {
+  int i = 0;
+
+  // FFT of signal
+  for (i = 0; i < PART_LEN; i++) {
+    // Window time domain signal and insert into real part of
+    // transformation array |fft|
+    int16_t scaled_time_signal = time_signal[i] << time_signal_scaling;
+    fft[i] = (int16_t)((scaled_time_signal * WebRtcAecm_kSqrtHanning[i]) >> 14);
+    scaled_time_signal = time_signal[i + PART_LEN] << time_signal_scaling;
+    fft[PART_LEN + i] = (int16_t)((
+        scaled_time_signal * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14);
+  }
+
+  // Do forward FFT, then take only the first PART_LEN complex samples,
+  // and change signs of the imaginary parts.
+  WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
+  for (i = 0; i < PART_LEN; i++) {
+    freq_signal[i].imag = -freq_signal[i].imag;
+  }
+}
+
+static void InverseFFTAndWindow(AecmCore* aecm,
+                                int16_t* fft,
+                                ComplexInt16* efw,
+                                int16_t* output,
+                                const int16_t* nearendClean) {
+  int i, j, outCFFT;
+  int32_t tmp32no1;
+  // Reuse |efw| for the inverse FFT output after transferring
+  // the contents to |fft|.
+  int16_t* ifft_out = (int16_t*)efw;
+
+  // Synthesis
+  for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) {
+    fft[j] = efw[i].real;
+    fft[j + 1] = -efw[i].imag;
+  }
+  fft[0] = efw[0].real;
+  fft[1] = -efw[0].imag;
+
+  fft[PART_LEN2] = efw[PART_LEN].real;
+  fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
+
+  // Inverse FFT. Keep outCFFT to scale the samples in the next block.
+  outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out);
+  for (i = 0; i < PART_LEN; i++) {
+    ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                    ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14);
+    tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i],
+                                     outCFFT - aecm->dfaCleanQDomain);
+    output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+                                        tmp32no1 + aecm->outBuf[i],
+                                        WEBRTC_SPL_WORD16_MIN);
+
+    tmp32no1 = (ifft_out[PART_LEN + i] *
+        WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14;
+    tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1,
+                                    outCFFT - aecm->dfaCleanQDomain);
+    aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+                                                tmp32no1,
+                                                WEBRTC_SPL_WORD16_MIN);
+  }
+
+  // Copy the current block to the old position
+  // (aecm->outBuf is shifted elsewhere)
+  memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
+  memcpy(aecm->dBufNoisy,
+         aecm->dBufNoisy + PART_LEN,
+         sizeof(int16_t) * PART_LEN);
+  if (nearendClean != NULL)
+  {
+    memcpy(aecm->dBufClean,
+           aecm->dBufClean + PART_LEN,
+           sizeof(int16_t) * PART_LEN);
+  }
+}
+
+// Transforms a time domain signal into the frequency domain, outputting the
+// complex valued signal, absolute value and sum of absolute values.
+//
+// time_signal          [in]    Pointer to time domain signal
+// freq_signal_real     [out]   Pointer to real part of frequency domain array
+// freq_signal_imag     [out]   Pointer to imaginary part of frequency domain
+//                              array
+// freq_signal_abs      [out]   Pointer to absolute value of frequency domain
+//                              array
+// freq_signal_sum_abs  [out]   Pointer to the sum of all absolute values in
+//                              the frequency domain array
+// return value                 The Q-domain of current frequency values
+//
+static int TimeToFrequencyDomain(AecmCore* aecm,
+                                 const int16_t* time_signal,
+                                 ComplexInt16* freq_signal,
+                                 uint16_t* freq_signal_abs,
+                                 uint32_t* freq_signal_sum_abs) {
+  int i = 0;
+  int time_signal_scaling = 0;
+
+  int32_t tmp32no1 = 0;
+  int32_t tmp32no2 = 0;
+
+  // In fft_buf, +16 for 32-byte alignment.
+  int16_t fft_buf[PART_LEN4 + 16];
+  int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);
+
+  int16_t tmp16no1;
+#ifndef WEBRTC_ARCH_ARM_V7
+  int16_t tmp16no2;
+#endif
+#ifdef AECM_WITH_ABS_APPROX
+  int16_t max_value = 0;
+  int16_t min_value = 0;
+  uint16_t alpha = 0;
+  uint16_t beta = 0;
+#endif
+
+#ifdef AECM_DYNAMIC_Q
+  tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
+  time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
+#endif
+
+  WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
+
+  // Extract imaginary and real part, calculate the magnitude for
+  // all frequency bins
+  freq_signal[0].imag = 0;
+  freq_signal[PART_LEN].imag = 0;
+  freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
+  freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
+                                freq_signal[PART_LEN].real);
+  (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
+                           (uint32_t)(freq_signal_abs[PART_LEN]);
+
+  for (i = 1; i < PART_LEN; i++)
+  {
+    if (freq_signal[i].real == 0)
+    {
+      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+    }
+    else if (freq_signal[i].imag == 0)
+    {
+      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+    }
+    else
+    {
+      // Approximation for magnitude of complex fft output
+      // magn = sqrt(real^2 + imag^2)
+      // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
+      //
+      // The parameters alpha and beta are stored in Q15
+
+#ifdef AECM_WITH_ABS_APPROX
+      tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+      tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+
+      if(tmp16no1 > tmp16no2)
+      {
+        max_value = tmp16no1;
+        min_value = tmp16no2;
+      } else
+      {
+        max_value = tmp16no2;
+        min_value = tmp16no1;
+      }
+
+      // Magnitude in Q(-6)
+      if ((max_value >> 2) > min_value)
+      {
+        alpha = kAlpha1;
+        beta = kBeta1;
+      } else if ((max_value >> 1) > min_value)
+      {
+        alpha = kAlpha2;
+        beta = kBeta2;
+      } else
+      {
+        alpha = kAlpha3;
+        beta = kBeta3;
+      }
+      tmp16no1 = (int16_t)((max_value * alpha) >> 15);
+      tmp16no2 = (int16_t)((min_value * beta) >> 15);
+      freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2;
+#else
+#ifdef WEBRTC_ARCH_ARM_V7
+      __asm __volatile(
+        "smulbb %[tmp32no1], %[real], %[real]\n\t"
+        "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t"
+        :[tmp32no1]"+&r"(tmp32no1),
+         [tmp32no2]"=r"(tmp32no2)
+        :[real]"r"(freq_signal[i].real),
+         [imag]"r"(freq_signal[i].imag)
+      );
+#else
+      tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+      tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+      tmp32no1 = tmp16no1 * tmp16no1;
+      tmp32no2 = tmp16no2 * tmp16no2;
+      tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);
+#endif // WEBRTC_ARCH_ARM_V7
+      tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
+
+      freq_signal_abs[i] = (uint16_t)tmp32no1;
+#endif // AECM_WITH_ABS_APPROX
+    }
+    (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
+  }
+
+  return time_signal_scaling;
+}
+
+int WebRtcAecm_ProcessBlock(AecmCore* aecm,
+                            const int16_t* farend,
+                            const int16_t* nearendNoisy,
+                            const int16_t* nearendClean,
+                            int16_t* output) {
+  int i;
+
+  uint32_t xfaSum;
+  uint32_t dfaNoisySum;
+  uint32_t dfaCleanSum;
+  uint32_t echoEst32Gained;
+  uint32_t tmpU32;
+
+  int32_t tmp32no1;
+
+  uint16_t xfa[PART_LEN1];
+  uint16_t dfaNoisy[PART_LEN1];
+  uint16_t dfaClean[PART_LEN1];
+  uint16_t* ptrDfaClean = dfaClean;
+  const uint16_t* far_spectrum_ptr = NULL;
+
+  // 32 byte aligned buffers (with +8 or +16).
+  // TODO(kma): define fft with ComplexInt16.
+  int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
+  int32_t echoEst32_buf[PART_LEN1 + 8];
+  int32_t dfw_buf[PART_LEN2 + 8];
+  int32_t efw_buf[PART_LEN2 + 8];
+
+  int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31);
+  int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31);
+  ComplexInt16* dfw = (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~31);
+  ComplexInt16* efw = (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~31);
+
+  int16_t hnl[PART_LEN1];
+  int16_t numPosCoef = 0;
+  int16_t nlpGain = ONE_Q14;
+  int delay;
+  int16_t tmp16no1;
+  int16_t tmp16no2;
+  int16_t mu;
+  int16_t supGain;
+  int16_t zeros32, zeros16;
+  int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
+  int far_q;
+  int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff;
+
+  const int kMinPrefBand = 4;
+  const int kMaxPrefBand = 24;
+  int32_t avgHnl32 = 0;
+
+  // Determine startup state. There are three states:
+  // (0) the first CONV_LEN blocks
+  // (1) another CONV_LEN blocks
+  // (2) the rest
+
+  if (aecm->startupState < 2)
+  {
+    aecm->startupState = (aecm->totCount >= CONV_LEN) +
+                         (aecm->totCount >= CONV_LEN2);
+  }
+  // END: Determine startup state
+
+  // Buffer near and far end signals
+  memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
+  memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN);
+  if (nearendClean != NULL)
+  {
+    memcpy(aecm->dBufClean + PART_LEN,
+           nearendClean,
+           sizeof(int16_t) * PART_LEN);
+  }
+
+  // Transform far end signal from time domain to frequency domain.
+  far_q = TimeToFrequencyDomain(aecm,
+                                aecm->xBuf,
+                                dfw,
+                                xfa,
+                                &xfaSum);
+
+  // Transform noisy near end signal from time domain to frequency domain.
+  zerosDBufNoisy = TimeToFrequencyDomain(aecm,
+                                         aecm->dBufNoisy,
+                                         dfw,
+                                         dfaNoisy,
+                                         &dfaNoisySum);
+  aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
+  aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
+
+
+  if (nearendClean == NULL)
+  {
+    ptrDfaClean = dfaNoisy;
+    aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
+    aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
+    dfaCleanSum = dfaNoisySum;
+  } else
+  {
+    // Transform clean near end signal from time domain to frequency domain.
+    zerosDBufClean = TimeToFrequencyDomain(aecm,
+                                           aecm->dBufClean,
+                                           dfw,
+                                           dfaClean,
+                                           &dfaCleanSum);
+    aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
+    aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
+  }
+
+  // Get the delay
+  // Save far-end history and estimate delay
+  WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
+  if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend,
+                               xfa,
+                               PART_LEN1,
+                               far_q) == -1) {
+    return -1;
+  }
+  delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
+                                          dfaNoisy,
+                                          PART_LEN1,
+                                          zerosDBufNoisy);
+  if (delay == -1)
+  {
+    return -1;
+  }
+  else if (delay == -2)
+  {
+    // If the delay is unknown, we assume zero.
+    // NOTE: this will have to be adjusted if we ever add lookahead.
+    delay = 0;
+  }
+
+  if (aecm->fixedDelay >= 0)
+  {
+    // Use fixed delay
+    delay = aecm->fixedDelay;
+  }
+
+  // Get aligned far end spectrum
+  far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
+  zerosXBuf = (int16_t) far_q;
+  if (far_spectrum_ptr == NULL)
+  {
+    return -1;
+  }
+
+  // Calculate log(energy) and update energy threshold levels
+  WebRtcAecm_CalcEnergies(aecm,
+                          far_spectrum_ptr,
+                          zerosXBuf,
+                          dfaNoisySum,
+                          echoEst32);
+
+  // Calculate stepsize
+  mu = WebRtcAecm_CalcStepSize(aecm);
+
+  // Update counters
+  aecm->totCount++;
+
+  // This is the channel estimation algorithm.
+  // It is base on NLMS but has a variable step length,
+  // which was calculated above.
+  WebRtcAecm_UpdateChannel(aecm,
+                           far_spectrum_ptr,
+                           zerosXBuf,
+                           dfaNoisy,
+                           mu,
+                           echoEst32);
+  supGain = WebRtcAecm_CalcSuppressionGain(aecm);
+
+
+  // Calculate Wiener filter hnl[]
+  for (i = 0; i < PART_LEN1; i++)
+  {
+    // Far end signal through channel estimate in Q8
+    // How much can we shift right to preserve resolution
+    tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
+    aecm->echoFilt[i] += (tmp32no1 * 50) >> 8;
+
+    zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
+    zeros16 = WebRtcSpl_NormW16(supGain) + 1;
+    if (zeros32 + zeros16 > 16)
+    {
+      // Multiplication is safe
+      // Result in
+      // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+
+      //   aecm->xfaQDomainBuf[diff])
+      echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                              (uint16_t)supGain);
+      resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+    } else
+    {
+      tmp16no1 = 17 - zeros32 - zeros16;
+      resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
+                       RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+      if (zeros32 > tmp16no1)
+      {
+        echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                                supGain >> tmp16no1);
+      } else
+      {
+        // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
+        echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
+      }
+    }
+
+    zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
+    assert(zeros16 >= 0);  // |zeros16| is a norm, hence non-negative.
+    dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
+    if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
+      tmp16no1 = aecm->nearFilt[i] << zeros16;
+      qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
+      tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
+    } else {
+      tmp16no1 = dfa_clean_q_domain_diff < 0
+          ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
+          : aecm->nearFilt[i] << dfa_clean_q_domain_diff;
+      qDomainDiff = 0;
+      tmp16no2 = ptrDfaClean[i];
+    }
+    tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
+    tmp16no2 = (int16_t)(tmp32no1 >> 4);
+    tmp16no2 += tmp16no1;
+    zeros16 = WebRtcSpl_NormW16(tmp16no2);
+    if ((tmp16no2) & (-qDomainDiff > zeros16)) {
+      aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
+    } else {
+      aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff
+                                          : tmp16no2 >> qDomainDiff;
+    }
+
+    // Wiener filter coefficients, resulting hnl in Q14
+    if (echoEst32Gained == 0)
+    {
+      hnl[i] = ONE_Q14;
+    } else if (aecm->nearFilt[i] == 0)
+    {
+      hnl[i] = 0;
+    } else
+    {
+      // Multiply the suppression gain
+      // Rounding
+      echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
+      tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,
+                                   (uint16_t)aecm->nearFilt[i]);
+
+      // Current resolution is
+      // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32))
+      // Make sure we are in Q14
+      tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
+      if (tmp32no1 > ONE_Q14)
+      {
+        hnl[i] = 0;
+      } else if (tmp32no1 < 0)
+      {
+        hnl[i] = ONE_Q14;
+      } else
+      {
+        // 1-echoEst/dfa
+        hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
+        if (hnl[i] < 0)
+        {
+          hnl[i] = 0;
+        }
+      }
+    }
+    if (hnl[i])
+    {
+      numPosCoef++;
+    }
+  }
+  // Only in wideband. Prevent the gain in upper band from being larger than
+  // in lower band.
+  if (aecm->mult == 2)
+  {
+    // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
+    //               speech distortion in double-talk.
+    for (i = 0; i < PART_LEN1; i++)
+    {
+      hnl[i] = (int16_t)((hnl[i] * hnl[i]) >> 14);
+    }
+
+    for (i = kMinPrefBand; i <= kMaxPrefBand; i++)
+    {
+      avgHnl32 += (int32_t)hnl[i];
+    }
+    assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
+    avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
+
+    for (i = kMaxPrefBand; i < PART_LEN1; i++)
+    {
+      if (hnl[i] > (int16_t)avgHnl32)
+      {
+        hnl[i] = (int16_t)avgHnl32;
+      }
+    }
+  }
+
+  // Calculate NLP gain, result is in Q14
+  if (aecm->nlpFlag)
+  {
+    for (i = 0; i < PART_LEN1; i++)
+    {
+      // Truncate values close to zero and one.
+      if (hnl[i] > NLP_COMP_HIGH)
+      {
+        hnl[i] = ONE_Q14;
+      } else if (hnl[i] < NLP_COMP_LOW)
+      {
+        hnl[i] = 0;
+      }
+
+      // Remove outliers
+      if (numPosCoef < 3)
+      {
+        nlpGain = 0;
+      } else
+      {
+        nlpGain = ONE_Q14;
+      }
+
+      // NLP
+      if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14))
+      {
+        hnl[i] = ONE_Q14;
+      } else
+      {
+        hnl[i] = (int16_t)((hnl[i] * nlpGain) >> 14);
+      }
+
+      // multiply with Wiener coefficients
+      efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
+                                                                   hnl[i], 14));
+      efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
+                                                                   hnl[i], 14));
+    }
+  }
+  else
+  {
+    // multiply with Wiener coefficients
+    for (i = 0; i < PART_LEN1; i++)
+    {
+      efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
+                                                                   hnl[i], 14));
+      efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
+                                                                   hnl[i], 14));
+    }
+  }
+
+  if (aecm->cngMode == AecmTrue)
+  {
+    ComfortNoise(aecm, ptrDfaClean, efw, hnl);
+  }
+
+  InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
+
+  return 0;
+}
+
+static void ComfortNoise(AecmCore* aecm,
+                         const uint16_t* dfa,
+                         ComplexInt16* out,
+                         const int16_t* lambda) {
+  int16_t i;
+  int16_t tmp16;
+  int32_t tmp32;
+
+  int16_t randW16[PART_LEN];
+  int16_t uReal[PART_LEN1];
+  int16_t uImag[PART_LEN1];
+  int32_t outLShift32;
+  int16_t noiseRShift16[PART_LEN1];
+
+  int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
+  int16_t minTrackShift;
+
+  assert(shiftFromNearToNoise >= 0);
+  assert(shiftFromNearToNoise < 16);
+
+  if (aecm->noiseEstCtr < 100)
+  {
+    // Track the minimum more quickly initially.
+    aecm->noiseEstCtr++;
+    minTrackShift = 6;
+  } else
+  {
+    minTrackShift = 9;
+  }
+
+  // Estimate noise power.
+  for (i = 0; i < PART_LEN1; i++)
+  {
+    // Shift to the noise domain.
+    tmp32 = (int32_t)dfa[i];
+    outLShift32 = tmp32 << shiftFromNearToNoise;
+
+    if (outLShift32 < aecm->noiseEst[i])
+    {
+      // Reset "too low" counter
+      aecm->noiseEstTooLowCtr[i] = 0;
+      // Track the minimum.
+      if (aecm->noiseEst[i] < (1 << minTrackShift))
+      {
+        // For small values, decrease noiseEst[i] every
+        // |kNoiseEstIncCount| block. The regular approach below can not
+        // go further down due to truncation.
+        aecm->noiseEstTooHighCtr[i]++;
+        if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount)
+        {
+          aecm->noiseEst[i]--;
+          aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter
+        }
+      }
+      else
+      {
+        aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32)
+                              >> minTrackShift);
+      }
+    } else
+    {
+      // Reset "too high" counter
+      aecm->noiseEstTooHighCtr[i] = 0;
+      // Ramp slowly upwards until we hit the minimum again.
+      if ((aecm->noiseEst[i] >> 19) > 0)
+      {
+        // Avoid overflow.
+        // Multiplication with 2049 will cause wrap around. Scale
+        // down first and then multiply
+        aecm->noiseEst[i] >>= 11;
+        aecm->noiseEst[i] *= 2049;
+      }
+      else if ((aecm->noiseEst[i] >> 11) > 0)
+      {
+        // Large enough for relative increase
+        aecm->noiseEst[i] *= 2049;
+        aecm->noiseEst[i] >>= 11;
+      }
+      else
+      {
+        // Make incremental increases based on size every
+        // |kNoiseEstIncCount| block
+        aecm->noiseEstTooLowCtr[i]++;
+        if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount)
+        {
+          aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1;
+          aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
+        }
+      }
+    }
+  }
+
+  for (i = 0; i < PART_LEN1; i++)
+  {
+    tmp32 = aecm->noiseEst[i] >> shiftFromNearToNoise;
+    if (tmp32 > 32767)
+    {
+      tmp32 = 32767;
+      aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise;
+    }
+    noiseRShift16[i] = (int16_t)tmp32;
+
+    tmp16 = ONE_Q14 - lambda[i];
+    noiseRShift16[i] = (int16_t)((tmp16 * noiseRShift16[i]) >> 14);
+  }
+
+  // Generate a uniform random array on [0 2^15-1].
+  WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
+
+  // Generate noise according to estimated energy.
+  uReal[0] = 0; // Reject LF noise.
+  uImag[0] = 0;
+  for (i = 1; i < PART_LEN1; i++)
+  {
+    // Get a random index for the cos and sin tables over [0 359].
+    tmp16 = (int16_t)((359 * randW16[i - 1]) >> 15);
+
+    // Tables are in Q13.
+    uReal[i] = (int16_t)((noiseRShift16[i] * WebRtcAecm_kCosTable[tmp16]) >>
+        13);
+    uImag[i] = (int16_t)((-noiseRShift16[i] * WebRtcAecm_kSinTable[tmp16]) >>
+        13);
+  }
+  uImag[PART_LEN] = 0;
+
+  for (i = 0; i < PART_LEN1; i++)
+  {
+    out[i].real = WebRtcSpl_AddSatW16(out[i].real, uReal[i]);
+    out[i].imag = WebRtcSpl_AddSatW16(out[i].imag, uImag[i]);
+  }
+}
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_mips.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_mips.c
new file mode 100644
index 00000000..3c2343a8
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_mips.c
@@ -0,0 +1,1566 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#include <assert.h>
+
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+
+static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
+  0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
+  3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
+  6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
+  9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
+  11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
+  13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
+  15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
+  16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
+};
+
+static const int16_t kNoiseEstQDomain = 15;
+static const int16_t kNoiseEstIncCount = 5;
+
+static int16_t coefTable[] = {
+   0,   4, 256, 260, 128, 132, 384, 388,
+  64,  68, 320, 324, 192, 196, 448, 452,
+  32,  36, 288, 292, 160, 164, 416, 420,
+  96, 100, 352, 356, 224, 228, 480, 484,
+  16,  20, 272, 276, 144, 148, 400, 404,
+  80,  84, 336, 340, 208, 212, 464, 468,
+  48,  52, 304, 308, 176, 180, 432, 436,
+ 112, 116, 368, 372, 240, 244, 496, 500,
+   8,  12, 264, 268, 136, 140, 392, 396,
+  72,  76, 328, 332, 200, 204, 456, 460,
+  40,  44, 296, 300, 168, 172, 424, 428,
+ 104, 108, 360, 364, 232, 236, 488, 492,
+  24,  28, 280, 284, 152, 156, 408, 412,
+  88,  92, 344, 348, 216, 220, 472, 476,
+  56,  60, 312, 316, 184, 188, 440, 444,
+ 120, 124, 376, 380, 248, 252, 504, 508
+};
+
+static int16_t coefTable_ifft[] = {
+    0, 512, 256, 508, 128, 252, 384, 380,
+   64, 124, 320, 444, 192, 188, 448, 316,
+   32,  60, 288, 476, 160, 220, 416, 348,
+   96,  92, 352, 412, 224, 156, 480, 284,
+   16,  28, 272, 492, 144, 236, 400, 364,
+   80, 108, 336, 428, 208, 172, 464, 300,
+   48,  44, 304, 460, 176, 204, 432, 332,
+  112,  76, 368, 396, 240, 140, 496, 268,
+    8,  12, 264, 500, 136, 244, 392, 372,
+   72, 116, 328, 436, 200, 180, 456, 308,
+   40,  52, 296, 468, 168, 212, 424, 340,
+  104,  84, 360, 404, 232, 148, 488, 276,
+   24,  20, 280, 484, 152, 228, 408, 356,
+   88, 100, 344, 420, 216, 164, 472, 292,
+   56,  36, 312, 452, 184, 196, 440, 324,
+  120,  68, 376, 388, 248, 132, 504, 260
+};
+
+static void ComfortNoise(AecmCore* aecm,
+                         const uint16_t* dfa,
+                         ComplexInt16* out,
+                         const int16_t* lambda);
+
+static void WindowAndFFT(AecmCore* aecm,
+                         int16_t* fft,
+                         const int16_t* time_signal,
+                         ComplexInt16* freq_signal,
+                         int time_signal_scaling) {
+  int i, j;
+  int32_t tmp1, tmp2, tmp3, tmp4;
+  int16_t* pfrfi;
+  ComplexInt16* pfreq_signal;
+  int16_t  f_coef, s_coef;
+  int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1;
+  int32_t hann, hann1, coefs;
+
+  memset(fft, 0, sizeof(int16_t) * PART_LEN4);
+
+  // FFT of signal
+  __asm __volatile (
+    ".set        push                                                    \n\t"
+    ".set        noreorder                                               \n\t"
+    "addiu       %[shift],          %[time_signal_scaling], -14          \n\t"
+    "addiu       %[i],              $zero,                  64           \n\t"
+    "addiu       %[load_ptr],       %[time_signal],         0            \n\t"
+    "addiu       %[hann],           %[hanning],             0            \n\t"
+    "addiu       %[hann1],          %[hanning],             128          \n\t"
+    "addiu       %[coefs],          %[coefTable],           0            \n\t"
+    "bltz        %[shift],          2f                                   \n\t"
+    " negu       %[shift1],         %[shift]                             \n\t"
+   "1:                                                                   \n\t"
+    "lh          %[tmp1],           0(%[load_ptr])                       \n\t"
+    "lh          %[tmp2],           0(%[hann])                           \n\t"
+    "lh          %[tmp3],           128(%[load_ptr])                     \n\t"
+    "lh          %[tmp4],           0(%[hann1])                          \n\t"
+    "addiu       %[i],              %[i],                   -1           \n\t"
+    "mul         %[tmp1],           %[tmp1],                %[tmp2]      \n\t"
+    "mul         %[tmp3],           %[tmp3],                %[tmp4]      \n\t"
+    "lh          %[f_coef],         0(%[coefs])                          \n\t"
+    "lh          %[s_coef],         2(%[coefs])                          \n\t"
+    "addiu       %[load_ptr],       %[load_ptr],            2            \n\t"
+    "addiu       %[hann],           %[hann],                2            \n\t"
+    "addiu       %[hann1],          %[hann1],               -2           \n\t"
+    "addu        %[store_ptr1],     %[fft],                 %[f_coef]    \n\t"
+    "addu        %[store_ptr2],     %[fft],                 %[s_coef]    \n\t"
+    "sllv        %[tmp1],           %[tmp1],                %[shift]     \n\t"
+    "sllv        %[tmp3],           %[tmp3],                %[shift]     \n\t"
+    "sh          %[tmp1],           0(%[store_ptr1])                     \n\t"
+    "sh          %[tmp3],           0(%[store_ptr2])                     \n\t"
+    "bgtz        %[i],              1b                                   \n\t"
+    " addiu      %[coefs],          %[coefs],               4            \n\t"
+    "b           3f                                                      \n\t"
+    " nop                                                                \n\t"
+   "2:                                                                   \n\t"
+    "lh          %[tmp1],           0(%[load_ptr])                       \n\t"
+    "lh          %[tmp2],           0(%[hann])                           \n\t"
+    "lh          %[tmp3],           128(%[load_ptr])                     \n\t"
+    "lh          %[tmp4],           0(%[hann1])                          \n\t"
+    "addiu       %[i],              %[i],                   -1           \n\t"
+    "mul         %[tmp1],           %[tmp1],                %[tmp2]      \n\t"
+    "mul         %[tmp3],           %[tmp3],                %[tmp4]      \n\t"
+    "lh          %[f_coef],         0(%[coefs])                          \n\t"
+    "lh          %[s_coef],         2(%[coefs])                          \n\t"
+    "addiu       %[load_ptr],       %[load_ptr],            2            \n\t"
+    "addiu       %[hann],           %[hann],                2            \n\t"
+    "addiu       %[hann1],          %[hann1],               -2           \n\t"
+    "addu        %[store_ptr1],     %[fft],                 %[f_coef]    \n\t"
+    "addu        %[store_ptr2],     %[fft],                 %[s_coef]    \n\t"
+    "srav        %[tmp1],           %[tmp1],                %[shift1]    \n\t"
+    "srav        %[tmp3],           %[tmp3],                %[shift1]    \n\t"
+    "sh          %[tmp1],           0(%[store_ptr1])                     \n\t"
+    "sh          %[tmp3],           0(%[store_ptr2])                     \n\t"
+    "bgtz        %[i],              2b                                   \n\t"
+    " addiu      %[coefs],          %[coefs],               4            \n\t"
+   "3:                                                                   \n\t"
+    ".set        pop                                                     \n\t"
+    : [load_ptr] "=&r" (load_ptr), [shift] "=&r" (shift), [hann] "=&r" (hann),
+      [hann1] "=&r" (hann1), [shift1] "=&r" (shift1), [coefs] "=&r" (coefs),
+      [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+      [tmp4] "=&r" (tmp4), [i] "=&r" (i), [f_coef] "=&r" (f_coef),
+      [s_coef] "=&r" (s_coef), [store_ptr1] "=&r" (store_ptr1),
+      [store_ptr2] "=&r" (store_ptr2)
+    : [time_signal] "r" (time_signal), [coefTable] "r" (coefTable),
+      [time_signal_scaling] "r" (time_signal_scaling),
+      [hanning] "r" (WebRtcAecm_kSqrtHanning), [fft] "r" (fft)
+    : "memory", "hi", "lo"
+  );
+
+  WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);
+  pfrfi = fft;
+  pfreq_signal = freq_signal;
+
+  __asm __volatile (
+    ".set        push                                                     \n\t"
+    ".set        noreorder                                                \n\t"
+    "addiu       %[j],              $zero,                 128            \n\t"
+   "1:                                                                    \n\t"
+    "lh          %[tmp1],           0(%[pfrfi])                           \n\t"
+    "lh          %[tmp2],           2(%[pfrfi])                           \n\t"
+    "lh          %[tmp3],           4(%[pfrfi])                           \n\t"
+    "lh          %[tmp4],           6(%[pfrfi])                           \n\t"
+    "subu        %[tmp2],           $zero,                 %[tmp2]        \n\t"
+    "sh          %[tmp1],           0(%[pfreq_signal])                    \n\t"
+    "sh          %[tmp2],           2(%[pfreq_signal])                    \n\t"
+    "subu        %[tmp4],           $zero,                 %[tmp4]        \n\t"
+    "sh          %[tmp3],           4(%[pfreq_signal])                    \n\t"
+    "sh          %[tmp4],           6(%[pfreq_signal])                    \n\t"
+    "lh          %[tmp1],           8(%[pfrfi])                           \n\t"
+    "lh          %[tmp2],           10(%[pfrfi])                          \n\t"
+    "lh          %[tmp3],           12(%[pfrfi])                          \n\t"
+    "lh          %[tmp4],           14(%[pfrfi])                          \n\t"
+    "addiu       %[j],              %[j],                  -8             \n\t"
+    "subu        %[tmp2],           $zero,                 %[tmp2]        \n\t"
+    "sh          %[tmp1],           8(%[pfreq_signal])                    \n\t"
+    "sh          %[tmp2],           10(%[pfreq_signal])                   \n\t"
+    "subu        %[tmp4],           $zero,                 %[tmp4]        \n\t"
+    "sh          %[tmp3],           12(%[pfreq_signal])                   \n\t"
+    "sh          %[tmp4],           14(%[pfreq_signal])                   \n\t"
+    "addiu       %[pfreq_signal],   %[pfreq_signal],       16             \n\t"
+    "bgtz        %[j],              1b                                    \n\t"
+    " addiu      %[pfrfi],          %[pfrfi],              16             \n\t"
+    ".set        pop                                                      \n\t"
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+      [j] "=&r" (j), [pfrfi] "+r" (pfrfi), [pfreq_signal] "+r" (pfreq_signal),
+      [tmp4] "=&r" (tmp4)
+    :
+    : "memory"
+  );
+}
+
+static void InverseFFTAndWindow(AecmCore* aecm,
+                                int16_t* fft,
+                                ComplexInt16* efw,
+                                int16_t* output,
+                                const int16_t* nearendClean) {
+  int i, outCFFT;
+  int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im;
+  int16_t* pcoefTable_ifft = coefTable_ifft;
+  int16_t* pfft = fft;
+  int16_t* ppfft = fft;
+  ComplexInt16* pefw = efw;
+  int32_t out_aecm;
+  int16_t* paecm_buf = aecm->outBuf;
+  const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning;
+  const int16_t* pp_kSqrtHanning = &WebRtcAecm_kSqrtHanning[PART_LEN];
+  int16_t* output1 = output;
+
+  __asm __volatile (
+    ".set      push                                                        \n\t"
+    ".set      noreorder                                                   \n\t"
+    "addiu     %[i],                $zero,                   64            \n\t"
+   "1:                                                                     \n\t"
+    "lh        %[tmp1],             0(%[pcoefTable_ifft])                  \n\t"
+    "lh        %[tmp2],             2(%[pcoefTable_ifft])                  \n\t"
+    "lh        %[tmp_re],           0(%[pefw])                             \n\t"
+    "lh        %[tmp_im],           2(%[pefw])                             \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp2]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp1]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "subu      %[tmp_im],           $zero,                   %[tmp_im]     \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "lh        %[tmp1],             4(%[pcoefTable_ifft])                  \n\t"
+    "lh        %[tmp2],             6(%[pcoefTable_ifft])                  \n\t"
+    "lh        %[tmp_re],           4(%[pefw])                             \n\t"
+    "lh        %[tmp_im],           6(%[pefw])                             \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp2]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp1]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "subu      %[tmp_im],           $zero,                   %[tmp_im]     \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "lh        %[tmp1],             8(%[pcoefTable_ifft])                  \n\t"
+    "lh        %[tmp2],             10(%[pcoefTable_ifft])                 \n\t"
+    "lh        %[tmp_re],           8(%[pefw])                             \n\t"
+    "lh        %[tmp_im],           10(%[pefw])                            \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp2]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp1]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "subu      %[tmp_im],           $zero,                   %[tmp_im]     \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "lh        %[tmp1],             12(%[pcoefTable_ifft])                 \n\t"
+    "lh        %[tmp2],             14(%[pcoefTable_ifft])                 \n\t"
+    "lh        %[tmp_re],           12(%[pefw])                            \n\t"
+    "lh        %[tmp_im],           14(%[pefw])                            \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp2]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "addu      %[pfft],             %[fft],                  %[tmp1]       \n\t"
+    "sh        %[tmp_re],           0(%[pfft])                             \n\t"
+    "subu      %[tmp_im],           $zero,                   %[tmp_im]     \n\t"
+    "sh        %[tmp_im],           2(%[pfft])                             \n\t"
+    "addiu     %[pcoefTable_ifft],  %[pcoefTable_ifft],      16            \n\t"
+    "addiu     %[i],                %[i],                    -4            \n\t"
+    "bgtz      %[i],                1b                                     \n\t"
+    " addiu    %[pefw],             %[pefw],                 16            \n\t"
+    ".set      pop                                                         \n\t"
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
+      [i] "=&r" (i), [tmp_re] "=&r" (tmp_re), [tmp_im] "=&r" (tmp_im),
+      [pefw] "+r" (pefw), [pcoefTable_ifft] "+r" (pcoefTable_ifft),
+      [fft] "+r" (fft)
+    :
+    : "memory"
+  );
+
+  fft[2] = efw[PART_LEN].real;
+  fft[3] = -efw[PART_LEN].imag;
+
+  outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1);
+  pfft = fft;
+
+  __asm __volatile (
+    ".set       push                                               \n\t"
+    ".set       noreorder                                          \n\t"
+    "addiu      %[i],            $zero,               128          \n\t"
+   "1:                                                             \n\t"
+    "lh         %[tmp1],         0(%[ppfft])                       \n\t"
+    "lh         %[tmp2],         4(%[ppfft])                       \n\t"
+    "lh         %[tmp3],         8(%[ppfft])                       \n\t"
+    "lh         %[tmp4],         12(%[ppfft])                      \n\t"
+    "addiu      %[i],            %[i],                -4           \n\t"
+    "sh         %[tmp1],         0(%[pfft])                        \n\t"
+    "sh         %[tmp2],         2(%[pfft])                        \n\t"
+    "sh         %[tmp3],         4(%[pfft])                        \n\t"
+    "sh         %[tmp4],         6(%[pfft])                        \n\t"
+    "addiu      %[ppfft],        %[ppfft],            16           \n\t"
+    "bgtz       %[i],            1b                                \n\t"
+    " addiu     %[pfft],         %[pfft],             8            \n\t"
+    ".set       pop                                                \n\t"
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
+      [i] "=&r" (i), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
+      [ppfft] "+r" (ppfft)
+    :
+    : "memory"
+  );
+
+  pfft = fft;
+  out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain);
+
+  __asm __volatile (
+    ".set       push                                                       \n\t"
+    ".set       noreorder                                                  \n\t"
+    "addiu      %[i],                $zero,                  64            \n\t"
+   "11:                                                                    \n\t"
+    "lh         %[tmp1],             0(%[pfft])                            \n\t"
+    "lh         %[tmp2],             0(%[p_kSqrtHanning])                  \n\t"
+    "addiu      %[i],                %[i],                   -2            \n\t"
+    "mul        %[tmp1],             %[tmp1],                %[tmp2]       \n\t"
+    "lh         %[tmp3],             2(%[pfft])                            \n\t"
+    "lh         %[tmp4],             2(%[p_kSqrtHanning])                  \n\t"
+    "mul        %[tmp3],             %[tmp3],                %[tmp4]       \n\t"
+    "addiu      %[tmp1],             %[tmp1],                8192          \n\t"
+    "sra        %[tmp1],             %[tmp1],                14            \n\t"
+    "addiu      %[tmp3],             %[tmp3],                8192          \n\t"
+    "sra        %[tmp3],             %[tmp3],                14            \n\t"
+    "bgez       %[out_aecm],         1f                                    \n\t"
+    " negu      %[tmp2],             %[out_aecm]                           \n\t"
+    "srav       %[tmp1],             %[tmp1],                %[tmp2]       \n\t"
+    "b          2f                                                         \n\t"
+    " srav      %[tmp3],             %[tmp3],                %[tmp2]       \n\t"
+   "1:                                                                     \n\t"
+    "sllv       %[tmp1],             %[tmp1],                %[out_aecm]   \n\t"
+    "sllv       %[tmp3],             %[tmp3],                %[out_aecm]   \n\t"
+   "2:                                                                     \n\t"
+    "lh         %[tmp4],             0(%[paecm_buf])                       \n\t"
+    "lh         %[tmp2],             2(%[paecm_buf])                       \n\t"
+    "addu       %[tmp3],             %[tmp3],                %[tmp2]       \n\t"
+    "addu       %[tmp1],             %[tmp1],                %[tmp4]       \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shll_s.w   %[tmp1],             %[tmp1],                16            \n\t"
+    "sra        %[tmp1],             %[tmp1],                16            \n\t"
+    "shll_s.w   %[tmp3],             %[tmp3],                16            \n\t"
+    "sra        %[tmp3],             %[tmp3],                16            \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "sra        %[tmp4],             %[tmp1],                31            \n\t"
+    "sra        %[tmp2],             %[tmp1],                15            \n\t"
+    "beq        %[tmp4],             %[tmp2],                3f            \n\t"
+    " ori       %[tmp2],             $zero,                  0x7fff        \n\t"
+    "xor        %[tmp1],             %[tmp2],                %[tmp4]       \n\t"
+   "3:                                                                     \n\t"
+    "sra        %[tmp2],             %[tmp3],                31            \n\t"
+    "sra        %[tmp4],             %[tmp3],                15            \n\t"
+    "beq        %[tmp2],             %[tmp4],                4f            \n\t"
+    " ori       %[tmp4],             $zero,                  0x7fff        \n\t"
+    "xor        %[tmp3],             %[tmp4],                %[tmp2]       \n\t"
+   "4:                                                                     \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sh         %[tmp1],             0(%[pfft])                            \n\t"
+    "sh         %[tmp1],             0(%[output1])                         \n\t"
+    "sh         %[tmp3],             2(%[pfft])                            \n\t"
+    "sh         %[tmp3],             2(%[output1])                         \n\t"
+    "lh         %[tmp1],             128(%[pfft])                          \n\t"
+    "lh         %[tmp2],             0(%[pp_kSqrtHanning])                 \n\t"
+    "mul        %[tmp1],             %[tmp1],                %[tmp2]       \n\t"
+    "lh         %[tmp3],             130(%[pfft])                          \n\t"
+    "lh         %[tmp4],             -2(%[pp_kSqrtHanning])                \n\t"
+    "mul        %[tmp3],             %[tmp3],                %[tmp4]       \n\t"
+    "sra        %[tmp1],             %[tmp1],                14            \n\t"
+    "sra        %[tmp3],             %[tmp3],                14            \n\t"
+    "bgez       %[out_aecm],         5f                                    \n\t"
+    " negu      %[tmp2],             %[out_aecm]                           \n\t"
+    "srav       %[tmp3],             %[tmp3],                %[tmp2]       \n\t"
+    "b          6f                                                         \n\t"
+    " srav      %[tmp1],             %[tmp1],                %[tmp2]       \n\t"
+   "5:                                                                     \n\t"
+    "sllv       %[tmp1],             %[tmp1],                %[out_aecm]   \n\t"
+    "sllv       %[tmp3],             %[tmp3],                %[out_aecm]   \n\t"
+   "6:                                                                     \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shll_s.w   %[tmp1],             %[tmp1],                16            \n\t"
+    "sra        %[tmp1],             %[tmp1],                16            \n\t"
+    "shll_s.w   %[tmp3],             %[tmp3],                16            \n\t"
+    "sra        %[tmp3],             %[tmp3],                16            \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "sra        %[tmp4],             %[tmp1],                31            \n\t"
+    "sra        %[tmp2],             %[tmp1],                15            \n\t"
+    "beq        %[tmp4],             %[tmp2],                7f            \n\t"
+    " ori       %[tmp2],             $zero,                  0x7fff        \n\t"
+    "xor        %[tmp1],             %[tmp2],                %[tmp4]       \n\t"
+   "7:                                                                     \n\t"
+    "sra        %[tmp2],             %[tmp3],                31            \n\t"
+    "sra        %[tmp4],             %[tmp3],                15            \n\t"
+    "beq        %[tmp2],             %[tmp4],                8f            \n\t"
+    " ori       %[tmp4],             $zero,                  0x7fff        \n\t"
+    "xor        %[tmp3],             %[tmp4],                %[tmp2]       \n\t"
+   "8:                                                                     \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sh         %[tmp1],             0(%[paecm_buf])                       \n\t"
+    "sh         %[tmp3],             2(%[paecm_buf])                       \n\t"
+    "addiu      %[output1],          %[output1],             4             \n\t"
+    "addiu      %[paecm_buf],        %[paecm_buf],           4             \n\t"
+    "addiu      %[pfft],             %[pfft],                4             \n\t"
+    "addiu      %[p_kSqrtHanning],   %[p_kSqrtHanning],      4             \n\t"
+    "bgtz       %[i],                11b                                   \n\t"
+    " addiu     %[pp_kSqrtHanning],  %[pp_kSqrtHanning],     -4            \n\t"
+    ".set       pop                                                        \n\t"
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
+      [output1] "+r" (output1), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
+      [paecm_buf] "+r" (paecm_buf), [i] "=&r" (i),
+      [pp_kSqrtHanning] "+r" (pp_kSqrtHanning),
+      [p_kSqrtHanning] "+r" (p_kSqrtHanning)
+    : [out_aecm] "r" (out_aecm),
+      [WebRtcAecm_kSqrtHanning] "r" (WebRtcAecm_kSqrtHanning)
+    : "hi", "lo","memory"
+  );
+
+  // Copy the current block to the old position
+  // (aecm->outBuf is shifted elsewhere)
+  memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
+  memcpy(aecm->dBufNoisy,
+         aecm->dBufNoisy + PART_LEN,
+         sizeof(int16_t) * PART_LEN);
+  if (nearendClean != NULL) {
+    memcpy(aecm->dBufClean,
+           aecm->dBufClean + PART_LEN,
+           sizeof(int16_t) * PART_LEN);
+  }
+}
+
+void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm,
+                                        const uint16_t* far_spectrum,
+                                        int32_t* echo_est,
+                                        uint32_t* far_energy,
+                                        uint32_t* echo_energy_adapt,
+                                        uint32_t* echo_energy_stored) {
+  int i;
+  uint32_t par1 = (*far_energy);
+  uint32_t par2 = (*echo_energy_adapt);
+  uint32_t par3 = (*echo_energy_stored);
+  int16_t* ch_stored_p = &(aecm->channelStored[0]);
+  int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]);
+  uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0]));
+  int32_t* echo_p = &(echo_est[0]);
+  int32_t temp0, stored0, echo0, adept0, spectrum0;
+  int32_t stored1, adept1, spectrum1, echo1, temp1;
+
+  // Get energy for the delayed far end signal and estimated
+  // echo using both stored and adapted channels.
+  for (i = 0; i < PART_LEN; i+= 4) {
+    __asm __volatile (
+      ".set           push                                            \n\t"
+      ".set           noreorder                                       \n\t"
+      "lh             %[stored0],     0(%[ch_stored_p])               \n\t"
+      "lhu            %[adept0],      0(%[ch_adapt_p])                \n\t"
+      "lhu            %[spectrum0],   0(%[spectrum_p])                \n\t"
+      "lh             %[stored1],     2(%[ch_stored_p])               \n\t"
+      "lhu            %[adept1],      2(%[ch_adapt_p])                \n\t"
+      "lhu            %[spectrum1],   2(%[spectrum_p])                \n\t"
+      "mul            %[echo0],       %[stored0],     %[spectrum0]    \n\t"
+      "mul            %[temp0],       %[adept0],      %[spectrum0]    \n\t"
+      "mul            %[echo1],       %[stored1],     %[spectrum1]    \n\t"
+      "mul            %[temp1],       %[adept1],      %[spectrum1]    \n\t"
+      "addu           %[par1],        %[par1],        %[spectrum0]    \n\t"
+      "addu           %[par1],        %[par1],        %[spectrum1]    \n\t"
+      "addiu          %[echo_p],      %[echo_p],      16              \n\t"
+      "addu           %[par3],        %[par3],        %[echo0]        \n\t"
+      "addu           %[par2],        %[par2],        %[temp0]        \n\t"
+      "addu           %[par3],        %[par3],        %[echo1]        \n\t"
+      "addu           %[par2],        %[par2],        %[temp1]        \n\t"
+      "usw            %[echo0],       -16(%[echo_p])                  \n\t"
+      "usw            %[echo1],       -12(%[echo_p])                  \n\t"
+      "lh             %[stored0],     4(%[ch_stored_p])               \n\t"
+      "lhu            %[adept0],      4(%[ch_adapt_p])                \n\t"
+      "lhu            %[spectrum0],   4(%[spectrum_p])                \n\t"
+      "lh             %[stored1],     6(%[ch_stored_p])               \n\t"
+      "lhu            %[adept1],      6(%[ch_adapt_p])                \n\t"
+      "lhu            %[spectrum1],   6(%[spectrum_p])                \n\t"
+      "mul            %[echo0],       %[stored0],     %[spectrum0]    \n\t"
+      "mul            %[temp0],       %[adept0],      %[spectrum0]    \n\t"
+      "mul            %[echo1],       %[stored1],     %[spectrum1]    \n\t"
+      "mul            %[temp1],       %[adept1],      %[spectrum1]    \n\t"
+      "addu           %[par1],        %[par1],        %[spectrum0]    \n\t"
+      "addu           %[par1],        %[par1],        %[spectrum1]    \n\t"
+      "addiu          %[ch_stored_p], %[ch_stored_p], 8               \n\t"
+      "addiu          %[ch_adapt_p],  %[ch_adapt_p],  8               \n\t"
+      "addiu          %[spectrum_p],  %[spectrum_p],  8               \n\t"
+      "addu           %[par3],        %[par3],        %[echo0]        \n\t"
+      "addu           %[par2],        %[par2],        %[temp0]        \n\t"
+      "addu           %[par3],        %[par3],        %[echo1]        \n\t"
+      "addu           %[par2],        %[par2],        %[temp1]        \n\t"
+      "usw            %[echo0],       -8(%[echo_p])                   \n\t"
+      "usw            %[echo1],       -4(%[echo_p])                   \n\t"
+      ".set           pop                                             \n\t"
+      : [temp0] "=&r" (temp0), [stored0] "=&r" (stored0),
+        [adept0] "=&r" (adept0), [spectrum0] "=&r" (spectrum0),
+        [echo0] "=&r" (echo0), [echo_p] "+r" (echo_p), [par3] "+r" (par3),
+        [par1] "+r" (par1), [par2] "+r" (par2), [stored1] "=&r" (stored1),
+        [adept1] "=&r" (adept1), [echo1] "=&r" (echo1),
+        [spectrum1] "=&r" (spectrum1), [temp1] "=&r" (temp1),
+        [ch_stored_p] "+r" (ch_stored_p), [ch_adapt_p] "+r" (ch_adapt_p),
+        [spectrum_p] "+r" (spectrum_p)
+      :
+      : "hi", "lo", "memory"
+    );
+  }
+
+  echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
+                                             far_spectrum[PART_LEN]);
+  par1 += (uint32_t)(far_spectrum[PART_LEN]);
+  par2 += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN];
+  par3 += (uint32_t)echo_est[PART_LEN];
+
+  (*far_energy) = par1;
+  (*echo_energy_adapt) = par2;
+  (*echo_energy_stored) = par3;
+}
+
+#if defined(MIPS_DSP_R1_LE)
+void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm,
+                                          const uint16_t* far_spectrum,
+                                          int32_t* echo_est) {
+  int i;
+  int16_t* temp1;
+  uint16_t* temp8;
+  int32_t temp0, temp2, temp3, temp4, temp5, temp6;
+  int32_t* temp7 = &(echo_est[0]);
+  temp1 = &(aecm->channelStored[0]);
+  temp8 = (uint16_t*)(&far_spectrum[0]);
+
+  // During startup we store the channel every block.
+  memcpy(aecm->channelStored, aecm->channelAdapt16,
+         sizeof(int16_t) * PART_LEN1);
+  // Recalculate echo estimate
+  for (i = 0; i < PART_LEN; i += 4) {
+    __asm __volatile (
+      "ulw            %[temp0],   0(%[temp8])               \n\t"
+      "ulw            %[temp2],   0(%[temp1])               \n\t"
+      "ulw            %[temp4],   4(%[temp8])               \n\t"
+      "ulw            %[temp5],   4(%[temp1])               \n\t"
+      "muleq_s.w.phl  %[temp3],   %[temp2],     %[temp0]    \n\t"
+      "muleq_s.w.phr  %[temp0],   %[temp2],     %[temp0]    \n\t"
+      "muleq_s.w.phl  %[temp6],   %[temp5],     %[temp4]    \n\t"
+      "muleq_s.w.phr  %[temp4],   %[temp5],     %[temp4]    \n\t"
+      "addiu          %[temp7],   %[temp7],     16          \n\t"
+      "addiu          %[temp1],   %[temp1],     8           \n\t"
+      "addiu          %[temp8],   %[temp8],     8           \n\t"
+      "sra            %[temp3],   %[temp3],     1           \n\t"
+      "sra            %[temp0],   %[temp0],     1           \n\t"
+      "sra            %[temp6],   %[temp6],     1           \n\t"
+      "sra            %[temp4],   %[temp4],     1           \n\t"
+      "usw            %[temp3],   -12(%[temp7])             \n\t"
+      "usw            %[temp0],   -16(%[temp7])             \n\t"
+      "usw            %[temp6],   -4(%[temp7])              \n\t"
+      "usw            %[temp4],   -8(%[temp7])              \n\t"
+      : [temp0] "=&r" (temp0), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+        [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6),
+        [temp1] "+r" (temp1), [temp8] "+r" (temp8), [temp7] "+r" (temp7)
+      :
+      : "hi", "lo", "memory"
+    );
+  }
+  echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
+                                      far_spectrum[i]);
+}
+
+void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm) {
+  int i;
+  int32_t* temp3;
+  int16_t* temp0;
+  int32_t temp1, temp2, temp4, temp5;
+
+  temp0 = &(aecm->channelStored[0]);
+  temp3 = &(aecm->channelAdapt32[0]);
+
+  // The stored channel has a significantly lower MSE than the adaptive one for
+  // two consecutive calculations. Reset the adaptive channel.
+  memcpy(aecm->channelAdapt16,
+         aecm->channelStored,
+         sizeof(int16_t) * PART_LEN1);
+
+  // Restore the W32 channel
+  for (i = 0; i < PART_LEN; i += 4) {
+    __asm __volatile (
+      "ulw            %[temp1], 0(%[temp0])           \n\t"
+      "ulw            %[temp4], 4(%[temp0])           \n\t"
+      "preceq.w.phl   %[temp2], %[temp1]              \n\t"
+      "preceq.w.phr   %[temp1], %[temp1]              \n\t"
+      "preceq.w.phl   %[temp5], %[temp4]              \n\t"
+      "preceq.w.phr   %[temp4], %[temp4]              \n\t"
+      "addiu          %[temp0], %[temp0], 8           \n\t"
+      "usw            %[temp2], 4(%[temp3])           \n\t"
+      "usw            %[temp1], 0(%[temp3])           \n\t"
+      "usw            %[temp5], 12(%[temp3])          \n\t"
+      "usw            %[temp4], 8(%[temp3])           \n\t"
+      "addiu          %[temp3], %[temp3], 16          \n\t"
+      : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),
+        [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),
+        [temp3] "+r" (temp3), [temp0] "+r" (temp0)
+      :
+      : "memory"
+    );
+  }
+
+  aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16;
+}
+#endif  // #if defined(MIPS_DSP_R1_LE)
+
+// Transforms a time domain signal into the frequency domain, outputting the
+// complex valued signal, absolute value and sum of absolute values.
+//
+// time_signal          [in]    Pointer to time domain signal
+// freq_signal_real     [out]   Pointer to real part of frequency domain array
+// freq_signal_imag     [out]   Pointer to imaginary part of frequency domain
+//                              array
+// freq_signal_abs      [out]   Pointer to absolute value of frequency domain
+//                              array
+// freq_signal_sum_abs  [out]   Pointer to the sum of all absolute values in
+//                              the frequency domain array
+// return value                 The Q-domain of current frequency values
+//
+static int TimeToFrequencyDomain(AecmCore* aecm,
+                                 const int16_t* time_signal,
+                                 ComplexInt16* freq_signal,
+                                 uint16_t* freq_signal_abs,
+                                 uint32_t* freq_signal_sum_abs) {
+  int i = 0;
+  int time_signal_scaling = 0;
+
+  // In fft_buf, +16 for 32-byte alignment.
+  int16_t fft_buf[PART_LEN4 + 16];
+  int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);
+
+  int16_t tmp16no1;
+#if !defined(MIPS_DSP_R2_LE)
+  int32_t tmp32no1;
+  int32_t tmp32no2;
+  int16_t tmp16no2;
+#else
+  int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13;
+  int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23;
+  int16_t* freqp;
+  uint16_t* freqabsp;
+  uint32_t freqt0, freqt1, freqt2, freqt3;
+  uint32_t freqs;
+#endif
+
+#ifdef AECM_DYNAMIC_Q
+  tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
+  time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
+#endif
+
+  WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
+
+  // Extract imaginary and real part,
+  // calculate the magnitude for all frequency bins
+  freq_signal[0].imag = 0;
+  freq_signal[PART_LEN].imag = 0;
+  freq_signal[PART_LEN].real = fft[PART_LEN2];
+  freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
+  freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
+    freq_signal[PART_LEN].real);
+  (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
+    (uint32_t)(freq_signal_abs[PART_LEN]);
+
+#if !defined(MIPS_DSP_R2_LE)
+  for (i = 1; i < PART_LEN; i++) {
+    if (freq_signal[i].real == 0)
+    {
+      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(
+        freq_signal[i].imag);
+    }
+    else if (freq_signal[i].imag == 0)
+    {
+      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(
+        freq_signal[i].real);
+    }
+    else
+    {
+      // Approximation for magnitude of complex fft output
+      // magn = sqrt(real^2 + imag^2)
+      // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
+      //
+      // The parameters alpha and beta are stored in Q15
+      tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+      tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+      tmp32no1 = tmp16no1 * tmp16no1;
+      tmp32no2 = tmp16no2 * tmp16no2;
+      tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);
+      tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
+
+      freq_signal_abs[i] = (uint16_t)tmp32no1;
+    }
+    (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
+  }
+#else // #if !defined(MIPS_DSP_R2_LE)
+  freqs = (uint32_t)(freq_signal_abs[0]) +
+          (uint32_t)(freq_signal_abs[PART_LEN]);
+  freqp = &(freq_signal[1].real);
+
+  __asm __volatile (
+    "lw             %[freqt0],      0(%[freqp])             \n\t"
+    "lw             %[freqt1],      4(%[freqp])             \n\t"
+    "lw             %[freqt2],      8(%[freqp])             \n\t"
+    "mult           $ac0,           $zero,      $zero       \n\t"
+    "mult           $ac1,           $zero,      $zero       \n\t"
+    "mult           $ac2,           $zero,      $zero       \n\t"
+    "dpaq_s.w.ph    $ac0,           %[freqt0],  %[freqt0]   \n\t"
+    "dpaq_s.w.ph    $ac1,           %[freqt1],  %[freqt1]   \n\t"
+    "dpaq_s.w.ph    $ac2,           %[freqt2],  %[freqt2]   \n\t"
+    "addiu          %[freqp],       %[freqp],   12          \n\t"
+    "extr.w         %[tmp32no20],   $ac0,       1           \n\t"
+    "extr.w         %[tmp32no21],   $ac1,       1           \n\t"
+    "extr.w         %[tmp32no22],   $ac2,       1           \n\t"
+    : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1),
+      [freqt2] "=&r" (freqt2), [freqp] "+r" (freqp),
+      [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21),
+      [tmp32no22] "=r" (tmp32no22)
+    :
+    : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo"
+  );
+
+  tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);
+  tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);
+  tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);
+  freq_signal_abs[1] = (uint16_t)tmp32no10;
+  freq_signal_abs[2] = (uint16_t)tmp32no11;
+  freq_signal_abs[3] = (uint16_t)tmp32no12;
+  freqs += (uint32_t)tmp32no10;
+  freqs += (uint32_t)tmp32no11;
+  freqs += (uint32_t)tmp32no12;
+  freqabsp = &(freq_signal_abs[4]);
+  for (i = 4; i < PART_LEN; i+=4)
+  {
+    __asm __volatile (
+      "ulw            %[freqt0],      0(%[freqp])                 \n\t"
+      "ulw            %[freqt1],      4(%[freqp])                 \n\t"
+      "ulw            %[freqt2],      8(%[freqp])                 \n\t"
+      "ulw            %[freqt3],      12(%[freqp])                \n\t"
+      "mult           $ac0,           $zero,          $zero       \n\t"
+      "mult           $ac1,           $zero,          $zero       \n\t"
+      "mult           $ac2,           $zero,          $zero       \n\t"
+      "mult           $ac3,           $zero,          $zero       \n\t"
+      "dpaq_s.w.ph    $ac0,           %[freqt0],      %[freqt0]   \n\t"
+      "dpaq_s.w.ph    $ac1,           %[freqt1],      %[freqt1]   \n\t"
+      "dpaq_s.w.ph    $ac2,           %[freqt2],      %[freqt2]   \n\t"
+      "dpaq_s.w.ph    $ac3,           %[freqt3],      %[freqt3]   \n\t"
+      "addiu          %[freqp],       %[freqp],       16          \n\t"
+      "addiu          %[freqabsp],    %[freqabsp],    8           \n\t"
+      "extr.w         %[tmp32no20],   $ac0,           1           \n\t"
+      "extr.w         %[tmp32no21],   $ac1,           1           \n\t"
+      "extr.w         %[tmp32no22],   $ac2,           1           \n\t"
+      "extr.w         %[tmp32no23],   $ac3,           1           \n\t"
+      : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1),
+        [freqt2] "=&r" (freqt2), [freqt3] "=&r" (freqt3),
+        [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21),
+        [tmp32no22] "=r" (tmp32no22), [tmp32no23] "=r" (tmp32no23),
+        [freqabsp] "+r" (freqabsp), [freqp] "+r" (freqp)
+      :
+      : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
+        "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
+    );
+
+    tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);
+    tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);
+    tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);
+    tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23);
+
+    __asm __volatile (
+      "sh             %[tmp32no10],   -8(%[freqabsp])                 \n\t"
+      "sh             %[tmp32no11],   -6(%[freqabsp])                 \n\t"
+      "sh             %[tmp32no12],   -4(%[freqabsp])                 \n\t"
+      "sh             %[tmp32no13],   -2(%[freqabsp])                 \n\t"
+      "addu           %[freqs],       %[freqs],       %[tmp32no10]    \n\t"
+      "addu           %[freqs],       %[freqs],       %[tmp32no11]    \n\t"
+      "addu           %[freqs],       %[freqs],       %[tmp32no12]    \n\t"
+      "addu           %[freqs],       %[freqs],       %[tmp32no13]    \n\t"
+      : [freqs] "+r" (freqs)
+      : [tmp32no10] "r" (tmp32no10), [tmp32no11] "r" (tmp32no11),
+        [tmp32no12] "r" (tmp32no12), [tmp32no13] "r" (tmp32no13),
+        [freqabsp] "r" (freqabsp)
+      : "memory"
+    );
+  }
+
+  (*freq_signal_sum_abs) = freqs;
+#endif
+
+  return time_signal_scaling;
+}
+
+int WebRtcAecm_ProcessBlock(AecmCore* aecm,
+                            const int16_t* farend,
+                            const int16_t* nearendNoisy,
+                            const int16_t* nearendClean,
+                            int16_t* output) {
+  int i;
+  uint32_t xfaSum;
+  uint32_t dfaNoisySum;
+  uint32_t dfaCleanSum;
+  uint32_t echoEst32Gained;
+  uint32_t tmpU32;
+  int32_t tmp32no1;
+
+  uint16_t xfa[PART_LEN1];
+  uint16_t dfaNoisy[PART_LEN1];
+  uint16_t dfaClean[PART_LEN1];
+  uint16_t* ptrDfaClean = dfaClean;
+  const uint16_t* far_spectrum_ptr = NULL;
+
+  // 32 byte aligned buffers (with +8 or +16).
+  int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
+  int32_t echoEst32_buf[PART_LEN1 + 8];
+  int32_t dfw_buf[PART_LEN2 + 8];
+  int32_t efw_buf[PART_LEN2 + 8];
+
+  int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~ 31);
+  int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~ 31);
+  ComplexInt16* dfw = (ComplexInt16*)(((uint32_t)dfw_buf + 31) & ~31);
+  ComplexInt16* efw = (ComplexInt16*)(((uint32_t)efw_buf + 31) & ~31);
+
+  int16_t hnl[PART_LEN1];
+  int16_t numPosCoef = 0;
+  int delay;
+  int16_t tmp16no1;
+  int16_t tmp16no2;
+  int16_t mu;
+  int16_t supGain;
+  int16_t zeros32, zeros16;
+  int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
+  int far_q;
+  int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff;
+
+  const int kMinPrefBand = 4;
+  const int kMaxPrefBand = 24;
+  int32_t avgHnl32 = 0;
+
+  int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+  int16_t* ptr;
+  int16_t* ptr1;
+  int16_t* er_ptr;
+  int16_t* dr_ptr;
+
+  ptr = &hnl[0];
+  ptr1 = &hnl[0];
+  er_ptr = &efw[0].real;
+  dr_ptr = &dfw[0].real;
+
+  // Determine startup state. There are three states:
+  // (0) the first CONV_LEN blocks
+  // (1) another CONV_LEN blocks
+  // (2) the rest
+
+  if (aecm->startupState < 2) {
+    aecm->startupState = (aecm->totCount >= CONV_LEN) +
+                         (aecm->totCount >= CONV_LEN2);
+  }
+  // END: Determine startup state
+
+  // Buffer near and far end signals
+  memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
+  memcpy(aecm->dBufNoisy + PART_LEN,
+         nearendNoisy,
+         sizeof(int16_t) * PART_LEN);
+  if (nearendClean != NULL) {
+    memcpy(aecm->dBufClean + PART_LEN,
+           nearendClean,
+           sizeof(int16_t) * PART_LEN);
+  }
+
+  // Transform far end signal from time domain to frequency domain.
+  far_q = TimeToFrequencyDomain(aecm,
+                                aecm->xBuf,
+                                dfw,
+                                xfa,
+                                &xfaSum);
+
+  // Transform noisy near end signal from time domain to frequency domain.
+  zerosDBufNoisy = TimeToFrequencyDomain(aecm,
+                                         aecm->dBufNoisy,
+                                         dfw,
+                                         dfaNoisy,
+                                         &dfaNoisySum);
+  aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
+  aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
+
+  if (nearendClean == NULL) {
+    ptrDfaClean = dfaNoisy;
+    aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
+    aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
+    dfaCleanSum = dfaNoisySum;
+  } else {
+    // Transform clean near end signal from time domain to frequency domain.
+    zerosDBufClean = TimeToFrequencyDomain(aecm,
+                                           aecm->dBufClean,
+                                           dfw,
+                                           dfaClean,
+                                           &dfaCleanSum);
+    aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
+    aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
+  }
+
+  // Get the delay
+  // Save far-end history and estimate delay
+  WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
+
+  if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1,
+                               far_q) == -1) {
+    return -1;
+  }
+  delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
+                                          dfaNoisy,
+                                          PART_LEN1,
+                                          zerosDBufNoisy);
+  if (delay == -1) {
+    return -1;
+  }
+  else if (delay == -2) {
+    // If the delay is unknown, we assume zero.
+    // NOTE: this will have to be adjusted if we ever add lookahead.
+    delay = 0;
+  }
+
+  if (aecm->fixedDelay >= 0) {
+    // Use fixed delay
+    delay = aecm->fixedDelay;
+  }
+
+  // Get aligned far end spectrum
+  far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
+  zerosXBuf = (int16_t) far_q;
+
+  if (far_spectrum_ptr == NULL) {
+    return -1;
+  }
+
+  // Calculate log(energy) and update energy threshold levels
+  WebRtcAecm_CalcEnergies(aecm,
+                          far_spectrum_ptr,
+                          zerosXBuf,
+                          dfaNoisySum,
+                          echoEst32);
+  // Calculate stepsize
+  mu = WebRtcAecm_CalcStepSize(aecm);
+
+  // Update counters
+  aecm->totCount++;
+
+  // This is the channel estimation algorithm.
+  // It is base on NLMS but has a variable step length,
+  // which was calculated above.
+  WebRtcAecm_UpdateChannel(aecm,
+                           far_spectrum_ptr,
+                           zerosXBuf,
+                           dfaNoisy,
+                           mu,
+                           echoEst32);
+
+  supGain = WebRtcAecm_CalcSuppressionGain(aecm);
+
+  // Calculate Wiener filter hnl[]
+  for (i = 0; i < PART_LEN1; i++) {
+    // Far end signal through channel estimate in Q8
+    // How much can we shift right to preserve resolution
+    tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
+    aecm->echoFilt[i] += (tmp32no1 * 50) >> 8;
+
+    zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
+    zeros16 = WebRtcSpl_NormW16(supGain) + 1;
+    if (zeros32 + zeros16 > 16) {
+      // Multiplication is safe
+      // Result in
+      // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff])
+      echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                              (uint16_t)supGain);
+      resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+    } else {
+      tmp16no1 = 17 - zeros32 - zeros16;
+      resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
+                       RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+      if (zeros32 > tmp16no1) {
+        echoEst32Gained = WEBRTC_SPL_UMUL_32_16(
+                            (uint32_t)aecm->echoFilt[i],
+                            supGain >> tmp16no1);
+      } else {
+        // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
+        echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
+      }
+    }
+
+    zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
+    assert(zeros16 >= 0);  // |zeros16| is a norm, hence non-negative.
+    dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
+    if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
+      tmp16no1 = aecm->nearFilt[i] << zeros16;
+      qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
+      tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
+    } else {
+      tmp16no1 = dfa_clean_q_domain_diff < 0
+          ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
+          : aecm->nearFilt[i] << dfa_clean_q_domain_diff;
+      qDomainDiff = 0;
+      tmp16no2 = ptrDfaClean[i];
+    }
+
+    tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
+    tmp16no2 = (int16_t)(tmp32no1 >> 4);
+    tmp16no2 += tmp16no1;
+    zeros16 = WebRtcSpl_NormW16(tmp16no2);
+    if ((tmp16no2) & (-qDomainDiff > zeros16)) {
+      aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
+    } else {
+      aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff
+                                          : tmp16no2 >> qDomainDiff;
+    }
+
+    // Wiener filter coefficients, resulting hnl in Q14
+    if (echoEst32Gained == 0) {
+      hnl[i] = ONE_Q14;
+      numPosCoef++;
+    } else if (aecm->nearFilt[i] == 0) {
+      hnl[i] = 0;
+    } else {
+      // Multiply the suppression gain
+      // Rounding
+      echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
+      tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,
+                                   (uint16_t)aecm->nearFilt[i]);
+
+      // Current resolution is
+      // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN
+      //    - max(0, 17 - zeros16 - zeros32))
+      // Make sure we are in Q14
+      tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
+      if (tmp32no1 > ONE_Q14) {
+        hnl[i] = 0;
+      } else if (tmp32no1 < 0) {
+        hnl[i] = ONE_Q14;
+        numPosCoef++;
+      } else {
+        // 1-echoEst/dfa
+        hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
+        if (hnl[i] <= 0) {
+          hnl[i] = 0;
+        } else {
+          numPosCoef++;
+        }
+      }
+    }
+  }
+
+  // Only in wideband. Prevent the gain in upper band from being larger than
+  // in lower band.
+  if (aecm->mult == 2) {
+    // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
+    //               speech distortion in double-talk.
+    for (i = 0; i < (PART_LEN1 >> 3); i++) {
+      __asm __volatile (
+        "lh         %[temp1],       0(%[ptr1])                  \n\t"
+        "lh         %[temp2],       2(%[ptr1])                  \n\t"
+        "lh         %[temp3],       4(%[ptr1])                  \n\t"
+        "lh         %[temp4],       6(%[ptr1])                  \n\t"
+        "lh         %[temp5],       8(%[ptr1])                  \n\t"
+        "lh         %[temp6],       10(%[ptr1])                 \n\t"
+        "lh         %[temp7],       12(%[ptr1])                 \n\t"
+        "lh         %[temp8],       14(%[ptr1])                 \n\t"
+        "mul        %[temp1],       %[temp1],       %[temp1]    \n\t"
+        "mul        %[temp2],       %[temp2],       %[temp2]    \n\t"
+        "mul        %[temp3],       %[temp3],       %[temp3]    \n\t"
+        "mul        %[temp4],       %[temp4],       %[temp4]    \n\t"
+        "mul        %[temp5],       %[temp5],       %[temp5]    \n\t"
+        "mul        %[temp6],       %[temp6],       %[temp6]    \n\t"
+        "mul        %[temp7],       %[temp7],       %[temp7]    \n\t"
+        "mul        %[temp8],       %[temp8],       %[temp8]    \n\t"
+        "sra        %[temp1],       %[temp1],       14          \n\t"
+        "sra        %[temp2],       %[temp2],       14          \n\t"
+        "sra        %[temp3],       %[temp3],       14          \n\t"
+        "sra        %[temp4],       %[temp4],       14          \n\t"
+        "sra        %[temp5],       %[temp5],       14          \n\t"
+        "sra        %[temp6],       %[temp6],       14          \n\t"
+        "sra        %[temp7],       %[temp7],       14          \n\t"
+        "sra        %[temp8],       %[temp8],       14          \n\t"
+        "sh         %[temp1],       0(%[ptr1])                  \n\t"
+        "sh         %[temp2],       2(%[ptr1])                  \n\t"
+        "sh         %[temp3],       4(%[ptr1])                  \n\t"
+        "sh         %[temp4],       6(%[ptr1])                  \n\t"
+        "sh         %[temp5],       8(%[ptr1])                  \n\t"
+        "sh         %[temp6],       10(%[ptr1])                 \n\t"
+        "sh         %[temp7],       12(%[ptr1])                 \n\t"
+        "sh         %[temp8],       14(%[ptr1])                 \n\t"
+        "addiu      %[ptr1],        %[ptr1],        16          \n\t"
+        : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+          [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6),
+          [temp7] "=&r" (temp7), [temp8] "=&r" (temp8), [ptr1] "+r" (ptr1)
+        :
+        : "memory", "hi", "lo"
+      );
+    }
+    for(i = 0; i < (PART_LEN1 & 7); i++) {
+      __asm __volatile (
+        "lh         %[temp1],       0(%[ptr1])                  \n\t"
+        "mul        %[temp1],       %[temp1],       %[temp1]    \n\t"
+        "sra        %[temp1],       %[temp1],       14          \n\t"
+        "sh         %[temp1],       0(%[ptr1])                  \n\t"
+        "addiu      %[ptr1],        %[ptr1],        2           \n\t"
+        : [temp1] "=&r" (temp1), [ptr1] "+r" (ptr1)
+        :
+        : "memory", "hi", "lo"
+      );
+    }
+
+    for (i = kMinPrefBand; i <= kMaxPrefBand; i++) {
+      avgHnl32 += (int32_t)hnl[i];
+    }
+
+    assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
+    avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
+
+    for (i = kMaxPrefBand; i < PART_LEN1; i++) {
+      if (hnl[i] > (int16_t)avgHnl32) {
+        hnl[i] = (int16_t)avgHnl32;
+      }
+    }
+  }
+
+  // Calculate NLP gain, result is in Q14
+  if (aecm->nlpFlag) {
+    if (numPosCoef < 3) {
+      for (i = 0; i < PART_LEN1; i++) {
+        efw[i].real = 0;
+        efw[i].imag = 0;
+        hnl[i] = 0;
+      }
+    } else {
+      for (i = 0; i < PART_LEN1; i++) {
+#if defined(MIPS_DSP_R1_LE)
+        __asm __volatile (
+          ".set       push                                        \n\t"
+          ".set       noreorder                                   \n\t"
+          "lh         %[temp1],       0(%[ptr])                   \n\t"
+          "lh         %[temp2],       0(%[dr_ptr])                \n\t"
+          "slti       %[temp4],       %[temp1],       0x4001      \n\t"
+          "beqz       %[temp4],       3f                          \n\t"
+          " lh        %[temp3],       2(%[dr_ptr])                \n\t"
+          "slti       %[temp5],       %[temp1],       3277        \n\t"
+          "bnez       %[temp5],       2f                          \n\t"
+          " addiu     %[dr_ptr],      %[dr_ptr],      4           \n\t"
+          "mul        %[temp2],       %[temp2],       %[temp1]    \n\t"
+          "mul        %[temp3],       %[temp3],       %[temp1]    \n\t"
+          "shra_r.w   %[temp2],       %[temp2],       14          \n\t"
+          "shra_r.w   %[temp3],       %[temp3],       14          \n\t"
+          "b          4f                                          \n\t"
+          " nop                                                   \n\t"
+         "2:                                                      \n\t"
+          "addu       %[temp1],       $zero,          $zero       \n\t"
+          "addu       %[temp2],       $zero,          $zero       \n\t"
+          "addu       %[temp3],       $zero,          $zero       \n\t"
+          "b          1f                                          \n\t"
+          " nop                                                   \n\t"
+         "3:                                                      \n\t"
+          "addiu      %[temp1],       $0,             0x4000      \n\t"
+         "1:                                                      \n\t"
+          "sh         %[temp1],       0(%[ptr])                   \n\t"
+         "4:                                                      \n\t"
+          "sh         %[temp2],       0(%[er_ptr])                \n\t"
+          "sh         %[temp3],       2(%[er_ptr])                \n\t"
+          "addiu      %[ptr],         %[ptr],         2           \n\t"
+          "addiu      %[er_ptr],      %[er_ptr],      4           \n\t"
+          ".set       pop                                         \n\t"
+          : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+            [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr),
+            [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr)
+          :
+          : "memory", "hi", "lo"
+        );
+#else
+        __asm __volatile (
+          ".set       push                                        \n\t"
+          ".set       noreorder                                   \n\t"
+          "lh         %[temp1],       0(%[ptr])                   \n\t"
+          "lh         %[temp2],       0(%[dr_ptr])                \n\t"
+          "slti       %[temp4],       %[temp1],       0x4001      \n\t"
+          "beqz       %[temp4],       3f                          \n\t"
+          " lh        %[temp3],       2(%[dr_ptr])                \n\t"
+          "slti       %[temp5],       %[temp1],       3277        \n\t"
+          "bnez       %[temp5],       2f                          \n\t"
+          " addiu     %[dr_ptr],      %[dr_ptr],      4           \n\t"
+          "mul        %[temp2],       %[temp2],       %[temp1]    \n\t"
+          "mul        %[temp3],       %[temp3],       %[temp1]    \n\t"
+          "addiu      %[temp2],       %[temp2],       0x2000      \n\t"
+          "addiu      %[temp3],       %[temp3],       0x2000      \n\t"
+          "sra        %[temp2],       %[temp2],       14          \n\t"
+          "sra        %[temp3],       %[temp3],       14          \n\t"
+          "b          4f                                          \n\t"
+          " nop                                                   \n\t"
+         "2:                                                      \n\t"
+          "addu       %[temp1],       $zero,          $zero       \n\t"
+          "addu       %[temp2],       $zero,          $zero       \n\t"
+          "addu       %[temp3],       $zero,          $zero       \n\t"
+          "b          1f                                          \n\t"
+          " nop                                                   \n\t"
+         "3:                                                      \n\t"
+          "addiu      %[temp1],       $0,             0x4000      \n\t"
+         "1:                                                      \n\t"
+          "sh         %[temp1],       0(%[ptr])                   \n\t"
+         "4:                                                      \n\t"
+          "sh         %[temp2],       0(%[er_ptr])                \n\t"
+          "sh         %[temp3],       2(%[er_ptr])                \n\t"
+          "addiu      %[ptr],         %[ptr],         2           \n\t"
+          "addiu      %[er_ptr],      %[er_ptr],      4           \n\t"
+          ".set       pop                                         \n\t"
+          : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+            [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr),
+            [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr)
+          :
+          : "memory", "hi", "lo"
+        );
+#endif
+      }
+    }
+  }
+  else {
+    // multiply with Wiener coefficients
+    for (i = 0; i < PART_LEN1; i++) {
+      efw[i].real = (int16_t)
+                      (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
+                                                            hnl[i],
+                                                            14));
+      efw[i].imag = (int16_t)
+                      (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
+                                                            hnl[i],
+                                                            14));
+    }
+  }
+
+  if (aecm->cngMode == AecmTrue) {
+    ComfortNoise(aecm, ptrDfaClean, efw, hnl);
+  }
+
+  InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
+
+  return 0;
+}
+
+// Generate comfort noise and add to output signal.
+static void ComfortNoise(AecmCore* aecm,
+                         const uint16_t* dfa,
+                         ComplexInt16* out,
+                         const int16_t* lambda) {
+  int16_t i;
+  int16_t tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2;
+  int32_t tmp32, tmp321, tnoise, tnoise1;
+  int32_t tmp322, tmp323, *tmp1;
+  int16_t* dfap;
+  int16_t* lambdap;
+  const int32_t c2049 = 2049;
+  const int32_t c359 = 359;
+  const int32_t c114 = ONE_Q14;
+
+  int16_t randW16[PART_LEN];
+  int16_t uReal[PART_LEN1];
+  int16_t uImag[PART_LEN1];
+  int32_t outLShift32;
+
+  int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
+  int16_t minTrackShift = 9;
+
+  assert(shiftFromNearToNoise >= 0);
+  assert(shiftFromNearToNoise < 16);
+
+  if (aecm->noiseEstCtr < 100) {
+    // Track the minimum more quickly initially.
+    aecm->noiseEstCtr++;
+    minTrackShift = 6;
+  }
+
+  // Generate a uniform random array on [0 2^15-1].
+  WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
+  int16_t* randW16p = (int16_t*)randW16;
+#if defined (MIPS_DSP_R1_LE)
+  int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable;
+  int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable;
+#endif   // #if defined(MIPS_DSP_R1_LE)
+  tmp1 = (int32_t*)aecm->noiseEst + 1;
+  dfap = (int16_t*)dfa + 1;
+  lambdap = (int16_t*)lambda + 1;
+  // Estimate noise power.
+  for (i = 1; i < PART_LEN1; i+=2) {
+  // Shift to the noise domain.
+    __asm __volatile (
+      "lh     %[tmp32],       0(%[dfap])                              \n\t"
+      "lw     %[tnoise],      0(%[tmp1])                              \n\t"
+      "sllv   %[outLShift32], %[tmp32],   %[shiftFromNearToNoise]     \n\t"
+      : [tmp32] "=&r" (tmp32), [outLShift32] "=r" (outLShift32),
+        [tnoise] "=&r" (tnoise)
+      : [tmp1] "r" (tmp1), [dfap] "r" (dfap),
+        [shiftFromNearToNoise] "r" (shiftFromNearToNoise)
+      : "memory"
+    );
+
+    if (outLShift32 < tnoise) {
+      // Reset "too low" counter
+      aecm->noiseEstTooLowCtr[i] = 0;
+      // Track the minimum.
+      if (tnoise < (1 << minTrackShift)) {
+        // For small values, decrease noiseEst[i] every
+        // |kNoiseEstIncCount| block. The regular approach below can not
+        // go further down due to truncation.
+        aecm->noiseEstTooHighCtr[i]++;
+        if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) {
+          tnoise--;
+          aecm->noiseEstTooHighCtr[i] = 0;  // Reset the counter
+        }
+      } else {
+        __asm __volatile (
+          "subu   %[tmp32],       %[tnoise],      %[outLShift32]      \n\t"
+          "srav   %[tmp32],       %[tmp32],       %[minTrackShift]    \n\t"
+          "subu   %[tnoise],      %[tnoise],      %[tmp32]            \n\t"
+          : [tmp32] "=&r" (tmp32), [tnoise] "+r" (tnoise)
+          : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift)
+        );
+      }
+    } else {
+      // Reset "too high" counter
+      aecm->noiseEstTooHighCtr[i] = 0;
+      // Ramp slowly upwards until we hit the minimum again.
+      if ((tnoise >> 19) <= 0) {
+        if ((tnoise >> 11) > 0) {
+          // Large enough for relative increase
+          __asm __volatile (
+            "mul    %[tnoise],  %[tnoise],  %[c2049]    \n\t"
+            "sra    %[tnoise],  %[tnoise],  11          \n\t"
+            : [tnoise] "+r" (tnoise)
+            : [c2049] "r" (c2049)
+            : "hi", "lo"
+          );
+        } else {
+          // Make incremental increases based on size every
+          // |kNoiseEstIncCount| block
+          aecm->noiseEstTooLowCtr[i]++;
+          if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) {
+            __asm __volatile (
+              "sra    %[tmp32],   %[tnoise],  9           \n\t"
+              "addi   %[tnoise],  %[tnoise],  1           \n\t"
+              "addu   %[tnoise],  %[tnoise],  %[tmp32]    \n\t"
+              : [tnoise] "+r" (tnoise), [tmp32] "=&r" (tmp32)
+              :
+            );
+            aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
+          }
+        }
+      } else {
+        // Avoid overflow.
+        // Multiplication with 2049 will cause wrap around. Scale
+        // down first and then multiply
+        __asm __volatile (
+          "sra    %[tnoise],  %[tnoise],  11          \n\t"
+          "mul    %[tnoise],  %[tnoise],  %[c2049]    \n\t"
+          : [tnoise] "+r" (tnoise)
+          : [c2049] "r" (c2049)
+          : "hi", "lo"
+        );
+      }
+    }
+
+    // Shift to the noise domain.
+    __asm __volatile (
+      "lh     %[tmp32],       2(%[dfap])                              \n\t"
+      "lw     %[tnoise1],     4(%[tmp1])                              \n\t"
+      "addiu  %[dfap],        %[dfap],    4                           \n\t"
+      "sllv   %[outLShift32], %[tmp32],   %[shiftFromNearToNoise]     \n\t"
+      : [tmp32] "=&r" (tmp32), [dfap] "+r" (dfap),
+        [outLShift32] "=r" (outLShift32), [tnoise1] "=&r" (tnoise1)
+      : [tmp1] "r" (tmp1), [shiftFromNearToNoise] "r" (shiftFromNearToNoise)
+      : "memory"
+    );
+
+    if (outLShift32 < tnoise1) {
+      // Reset "too low" counter
+      aecm->noiseEstTooLowCtr[i + 1] = 0;
+      // Track the minimum.
+      if (tnoise1 < (1 << minTrackShift)) {
+        // For small values, decrease noiseEst[i] every
+        // |kNoiseEstIncCount| block. The regular approach below can not
+        // go further down due to truncation.
+        aecm->noiseEstTooHighCtr[i + 1]++;
+        if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) {
+          tnoise1--;
+          aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter
+        }
+      } else {
+        __asm __volatile (
+          "subu   %[tmp32],       %[tnoise1],     %[outLShift32]      \n\t"
+          "srav   %[tmp32],       %[tmp32],       %[minTrackShift]    \n\t"
+          "subu   %[tnoise1],     %[tnoise1],     %[tmp32]            \n\t"
+          : [tmp32] "=&r" (tmp32), [tnoise1] "+r" (tnoise1)
+          : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift)
+        );
+      }
+    } else {
+      // Reset "too high" counter
+      aecm->noiseEstTooHighCtr[i + 1] = 0;
+      // Ramp slowly upwards until we hit the minimum again.
+      if ((tnoise1 >> 19) <= 0) {
+        if ((tnoise1 >> 11) > 0) {
+          // Large enough for relative increase
+          __asm __volatile (
+            "mul    %[tnoise1], %[tnoise1], %[c2049]   \n\t"
+            "sra    %[tnoise1], %[tnoise1], 11         \n\t"
+            : [tnoise1] "+r" (tnoise1)
+            : [c2049] "r" (c2049)
+            : "hi", "lo"
+          );
+        } else {
+          // Make incremental increases based on size every
+          // |kNoiseEstIncCount| block
+          aecm->noiseEstTooLowCtr[i + 1]++;
+          if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) {
+            __asm __volatile (
+              "sra    %[tmp32],   %[tnoise1], 9           \n\t"
+              "addi   %[tnoise1], %[tnoise1], 1           \n\t"
+              "addu   %[tnoise1], %[tnoise1], %[tmp32]    \n\t"
+              : [tnoise1] "+r" (tnoise1), [tmp32] "=&r" (tmp32)
+              :
+            );
+            aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter
+          }
+        }
+      } else {
+        // Avoid overflow.
+        // Multiplication with 2049 will cause wrap around. Scale
+        // down first and then multiply
+        __asm __volatile (
+          "sra    %[tnoise1], %[tnoise1], 11          \n\t"
+          "mul    %[tnoise1], %[tnoise1], %[c2049]    \n\t"
+          : [tnoise1] "+r" (tnoise1)
+          : [c2049] "r" (c2049)
+          : "hi", "lo"
+        );
+      }
+    }
+
+    __asm __volatile (
+      "lh     %[tmp16],   0(%[lambdap])                           \n\t"
+      "lh     %[tmp161],  2(%[lambdap])                           \n\t"
+      "sw     %[tnoise],  0(%[tmp1])                              \n\t"
+      "sw     %[tnoise1], 4(%[tmp1])                              \n\t"
+      "subu   %[tmp16],   %[c114],        %[tmp16]                \n\t"
+      "subu   %[tmp161],  %[c114],        %[tmp161]               \n\t"
+      "srav   %[tmp32],   %[tnoise],      %[shiftFromNearToNoise] \n\t"
+      "srav   %[tmp321],  %[tnoise1],     %[shiftFromNearToNoise] \n\t"
+      "addiu  %[lambdap], %[lambdap],     4                       \n\t"
+      "addiu  %[tmp1],    %[tmp1],        8                       \n\t"
+      : [tmp16] "=&r" (tmp16), [tmp161] "=&r" (tmp161), [tmp1] "+r" (tmp1),
+        [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), [lambdap] "+r" (lambdap)
+      : [tnoise] "r" (tnoise), [tnoise1] "r" (tnoise1), [c114] "r" (c114),
+        [shiftFromNearToNoise] "r" (shiftFromNearToNoise)
+      : "memory"
+    );
+
+    if (tmp32 > 32767) {
+      tmp32 = 32767;
+      aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise;
+    }
+    if (tmp321 > 32767) {
+      tmp321 = 32767;
+      aecm->noiseEst[i+1] = tmp321 << shiftFromNearToNoise;
+    }
+
+    __asm __volatile (
+      "mul    %[tmp32],   %[tmp32],       %[tmp16]                \n\t"
+      "mul    %[tmp321],  %[tmp321],      %[tmp161]               \n\t"
+      "sra    %[nrsh1],   %[tmp32],       14                      \n\t"
+      "sra    %[nrsh2],   %[tmp321],      14                      \n\t"
+      : [nrsh1] "=&r" (nrsh1), [nrsh2] "=r" (nrsh2)
+      : [tmp16] "r" (tmp16), [tmp161] "r" (tmp161), [tmp32] "r" (tmp32),
+        [tmp321] "r" (tmp321)
+      : "memory", "hi", "lo"
+    );
+
+    __asm __volatile (
+      "lh     %[tmp32],       0(%[randW16p])              \n\t"
+      "lh     %[tmp321],      2(%[randW16p])              \n\t"
+      "addiu  %[randW16p],    %[randW16p],    4           \n\t"
+      "mul    %[tmp32],       %[tmp32],       %[c359]     \n\t"
+      "mul    %[tmp321],      %[tmp321],      %[c359]     \n\t"
+      "sra    %[tmp16],       %[tmp32],       15          \n\t"
+      "sra    %[tmp161],      %[tmp321],      15          \n\t"
+      : [randW16p] "+r" (randW16p), [tmp32] "=&r" (tmp32),
+        [tmp16] "=r" (tmp16), [tmp161] "=r" (tmp161), [tmp321] "=&r" (tmp321)
+      : [c359] "r" (c359)
+      : "memory", "hi", "lo"
+    );
+
+#if !defined(MIPS_DSP_R1_LE)
+    tmp32 = WebRtcAecm_kCosTable[tmp16];
+    tmp321 = WebRtcAecm_kSinTable[tmp16];
+    tmp322 = WebRtcAecm_kCosTable[tmp161];
+    tmp323 = WebRtcAecm_kSinTable[tmp161];
+#else
+    __asm __volatile (
+      "sll    %[tmp16],       %[tmp16],                   1           \n\t"
+      "sll    %[tmp161],      %[tmp161],                  1           \n\t"
+      "lhx    %[tmp32],       %[tmp16](%[kCosTablep])                 \n\t"
+      "lhx    %[tmp321],      %[tmp16](%[kSinTablep])                 \n\t"
+      "lhx    %[tmp322],      %[tmp161](%[kCosTablep])                \n\t"
+      "lhx    %[tmp323],      %[tmp161](%[kSinTablep])                \n\t"
+      : [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321),
+        [tmp322] "=&r" (tmp322), [tmp323] "=&r" (tmp323)
+      : [kCosTablep] "r" (kCosTablep), [tmp16] "r" (tmp16),
+        [tmp161] "r" (tmp161), [kSinTablep] "r" (kSinTablep)
+      : "memory"
+    );
+#endif
+    __asm __volatile (
+      "mul    %[tmp32],       %[tmp32],                   %[nrsh1]    \n\t"
+      "negu   %[tmp162],      %[nrsh1]                                \n\t"
+      "mul    %[tmp322],      %[tmp322],                  %[nrsh2]    \n\t"
+      "negu   %[tmp163],      %[nrsh2]                                \n\t"
+      "sra    %[tmp32],       %[tmp32],                   13          \n\t"
+      "mul    %[tmp321],      %[tmp321],                  %[tmp162]   \n\t"
+      "sra    %[tmp322],      %[tmp322],                  13          \n\t"
+      "mul    %[tmp323],      %[tmp323],                  %[tmp163]   \n\t"
+      "sra    %[tmp321],      %[tmp321],                  13          \n\t"
+      "sra    %[tmp323],      %[tmp323],                  13          \n\t"
+      : [tmp32] "+r" (tmp32), [tmp321] "+r" (tmp321), [tmp162] "=&r" (tmp162),
+        [tmp322] "+r" (tmp322), [tmp323] "+r" (tmp323), [tmp163] "=&r" (tmp163)
+      : [nrsh1] "r" (nrsh1), [nrsh2] "r" (nrsh2)
+      : "hi", "lo"
+    );
+    // Tables are in Q13.
+    uReal[i] = (int16_t)tmp32;
+    uImag[i] = (int16_t)tmp321;
+    uReal[i + 1] = (int16_t)tmp322;
+    uImag[i + 1] = (int16_t)tmp323;
+  }
+
+  int32_t tt, sgn;
+  tt = out[0].real;
+  sgn = ((int)tt) >> 31;
+  out[0].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  tt = out[0].imag;
+  sgn = ((int)tt) >> 31;
+  out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  for (i = 1; i < PART_LEN; i++) {
+    tt = out[i].real + uReal[i];
+    sgn = ((int)tt) >> 31;
+    out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+    tt = out[i].imag + uImag[i];
+    sgn = ((int)tt) >> 31;
+    out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  }
+  tt = out[PART_LEN].real + uReal[PART_LEN];
+  sgn = ((int)tt) >> 31;
+  out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  tt = out[PART_LEN].imag;
+  sgn = ((int)tt) >> 31;
+  out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+}
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_neon.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_neon.c
new file mode 100644
index 00000000..1751fcf7
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_neon.c
@@ -0,0 +1,212 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+
+// TODO(kma): Re-write the corresponding assembly file, the offset
+// generating script and makefile, to replace these C functions.
+
+// Square root of Hanning window in Q14.
+const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
+  0,
+  399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
+  3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
+  6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
+  9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
+  11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
+  13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
+  15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
+  16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
+};
+
+static inline void AddLanes(uint32_t* ptr, uint32x4_t v) {
+#if defined(WEBRTC_ARCH_ARM64)
+  *(ptr) = vaddvq_u32(v);
+#else
+  uint32x2_t tmp_v;
+  tmp_v = vadd_u32(vget_low_u32(v), vget_high_u32(v));
+  tmp_v = vpadd_u32(tmp_v, tmp_v);
+  *(ptr) = vget_lane_u32(tmp_v, 0);
+#endif
+}
+
+void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm,
+                                       const uint16_t* far_spectrum,
+                                       int32_t* echo_est,
+                                       uint32_t* far_energy,
+                                       uint32_t* echo_energy_adapt,
+                                       uint32_t* echo_energy_stored) {
+  int16_t* start_stored_p = aecm->channelStored;
+  int16_t* start_adapt_p = aecm->channelAdapt16;
+  int32_t* echo_est_p = echo_est;
+  const int16_t* end_stored_p = aecm->channelStored + PART_LEN;
+  const uint16_t* far_spectrum_p = far_spectrum;
+  int16x8_t store_v, adapt_v;
+  uint16x8_t spectrum_v;
+  uint32x4_t echo_est_v_low, echo_est_v_high;
+  uint32x4_t far_energy_v, echo_stored_v, echo_adapt_v;
+
+  far_energy_v = vdupq_n_u32(0);
+  echo_adapt_v = vdupq_n_u32(0);
+  echo_stored_v = vdupq_n_u32(0);
+
+  // Get energy for the delayed far end signal and estimated
+  // echo using both stored and adapted channels.
+  // The C code:
+  //  for (i = 0; i < PART_LEN1; i++) {
+  //      echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
+  //                                         far_spectrum[i]);
+  //      (*far_energy) += (uint32_t)(far_spectrum[i]);
+  //      *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i];
+  //      (*echo_energy_stored) += (uint32_t)echo_est[i];
+  //  }
+  while (start_stored_p < end_stored_p) {
+    spectrum_v = vld1q_u16(far_spectrum_p);
+    adapt_v = vld1q_s16(start_adapt_p);
+    store_v = vld1q_s16(start_stored_p);
+
+    far_energy_v = vaddw_u16(far_energy_v, vget_low_u16(spectrum_v));
+    far_energy_v = vaddw_u16(far_energy_v, vget_high_u16(spectrum_v));
+
+    echo_est_v_low = vmull_u16(vreinterpret_u16_s16(vget_low_s16(store_v)),
+                               vget_low_u16(spectrum_v));
+    echo_est_v_high = vmull_u16(vreinterpret_u16_s16(vget_high_s16(store_v)),
+                                vget_high_u16(spectrum_v));
+    vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low));
+    vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high));
+
+    echo_stored_v = vaddq_u32(echo_est_v_low, echo_stored_v);
+    echo_stored_v = vaddq_u32(echo_est_v_high, echo_stored_v);
+
+    echo_adapt_v = vmlal_u16(echo_adapt_v,
+                             vreinterpret_u16_s16(vget_low_s16(adapt_v)),
+                             vget_low_u16(spectrum_v));
+    echo_adapt_v = vmlal_u16(echo_adapt_v,
+                             vreinterpret_u16_s16(vget_high_s16(adapt_v)),
+                             vget_high_u16(spectrum_v));
+
+    start_stored_p += 8;
+    start_adapt_p += 8;
+    far_spectrum_p += 8;
+    echo_est_p += 8;
+  }
+
+  AddLanes(far_energy, far_energy_v);
+  AddLanes(echo_energy_stored, echo_stored_v);
+  AddLanes(echo_energy_adapt, echo_adapt_v);
+
+  echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
+                                             far_spectrum[PART_LEN]);
+  *echo_energy_stored += (uint32_t)echo_est[PART_LEN];
+  *far_energy += (uint32_t)far_spectrum[PART_LEN];
+  *echo_energy_adapt += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN];
+}
+
+void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm,
+                                         const uint16_t* far_spectrum,
+                                         int32_t* echo_est) {
+  assert((uintptr_t)echo_est % 32 == 0);
+  assert((uintptr_t)(aecm->channelStored) % 16 == 0);
+  assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0);
+
+  // This is C code of following optimized code.
+  // During startup we store the channel every block.
+  //  memcpy(aecm->channelStored,
+  //         aecm->channelAdapt16,
+  //         sizeof(int16_t) * PART_LEN1);
+  // Recalculate echo estimate
+  //  for (i = 0; i < PART_LEN; i += 4) {
+  //    echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
+  //                                        far_spectrum[i]);
+  //    echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1],
+  //                                            far_spectrum[i + 1]);
+  //    echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2],
+  //                                            far_spectrum[i + 2]);
+  //    echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3],
+  //                                            far_spectrum[i + 3]);
+  //  }
+  //  echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
+  //                                     far_spectrum[i]);
+  const uint16_t* far_spectrum_p = far_spectrum;
+  int16_t* start_adapt_p = aecm->channelAdapt16;
+  int16_t* start_stored_p = aecm->channelStored;
+  const int16_t* end_stored_p = aecm->channelStored + PART_LEN;
+  int32_t* echo_est_p = echo_est;
+
+  uint16x8_t far_spectrum_v;
+  int16x8_t adapt_v;
+  uint32x4_t echo_est_v_low, echo_est_v_high;
+
+  while (start_stored_p < end_stored_p) {
+    far_spectrum_v = vld1q_u16(far_spectrum_p);
+    adapt_v = vld1q_s16(start_adapt_p);
+
+    vst1q_s16(start_stored_p, adapt_v);
+
+    echo_est_v_low = vmull_u16(vget_low_u16(far_spectrum_v),
+                               vget_low_u16(vreinterpretq_u16_s16(adapt_v)));
+    echo_est_v_high = vmull_u16(vget_high_u16(far_spectrum_v),
+                                vget_high_u16(vreinterpretq_u16_s16(adapt_v)));
+
+    vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low));
+    vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high));
+
+    far_spectrum_p += 8;
+    start_adapt_p += 8;
+    start_stored_p += 8;
+    echo_est_p += 8;
+  }
+  aecm->channelStored[PART_LEN] = aecm->channelAdapt16[PART_LEN];
+  echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
+                                             far_spectrum[PART_LEN]);
+}
+
+void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm) {
+  assert((uintptr_t)(aecm->channelStored) % 16 == 0);
+  assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0);
+  assert((uintptr_t)(aecm->channelAdapt32) % 32 == 0);
+
+  // The C code of following optimized code.
+  // for (i = 0; i < PART_LEN1; i++) {
+  //   aecm->channelAdapt16[i] = aecm->channelStored[i];
+  //   aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32(
+  //              (int32_t)aecm->channelStored[i], 16);
+  // }
+
+  int16_t* start_stored_p = aecm->channelStored;
+  int16_t* start_adapt16_p = aecm->channelAdapt16;
+  int32_t* start_adapt32_p = aecm->channelAdapt32;
+  const int16_t* end_stored_p = start_stored_p + PART_LEN;
+
+  int16x8_t stored_v;
+  int32x4_t adapt32_v_low, adapt32_v_high;
+
+  while (start_stored_p < end_stored_p) {
+    stored_v = vld1q_s16(start_stored_p);
+    vst1q_s16(start_adapt16_p, stored_v);
+
+    adapt32_v_low = vshll_n_s16(vget_low_s16(stored_v), 16);
+    adapt32_v_high = vshll_n_s16(vget_high_s16(stored_v), 16);
+
+    vst1q_s32(start_adapt32_p, adapt32_v_low);
+    vst1q_s32(start_adapt32_p + 4, adapt32_v_high);
+
+    start_stored_p += 8;
+    start_adapt16_p += 8;
+    start_adapt32_p += 8;
+  }
+  aecm->channelAdapt16[PART_LEN] = aecm->channelStored[PART_LEN];
+  aecm->channelAdapt32[PART_LEN] = (int32_t)aecm->channelStored[PART_LEN] << 16;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_defines.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_defines.h
new file mode 100644
index 00000000..6d63990b
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_defines.h
@@ -0,0 +1,87 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_
+
+#define AECM_DYNAMIC_Q                 /* Turn on/off dynamic Q-domain. */
+
+/* Algorithm parameters */
+#define FRAME_LEN       80             /* Total frame length, 10 ms. */
+
+#define PART_LEN        64             /* Length of partition. */
+#define PART_LEN_SHIFT  7              /* Length of (PART_LEN * 2) in base 2. */
+
+#define PART_LEN1       (PART_LEN + 1)  /* Unique fft coefficients. */
+#define PART_LEN2       (PART_LEN << 1) /* Length of partition * 2. */
+#define PART_LEN4       (PART_LEN << 2) /* Length of partition * 4. */
+#define FAR_BUF_LEN     PART_LEN4       /* Length of buffers. */
+#define MAX_DELAY       100
+
+/* Counter parameters */
+#define CONV_LEN        512          /* Convergence length used at startup. */
+#define CONV_LEN2       (CONV_LEN << 1) /* Used at startup. */
+
+/* Energy parameters */
+#define MAX_BUF_LEN     64           /* History length of energy signals. */
+#define FAR_ENERGY_MIN  1025         /* Lowest Far energy level: At least 2 */
+                                     /* in energy. */
+#define FAR_ENERGY_DIFF 929          /* Allowed difference between max */
+                                     /* and min. */
+#define ENERGY_DEV_OFFSET       0    /* The energy error offset in Q8. */
+#define ENERGY_DEV_TOL  400          /* The energy estimation tolerance (Q8). */
+#define FAR_ENERGY_VAD_REGION   230  /* Far VAD tolerance region. */
+
+/* Stepsize parameters */
+#define MU_MIN          10          /* Min stepsize 2^-MU_MIN (far end energy */
+                                    /* dependent). */
+#define MU_MAX          1           /* Max stepsize 2^-MU_MAX (far end energy */
+                                    /* dependent). */
+#define MU_DIFF         9           /* MU_MIN - MU_MAX */
+
+/* Channel parameters */
+#define MIN_MSE_COUNT   20 /* Min number of consecutive blocks with enough */
+                           /* far end energy to compare channel estimates. */
+#define MIN_MSE_DIFF    29 /* The ratio between adapted and stored channel to */
+                           /* accept a new storage (0.8 in Q-MSE_RESOLUTION). */
+#define MSE_RESOLUTION  5           /* MSE parameter resolution. */
+#define RESOLUTION_CHANNEL16    12  /* W16 Channel in Q-RESOLUTION_CHANNEL16. */
+#define RESOLUTION_CHANNEL32    28  /* W32 Channel in Q-RESOLUTION_CHANNEL. */
+#define CHANNEL_VAD     16          /* Minimum energy in frequency band */
+                                    /* to update channel. */
+
+/* Suppression gain parameters: SUPGAIN parameters in Q-(RESOLUTION_SUPGAIN). */
+#define RESOLUTION_SUPGAIN      8     /* Channel in Q-(RESOLUTION_SUPGAIN). */
+#define SUPGAIN_DEFAULT (1 << RESOLUTION_SUPGAIN)  /* Default. */
+#define SUPGAIN_ERROR_PARAM_A   3072  /* Estimation error parameter */
+                                      /* (Maximum gain) (8 in Q8). */
+#define SUPGAIN_ERROR_PARAM_B   1536  /* Estimation error parameter */
+                                      /* (Gain before going down). */
+#define SUPGAIN_ERROR_PARAM_D   SUPGAIN_DEFAULT /* Estimation error parameter */
+                                /* (Should be the same as Default) (1 in Q8). */
+#define SUPGAIN_EPC_DT  200     /* SUPGAIN_ERROR_PARAM_C * ENERGY_DEV_TOL */
+
+/* Defines for "check delay estimation" */
+#define CORR_WIDTH      31      /* Number of samples to correlate over. */
+#define CORR_MAX        16      /* Maximum correlation offset. */
+#define CORR_MAX_BUF    63
+#define CORR_DEV        4
+#define CORR_MAX_LEVEL  20
+#define CORR_MAX_LOW    4
+#define CORR_BUF_LEN    (CORR_MAX << 1) + 1
+/* Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN. */
+
+#define ONE_Q14         (1 << 14)
+
+/* NLP defines */
+#define NLP_COMP_LOW    3277    /* 0.2 in Q14 */
+#define NLP_COMP_HIGH   ONE_Q14 /* 1 in Q14 */
+
+#endif
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/echo_control_mobile.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/echo_control_mobile.c
new file mode 100644
index 00000000..83781e97
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/echo_control_mobile.c
@@ -0,0 +1,702 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+
+#ifdef AEC_DEBUG
+#include <stdio.h>
+#endif
+#include <stdlib.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#define BUF_SIZE_FRAMES 50 // buffer size (frames)
+// Maximum length of resampled signal. Must be an integer multiple of frames
+// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN
+// The factor of 2 handles wb, and the + 1 is as a safety margin
+#define MAX_RESAMP_LEN (5 * FRAME_LEN)
+
+static const size_t kBufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples)
+static const int kSampMsNb = 8; // samples per ms in nb
+// Target suppression levels for nlp modes
+// log{0.001, 0.00001, 0.00000001}
+static const int kInitCheck = 42;
+
+typedef struct
+{
+    int sampFreq;
+    int scSampFreq;
+    short bufSizeStart;
+    int knownDelay;
+
+    // Stores the last frame added to the farend buffer
+    short farendOld[2][FRAME_LEN];
+    short initFlag; // indicates if AEC has been initialized
+
+    // Variables used for averaging far end buffer size
+    short counter;
+    short sum;
+    short firstVal;
+    short checkBufSizeCtr;
+
+    // Variables used for delay shifts
+    short msInSndCardBuf;
+    short filtDelay;
+    int timeForDelayChange;
+    int ECstartup;
+    int checkBuffSize;
+    int delayChange;
+    short lastDelayDiff;
+
+    int16_t echoMode;
+
+#ifdef AEC_DEBUG
+    FILE *bufFile;
+    FILE *delayFile;
+    FILE *preCompFile;
+    FILE *postCompFile;
+#endif // AEC_DEBUG
+    // Structures
+    RingBuffer *farendBuf;
+
+    int lastError;
+
+    AecmCore* aecmCore;
+} AecMobile;
+
+// Estimates delay to set the position of the farend buffer read pointer
+// (controlled by knownDelay)
+static int WebRtcAecm_EstBufDelay(AecMobile* aecmInst, short msInSndCardBuf);
+
+// Stuffs the farend buffer if the estimated delay is too large
+static int WebRtcAecm_DelayComp(AecMobile* aecmInst);
+
+void* WebRtcAecm_Create() {
+    AecMobile* aecm = malloc(sizeof(AecMobile));
+
+    WebRtcSpl_Init();
+
+    aecm->aecmCore = WebRtcAecm_CreateCore();
+    if (!aecm->aecmCore) {
+        WebRtcAecm_Free(aecm);
+        return NULL;
+    }
+
+    aecm->farendBuf = WebRtc_CreateBuffer(kBufSizeSamp,
+                                          sizeof(int16_t));
+    if (!aecm->farendBuf)
+    {
+        WebRtcAecm_Free(aecm);
+        return NULL;
+    }
+
+    aecm->initFlag = 0;
+    aecm->lastError = 0;
+
+#ifdef AEC_DEBUG
+    aecm->aecmCore->farFile = fopen("aecFar.pcm","wb");
+    aecm->aecmCore->nearFile = fopen("aecNear.pcm","wb");
+    aecm->aecmCore->outFile = fopen("aecOut.pcm","wb");
+    //aecm->aecmCore->outLpFile = fopen("aecOutLp.pcm","wb");
+
+    aecm->bufFile = fopen("aecBuf.dat", "wb");
+    aecm->delayFile = fopen("aecDelay.dat", "wb");
+    aecm->preCompFile = fopen("preComp.pcm", "wb");
+    aecm->postCompFile = fopen("postComp.pcm", "wb");
+#endif // AEC_DEBUG
+    return aecm;
+}
+
+void WebRtcAecm_Free(void* aecmInst) {
+  AecMobile* aecm = aecmInst;
+
+    if (aecm == NULL) {
+      return;
+    }
+
+#ifdef AEC_DEBUG
+    fclose(aecm->aecmCore->farFile);
+    fclose(aecm->aecmCore->nearFile);
+    fclose(aecm->aecmCore->outFile);
+    //fclose(aecm->aecmCore->outLpFile);
+
+    fclose(aecm->bufFile);
+    fclose(aecm->delayFile);
+    fclose(aecm->preCompFile);
+    fclose(aecm->postCompFile);
+#endif // AEC_DEBUG
+    WebRtcAecm_FreeCore(aecm->aecmCore);
+    WebRtc_FreeBuffer(aecm->farendBuf);
+    free(aecm);
+}
+
+int32_t WebRtcAecm_Init(void *aecmInst, int32_t sampFreq)
+{
+  AecMobile* aecm = aecmInst;
+    AecmConfig aecConfig;
+
+    if (aecm == NULL)
+    {
+        return -1;
+    }
+
+    if (sampFreq != 8000 && sampFreq != 16000)
+    {
+        aecm->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    aecm->sampFreq = sampFreq;
+
+    // Initialize AECM core
+    if (WebRtcAecm_InitCore(aecm->aecmCore, aecm->sampFreq) == -1)
+    {
+        aecm->lastError = AECM_UNSPECIFIED_ERROR;
+        return -1;
+    }
+
+    // Initialize farend buffer
+    WebRtc_InitBuffer(aecm->farendBuf);
+
+    aecm->initFlag = kInitCheck; // indicates that initialization has been done
+
+    aecm->delayChange = 1;
+
+    aecm->sum = 0;
+    aecm->counter = 0;
+    aecm->checkBuffSize = 1;
+    aecm->firstVal = 0;
+
+    aecm->ECstartup = 1;
+    aecm->bufSizeStart = 0;
+    aecm->checkBufSizeCtr = 0;
+    aecm->filtDelay = 0;
+    aecm->timeForDelayChange = 0;
+    aecm->knownDelay = 0;
+    aecm->lastDelayDiff = 0;
+
+    memset(&aecm->farendOld[0][0], 0, 160);
+
+    // Default settings.
+    aecConfig.cngMode = AecmTrue;
+    aecConfig.echoMode = 3;
+
+    if (WebRtcAecm_set_config(aecm, aecConfig) == -1)
+    {
+        aecm->lastError = AECM_UNSPECIFIED_ERROR;
+        return -1;
+    }
+
+    return 0;
+}
+
+int32_t WebRtcAecm_BufferFarend(void *aecmInst, const int16_t *farend,
+                                size_t nrOfSamples)
+{
+  AecMobile* aecm = aecmInst;
+    int32_t retVal = 0;
+
+    if (aecm == NULL)
+    {
+        return -1;
+    }
+
+    if (farend == NULL)
+    {
+        aecm->lastError = AECM_NULL_POINTER_ERROR;
+        return -1;
+    }
+
+    if (aecm->initFlag != kInitCheck)
+    {
+        aecm->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    if (nrOfSamples != 80 && nrOfSamples != 160)
+    {
+        aecm->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+
+    // TODO: Is this really a good idea?
+    if (!aecm->ECstartup)
+    {
+        WebRtcAecm_DelayComp(aecm);
+    }
+
+    WebRtc_WriteBuffer(aecm->farendBuf, farend, nrOfSamples);
+
+    return retVal;
+}
+
+int32_t WebRtcAecm_Process(void *aecmInst, const int16_t *nearendNoisy,
+                           const int16_t *nearendClean, int16_t *out,
+                           size_t nrOfSamples, int16_t msInSndCardBuf)
+{
+  AecMobile* aecm = aecmInst;
+    int32_t retVal = 0;
+    size_t i;
+    short nmbrOfFilledBuffers;
+    size_t nBlocks10ms;
+    size_t nFrames;
+#ifdef AEC_DEBUG
+    short msInAECBuf;
+#endif
+
+    if (aecm == NULL)
+    {
+        return -1;
+    }
+
+    if (nearendNoisy == NULL)
+    {
+        aecm->lastError = AECM_NULL_POINTER_ERROR;
+        return -1;
+    }
+
+    if (out == NULL)
+    {
+        aecm->lastError = AECM_NULL_POINTER_ERROR;
+        return -1;
+    }
+
+    if (aecm->initFlag != kInitCheck)
+    {
+        aecm->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    if (nrOfSamples != 80 && nrOfSamples != 160)
+    {
+        aecm->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+
+    if (msInSndCardBuf < 0)
+    {
+        msInSndCardBuf = 0;
+        aecm->lastError = AECM_BAD_PARAMETER_WARNING;
+        retVal = -1;
+    } else if (msInSndCardBuf > 500)
+    {
+        msInSndCardBuf = 500;
+        aecm->lastError = AECM_BAD_PARAMETER_WARNING;
+        retVal = -1;
+    }
+    msInSndCardBuf += 10;
+    aecm->msInSndCardBuf = msInSndCardBuf;
+
+    nFrames = nrOfSamples / FRAME_LEN;
+    nBlocks10ms = nFrames / aecm->aecmCore->mult;
+
+    if (aecm->ECstartup)
+    {
+        if (nearendClean == NULL)
+        {
+            if (out != nearendNoisy)
+            {
+                memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples);
+            }
+        } else if (out != nearendClean)
+        {
+            memcpy(out, nearendClean, sizeof(short) * nrOfSamples);
+        }
+
+        nmbrOfFilledBuffers =
+            (short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
+        // The AECM is in the start up mode
+        // AECM is disabled until the soundcard buffer and farend buffers are OK
+
+        // Mechanism to ensure that the soundcard buffer is reasonably stable.
+        if (aecm->checkBuffSize)
+        {
+            aecm->checkBufSizeCtr++;
+            // Before we fill up the far end buffer we require the amount of data on the
+            // sound card to be stable (+/-8 ms) compared to the first value. This
+            // comparison is made during the following 4 consecutive frames. If it seems
+            // to be stable then we start to fill up the far end buffer.
+
+            if (aecm->counter == 0)
+            {
+                aecm->firstVal = aecm->msInSndCardBuf;
+                aecm->sum = 0;
+            }
+
+            if (abs(aecm->firstVal - aecm->msInSndCardBuf)
+                    < WEBRTC_SPL_MAX(0.2 * aecm->msInSndCardBuf, kSampMsNb))
+            {
+                aecm->sum += aecm->msInSndCardBuf;
+                aecm->counter++;
+            } else
+            {
+                aecm->counter = 0;
+            }
+
+            if (aecm->counter * nBlocks10ms >= 6)
+            {
+                // The farend buffer size is determined in blocks of 80 samples
+                // Use 75% of the average value of the soundcard buffer
+                aecm->bufSizeStart
+                        = WEBRTC_SPL_MIN((3 * aecm->sum
+                                        * aecm->aecmCore->mult) / (aecm->counter * 40), BUF_SIZE_FRAMES);
+                // buffersize has now been determined
+                aecm->checkBuffSize = 0;
+            }
+
+            if (aecm->checkBufSizeCtr * nBlocks10ms > 50)
+            {
+                // for really bad sound cards, don't disable echocanceller for more than 0.5 sec
+                aecm->bufSizeStart = WEBRTC_SPL_MIN((3 * aecm->msInSndCardBuf
+                                * aecm->aecmCore->mult) / 40, BUF_SIZE_FRAMES);
+                aecm->checkBuffSize = 0;
+            }
+        }
+
+        // if checkBuffSize changed in the if-statement above
+        if (!aecm->checkBuffSize)
+        {
+            // soundcard buffer is now reasonably stable
+            // When the far end buffer is filled with approximately the same amount of
+            // data as the amount on the sound card we end the start up phase and start
+            // to cancel echoes.
+
+            if (nmbrOfFilledBuffers == aecm->bufSizeStart)
+            {
+                aecm->ECstartup = 0; // Enable the AECM
+            } else if (nmbrOfFilledBuffers > aecm->bufSizeStart)
+            {
+                WebRtc_MoveReadPtr(aecm->farendBuf,
+                                   (int) WebRtc_available_read(aecm->farendBuf)
+                                   - (int) aecm->bufSizeStart * FRAME_LEN);
+                aecm->ECstartup = 0;
+            }
+        }
+
+    } else
+    {
+        // AECM is enabled
+
+        // Note only 1 block supported for nb and 2 blocks for wb
+        for (i = 0; i < nFrames; i++)
+        {
+            int16_t farend[FRAME_LEN];
+            const int16_t* farend_ptr = NULL;
+
+            nmbrOfFilledBuffers =
+                (short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
+
+            // Check that there is data in the far end buffer
+            if (nmbrOfFilledBuffers > 0)
+            {
+                // Get the next 80 samples from the farend buffer
+                WebRtc_ReadBuffer(aecm->farendBuf, (void**) &farend_ptr, farend,
+                                  FRAME_LEN);
+
+                // Always store the last frame for use when we run out of data
+                memcpy(&(aecm->farendOld[i][0]), farend_ptr,
+                       FRAME_LEN * sizeof(short));
+            } else
+            {
+                // We have no data so we use the last played frame
+                memcpy(farend, &(aecm->farendOld[i][0]), FRAME_LEN * sizeof(short));
+                farend_ptr = farend;
+            }
+
+            // Call buffer delay estimator when all data is extracted,
+            // i,e. i = 0 for NB and i = 1 for WB
+            if ((i == 0 && aecm->sampFreq == 8000) || (i == 1 && aecm->sampFreq == 16000))
+            {
+                WebRtcAecm_EstBufDelay(aecm, aecm->msInSndCardBuf);
+            }
+
+            // Call the AECM
+            /*WebRtcAecm_ProcessFrame(aecm->aecmCore, farend, &nearend[FRAME_LEN * i],
+             &out[FRAME_LEN * i], aecm->knownDelay);*/
+            if (WebRtcAecm_ProcessFrame(aecm->aecmCore,
+                                        farend_ptr,
+                                        &nearendNoisy[FRAME_LEN * i],
+                                        (nearendClean
+                                         ? &nearendClean[FRAME_LEN * i]
+                                         : NULL),
+                                        &out[FRAME_LEN * i]) == -1)
+                return -1;
+        }
+    }
+
+#ifdef AEC_DEBUG
+    msInAECBuf = (short) WebRtc_available_read(aecm->farendBuf) /
+        (kSampMsNb * aecm->aecmCore->mult);
+    fwrite(&msInAECBuf, 2, 1, aecm->bufFile);
+    fwrite(&(aecm->knownDelay), sizeof(aecm->knownDelay), 1, aecm->delayFile);
+#endif
+
+    return retVal;
+}
+
+int32_t WebRtcAecm_set_config(void *aecmInst, AecmConfig config)
+{
+  AecMobile* aecm = aecmInst;
+
+    if (aecm == NULL)
+    {
+        return -1;
+    }
+
+    if (aecm->initFlag != kInitCheck)
+    {
+        aecm->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    if (config.cngMode != AecmFalse && config.cngMode != AecmTrue)
+    {
+        aecm->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    aecm->aecmCore->cngMode = config.cngMode;
+
+    if (config.echoMode < 0 || config.echoMode > 4)
+    {
+        aecm->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    aecm->echoMode = config.echoMode;
+
+    if (aecm->echoMode == 0)
+    {
+        aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 3;
+        aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 3;
+        aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 3;
+        aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 3;
+        aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A >> 3)
+                - (SUPGAIN_ERROR_PARAM_B >> 3);
+        aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B >> 3)
+                - (SUPGAIN_ERROR_PARAM_D >> 3);
+    } else if (aecm->echoMode == 1)
+    {
+        aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 2;
+        aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 2;
+        aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 2;
+        aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 2;
+        aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A >> 2)
+                - (SUPGAIN_ERROR_PARAM_B >> 2);
+        aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B >> 2)
+                - (SUPGAIN_ERROR_PARAM_D >> 2);
+    } else if (aecm->echoMode == 2)
+    {
+        aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 1;
+        aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 1;
+        aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 1;
+        aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 1;
+        aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A >> 1)
+                - (SUPGAIN_ERROR_PARAM_B >> 1);
+        aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B >> 1)
+                - (SUPGAIN_ERROR_PARAM_D >> 1);
+    } else if (aecm->echoMode == 3)
+    {
+        aecm->aecmCore->supGain = SUPGAIN_DEFAULT;
+        aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT;
+        aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A;
+        aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D;
+        aecm->aecmCore->supGainErrParamDiffAB = SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B;
+        aecm->aecmCore->supGainErrParamDiffBD = SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D;
+    } else if (aecm->echoMode == 4)
+    {
+        aecm->aecmCore->supGain = SUPGAIN_DEFAULT << 1;
+        aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT << 1;
+        aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A << 1;
+        aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D << 1;
+        aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A << 1)
+                - (SUPGAIN_ERROR_PARAM_B << 1);
+        aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B << 1)
+                - (SUPGAIN_ERROR_PARAM_D << 1);
+    }
+
+    return 0;
+}
+
+int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config)
+{
+  AecMobile* aecm = aecmInst;
+
+    if (aecm == NULL)
+    {
+        return -1;
+    }
+
+    if (config == NULL)
+    {
+        aecm->lastError = AECM_NULL_POINTER_ERROR;
+        return -1;
+    }
+
+    if (aecm->initFlag != kInitCheck)
+    {
+        aecm->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    config->cngMode = aecm->aecmCore->cngMode;
+    config->echoMode = aecm->echoMode;
+
+    return 0;
+}
+
+int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
+                                const void* echo_path,
+                                size_t size_bytes)
+{
+  AecMobile* aecm = aecmInst;
+    const int16_t* echo_path_ptr = echo_path;
+
+    if (aecmInst == NULL) {
+      return -1;
+    }
+    if (echo_path == NULL) {
+      aecm->lastError = AECM_NULL_POINTER_ERROR;
+      return -1;
+    }
+    if (size_bytes != WebRtcAecm_echo_path_size_bytes())
+    {
+        // Input channel size does not match the size of AECM
+        aecm->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    if (aecm->initFlag != kInitCheck)
+    {
+        aecm->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    WebRtcAecm_InitEchoPathCore(aecm->aecmCore, echo_path_ptr);
+
+    return 0;
+}
+
+int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
+                               void* echo_path,
+                               size_t size_bytes)
+{
+  AecMobile* aecm = aecmInst;
+    int16_t* echo_path_ptr = echo_path;
+
+    if (aecmInst == NULL) {
+      return -1;
+    }
+    if (echo_path == NULL) {
+      aecm->lastError = AECM_NULL_POINTER_ERROR;
+      return -1;
+    }
+    if (size_bytes != WebRtcAecm_echo_path_size_bytes())
+    {
+        // Input channel size does not match the size of AECM
+        aecm->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    if (aecm->initFlag != kInitCheck)
+    {
+        aecm->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    memcpy(echo_path_ptr, aecm->aecmCore->channelStored, size_bytes);
+    return 0;
+}
+
+size_t WebRtcAecm_echo_path_size_bytes()
+{
+    return (PART_LEN1 * sizeof(int16_t));
+}
+
+int32_t WebRtcAecm_get_error_code(void *aecmInst)
+{
+  AecMobile* aecm = aecmInst;
+
+    if (aecm == NULL)
+    {
+        return -1;
+    }
+
+    return aecm->lastError;
+}
+
+static int WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf) {
+    short delayNew, nSampSndCard;
+    short nSampFar = (short) WebRtc_available_read(aecm->farendBuf);
+    short diff;
+
+    nSampSndCard = msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
+
+    delayNew = nSampSndCard - nSampFar;
+
+    if (delayNew < FRAME_LEN)
+    {
+        WebRtc_MoveReadPtr(aecm->farendBuf, FRAME_LEN);
+        delayNew += FRAME_LEN;
+    }
+
+    aecm->filtDelay = WEBRTC_SPL_MAX(0, (8 * aecm->filtDelay + 2 * delayNew) / 10);
+
+    diff = aecm->filtDelay - aecm->knownDelay;
+    if (diff > 224)
+    {
+        if (aecm->lastDelayDiff < 96)
+        {
+            aecm->timeForDelayChange = 0;
+        } else
+        {
+            aecm->timeForDelayChange++;
+        }
+    } else if (diff < 96 && aecm->knownDelay > 0)
+    {
+        if (aecm->lastDelayDiff > 224)
+        {
+            aecm->timeForDelayChange = 0;
+        } else
+        {
+            aecm->timeForDelayChange++;
+        }
+    } else
+    {
+        aecm->timeForDelayChange = 0;
+    }
+    aecm->lastDelayDiff = diff;
+
+    if (aecm->timeForDelayChange > 25)
+    {
+        aecm->knownDelay = WEBRTC_SPL_MAX((int)aecm->filtDelay - 160, 0);
+    }
+    return 0;
+}
+
+static int WebRtcAecm_DelayComp(AecMobile* aecm) {
+    int nSampFar = (int) WebRtc_available_read(aecm->farendBuf);
+    int nSampSndCard, delayNew, nSampAdd;
+    const int maxStuffSamp = 10 * FRAME_LEN;
+
+    nSampSndCard = aecm->msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
+    delayNew = nSampSndCard - nSampFar;
+
+    if (delayNew > FAR_BUF_LEN - FRAME_LEN * aecm->aecmCore->mult)
+    {
+        // The difference of the buffer sizes is larger than the maximum
+        // allowed known delay. Compensate by stuffing the buffer.
+        nSampAdd = (int)(WEBRTC_SPL_MAX(((nSampSndCard >> 1) - nSampFar),
+                FRAME_LEN));
+        nSampAdd = WEBRTC_SPL_MIN(nSampAdd, maxStuffSamp);
+
+        WebRtc_MoveReadPtr(aecm->farendBuf, -nSampAdd);
+        aecm->delayChange = 1; // the delay needs to be updated
+    }
+
+    return 0;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h
new file mode 100644
index 00000000..7ae15c2a
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h
@@ -0,0 +1,218 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
+
+#include <stdlib.h>
+
+#include "webrtc/typedefs.h"
+
+enum {
+    AecmFalse = 0,
+    AecmTrue
+};
+
+// Errors
+#define AECM_UNSPECIFIED_ERROR           12000
+#define AECM_UNSUPPORTED_FUNCTION_ERROR  12001
+#define AECM_UNINITIALIZED_ERROR         12002
+#define AECM_NULL_POINTER_ERROR          12003
+#define AECM_BAD_PARAMETER_ERROR         12004
+
+// Warnings
+#define AECM_BAD_PARAMETER_WARNING       12100
+
+typedef struct {
+    int16_t cngMode;            // AECM_FALSE, AECM_TRUE (default)
+    int16_t echoMode;           // 0, 1, 2, 3 (default), 4
+} AecmConfig;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Allocates the memory needed by the AECM. The memory needs to be
+ * initialized separately using the WebRtcAecm_Init() function.
+ * Returns a pointer to the instance and a nullptr at failure.
+ */
+void* WebRtcAecm_Create();
+
+/*
+ * This function releases the memory allocated by WebRtcAecm_Create()
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*    aecmInst            Pointer to the AECM instance
+ */
+void WebRtcAecm_Free(void* aecmInst);
+
+/*
+ * Initializes an AECM instance.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecmInst      Pointer to the AECM instance
+ * int32_t        sampFreq      Sampling frequency of data
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq);
+
+/*
+ * Inserts an 80 or 160 sample block of data into the farend buffer.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecmInst      Pointer to the AECM instance
+ * int16_t*       farend        In buffer containing one frame of
+ *                              farend signal
+ * int16_t        nrOfSamples   Number of samples in farend buffer
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAecm_BufferFarend(void* aecmInst,
+                                const int16_t* farend,
+                                size_t nrOfSamples);
+
+/*
+ * Runs the AECM on an 80 or 160 sample blocks of data.
+ *
+ * Inputs                        Description
+ * -------------------------------------------------------------------
+ * void*          aecmInst       Pointer to the AECM instance
+ * int16_t*       nearendNoisy   In buffer containing one frame of
+ *                               reference nearend+echo signal. If
+ *                               noise reduction is active, provide
+ *                               the noisy signal here.
+ * int16_t*       nearendClean   In buffer containing one frame of
+ *                               nearend+echo signal. If noise
+ *                               reduction is active, provide the
+ *                               clean signal here. Otherwise pass a
+ *                               NULL pointer.
+ * int16_t        nrOfSamples    Number of samples in nearend buffer
+ * int16_t        msInSndCardBuf Delay estimate for sound card and
+ *                               system buffers
+ *
+ * Outputs                       Description
+ * -------------------------------------------------------------------
+ * int16_t*       out            Out buffer, one frame of processed nearend
+ * int32_t        return         0: OK
+ *                              -1: error
+ */
+int32_t WebRtcAecm_Process(void* aecmInst,
+                           const int16_t* nearendNoisy,
+                           const int16_t* nearendClean,
+                           int16_t* out,
+                           size_t nrOfSamples,
+                           int16_t msInSndCardBuf);
+
+/*
+ * This function enables the user to set certain parameters on-the-fly
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecmInst      Pointer to the AECM instance
+ * AecmConfig     config        Config instance that contains all
+ *                              properties to be set
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAecm_set_config(void* aecmInst, AecmConfig config);
+
+/*
+ * This function enables the user to set certain parameters on-the-fly
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecmInst      Pointer to the AECM instance
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * AecmConfig*    config        Pointer to the config instance that
+ *                              all properties will be written to
+ * int32_t        return        0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config);
+
+/*
+ * This function enables the user to set the echo path on-the-fly.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*        aecmInst        Pointer to the AECM instance
+ * void*        echo_path       Pointer to the echo path to be set
+ * size_t       size_bytes      Size in bytes of the echo path
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t      return          0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
+                                const void* echo_path,
+                                size_t size_bytes);
+
+/*
+ * This function enables the user to get the currently used echo path
+ * on-the-fly
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*        aecmInst        Pointer to the AECM instance
+ * void*        echo_path       Pointer to echo path
+ * size_t       size_bytes      Size in bytes of the echo path
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t      return          0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
+                               void* echo_path,
+                               size_t size_bytes);
+
+/*
+ * This function enables the user to get the echo path size in bytes
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * size_t       return          Size in bytes
+ */
+size_t WebRtcAecm_echo_path_size_bytes();
+
+/*
+ * Gets the last error code.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecmInst      Pointer to the AECM instance
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        11000-11100: error code
+ */
+int32_t WebRtcAecm_get_error_code(void *aecmInst);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging.h b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging.h
new file mode 100644
index 00000000..3cf9ff89
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging.h
@@ -0,0 +1,86 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
+
+#include <stdio.h>
+
+#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h"
+
+// To enable AEC logging, invoke GYP with -Daec_debug_dump=1.
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+// Dumps a wav data to file.
+#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \
+  do {                                                   \
+    rtc_WavWriteSamples(file, data, num_samples);        \
+  } while (0)
+
+// (Re)opens a wav file for writing using the specified sample rate.
+#define RTC_AEC_DEBUG_WAV_REOPEN(name, instance_index, process_rate,     \
+                                 sample_rate, wav_file)                  \
+  do {                                                                   \
+    WebRtcAec_ReopenWav(name, instance_index, process_rate, sample_rate, \
+                        wav_file);                                       \
+  } while (0)
+
+// Closes a wav file.
+#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \
+  do {                                    \
+    rtc_WavClose(wav_file);               \
+  } while (0)
+
+// Dumps a raw data to file.
+#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size) \
+  do {                                                 \
+    (void) fwrite(data, data_size, 1, file);           \
+  } while (0)
+
+// Opens a raw data file for writing using the specified sample rate.
+#define RTC_AEC_DEBUG_RAW_OPEN(name, instance_counter, file) \
+  do {                                                       \
+    WebRtcAec_RawFileOpen(name, instance_counter, file);     \
+  } while (0)
+
+// Closes a raw data file.
+#define RTC_AEC_DEBUG_RAW_CLOSE(file) \
+  do {                                \
+    fclose(file);                     \
+  } while (0)
+
+#else  // RTC_AEC_DEBUG_DUMP
+#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \
+  do {                                                   \
+  } while (0)
+
+#define RTC_AEC_DEBUG_WAV_REOPEN(wav_file, name, instance_index, process_rate, \
+                                 sample_rate)                                  \
+  do {                                                                         \
+  } while (0)
+
+#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \
+  do {                                    \
+  } while (0)
+
+#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size) \
+  do {                                                 \
+  } while (0)
+
+#define RTC_AEC_DEBUG_RAW_OPEN(file, name, instance_counter) \
+  do {                                                       \
+  } while (0)
+
+#define RTC_AEC_DEBUG_RAW_CLOSE(file) \
+  do {                                \
+  } while (0)
+
+#endif  // WEBRTC_AEC_DEBUG_DUMP
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc
new file mode 100644
index 00000000..3a434714
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc
@@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h"
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/base/stringutils.h"
+#include "webrtc/common_audio/wav_file.h"
+#include "webrtc/typedefs.h"
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+void WebRtcAec_ReopenWav(const char* name,
+                         int instance_index,
+                         int process_rate,
+                         int sample_rate,
+                         rtc_WavWriter** wav_file) {
+  if (*wav_file) {
+    if (rtc_WavSampleRate(*wav_file) == sample_rate)
+      return;
+    rtc_WavClose(*wav_file);
+  }
+  char filename[64];
+  int written = rtc::sprintfn(filename, sizeof(filename), "%s%d-%d.wav", name,
+                              instance_index, process_rate);
+
+  // Ensure there was no buffer output error.
+  RTC_DCHECK_GE(written, 0);
+  // Ensure that the buffer size was sufficient.
+  RTC_DCHECK_LT(static_cast<size_t>(written), sizeof(filename));
+
+  *wav_file = rtc_WavOpen(filename, sample_rate, 1);
+}
+
+void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file) {
+  char filename[64];
+  int written = rtc::sprintfn(filename, sizeof(filename), "%s_%d.dat", name,
+                              instance_index);
+
+  // Ensure there was no buffer output error.
+  RTC_DCHECK_GE(written, 0);
+  // Ensure that the buffer size was sufficient.
+  RTC_DCHECK_LT(static_cast<size_t>(written), sizeof(filename));
+
+  *file = fopen(filename, "wb");
+}
+
+#endif  // WEBRTC_AEC_DEBUG_DUMP
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.h b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.h
new file mode 100644
index 00000000..5ec83948
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.h
@@ -0,0 +1,41 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
+
+#include <stdio.h>
+
+#include "webrtc/common_audio/wav_file.h"
+#include "webrtc/typedefs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+// Opens a new Wav file for writing. If it was already open with a different
+// sample frequency, it closes it first.
+void WebRtcAec_ReopenWav(const char* name,
+                         int instance_index,
+                         int process_rate,
+                         int sample_rate,
+                         rtc_WavWriter** wav_file);
+
+// Opens dumpfile with instance-specific filename.
+void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file);
+
+#endif  // WEBRTC_AEC_DEBUG_DUMP
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/defines.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/defines.h
new file mode 100644
index 00000000..8271332c
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/defines.h
@@ -0,0 +1,49 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_
+
+#define BLOCKL_MAX          160 // max processing block length: 160
+#define ANAL_BLOCKL_MAX     256 // max analysis block length: 256
+#define HALF_ANAL_BLOCKL    129 // half max analysis block length + 1
+#define NUM_HIGH_BANDS_MAX  2   // max number of high bands: 2
+
+#define QUANTILE            (float)0.25
+
+#define SIMULT              3
+#define END_STARTUP_LONG    200
+#define END_STARTUP_SHORT   50
+#define FACTOR              (float)40.0
+#define WIDTH               (float)0.01
+
+// Length of fft work arrays.
+#define IP_LENGTH (ANAL_BLOCKL_MAX >> 1) // must be at least ceil(2 + sqrt(ANAL_BLOCKL_MAX/2))
+#define W_LENGTH (ANAL_BLOCKL_MAX >> 1)
+
+//PARAMETERS FOR NEW METHOD
+#define DD_PR_SNR           (float)0.98 // DD update of prior SNR
+#define LRT_TAVG            (float)0.50 // tavg parameter for LRT (previously 0.90)
+#define SPECT_FL_TAVG       (float)0.30 // tavg parameter for spectral flatness measure
+#define SPECT_DIFF_TAVG     (float)0.30 // tavg parameter for spectral difference measure
+#define PRIOR_UPDATE        (float)0.10 // update parameter of prior model
+#define NOISE_UPDATE        (float)0.90 // update parameter for noise
+#define SPEECH_UPDATE       (float)0.99 // update parameter when likely speech
+#define WIDTH_PR_MAP        (float)4.0  // width parameter in sigmoid map for prior model
+#define LRT_FEATURE_THR     (float)0.5  // default threshold for LRT feature
+#define SF_FEATURE_THR      (float)0.5  // default threshold for Spectral Flatness feature
+#define SD_FEATURE_THR      (float)0.5  // default threshold for Spectral Difference feature
+#define PROB_RANGE          (float)0.20 // probability threshold for noise state in
+                                        // speech/noise likelihood
+#define HIST_PAR_EST         1000       // histogram size for estimation of parameters
+#define GAMMA_PAUSE         (float)0.05 // update for conservative noise estimate
+//
+#define B_LIM               (float)0.5  // threshold in final energy gain factor calculation
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression.h
new file mode 100644
index 00000000..9dac56bd
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression.h
@@ -0,0 +1,116 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
+
+#include <stddef.h>
+
+#include "webrtc/typedefs.h"
+
+typedef struct NsHandleT NsHandle;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * This function creates an instance of the floating point Noise Suppression.
+ */
+NsHandle* WebRtcNs_Create();
+
+/*
+ * This function frees the dynamic memory of a specified noise suppression
+ * instance.
+ *
+ * Input:
+ *      - NS_inst       : Pointer to NS instance that should be freed
+ */
+void WebRtcNs_Free(NsHandle* NS_inst);
+
+/*
+ * This function initializes a NS instance and has to be called before any other
+ * processing is made.
+ *
+ * Input:
+ *      - NS_inst       : Instance that should be initialized
+ *      - fs            : sampling frequency
+ *
+ * Output:
+ *      - NS_inst       : Initialized instance
+ *
+ * Return value         :  0 - Ok
+ *                        -1 - Error
+ */
+int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs);
+
+/*
+ * This changes the aggressiveness of the noise suppression method.
+ *
+ * Input:
+ *      - NS_inst       : Noise suppression instance.
+ *      - mode          : 0: Mild, 1: Medium , 2: Aggressive
+ *
+ * Output:
+ *      - NS_inst       : Updated instance.
+ *
+ * Return value         :  0 - Ok
+ *                        -1 - Error
+ */
+int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
+
+/*
+ * This functions estimates the background noise for the inserted speech frame.
+ * The input and output signals should always be 10ms (80 or 160 samples).
+ *
+ * Input
+ *      - NS_inst       : Noise suppression instance.
+ *      - spframe       : Pointer to speech frame buffer for L band
+ *
+ * Output:
+ *      - NS_inst       : Updated NS instance
+ */
+void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe);
+
+/*
+ * This functions does Noise Suppression for the inserted speech frame. The
+ * input and output signals should always be 10ms (80 or 160 samples).
+ *
+ * Input
+ *      - NS_inst       : Noise suppression instance.
+ *      - spframe       : Pointer to speech frame buffer for each band
+ *      - num_bands     : Number of bands
+ *
+ * Output:
+ *      - NS_inst       : Updated NS instance
+ *      - outframe      : Pointer to output frame for each band
+ */
+void WebRtcNs_Process(NsHandle* NS_inst,
+                     const float* const* spframe,
+                     size_t num_bands,
+                     float* const* outframe);
+
+/* Returns the internally used prior speech probability of the current frame.
+ * There is a frequency bin based one as well, with which this should not be
+ * confused.
+ *
+ * Input
+ *      - handle        : Noise suppression instance.
+ *
+ * Return value         : Prior speech probability in interval [0.0, 1.0].
+ *                        -1 - NULL pointer or uninitialized instance.
+ */
+float WebRtcNs_prior_speech_probability(NsHandle* handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h
new file mode 100644
index 00000000..88fe4cd6
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h
@@ -0,0 +1,88 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
+
+#include "webrtc/typedefs.h"
+
+typedef struct NsxHandleT NsxHandle;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * This function creates an instance of the fixed point Noise Suppression.
+ */
+NsxHandle* WebRtcNsx_Create();
+
+/*
+ * This function frees the dynamic memory of a specified Noise Suppression
+ * instance.
+ *
+ * Input:
+ *      - nsxInst       : Pointer to NS instance that should be freed
+ */
+void WebRtcNsx_Free(NsxHandle* nsxInst);
+
+/*
+ * This function initializes a NS instance
+ *
+ * Input:
+ *      - nsxInst       : Instance that should be initialized
+ *      - fs            : sampling frequency
+ *
+ * Output:
+ *      - nsxInst       : Initialized instance
+ *
+ * Return value         :  0 - Ok
+ *                        -1 - Error
+ */
+int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs);
+
+/*
+ * This changes the aggressiveness of the noise suppression method.
+ *
+ * Input:
+ *      - nsxInst       : Instance that should be initialized
+ *      - mode          : 0: Mild, 1: Medium , 2: Aggressive
+ *
+ * Output:
+ *      - nsxInst       : Initialized instance
+ *
+ * Return value         :  0 - Ok
+ *                        -1 - Error
+ */
+int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
+
+/*
+ * This functions does noise suppression for the inserted speech frame. The
+ * input and output signals should always be 10ms (80 or 160 samples).
+ *
+ * Input
+ *      - nsxInst       : NSx instance. Needs to be initiated before call.
+ *      - speechFrame   : Pointer to speech frame buffer for each band
+ *      - num_bands     : Number of bands
+ *
+ * Output:
+ *      - nsxInst       : Updated NSx instance
+ *      - outFrame      : Pointer to output frame for each band
+ */
+void WebRtcNsx_Process(NsxHandle* nsxInst,
+                       const short* const* speechFrame,
+                       int num_bands,
+                       short* const* outFrame);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression.c
new file mode 100644
index 00000000..13f1b2d6
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression.c
@@ -0,0 +1,59 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/ns/defines.h"
+#include "webrtc/modules/audio_processing/ns/ns_core.h"
+
+NsHandle* WebRtcNs_Create() {
+  NoiseSuppressionC* self = malloc(sizeof(NoiseSuppressionC));
+  self->initFlag = 0;
+  return (NsHandle*)self;
+}
+
+void WebRtcNs_Free(NsHandle* NS_inst) {
+  free(NS_inst);
+}
+
+int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs) {
+  return WebRtcNs_InitCore((NoiseSuppressionC*)NS_inst, fs);
+}
+
+int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) {
+  return WebRtcNs_set_policy_core((NoiseSuppressionC*)NS_inst, mode);
+}
+
+void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe) {
+  WebRtcNs_AnalyzeCore((NoiseSuppressionC*)NS_inst, spframe);
+}
+
+void WebRtcNs_Process(NsHandle* NS_inst,
+                      const float* const* spframe,
+                      size_t num_bands,
+                      float* const* outframe) {
+  WebRtcNs_ProcessCore((NoiseSuppressionC*)NS_inst, spframe, num_bands,
+                       outframe);
+}
+
+float WebRtcNs_prior_speech_probability(NsHandle* handle) {
+  NoiseSuppressionC* self = (NoiseSuppressionC*)handle;
+  if (handle == NULL) {
+    return -1;
+  }
+  if (self->initFlag == 0) {
+    return -1;
+  }
+  return self->priorSpeechProb;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression_x.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression_x.c
new file mode 100644
index 00000000..150fe608
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression_x.c
@@ -0,0 +1,46 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+
+#include <stdlib.h>
+
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+#include "webrtc/modules/audio_processing/ns/nsx_defines.h"
+
+NsxHandle* WebRtcNsx_Create() {
+  NoiseSuppressionFixedC* self = malloc(sizeof(NoiseSuppressionFixedC));
+  WebRtcSpl_Init();
+  self->real_fft = NULL;
+  self->initFlag = 0;
+  return (NsxHandle*)self;
+}
+
+void WebRtcNsx_Free(NsxHandle* nsxInst) {
+  WebRtcSpl_FreeRealFFT(((NoiseSuppressionFixedC*)nsxInst)->real_fft);
+  free(nsxInst);
+}
+
+int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs) {
+  return WebRtcNsx_InitCore((NoiseSuppressionFixedC*)nsxInst, fs);
+}
+
+int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) {
+  return WebRtcNsx_set_policy_core((NoiseSuppressionFixedC*)nsxInst, mode);
+}
+
+void WebRtcNsx_Process(NsxHandle* nsxInst,
+                      const short* const* speechFrame,
+                      int num_bands,
+                      short* const* outFrame) {
+  WebRtcNsx_ProcessCore((NoiseSuppressionFixedC*)nsxInst, speechFrame,
+                        num_bands, outFrame);
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.c
new file mode 100644
index 00000000..1d609140
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.c
@@ -0,0 +1,1416 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/fft4g.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"
+#include "webrtc/modules/audio_processing/ns/ns_core.h"
+#include "webrtc/modules/audio_processing/ns/windows_private.h"
+
+// Set Feature Extraction Parameters.
+static void set_feature_extraction_parameters(NoiseSuppressionC* self) {
+  // Bin size of histogram.
+  self->featureExtractionParams.binSizeLrt = 0.1f;
+  self->featureExtractionParams.binSizeSpecFlat = 0.05f;
+  self->featureExtractionParams.binSizeSpecDiff = 0.1f;
+
+  // Range of histogram over which LRT threshold is computed.
+  self->featureExtractionParams.rangeAvgHistLrt = 1.f;
+
+  // Scale parameters: multiply dominant peaks of the histograms by scale factor
+  // to obtain thresholds for prior model.
+  // For LRT and spectral difference.
+  self->featureExtractionParams.factor1ModelPars = 1.2f;
+  // For spectral_flatness: used when noise is flatter than speech.
+  self->featureExtractionParams.factor2ModelPars = 0.9f;
+
+  // Peak limit for spectral flatness (varies between 0 and 1).
+  self->featureExtractionParams.thresPosSpecFlat = 0.6f;
+
+  // Limit on spacing of two highest peaks in histogram: spacing determined by
+  // bin size.
+  self->featureExtractionParams.limitPeakSpacingSpecFlat =
+      2 * self->featureExtractionParams.binSizeSpecFlat;
+  self->featureExtractionParams.limitPeakSpacingSpecDiff =
+      2 * self->featureExtractionParams.binSizeSpecDiff;
+
+  // Limit on relevance of second peak.
+  self->featureExtractionParams.limitPeakWeightsSpecFlat = 0.5f;
+  self->featureExtractionParams.limitPeakWeightsSpecDiff = 0.5f;
+
+  // Fluctuation limit of LRT feature.
+  self->featureExtractionParams.thresFluctLrt = 0.05f;
+
+  // Limit on the max and min values for the feature thresholds.
+  self->featureExtractionParams.maxLrt = 1.f;
+  self->featureExtractionParams.minLrt = 0.2f;
+
+  self->featureExtractionParams.maxSpecFlat = 0.95f;
+  self->featureExtractionParams.minSpecFlat = 0.1f;
+
+  self->featureExtractionParams.maxSpecDiff = 1.f;
+  self->featureExtractionParams.minSpecDiff = 0.16f;
+
+  // Criteria of weight of histogram peak to accept/reject feature.
+  self->featureExtractionParams.thresWeightSpecFlat =
+      (int)(0.3 * (self->modelUpdatePars[1]));  // For spectral flatness.
+  self->featureExtractionParams.thresWeightSpecDiff =
+      (int)(0.3 * (self->modelUpdatePars[1]));  // For spectral difference.
+}
+
+// Initialize state.
+int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs) {
+  int i;
+  // Check for valid pointer.
+  if (self == NULL) {
+    return -1;
+  }
+
+  // Initialization of struct.
+  if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) {
+    self->fs = fs;
+  } else {
+    return -1;
+  }
+  self->windShift = 0;
+  // We only support 10ms frames.
+  if (fs == 8000) {
+    self->blockLen = 80;
+    self->anaLen = 128;
+    self->window = kBlocks80w128;
+  } else {
+    self->blockLen = 160;
+    self->anaLen = 256;
+    self->window = kBlocks160w256;
+  }
+  self->magnLen = self->anaLen / 2 + 1;  // Number of frequency bins.
+
+  // Initialize FFT work arrays.
+  self->ip[0] = 0;  // Setting this triggers initialization.
+  memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+  WebRtc_rdft(self->anaLen, 1, self->dataBuf, self->ip, self->wfft);
+
+  memset(self->analyzeBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+  memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+  memset(self->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+
+  // For HB processing.
+  memset(self->dataBufHB,
+         0,
+         sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
+
+  // For quantile noise estimation.
+  memset(self->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
+    self->lquantile[i] = 8.f;
+    self->density[i] = 0.3f;
+  }
+
+  for (i = 0; i < SIMULT; i++) {
+    self->counter[i] =
+        (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
+  }
+
+  self->updates = 0;
+
+  // Wiener filter initialization.
+  for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
+    self->smooth[i] = 1.f;
+  }
+
+  // Set the aggressiveness: default.
+  self->aggrMode = 0;
+
+  // Initialize variables for new method.
+  self->priorSpeechProb = 0.5f;  // Prior prob for speech/noise.
+  // Previous analyze mag spectrum.
+  memset(self->magnPrevAnalyze, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  // Previous process mag spectrum.
+  memset(self->magnPrevProcess, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  // Current noise-spectrum.
+  memset(self->noise, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  // Previous noise-spectrum.
+  memset(self->noisePrev, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  // Conservative noise spectrum estimate.
+  memset(self->magnAvgPause, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  // For estimation of HB in second pass.
+  memset(self->speechProb, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  // Initial average magnitude spectrum.
+  memset(self->initMagnEst, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
+    // Smooth LR (same as threshold).
+    self->logLrtTimeAvg[i] = LRT_FEATURE_THR;
+  }
+
+  // Feature quantities.
+  // Spectral flatness (start on threshold).
+  self->featureData[0] = SF_FEATURE_THR;
+  self->featureData[1] = 0.f;  // Spectral entropy: not used in this version.
+  self->featureData[2] = 0.f;  // Spectral variance: not used in this version.
+  // Average LRT factor (start on threshold).
+  self->featureData[3] = LRT_FEATURE_THR;
+  // Spectral template diff (start on threshold).
+  self->featureData[4] = SF_FEATURE_THR;
+  self->featureData[5] = 0.f;  // Normalization for spectral difference.
+  // Window time-average of input magnitude spectrum.
+  self->featureData[6] = 0.f;
+
+  // Histogram quantities: used to estimate/update thresholds for features.
+  memset(self->histLrt, 0, sizeof(int) * HIST_PAR_EST);
+  memset(self->histSpecFlat, 0, sizeof(int) * HIST_PAR_EST);
+  memset(self->histSpecDiff, 0, sizeof(int) * HIST_PAR_EST);
+
+
+  self->blockInd = -1;  // Frame counter.
+  // Default threshold for LRT feature.
+  self->priorModelPars[0] = LRT_FEATURE_THR;
+  // Threshold for spectral flatness: determined on-line.
+  self->priorModelPars[1] = 0.5f;
+  // sgn_map par for spectral measure: 1 for flatness measure.
+  self->priorModelPars[2] = 1.f;
+  // Threshold for template-difference feature: determined on-line.
+  self->priorModelPars[3] = 0.5f;
+  // Default weighting parameter for LRT feature.
+  self->priorModelPars[4] = 1.f;
+  // Default weighting parameter for spectral flatness feature.
+  self->priorModelPars[5] = 0.f;
+  // Default weighting parameter for spectral difference feature.
+  self->priorModelPars[6] = 0.f;
+
+  // Update flag for parameters:
+  // 0 no update, 1 = update once, 2 = update every window.
+  self->modelUpdatePars[0] = 2;
+  self->modelUpdatePars[1] = 500;  // Window for update.
+  // Counter for update of conservative noise spectrum.
+  self->modelUpdatePars[2] = 0;
+  // Counter if the feature thresholds are updated during the sequence.
+  self->modelUpdatePars[3] = self->modelUpdatePars[1];
+
+  self->signalEnergy = 0.0;
+  self->sumMagn = 0.0;
+  self->whiteNoiseLevel = 0.0;
+  self->pinkNoiseNumerator = 0.0;
+  self->pinkNoiseExp = 0.0;
+
+  set_feature_extraction_parameters(self);
+
+  // Default mode.
+  WebRtcNs_set_policy_core(self, 0);
+
+  self->initFlag = 1;
+  return 0;
+}
+
+// Estimate noise.
+static void NoiseEstimation(NoiseSuppressionC* self,
+                            float* magn,
+                            float* noise) {
+  size_t i, s, offset;
+  float lmagn[HALF_ANAL_BLOCKL], delta;
+
+  if (self->updates < END_STARTUP_LONG) {
+    self->updates++;
+  }
+
+  for (i = 0; i < self->magnLen; i++) {
+    lmagn[i] = (float)log(magn[i]);
+  }
+
+  // Loop over simultaneous estimates.
+  for (s = 0; s < SIMULT; s++) {
+    offset = s * self->magnLen;
+
+    // newquantest(...)
+    for (i = 0; i < self->magnLen; i++) {
+      // Compute delta.
+      if (self->density[offset + i] > 1.0) {
+        delta = FACTOR * 1.f / self->density[offset + i];
+      } else {
+        delta = FACTOR;
+      }
+
+      // Update log quantile estimate.
+      if (lmagn[i] > self->lquantile[offset + i]) {
+        self->lquantile[offset + i] +=
+            QUANTILE * delta / (float)(self->counter[s] + 1);
+      } else {
+        self->lquantile[offset + i] -=
+            (1.f - QUANTILE) * delta / (float)(self->counter[s] + 1);
+      }
+
+      // Update density estimate.
+      if (fabs(lmagn[i] - self->lquantile[offset + i]) < WIDTH) {
+        self->density[offset + i] =
+            ((float)self->counter[s] * self->density[offset + i] +
+             1.f / (2.f * WIDTH)) /
+            (float)(self->counter[s] + 1);
+      }
+    }  // End loop over magnitude spectrum.
+
+    if (self->counter[s] >= END_STARTUP_LONG) {
+      self->counter[s] = 0;
+      if (self->updates >= END_STARTUP_LONG) {
+        for (i = 0; i < self->magnLen; i++) {
+          self->quantile[i] = (float)exp(self->lquantile[offset + i]);
+        }
+      }
+    }
+
+    self->counter[s]++;
+  }  // End loop over simultaneous estimates.
+
+  // Sequentially update the noise during startup.
+  if (self->updates < END_STARTUP_LONG) {
+    // Use the last "s" to get noise during startup that differ from zero.
+    for (i = 0; i < self->magnLen; i++) {
+      self->quantile[i] = (float)exp(self->lquantile[offset + i]);
+    }
+  }
+
+  for (i = 0; i < self->magnLen; i++) {
+    noise[i] = self->quantile[i];
+  }
+}
+
+// Extract thresholds for feature parameters.
+// Histograms are computed over some window size (given by
+// self->modelUpdatePars[1]).
+// Thresholds and weights are extracted every window.
+// |flag| = 0 updates histogram only, |flag| = 1 computes the threshold/weights.
+// Threshold and weights are returned in: self->priorModelPars.
+static void FeatureParameterExtraction(NoiseSuppressionC* self, int flag) {
+  int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt;
+  int maxPeak1, maxPeak2;
+  int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff,
+      weightPeak2SpecDiff;
+
+  float binMid, featureSum;
+  float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff;
+  float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl;
+
+  // 3 features: LRT, flatness, difference.
+  // lrt_feature = self->featureData[3];
+  // flat_feature = self->featureData[0];
+  // diff_feature = self->featureData[4];
+
+  // Update histograms.
+  if (flag == 0) {
+    // LRT
+    if ((self->featureData[3] <
+         HIST_PAR_EST * self->featureExtractionParams.binSizeLrt) &&
+        (self->featureData[3] >= 0.0)) {
+      i = (int)(self->featureData[3] /
+                self->featureExtractionParams.binSizeLrt);
+      self->histLrt[i]++;
+    }
+    // Spectral flatness.
+    if ((self->featureData[0] <
+         HIST_PAR_EST * self->featureExtractionParams.binSizeSpecFlat) &&
+        (self->featureData[0] >= 0.0)) {
+      i = (int)(self->featureData[0] /
+                self->featureExtractionParams.binSizeSpecFlat);
+      self->histSpecFlat[i]++;
+    }
+    // Spectral difference.
+    if ((self->featureData[4] <
+         HIST_PAR_EST * self->featureExtractionParams.binSizeSpecDiff) &&
+        (self->featureData[4] >= 0.0)) {
+      i = (int)(self->featureData[4] /
+                self->featureExtractionParams.binSizeSpecDiff);
+      self->histSpecDiff[i]++;
+    }
+  }
+
+  // Extract parameters for speech/noise probability.
+  if (flag == 1) {
+    // LRT feature: compute the average over
+    // self->featureExtractionParams.rangeAvgHistLrt.
+    avgHistLrt = 0.0;
+    avgHistLrtCompl = 0.0;
+    avgSquareHistLrt = 0.0;
+    numHistLrt = 0;
+    for (i = 0; i < HIST_PAR_EST; i++) {
+      binMid = ((float)i + 0.5f) * self->featureExtractionParams.binSizeLrt;
+      if (binMid <= self->featureExtractionParams.rangeAvgHistLrt) {
+        avgHistLrt += self->histLrt[i] * binMid;
+        numHistLrt += self->histLrt[i];
+      }
+      avgSquareHistLrt += self->histLrt[i] * binMid * binMid;
+      avgHistLrtCompl += self->histLrt[i] * binMid;
+    }
+    if (numHistLrt > 0) {
+      avgHistLrt = avgHistLrt / ((float)numHistLrt);
+    }
+    avgHistLrtCompl = avgHistLrtCompl / ((float)self->modelUpdatePars[1]);
+    avgSquareHistLrt = avgSquareHistLrt / ((float)self->modelUpdatePars[1]);
+    fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl;
+    // Get threshold for LRT feature.
+    if (fluctLrt < self->featureExtractionParams.thresFluctLrt) {
+      // Very low fluctuation, so likely noise.
+      self->priorModelPars[0] = self->featureExtractionParams.maxLrt;
+    } else {
+      self->priorModelPars[0] =
+          self->featureExtractionParams.factor1ModelPars * avgHistLrt;
+      // Check if value is within min/max range.
+      if (self->priorModelPars[0] < self->featureExtractionParams.minLrt) {
+        self->priorModelPars[0] = self->featureExtractionParams.minLrt;
+      }
+      if (self->priorModelPars[0] > self->featureExtractionParams.maxLrt) {
+        self->priorModelPars[0] = self->featureExtractionParams.maxLrt;
+      }
+    }
+    // Done with LRT feature.
+
+    // For spectral flatness and spectral difference: compute the main peaks of
+    // histogram.
+    maxPeak1 = 0;
+    maxPeak2 = 0;
+    posPeak1SpecFlat = 0.0;
+    posPeak2SpecFlat = 0.0;
+    weightPeak1SpecFlat = 0;
+    weightPeak2SpecFlat = 0;
+
+    // Peaks for flatness.
+    for (i = 0; i < HIST_PAR_EST; i++) {
+      binMid =
+          (i + 0.5f) * self->featureExtractionParams.binSizeSpecFlat;
+      if (self->histSpecFlat[i] > maxPeak1) {
+        // Found new "first" peak.
+        maxPeak2 = maxPeak1;
+        weightPeak2SpecFlat = weightPeak1SpecFlat;
+        posPeak2SpecFlat = posPeak1SpecFlat;
+
+        maxPeak1 = self->histSpecFlat[i];
+        weightPeak1SpecFlat = self->histSpecFlat[i];
+        posPeak1SpecFlat = binMid;
+      } else if (self->histSpecFlat[i] > maxPeak2) {
+        // Found new "second" peak.
+        maxPeak2 = self->histSpecFlat[i];
+        weightPeak2SpecFlat = self->histSpecFlat[i];
+        posPeak2SpecFlat = binMid;
+      }
+    }
+
+    // Compute two peaks for spectral difference.
+    maxPeak1 = 0;
+    maxPeak2 = 0;
+    posPeak1SpecDiff = 0.0;
+    posPeak2SpecDiff = 0.0;
+    weightPeak1SpecDiff = 0;
+    weightPeak2SpecDiff = 0;
+    // Peaks for spectral difference.
+    for (i = 0; i < HIST_PAR_EST; i++) {
+      binMid =
+          ((float)i + 0.5f) * self->featureExtractionParams.binSizeSpecDiff;
+      if (self->histSpecDiff[i] > maxPeak1) {
+        // Found new "first" peak.
+        maxPeak2 = maxPeak1;
+        weightPeak2SpecDiff = weightPeak1SpecDiff;
+        posPeak2SpecDiff = posPeak1SpecDiff;
+
+        maxPeak1 = self->histSpecDiff[i];
+        weightPeak1SpecDiff = self->histSpecDiff[i];
+        posPeak1SpecDiff = binMid;
+      } else if (self->histSpecDiff[i] > maxPeak2) {
+        // Found new "second" peak.
+        maxPeak2 = self->histSpecDiff[i];
+        weightPeak2SpecDiff = self->histSpecDiff[i];
+        posPeak2SpecDiff = binMid;
+      }
+    }
+
+    // For spectrum flatness feature.
+    useFeatureSpecFlat = 1;
+    // Merge the two peaks if they are close.
+    if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) <
+         self->featureExtractionParams.limitPeakSpacingSpecFlat) &&
+        (weightPeak2SpecFlat >
+         self->featureExtractionParams.limitPeakWeightsSpecFlat *
+             weightPeak1SpecFlat)) {
+      weightPeak1SpecFlat += weightPeak2SpecFlat;
+      posPeak1SpecFlat = 0.5f * (posPeak1SpecFlat + posPeak2SpecFlat);
+    }
+    // Reject if weight of peaks is not large enough, or peak value too small.
+    if (weightPeak1SpecFlat <
+            self->featureExtractionParams.thresWeightSpecFlat ||
+        posPeak1SpecFlat < self->featureExtractionParams.thresPosSpecFlat) {
+      useFeatureSpecFlat = 0;
+    }
+    // If selected, get the threshold.
+    if (useFeatureSpecFlat == 1) {
+      // Compute the threshold.
+      self->priorModelPars[1] =
+          self->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat;
+      // Check if value is within min/max range.
+      if (self->priorModelPars[1] < self->featureExtractionParams.minSpecFlat) {
+        self->priorModelPars[1] = self->featureExtractionParams.minSpecFlat;
+      }
+      if (self->priorModelPars[1] > self->featureExtractionParams.maxSpecFlat) {
+        self->priorModelPars[1] = self->featureExtractionParams.maxSpecFlat;
+      }
+    }
+    // Done with flatness feature.
+
+    // For template feature.
+    useFeatureSpecDiff = 1;
+    // Merge the two peaks if they are close.
+    if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) <
+         self->featureExtractionParams.limitPeakSpacingSpecDiff) &&
+        (weightPeak2SpecDiff >
+         self->featureExtractionParams.limitPeakWeightsSpecDiff *
+             weightPeak1SpecDiff)) {
+      weightPeak1SpecDiff += weightPeak2SpecDiff;
+      posPeak1SpecDiff = 0.5f * (posPeak1SpecDiff + posPeak2SpecDiff);
+    }
+    // Get the threshold value.
+    self->priorModelPars[3] =
+        self->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff;
+    // Reject if weight of peaks is not large enough.
+    if (weightPeak1SpecDiff <
+        self->featureExtractionParams.thresWeightSpecDiff) {
+      useFeatureSpecDiff = 0;
+    }
+    // Check if value is within min/max range.
+    if (self->priorModelPars[3] < self->featureExtractionParams.minSpecDiff) {
+      self->priorModelPars[3] = self->featureExtractionParams.minSpecDiff;
+    }
+    if (self->priorModelPars[3] > self->featureExtractionParams.maxSpecDiff) {
+      self->priorModelPars[3] = self->featureExtractionParams.maxSpecDiff;
+    }
+    // Done with spectral difference feature.
+
+    // Don't use template feature if fluctuation of LRT feature is very low:
+    // most likely just noise state.
+    if (fluctLrt < self->featureExtractionParams.thresFluctLrt) {
+      useFeatureSpecDiff = 0;
+    }
+
+    // Select the weights between the features.
+    // self->priorModelPars[4] is weight for LRT: always selected.
+    // self->priorModelPars[5] is weight for spectral flatness.
+    // self->priorModelPars[6] is weight for spectral difference.
+    featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff);
+    self->priorModelPars[4] = 1.f / featureSum;
+    self->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum;
+    self->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum;
+
+    // Set hists to zero for next update.
+    if (self->modelUpdatePars[0] >= 1) {
+      for (i = 0; i < HIST_PAR_EST; i++) {
+        self->histLrt[i] = 0;
+        self->histSpecFlat[i] = 0;
+        self->histSpecDiff[i] = 0;
+      }
+    }
+  }  // End of flag == 1.
+}
+
+// Compute spectral flatness on input spectrum.
+// |magnIn| is the magnitude spectrum.
+// Spectral flatness is returned in self->featureData[0].
+static void ComputeSpectralFlatness(NoiseSuppressionC* self,
+                                    const float* magnIn) {
+  size_t i;
+  size_t shiftLP = 1;  // Option to remove first bin(s) from spectral measures.
+  float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp;
+
+  // Compute spectral measures.
+  // For flatness.
+  avgSpectralFlatnessNum = 0.0;
+  avgSpectralFlatnessDen = self->sumMagn;
+  for (i = 0; i < shiftLP; i++) {
+    avgSpectralFlatnessDen -= magnIn[i];
+  }
+  // Compute log of ratio of the geometric to arithmetic mean: check for log(0)
+  // case.
+  for (i = shiftLP; i < self->magnLen; i++) {
+    if (magnIn[i] > 0.0) {
+      avgSpectralFlatnessNum += (float)log(magnIn[i]);
+    } else {
+      self->featureData[0] -= SPECT_FL_TAVG * self->featureData[0];
+      return;
+    }
+  }
+  // Normalize.
+  avgSpectralFlatnessDen = avgSpectralFlatnessDen / self->magnLen;
+  avgSpectralFlatnessNum = avgSpectralFlatnessNum / self->magnLen;
+
+  // Ratio and inverse log: check for case of log(0).
+  spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen;
+
+  // Time-avg update of spectral flatness feature.
+  self->featureData[0] += SPECT_FL_TAVG * (spectralTmp - self->featureData[0]);
+  // Done with flatness feature.
+}
+
+// Compute prior and post SNR based on quantile noise estimation.
+// Compute DD estimate of prior SNR.
+// Inputs:
+//   * |magn| is the signal magnitude spectrum estimate.
+//   * |noise| is the magnitude noise spectrum estimate.
+// Outputs:
+//   * |snrLocPrior| is the computed prior SNR.
+//   * |snrLocPost| is the computed post SNR.
+static void ComputeSnr(const NoiseSuppressionC* self,
+                       const float* magn,
+                       const float* noise,
+                       float* snrLocPrior,
+                       float* snrLocPost) {
+  size_t i;
+
+  for (i = 0; i < self->magnLen; i++) {
+    // Previous post SNR.
+    // Previous estimate: based on previous frame with gain filter.
+    float previousEstimateStsa = self->magnPrevAnalyze[i] /
+        (self->noisePrev[i] + 0.0001f) * self->smooth[i];
+    // Post SNR.
+    snrLocPost[i] = 0.f;
+    if (magn[i] > noise[i]) {
+      snrLocPost[i] = magn[i] / (noise[i] + 0.0001f) - 1.f;
+    }
+    // DD estimate is sum of two terms: current estimate and previous estimate.
+    // Directed decision update of snrPrior.
+    snrLocPrior[i] =
+        DD_PR_SNR * previousEstimateStsa + (1.f - DD_PR_SNR) * snrLocPost[i];
+  }  // End of loop over frequencies.
+}
+
+// Compute the difference measure between input spectrum and a template/learned
+// noise spectrum.
+// |magnIn| is the input spectrum.
+// The reference/template spectrum is self->magnAvgPause[i].
+// Returns (normalized) spectral difference in self->featureData[4].
+static void ComputeSpectralDifference(NoiseSuppressionC* self,
+                                      const float* magnIn) {
+  // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 /
+  // var(magnAvgPause)
+  size_t i;
+  float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn;
+
+  avgPause = 0.0;
+  avgMagn = self->sumMagn;
+  // Compute average quantities.
+  for (i = 0; i < self->magnLen; i++) {
+    // Conservative smooth noise spectrum from pause frames.
+    avgPause += self->magnAvgPause[i];
+  }
+  avgPause /= self->magnLen;
+  avgMagn /= self->magnLen;
+
+  covMagnPause = 0.0;
+  varPause = 0.0;
+  varMagn = 0.0;
+  // Compute variance and covariance quantities.
+  for (i = 0; i < self->magnLen; i++) {
+    covMagnPause += (magnIn[i] - avgMagn) * (self->magnAvgPause[i] - avgPause);
+    varPause +=
+        (self->magnAvgPause[i] - avgPause) * (self->magnAvgPause[i] - avgPause);
+    varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn);
+  }
+  covMagnPause /= self->magnLen;
+  varPause /= self->magnLen;
+  varMagn /= self->magnLen;
+  // Update of average magnitude spectrum.
+  self->featureData[6] += self->signalEnergy;
+
+  avgDiffNormMagn =
+      varMagn - (covMagnPause * covMagnPause) / (varPause + 0.0001f);
+  // Normalize and compute time-avg update of difference feature.
+  avgDiffNormMagn = (float)(avgDiffNormMagn / (self->featureData[5] + 0.0001f));
+  self->featureData[4] +=
+      SPECT_DIFF_TAVG * (avgDiffNormMagn - self->featureData[4]);
+}
+
+// Compute speech/noise probability.
+// Speech/noise probability is returned in |probSpeechFinal|.
+// |magn| is the input magnitude spectrum.
+// |noise| is the noise spectrum.
+// |snrLocPrior| is the prior SNR for each frequency.
+// |snrLocPost| is the post SNR for each frequency.
+static void SpeechNoiseProb(NoiseSuppressionC* self,
+                            float* probSpeechFinal,
+                            const float* snrLocPrior,
+                            const float* snrLocPost) {
+  size_t i;
+  int sgnMap;
+  float invLrt, gainPrior, indPrior;
+  float logLrtTimeAvgKsum, besselTmp;
+  float indicator0, indicator1, indicator2;
+  float tmpFloat1, tmpFloat2;
+  float weightIndPrior0, weightIndPrior1, weightIndPrior2;
+  float threshPrior0, threshPrior1, threshPrior2;
+  float widthPrior, widthPrior0, widthPrior1, widthPrior2;
+
+  widthPrior0 = WIDTH_PR_MAP;
+  // Width for pause region: lower range, so increase width in tanh map.
+  widthPrior1 = 2.f * WIDTH_PR_MAP;
+  widthPrior2 = 2.f * WIDTH_PR_MAP;  // For spectral-difference measure.
+
+  // Threshold parameters for features.
+  threshPrior0 = self->priorModelPars[0];
+  threshPrior1 = self->priorModelPars[1];
+  threshPrior2 = self->priorModelPars[3];
+
+  // Sign for flatness feature.
+  sgnMap = (int)(self->priorModelPars[2]);
+
+  // Weight parameters for features.
+  weightIndPrior0 = self->priorModelPars[4];
+  weightIndPrior1 = self->priorModelPars[5];
+  weightIndPrior2 = self->priorModelPars[6];
+
+  // Compute feature based on average LR factor.
+  // This is the average over all frequencies of the smooth log LRT.
+  logLrtTimeAvgKsum = 0.0;
+  for (i = 0; i < self->magnLen; i++) {
+    tmpFloat1 = 1.f + 2.f * snrLocPrior[i];
+    tmpFloat2 = 2.f * snrLocPrior[i] / (tmpFloat1 + 0.0001f);
+    besselTmp = (snrLocPost[i] + 1.f) * tmpFloat2;
+    self->logLrtTimeAvg[i] +=
+        LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - self->logLrtTimeAvg[i]);
+    logLrtTimeAvgKsum += self->logLrtTimeAvg[i];
+  }
+  logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (self->magnLen);
+  self->featureData[3] = logLrtTimeAvgKsum;
+  // Done with computation of LR factor.
+
+  // Compute the indicator functions.
+  // Average LRT feature.
+  widthPrior = widthPrior0;
+  // Use larger width in tanh map for pause regions.
+  if (logLrtTimeAvgKsum < threshPrior0) {
+    widthPrior = widthPrior1;
+  }
+  // Compute indicator function: sigmoid map.
+  indicator0 =
+      0.5f *
+      ((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.f);
+
+  // Spectral flatness feature.
+  tmpFloat1 = self->featureData[0];
+  widthPrior = widthPrior0;
+  // Use larger width in tanh map for pause regions.
+  if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) {
+    widthPrior = widthPrior1;
+  }
+  if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) {
+    widthPrior = widthPrior1;
+  }
+  // Compute indicator function: sigmoid map.
+  indicator1 =
+      0.5f *
+      ((float)tanh((float)sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) +
+       1.f);
+
+  // For template spectrum-difference.
+  tmpFloat1 = self->featureData[4];
+  widthPrior = widthPrior0;
+  // Use larger width in tanh map for pause regions.
+  if (tmpFloat1 < threshPrior2) {
+    widthPrior = widthPrior2;
+  }
+  // Compute indicator function: sigmoid map.
+  indicator2 =
+      0.5f * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.f);
+
+  // Combine the indicator function with the feature weights.
+  indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 +
+             weightIndPrior2 * indicator2;
+  // Done with computing indicator function.
+
+  // Compute the prior probability.
+  self->priorSpeechProb += PRIOR_UPDATE * (indPrior - self->priorSpeechProb);
+  // Make sure probabilities are within range: keep floor to 0.01.
+  if (self->priorSpeechProb > 1.f) {
+    self->priorSpeechProb = 1.f;
+  }
+  if (self->priorSpeechProb < 0.01f) {
+    self->priorSpeechProb = 0.01f;
+  }
+
+  // Final speech probability: combine prior model with LR factor:.
+  gainPrior = (1.f - self->priorSpeechProb) / (self->priorSpeechProb + 0.0001f);
+  for (i = 0; i < self->magnLen; i++) {
+    invLrt = (float)exp(-self->logLrtTimeAvg[i]);
+    invLrt = (float)gainPrior * invLrt;
+    probSpeechFinal[i] = 1.f / (1.f + invLrt);
+  }
+}
+
+// Update the noise features.
+// Inputs:
+//   * |magn| is the signal magnitude spectrum estimate.
+//   * |updateParsFlag| is an update flag for parameters.
+static void FeatureUpdate(NoiseSuppressionC* self,
+                          const float* magn,
+                          int updateParsFlag) {
+  // Compute spectral flatness on input spectrum.
+  ComputeSpectralFlatness(self, magn);
+  // Compute difference of input spectrum with learned/estimated noise spectrum.
+  ComputeSpectralDifference(self, magn);
+  // Compute histograms for parameter decisions (thresholds and weights for
+  // features).
+  // Parameters are extracted once every window time.
+  // (=self->modelUpdatePars[1])
+  if (updateParsFlag >= 1) {
+    // Counter update.
+    self->modelUpdatePars[3]--;
+    // Update histogram.
+    if (self->modelUpdatePars[3] > 0) {
+      FeatureParameterExtraction(self, 0);
+    }
+    // Compute model parameters.
+    if (self->modelUpdatePars[3] == 0) {
+      FeatureParameterExtraction(self, 1);
+      self->modelUpdatePars[3] = self->modelUpdatePars[1];
+      // If wish to update only once, set flag to zero.
+      if (updateParsFlag == 1) {
+        self->modelUpdatePars[0] = 0;
+      } else {
+        // Update every window:
+        // Get normalization for spectral difference for next window estimate.
+        self->featureData[6] =
+            self->featureData[6] / ((float)self->modelUpdatePars[1]);
+        self->featureData[5] =
+            0.5f * (self->featureData[6] + self->featureData[5]);
+        self->featureData[6] = 0.f;
+      }
+    }
+  }
+}
+
+// Update the noise estimate.
+// Inputs:
+//   * |magn| is the signal magnitude spectrum estimate.
+//   * |snrLocPrior| is the prior SNR.
+//   * |snrLocPost| is the post SNR.
+// Output:
+//   * |noise| is the updated noise magnitude spectrum estimate.
+static void UpdateNoiseEstimate(NoiseSuppressionC* self,
+                                const float* magn,
+                                const float* snrLocPrior,
+                                const float* snrLocPost,
+                                float* noise) {
+  size_t i;
+  float probSpeech, probNonSpeech;
+  // Time-avg parameter for noise update.
+  float gammaNoiseTmp = NOISE_UPDATE;
+  float gammaNoiseOld;
+  float noiseUpdateTmp;
+
+  for (i = 0; i < self->magnLen; i++) {
+    probSpeech = self->speechProb[i];
+    probNonSpeech = 1.f - probSpeech;
+    // Temporary noise update:
+    // Use it for speech frames if update value is less than previous.
+    noiseUpdateTmp = gammaNoiseTmp * self->noisePrev[i] +
+                     (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] +
+                                              probSpeech * self->noisePrev[i]);
+    // Time-constant based on speech/noise state.
+    gammaNoiseOld = gammaNoiseTmp;
+    gammaNoiseTmp = NOISE_UPDATE;
+    // Increase gamma (i.e., less noise update) for frame likely to be speech.
+    if (probSpeech > PROB_RANGE) {
+      gammaNoiseTmp = SPEECH_UPDATE;
+    }
+    // Conservative noise update.
+    if (probSpeech < PROB_RANGE) {
+      self->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - self->magnAvgPause[i]);
+    }
+    // Noise update.
+    if (gammaNoiseTmp == gammaNoiseOld) {
+      noise[i] = noiseUpdateTmp;
+    } else {
+      noise[i] = gammaNoiseTmp * self->noisePrev[i] +
+                 (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] +
+                                          probSpeech * self->noisePrev[i]);
+      // Allow for noise update downwards:
+      // If noise update decreases the noise, it is safe, so allow it to
+      // happen.
+      if (noiseUpdateTmp < noise[i]) {
+        noise[i] = noiseUpdateTmp;
+      }
+    }
+  }  // End of freq loop.
+}
+
+// Updates |buffer| with a new |frame|.
+// Inputs:
+//   * |frame| is a new speech frame or NULL for setting to zero.
+//   * |frame_length| is the length of the new frame.
+//   * |buffer_length| is the length of the buffer.
+// Output:
+//   * |buffer| is the updated buffer.
+static void UpdateBuffer(const float* frame,
+                         size_t frame_length,
+                         size_t buffer_length,
+                         float* buffer) {
+  assert(buffer_length < 2 * frame_length);
+
+  memcpy(buffer,
+         buffer + frame_length,
+         sizeof(*buffer) * (buffer_length - frame_length));
+  if (frame) {
+    memcpy(buffer + buffer_length - frame_length,
+           frame,
+           sizeof(*buffer) * frame_length);
+  } else {
+    memset(buffer + buffer_length - frame_length,
+           0,
+           sizeof(*buffer) * frame_length);
+  }
+}
+
+// Transforms the signal from time to frequency domain.
+// Inputs:
+//   * |time_data| is the signal in the time domain.
+//   * |time_data_length| is the length of the analysis buffer.
+//   * |magnitude_length| is the length of the spectrum magnitude, which equals
+//     the length of both |real| and |imag| (time_data_length / 2 + 1).
+// Outputs:
+//   * |time_data| is the signal in the frequency domain.
+//   * |real| is the real part of the frequency domain.
+//   * |imag| is the imaginary part of the frequency domain.
+//   * |magn| is the calculated signal magnitude in the frequency domain.
+static void FFT(NoiseSuppressionC* self,
+                float* time_data,
+                size_t time_data_length,
+                size_t magnitude_length,
+                float* real,
+                float* imag,
+                float* magn) {
+  size_t i;
+
+  assert(magnitude_length == time_data_length / 2 + 1);
+
+  WebRtc_rdft(time_data_length, 1, time_data, self->ip, self->wfft);
+
+  imag[0] = 0;
+  real[0] = time_data[0];
+  magn[0] = fabsf(real[0]) + 1.f;
+  imag[magnitude_length - 1] = 0;
+  real[magnitude_length - 1] = time_data[1];
+  magn[magnitude_length - 1] = fabsf(real[magnitude_length - 1]) + 1.f;
+  for (i = 1; i < magnitude_length - 1; ++i) {
+    real[i] = time_data[2 * i];
+    imag[i] = time_data[2 * i + 1];
+    // Magnitude spectrum.
+    magn[i] = sqrtf(real[i] * real[i] + imag[i] * imag[i]) + 1.f;
+  }
+}
+
+// Transforms the signal from frequency to time domain.
+// Inputs:
+//   * |real| is the real part of the frequency domain.
+//   * |imag| is the imaginary part of the frequency domain.
+//   * |magnitude_length| is the length of the spectrum magnitude, which equals
+//     the length of both |real| and |imag|.
+//   * |time_data_length| is the length of the analysis buffer
+//     (2 * (magnitude_length - 1)).
+// Output:
+//   * |time_data| is the signal in the time domain.
+static void IFFT(NoiseSuppressionC* self,
+                 const float* real,
+                 const float* imag,
+                 size_t magnitude_length,
+                 size_t time_data_length,
+                 float* time_data) {
+  size_t i;
+
+  assert(time_data_length == 2 * (magnitude_length - 1));
+
+  time_data[0] = real[0];
+  time_data[1] = real[magnitude_length - 1];
+  for (i = 1; i < magnitude_length - 1; ++i) {
+    time_data[2 * i] = real[i];
+    time_data[2 * i + 1] = imag[i];
+  }
+  WebRtc_rdft(time_data_length, -1, time_data, self->ip, self->wfft);
+
+  for (i = 0; i < time_data_length; ++i) {
+    time_data[i] *= 2.f / time_data_length;  // FFT scaling.
+  }
+}
+
+// Calculates the energy of a buffer.
+// Inputs:
+//   * |buffer| is the buffer over which the energy is calculated.
+//   * |length| is the length of the buffer.
+// Returns the calculated energy.
+static float Energy(const float* buffer, size_t length) {
+  size_t i;
+  float energy = 0.f;
+
+  for (i = 0; i < length; ++i) {
+    energy += buffer[i] * buffer[i];
+  }
+
+  return energy;
+}
+
+// Windows a buffer.
+// Inputs:
+//   * |window| is the window by which to multiply.
+//   * |data| is the data without windowing.
+//   * |length| is the length of the window and data.
+// Output:
+//   * |data_windowed| is the windowed data.
+static void Windowing(const float* window,
+                      const float* data,
+                      size_t length,
+                      float* data_windowed) {
+  size_t i;
+
+  for (i = 0; i < length; ++i) {
+    data_windowed[i] = window[i] * data[i];
+  }
+}
+
+// Estimate prior SNR decision-directed and compute DD based Wiener Filter.
+// Input:
+//   * |magn| is the signal magnitude spectrum estimate.
+// Output:
+//   * |theFilter| is the frequency response of the computed Wiener filter.
+static void ComputeDdBasedWienerFilter(const NoiseSuppressionC* self,
+                                       const float* magn,
+                                       float* theFilter) {
+  size_t i;
+  float snrPrior, previousEstimateStsa, currentEstimateStsa;
+
+  for (i = 0; i < self->magnLen; i++) {
+    // Previous estimate: based on previous frame with gain filter.
+    previousEstimateStsa = self->magnPrevProcess[i] /
+                           (self->noisePrev[i] + 0.0001f) * self->smooth[i];
+    // Post and prior SNR.
+    currentEstimateStsa = 0.f;
+    if (magn[i] > self->noise[i]) {
+      currentEstimateStsa = magn[i] / (self->noise[i] + 0.0001f) - 1.f;
+    }
+    // DD estimate is sum of two terms: current estimate and previous estimate.
+    // Directed decision update of |snrPrior|.
+    snrPrior = DD_PR_SNR * previousEstimateStsa +
+               (1.f - DD_PR_SNR) * currentEstimateStsa;
+    // Gain filter.
+    theFilter[i] = snrPrior / (self->overdrive + snrPrior);
+  }  // End of loop over frequencies.
+}
+
+// Changes the aggressiveness of the noise suppression method.
+// |mode| = 0 is mild (6dB), |mode| = 1 is medium (10dB) and |mode| = 2 is
+// aggressive (15dB).
+// Returns 0 on success and -1 otherwise.
+int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode) {
+  // Allow for modes: 0, 1, 2, 3.
+  if (mode < 0 || mode > 3) {
+    return (-1);
+  }
+
+  self->aggrMode = mode;
+  if (mode == 0) {
+    self->overdrive = 1.f;
+    self->denoiseBound = 0.5f;
+    self->gainmap = 0;
+  } else if (mode == 1) {
+    // self->overdrive = 1.25f;
+    self->overdrive = 1.f;
+    self->denoiseBound = 0.25f;
+    self->gainmap = 1;
+  } else if (mode == 2) {
+    // self->overdrive = 1.25f;
+    self->overdrive = 1.1f;
+    self->denoiseBound = 0.125f;
+    self->gainmap = 1;
+  } else if (mode == 3) {
+    // self->overdrive = 1.3f;
+    self->overdrive = 1.25f;
+    self->denoiseBound = 0.09f;
+    self->gainmap = 1;
+  }
+  return 0;
+}
+
+void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame) {
+  size_t i;
+  const size_t kStartBand = 5;  // Skip first frequency bins during estimation.
+  int updateParsFlag;
+  float energy;
+  float signalEnergy = 0.f;
+  float sumMagn = 0.f;
+  float tmpFloat1, tmpFloat2, tmpFloat3;
+  float winData[ANAL_BLOCKL_MAX];
+  float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL];
+  float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL];
+  float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
+  // Variables during startup.
+  float sum_log_i = 0.0;
+  float sum_log_i_square = 0.0;
+  float sum_log_magn = 0.0;
+  float sum_log_i_log_magn = 0.0;
+  float parametric_exp = 0.0;
+  float parametric_num = 0.0;
+
+  // Check that initiation has been done.
+  assert(self->initFlag == 1);
+  updateParsFlag = self->modelUpdatePars[0];
+
+  // Update analysis buffer for L band.
+  UpdateBuffer(speechFrame, self->blockLen, self->anaLen, self->analyzeBuf);
+
+  Windowing(self->window, self->analyzeBuf, self->anaLen, winData);
+  energy = Energy(winData, self->anaLen);
+  if (energy == 0.0) {
+    // We want to avoid updating statistics in this case:
+    // Updating feature statistics when we have zeros only will cause
+    // thresholds to move towards zero signal situations. This in turn has the
+    // effect that once the signal is "turned on" (non-zero values) everything
+    // will be treated as speech and there is no noise suppression effect.
+    // Depending on the duration of the inactive signal it takes a
+    // considerable amount of time for the system to learn what is noise and
+    // what is speech.
+    return;
+  }
+
+  self->blockInd++;  // Update the block index only when we process a block.
+
+  FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn);
+
+  for (i = 0; i < self->magnLen; i++) {
+    signalEnergy += real[i] * real[i] + imag[i] * imag[i];
+    sumMagn += magn[i];
+    if (self->blockInd < END_STARTUP_SHORT) {
+      if (i >= kStartBand) {
+        tmpFloat2 = logf((float)i);
+        sum_log_i += tmpFloat2;
+        sum_log_i_square += tmpFloat2 * tmpFloat2;
+        tmpFloat1 = logf(magn[i]);
+        sum_log_magn += tmpFloat1;
+        sum_log_i_log_magn += tmpFloat2 * tmpFloat1;
+      }
+    }
+  }
+  signalEnergy /= self->magnLen;
+  self->signalEnergy = signalEnergy;
+  self->sumMagn = sumMagn;
+
+  // Quantile noise estimate.
+  NoiseEstimation(self, magn, noise);
+  // Compute simplified noise model during startup.
+  if (self->blockInd < END_STARTUP_SHORT) {
+    // Estimate White noise.
+    self->whiteNoiseLevel += sumMagn / self->magnLen * self->overdrive;
+    // Estimate Pink noise parameters.
+    tmpFloat1 = sum_log_i_square * (self->magnLen - kStartBand);
+    tmpFloat1 -= (sum_log_i * sum_log_i);
+    tmpFloat2 =
+        (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
+    tmpFloat3 = tmpFloat2 / tmpFloat1;
+    // Constrain the estimated spectrum to be positive.
+    if (tmpFloat3 < 0.f) {
+      tmpFloat3 = 0.f;
+    }
+    self->pinkNoiseNumerator += tmpFloat3;
+    tmpFloat2 = (sum_log_i * sum_log_magn);
+    tmpFloat2 -= (self->magnLen - kStartBand) * sum_log_i_log_magn;
+    tmpFloat3 = tmpFloat2 / tmpFloat1;
+    // Constrain the pink noise power to be in the interval [0, 1].
+    if (tmpFloat3 < 0.f) {
+      tmpFloat3 = 0.f;
+    }
+    if (tmpFloat3 > 1.f) {
+      tmpFloat3 = 1.f;
+    }
+    self->pinkNoiseExp += tmpFloat3;
+
+    // Calculate frequency independent parts of parametric noise estimate.
+    if (self->pinkNoiseExp > 0.f) {
+      // Use pink noise estimate.
+      parametric_num =
+          expf(self->pinkNoiseNumerator / (float)(self->blockInd + 1));
+      parametric_num *= (float)(self->blockInd + 1);
+      parametric_exp = self->pinkNoiseExp / (float)(self->blockInd + 1);
+    }
+    for (i = 0; i < self->magnLen; i++) {
+      // Estimate the background noise using the white and pink noise
+      // parameters.
+      if (self->pinkNoiseExp == 0.f) {
+        // Use white noise estimate.
+        self->parametricNoise[i] = self->whiteNoiseLevel;
+      } else {
+        // Use pink noise estimate.
+        float use_band = (float)(i < kStartBand ? kStartBand : i);
+        self->parametricNoise[i] =
+            parametric_num / powf(use_band, parametric_exp);
+      }
+      // Weight quantile noise with modeled noise.
+      noise[i] *= (self->blockInd);
+      tmpFloat2 =
+          self->parametricNoise[i] * (END_STARTUP_SHORT - self->blockInd);
+      noise[i] += (tmpFloat2 / (float)(self->blockInd + 1));
+      noise[i] /= END_STARTUP_SHORT;
+    }
+  }
+  // Compute average signal during END_STARTUP_LONG time:
+  // used to normalize spectral difference measure.
+  if (self->blockInd < END_STARTUP_LONG) {
+    self->featureData[5] *= self->blockInd;
+    self->featureData[5] += signalEnergy;
+    self->featureData[5] /= (self->blockInd + 1);
+  }
+
+  // Post and prior SNR needed for SpeechNoiseProb.
+  ComputeSnr(self, magn, noise, snrLocPrior, snrLocPost);
+
+  FeatureUpdate(self, magn, updateParsFlag);
+  SpeechNoiseProb(self, self->speechProb, snrLocPrior, snrLocPost);
+  UpdateNoiseEstimate(self, magn, snrLocPrior, snrLocPost, noise);
+
+  // Keep track of noise spectrum for next frame.
+  memcpy(self->noise, noise, sizeof(*noise) * self->magnLen);
+  memcpy(self->magnPrevAnalyze, magn, sizeof(*magn) * self->magnLen);
+}
+
+void WebRtcNs_ProcessCore(NoiseSuppressionC* self,
+                          const float* const* speechFrame,
+                          size_t num_bands,
+                          float* const* outFrame) {
+  // Main routine for noise reduction.
+  int flagHB = 0;
+  size_t i, j;
+
+  float energy1, energy2, gain, factor, factor1, factor2;
+  float fout[BLOCKL_MAX];
+  float winData[ANAL_BLOCKL_MAX];
+  float magn[HALF_ANAL_BLOCKL];
+  float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL];
+  float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
+
+  // SWB variables.
+  int deltaBweHB = 1;
+  int deltaGainHB = 1;
+  float decayBweHB = 1.0;
+  float gainMapParHB = 1.0;
+  float gainTimeDomainHB = 1.0;
+  float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB;
+  float sumMagnAnalyze, sumMagnProcess;
+
+  // Check that initiation has been done.
+  assert(self->initFlag == 1);
+  assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX);
+
+  const float* const* speechFrameHB = NULL;
+  float* const* outFrameHB = NULL;
+  size_t num_high_bands = 0;
+  if (num_bands > 1) {
+    speechFrameHB = &speechFrame[1];
+    outFrameHB = &outFrame[1];
+    num_high_bands = num_bands - 1;
+    flagHB = 1;
+    // Range for averaging low band quantities for H band gain.
+    deltaBweHB = (int)self->magnLen / 4;
+    deltaGainHB = deltaBweHB;
+  }
+
+  // Update analysis buffer for L band.
+  UpdateBuffer(speechFrame[0], self->blockLen, self->anaLen, self->dataBuf);
+
+  if (flagHB == 1) {
+    // Update analysis buffer for H bands.
+    for (i = 0; i < num_high_bands; ++i) {
+      UpdateBuffer(speechFrameHB[i],
+                   self->blockLen,
+                   self->anaLen,
+                   self->dataBufHB[i]);
+    }
+  }
+
+  Windowing(self->window, self->dataBuf, self->anaLen, winData);
+  energy1 = Energy(winData, self->anaLen);
+  if (energy1 == 0.0) {
+    // Synthesize the special case of zero input.
+    // Read out fully processed segment.
+    for (i = self->windShift; i < self->blockLen + self->windShift; i++) {
+      fout[i - self->windShift] = self->syntBuf[i];
+    }
+    // Update synthesis buffer.
+    UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
+
+    for (i = 0; i < self->blockLen; ++i)
+      outFrame[0][i] =
+          WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
+
+    // For time-domain gain of HB.
+    if (flagHB == 1) {
+      for (i = 0; i < num_high_bands; ++i) {
+        for (j = 0; j < self->blockLen; ++j) {
+          outFrameHB[i][j] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+                                            self->dataBufHB[i][j],
+                                            WEBRTC_SPL_WORD16_MIN);
+        }
+      }
+    }
+
+    return;
+  }
+
+  FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn);
+
+  if (self->blockInd < END_STARTUP_SHORT) {
+    for (i = 0; i < self->magnLen; i++) {
+      self->initMagnEst[i] += magn[i];
+    }
+  }
+
+  ComputeDdBasedWienerFilter(self, magn, theFilter);
+
+  for (i = 0; i < self->magnLen; i++) {
+    // Flooring bottom.
+    if (theFilter[i] < self->denoiseBound) {
+      theFilter[i] = self->denoiseBound;
+    }
+    // Flooring top.
+    if (theFilter[i] > 1.f) {
+      theFilter[i] = 1.f;
+    }
+    if (self->blockInd < END_STARTUP_SHORT) {
+      theFilterTmp[i] =
+          (self->initMagnEst[i] - self->overdrive * self->parametricNoise[i]);
+      theFilterTmp[i] /= (self->initMagnEst[i] + 0.0001f);
+      // Flooring bottom.
+      if (theFilterTmp[i] < self->denoiseBound) {
+        theFilterTmp[i] = self->denoiseBound;
+      }
+      // Flooring top.
+      if (theFilterTmp[i] > 1.f) {
+        theFilterTmp[i] = 1.f;
+      }
+      // Weight the two suppression filters.
+      theFilter[i] *= (self->blockInd);
+      theFilterTmp[i] *= (END_STARTUP_SHORT - self->blockInd);
+      theFilter[i] += theFilterTmp[i];
+      theFilter[i] /= (END_STARTUP_SHORT);
+    }
+
+    self->smooth[i] = theFilter[i];
+    real[i] *= self->smooth[i];
+    imag[i] *= self->smooth[i];
+  }
+  // Keep track of |magn| spectrum for next frame.
+  memcpy(self->magnPrevProcess, magn, sizeof(*magn) * self->magnLen);
+  memcpy(self->noisePrev, self->noise, sizeof(self->noise[0]) * self->magnLen);
+  // Back to time domain.
+  IFFT(self, real, imag, self->magnLen, self->anaLen, winData);
+
+  // Scale factor: only do it after END_STARTUP_LONG time.
+  factor = 1.f;
+  if (self->gainmap == 1 && self->blockInd > END_STARTUP_LONG) {
+    factor1 = 1.f;
+    factor2 = 1.f;
+
+    energy2 = Energy(winData, self->anaLen);
+    gain = (float)sqrt(energy2 / (energy1 + 1.f));
+
+    // Scaling for new version.
+    if (gain > B_LIM) {
+      factor1 = 1.f + 1.3f * (gain - B_LIM);
+      if (gain * factor1 > 1.f) {
+        factor1 = 1.f / gain;
+      }
+    }
+    if (gain < B_LIM) {
+      // Don't reduce scale too much for pause regions:
+      // attenuation here should be controlled by flooring.
+      if (gain <= self->denoiseBound) {
+        gain = self->denoiseBound;
+      }
+      factor2 = 1.f - 0.3f * (B_LIM - gain);
+    }
+    // Combine both scales with speech/noise prob:
+    // note prior (priorSpeechProb) is not frequency dependent.
+    factor = self->priorSpeechProb * factor1 +
+             (1.f - self->priorSpeechProb) * factor2;
+  }  // Out of self->gainmap == 1.
+
+  Windowing(self->window, winData, self->anaLen, winData);
+
+  // Synthesis.
+  for (i = 0; i < self->anaLen; i++) {
+    self->syntBuf[i] += factor * winData[i];
+  }
+  // Read out fully processed segment.
+  for (i = self->windShift; i < self->blockLen + self->windShift; i++) {
+    fout[i - self->windShift] = self->syntBuf[i];
+  }
+  // Update synthesis buffer.
+  UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
+
+  for (i = 0; i < self->blockLen; ++i)
+    outFrame[0][i] =
+        WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
+
+  // For time-domain gain of HB.
+  if (flagHB == 1) {
+    // Average speech prob from low band.
+    // Average over second half (i.e., 4->8kHz) of frequencies spectrum.
+    avgProbSpeechHB = 0.0;
+    for (i = self->magnLen - deltaBweHB - 1; i < self->magnLen - 1; i++) {
+      avgProbSpeechHB += self->speechProb[i];
+    }
+    avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB);
+    // If the speech was suppressed by a component between Analyze and
+    // Process, for example the AEC, then it should not be considered speech
+    // for high band suppression purposes.
+    sumMagnAnalyze = 0;
+    sumMagnProcess = 0;
+    for (i = 0; i < self->magnLen; ++i) {
+      sumMagnAnalyze += self->magnPrevAnalyze[i];
+      sumMagnProcess += self->magnPrevProcess[i];
+    }
+    avgProbSpeechHB *= sumMagnProcess / sumMagnAnalyze;
+    // Average filter gain from low band.
+    // Average over second half (i.e., 4->8kHz) of frequencies spectrum.
+    avgFilterGainHB = 0.0;
+    for (i = self->magnLen - deltaGainHB - 1; i < self->magnLen - 1; i++) {
+      avgFilterGainHB += self->smooth[i];
+    }
+    avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB));
+    avgProbSpeechHBTmp = 2.f * avgProbSpeechHB - 1.f;
+    // Gain based on speech probability.
+    gainModHB = 0.5f * (1.f + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
+    // Combine gain with low band gain.
+    gainTimeDomainHB = 0.5f * gainModHB + 0.5f * avgFilterGainHB;
+    if (avgProbSpeechHB >= 0.5f) {
+      gainTimeDomainHB = 0.25f * gainModHB + 0.75f * avgFilterGainHB;
+    }
+    gainTimeDomainHB = gainTimeDomainHB * decayBweHB;
+    // Make sure gain is within flooring range.
+    // Flooring bottom.
+    if (gainTimeDomainHB < self->denoiseBound) {
+      gainTimeDomainHB = self->denoiseBound;
+    }
+    // Flooring top.
+    if (gainTimeDomainHB > 1.f) {
+      gainTimeDomainHB = 1.f;
+    }
+    // Apply gain.
+    for (i = 0; i < num_high_bands; ++i) {
+      for (j = 0; j < self->blockLen; j++) {
+        outFrameHB[i][j] =
+            WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+                           gainTimeDomainHB * self->dataBufHB[i][j],
+                           WEBRTC_SPL_WORD16_MIN);
+      }
+    }
+  }  // End of H band gain computation.
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.h
new file mode 100644
index 00000000..aba1c468
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.h
@@ -0,0 +1,190 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
+
+#include "webrtc/modules/audio_processing/ns/defines.h"
+
+typedef struct NSParaExtract_ {
+  // Bin size of histogram.
+  float binSizeLrt;
+  float binSizeSpecFlat;
+  float binSizeSpecDiff;
+  // Range of histogram over which LRT threshold is computed.
+  float rangeAvgHistLrt;
+  // Scale parameters: multiply dominant peaks of the histograms by scale factor
+  // to obtain thresholds for prior model.
+  float factor1ModelPars;  // For LRT and spectral difference.
+  float factor2ModelPars;  // For spectral_flatness: used when noise is flatter
+                           // than speech.
+  // Peak limit for spectral flatness (varies between 0 and 1).
+  float thresPosSpecFlat;
+  // Limit on spacing of two highest peaks in histogram: spacing determined by
+  // bin size.
+  float limitPeakSpacingSpecFlat;
+  float limitPeakSpacingSpecDiff;
+  // Limit on relevance of second peak.
+  float limitPeakWeightsSpecFlat;
+  float limitPeakWeightsSpecDiff;
+  // Limit on fluctuation of LRT feature.
+  float thresFluctLrt;
+  // Limit on the max and min values for the feature thresholds.
+  float maxLrt;
+  float minLrt;
+  float maxSpecFlat;
+  float minSpecFlat;
+  float maxSpecDiff;
+  float minSpecDiff;
+  // Criteria of weight of histogram peak to accept/reject feature.
+  int thresWeightSpecFlat;
+  int thresWeightSpecDiff;
+
+} NSParaExtract;
+
+typedef struct NoiseSuppressionC_ {
+  uint32_t fs;
+  size_t blockLen;
+  size_t windShift;
+  size_t anaLen;
+  size_t magnLen;
+  int aggrMode;
+  const float* window;
+  float analyzeBuf[ANAL_BLOCKL_MAX];
+  float dataBuf[ANAL_BLOCKL_MAX];
+  float syntBuf[ANAL_BLOCKL_MAX];
+
+  int initFlag;
+  // Parameters for quantile noise estimation.
+  float density[SIMULT * HALF_ANAL_BLOCKL];
+  float lquantile[SIMULT * HALF_ANAL_BLOCKL];
+  float quantile[HALF_ANAL_BLOCKL];
+  int counter[SIMULT];
+  int updates;
+  // Parameters for Wiener filter.
+  float smooth[HALF_ANAL_BLOCKL];
+  float overdrive;
+  float denoiseBound;
+  int gainmap;
+  // FFT work arrays.
+  size_t ip[IP_LENGTH];
+  float wfft[W_LENGTH];
+
+  // Parameters for new method: some not needed, will reduce/cleanup later.
+  int32_t blockInd;  // Frame index counter.
+  int modelUpdatePars[4];  // Parameters for updating or estimating.
+  // Thresholds/weights for prior model.
+  float priorModelPars[7];  // Parameters for prior model.
+  float noise[HALF_ANAL_BLOCKL];  // Noise spectrum from current frame.
+  float noisePrev[HALF_ANAL_BLOCKL];  // Noise spectrum from previous frame.
+  // Magnitude spectrum of previous analyze frame.
+  float magnPrevAnalyze[HALF_ANAL_BLOCKL];
+  // Magnitude spectrum of previous process frame.
+  float magnPrevProcess[HALF_ANAL_BLOCKL];
+  float logLrtTimeAvg[HALF_ANAL_BLOCKL];  // Log LRT factor with time-smoothing.
+  float priorSpeechProb;  // Prior speech/noise probability.
+  float featureData[7];
+  // Conservative noise spectrum estimate.
+  float magnAvgPause[HALF_ANAL_BLOCKL];
+  float signalEnergy;  // Energy of |magn|.
+  float sumMagn;
+  float whiteNoiseLevel;  // Initial noise estimate.
+  float initMagnEst[HALF_ANAL_BLOCKL];  // Initial magnitude spectrum estimate.
+  float pinkNoiseNumerator;  // Pink noise parameter: numerator.
+  float pinkNoiseExp;  // Pink noise parameter: power of frequencies.
+  float parametricNoise[HALF_ANAL_BLOCKL];
+  // Parameters for feature extraction.
+  NSParaExtract featureExtractionParams;
+  // Histograms for parameter estimation.
+  int histLrt[HIST_PAR_EST];
+  int histSpecFlat[HIST_PAR_EST];
+  int histSpecDiff[HIST_PAR_EST];
+  // Quantities for high band estimate.
+  float speechProb[HALF_ANAL_BLOCKL];  // Final speech/noise prob: prior + LRT.
+  // Buffering data for HB.
+  float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
+
+} NoiseSuppressionC;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/****************************************************************************
+ * WebRtcNs_InitCore(...)
+ *
+ * This function initializes a noise suppression instance
+ *
+ * Input:
+ *      - self          : Instance that should be initialized
+ *      - fs            : Sampling frequency
+ *
+ * Output:
+ *      - self          : Initialized instance
+ *
+ * Return value         :  0 - Ok
+ *                        -1 - Error
+ */
+int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs);
+
+/****************************************************************************
+ * WebRtcNs_set_policy_core(...)
+ *
+ * This changes the aggressiveness of the noise suppression method.
+ *
+ * Input:
+ *      - self          : Instance that should be initialized
+ *      - mode          : 0: Mild (6dB), 1: Medium (10dB), 2: Aggressive (15dB)
+ *
+ * Output:
+ *      - self          : Initialized instance
+ *
+ * Return value         :  0 - Ok
+ *                        -1 - Error
+ */
+int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode);
+
+/****************************************************************************
+ * WebRtcNs_AnalyzeCore
+ *
+ * Estimate the background noise.
+ *
+ * Input:
+ *      - self          : Instance that should be initialized
+ *      - speechFrame   : Input speech frame for lower band
+ *
+ * Output:
+ *      - self          : Updated instance
+ */
+void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame);
+
+/****************************************************************************
+ * WebRtcNs_ProcessCore
+ *
+ * Do noise suppression.
+ *
+ * Input:
+ *      - self          : Instance that should be initialized
+ *      - inFrame       : Input speech frame for each band
+ *      - num_bands     : Number of bands
+ *
+ * Output:
+ *      - self          : Updated instance
+ *      - outFrame      : Output speech frame for each band
+ */
+void WebRtcNs_ProcessCore(NoiseSuppressionC* self,
+                          const float* const* inFrame,
+                          size_t num_bands,
+                          float* const* outFrame);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.c
new file mode 100644
index 00000000..ed6125aa
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.c
@@ -0,0 +1,2112 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+
+#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON)
+/* Tables are defined in ARM assembly files. */
+extern const int16_t WebRtcNsx_kLogTable[9];
+extern const int16_t WebRtcNsx_kCounterDiv[201];
+extern const int16_t WebRtcNsx_kLogTableFrac[256];
+#else
+static const int16_t WebRtcNsx_kLogTable[9] = {
+  0, 177, 355, 532, 710, 887, 1065, 1242, 1420
+};
+
+static const int16_t WebRtcNsx_kCounterDiv[201] = {
+  32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731,
+  2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311,
+  1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840,
+  819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607,
+  596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475,
+  468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390,
+  386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331,
+  328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287,
+  285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254,
+  252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228,
+  226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206,
+  205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188,
+  187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
+  172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
+};
+
+static const int16_t WebRtcNsx_kLogTableFrac[256] = {
+  0,   1,   3,   4,   6,   7,   9,  10,  11,  13,  14,  16,  17,  18,  20,  21,
+  22,  24,  25,  26,  28,  29,  30,  32,  33,  34,  36,  37,  38,  40,  41,  42,
+  44,  45,  46,  47,  49,  50,  51,  52,  54,  55,  56,  57,  59,  60,  61,  62,
+  63,  65,  66,  67,  68,  69,  71,  72,  73,  74,  75,  77,  78,  79,  80,  81,
+  82,  84,  85,  86,  87,  88,  89,  90,  92,  93,  94,  95,  96,  97,  98,  99,
+  100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116,
+  117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
+  132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
+  147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160,
+  161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174,
+  175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187,
+  188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200,
+  201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212,
+  213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224,
+  225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236,
+  237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247,
+  248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255
+};
+#endif  // WEBRTC_DETECT_NEON || WEBRTC_HAS_NEON
+
+// Skip first frequency bins during estimation. (0 <= value < 64)
+static const size_t kStartBand = 5;
+
+// hybrib Hanning & flat window
+static const int16_t kBlocks80w128x[128] = {
+  0,    536,   1072,   1606,   2139,   2669,   3196,   3720,   4240,   4756,   5266,
+  5771,   6270,   6762,   7246,   7723,   8192,   8652,   9102,   9543,   9974,  10394,
+  10803,  11200,  11585,  11958,  12318,  12665,  12998,  13318,  13623,  13913,  14189,
+  14449,  14694,  14924,  15137,  15334,  15515,  15679,  15826,  15956,  16069,  16165,
+  16244,  16305,  16349,  16375,  16384,  16384,  16384,  16384,  16384,  16384,  16384,
+  16384,  16384,  16384,  16384,  16384,  16384,  16384,  16384,  16384,  16384,  16384,
+  16384,  16384,  16384,  16384,  16384,  16384,  16384,  16384,  16384,  16384,  16384,
+  16384,  16384,  16384,  16384,  16375,  16349,  16305,  16244,  16165,  16069,  15956,
+  15826,  15679,  15515,  15334,  15137,  14924,  14694,  14449,  14189,  13913,  13623,
+  13318,  12998,  12665,  12318,  11958,  11585,  11200,  10803,  10394,   9974,   9543,
+  9102,   8652,   8192,   7723,   7246,   6762,   6270,   5771,   5266,   4756,   4240,
+  3720,   3196,   2669,   2139,   1606,   1072,    536
+};
+
+// hybrib Hanning & flat window
+static const int16_t kBlocks160w256x[256] = {
+  0,   268,   536,   804,  1072,  1339,  1606,  1872,
+  2139,  2404,  2669,  2933,  3196,  3459,  3720,  3981,
+  4240,  4499,  4756,  5012,  5266,  5520,  5771,  6021,
+  6270,  6517,  6762,  7005,  7246,  7486,  7723,  7959,
+  8192,  8423,  8652,  8878,  9102,  9324,  9543,  9760,
+  9974, 10185, 10394, 10600, 10803, 11003, 11200, 11394,
+  11585, 11773, 11958, 12140, 12318, 12493, 12665, 12833,
+  12998, 13160, 13318, 13472, 13623, 13770, 13913, 14053,
+  14189, 14321, 14449, 14574, 14694, 14811, 14924, 15032,
+  15137, 15237, 15334, 15426, 15515, 15599, 15679, 15754,
+  15826, 15893, 15956, 16015, 16069, 16119, 16165, 16207,
+  16244, 16277, 16305, 16329, 16349, 16364, 16375, 16382,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16382, 16375, 16364, 16349, 16329, 16305, 16277,
+  16244, 16207, 16165, 16119, 16069, 16015, 15956, 15893,
+  15826, 15754, 15679, 15599, 15515, 15426, 15334, 15237,
+  15137, 15032, 14924, 14811, 14694, 14574, 14449, 14321,
+  14189, 14053, 13913, 13770, 13623, 13472, 13318, 13160,
+  12998, 12833, 12665, 12493, 12318, 12140, 11958, 11773,
+  11585, 11394, 11200, 11003, 10803, 10600, 10394, 10185,
+  9974,  9760,  9543,  9324,  9102,  8878,  8652,  8423,
+  8192,  7959,  7723,  7486,  7246,  7005,  6762,  6517,
+  6270,  6021,  5771,  5520,  5266,  5012,  4756,  4499,
+  4240,  3981,  3720,  3459,  3196,  2933,  2669,  2404,
+  2139,  1872,  1606,  1339,  1072,   804,   536,   268
+};
+
+// Gain factor1 table: Input value in Q8 and output value in Q13
+// original floating point code
+//  if (gain > blim) {
+//    factor1 = 1.0 + 1.3 * (gain - blim);
+//    if (gain * factor1 > 1.0) {
+//      factor1 = 1.0 / gain;
+//    }
+//  } else {
+//    factor1 = 1.0;
+//  }
+static const int16_t kFactor1Table[257] = {
+  8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8233, 8274, 8315, 8355, 8396, 8436, 8475, 8515, 8554, 8592, 8631, 8669,
+  8707, 8745, 8783, 8820, 8857, 8894, 8931, 8967, 9003, 9039, 9075, 9111, 9146, 9181,
+  9216, 9251, 9286, 9320, 9354, 9388, 9422, 9456, 9489, 9523, 9556, 9589, 9622, 9655,
+  9687, 9719, 9752, 9784, 9816, 9848, 9879, 9911, 9942, 9973, 10004, 10035, 10066,
+  10097, 10128, 10158, 10188, 10218, 10249, 10279, 10308, 10338, 10368, 10397, 10426,
+  10456, 10485, 10514, 10543, 10572, 10600, 10629, 10657, 10686, 10714, 10742, 10770,
+  10798, 10826, 10854, 10882, 10847, 10810, 10774, 10737, 10701, 10666, 10631, 10596,
+  10562, 10527, 10494, 10460, 10427, 10394, 10362, 10329, 10297, 10266, 10235, 10203,
+  10173, 10142, 10112, 10082, 10052, 10023, 9994, 9965, 9936, 9908, 9879, 9851, 9824,
+  9796, 9769, 9742, 9715, 9689, 9662, 9636, 9610, 9584, 9559, 9534, 9508, 9484, 9459,
+  9434, 9410, 9386, 9362, 9338, 9314, 9291, 9268, 9245, 9222, 9199, 9176, 9154, 9132,
+  9110, 9088, 9066, 9044, 9023, 9002, 8980, 8959, 8939, 8918, 8897, 8877, 8857, 8836,
+  8816, 8796, 8777, 8757, 8738, 8718, 8699, 8680, 8661, 8642, 8623, 8605, 8586, 8568,
+  8550, 8532, 8514, 8496, 8478, 8460, 8443, 8425, 8408, 8391, 8373, 8356, 8339, 8323,
+  8306, 8289, 8273, 8256, 8240, 8224, 8208, 8192
+};
+
+// For Factor2 tables
+// original floating point code
+// if (gain > blim) {
+//   factor2 = 1.0;
+// } else {
+//   factor2 = 1.0 - 0.3 * (blim - gain);
+//   if (gain <= inst->denoiseBound) {
+//     factor2 = 1.0 - 0.3 * (blim - inst->denoiseBound);
+//   }
+// }
+//
+// Gain factor table: Input value in Q8 and output value in Q13
+static const int16_t kFactor2Aggressiveness1[257] = {
+  7577, 7577, 7577, 7577, 7577, 7577,
+  7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7596, 7614, 7632,
+  7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845,
+  7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016,
+  8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162,
+  8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192
+};
+
+// Gain factor table: Input value in Q8 and output value in Q13
+static const int16_t kFactor2Aggressiveness2[257] = {
+  7270, 7270, 7270, 7270, 7270, 7306,
+  7339, 7369, 7397, 7424, 7448, 7472, 7495, 7517, 7537, 7558, 7577, 7596, 7614, 7632,
+  7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845,
+  7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016,
+  8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162,
+  8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192
+};
+
+// Gain factor table: Input value in Q8 and output value in Q13
+static const int16_t kFactor2Aggressiveness3[257] = {
+  7184, 7184, 7184, 7229, 7270, 7306,
+  7339, 7369, 7397, 7424, 7448, 7472, 7495, 7517, 7537, 7558, 7577, 7596, 7614, 7632,
+  7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845,
+  7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016,
+  8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162,
+  8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192
+};
+
+// sum of log2(i) from table index to inst->anaLen2 in Q5
+// Note that the first table value is invalid, since log2(0) = -infinity
+static const int16_t kSumLogIndex[66] = {
+  0,  22917,  22917,  22885,  22834,  22770,  22696,  22613,
+  22524,  22428,  22326,  22220,  22109,  21994,  21876,  21754,
+  21629,  21501,  21370,  21237,  21101,  20963,  20822,  20679,
+  20535,  20388,  20239,  20089,  19937,  19783,  19628,  19470,
+  19312,  19152,  18991,  18828,  18664,  18498,  18331,  18164,
+  17994,  17824,  17653,  17480,  17306,  17132,  16956,  16779,
+  16602,  16423,  16243,  16063,  15881,  15699,  15515,  15331,
+  15146,  14960,  14774,  14586,  14398,  14209,  14019,  13829,
+  13637,  13445
+};
+
+// sum of log2(i)^2 from table index to inst->anaLen2 in Q2
+// Note that the first table value is invalid, since log2(0) = -infinity
+static const int16_t kSumSquareLogIndex[66] = {
+  0,  16959,  16959,  16955,  16945,  16929,  16908,  16881,
+  16850,  16814,  16773,  16729,  16681,  16630,  16575,  16517,
+  16456,  16392,  16325,  16256,  16184,  16109,  16032,  15952,
+  15870,  15786,  15700,  15612,  15521,  15429,  15334,  15238,
+  15140,  15040,  14938,  14834,  14729,  14622,  14514,  14404,
+  14292,  14179,  14064,  13947,  13830,  13710,  13590,  13468,
+  13344,  13220,  13094,  12966,  12837,  12707,  12576,  12444,
+  12310,  12175,  12039,  11902,  11763,  11624,  11483,  11341,
+  11198,  11054
+};
+
+// log2(table index) in Q12
+// Note that the first table value is invalid, since log2(0) = -infinity
+static const int16_t kLogIndex[129] = {
+  0,      0,   4096,   6492,   8192,   9511,  10588,  11499,
+  12288,  12984,  13607,  14170,  14684,  15157,  15595,  16003,
+  16384,  16742,  17080,  17400,  17703,  17991,  18266,  18529,
+  18780,  19021,  19253,  19476,  19691,  19898,  20099,  20292,
+  20480,  20662,  20838,  21010,  21176,  21338,  21496,  21649,
+  21799,  21945,  22087,  22226,  22362,  22495,  22625,  22752,
+  22876,  22998,  23117,  23234,  23349,  23462,  23572,  23680,
+  23787,  23892,  23994,  24095,  24195,  24292,  24388,  24483,
+  24576,  24668,  24758,  24847,  24934,  25021,  25106,  25189,
+  25272,  25354,  25434,  25513,  25592,  25669,  25745,  25820,
+  25895,  25968,  26041,  26112,  26183,  26253,  26322,  26390,
+  26458,  26525,  26591,  26656,  26721,  26784,  26848,  26910,
+  26972,  27033,  27094,  27154,  27213,  27272,  27330,  27388,
+  27445,  27502,  27558,  27613,  27668,  27722,  27776,  27830,
+  27883,  27935,  27988,  28039,  28090,  28141,  28191,  28241,
+  28291,  28340,  28388,  28437,  28484,  28532,  28579,  28626,
+  28672
+};
+
+// determinant of estimation matrix in Q0 corresponding to the log2 tables above
+// Note that the first table value is invalid, since log2(0) = -infinity
+static const int16_t kDeterminantEstMatrix[66] = {
+  0,  29814,  25574,  22640,  20351,  18469,  16873,  15491,
+  14277,  13199,  12233,  11362,  10571,   9851,   9192,   8587,
+  8030,   7515,   7038,   6596,   6186,   5804,   5448,   5115,
+  4805,   4514,   4242,   3988,   3749,   3524,   3314,   3116,
+  2930,   2755,   2590,   2435,   2289,   2152,   2022,   1900,
+  1785,   1677,   1575,   1478,   1388,   1302,   1221,   1145,
+  1073,   1005,    942,    881,    825,    771,    721,    674,
+  629,    587,    547,    510,    475,    442,    411,    382,
+  355,    330
+};
+
+// Update the noise estimation information.
+static void UpdateNoiseEstimate(NoiseSuppressionFixedC* inst, int offset) {
+  int32_t tmp32no1 = 0;
+  int32_t tmp32no2 = 0;
+  int16_t tmp16 = 0;
+  const int16_t kExp2Const = 11819; // Q13
+
+  size_t i = 0;
+
+  tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
+                                   inst->magnLen);
+  // Guarantee a Q-domain as high as possible and still fit in int16
+  inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                   kExp2Const, tmp16, 21);
+  for (i = 0; i < inst->magnLen; i++) {
+    // inst->quantile[i]=exp(inst->lquantile[offset+i]);
+    // in Q21
+    tmp32no2 = kExp2Const * inst->noiseEstLogQuantile[offset + i];
+    tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
+    tmp16 = (int16_t)(tmp32no2 >> 21);
+    tmp16 -= 21;// shift 21 to get result in Q0
+    tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise)
+    if (tmp16 < 0) {
+      tmp32no1 >>= -tmp16;
+    } else {
+      tmp32no1 <<= tmp16;
+    }
+    inst->noiseEstQuantile[i] = WebRtcSpl_SatW32ToW16(tmp32no1);
+  }
+}
+
+// Noise Estimation
+static void NoiseEstimationC(NoiseSuppressionFixedC* inst,
+                             uint16_t* magn,
+                             uint32_t* noise,
+                             int16_t* q_noise) {
+  int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
+  int16_t countProd, delta, zeros, frac;
+  int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
+  const int16_t log2_const = 22713; // Q15
+  const int16_t width_factor = 21845;
+
+  size_t i, s, offset;
+
+  tabind = inst->stages - inst->normData;
+  assert(tabind < 9);
+  assert(tabind > -9);
+  if (tabind < 0) {
+    logval = -WebRtcNsx_kLogTable[-tabind];
+  } else {
+    logval = WebRtcNsx_kLogTable[tabind];
+  }
+
+  // lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
+  // magn is in Q(-stages), and the real lmagn values are:
+  // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
+  // lmagn in Q8
+  for (i = 0; i < inst->magnLen; i++) {
+    if (magn[i]) {
+      zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
+      frac = (int16_t)((((uint32_t)magn[i] << zeros)
+                              & 0x7FFFFFFF) >> 23);
+      // log2(magn(i))
+      assert(frac < 256);
+      log2 = (int16_t)(((31 - zeros) << 8)
+                             + WebRtcNsx_kLogTableFrac[frac]);
+      // log2(magn(i))*log(2)
+      lmagn[i] = (int16_t)((log2 * log2_const) >> 15);
+      // + log(2^stages)
+      lmagn[i] += logval;
+    } else {
+      lmagn[i] = logval;//0;
+    }
+  }
+
+  // loop over simultaneous estimates
+  for (s = 0; s < SIMULT; s++) {
+    offset = s * inst->magnLen;
+
+    // Get counter values from state
+    counter = inst->noiseEstCounter[s];
+    assert(counter < 201);
+    countDiv = WebRtcNsx_kCounterDiv[counter];
+    countProd = (int16_t)(counter * countDiv);
+
+    // quant_est(...)
+    for (i = 0; i < inst->magnLen; i++) {
+      // compute delta
+      if (inst->noiseEstDensity[offset + i] > 512) {
+        // Get the value for delta by shifting intead of dividing.
+        int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]);
+        delta = (int16_t)(FACTOR_Q16 >> (14 - factor));
+      } else {
+        delta = FACTOR_Q7;
+        if (inst->blockIndex < END_STARTUP_LONG) {
+          // Smaller step size during startup. This prevents from using
+          // unrealistic values causing overflow.
+          delta = FACTOR_Q7_STARTUP;
+        }
+      }
+
+      // update log quantile estimate
+      tmp16 = (int16_t)((delta * countDiv) >> 14);
+      if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
+        // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
+        // CounterDiv=1/(inst->counter[s]+1) in Q15
+        tmp16 += 2;
+        inst->noiseEstLogQuantile[offset + i] += tmp16 / 4;
+      } else {
+        tmp16 += 1;
+        // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
+        // TODO(bjornv): investigate why we need to truncate twice.
+        tmp16no2 = (int16_t)((tmp16 / 2) * 3 / 2);
+        inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
+        if (inst->noiseEstLogQuantile[offset + i] < logval) {
+          // This is the smallest fixed point representation we can
+          // have, hence we limit the output.
+          inst->noiseEstLogQuantile[offset + i] = logval;
+        }
+      }
+
+      // update density estimate
+      if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
+          < WIDTH_Q8) {
+        tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                     inst->noiseEstDensity[offset + i], countProd, 15);
+        tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                     width_factor, countDiv, 15);
+        inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
+      }
+    }  // end loop over magnitude spectrum
+
+    if (counter >= END_STARTUP_LONG) {
+      inst->noiseEstCounter[s] = 0;
+      if (inst->blockIndex >= END_STARTUP_LONG) {
+        UpdateNoiseEstimate(inst, offset);
+      }
+    }
+    inst->noiseEstCounter[s]++;
+
+  }  // end loop over simultaneous estimates
+
+  // Sequentially update the noise during startup
+  if (inst->blockIndex < END_STARTUP_LONG) {
+    UpdateNoiseEstimate(inst, offset);
+  }
+
+  for (i = 0; i < inst->magnLen; i++) {
+    noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise)
+  }
+  (*q_noise) = (int16_t)inst->qNoise;
+}
+
+// Filter the data in the frequency domain, and create spectrum.
+static void PrepareSpectrumC(NoiseSuppressionFixedC* inst, int16_t* freq_buf) {
+  size_t i = 0, j = 0;
+
+  for (i = 0; i < inst->magnLen; i++) {
+    inst->real[i] = (int16_t)((inst->real[i] *
+        (int16_t)(inst->noiseSupFilter[i])) >> 14);  // Q(normData-stages)
+    inst->imag[i] = (int16_t)((inst->imag[i] *
+        (int16_t)(inst->noiseSupFilter[i])) >> 14);  // Q(normData-stages)
+  }
+
+  freq_buf[0] = inst->real[0];
+  freq_buf[1] = -inst->imag[0];
+  for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
+    freq_buf[j] = inst->real[i];
+    freq_buf[j + 1] = -inst->imag[i];
+  }
+  freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
+  freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
+}
+
+// Denormalize the real-valued signal |in|, the output from inverse FFT.
+static void DenormalizeC(NoiseSuppressionFixedC* inst,
+                         int16_t* in,
+                         int factor) {
+  size_t i = 0;
+  int32_t tmp32 = 0;
+  for (i = 0; i < inst->anaLen; i += 1) {
+    tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[i],
+                                 factor - inst->normData);
+    inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0
+  }
+}
+
+// For the noise supression process, synthesis, read out fully processed
+// segment, and update synthesis buffer.
+static void SynthesisUpdateC(NoiseSuppressionFixedC* inst,
+                             int16_t* out_frame,
+                             int16_t gain_factor) {
+  size_t i = 0;
+  int16_t tmp16a = 0;
+  int16_t tmp16b = 0;
+  int32_t tmp32 = 0;
+
+  // synthesis
+  for (i = 0; i < inst->anaLen; i++) {
+    tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                 inst->window[i], inst->real[i], 14); // Q0, window in Q14
+    tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13); // Q0
+    // Down shift with rounding
+    tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0
+    inst->synthesisBuffer[i] = WebRtcSpl_AddSatW16(inst->synthesisBuffer[i],
+                                                   tmp16b); // Q0
+  }
+
+  // read out fully processed segment
+  for (i = 0; i < inst->blockLen10ms; i++) {
+    out_frame[i] = inst->synthesisBuffer[i]; // Q0
+  }
+
+  // update synthesis buffer
+  memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
+      (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer));
+  WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
+      + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
+}
+
+// Update analysis buffer for lower band, and window data before FFT.
+static void AnalysisUpdateC(NoiseSuppressionFixedC* inst,
+                            int16_t* out,
+                            int16_t* new_speech) {
+  size_t i = 0;
+
+  // For lower band update analysis buffer.
+  memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
+      (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer));
+  memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech,
+      inst->blockLen10ms * sizeof(*inst->analysisBuffer));
+
+  // Window data before FFT.
+  for (i = 0; i < inst->anaLen; i++) {
+    out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+               inst->window[i], inst->analysisBuffer[i], 14); // Q0
+  }
+}
+
+// Normalize the real-valued signal |in|, the input to forward FFT.
+static void NormalizeRealBufferC(NoiseSuppressionFixedC* inst,
+                                 const int16_t* in,
+                                 int16_t* out) {
+  size_t i = 0;
+  assert(inst->normData >= 0);
+  for (i = 0; i < inst->anaLen; ++i) {
+    out[i] = in[i] << inst->normData;  // Q(normData)
+  }
+}
+
+// Declare function pointers.
+NoiseEstimation WebRtcNsx_NoiseEstimation;
+PrepareSpectrum WebRtcNsx_PrepareSpectrum;
+SynthesisUpdate WebRtcNsx_SynthesisUpdate;
+AnalysisUpdate WebRtcNsx_AnalysisUpdate;
+Denormalize WebRtcNsx_Denormalize;
+NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
+
+#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON)
+// Initialize function pointers for ARM Neon platform.
+static void WebRtcNsx_InitNeon(void) {
+  WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon;
+  WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrumNeon;
+  WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdateNeon;
+  WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdateNeon;
+}
+#endif
+
+#if defined(MIPS32_LE)
+// Initialize function pointers for MIPS platform.
+static void WebRtcNsx_InitMips(void) {
+  WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrum_mips;
+  WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdate_mips;
+  WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdate_mips;
+  WebRtcNsx_NormalizeRealBuffer = WebRtcNsx_NormalizeRealBuffer_mips;
+#if defined(MIPS_DSP_R1_LE)
+  WebRtcNsx_Denormalize = WebRtcNsx_Denormalize_mips;
+#endif
+}
+#endif
+
+void WebRtcNsx_CalcParametricNoiseEstimate(NoiseSuppressionFixedC* inst,
+                                           int16_t pink_noise_exp_avg,
+                                           int32_t pink_noise_num_avg,
+                                           int freq_index,
+                                           uint32_t* noise_estimate,
+                                           uint32_t* noise_estimate_avg) {
+  int32_t tmp32no1 = 0;
+  int32_t tmp32no2 = 0;
+
+  int16_t int_part = 0;
+  int16_t frac_part = 0;
+
+  // Use pink noise estimate
+  // noise_estimate = 2^(pinkNoiseNumerator + pinkNoiseExp * log2(j))
+  assert(freq_index >= 0);
+  assert(freq_index < 129);
+  tmp32no2 = (pink_noise_exp_avg * kLogIndex[freq_index]) >> 15;  // Q11
+  tmp32no1 = pink_noise_num_avg - tmp32no2; // Q11
+
+  // Calculate output: 2^tmp32no1
+  // Output in Q(minNorm-stages)
+  tmp32no1 += (inst->minNorm - inst->stages) << 11;
+  if (tmp32no1 > 0) {
+    int_part = (int16_t)(tmp32no1 >> 11);
+    frac_part = (int16_t)(tmp32no1 & 0x000007ff); // Q11
+    // Piecewise linear approximation of 'b' in
+    // 2^(int_part+frac_part) = 2^int_part * (1 + b)
+    // 'b' is given in Q11 and below stored in frac_part.
+    if (frac_part >> 10) {
+      // Upper fractional part
+      tmp32no2 = (2048 - frac_part) * 1244;  // Q21
+      tmp32no2 = 2048 - (tmp32no2 >> 10);
+    } else {
+      // Lower fractional part
+      tmp32no2 = (frac_part * 804) >> 10;
+    }
+    // Shift fractional part to Q(minNorm-stages)
+    tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, int_part - 11);
+    *noise_estimate_avg = (1 << int_part) + (uint32_t)tmp32no2;
+    // Scale up to initMagnEst, which is not block averaged
+    *noise_estimate = (*noise_estimate_avg) * (uint32_t)(inst->blockIndex + 1);
+  }
+}
+
+// Initialize state
+int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs) {
+  int i;
+
+  //check for valid pointer
+  if (inst == NULL) {
+    return -1;
+  }
+  //
+
+  // Initialization of struct
+  if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) {
+    inst->fs = fs;
+  } else {
+    return -1;
+  }
+
+  if (fs == 8000) {
+    inst->blockLen10ms = 80;
+    inst->anaLen = 128;
+    inst->stages = 7;
+    inst->window = kBlocks80w128x;
+    inst->thresholdLogLrt = 131072; //default threshold for LRT feature
+    inst->maxLrt = 0x0040000;
+    inst->minLrt = 52429;
+  } else {
+    inst->blockLen10ms = 160;
+    inst->anaLen = 256;
+    inst->stages = 8;
+    inst->window = kBlocks160w256x;
+    inst->thresholdLogLrt = 212644; //default threshold for LRT feature
+    inst->maxLrt = 0x0080000;
+    inst->minLrt = 104858;
+  }
+  inst->anaLen2 = inst->anaLen / 2;
+  inst->magnLen = inst->anaLen2 + 1;
+
+  if (inst->real_fft != NULL) {
+    WebRtcSpl_FreeRealFFT(inst->real_fft);
+  }
+  inst->real_fft = WebRtcSpl_CreateRealFFT(inst->stages);
+  if (inst->real_fft == NULL) {
+    return -1;
+  }
+
+  WebRtcSpl_ZerosArrayW16(inst->analysisBuffer, ANAL_BLOCKL_MAX);
+  WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer, ANAL_BLOCKL_MAX);
+
+  // for HB processing
+  WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX[0],
+                          NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
+  // for quantile noise estimation
+  WebRtcSpl_ZerosArrayW16(inst->noiseEstQuantile, HALF_ANAL_BLOCKL);
+  for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
+    inst->noiseEstLogQuantile[i] = 2048; // Q8
+    inst->noiseEstDensity[i] = 153; // Q9
+  }
+  for (i = 0; i < SIMULT; i++) {
+    inst->noiseEstCounter[i] = (int16_t)(END_STARTUP_LONG * (i + 1)) / SIMULT;
+  }
+
+  // Initialize suppression filter with ones
+  WebRtcSpl_MemSetW16((int16_t*)inst->noiseSupFilter, 16384, HALF_ANAL_BLOCKL);
+
+  // Set the aggressiveness: default
+  inst->aggrMode = 0;
+
+  //initialize variables for new method
+  inst->priorNonSpeechProb = 8192; // Q14(0.5) prior probability for speech/noise
+  for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
+    inst->prevMagnU16[i] = 0;
+    inst->prevNoiseU32[i] = 0; //previous noise-spectrum
+    inst->logLrtTimeAvgW32[i] = 0; //smooth LR ratio
+    inst->avgMagnPause[i] = 0; //conservative noise spectrum estimate
+    inst->initMagnEst[i] = 0; //initial average magnitude spectrum
+  }
+
+  //feature quantities
+  inst->thresholdSpecDiff = 50; //threshold for difference feature: determined on-line
+  inst->thresholdSpecFlat = 20480; //threshold for flatness: determined on-line
+  inst->featureLogLrt = inst->thresholdLogLrt; //average LRT factor (= threshold)
+  inst->featureSpecFlat = inst->thresholdSpecFlat; //spectral flatness (= threshold)
+  inst->featureSpecDiff = inst->thresholdSpecDiff; //spectral difference (= threshold)
+  inst->weightLogLrt = 6; //default weighting par for LRT feature
+  inst->weightSpecFlat = 0; //default weighting par for spectral flatness feature
+  inst->weightSpecDiff = 0; //default weighting par for spectral difference feature
+
+  inst->curAvgMagnEnergy = 0; //window time-average of input magnitude spectrum
+  inst->timeAvgMagnEnergy = 0; //normalization for spectral difference
+  inst->timeAvgMagnEnergyTmp = 0; //normalization for spectral difference
+
+  //histogram quantities: used to estimate/update thresholds for features
+  WebRtcSpl_ZerosArrayW16(inst->histLrt, HIST_PAR_EST);
+  WebRtcSpl_ZerosArrayW16(inst->histSpecDiff, HIST_PAR_EST);
+  WebRtcSpl_ZerosArrayW16(inst->histSpecFlat, HIST_PAR_EST);
+
+  inst->blockIndex = -1; //frame counter
+
+  //inst->modelUpdate    = 500;   //window for update
+  inst->modelUpdate = (1 << STAT_UPDATES); //window for update
+  inst->cntThresUpdate = 0; //counter feature thresholds updates
+
+  inst->sumMagn = 0;
+  inst->magnEnergy = 0;
+  inst->prevQMagn = 0;
+  inst->qNoise = 0;
+  inst->prevQNoise = 0;
+
+  inst->energyIn = 0;
+  inst->scaleEnergyIn = 0;
+
+  inst->whiteNoiseLevel = 0;
+  inst->pinkNoiseNumerator = 0;
+  inst->pinkNoiseExp = 0;
+  inst->minNorm = 15; // Start with full scale
+  inst->zeroInputSignal = 0;
+
+  //default mode
+  WebRtcNsx_set_policy_core(inst, 0);
+
+#ifdef NS_FILEDEBUG
+  inst->infile = fopen("indebug.pcm", "wb");
+  inst->outfile = fopen("outdebug.pcm", "wb");
+  inst->file1 = fopen("file1.pcm", "wb");
+  inst->file2 = fopen("file2.pcm", "wb");
+  inst->file3 = fopen("file3.pcm", "wb");
+  inst->file4 = fopen("file4.pcm", "wb");
+  inst->file5 = fopen("file5.pcm", "wb");
+#endif
+
+  // Initialize function pointers.
+  WebRtcNsx_NoiseEstimation = NoiseEstimationC;
+  WebRtcNsx_PrepareSpectrum = PrepareSpectrumC;
+  WebRtcNsx_SynthesisUpdate = SynthesisUpdateC;
+  WebRtcNsx_AnalysisUpdate = AnalysisUpdateC;
+  WebRtcNsx_Denormalize = DenormalizeC;
+  WebRtcNsx_NormalizeRealBuffer = NormalizeRealBufferC;
+
+#ifdef WEBRTC_DETECT_NEON
+  uint64_t features = WebRtc_GetCPUFeaturesARM();
+  if ((features & kCPUFeatureNEON) != 0) {
+      WebRtcNsx_InitNeon();
+  }
+#elif defined(WEBRTC_HAS_NEON)
+  WebRtcNsx_InitNeon();
+#endif
+
+#if defined(MIPS32_LE)
+  WebRtcNsx_InitMips();
+#endif
+
+  inst->initFlag = 1;
+
+  return 0;
+}
+
+int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode) {
+  // allow for modes:0,1,2,3
+  if (mode < 0 || mode > 3) {
+    return -1;
+  }
+
+  inst->aggrMode = mode;
+  if (mode == 0) {
+    inst->overdrive = 256; // Q8(1.0)
+    inst->denoiseBound = 8192; // Q14(0.5)
+    inst->gainMap = 0; // No gain compensation
+  } else if (mode == 1) {
+    inst->overdrive = 256; // Q8(1.0)
+    inst->denoiseBound = 4096; // Q14(0.25)
+    inst->factor2Table = kFactor2Aggressiveness1;
+    inst->gainMap = 1;
+  } else if (mode == 2) {
+    inst->overdrive = 282; // ~= Q8(1.1)
+    inst->denoiseBound = 2048; // Q14(0.125)
+    inst->factor2Table = kFactor2Aggressiveness2;
+    inst->gainMap = 1;
+  } else if (mode == 3) {
+    inst->overdrive = 320; // Q8(1.25)
+    inst->denoiseBound = 1475; // ~= Q14(0.09)
+    inst->factor2Table = kFactor2Aggressiveness3;
+    inst->gainMap = 1;
+  }
+  return 0;
+}
+
+// Extract thresholds for feature parameters
+// histograms are computed over some window_size (given by window_pars)
+// thresholds and weights are extracted every window
+// flag 0 means update histogram only, flag 1 means compute the thresholds/weights
+// threshold and weights are returned in: inst->priorModelPars
+void WebRtcNsx_FeatureParameterExtraction(NoiseSuppressionFixedC* inst,
+                                          int flag) {
+  uint32_t tmpU32;
+  uint32_t histIndex;
+  uint32_t posPeak1SpecFlatFX, posPeak2SpecFlatFX;
+  uint32_t posPeak1SpecDiffFX, posPeak2SpecDiffFX;
+
+  int32_t tmp32;
+  int32_t fluctLrtFX, thresFluctLrtFX;
+  int32_t avgHistLrtFX, avgSquareHistLrtFX, avgHistLrtComplFX;
+
+  int16_t j;
+  int16_t numHistLrt;
+
+  int i;
+  int useFeatureSpecFlat, useFeatureSpecDiff, featureSum;
+  int maxPeak1, maxPeak2;
+  int weightPeak1SpecFlat, weightPeak2SpecFlat;
+  int weightPeak1SpecDiff, weightPeak2SpecDiff;
+
+  //update histograms
+  if (!flag) {
+    // LRT
+    // Type casting to UWord32 is safe since negative values will not be wrapped to larger
+    // values than HIST_PAR_EST
+    histIndex = (uint32_t)(inst->featureLogLrt);
+    if (histIndex < HIST_PAR_EST) {
+      inst->histLrt[histIndex]++;
+    }
+    // Spectral flatness
+    // (inst->featureSpecFlat*20)>>10 = (inst->featureSpecFlat*5)>>8
+    histIndex = (inst->featureSpecFlat * 5) >> 8;
+    if (histIndex < HIST_PAR_EST) {
+      inst->histSpecFlat[histIndex]++;
+    }
+    // Spectral difference
+    histIndex = HIST_PAR_EST;
+    if (inst->timeAvgMagnEnergy > 0) {
+      // Guard against division by zero
+      // If timeAvgMagnEnergy == 0 we have no normalizing statistics and
+      // therefore can't update the histogram
+      histIndex = ((inst->featureSpecDiff * 5) >> inst->stages) /
+          inst->timeAvgMagnEnergy;
+    }
+    if (histIndex < HIST_PAR_EST) {
+      inst->histSpecDiff[histIndex]++;
+    }
+  }
+
+  // extract parameters for speech/noise probability
+  if (flag) {
+    useFeatureSpecDiff = 1;
+    //for LRT feature:
+    // compute the average over inst->featureExtractionParams.rangeAvgHistLrt
+    avgHistLrtFX = 0;
+    avgSquareHistLrtFX = 0;
+    numHistLrt = 0;
+    for (i = 0; i < BIN_SIZE_LRT; i++) {
+      j = (2 * i + 1);
+      tmp32 = inst->histLrt[i] * j;
+      avgHistLrtFX += tmp32;
+      numHistLrt += inst->histLrt[i];
+      avgSquareHistLrtFX += tmp32 * j;
+    }
+    avgHistLrtComplFX = avgHistLrtFX;
+    for (; i < HIST_PAR_EST; i++) {
+      j = (2 * i + 1);
+      tmp32 = inst->histLrt[i] * j;
+      avgHistLrtComplFX += tmp32;
+      avgSquareHistLrtFX += tmp32 * j;
+    }
+    fluctLrtFX = avgSquareHistLrtFX * numHistLrt -
+        avgHistLrtFX * avgHistLrtComplFX;
+    thresFluctLrtFX = THRES_FLUCT_LRT * numHistLrt;
+    // get threshold for LRT feature:
+    tmpU32 = (FACTOR_1_LRT_DIFF * (uint32_t)avgHistLrtFX);
+    if ((fluctLrtFX < thresFluctLrtFX) || (numHistLrt == 0) ||
+        (tmpU32 > (uint32_t)(100 * numHistLrt))) {
+      //very low fluctuation, so likely noise
+      inst->thresholdLogLrt = inst->maxLrt;
+    } else {
+      tmp32 = (int32_t)((tmpU32 << (9 + inst->stages)) / numHistLrt /
+                              25);
+      // check if value is within min/max range
+      inst->thresholdLogLrt = WEBRTC_SPL_SAT(inst->maxLrt,
+                                             tmp32,
+                                             inst->minLrt);
+    }
+    if (fluctLrtFX < thresFluctLrtFX) {
+      // Do not use difference feature if fluctuation of LRT feature is very low:
+      // most likely just noise state
+      useFeatureSpecDiff = 0;
+    }
+
+    // for spectral flatness and spectral difference: compute the main peaks of histogram
+    maxPeak1 = 0;
+    maxPeak2 = 0;
+    posPeak1SpecFlatFX = 0;
+    posPeak2SpecFlatFX = 0;
+    weightPeak1SpecFlat = 0;
+    weightPeak2SpecFlat = 0;
+
+    // peaks for flatness
+    for (i = 0; i < HIST_PAR_EST; i++) {
+      if (inst->histSpecFlat[i] > maxPeak1) {
+        // Found new "first" peak
+        maxPeak2 = maxPeak1;
+        weightPeak2SpecFlat = weightPeak1SpecFlat;
+        posPeak2SpecFlatFX = posPeak1SpecFlatFX;
+
+        maxPeak1 = inst->histSpecFlat[i];
+        weightPeak1SpecFlat = inst->histSpecFlat[i];
+        posPeak1SpecFlatFX = (uint32_t)(2 * i + 1);
+      } else if (inst->histSpecFlat[i] > maxPeak2) {
+        // Found new "second" peak
+        maxPeak2 = inst->histSpecFlat[i];
+        weightPeak2SpecFlat = inst->histSpecFlat[i];
+        posPeak2SpecFlatFX = (uint32_t)(2 * i + 1);
+      }
+    }
+
+    // for spectral flatness feature
+    useFeatureSpecFlat = 1;
+    // merge the two peaks if they are close
+    if ((posPeak1SpecFlatFX - posPeak2SpecFlatFX < LIM_PEAK_SPACE_FLAT_DIFF)
+        && (weightPeak2SpecFlat * LIM_PEAK_WEIGHT_FLAT_DIFF > weightPeak1SpecFlat)) {
+      weightPeak1SpecFlat += weightPeak2SpecFlat;
+      posPeak1SpecFlatFX = (posPeak1SpecFlatFX + posPeak2SpecFlatFX) >> 1;
+    }
+    //reject if weight of peaks is not large enough, or peak value too small
+    if (weightPeak1SpecFlat < THRES_WEIGHT_FLAT_DIFF || posPeak1SpecFlatFX
+        < THRES_PEAK_FLAT) {
+      useFeatureSpecFlat = 0;
+    } else { // if selected, get the threshold
+      // compute the threshold and check if value is within min/max range
+      inst->thresholdSpecFlat = WEBRTC_SPL_SAT(MAX_FLAT_Q10, FACTOR_2_FLAT_Q10
+                                               * posPeak1SpecFlatFX, MIN_FLAT_Q10); //Q10
+    }
+    // done with flatness feature
+
+    if (useFeatureSpecDiff) {
+      //compute two peaks for spectral difference
+      maxPeak1 = 0;
+      maxPeak2 = 0;
+      posPeak1SpecDiffFX = 0;
+      posPeak2SpecDiffFX = 0;
+      weightPeak1SpecDiff = 0;
+      weightPeak2SpecDiff = 0;
+      // peaks for spectral difference
+      for (i = 0; i < HIST_PAR_EST; i++) {
+        if (inst->histSpecDiff[i] > maxPeak1) {
+          // Found new "first" peak
+          maxPeak2 = maxPeak1;
+          weightPeak2SpecDiff = weightPeak1SpecDiff;
+          posPeak2SpecDiffFX = posPeak1SpecDiffFX;
+
+          maxPeak1 = inst->histSpecDiff[i];
+          weightPeak1SpecDiff = inst->histSpecDiff[i];
+          posPeak1SpecDiffFX = (uint32_t)(2 * i + 1);
+        } else if (inst->histSpecDiff[i] > maxPeak2) {
+          // Found new "second" peak
+          maxPeak2 = inst->histSpecDiff[i];
+          weightPeak2SpecDiff = inst->histSpecDiff[i];
+          posPeak2SpecDiffFX = (uint32_t)(2 * i + 1);
+        }
+      }
+
+      // merge the two peaks if they are close
+      if ((posPeak1SpecDiffFX - posPeak2SpecDiffFX < LIM_PEAK_SPACE_FLAT_DIFF)
+          && (weightPeak2SpecDiff * LIM_PEAK_WEIGHT_FLAT_DIFF > weightPeak1SpecDiff)) {
+        weightPeak1SpecDiff += weightPeak2SpecDiff;
+        posPeak1SpecDiffFX = (posPeak1SpecDiffFX + posPeak2SpecDiffFX) >> 1;
+      }
+      // get the threshold value and check if value is within min/max range
+      inst->thresholdSpecDiff = WEBRTC_SPL_SAT(MAX_DIFF, FACTOR_1_LRT_DIFF
+                                               * posPeak1SpecDiffFX, MIN_DIFF); //5x bigger
+      //reject if weight of peaks is not large enough
+      if (weightPeak1SpecDiff < THRES_WEIGHT_FLAT_DIFF) {
+        useFeatureSpecDiff = 0;
+      }
+      // done with spectral difference feature
+    }
+
+    // select the weights between the features
+    // inst->priorModelPars[4] is weight for LRT: always selected
+    featureSum = 6 / (1 + useFeatureSpecFlat + useFeatureSpecDiff);
+    inst->weightLogLrt = featureSum;
+    inst->weightSpecFlat = useFeatureSpecFlat * featureSum;
+    inst->weightSpecDiff = useFeatureSpecDiff * featureSum;
+
+    // set histograms to zero for next update
+    WebRtcSpl_ZerosArrayW16(inst->histLrt, HIST_PAR_EST);
+    WebRtcSpl_ZerosArrayW16(inst->histSpecDiff, HIST_PAR_EST);
+    WebRtcSpl_ZerosArrayW16(inst->histSpecFlat, HIST_PAR_EST);
+  }  // end of flag == 1
+}
+
+
+// Compute spectral flatness on input spectrum
+// magn is the magnitude spectrum
+// spectral flatness is returned in inst->featureSpecFlat
+void WebRtcNsx_ComputeSpectralFlatness(NoiseSuppressionFixedC* inst,
+                                       uint16_t* magn) {
+  uint32_t tmpU32;
+  uint32_t avgSpectralFlatnessNum, avgSpectralFlatnessDen;
+
+  int32_t tmp32;
+  int32_t currentSpectralFlatness, logCurSpectralFlatness;
+
+  int16_t zeros, frac, intPart;
+
+  size_t i;
+
+  // for flatness
+  avgSpectralFlatnessNum = 0;
+  avgSpectralFlatnessDen = inst->sumMagn - (uint32_t)magn[0]; // Q(normData-stages)
+
+  // compute log of ratio of the geometric to arithmetic mean: check for log(0) case
+  // flatness = exp( sum(log(magn[i]))/N - log(sum(magn[i])/N) )
+  //          = exp( sum(log(magn[i]))/N ) * N / sum(magn[i])
+  //          = 2^( sum(log2(magn[i]))/N - (log2(sum(magn[i])) - log2(N)) ) [This is used]
+  for (i = 1; i < inst->magnLen; i++) {
+    // First bin is excluded from spectrum measures. Number of bins is now a power of 2
+    if (magn[i]) {
+      zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
+      frac = (int16_t)(((uint32_t)((uint32_t)(magn[i]) << zeros)
+                              & 0x7FFFFFFF) >> 23);
+      // log2(magn(i))
+      assert(frac < 256);
+      tmpU32 = (uint32_t)(((31 - zeros) << 8)
+                                + WebRtcNsx_kLogTableFrac[frac]); // Q8
+      avgSpectralFlatnessNum += tmpU32; // Q8
+    } else {
+      //if at least one frequency component is zero, treat separately
+      tmpU32 = WEBRTC_SPL_UMUL_32_16(inst->featureSpecFlat, SPECT_FLAT_TAVG_Q14); // Q24
+      inst->featureSpecFlat -= tmpU32 >> 14;  // Q10
+      return;
+    }
+  }
+  //ratio and inverse log: check for case of log(0)
+  zeros = WebRtcSpl_NormU32(avgSpectralFlatnessDen);
+  frac = (int16_t)(((avgSpectralFlatnessDen << zeros) & 0x7FFFFFFF) >> 23);
+  // log2(avgSpectralFlatnessDen)
+  assert(frac < 256);
+  tmp32 = (int32_t)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); // Q8
+  logCurSpectralFlatness = (int32_t)avgSpectralFlatnessNum;
+  logCurSpectralFlatness += ((int32_t)(inst->stages - 1) << (inst->stages + 7)); // Q(8+stages-1)
+  logCurSpectralFlatness -= (tmp32 << (inst->stages - 1));
+  logCurSpectralFlatness <<= (10 - inst->stages);  // Q17
+  tmp32 = (int32_t)(0x00020000 | (WEBRTC_SPL_ABS_W32(logCurSpectralFlatness)
+                                        & 0x0001FFFF)); //Q17
+  intPart = 7 - (logCurSpectralFlatness >> 17);  // Add 7 for output in Q10.
+  if (intPart > 0) {
+    currentSpectralFlatness = tmp32 >> intPart;
+  } else {
+    currentSpectralFlatness = tmp32 << -intPart;
+  }
+
+  //time average update of spectral flatness feature
+  tmp32 = currentSpectralFlatness - (int32_t)inst->featureSpecFlat; // Q10
+  tmp32 *= SPECT_FLAT_TAVG_Q14;  // Q24
+  inst->featureSpecFlat += tmp32 >> 14;  // Q10
+  // done with flatness feature
+}
+
+
+// Compute the difference measure between input spectrum and a template/learned noise spectrum
+// magn_tmp is the input spectrum
+// the reference/template spectrum is  inst->magn_avg_pause[i]
+// returns (normalized) spectral difference in inst->featureSpecDiff
+void WebRtcNsx_ComputeSpectralDifference(NoiseSuppressionFixedC* inst,
+                                         uint16_t* magnIn) {
+  // This is to be calculated:
+  // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause)
+
+  uint32_t tmpU32no1, tmpU32no2;
+  uint32_t varMagnUFX, varPauseUFX, avgDiffNormMagnUFX;
+
+  int32_t tmp32no1, tmp32no2;
+  int32_t avgPauseFX, avgMagnFX, covMagnPauseFX;
+  int32_t maxPause, minPause;
+
+  int16_t tmp16no1;
+
+  size_t i;
+  int norm32, nShifts;
+
+  avgPauseFX = 0;
+  maxPause = 0;
+  minPause = inst->avgMagnPause[0]; // Q(prevQMagn)
+  // compute average quantities
+  for (i = 0; i < inst->magnLen; i++) {
+    // Compute mean of magn_pause
+    avgPauseFX += inst->avgMagnPause[i]; // in Q(prevQMagn)
+    maxPause = WEBRTC_SPL_MAX(maxPause, inst->avgMagnPause[i]);
+    minPause = WEBRTC_SPL_MIN(minPause, inst->avgMagnPause[i]);
+  }
+  // normalize by replacing div of "inst->magnLen" with "inst->stages-1" shifts
+  avgPauseFX >>= inst->stages - 1;
+  avgMagnFX = inst->sumMagn >> (inst->stages - 1);
+  // Largest possible deviation in magnPause for (co)var calculations
+  tmp32no1 = WEBRTC_SPL_MAX(maxPause - avgPauseFX, avgPauseFX - minPause);
+  // Get number of shifts to make sure we don't get wrap around in varPause
+  nShifts = WEBRTC_SPL_MAX(0, 10 + inst->stages - WebRtcSpl_NormW32(tmp32no1));
+
+  varMagnUFX = 0;
+  varPauseUFX = 0;
+  covMagnPauseFX = 0;
+  for (i = 0; i < inst->magnLen; i++) {
+    // Compute var and cov of magn and magn_pause
+    tmp16no1 = (int16_t)((int32_t)magnIn[i] - avgMagnFX);
+    tmp32no2 = inst->avgMagnPause[i] - avgPauseFX;
+    varMagnUFX += (uint32_t)(tmp16no1 * tmp16no1);  // Q(2*qMagn)
+    tmp32no1 = tmp32no2 * tmp16no1;  // Q(prevQMagn+qMagn)
+    covMagnPauseFX += tmp32no1; // Q(prevQMagn+qMagn)
+    tmp32no1 = tmp32no2 >> nShifts;  // Q(prevQMagn-minPause).
+    varPauseUFX += tmp32no1 * tmp32no1;  // Q(2*(prevQMagn-minPause))
+  }
+  //update of average magnitude spectrum: Q(-2*stages) and averaging replaced by shifts
+  inst->curAvgMagnEnergy +=
+      inst->magnEnergy >> (2 * inst->normData + inst->stages - 1);
+
+  avgDiffNormMagnUFX = varMagnUFX; // Q(2*qMagn)
+  if ((varPauseUFX) && (covMagnPauseFX)) {
+    tmpU32no1 = (uint32_t)WEBRTC_SPL_ABS_W32(covMagnPauseFX); // Q(prevQMagn+qMagn)
+    norm32 = WebRtcSpl_NormU32(tmpU32no1) - 16;
+    if (norm32 > 0) {
+      tmpU32no1 <<= norm32;  // Q(prevQMagn+qMagn+norm32)
+    } else {
+      tmpU32no1 >>= -norm32;  // Q(prevQMagn+qMagn+norm32)
+    }
+    tmpU32no2 = WEBRTC_SPL_UMUL(tmpU32no1, tmpU32no1); // Q(2*(prevQMagn+qMagn-norm32))
+
+    nShifts += norm32;
+    nShifts <<= 1;
+    if (nShifts < 0) {
+      varPauseUFX >>= (-nShifts); // Q(2*(qMagn+norm32+minPause))
+      nShifts = 0;
+    }
+    if (varPauseUFX > 0) {
+      // Q(2*(qMagn+norm32-16+minPause))
+      tmpU32no1 = tmpU32no2 / varPauseUFX;
+      tmpU32no1 >>= nShifts;
+
+      // Q(2*qMagn)
+      avgDiffNormMagnUFX -= WEBRTC_SPL_MIN(avgDiffNormMagnUFX, tmpU32no1);
+    } else {
+      avgDiffNormMagnUFX = 0;
+    }
+  }
+  //normalize and compute time average update of difference feature
+  tmpU32no1 = avgDiffNormMagnUFX >> (2 * inst->normData);
+  if (inst->featureSpecDiff > tmpU32no1) {
+    tmpU32no2 = WEBRTC_SPL_UMUL_32_16(inst->featureSpecDiff - tmpU32no1,
+                                      SPECT_DIFF_TAVG_Q8); // Q(8-2*stages)
+    inst->featureSpecDiff -= tmpU32no2 >> 8;  // Q(-2*stages)
+  } else {
+    tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no1 - inst->featureSpecDiff,
+                                      SPECT_DIFF_TAVG_Q8); // Q(8-2*stages)
+    inst->featureSpecDiff += tmpU32no2 >> 8;  // Q(-2*stages)
+  }
+}
+
+// Transform input (speechFrame) to frequency domain magnitude (magnU16)
+void WebRtcNsx_DataAnalysis(NoiseSuppressionFixedC* inst,
+                            short* speechFrame,
+                            uint16_t* magnU16) {
+  uint32_t tmpU32no1;
+
+  int32_t   tmp_1_w32 = 0;
+  int32_t   tmp_2_w32 = 0;
+  int32_t   sum_log_magn = 0;
+  int32_t   sum_log_i_log_magn = 0;
+
+  uint16_t  sum_log_magn_u16 = 0;
+  uint16_t  tmp_u16 = 0;
+
+  int16_t   sum_log_i = 0;
+  int16_t   sum_log_i_square = 0;
+  int16_t   frac = 0;
+  int16_t   log2 = 0;
+  int16_t   matrix_determinant = 0;
+  int16_t   maxWinData;
+
+  size_t i, j;
+  int zeros;
+  int net_norm = 0;
+  int right_shifts_in_magnU16 = 0;
+  int right_shifts_in_initMagnEst = 0;
+
+  int16_t winData_buff[ANAL_BLOCKL_MAX * 2 + 16];
+  int16_t realImag_buff[ANAL_BLOCKL_MAX * 2 + 16];
+
+  // Align the structures to 32-byte boundary for the FFT function.
+  int16_t* winData = (int16_t*) (((uintptr_t)winData_buff + 31) & ~31);
+  int16_t* realImag = (int16_t*) (((uintptr_t) realImag_buff + 31) & ~31);
+
+  // Update analysis buffer for lower band, and window data before FFT.
+  WebRtcNsx_AnalysisUpdate(inst, winData, speechFrame);
+
+  // Get input energy
+  inst->energyIn =
+      WebRtcSpl_Energy(winData, inst->anaLen, &inst->scaleEnergyIn);
+
+  // Reset zero input flag
+  inst->zeroInputSignal = 0;
+  // Acquire norm for winData
+  maxWinData = WebRtcSpl_MaxAbsValueW16(winData, inst->anaLen);
+  inst->normData = WebRtcSpl_NormW16(maxWinData);
+  if (maxWinData == 0) {
+    // Treat zero input separately.
+    inst->zeroInputSignal = 1;
+    return;
+  }
+
+  // Determine the net normalization in the frequency domain
+  net_norm = inst->stages - inst->normData;
+  // Track lowest normalization factor and use it to prevent wrap around in shifting
+  right_shifts_in_magnU16 = inst->normData - inst->minNorm;
+  right_shifts_in_initMagnEst = WEBRTC_SPL_MAX(-right_shifts_in_magnU16, 0);
+  inst->minNorm -= right_shifts_in_initMagnEst;
+  right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0);
+
+  // create realImag as winData interleaved with zeros (= imag. part), normalize it
+  WebRtcNsx_NormalizeRealBuffer(inst, winData, realImag);
+
+  // FFT output will be in winData[].
+  WebRtcSpl_RealForwardFFT(inst->real_fft, realImag, winData);
+
+  inst->imag[0] = 0; // Q(normData-stages)
+  inst->imag[inst->anaLen2] = 0;
+  inst->real[0] = winData[0]; // Q(normData-stages)
+  inst->real[inst->anaLen2] = winData[inst->anaLen];
+  // Q(2*(normData-stages))
+  inst->magnEnergy = (uint32_t)(inst->real[0] * inst->real[0]);
+  inst->magnEnergy += (uint32_t)(inst->real[inst->anaLen2] *
+                                 inst->real[inst->anaLen2]);
+  magnU16[0] = (uint16_t)WEBRTC_SPL_ABS_W16(inst->real[0]); // Q(normData-stages)
+  magnU16[inst->anaLen2] = (uint16_t)WEBRTC_SPL_ABS_W16(inst->real[inst->anaLen2]);
+  inst->sumMagn = (uint32_t)magnU16[0]; // Q(normData-stages)
+  inst->sumMagn += (uint32_t)magnU16[inst->anaLen2];
+
+  if (inst->blockIndex >= END_STARTUP_SHORT) {
+    for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
+      inst->real[i] = winData[j];
+      inst->imag[i] = -winData[j + 1];
+      // magnitude spectrum
+      // energy in Q(2*(normData-stages))
+      tmpU32no1 = (uint32_t)(winData[j] * winData[j]);
+      tmpU32no1 += (uint32_t)(winData[j + 1] * winData[j + 1]);
+      inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages))
+
+      magnU16[i] = (uint16_t)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages)
+      inst->sumMagn += (uint32_t)magnU16[i]; // Q(normData-stages)
+    }
+  } else {
+    //
+    // Gather information during startup for noise parameter estimation
+    //
+
+    // Switch initMagnEst to Q(minNorm-stages)
+    inst->initMagnEst[0] >>= right_shifts_in_initMagnEst;
+    inst->initMagnEst[inst->anaLen2] >>= right_shifts_in_initMagnEst;
+
+    // Update initMagnEst with magnU16 in Q(minNorm-stages).
+    inst->initMagnEst[0] += magnU16[0] >> right_shifts_in_magnU16;
+    inst->initMagnEst[inst->anaLen2] +=
+        magnU16[inst->anaLen2] >> right_shifts_in_magnU16;
+
+    log2 = 0;
+    if (magnU16[inst->anaLen2]) {
+      // Calculate log2(magnU16[inst->anaLen2])
+      zeros = WebRtcSpl_NormU32((uint32_t)magnU16[inst->anaLen2]);
+      frac = (int16_t)((((uint32_t)magnU16[inst->anaLen2] << zeros) &
+                              0x7FFFFFFF) >> 23); // Q8
+      // log2(magnU16(i)) in Q8
+      assert(frac < 256);
+      log2 = (int16_t)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]);
+    }
+
+    sum_log_magn = (int32_t)log2; // Q8
+    // sum_log_i_log_magn in Q17
+    sum_log_i_log_magn = (kLogIndex[inst->anaLen2] * log2) >> 3;
+
+    for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
+      inst->real[i] = winData[j];
+      inst->imag[i] = -winData[j + 1];
+      // magnitude spectrum
+      // energy in Q(2*(normData-stages))
+      tmpU32no1 = (uint32_t)(winData[j] * winData[j]);
+      tmpU32no1 += (uint32_t)(winData[j + 1] * winData[j + 1]);
+      inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages))
+
+      magnU16[i] = (uint16_t)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages)
+      inst->sumMagn += (uint32_t)magnU16[i]; // Q(normData-stages)
+
+      // Switch initMagnEst to Q(minNorm-stages)
+      inst->initMagnEst[i] >>= right_shifts_in_initMagnEst;
+
+      // Update initMagnEst with magnU16 in Q(minNorm-stages).
+      inst->initMagnEst[i] += magnU16[i] >> right_shifts_in_magnU16;
+
+      if (i >= kStartBand) {
+        // For pink noise estimation. Collect data neglecting lower frequency band
+        log2 = 0;
+        if (magnU16[i]) {
+          zeros = WebRtcSpl_NormU32((uint32_t)magnU16[i]);
+          frac = (int16_t)((((uint32_t)magnU16[i] << zeros) &
+                                  0x7FFFFFFF) >> 23);
+          // log2(magnU16(i)) in Q8
+          assert(frac < 256);
+          log2 = (int16_t)(((31 - zeros) << 8)
+                                 + WebRtcNsx_kLogTableFrac[frac]);
+        }
+        sum_log_magn += (int32_t)log2; // Q8
+        // sum_log_i_log_magn in Q17
+        sum_log_i_log_magn += (kLogIndex[i] * log2) >> 3;
+      }
+    }
+
+    //
+    //compute simplified noise model during startup
+    //
+
+    // Estimate White noise
+
+    // Switch whiteNoiseLevel to Q(minNorm-stages)
+    inst->whiteNoiseLevel >>= right_shifts_in_initMagnEst;
+
+    // Update the average magnitude spectrum, used as noise estimate.
+    tmpU32no1 = WEBRTC_SPL_UMUL_32_16(inst->sumMagn, inst->overdrive);
+    tmpU32no1 >>= inst->stages + 8;
+
+    // Replacing division above with 'stages' shifts
+    // Shift to same Q-domain as whiteNoiseLevel
+    tmpU32no1 >>= right_shifts_in_magnU16;
+    // This operation is safe from wrap around as long as END_STARTUP_SHORT < 128
+    assert(END_STARTUP_SHORT < 128);
+    inst->whiteNoiseLevel += tmpU32no1; // Q(minNorm-stages)
+
+    // Estimate Pink noise parameters
+    // Denominator used in both parameter estimates.
+    // The value is only dependent on the size of the frequency band (kStartBand)
+    // and to reduce computational complexity stored in a table (kDeterminantEstMatrix[])
+    assert(kStartBand < 66);
+    matrix_determinant = kDeterminantEstMatrix[kStartBand]; // Q0
+    sum_log_i = kSumLogIndex[kStartBand]; // Q5
+    sum_log_i_square = kSumSquareLogIndex[kStartBand]; // Q2
+    if (inst->fs == 8000) {
+      // Adjust values to shorter blocks in narrow band.
+      tmp_1_w32 = (int32_t)matrix_determinant;
+      tmp_1_w32 += (kSumLogIndex[65] * sum_log_i) >> 9;
+      tmp_1_w32 -= (kSumLogIndex[65] * kSumLogIndex[65]) >> 10;
+      tmp_1_w32 -= (int32_t)sum_log_i_square << 4;
+      tmp_1_w32 -= ((inst->magnLen - kStartBand) * kSumSquareLogIndex[65]) >> 2;
+      matrix_determinant = (int16_t)tmp_1_w32;
+      sum_log_i -= kSumLogIndex[65]; // Q5
+      sum_log_i_square -= kSumSquareLogIndex[65]; // Q2
+    }
+
+    // Necessary number of shifts to fit sum_log_magn in a word16
+    zeros = 16 - WebRtcSpl_NormW32(sum_log_magn);
+    if (zeros < 0) {
+      zeros = 0;
+    }
+    tmp_1_w32 = sum_log_magn << 1;  // Q9
+    sum_log_magn_u16 = (uint16_t)(tmp_1_w32 >> zeros);  // Q(9-zeros).
+
+    // Calculate and update pinkNoiseNumerator. Result in Q11.
+    tmp_2_w32 = WEBRTC_SPL_MUL_16_U16(sum_log_i_square, sum_log_magn_u16); // Q(11-zeros)
+    tmpU32no1 = sum_log_i_log_magn >> 12;  // Q5
+
+    // Shift the largest value of sum_log_i and tmp32no3 before multiplication
+    tmp_u16 = ((uint16_t)sum_log_i << 1);  // Q6
+    if ((uint32_t)sum_log_i > tmpU32no1) {
+      tmp_u16 >>= zeros;
+    } else {
+      tmpU32no1 >>= zeros;
+    }
+    tmp_2_w32 -= (int32_t)WEBRTC_SPL_UMUL_32_16(tmpU32no1, tmp_u16); // Q(11-zeros)
+    matrix_determinant >>= zeros;  // Q(-zeros)
+    tmp_2_w32 = WebRtcSpl_DivW32W16(tmp_2_w32, matrix_determinant); // Q11
+    tmp_2_w32 += (int32_t)net_norm << 11;  // Q11
+    if (tmp_2_w32 < 0) {
+      tmp_2_w32 = 0;
+    }
+    inst->pinkNoiseNumerator += tmp_2_w32; // Q11
+
+    // Calculate and update pinkNoiseExp. Result in Q14.
+    tmp_2_w32 = WEBRTC_SPL_MUL_16_U16(sum_log_i, sum_log_magn_u16); // Q(14-zeros)
+    tmp_1_w32 = sum_log_i_log_magn >> (3 + zeros);
+    tmp_1_w32 *= inst->magnLen - kStartBand;
+    tmp_2_w32 -= tmp_1_w32; // Q(14-zeros)
+    if (tmp_2_w32 > 0) {
+      // If the exponential parameter is negative force it to zero, which means a
+      // flat spectrum.
+      tmp_1_w32 = WebRtcSpl_DivW32W16(tmp_2_w32, matrix_determinant); // Q14
+      inst->pinkNoiseExp += WEBRTC_SPL_SAT(16384, tmp_1_w32, 0); // Q14
+    }
+  }
+}
+
+void WebRtcNsx_DataSynthesis(NoiseSuppressionFixedC* inst, short* outFrame) {
+  int32_t energyOut;
+
+  int16_t realImag_buff[ANAL_BLOCKL_MAX * 2 + 16];
+  int16_t rfft_out_buff[ANAL_BLOCKL_MAX * 2 + 16];
+
+  // Align the structures to 32-byte boundary for the FFT function.
+  int16_t* realImag = (int16_t*) (((uintptr_t)realImag_buff + 31) & ~31);
+  int16_t* rfft_out = (int16_t*) (((uintptr_t) rfft_out_buff + 31) & ~31);
+
+  int16_t tmp16no1, tmp16no2;
+  int16_t energyRatio;
+  int16_t gainFactor, gainFactor1, gainFactor2;
+
+  size_t i;
+  int outCIFFT;
+  int scaleEnergyOut = 0;
+
+  if (inst->zeroInputSignal) {
+    // synthesize the special case of zero input
+    // read out fully processed segment
+    for (i = 0; i < inst->blockLen10ms; i++) {
+      outFrame[i] = inst->synthesisBuffer[i]; // Q0
+    }
+    // update synthesis buffer
+    memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
+        (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer));
+    WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms,
+                            inst->blockLen10ms);
+    return;
+  }
+
+  // Filter the data in the frequency domain, and create spectrum.
+  WebRtcNsx_PrepareSpectrum(inst, realImag);
+
+  // Inverse FFT output will be in rfft_out[].
+  outCIFFT = WebRtcSpl_RealInverseFFT(inst->real_fft, realImag, rfft_out);
+
+  WebRtcNsx_Denormalize(inst, rfft_out, outCIFFT);
+
+  //scale factor: only do it after END_STARTUP_LONG time
+  gainFactor = 8192; // 8192 = Q13(1.0)
+  if (inst->gainMap == 1 &&
+      inst->blockIndex > END_STARTUP_LONG &&
+      inst->energyIn > 0) {
+    // Q(-scaleEnergyOut)
+    energyOut = WebRtcSpl_Energy(inst->real, inst->anaLen, &scaleEnergyOut);
+    if (scaleEnergyOut == 0 && !(energyOut & 0x7f800000)) {
+      energyOut = WEBRTC_SPL_SHIFT_W32(energyOut, 8 + scaleEnergyOut
+                                       - inst->scaleEnergyIn);
+    } else {
+      // |energyIn| is currently in Q(|scaleEnergyIn|), but to later on end up
+      // with an |energyRatio| in Q8 we need to change the Q-domain to
+      // Q(-8-scaleEnergyOut).
+      inst->energyIn >>= 8 + scaleEnergyOut - inst->scaleEnergyIn;
+    }
+
+    assert(inst->energyIn > 0);
+    energyRatio = (energyOut + inst->energyIn / 2) / inst->energyIn;  // Q8
+    // Limit the ratio to [0, 1] in Q8, i.e., [0, 256]
+    energyRatio = WEBRTC_SPL_SAT(256, energyRatio, 0);
+
+    // all done in lookup tables now
+    assert(energyRatio < 257);
+    gainFactor1 = kFactor1Table[energyRatio]; // Q8
+    gainFactor2 = inst->factor2Table[energyRatio]; // Q8
+
+    //combine both scales with speech/noise prob: note prior (priorSpeechProb) is not frequency dependent
+
+    // factor = inst->priorSpeechProb*factor1 + (1.0-inst->priorSpeechProb)*factor2; // original code
+    tmp16no1 = (int16_t)(((16384 - inst->priorNonSpeechProb) * gainFactor1) >>
+        14);  // in Q13, where 16384 = Q14(1.0)
+    tmp16no2 = (int16_t)((inst->priorNonSpeechProb * gainFactor2) >> 14);
+    gainFactor = tmp16no1 + tmp16no2; // Q13
+  }  // out of flag_gain_map==1
+
+  // Synthesis, read out fully processed segment, and update synthesis buffer.
+  WebRtcNsx_SynthesisUpdate(inst, outFrame, gainFactor);
+}
+
+void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst,
+                           const short* const* speechFrame,
+                           int num_bands,
+                           short* const* outFrame) {
+  // main routine for noise suppression
+
+  uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
+  uint32_t satMax, maxNoiseU32;
+  uint32_t tmpMagnU32, tmpNoiseU32;
+  uint32_t nearMagnEst;
+  uint32_t noiseUpdateU32;
+  uint32_t noiseU32[HALF_ANAL_BLOCKL];
+  uint32_t postLocSnr[HALF_ANAL_BLOCKL];
+  uint32_t priorLocSnr[HALF_ANAL_BLOCKL];
+  uint32_t prevNearSnr[HALF_ANAL_BLOCKL];
+  uint32_t curNearSnr;
+  uint32_t priorSnr;
+  uint32_t noise_estimate = 0;
+  uint32_t noise_estimate_avg = 0;
+  uint32_t numerator = 0;
+
+  int32_t tmp32no1, tmp32no2;
+  int32_t pink_noise_num_avg = 0;
+
+  uint16_t tmpU16no1;
+  uint16_t magnU16[HALF_ANAL_BLOCKL];
+  uint16_t prevNoiseU16[HALF_ANAL_BLOCKL];
+  uint16_t nonSpeechProbFinal[HALF_ANAL_BLOCKL];
+  uint16_t gammaNoise, prevGammaNoise;
+  uint16_t noiseSupFilterTmp[HALF_ANAL_BLOCKL];
+
+  int16_t qMagn, qNoise;
+  int16_t avgProbSpeechHB, gainModHB, avgFilterGainHB, gainTimeDomainHB;
+  int16_t pink_noise_exp_avg = 0;
+
+  size_t i, j;
+  int nShifts, postShifts;
+  int norm32no1, norm32no2;
+  int flag, sign;
+  int q_domain_to_use = 0;
+
+  // Code for ARMv7-Neon platform assumes the following:
+  assert(inst->anaLen > 0);
+  assert(inst->anaLen2 > 0);
+  assert(inst->anaLen % 16 == 0);
+  assert(inst->anaLen2 % 8 == 0);
+  assert(inst->blockLen10ms > 0);
+  assert(inst->blockLen10ms % 16 == 0);
+  assert(inst->magnLen == inst->anaLen2 + 1);
+
+#ifdef NS_FILEDEBUG
+  if (fwrite(spframe, sizeof(short),
+             inst->blockLen10ms, inst->infile) != inst->blockLen10ms) {
+    assert(false);
+  }
+#endif
+
+  // Check that initialization has been done
+  assert(inst->initFlag == 1);
+  assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX);
+
+  const short* const* speechFrameHB = NULL;
+  short* const* outFrameHB = NULL;
+  size_t num_high_bands = 0;
+  if (num_bands > 1) {
+    speechFrameHB = &speechFrame[1];
+    outFrameHB = &outFrame[1];
+    num_high_bands = (size_t)(num_bands - 1);
+  }
+
+  // Store speechFrame and transform to frequency domain
+  WebRtcNsx_DataAnalysis(inst, (short*)speechFrame[0], magnU16);
+
+  if (inst->zeroInputSignal) {
+    WebRtcNsx_DataSynthesis(inst, outFrame[0]);
+
+    if (num_bands > 1) {
+      // update analysis buffer for H band
+      // append new data to buffer FX
+      for (i = 0; i < num_high_bands; ++i) {
+        int block_shift = inst->anaLen - inst->blockLen10ms;
+        memcpy(inst->dataBufHBFX[i], inst->dataBufHBFX[i] + inst->blockLen10ms,
+            block_shift * sizeof(*inst->dataBufHBFX[i]));
+        memcpy(inst->dataBufHBFX[i] + block_shift, speechFrameHB[i],
+            inst->blockLen10ms * sizeof(*inst->dataBufHBFX[i]));
+        for (j = 0; j < inst->blockLen10ms; j++) {
+          outFrameHB[i][j] = inst->dataBufHBFX[i][j]; // Q0
+        }
+      }
+    }  // end of H band gain computation
+    return;
+  }
+
+  // Update block index when we have something to process
+  inst->blockIndex++;
+  //
+
+  // Norm of magn
+  qMagn = inst->normData - inst->stages;
+
+  // Compute spectral flatness on input spectrum
+  WebRtcNsx_ComputeSpectralFlatness(inst, magnU16);
+
+  // quantile noise estimate
+  WebRtcNsx_NoiseEstimation(inst, magnU16, noiseU32, &qNoise);
+
+  //noise estimate from previous frame
+  for (i = 0; i < inst->magnLen; i++) {
+    prevNoiseU16[i] = (uint16_t)(inst->prevNoiseU32[i] >> 11);  // Q(prevQNoise)
+  }
+
+  if (inst->blockIndex < END_STARTUP_SHORT) {
+    // Noise Q-domain to be used later; see description at end of section.
+    q_domain_to_use = WEBRTC_SPL_MIN((int)qNoise, inst->minNorm - inst->stages);
+
+    // Calculate frequency independent parts in parametric noise estimate and calculate
+    // the estimate for the lower frequency band (same values for all frequency bins)
+    if (inst->pinkNoiseExp) {
+      pink_noise_exp_avg = (int16_t)WebRtcSpl_DivW32W16(inst->pinkNoiseExp,
+                                                              (int16_t)(inst->blockIndex + 1)); // Q14
+      pink_noise_num_avg = WebRtcSpl_DivW32W16(inst->pinkNoiseNumerator,
+                                               (int16_t)(inst->blockIndex + 1)); // Q11
+      WebRtcNsx_CalcParametricNoiseEstimate(inst,
+                                            pink_noise_exp_avg,
+                                            pink_noise_num_avg,
+                                            kStartBand,
+                                            &noise_estimate,
+                                            &noise_estimate_avg);
+    } else {
+      // Use white noise estimate if we have poor pink noise parameter estimates
+      noise_estimate = inst->whiteNoiseLevel; // Q(minNorm-stages)
+      noise_estimate_avg = noise_estimate / (inst->blockIndex + 1); // Q(minNorm-stages)
+    }
+    for (i = 0; i < inst->magnLen; i++) {
+      // Estimate the background noise using the pink noise parameters if permitted
+      if ((inst->pinkNoiseExp) && (i >= kStartBand)) {
+        // Reset noise_estimate
+        noise_estimate = 0;
+        noise_estimate_avg = 0;
+        // Calculate the parametric noise estimate for current frequency bin
+        WebRtcNsx_CalcParametricNoiseEstimate(inst,
+                                              pink_noise_exp_avg,
+                                              pink_noise_num_avg,
+                                              i,
+                                              &noise_estimate,
+                                              &noise_estimate_avg);
+      }
+      // Calculate parametric Wiener filter
+      noiseSupFilterTmp[i] = inst->denoiseBound;
+      if (inst->initMagnEst[i]) {
+        // numerator = (initMagnEst - noise_estimate * overdrive)
+        // Result in Q(8+minNorm-stages)
+        tmpU32no1 = WEBRTC_SPL_UMUL_32_16(noise_estimate, inst->overdrive);
+        numerator = inst->initMagnEst[i] << 8;
+        if (numerator > tmpU32no1) {
+          // Suppression filter coefficient larger than zero, so calculate.
+          numerator -= tmpU32no1;
+
+          // Determine number of left shifts in numerator for best accuracy after
+          // division
+          nShifts = WebRtcSpl_NormU32(numerator);
+          nShifts = WEBRTC_SPL_SAT(6, nShifts, 0);
+
+          // Shift numerator to Q(nShifts+8+minNorm-stages)
+          numerator <<= nShifts;
+
+          // Shift denominator to Q(nShifts-6+minNorm-stages)
+          tmpU32no1 = inst->initMagnEst[i] >> (6 - nShifts);
+          if (tmpU32no1 == 0) {
+            // This is only possible if numerator = 0, in which case
+            // we don't need any division.
+            tmpU32no1 = 1;
+          }
+          tmpU32no2 = numerator / tmpU32no1;  // Q14
+          noiseSupFilterTmp[i] = (uint16_t)WEBRTC_SPL_SAT(16384, tmpU32no2,
+              (uint32_t)(inst->denoiseBound)); // Q14
+        }
+      }
+      // Weight quantile noise 'noiseU32' with modeled noise 'noise_estimate_avg'
+      // 'noiseU32 is in Q(qNoise) and 'noise_estimate' in Q(minNorm-stages)
+      // To guarantee that we do not get wrap around when shifting to the same domain
+      // we use the lowest one. Furthermore, we need to save 6 bits for the weighting.
+      // 'noise_estimate_avg' can handle this operation by construction, but 'noiseU32'
+      // may not.
+
+      // Shift 'noiseU32' to 'q_domain_to_use'
+      tmpU32no1 = noiseU32[i] >> (qNoise - q_domain_to_use);
+      // Shift 'noise_estimate_avg' to 'q_domain_to_use'
+      tmpU32no2 = noise_estimate_avg >>
+          (inst->minNorm - inst->stages - q_domain_to_use);
+      // Make a simple check to see if we have enough room for weighting 'tmpU32no1'
+      // without wrap around
+      nShifts = 0;
+      if (tmpU32no1 & 0xfc000000) {
+        tmpU32no1 >>= 6;
+        tmpU32no2 >>= 6;
+        nShifts = 6;
+      }
+      tmpU32no1 *= inst->blockIndex;
+      tmpU32no2 *= (END_STARTUP_SHORT - inst->blockIndex);
+      // Add them together and divide by startup length
+      noiseU32[i] = WebRtcSpl_DivU32U16(tmpU32no1 + tmpU32no2, END_STARTUP_SHORT);
+      // Shift back if necessary
+      noiseU32[i] <<= nShifts;
+    }
+    // Update new Q-domain for 'noiseU32'
+    qNoise = q_domain_to_use;
+  }
+  // compute average signal during END_STARTUP_LONG time:
+  // used to normalize spectral difference measure
+  if (inst->blockIndex < END_STARTUP_LONG) {
+    // substituting division with shift ending up in Q(-2*stages)
+    inst->timeAvgMagnEnergyTmp +=
+        inst->magnEnergy >> (2 * inst->normData + inst->stages - 1);
+    inst->timeAvgMagnEnergy = WebRtcSpl_DivU32U16(inst->timeAvgMagnEnergyTmp,
+                                                  inst->blockIndex + 1);
+  }
+
+  //start processing at frames == converged+1
+  // STEP 1: compute prior and post SNR based on quantile noise estimates
+
+  // compute direct decision (DD) estimate of prior SNR: needed for new method
+  satMax = (uint32_t)1048575;// Largest possible value without getting overflow despite shifting 12 steps
+  postShifts = 6 + qMagn - qNoise;
+  nShifts = 5 - inst->prevQMagn + inst->prevQNoise;
+  for (i = 0; i < inst->magnLen; i++) {
+    // FLOAT:
+    // post SNR
+    // postLocSnr[i] = 0.0;
+    // if (magn[i] > noise[i])
+    // {
+    //   postLocSnr[i] = magn[i] / (noise[i] + 0.0001);
+    // }
+    // // previous post SNR
+    // // previous estimate: based on previous frame with gain filter (smooth is previous filter)
+    //
+    // prevNearSnr[i] = inst->prevMagnU16[i] / (inst->noisePrev[i] + 0.0001) * (inst->smooth[i]);
+    //
+    // // DD estimate is sum of two terms: current estimate and previous estimate
+    // // directed decision update of priorSnr (or we actually store [2*priorSnr+1])
+    //
+    // priorLocSnr[i] = DD_PR_SNR * prevNearSnr[i] + (1.0 - DD_PR_SNR) * (postLocSnr[i] - 1.0);
+
+    // calculate post SNR: output in Q11
+    postLocSnr[i] = 2048; // 1.0 in Q11
+    tmpU32no1 = (uint32_t)magnU16[i] << 6;  // Q(6+qMagn)
+    if (postShifts < 0) {
+      tmpU32no2 = noiseU32[i] >> -postShifts;  // Q(6+qMagn)
+    } else {
+      tmpU32no2 = noiseU32[i] << postShifts;  // Q(6+qMagn)
+    }
+    if (tmpU32no1 > tmpU32no2) {
+      // Current magnitude larger than noise
+      tmpU32no1 <<= 11;  // Q(17+qMagn)
+      if (tmpU32no2 > 0) {
+        tmpU32no1 /= tmpU32no2;  // Q11
+        postLocSnr[i] = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11
+      } else {
+        postLocSnr[i] = satMax;
+      }
+    }
+
+    // calculate prevNearSnr[i] and save for later instead of recalculating it later
+    // |nearMagnEst| in Q(prevQMagn + 14)
+    nearMagnEst = inst->prevMagnU16[i] * inst->noiseSupFilter[i];
+    tmpU32no1 = nearMagnEst << 3;  // Q(prevQMagn+17)
+    tmpU32no2 = inst->prevNoiseU32[i] >> nShifts;  // Q(prevQMagn+6)
+
+    if (tmpU32no2 > 0) {
+      tmpU32no1 /= tmpU32no2;  // Q11
+      tmpU32no1 = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11
+    } else {
+      tmpU32no1 = satMax; // Q11
+    }
+    prevNearSnr[i] = tmpU32no1; // Q11
+
+    //directed decision update of priorSnr
+    tmpU32no1 = WEBRTC_SPL_UMUL_32_16(prevNearSnr[i], DD_PR_SNR_Q11); // Q22
+    tmpU32no2 = WEBRTC_SPL_UMUL_32_16(postLocSnr[i] - 2048, ONE_MINUS_DD_PR_SNR_Q11); // Q22
+    priorSnr = tmpU32no1 + tmpU32no2 + 512; // Q22 (added 512 for rounding)
+    // priorLocSnr = 1 + 2*priorSnr
+    priorLocSnr[i] = 2048 + (priorSnr >> 10);  // Q11
+  }  // end of loop over frequencies
+  // done with step 1: DD computation of prior and post SNR
+
+  // STEP 2: compute speech/noise likelihood
+
+  //compute difference of input spectrum with learned/estimated noise spectrum
+  WebRtcNsx_ComputeSpectralDifference(inst, magnU16);
+  //compute histograms for determination of parameters (thresholds and weights for features)
+  //parameters are extracted once every window time (=inst->modelUpdate)
+  //counter update
+  inst->cntThresUpdate++;
+  flag = (int)(inst->cntThresUpdate == inst->modelUpdate);
+  //update histogram
+  WebRtcNsx_FeatureParameterExtraction(inst, flag);
+  //compute model parameters
+  if (flag) {
+    inst->cntThresUpdate = 0; // Reset counter
+    //update every window:
+    // get normalization for spectral difference for next window estimate
+
+    // Shift to Q(-2*stages)
+    inst->curAvgMagnEnergy >>= STAT_UPDATES;
+
+    tmpU32no1 = (inst->curAvgMagnEnergy + inst->timeAvgMagnEnergy + 1) >> 1; //Q(-2*stages)
+    // Update featureSpecDiff
+    if ((tmpU32no1 != inst->timeAvgMagnEnergy) && (inst->featureSpecDiff) &&
+        (inst->timeAvgMagnEnergy > 0)) {
+      norm32no1 = 0;
+      tmpU32no3 = tmpU32no1;
+      while (0xFFFF0000 & tmpU32no3) {
+        tmpU32no3 >>= 1;
+        norm32no1++;
+      }
+      tmpU32no2 = inst->featureSpecDiff;
+      while (0xFFFF0000 & tmpU32no2) {
+        tmpU32no2 >>= 1;
+        norm32no1++;
+      }
+      tmpU32no3 = WEBRTC_SPL_UMUL(tmpU32no3, tmpU32no2);
+      tmpU32no3 /= inst->timeAvgMagnEnergy;
+      if (WebRtcSpl_NormU32(tmpU32no3) < norm32no1) {
+        inst->featureSpecDiff = 0x007FFFFF;
+      } else {
+        inst->featureSpecDiff = WEBRTC_SPL_MIN(0x007FFFFF,
+                                               tmpU32no3 << norm32no1);
+      }
+    }
+
+    inst->timeAvgMagnEnergy = tmpU32no1; // Q(-2*stages)
+    inst->curAvgMagnEnergy = 0;
+  }
+
+  //compute speech/noise probability
+  WebRtcNsx_SpeechNoiseProb(inst, nonSpeechProbFinal, priorLocSnr, postLocSnr);
+
+  //time-avg parameter for noise update
+  gammaNoise = NOISE_UPDATE_Q8; // Q8
+
+  maxNoiseU32 = 0;
+  postShifts = inst->prevQNoise - qMagn;
+  nShifts = inst->prevQMagn - qMagn;
+  for (i = 0; i < inst->magnLen; i++) {
+    // temporary noise update: use it for speech frames if update value is less than previous
+    // the formula has been rewritten into:
+    // noiseUpdate = noisePrev[i] + (1 - gammaNoise) * nonSpeechProb * (magn[i] - noisePrev[i])
+
+    if (postShifts < 0) {
+      tmpU32no2 = magnU16[i] >> -postShifts;  // Q(prevQNoise)
+    } else {
+      tmpU32no2 = (uint32_t)magnU16[i] << postShifts;  // Q(prevQNoise)
+    }
+    if (prevNoiseU16[i] > tmpU32no2) {
+      sign = -1;
+      tmpU32no1 = prevNoiseU16[i] - tmpU32no2;
+    } else {
+      sign = 1;
+      tmpU32no1 = tmpU32no2 - prevNoiseU16[i];
+    }
+    noiseUpdateU32 = inst->prevNoiseU32[i]; // Q(prevQNoise+11)
+    tmpU32no3 = 0;
+    if ((tmpU32no1) && (nonSpeechProbFinal[i])) {
+      // This value will be used later, if gammaNoise changes
+      tmpU32no3 = WEBRTC_SPL_UMUL_32_16(tmpU32no1, nonSpeechProbFinal[i]); // Q(prevQNoise+8)
+      if (0x7c000000 & tmpU32no3) {
+        // Shifting required before multiplication
+        tmpU32no2 = (tmpU32no3 >> 5) * gammaNoise;  // Q(prevQNoise+11)
+      } else {
+        // We can do shifting after multiplication
+        tmpU32no2 = (tmpU32no3 * gammaNoise) >> 5;  // Q(prevQNoise+11)
+      }
+      if (sign > 0) {
+        noiseUpdateU32 += tmpU32no2; // Q(prevQNoise+11)
+      } else {
+        // This operation is safe. We can never get wrap around, since worst
+        // case scenario means magnU16 = 0
+        noiseUpdateU32 -= tmpU32no2; // Q(prevQNoise+11)
+      }
+    }
+
+    //increase gamma (i.e., less noise update) for frame likely to be speech
+    prevGammaNoise = gammaNoise;
+    gammaNoise = NOISE_UPDATE_Q8;
+    //time-constant based on speech/noise state
+    //increase gamma (i.e., less noise update) for frames likely to be speech
+    if (nonSpeechProbFinal[i] < ONE_MINUS_PROB_RANGE_Q8) {
+      gammaNoise = GAMMA_NOISE_TRANS_AND_SPEECH_Q8;
+    }
+
+    if (prevGammaNoise != gammaNoise) {
+      // new noise update
+      // this line is the same as above, only that the result is stored in a different variable and the gammaNoise
+      // has changed
+      //
+      // noiseUpdate = noisePrev[i] + (1 - gammaNoise) * nonSpeechProb * (magn[i] - noisePrev[i])
+
+      if (0x7c000000 & tmpU32no3) {
+        // Shifting required before multiplication
+        tmpU32no2 = (tmpU32no3 >> 5) * gammaNoise;  // Q(prevQNoise+11)
+      } else {
+        // We can do shifting after multiplication
+        tmpU32no2 = (tmpU32no3 * gammaNoise) >> 5;  // Q(prevQNoise+11)
+      }
+      if (sign > 0) {
+        tmpU32no1 = inst->prevNoiseU32[i] + tmpU32no2; // Q(prevQNoise+11)
+      } else {
+        tmpU32no1 = inst->prevNoiseU32[i] - tmpU32no2; // Q(prevQNoise+11)
+      }
+      if (noiseUpdateU32 > tmpU32no1) {
+        noiseUpdateU32 = tmpU32no1; // Q(prevQNoise+11)
+      }
+    }
+    noiseU32[i] = noiseUpdateU32; // Q(prevQNoise+11)
+    if (noiseUpdateU32 > maxNoiseU32) {
+      maxNoiseU32 = noiseUpdateU32;
+    }
+
+    // conservative noise update
+    // // original FLOAT code
+    // if (prob_speech < PROB_RANGE) {
+    // inst->avgMagnPause[i] = inst->avgMagnPause[i] + (1.0 - gamma_pause)*(magn[i] - inst->avgMagnPause[i]);
+    // }
+
+    tmp32no2 = WEBRTC_SPL_SHIFT_W32(inst->avgMagnPause[i], -nShifts);
+    if (nonSpeechProbFinal[i] > ONE_MINUS_PROB_RANGE_Q8) {
+      if (nShifts < 0) {
+        tmp32no1 = (int32_t)magnU16[i] - tmp32no2; // Q(qMagn)
+        tmp32no1 *= ONE_MINUS_GAMMA_PAUSE_Q8;  // Q(8+prevQMagn+nShifts)
+        tmp32no1 = (tmp32no1 + 128) >> 8;  // Q(qMagn).
+      } else {
+        // In Q(qMagn+nShifts)
+        tmp32no1 = ((int32_t)magnU16[i] << nShifts) - inst->avgMagnPause[i];
+        tmp32no1 *= ONE_MINUS_GAMMA_PAUSE_Q8;  // Q(8+prevQMagn+nShifts)
+        tmp32no1 = (tmp32no1 + (128 << nShifts)) >> (8 + nShifts);  // Q(qMagn).
+      }
+      tmp32no2 += tmp32no1; // Q(qMagn)
+    }
+    inst->avgMagnPause[i] = tmp32no2;
+  }  // end of frequency loop
+
+  norm32no1 = WebRtcSpl_NormU32(maxNoiseU32);
+  qNoise = inst->prevQNoise + norm32no1 - 5;
+  // done with step 2: noise update
+
+  // STEP 3: compute dd update of prior snr and post snr based on new noise estimate
+  nShifts = inst->prevQNoise + 11 - qMagn;
+  for (i = 0; i < inst->magnLen; i++) {
+    // FLOAT code
+    // // post and prior SNR
+    // curNearSnr = 0.0;
+    // if (magn[i] > noise[i])
+    // {
+    // curNearSnr = magn[i] / (noise[i] + 0.0001) - 1.0;
+    // }
+    // // DD estimate is sum of two terms: current estimate and previous estimate
+    // // directed decision update of snrPrior
+    // snrPrior = DD_PR_SNR * prevNearSnr[i] + (1.0 - DD_PR_SNR) * curNearSnr;
+    // // gain filter
+    // tmpFloat1 = inst->overdrive + snrPrior;
+    // tmpFloat2 = snrPrior / tmpFloat1;
+    // theFilter[i] = tmpFloat2;
+
+    // calculate curNearSnr again, this is necessary because a new noise estimate has been made since then. for the original
+    curNearSnr = 0; // Q11
+    if (nShifts < 0) {
+      // This case is equivalent with magn < noise which implies curNearSnr = 0;
+      tmpMagnU32 = (uint32_t)magnU16[i]; // Q(qMagn)
+      tmpNoiseU32 = noiseU32[i] << -nShifts;  // Q(qMagn)
+    } else if (nShifts > 17) {
+      tmpMagnU32 = (uint32_t)magnU16[i] << 17;  // Q(qMagn+17)
+      tmpNoiseU32 = noiseU32[i] >> (nShifts - 17);  // Q(qMagn+17)
+    } else {
+      tmpMagnU32 = (uint32_t)magnU16[i] << nShifts;  // Q(qNoise_prev+11)
+      tmpNoiseU32 = noiseU32[i]; // Q(qNoise_prev+11)
+    }
+    if (tmpMagnU32 > tmpNoiseU32) {
+      tmpU32no1 = tmpMagnU32 - tmpNoiseU32; // Q(qCur)
+      norm32no2 = WEBRTC_SPL_MIN(11, WebRtcSpl_NormU32(tmpU32no1));
+      tmpU32no1 <<= norm32no2;  // Q(qCur+norm32no2)
+      tmpU32no2 = tmpNoiseU32 >> (11 - norm32no2);  // Q(qCur+norm32no2-11)
+      if (tmpU32no2 > 0) {
+        tmpU32no1 /= tmpU32no2;  // Q11
+      }
+      curNearSnr = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11
+    }
+
+    //directed decision update of priorSnr
+    // FLOAT
+    // priorSnr = DD_PR_SNR * prevNearSnr + (1.0-DD_PR_SNR) * curNearSnr;
+
+    tmpU32no1 = WEBRTC_SPL_UMUL_32_16(prevNearSnr[i], DD_PR_SNR_Q11); // Q22
+    tmpU32no2 = WEBRTC_SPL_UMUL_32_16(curNearSnr, ONE_MINUS_DD_PR_SNR_Q11); // Q22
+    priorSnr = tmpU32no1 + tmpU32no2; // Q22
+
+    //gain filter
+    tmpU32no1 = inst->overdrive + ((priorSnr + 8192) >> 14);  // Q8
+    assert(inst->overdrive > 0);
+    tmpU16no1 = (priorSnr + tmpU32no1 / 2) / tmpU32no1;  // Q14
+    inst->noiseSupFilter[i] = WEBRTC_SPL_SAT(16384, tmpU16no1, inst->denoiseBound); // 16384 = Q14(1.0) // Q14
+
+    // Weight in the parametric Wiener filter during startup
+    if (inst->blockIndex < END_STARTUP_SHORT) {
+      // Weight the two suppression filters
+      tmpU32no1 = inst->noiseSupFilter[i] * inst->blockIndex;
+      tmpU32no2 = noiseSupFilterTmp[i] *
+          (END_STARTUP_SHORT - inst->blockIndex);
+      tmpU32no1 += tmpU32no2;
+      inst->noiseSupFilter[i] = (uint16_t)WebRtcSpl_DivU32U16(tmpU32no1,
+                                                                    END_STARTUP_SHORT);
+    }
+  }  // end of loop over frequencies
+  //done with step3
+
+  // save noise and magnitude spectrum for next frame
+  inst->prevQNoise = qNoise;
+  inst->prevQMagn = qMagn;
+  if (norm32no1 > 5) {
+    for (i = 0; i < inst->magnLen; i++) {
+      inst->prevNoiseU32[i] = noiseU32[i] << (norm32no1 - 5);  // Q(qNoise+11)
+      inst->prevMagnU16[i] = magnU16[i]; // Q(qMagn)
+    }
+  } else {
+    for (i = 0; i < inst->magnLen; i++) {
+      inst->prevNoiseU32[i] = noiseU32[i] >> (5 - norm32no1);  // Q(qNoise+11)
+      inst->prevMagnU16[i] = magnU16[i]; // Q(qMagn)
+    }
+  }
+
+  WebRtcNsx_DataSynthesis(inst, outFrame[0]);
+#ifdef NS_FILEDEBUG
+  if (fwrite(outframe, sizeof(short),
+             inst->blockLen10ms, inst->outfile) != inst->blockLen10ms) {
+    assert(false);
+  }
+#endif
+
+  //for H band:
+  // only update data buffer, then apply time-domain gain is applied derived from L band
+  if (num_bands > 1) {
+    // update analysis buffer for H band
+    // append new data to buffer FX
+    for (i = 0; i < num_high_bands; ++i) {
+      memcpy(inst->dataBufHBFX[i], inst->dataBufHBFX[i] + inst->blockLen10ms,
+          (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->dataBufHBFX[i]));
+      memcpy(inst->dataBufHBFX[i] + inst->anaLen - inst->blockLen10ms,
+          speechFrameHB[i], inst->blockLen10ms * sizeof(*inst->dataBufHBFX[i]));
+    }
+    // range for averaging low band quantities for H band gain
+
+    gainTimeDomainHB = 16384; // 16384 = Q14(1.0)
+    //average speech prob from low band
+    //average filter gain from low band
+    //avg over second half (i.e., 4->8kHz) of freq. spectrum
+    tmpU32no1 = 0; // Q12
+    tmpU16no1 = 0; // Q8
+    for (i = inst->anaLen2 - (inst->anaLen2 >> 2); i < inst->anaLen2; i++) {
+      tmpU16no1 += nonSpeechProbFinal[i]; // Q8
+      tmpU32no1 += (uint32_t)(inst->noiseSupFilter[i]); // Q14
+    }
+    assert(inst->stages >= 7);
+    avgProbSpeechHB = (4096 - (tmpU16no1 >> (inst->stages - 7)));  // Q12
+    avgFilterGainHB = (int16_t)(tmpU32no1 >> (inst->stages - 3));  // Q14
+
+    // // original FLOAT code
+    // // gain based on speech probability:
+    // avg_prob_speech_tt=(float)2.0*avg_prob_speech-(float)1.0;
+    // gain_mod=(float)0.5*((float)1.0+(float)tanh(avg_prob_speech_tt)); // between 0 and 1
+
+    // gain based on speech probability:
+    // original expression: "0.5 * (1 + tanh(2x-1))"
+    // avgProbSpeechHB has been anyway saturated to a value between 0 and 1 so the other cases don't have to be dealt with
+    // avgProbSpeechHB and gainModHB are in Q12, 3607 = Q12(0.880615234375) which is a zero point of
+    // |0.5 * (1 + tanh(2x-1)) - x| - |0.5 * (1 + tanh(2x-1)) - 0.880615234375| meaning that from that point the error of approximating
+    // the expression with f(x) = x would be greater than the error of approximating the expression with f(x) = 0.880615234375
+    // error: "|0.5 * (1 + tanh(2x-1)) - x| from x=0 to 0.880615234375" -> http://www.wolframalpha.com/input/?i=|0.5+*+(1+%2B+tanh(2x-1))+-+x|+from+x%3D0+to+0.880615234375
+    // and:  "|0.5 * (1 + tanh(2x-1)) - 0.880615234375| from x=0.880615234375 to 1" -> http://www.wolframalpha.com/input/?i=+|0.5+*+(1+%2B+tanh(2x-1))+-+0.880615234375|+from+x%3D0.880615234375+to+1
+    gainModHB = WEBRTC_SPL_MIN(avgProbSpeechHB, 3607);
+
+    // // original FLOAT code
+    // //combine gain with low band gain
+    // if (avg_prob_speech < (float)0.5) {
+    // gain_time_domain_HB=(float)0.5*gain_mod+(float)0.5*avg_filter_gain;
+    // }
+    // else {
+    // gain_time_domain_HB=(float)0.25*gain_mod+(float)0.75*avg_filter_gain;
+    // }
+
+
+    //combine gain with low band gain
+    if (avgProbSpeechHB < 2048) {
+      // 2048 = Q12(0.5)
+      // the next two lines in float are  "gain_time_domain = 0.5 * gain_mod + 0.5 * avg_filter_gain"; Q2(0.5) = 2 equals one left shift
+      gainTimeDomainHB = (gainModHB << 1) + (avgFilterGainHB >> 1); // Q14
+    } else {
+      // "gain_time_domain = 0.25 * gain_mod + 0.75 * agv_filter_gain;"
+      gainTimeDomainHB = (int16_t)((3 * avgFilterGainHB) >> 2);  // 3 = Q2(0.75)
+      gainTimeDomainHB += gainModHB; // Q14
+    }
+    //make sure gain is within flooring range
+    gainTimeDomainHB
+      = WEBRTC_SPL_SAT(16384, gainTimeDomainHB, (int16_t)(inst->denoiseBound)); // 16384 = Q14(1.0)
+
+
+    //apply gain
+    for (i = 0; i < num_high_bands; ++i) {
+      for (j = 0; j < inst->blockLen10ms; j++) {
+        outFrameHB[i][j] = (int16_t)((gainTimeDomainHB *
+            inst->dataBufHBFX[i][j]) >> 14);  // Q0
+      }
+    }
+  }  // end of H band gain computation
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.h
new file mode 100644
index 00000000..f463dbbe
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.h
@@ -0,0 +1,263 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
+
+#ifdef NS_FILEDEBUG
+#include <stdio.h>
+#endif
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/ns/nsx_defines.h"
+#include "webrtc/typedefs.h"
+
+typedef struct NoiseSuppressionFixedC_ {
+  uint32_t                fs;
+
+  const int16_t*          window;
+  int16_t                 analysisBuffer[ANAL_BLOCKL_MAX];
+  int16_t                 synthesisBuffer[ANAL_BLOCKL_MAX];
+  uint16_t                noiseSupFilter[HALF_ANAL_BLOCKL];
+  uint16_t                overdrive; /* Q8 */
+  uint16_t                denoiseBound; /* Q14 */
+  const int16_t*          factor2Table;
+  int16_t                 noiseEstLogQuantile[SIMULT* HALF_ANAL_BLOCKL];
+  int16_t                 noiseEstDensity[SIMULT* HALF_ANAL_BLOCKL];
+  int16_t                 noiseEstCounter[SIMULT];
+  int16_t                 noiseEstQuantile[HALF_ANAL_BLOCKL];
+
+  size_t                  anaLen;
+  size_t                  anaLen2;
+  size_t                  magnLen;
+  int                     aggrMode;
+  int                     stages;
+  int                     initFlag;
+  int                     gainMap;
+
+  int32_t                 maxLrt;
+  int32_t                 minLrt;
+  // Log LRT factor with time-smoothing in Q8.
+  int32_t                 logLrtTimeAvgW32[HALF_ANAL_BLOCKL];
+  int32_t                 featureLogLrt;
+  int32_t                 thresholdLogLrt;
+  int16_t                 weightLogLrt;
+
+  uint32_t                featureSpecDiff;
+  uint32_t                thresholdSpecDiff;
+  int16_t                 weightSpecDiff;
+
+  uint32_t                featureSpecFlat;
+  uint32_t                thresholdSpecFlat;
+  int16_t                 weightSpecFlat;
+
+  // Conservative estimate of noise spectrum.
+  int32_t                 avgMagnPause[HALF_ANAL_BLOCKL];
+  uint32_t                magnEnergy;
+  uint32_t                sumMagn;
+  uint32_t                curAvgMagnEnergy;
+  uint32_t                timeAvgMagnEnergy;
+  uint32_t                timeAvgMagnEnergyTmp;
+
+  uint32_t                whiteNoiseLevel;  // Initial noise estimate.
+  // Initial magnitude spectrum estimate.
+  uint32_t                initMagnEst[HALF_ANAL_BLOCKL];
+  // Pink noise parameters:
+  int32_t                 pinkNoiseNumerator;  // Numerator.
+  int32_t                 pinkNoiseExp;  // Power of freq.
+  int                     minNorm;  // Smallest normalization factor.
+  int                     zeroInputSignal;  // Zero input signal flag.
+
+  // Noise spectrum from previous frame.
+  uint32_t                prevNoiseU32[HALF_ANAL_BLOCKL];
+  // Magnitude spectrum from previous frame.
+  uint16_t                prevMagnU16[HALF_ANAL_BLOCKL];
+  // Prior speech/noise probability in Q14.
+  int16_t                 priorNonSpeechProb;
+
+  int                     blockIndex;  // Frame index counter.
+  // Parameter for updating or estimating thresholds/weights for prior model.
+  int                     modelUpdate;
+  int                     cntThresUpdate;
+
+  // Histograms for parameter estimation.
+  int16_t                 histLrt[HIST_PAR_EST];
+  int16_t                 histSpecFlat[HIST_PAR_EST];
+  int16_t                 histSpecDiff[HIST_PAR_EST];
+
+  // Quantities for high band estimate.
+  int16_t                 dataBufHBFX[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
+
+  int                     qNoise;
+  int                     prevQNoise;
+  int                     prevQMagn;
+  size_t                  blockLen10ms;
+
+  int16_t                 real[ANAL_BLOCKL_MAX];
+  int16_t                 imag[ANAL_BLOCKL_MAX];
+  int32_t                 energyIn;
+  int                     scaleEnergyIn;
+  int                     normData;
+
+  struct RealFFT* real_fft;
+} NoiseSuppressionFixedC;
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/****************************************************************************
+ * WebRtcNsx_InitCore(...)
+ *
+ * This function initializes a noise suppression instance
+ *
+ * Input:
+ *      - inst          : Instance that should be initialized
+ *      - fs            : Sampling frequency
+ *
+ * Output:
+ *      - inst          : Initialized instance
+ *
+ * Return value         :  0 - Ok
+ *                        -1 - Error
+ */
+int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs);
+
+/****************************************************************************
+ * WebRtcNsx_set_policy_core(...)
+ *
+ * This changes the aggressiveness of the noise suppression method.
+ *
+ * Input:
+ *      - inst       : Instance that should be initialized
+ *      - mode       : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
+ *
+ * Output:
+ *      - inst       : Initialized instance
+ *
+ * Return value      :  0 - Ok
+ *                     -1 - Error
+ */
+int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode);
+
+/****************************************************************************
+ * WebRtcNsx_ProcessCore
+ *
+ * Do noise suppression.
+ *
+ * Input:
+ *      - inst          : Instance that should be initialized
+ *      - inFrame       : Input speech frame for each band
+ *      - num_bands     : Number of bands
+ *
+ * Output:
+ *      - inst          : Updated instance
+ *      - outFrame      : Output speech frame for each band
+ */
+void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst,
+                           const short* const* inFrame,
+                           int num_bands,
+                           short* const* outFrame);
+
+/****************************************************************************
+ * Some function pointers, for internal functions shared by ARM NEON and
+ * generic C code.
+ */
+// Noise Estimation.
+typedef void (*NoiseEstimation)(NoiseSuppressionFixedC* inst,
+                                uint16_t* magn,
+                                uint32_t* noise,
+                                int16_t* q_noise);
+extern NoiseEstimation WebRtcNsx_NoiseEstimation;
+
+// Filter the data in the frequency domain, and create spectrum.
+typedef void (*PrepareSpectrum)(NoiseSuppressionFixedC* inst,
+                                int16_t* freq_buff);
+extern PrepareSpectrum WebRtcNsx_PrepareSpectrum;
+
+// For the noise supression process, synthesis, read out fully processed
+// segment, and update synthesis buffer.
+typedef void (*SynthesisUpdate)(NoiseSuppressionFixedC* inst,
+                                int16_t* out_frame,
+                                int16_t gain_factor);
+extern SynthesisUpdate WebRtcNsx_SynthesisUpdate;
+
+// Update analysis buffer for lower band, and window data before FFT.
+typedef void (*AnalysisUpdate)(NoiseSuppressionFixedC* inst,
+                               int16_t* out,
+                               int16_t* new_speech);
+extern AnalysisUpdate WebRtcNsx_AnalysisUpdate;
+
+// Denormalize the real-valued signal |in|, the output from inverse FFT.
+typedef void (*Denormalize)(NoiseSuppressionFixedC* inst,
+                            int16_t* in,
+                            int factor);
+extern Denormalize WebRtcNsx_Denormalize;
+
+// Normalize the real-valued signal |in|, the input to forward FFT.
+typedef void (*NormalizeRealBuffer)(NoiseSuppressionFixedC* inst,
+                                    const int16_t* in,
+                                    int16_t* out);
+extern NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
+
+// Compute speech/noise probability.
+// Intended to be private.
+void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
+                               uint16_t* nonSpeechProbFinal,
+                               uint32_t* priorLocSnr,
+                               uint32_t* postLocSnr);
+
+#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON)
+// For the above function pointers, functions for generic platforms are declared
+// and defined as static in file nsx_core.c, while those for ARM Neon platforms
+// are declared below and defined in file nsx_core_neon.c.
+void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst,
+                                   uint16_t* magn,
+                                   uint32_t* noise,
+                                   int16_t* q_noise);
+void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst,
+                                   int16_t* out_frame,
+                                   int16_t gain_factor);
+void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst,
+                                  int16_t* out,
+                                  int16_t* new_speech);
+void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst,
+                                   int16_t* freq_buff);
+#endif
+
+#if defined(MIPS32_LE)
+// For the above function pointers, functions for generic platforms are declared
+// and defined as static in file nsx_core.c, while those for MIPS platforms
+// are declared below and defined in file nsx_core_mips.c.
+void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst,
+                                    int16_t* out_frame,
+                                    int16_t gain_factor);
+void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst,
+                                   int16_t* out,
+                                   int16_t* new_speech);
+void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst,
+                                    int16_t* freq_buff);
+void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst,
+                                        const int16_t* in,
+                                        int16_t* out);
+#if defined(MIPS_DSP_R1_LE)
+void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst,
+                                int16_t* in,
+                                int factor);
+#endif
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_c.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_c.c
new file mode 100644
index 00000000..14322d38
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_c.c
@@ -0,0 +1,261 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+#include "webrtc/modules/audio_processing/ns/nsx_defines.h"
+
+static const int16_t kIndicatorTable[17] = {
+  0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
+  7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
+};
+
+// Compute speech/noise probability
+// speech/noise probability is returned in: probSpeechFinal
+//snrLocPrior is the prior SNR for each frequency (in Q11)
+//snrLocPost is the post SNR for each frequency (in Q11)
+void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
+                               uint16_t* nonSpeechProbFinal,
+                               uint32_t* priorLocSnr,
+                               uint32_t* postLocSnr) {
+  uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3;
+  int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32;
+  int32_t frac32, logTmp;
+  int32_t logLrtTimeAvgKsumFX;
+  int16_t indPriorFX16;
+  int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart;
+  size_t i;
+  int normTmp, normTmp2, nShifts;
+
+  // compute feature based on average LR factor
+  // this is the average over all frequencies of the smooth log LRT
+  logLrtTimeAvgKsumFX = 0;
+  for (i = 0; i < inst->magnLen; i++) {
+    besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11
+    normTmp = WebRtcSpl_NormU32(postLocSnr[i]);
+    num = postLocSnr[i] << normTmp;  // Q(11+normTmp)
+    if (normTmp > 10) {
+      den = priorLocSnr[i] << (normTmp - 11);  // Q(normTmp)
+    } else {
+      den = priorLocSnr[i] >> (11 - normTmp);  // Q(normTmp)
+    }
+    if (den > 0) {
+      besselTmpFX32 -= num / den;  // Q11
+    } else {
+      besselTmpFX32 = 0;
+    }
+
+    // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior)
+    //                                       - inst->logLrtTimeAvg[i]);
+    // Here, LRT_TAVG = 0.5
+    zeros = WebRtcSpl_NormU32(priorLocSnr[i]);
+    frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19);
+    tmp32 = (frac32 * frac32 * -43) >> 19;
+    tmp32 += ((int16_t)frac32 * 5412) >> 12;
+    frac32 = tmp32 + 37;
+    // tmp32 = log2(priorLocSnr[i])
+    tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12
+    logTmp = (tmp32 * 178) >> 8;  // log2(priorLocSnr[i])*log(2)
+    // tmp32no1 = LRT_TAVG * (log(snrLocPrior) + inst->logLrtTimeAvg[i]) in Q12.
+    tmp32no1 = (logTmp + inst->logLrtTimeAvgW32[i]) / 2;
+    inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12
+
+    logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12
+  }
+  inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >>
+      (inst->stages + 11);
+
+  // done with computation of LR factor
+
+  //
+  //compute the indicator functions
+  //
+
+  // average LRT feature
+  // FLOAT code
+  // indicator0 = 0.5 * (tanh(widthPrior *
+  //                      (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
+  tmpIndFX = 16384; // Q14(1.0)
+  tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
+  nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
+  //use larger width in tanh map for pause regions
+  if (tmp32no1 < 0) {
+    tmpIndFX = 0;
+    tmp32no1 = -tmp32no1;
+    //widthPrior = widthPrior * 2.0;
+    nShifts++;
+  }
+  tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
+  // compute indicator function: sigmoid map
+  tableIndex = (int16_t)(tmp32no1 >> 14);
+  if ((tableIndex < 16) && (tableIndex >= 0)) {
+    tmp16no2 = kIndicatorTable[tableIndex];
+    tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+    frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
+    tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
+    if (tmpIndFX == 0) {
+      tmpIndFX = 8192 - tmp16no2; // Q14
+    } else {
+      tmpIndFX = 8192 + tmp16no2; // Q14
+    }
+  }
+  indPriorFX = inst->weightLogLrt * tmpIndFX;  // 6*Q14
+
+  //spectral flatness feature
+  if (inst->weightSpecFlat) {
+    tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
+    tmpIndFX = 16384; // Q14(1.0)
+    //use larger width in tanh map for pause regions
+    tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
+    nShifts = 4;
+    if (inst->thresholdSpecFlat < tmpU32no1) {
+      tmpIndFX = 0;
+      tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
+      //widthPrior = widthPrior * 2.0;
+      nShifts++;
+    }
+    tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25);  // Q14
+    // compute indicator function: sigmoid map
+    // FLOAT code
+    // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
+    //                          (threshPrior1 - tmpFloat1)) + 1.0);
+    tableIndex = (int16_t)(tmpU32no1 >> 14);
+    if (tableIndex < 16) {
+      tmp16no2 = kIndicatorTable[tableIndex];
+      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+      tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
+      if (tmpIndFX) {
+        tmpIndFX = 8192 + tmp16no2; // Q14
+      } else {
+        tmpIndFX = 8192 - tmp16no2; // Q14
+      }
+    }
+    indPriorFX += inst->weightSpecFlat * tmpIndFX;  // 6*Q14
+  }
+
+  //for template spectral-difference
+  if (inst->weightSpecDiff) {
+    tmpU32no1 = 0;
+    if (inst->featureSpecDiff) {
+      normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
+                               WebRtcSpl_NormU32(inst->featureSpecDiff));
+      assert(normTmp >= 0);
+      tmpU32no1 = inst->featureSpecDiff << normTmp;  // Q(normTmp-2*stages)
+      tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp);
+      if (tmpU32no2 > 0) {
+        // Q(20 - inst->stages)
+        tmpU32no1 /= tmpU32no2;
+      } else {
+        tmpU32no1 = (uint32_t)(0x7fffffff);
+      }
+    }
+    tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25;
+    tmpU32no2 = tmpU32no1 - tmpU32no3;
+    nShifts = 1;
+    tmpIndFX = 16384; // Q14(1.0)
+    //use larger width in tanh map for pause regions
+    if (tmpU32no2 & 0x80000000) {
+      tmpIndFX = 0;
+      tmpU32no2 = tmpU32no3 - tmpU32no1;
+      //widthPrior = widthPrior * 2.0;
+      nShifts--;
+    }
+    tmpU32no1 = tmpU32no2 >> nShifts;
+    // compute indicator function: sigmoid map
+    /* FLOAT code
+     indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
+     */
+    tableIndex = (int16_t)(tmpU32no1 >> 14);
+    if (tableIndex < 16) {
+      tmp16no2 = kIndicatorTable[tableIndex];
+      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+      tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                    tmp16no1, frac, 14);
+      if (tmpIndFX) {
+        tmpIndFX = 8192 + tmp16no2;
+      } else {
+        tmpIndFX = 8192 - tmp16no2;
+      }
+    }
+    indPriorFX += inst->weightSpecDiff * tmpIndFX;  // 6*Q14
+  }
+
+  //combine the indicator function with the feature weights
+  // FLOAT code
+  // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
+  //                 indicator1 + weightIndPrior2 * indicator2);
+  indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
+  // done with computing indicator function
+
+  //compute the prior probability
+  // FLOAT code
+  // inst->priorNonSpeechProb += PRIOR_UPDATE *
+  //                             (indPriorNonSpeech - inst->priorNonSpeechProb);
+  tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
+  inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14);
+
+  //final speech probability: combine prior model with LR factor:
+
+  memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
+
+  if (inst->priorNonSpeechProb > 0) {
+    for (i = 0; i < inst->magnLen; i++) {
+      // FLOAT code
+      // invLrt = exp(inst->logLrtTimeAvg[i]);
+      // invLrt = inst->priorSpeechProb * invLrt;
+      // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) /
+      //                         (1.0 - inst->priorSpeechProb + invLrt);
+      // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt;
+      // nonSpeechProbFinal[i] = inst->priorNonSpeechProb /
+      //                         (inst->priorNonSpeechProb + invLrt);
+      if (inst->logLrtTimeAvgW32[i] < 65300) {
+        tmp32no1 = (inst->logLrtTimeAvgW32[i] * 23637) >> 14;  // Q12
+        intPart = (int16_t)(tmp32no1 >> 12);
+        if (intPart < -8) {
+          intPart = -8;
+        }
+        frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12
+
+        // Quadratic approximation of 2^frac
+        tmp32no2 = (frac * frac * 44) >> 19;  // Q12.
+        tmp32no2 += (frac * 84) >> 7;  // Q12
+        invLrtFX = (1 << (8 + intPart)) +
+            WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8
+
+        normTmp = WebRtcSpl_NormW32(invLrtFX);
+        normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb));
+        if (normTmp + normTmp2 >= 7) {
+          if (normTmp + normTmp2 < 15) {
+            invLrtFX >>= 15 - normTmp2 - normTmp;
+            // Q(normTmp+normTmp2-7)
+            tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb);
+            // Q(normTmp+normTmp2+7)
+            invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2);
+                                                                  // Q14
+          } else {
+            tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb);
+                                                                  // Q22
+            invLrtFX = tmp32no1 >> 8;  // Q14.
+          }
+
+          tmp32no1 = (int32_t)inst->priorNonSpeechProb << 8;  // Q22
+
+          nonSpeechProbFinal[i] = tmp32no1 /
+              (inst->priorNonSpeechProb + invLrtFX);  // Q8
+        }
+      }
+    }
+  }
+}
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c
new file mode 100644
index 00000000..d99be872
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c
@@ -0,0 +1,1002 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+
+static const int16_t kIndicatorTable[17] = {
+  0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
+  7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
+};
+
+// Compute speech/noise probability
+// speech/noise probability is returned in: probSpeechFinal
+//snrLocPrior is the prior SNR for each frequency (in Q11)
+//snrLocPost is the post SNR for each frequency (in Q11)
+void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
+                               uint16_t* nonSpeechProbFinal,
+                               uint32_t* priorLocSnr,
+                               uint32_t* postLocSnr) {
+  uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
+  int32_t indPriorFX, tmp32no1;
+  int32_t logLrtTimeAvgKsumFX;
+  int16_t indPriorFX16;
+  int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac;
+  size_t i;
+  int normTmp, nShifts;
+
+  int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
+  int32_t const_max = 0x7fffffff;
+  int32_t const_neg43 = -43;
+  int32_t const_5412 = 5412;
+  int32_t const_11rsh12 = (11 << 12);
+  int32_t const_178 = 178;
+
+
+  // compute feature based on average LR factor
+  // this is the average over all frequencies of the smooth log LRT
+  logLrtTimeAvgKsumFX = 0;
+  for (i = 0; i < inst->magnLen; i++) {
+    r0 = postLocSnr[i]; // Q11
+    r1 = priorLocSnr[i];
+    r2 = inst->logLrtTimeAvgW32[i];
+
+    __asm __volatile(
+      ".set       push                                    \n\t"
+      ".set       noreorder                               \n\t"
+      "clz        %[r3],    %[r0]                         \n\t"
+      "clz        %[r5],    %[r1]                         \n\t"
+      "slti       %[r4],    %[r3],    32                  \n\t"
+      "slti       %[r6],    %[r5],    32                  \n\t"
+      "movz       %[r3],    $0,       %[r4]               \n\t"
+      "movz       %[r5],    $0,       %[r6]               \n\t"
+      "slti       %[r4],    %[r3],    11                  \n\t"
+      "addiu      %[r6],    %[r3],    -11                 \n\t"
+      "neg        %[r7],    %[r6]                         \n\t"
+      "sllv       %[r6],    %[r1],    %[r6]               \n\t"
+      "srav       %[r7],    %[r1],    %[r7]               \n\t"
+      "movn       %[r6],    %[r7],    %[r4]               \n\t"
+      "sllv       %[r1],    %[r1],    %[r5]               \n\t"
+      "and        %[r1],    %[r1],    %[const_max]        \n\t"
+      "sra        %[r1],    %[r1],    19                  \n\t"
+      "mul        %[r7],    %[r1],    %[r1]               \n\t"
+      "sllv       %[r3],    %[r0],    %[r3]               \n\t"
+      "divu       %[r8],    %[r3],    %[r6]               \n\t"
+      "slti       %[r6],    %[r6],    1                   \n\t"
+      "mul        %[r7],    %[r7],    %[const_neg43]      \n\t"
+      "sra        %[r7],    %[r7],    19                  \n\t"
+      "movz       %[r3],    %[r8],    %[r6]               \n\t"
+      "subu       %[r0],    %[r0],    %[r3]               \n\t"
+      "movn       %[r0],    $0,       %[r6]               \n\t"
+      "mul        %[r1],    %[r1],    %[const_5412]       \n\t"
+      "sra        %[r1],    %[r1],    12                  \n\t"
+      "addu       %[r7],    %[r7],    %[r1]               \n\t"
+      "addiu      %[r1],    %[r7],    37                  \n\t"
+      "addiu      %[r5],    %[r5],    -31                 \n\t"
+      "neg        %[r5],    %[r5]                         \n\t"
+      "sll        %[r5],    %[r5],    12                  \n\t"
+      "addu       %[r5],    %[r5],    %[r1]               \n\t"
+      "subu       %[r7],    %[r5],    %[const_11rsh12]    \n\t"
+      "mul        %[r7],    %[r7],    %[const_178]        \n\t"
+      "sra        %[r7],    %[r7],    8                   \n\t"
+      "addu       %[r7],    %[r7],    %[r2]               \n\t"
+      "sra        %[r7],    %[r7],    1                   \n\t"
+      "subu       %[r2],    %[r2],    %[r7]               \n\t"
+      "addu       %[r2],    %[r2],    %[r0]               \n\t"
+      ".set       pop                                     \n\t"
+      : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
+        [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+        [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8)
+      : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43),
+        [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12),
+        [const_178] "r" (const_178)
+      : "hi", "lo"
+    );
+    inst->logLrtTimeAvgW32[i] = r2;
+    logLrtTimeAvgKsumFX += r2;
+  }
+
+  inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >>
+      (inst->stages + 11);
+
+  // done with computation of LR factor
+
+  //
+  // compute the indicator functions
+  //
+
+  // average LRT feature
+  // FLOAT code
+  // indicator0 = 0.5 * (tanh(widthPrior *
+  //                      (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
+  tmpIndFX = 16384; // Q14(1.0)
+  tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
+  nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
+  //use larger width in tanh map for pause regions
+  if (tmp32no1 < 0) {
+    tmpIndFX = 0;
+    tmp32no1 = -tmp32no1;
+    //widthPrior = widthPrior * 2.0;
+    nShifts++;
+  }
+  tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
+  // compute indicator function: sigmoid map
+  tableIndex = (int16_t)(tmp32no1 >> 14);
+  if ((tableIndex < 16) && (tableIndex >= 0)) {
+    tmp16no2 = kIndicatorTable[tableIndex];
+    tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+    frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
+    tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
+    if (tmpIndFX == 0) {
+      tmpIndFX = 8192 - tmp16no2; // Q14
+    } else {
+      tmpIndFX = 8192 + tmp16no2; // Q14
+    }
+  }
+  indPriorFX = inst->weightLogLrt * tmpIndFX;  // 6*Q14
+
+  //spectral flatness feature
+  if (inst->weightSpecFlat) {
+    tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
+    tmpIndFX = 16384; // Q14(1.0)
+    //use larger width in tanh map for pause regions
+    tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
+    nShifts = 4;
+    if (inst->thresholdSpecFlat < tmpU32no1) {
+      tmpIndFX = 0;
+      tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
+      //widthPrior = widthPrior * 2.0;
+      nShifts++;
+    }
+    tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25);  //Q14
+    // compute indicator function: sigmoid map
+    // FLOAT code
+    // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
+    //                          (threshPrior1 - tmpFloat1)) + 1.0);
+    tableIndex = (int16_t)(tmpU32no1 >> 14);
+    if (tableIndex < 16) {
+      tmp16no2 = kIndicatorTable[tableIndex];
+      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+      tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
+      if (tmpIndFX) {
+        tmpIndFX = 8192 + tmp16no2; // Q14
+      } else {
+        tmpIndFX = 8192 - tmp16no2; // Q14
+      }
+    }
+    indPriorFX += inst->weightSpecFlat * tmpIndFX;  // 6*Q14
+  }
+
+  //for template spectral-difference
+  if (inst->weightSpecDiff) {
+    tmpU32no1 = 0;
+    if (inst->featureSpecDiff) {
+      normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
+                               WebRtcSpl_NormU32(inst->featureSpecDiff));
+      assert(normTmp >= 0);
+      tmpU32no1 = inst->featureSpecDiff << normTmp;  // Q(normTmp-2*stages)
+      tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp);
+      if (tmpU32no2 > 0) {
+        // Q(20 - inst->stages)
+        tmpU32no1 /= tmpU32no2;
+      } else {
+        tmpU32no1 = (uint32_t)(0x7fffffff);
+      }
+    }
+    tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25;
+    tmpU32no2 = tmpU32no1 - tmpU32no3;
+    nShifts = 1;
+    tmpIndFX = 16384; // Q14(1.0)
+    //use larger width in tanh map for pause regions
+    if (tmpU32no2 & 0x80000000) {
+      tmpIndFX = 0;
+      tmpU32no2 = tmpU32no3 - tmpU32no1;
+      //widthPrior = widthPrior * 2.0;
+      nShifts--;
+    }
+    tmpU32no1 = tmpU32no2 >> nShifts;
+    // compute indicator function: sigmoid map
+    /* FLOAT code
+     indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
+     */
+    tableIndex = (int16_t)(tmpU32no1 >> 14);
+    if (tableIndex < 16) {
+      tmp16no2 = kIndicatorTable[tableIndex];
+      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+      tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                    tmp16no1, frac, 14);
+      if (tmpIndFX) {
+        tmpIndFX = 8192 + tmp16no2;
+      } else {
+        tmpIndFX = 8192 - tmp16no2;
+      }
+    }
+    indPriorFX += inst->weightSpecDiff * tmpIndFX;  // 6*Q14
+  }
+
+  //combine the indicator function with the feature weights
+  // FLOAT code
+  // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
+  //                 indicator1 + weightIndPrior2 * indicator2);
+  indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
+  // done with computing indicator function
+
+  //compute the prior probability
+  // FLOAT code
+  // inst->priorNonSpeechProb += PRIOR_UPDATE *
+  //                             (indPriorNonSpeech - inst->priorNonSpeechProb);
+  tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
+  inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14);
+
+  //final speech probability: combine prior model with LR factor:
+
+  memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
+
+  if (inst->priorNonSpeechProb > 0) {
+    r0 = inst->priorNonSpeechProb;
+    r1 = 16384 - r0;
+    int32_t const_23637 = 23637;
+    int32_t const_44 = 44;
+    int32_t const_84 = 84;
+    int32_t const_1 = 1;
+    int32_t const_neg8 = -8;
+    for (i = 0; i < inst->magnLen; i++) {
+      r2 = inst->logLrtTimeAvgW32[i];
+      if (r2 < 65300) {
+        __asm __volatile(
+          ".set         push                                      \n\t"
+          ".set         noreorder                                 \n\t"
+          "mul          %[r2],    %[r2],          %[const_23637]  \n\t"
+          "sll          %[r6],    %[r1],          16              \n\t"
+          "clz          %[r7],    %[r6]                           \n\t"
+          "clo          %[r8],    %[r6]                           \n\t"
+          "slt          %[r9],    %[r6],          $0              \n\t"
+          "movn         %[r7],    %[r8],          %[r9]           \n\t"
+          "sra          %[r2],    %[r2],          14              \n\t"
+          "andi         %[r3],    %[r2],          0xfff           \n\t"
+          "mul          %[r4],    %[r3],          %[r3]           \n\t"
+          "mul          %[r3],    %[r3],          %[const_84]     \n\t"
+          "sra          %[r2],    %[r2],          12              \n\t"
+          "slt          %[r5],    %[r2],          %[const_neg8]   \n\t"
+          "movn         %[r2],    %[const_neg8],  %[r5]           \n\t"
+          "mul          %[r4],    %[r4],          %[const_44]     \n\t"
+          "sra          %[r3],    %[r3],          7               \n\t"
+          "addiu        %[r7],    %[r7],          -1              \n\t"
+          "slti         %[r9],    %[r7],          31              \n\t"
+          "movz         %[r7],    $0,             %[r9]           \n\t"
+          "sra          %[r4],    %[r4],          19              \n\t"
+          "addu         %[r4],    %[r4],          %[r3]           \n\t"
+          "addiu        %[r3],    %[r2],          8               \n\t"
+          "addiu        %[r2],    %[r2],          -4              \n\t"
+          "neg          %[r5],    %[r2]                           \n\t"
+          "sllv         %[r6],    %[r4],          %[r2]           \n\t"
+          "srav         %[r5],    %[r4],          %[r5]           \n\t"
+          "slt          %[r2],    %[r2],          $0              \n\t"
+          "movn         %[r6],    %[r5],          %[r2]           \n\t"
+          "sllv         %[r3],    %[const_1],     %[r3]           \n\t"
+          "addu         %[r2],    %[r3],          %[r6]           \n\t"
+          "clz          %[r4],    %[r2]                           \n\t"
+          "clo          %[r5],    %[r2]                           \n\t"
+          "slt          %[r8],    %[r2],          $0              \n\t"
+          "movn         %[r4],    %[r5],          %[r8]           \n\t"
+          "addiu        %[r4],    %[r4],          -1              \n\t"
+          "slt          %[r5],    $0,             %[r2]           \n\t"
+          "or           %[r5],    %[r5],          %[r7]           \n\t"
+          "movz         %[r4],    $0,             %[r5]           \n\t"
+          "addiu        %[r6],    %[r7],          -7              \n\t"
+          "addu         %[r6],    %[r6],          %[r4]           \n\t"
+          "bltz         %[r6],    1f                              \n\t"
+          " nop                                                   \n\t"
+          "addiu        %[r4],    %[r6],          -8              \n\t"
+          "neg          %[r3],    %[r4]                           \n\t"
+          "srav         %[r5],    %[r2],          %[r3]           \n\t"
+          "mul          %[r5],    %[r5],          %[r1]           \n\t"
+          "mul          %[r2],    %[r2],          %[r1]           \n\t"
+          "slt          %[r4],    %[r4],          $0              \n\t"
+          "srav         %[r5],    %[r5],          %[r6]           \n\t"
+          "sra          %[r2],    %[r2],          8               \n\t"
+          "movn         %[r2],    %[r5],          %[r4]           \n\t"
+          "sll          %[r3],    %[r0],          8               \n\t"
+          "addu         %[r2],    %[r0],          %[r2]           \n\t"
+          "divu         %[r3],    %[r3],          %[r2]           \n\t"
+         "1:                                                      \n\t"
+          ".set         pop                                       \n\t"
+          : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4),
+            [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
+            [r8] "=&r" (r8), [r9] "=&r" (r9)
+          : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637),
+            [const_neg8] "r" (const_neg8), [const_84] "r" (const_84),
+            [const_1] "r" (const_1), [const_44] "r" (const_44)
+          : "hi", "lo"
+        );
+        nonSpeechProbFinal[i] = r3;
+      }
+    }
+  }
+}
+
+// Update analysis buffer for lower band, and window data before FFT.
+void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst,
+                                   int16_t* out,
+                                   int16_t* new_speech) {
+  int iters, after;
+  int anaLen = (int)inst->anaLen;
+  int *window = (int*)inst->window;
+  int *anaBuf = (int*)inst->analysisBuffer;
+  int *outBuf = (int*)out;
+  int r0, r1, r2, r3, r4, r5, r6, r7;
+#if defined(MIPS_DSP_R1_LE)
+  int r8;
+#endif
+
+  // For lower band update analysis buffer.
+  memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
+      (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer));
+  memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech,
+      inst->blockLen10ms * sizeof(*inst->analysisBuffer));
+
+  // Window data before FFT.
+#if defined(MIPS_DSP_R1_LE)
+  __asm __volatile(
+    ".set              push                                \n\t"
+    ".set              noreorder                           \n\t"
+    "sra               %[iters],   %[anaLen],    3         \n\t"
+   "1:                                                     \n\t"
+    "blez              %[iters],   2f                      \n\t"
+    " nop                                                  \n\t"
+    "lw                %[r0],      0(%[window])            \n\t"
+    "lw                %[r1],      0(%[anaBuf])            \n\t"
+    "lw                %[r2],      4(%[window])            \n\t"
+    "lw                %[r3],      4(%[anaBuf])            \n\t"
+    "lw                %[r4],      8(%[window])            \n\t"
+    "lw                %[r5],      8(%[anaBuf])            \n\t"
+    "lw                %[r6],      12(%[window])           \n\t"
+    "lw                %[r7],      12(%[anaBuf])           \n\t"
+    "muleq_s.w.phl     %[r8],      %[r0],        %[r1]     \n\t"
+    "muleq_s.w.phr     %[r0],      %[r0],        %[r1]     \n\t"
+    "muleq_s.w.phl     %[r1],      %[r2],        %[r3]     \n\t"
+    "muleq_s.w.phr     %[r2],      %[r2],        %[r3]     \n\t"
+    "muleq_s.w.phl     %[r3],      %[r4],        %[r5]     \n\t"
+    "muleq_s.w.phr     %[r4],      %[r4],        %[r5]     \n\t"
+    "muleq_s.w.phl     %[r5],      %[r6],        %[r7]     \n\t"
+    "muleq_s.w.phr     %[r6],      %[r6],        %[r7]     \n\t"
+#if defined(MIPS_DSP_R2_LE)
+    "precr_sra_r.ph.w  %[r8],      %[r0],        15        \n\t"
+    "precr_sra_r.ph.w  %[r1],      %[r2],        15        \n\t"
+    "precr_sra_r.ph.w  %[r3],      %[r4],        15        \n\t"
+    "precr_sra_r.ph.w  %[r5],      %[r6],        15        \n\t"
+    "sw                %[r8],      0(%[outBuf])            \n\t"
+    "sw                %[r1],      4(%[outBuf])            \n\t"
+    "sw                %[r3],      8(%[outBuf])            \n\t"
+    "sw                %[r5],      12(%[outBuf])           \n\t"
+#else
+    "shra_r.w          %[r8],      %[r8],        15        \n\t"
+    "shra_r.w          %[r0],      %[r0],        15        \n\t"
+    "shra_r.w          %[r1],      %[r1],        15        \n\t"
+    "shra_r.w          %[r2],      %[r2],        15        \n\t"
+    "shra_r.w          %[r3],      %[r3],        15        \n\t"
+    "shra_r.w          %[r4],      %[r4],        15        \n\t"
+    "shra_r.w          %[r5],      %[r5],        15        \n\t"
+    "shra_r.w          %[r6],      %[r6],        15        \n\t"
+    "sll               %[r0],      %[r0],        16        \n\t"
+    "sll               %[r2],      %[r2],        16        \n\t"
+    "sll               %[r4],      %[r4],        16        \n\t"
+    "sll               %[r6],      %[r6],        16        \n\t"
+    "packrl.ph         %[r0],      %[r8],        %[r0]     \n\t"
+    "packrl.ph         %[r2],      %[r1],        %[r2]     \n\t"
+    "packrl.ph         %[r4],      %[r3],        %[r4]     \n\t"
+    "packrl.ph         %[r6],      %[r5],        %[r6]     \n\t"
+    "sw                %[r0],      0(%[outBuf])            \n\t"
+    "sw                %[r2],      4(%[outBuf])            \n\t"
+    "sw                %[r4],      8(%[outBuf])            \n\t"
+    "sw                %[r6],      12(%[outBuf])           \n\t"
+#endif
+    "addiu             %[window],  %[window],    16        \n\t"
+    "addiu             %[anaBuf],  %[anaBuf],    16        \n\t"
+    "addiu             %[outBuf],  %[outBuf],    16        \n\t"
+    "b                 1b                                  \n\t"
+    " addiu            %[iters],   %[iters],     -1        \n\t"
+   "2:                                                     \n\t"
+    "andi              %[after],   %[anaLen],    7         \n\t"
+   "3:                                                     \n\t"
+    "blez              %[after],   4f                      \n\t"
+    " nop                                                  \n\t"
+    "lh                %[r0],      0(%[window])            \n\t"
+    "lh                %[r1],      0(%[anaBuf])            \n\t"
+    "mul               %[r0],      %[r0],        %[r1]     \n\t"
+    "addiu             %[window],  %[window],    2         \n\t"
+    "addiu             %[anaBuf],  %[anaBuf],    2         \n\t"
+    "addiu             %[outBuf],  %[outBuf],    2         \n\t"
+    "shra_r.w          %[r0],      %[r0],        14        \n\t"
+    "sh                %[r0],      -2(%[outBuf])           \n\t"
+    "b                 3b                                  \n\t"
+    " addiu            %[after],   %[after],     -1        \n\t"
+   "4:                                                     \n\t"
+    ".set              pop                                 \n\t"
+    : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+      [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+      [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8),
+      [iters] "=&r" (iters), [after] "=&r" (after),
+      [window] "+r" (window),[anaBuf] "+r" (anaBuf),
+      [outBuf] "+r" (outBuf)
+    : [anaLen] "r" (anaLen)
+    : "memory", "hi", "lo"
+  );
+#else
+  __asm  __volatile(
+    ".set           push                                    \n\t"
+    ".set           noreorder                               \n\t"
+    "sra            %[iters],   %[anaLen],      2           \n\t"
+   "1:                                                      \n\t"
+    "blez           %[iters],   2f                          \n\t"
+    " nop                                                   \n\t"
+    "lh             %[r0],      0(%[window])                \n\t"
+    "lh             %[r1],      0(%[anaBuf])                \n\t"
+    "lh             %[r2],      2(%[window])                \n\t"
+    "lh             %[r3],      2(%[anaBuf])                \n\t"
+    "lh             %[r4],      4(%[window])                \n\t"
+    "lh             %[r5],      4(%[anaBuf])                \n\t"
+    "lh             %[r6],      6(%[window])                \n\t"
+    "lh             %[r7],      6(%[anaBuf])                \n\t"
+    "mul            %[r0],      %[r0],          %[r1]       \n\t"
+    "mul            %[r2],      %[r2],          %[r3]       \n\t"
+    "mul            %[r4],      %[r4],          %[r5]       \n\t"
+    "mul            %[r6],      %[r6],          %[r7]       \n\t"
+    "addiu          %[window],  %[window],      8           \n\t"
+    "addiu          %[anaBuf],  %[anaBuf],      8           \n\t"
+    "addiu          %[r0],      %[r0],          0x2000      \n\t"
+    "addiu          %[r2],      %[r2],          0x2000      \n\t"
+    "addiu          %[r4],      %[r4],          0x2000      \n\t"
+    "addiu          %[r6],      %[r6],          0x2000      \n\t"
+    "sra            %[r0],      %[r0],          14          \n\t"
+    "sra            %[r2],      %[r2],          14          \n\t"
+    "sra            %[r4],      %[r4],          14          \n\t"
+    "sra            %[r6],      %[r6],          14          \n\t"
+    "sh             %[r0],      0(%[outBuf])                \n\t"
+    "sh             %[r2],      2(%[outBuf])                \n\t"
+    "sh             %[r4],      4(%[outBuf])                \n\t"
+    "sh             %[r6],      6(%[outBuf])                \n\t"
+    "addiu          %[outBuf],  %[outBuf],      8           \n\t"
+    "b              1b                                      \n\t"
+    " addiu         %[iters],   %[iters],       -1          \n\t"
+   "2:                                                      \n\t"
+    "andi           %[after],   %[anaLen],      3           \n\t"
+   "3:                                                      \n\t"
+    "blez           %[after],   4f                          \n\t"
+    " nop                                                   \n\t"
+    "lh             %[r0],      0(%[window])                \n\t"
+    "lh             %[r1],      0(%[anaBuf])                \n\t"
+    "mul            %[r0],      %[r0],          %[r1]       \n\t"
+    "addiu          %[window],  %[window],      2           \n\t"
+    "addiu          %[anaBuf],  %[anaBuf],      2           \n\t"
+    "addiu          %[outBuf],  %[outBuf],      2           \n\t"
+    "addiu          %[r0],      %[r0],          0x2000      \n\t"
+    "sra            %[r0],      %[r0],          14          \n\t"
+    "sh             %[r0],      -2(%[outBuf])               \n\t"
+    "b              3b                                      \n\t"
+    " addiu         %[after],   %[after],       -1          \n\t"
+   "4:                                                      \n\t"
+    ".set           pop                                     \n\t"
+    : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+      [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+      [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters),
+      [after] "=&r" (after), [window] "+r" (window),
+      [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf)
+    : [anaLen] "r" (anaLen)
+    : "memory", "hi", "lo"
+  );
+#endif
+}
+
+// For the noise supression process, synthesis, read out fully processed
+// segment, and update synthesis buffer.
+void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst,
+                                    int16_t* out_frame,
+                                    int16_t gain_factor) {
+  int iters = (int)inst->blockLen10ms >> 2;
+  int after = inst->blockLen10ms & 3;
+  int r0, r1, r2, r3, r4, r5, r6, r7;
+  int16_t *window = (int16_t*)inst->window;
+  int16_t *real = inst->real;
+  int16_t *synthBuf = inst->synthesisBuffer;
+  int16_t *out = out_frame;
+  int sat_pos = 0x7fff;
+  int sat_neg = 0xffff8000;
+  int block10 = (int)inst->blockLen10ms;
+  int anaLen = (int)inst->anaLen;
+
+  __asm __volatile(
+    ".set       push                                        \n\t"
+    ".set       noreorder                                   \n\t"
+   "1:                                                      \n\t"
+    "blez       %[iters],   2f                              \n\t"
+    " nop                                                   \n\t"
+    "lh         %[r0],      0(%[window])                    \n\t"
+    "lh         %[r1],      0(%[real])                      \n\t"
+    "lh         %[r2],      2(%[window])                    \n\t"
+    "lh         %[r3],      2(%[real])                      \n\t"
+    "lh         %[r4],      4(%[window])                    \n\t"
+    "lh         %[r5],      4(%[real])                      \n\t"
+    "lh         %[r6],      6(%[window])                    \n\t"
+    "lh         %[r7],      6(%[real])                      \n\t"
+    "mul        %[r0],      %[r0],          %[r1]           \n\t"
+    "mul        %[r2],      %[r2],          %[r3]           \n\t"
+    "mul        %[r4],      %[r4],          %[r5]           \n\t"
+    "mul        %[r6],      %[r6],          %[r7]           \n\t"
+    "addiu      %[r0],      %[r0],          0x2000          \n\t"
+    "addiu      %[r2],      %[r2],          0x2000          \n\t"
+    "addiu      %[r4],      %[r4],          0x2000          \n\t"
+    "addiu      %[r6],      %[r6],          0x2000          \n\t"
+    "sra        %[r0],      %[r0],          14              \n\t"
+    "sra        %[r2],      %[r2],          14              \n\t"
+    "sra        %[r4],      %[r4],          14              \n\t"
+    "sra        %[r6],      %[r6],          14              \n\t"
+    "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
+    "mul        %[r2],      %[r2],          %[gain_factor]  \n\t"
+    "mul        %[r4],      %[r4],          %[gain_factor]  \n\t"
+    "mul        %[r6],      %[r6],          %[gain_factor]  \n\t"
+    "addiu      %[r0],      %[r0],          0x1000          \n\t"
+    "addiu      %[r2],      %[r2],          0x1000          \n\t"
+    "addiu      %[r4],      %[r4],          0x1000          \n\t"
+    "addiu      %[r6],      %[r6],          0x1000          \n\t"
+    "sra        %[r0],      %[r0],          13              \n\t"
+    "sra        %[r2],      %[r2],          13              \n\t"
+    "sra        %[r4],      %[r4],          13              \n\t"
+    "sra        %[r6],      %[r6],          13              \n\t"
+    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
+    "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
+    "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
+    "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
+    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
+    "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
+    "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
+    "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
+    "lh         %[r1],      0(%[synthBuf])                  \n\t"
+    "lh         %[r3],      2(%[synthBuf])                  \n\t"
+    "lh         %[r5],      4(%[synthBuf])                  \n\t"
+    "lh         %[r7],      6(%[synthBuf])                  \n\t"
+    "addu       %[r0],      %[r0],          %[r1]           \n\t"
+    "addu       %[r2],      %[r2],          %[r3]           \n\t"
+    "addu       %[r4],      %[r4],          %[r5]           \n\t"
+    "addu       %[r6],      %[r6],          %[r7]           \n\t"
+    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
+    "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
+    "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
+    "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
+    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
+    "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
+    "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
+    "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
+    "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
+    "slt        %[r3],      %[r2],          %[sat_neg]      \n\t"
+    "slt        %[r5],      %[r4],          %[sat_neg]      \n\t"
+    "slt        %[r7],      %[r6],          %[sat_neg]      \n\t"
+    "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
+    "movn       %[r2],      %[sat_neg],     %[r3]           \n\t"
+    "movn       %[r4],      %[sat_neg],     %[r5]           \n\t"
+    "movn       %[r6],      %[sat_neg],     %[r7]           \n\t"
+    "sh         %[r0],      0(%[synthBuf])                  \n\t"
+    "sh         %[r2],      2(%[synthBuf])                  \n\t"
+    "sh         %[r4],      4(%[synthBuf])                  \n\t"
+    "sh         %[r6],      6(%[synthBuf])                  \n\t"
+    "sh         %[r0],      0(%[out])                       \n\t"
+    "sh         %[r2],      2(%[out])                       \n\t"
+    "sh         %[r4],      4(%[out])                       \n\t"
+    "sh         %[r6],      6(%[out])                       \n\t"
+    "addiu      %[window],  %[window],      8               \n\t"
+    "addiu      %[real],    %[real],        8               \n\t"
+    "addiu      %[synthBuf],%[synthBuf],    8               \n\t"
+    "addiu      %[out],     %[out],         8               \n\t"
+    "b          1b                                          \n\t"
+    " addiu     %[iters],   %[iters],       -1              \n\t"
+   "2:                                                      \n\t"
+    "blez       %[after],   3f                              \n\t"
+    " subu      %[block10], %[anaLen],      %[block10]      \n\t"
+    "lh         %[r0],      0(%[window])                    \n\t"
+    "lh         %[r1],      0(%[real])                      \n\t"
+    "mul        %[r0],      %[r0],          %[r1]           \n\t"
+    "addiu      %[window],  %[window],      2               \n\t"
+    "addiu      %[real],    %[real],        2               \n\t"
+    "addiu      %[r0],      %[r0],          0x2000          \n\t"
+    "sra        %[r0],      %[r0],          14              \n\t"
+    "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
+    "addiu      %[r0],      %[r0],          0x1000          \n\t"
+    "sra        %[r0],      %[r0],          13              \n\t"
+    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
+    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
+    "lh         %[r1],      0(%[synthBuf])                  \n\t"
+    "addu       %[r0],      %[r0],          %[r1]           \n\t"
+    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
+    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
+    "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
+    "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
+    "sh         %[r0],      0(%[synthBuf])                  \n\t"
+    "sh         %[r0],      0(%[out])                       \n\t"
+    "addiu      %[synthBuf],%[synthBuf],    2               \n\t"
+    "addiu      %[out],     %[out],         2               \n\t"
+    "b          2b                                          \n\t"
+    " addiu     %[after],   %[after],       -1              \n\t"
+   "3:                                                      \n\t"
+    "sra        %[iters],   %[block10],     2               \n\t"
+   "4:                                                      \n\t"
+    "blez       %[iters],   5f                              \n\t"
+    " andi      %[after],   %[block10],     3               \n\t"
+    "lh         %[r0],      0(%[window])                    \n\t"
+    "lh         %[r1],      0(%[real])                      \n\t"
+    "lh         %[r2],      2(%[window])                    \n\t"
+    "lh         %[r3],      2(%[real])                      \n\t"
+    "lh         %[r4],      4(%[window])                    \n\t"
+    "lh         %[r5],      4(%[real])                      \n\t"
+    "lh         %[r6],      6(%[window])                    \n\t"
+    "lh         %[r7],      6(%[real])                      \n\t"
+    "mul        %[r0],      %[r0],          %[r1]           \n\t"
+    "mul        %[r2],      %[r2],          %[r3]           \n\t"
+    "mul        %[r4],      %[r4],          %[r5]           \n\t"
+    "mul        %[r6],      %[r6],          %[r7]           \n\t"
+    "addiu      %[r0],      %[r0],          0x2000          \n\t"
+    "addiu      %[r2],      %[r2],          0x2000          \n\t"
+    "addiu      %[r4],      %[r4],          0x2000          \n\t"
+    "addiu      %[r6],      %[r6],          0x2000          \n\t"
+    "sra        %[r0],      %[r0],          14              \n\t"
+    "sra        %[r2],      %[r2],          14              \n\t"
+    "sra        %[r4],      %[r4],          14              \n\t"
+    "sra        %[r6],      %[r6],          14              \n\t"
+    "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
+    "mul        %[r2],      %[r2],          %[gain_factor]  \n\t"
+    "mul        %[r4],      %[r4],          %[gain_factor]  \n\t"
+    "mul        %[r6],      %[r6],          %[gain_factor]  \n\t"
+    "addiu      %[r0],      %[r0],          0x1000          \n\t"
+    "addiu      %[r2],      %[r2],          0x1000          \n\t"
+    "addiu      %[r4],      %[r4],          0x1000          \n\t"
+    "addiu      %[r6],      %[r6],          0x1000          \n\t"
+    "sra        %[r0],      %[r0],          13              \n\t"
+    "sra        %[r2],      %[r2],          13              \n\t"
+    "sra        %[r4],      %[r4],          13              \n\t"
+    "sra        %[r6],      %[r6],          13              \n\t"
+    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
+    "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
+    "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
+    "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
+    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
+    "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
+    "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
+    "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
+    "lh         %[r1],      0(%[synthBuf])                  \n\t"
+    "lh         %[r3],      2(%[synthBuf])                  \n\t"
+    "lh         %[r5],      4(%[synthBuf])                  \n\t"
+    "lh         %[r7],      6(%[synthBuf])                  \n\t"
+    "addu       %[r0],      %[r0],          %[r1]           \n\t"
+    "addu       %[r2],      %[r2],          %[r3]           \n\t"
+    "addu       %[r4],      %[r4],          %[r5]           \n\t"
+    "addu       %[r6],      %[r6],          %[r7]           \n\t"
+    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
+    "slt        %[r3],      %[r2],          %[sat_pos]      \n\t"
+    "slt        %[r5],      %[r4],          %[sat_pos]      \n\t"
+    "slt        %[r7],      %[r6],          %[sat_pos]      \n\t"
+    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
+    "movz       %[r2],      %[sat_pos],     %[r3]           \n\t"
+    "movz       %[r4],      %[sat_pos],     %[r5]           \n\t"
+    "movz       %[r6],      %[sat_pos],     %[r7]           \n\t"
+    "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
+    "slt        %[r3],      %[r2],          %[sat_neg]      \n\t"
+    "slt        %[r5],      %[r4],          %[sat_neg]      \n\t"
+    "slt        %[r7],      %[r6],          %[sat_neg]      \n\t"
+    "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
+    "movn       %[r2],      %[sat_neg],     %[r3]           \n\t"
+    "movn       %[r4],      %[sat_neg],     %[r5]           \n\t"
+    "movn       %[r6],      %[sat_neg],     %[r7]           \n\t"
+    "sh         %[r0],      0(%[synthBuf])                  \n\t"
+    "sh         %[r2],      2(%[synthBuf])                  \n\t"
+    "sh         %[r4],      4(%[synthBuf])                  \n\t"
+    "sh         %[r6],      6(%[synthBuf])                  \n\t"
+    "addiu      %[window],  %[window],      8               \n\t"
+    "addiu      %[real],    %[real],        8               \n\t"
+    "addiu      %[synthBuf],%[synthBuf],    8               \n\t"
+    "b          4b                                          \n\t"
+    " addiu     %[iters],   %[iters],       -1              \n\t"
+   "5:                                                      \n\t"
+    "blez       %[after],   6f                              \n\t"
+    " nop                                                   \n\t"
+    "lh         %[r0],      0(%[window])                    \n\t"
+    "lh         %[r1],      0(%[real])                      \n\t"
+    "mul        %[r0],      %[r0],          %[r1]           \n\t"
+    "addiu      %[window],  %[window],      2               \n\t"
+    "addiu      %[real],    %[real],        2               \n\t"
+    "addiu      %[r0],      %[r0],          0x2000          \n\t"
+    "sra        %[r0],      %[r0],          14              \n\t"
+    "mul        %[r0],      %[r0],          %[gain_factor]  \n\t"
+    "addiu      %[r0],      %[r0],          0x1000          \n\t"
+    "sra        %[r0],      %[r0],          13              \n\t"
+    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
+    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
+    "lh         %[r1],      0(%[synthBuf])                  \n\t"
+    "addu       %[r0],      %[r0],          %[r1]           \n\t"
+    "slt        %[r1],      %[r0],          %[sat_pos]      \n\t"
+    "movz       %[r0],      %[sat_pos],     %[r1]           \n\t"
+    "slt        %[r1],      %[r0],          %[sat_neg]      \n\t"
+    "movn       %[r0],      %[sat_neg],     %[r1]           \n\t"
+    "sh         %[r0],      0(%[synthBuf])                  \n\t"
+    "addiu      %[synthBuf],%[synthBuf],    2               \n\t"
+    "b          2b                                          \n\t"
+    " addiu     %[after],   %[after],       -1              \n\t"
+   "6:                                                      \n\t"
+    ".set       pop                                         \n\t"
+    : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+      [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+      [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters),
+      [after] "+r" (after), [block10] "+r" (block10),
+      [window] "+r" (window), [real] "+r" (real),
+      [synthBuf] "+r" (synthBuf), [out] "+r" (out)
+    : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos),
+      [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen)
+    : "memory", "hi", "lo"
+  );
+
+  // update synthesis buffer
+  memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
+      (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer));
+  WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
+      + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
+}
+
+// Filter the data in the frequency domain, and create spectrum.
+void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst,
+                                    int16_t* freq_buf) {
+  uint16_t *noiseSupFilter = inst->noiseSupFilter;
+  int16_t *real = inst->real;
+  int16_t *imag = inst->imag;
+  int32_t loop_count = 2;
+  int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6;
+  int16_t tmp16 = (int16_t)(inst->anaLen << 1) - 4;
+  int16_t* freq_buf_f = freq_buf;
+  int16_t* freq_buf_s = &freq_buf[tmp16];
+
+  __asm __volatile (
+    ".set       push                                                 \n\t"
+    ".set       noreorder                                            \n\t"
+    //first sample
+    "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
+    "lh         %[tmp_2],           0(%[real])                       \n\t"
+    "lh         %[tmp_3],           0(%[imag])                       \n\t"
+    "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
+    "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
+    "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
+    "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
+    "sh         %[tmp_2],           0(%[real])                       \n\t"
+    "sh         %[tmp_3],           0(%[imag])                       \n\t"
+    "negu       %[tmp_3],           %[tmp_3]                         \n\t"
+    "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
+    "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
+    "addiu      %[real],            %[real],              2          \n\t"
+    "addiu      %[imag],            %[imag],              2          \n\t"
+    "addiu      %[noiseSupFilter],  %[noiseSupFilter],    2          \n\t"
+    "addiu      %[freq_buf_f],      %[freq_buf_f],        4          \n\t"
+   "1:                                                               \n\t"
+    "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
+    "lh         %[tmp_2],           0(%[real])                       \n\t"
+    "lh         %[tmp_3],           0(%[imag])                       \n\t"
+    "lh         %[tmp_4],           2(%[noiseSupFilter])             \n\t"
+    "lh         %[tmp_5],           2(%[real])                       \n\t"
+    "lh         %[tmp_6],           2(%[imag])                       \n\t"
+    "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
+    "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
+    "mul        %[tmp_5],           %[tmp_5],             %[tmp_4]   \n\t"
+    "mul        %[tmp_6],           %[tmp_6],             %[tmp_4]   \n\t"
+    "addiu      %[loop_count],      %[loop_count],        2          \n\t"
+    "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
+    "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
+    "sra        %[tmp_5],           %[tmp_5],             14         \n\t"
+    "sra        %[tmp_6],           %[tmp_6],             14         \n\t"
+    "addiu      %[noiseSupFilter],  %[noiseSupFilter],    4          \n\t"
+    "sh         %[tmp_2],           0(%[real])                       \n\t"
+    "sh         %[tmp_2],           4(%[freq_buf_s])                 \n\t"
+    "sh         %[tmp_3],           0(%[imag])                       \n\t"
+    "sh         %[tmp_3],           6(%[freq_buf_s])                 \n\t"
+    "negu       %[tmp_3],           %[tmp_3]                         \n\t"
+    "sh         %[tmp_5],           2(%[real])                       \n\t"
+    "sh         %[tmp_5],           0(%[freq_buf_s])                 \n\t"
+    "sh         %[tmp_6],           2(%[imag])                       \n\t"
+    "sh         %[tmp_6],           2(%[freq_buf_s])                 \n\t"
+    "negu       %[tmp_6],           %[tmp_6]                         \n\t"
+    "addiu      %[freq_buf_s],      %[freq_buf_s],        -8         \n\t"
+    "addiu      %[real],            %[real],              4          \n\t"
+    "addiu      %[imag],            %[imag],              4          \n\t"
+    "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
+    "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
+    "sh         %[tmp_5],           4(%[freq_buf_f])                 \n\t"
+    "sh         %[tmp_6],           6(%[freq_buf_f])                 \n\t"
+    "blt        %[loop_count],      %[loop_size],         1b         \n\t"
+    " addiu     %[freq_buf_f],      %[freq_buf_f],        8          \n\t"
+    //last two samples:
+    "lh         %[tmp_1],           0(%[noiseSupFilter])             \n\t"
+    "lh         %[tmp_2],           0(%[real])                       \n\t"
+    "lh         %[tmp_3],           0(%[imag])                       \n\t"
+    "lh         %[tmp_4],           2(%[noiseSupFilter])             \n\t"
+    "lh         %[tmp_5],           2(%[real])                       \n\t"
+    "lh         %[tmp_6],           2(%[imag])                       \n\t"
+    "mul        %[tmp_2],           %[tmp_2],             %[tmp_1]   \n\t"
+    "mul        %[tmp_3],           %[tmp_3],             %[tmp_1]   \n\t"
+    "mul        %[tmp_5],           %[tmp_5],             %[tmp_4]   \n\t"
+    "mul        %[tmp_6],           %[tmp_6],             %[tmp_4]   \n\t"
+    "sra        %[tmp_2],           %[tmp_2],             14         \n\t"
+    "sra        %[tmp_3],           %[tmp_3],             14         \n\t"
+    "sra        %[tmp_5],           %[tmp_5],             14         \n\t"
+    "sra        %[tmp_6],           %[tmp_6],             14         \n\t"
+    "sh         %[tmp_2],           0(%[real])                       \n\t"
+    "sh         %[tmp_2],           4(%[freq_buf_s])                 \n\t"
+    "sh         %[tmp_3],           0(%[imag])                       \n\t"
+    "sh         %[tmp_3],           6(%[freq_buf_s])                 \n\t"
+    "negu       %[tmp_3],           %[tmp_3]                         \n\t"
+    "sh         %[tmp_2],           0(%[freq_buf_f])                 \n\t"
+    "sh         %[tmp_3],           2(%[freq_buf_f])                 \n\t"
+    "sh         %[tmp_5],           4(%[freq_buf_f])                 \n\t"
+    "sh         %[tmp_6],           6(%[freq_buf_f])                 \n\t"
+    "sh         %[tmp_5],           2(%[real])                       \n\t"
+    "sh         %[tmp_6],           2(%[imag])                       \n\t"
+    ".set       pop                                                  \n\t"
+    : [real] "+r" (real), [imag] "+r" (imag),
+      [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s),
+      [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter),
+      [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3),
+      [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6)
+    : [loop_size] "r" (inst->anaLen2)
+    : "memory", "hi", "lo"
+  );
+}
+
+#if defined(MIPS_DSP_R1_LE)
+// Denormalize the real-valued signal |in|, the output from inverse FFT.
+void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst,
+                                int16_t* in,
+                                int factor) {
+  int32_t r0, r1, r2, r3, t0;
+  int len = (int)inst->anaLen;
+  int16_t *out = &inst->real[0];
+  int shift = factor - inst->normData;
+
+  __asm __volatile (
+    ".set          push                                \n\t"
+    ".set          noreorder                           \n\t"
+    "beqz          %[len],     8f                      \n\t"
+    " nop                                              \n\t"
+    "bltz          %[shift],   4f                      \n\t"
+    " sra          %[t0],      %[len],      2          \n\t"
+    "beqz          %[t0],      2f                      \n\t"
+    " andi         %[len],     %[len],      3          \n\t"
+   "1:                                                 \n\t"
+    "lh            %[r0],      0(%[in])                \n\t"
+    "lh            %[r1],      2(%[in])                \n\t"
+    "lh            %[r2],      4(%[in])                \n\t"
+    "lh            %[r3],      6(%[in])                \n\t"
+    "shllv_s.ph    %[r0],      %[r0],       %[shift]   \n\t"
+    "shllv_s.ph    %[r1],      %[r1],       %[shift]   \n\t"
+    "shllv_s.ph    %[r2],      %[r2],       %[shift]   \n\t"
+    "shllv_s.ph    %[r3],      %[r3],       %[shift]   \n\t"
+    "addiu         %[in],      %[in],       8          \n\t"
+    "addiu         %[t0],      %[t0],       -1         \n\t"
+    "sh            %[r0],      0(%[out])               \n\t"
+    "sh            %[r1],      2(%[out])               \n\t"
+    "sh            %[r2],      4(%[out])               \n\t"
+    "sh            %[r3],      6(%[out])               \n\t"
+    "bgtz          %[t0],      1b                      \n\t"
+    " addiu        %[out],     %[out],      8          \n\t"
+   "2:                                                 \n\t"
+    "beqz          %[len],     8f                      \n\t"
+    " nop                                              \n\t"
+   "3:                                                 \n\t"
+    "lh            %[r0],      0(%[in])                \n\t"
+    "addiu         %[in],      %[in],       2          \n\t"
+    "addiu         %[len],     %[len],      -1         \n\t"
+    "shllv_s.ph    %[r0],      %[r0],       %[shift]   \n\t"
+    "addiu         %[out],     %[out],      2          \n\t"
+    "bgtz          %[len],     3b                      \n\t"
+    " sh           %[r0],      -2(%[out])              \n\t"
+    "b             8f                                  \n\t"
+   "4:                                                 \n\t"
+    "negu          %[shift],   %[shift]                \n\t"
+    "beqz          %[t0],      6f                      \n\t"
+    " andi         %[len],     %[len],      3          \n\t"
+   "5:                                                 \n\t"
+    "lh            %[r0],      0(%[in])                \n\t"
+    "lh            %[r1],      2(%[in])                \n\t"
+    "lh            %[r2],      4(%[in])                \n\t"
+    "lh            %[r3],      6(%[in])                \n\t"
+    "srav          %[r0],      %[r0],       %[shift]   \n\t"
+    "srav          %[r1],      %[r1],       %[shift]   \n\t"
+    "srav          %[r2],      %[r2],       %[shift]   \n\t"
+    "srav          %[r3],      %[r3],       %[shift]   \n\t"
+    "addiu         %[in],      %[in],       8          \n\t"
+    "addiu         %[t0],      %[t0],       -1         \n\t"
+    "sh            %[r0],      0(%[out])               \n\t"
+    "sh            %[r1],      2(%[out])               \n\t"
+    "sh            %[r2],      4(%[out])               \n\t"
+    "sh            %[r3],      6(%[out])               \n\t"
+    "bgtz          %[t0],      5b                      \n\t"
+    " addiu        %[out],     %[out],      8          \n\t"
+   "6:                                                 \n\t"
+    "beqz          %[len],     8f                      \n\t"
+    " nop                                              \n\t"
+   "7:                                                 \n\t"
+    "lh            %[r0],      0(%[in])                \n\t"
+    "addiu         %[in],      %[in],       2          \n\t"
+    "addiu         %[len],     %[len],      -1         \n\t"
+    "srav          %[r0],      %[r0],       %[shift]   \n\t"
+    "addiu         %[out],     %[out],      2          \n\t"
+    "bgtz          %[len],     7b                      \n\t"
+    " sh           %[r0],      -2(%[out])              \n\t"
+   "8:                                                 \n\t"
+    ".set          pop                                 \n\t"
+    : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
+      [r2] "=&r" (r2), [r3] "=&r" (r3)
+    : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
+      [out] "r" (out)
+    : "memory"
+  );
+}
+#endif
+
+// Normalize the real-valued signal |in|, the input to forward FFT.
+void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst,
+                                        const int16_t* in,
+                                        int16_t* out) {
+  int32_t r0, r1, r2, r3, t0;
+  int len = (int)inst->anaLen;
+  int shift = inst->normData;
+
+  __asm __volatile (
+    ".set          push                                \n\t"
+    ".set          noreorder                           \n\t"
+    "beqz          %[len],     4f                      \n\t"
+    " sra          %[t0],      %[len],      2          \n\t"
+    "beqz          %[t0],      2f                      \n\t"
+    " andi         %[len],     %[len],      3          \n\t"
+   "1:                                                 \n\t"
+    "lh            %[r0],      0(%[in])                \n\t"
+    "lh            %[r1],      2(%[in])                \n\t"
+    "lh            %[r2],      4(%[in])                \n\t"
+    "lh            %[r3],      6(%[in])                \n\t"
+    "sllv          %[r0],      %[r0],       %[shift]   \n\t"
+    "sllv          %[r1],      %[r1],       %[shift]   \n\t"
+    "sllv          %[r2],      %[r2],       %[shift]   \n\t"
+    "sllv          %[r3],      %[r3],       %[shift]   \n\t"
+    "addiu         %[in],      %[in],       8          \n\t"
+    "addiu         %[t0],      %[t0],       -1         \n\t"
+    "sh            %[r0],      0(%[out])               \n\t"
+    "sh            %[r1],      2(%[out])               \n\t"
+    "sh            %[r2],      4(%[out])               \n\t"
+    "sh            %[r3],      6(%[out])               \n\t"
+    "bgtz          %[t0],      1b                      \n\t"
+    " addiu        %[out],     %[out],      8          \n\t"
+   "2:                                                 \n\t"
+    "beqz          %[len],     4f                      \n\t"
+    " nop                                              \n\t"
+   "3:                                                 \n\t"
+    "lh            %[r0],      0(%[in])                \n\t"
+    "addiu         %[in],      %[in],       2          \n\t"
+    "addiu         %[len],     %[len],      -1         \n\t"
+    "sllv          %[r0],      %[r0],       %[shift]   \n\t"
+    "addiu         %[out],     %[out],      2          \n\t"
+    "bgtz          %[len],     3b                      \n\t"
+    " sh           %[r0],      -2(%[out])              \n\t"
+   "4:                                                 \n\t"
+    ".set          pop                                 \n\t"
+    : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
+      [r2] "=&r" (r2), [r3] "=&r" (r3)
+    : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
+      [out] "r" (out)
+    : "memory"
+  );
+}
+
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_neon.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_neon.c
new file mode 100644
index 00000000..65788ae2
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_neon.c
@@ -0,0 +1,598 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+
+#include <arm_neon.h>
+#include <assert.h>
+
+// Constants to compensate for shifting signal log(2^shifts).
+const int16_t WebRtcNsx_kLogTable[9] = {
+  0, 177, 355, 532, 710, 887, 1065, 1242, 1420
+};
+
+const int16_t WebRtcNsx_kCounterDiv[201] = {
+  32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731,
+  2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311,
+  1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840,
+  819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607,
+  596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475,
+  468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390,
+  386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331,
+  328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287,
+  285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254,
+  252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228,
+  226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206,
+  205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188,
+  187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
+  172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
+};
+
+const int16_t WebRtcNsx_kLogTableFrac[256] = {
+  0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21,
+  22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42,
+  44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62,
+  63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81,
+  82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99,
+  100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116,
+  117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
+  132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
+  147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160,
+  161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174,
+  175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187,
+  188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200,
+  201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212,
+  213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224,
+  225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236,
+  237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247,
+  248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255
+};
+
+// Update the noise estimation information.
+static void UpdateNoiseEstimateNeon(NoiseSuppressionFixedC* inst, int offset) {
+  const int16_t kExp2Const = 11819; // Q13
+  int16_t* ptr_noiseEstLogQuantile = NULL;
+  int16_t* ptr_noiseEstQuantile = NULL;
+  int16x4_t kExp2Const16x4 = vdup_n_s16(kExp2Const);
+  int32x4_t twentyOne32x4 = vdupq_n_s32(21);
+  int32x4_t constA32x4 = vdupq_n_s32(0x1fffff);
+  int32x4_t constB32x4 = vdupq_n_s32(0x200000);
+
+  int16_t tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
+                                        inst->magnLen);
+
+  // Guarantee a Q-domain as high as possible and still fit in int16
+  inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(kExp2Const,
+                                                                 tmp16,
+                                                                 21);
+
+  int32x4_t qNoise32x4 = vdupq_n_s32(inst->qNoise);
+
+  for (ptr_noiseEstLogQuantile = &inst->noiseEstLogQuantile[offset],
+       ptr_noiseEstQuantile = &inst->noiseEstQuantile[0];
+       ptr_noiseEstQuantile < &inst->noiseEstQuantile[inst->magnLen - 3];
+       ptr_noiseEstQuantile += 4, ptr_noiseEstLogQuantile += 4) {
+
+    // tmp32no2 = kExp2Const * inst->noiseEstLogQuantile[offset + i];
+    int16x4_t v16x4 = vld1_s16(ptr_noiseEstLogQuantile);
+    int32x4_t v32x4B = vmull_s16(v16x4, kExp2Const16x4);
+
+    // tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
+    int32x4_t v32x4A = vandq_s32(v32x4B, constA32x4);
+    v32x4A = vorrq_s32(v32x4A, constB32x4);
+
+    // tmp16 = (int16_t)(tmp32no2 >> 21);
+    v32x4B = vshrq_n_s32(v32x4B, 21);
+
+    // tmp16 -= 21;// shift 21 to get result in Q0
+    v32x4B = vsubq_s32(v32x4B, twentyOne32x4);
+
+    // tmp16 += (int16_t) inst->qNoise;
+    // shift to get result in Q(qNoise)
+    v32x4B = vaddq_s32(v32x4B, qNoise32x4);
+
+    // if (tmp16 < 0) {
+    //   tmp32no1 >>= -tmp16;
+    // } else {
+    //   tmp32no1 <<= tmp16;
+    // }
+    v32x4B = vshlq_s32(v32x4A, v32x4B);
+
+    // tmp16 = WebRtcSpl_SatW32ToW16(tmp32no1);
+    v16x4 = vqmovn_s32(v32x4B);
+
+    //inst->noiseEstQuantile[i] = tmp16;
+    vst1_s16(ptr_noiseEstQuantile, v16x4);
+  }
+
+  // Last iteration:
+
+  // inst->quantile[i]=exp(inst->lquantile[offset+i]);
+  // in Q21
+  int32_t tmp32no2 = kExp2Const * *ptr_noiseEstLogQuantile;
+  int32_t tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
+
+  tmp16 = (int16_t)(tmp32no2 >> 21);
+  tmp16 -= 21;// shift 21 to get result in Q0
+  tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise)
+  if (tmp16 < 0) {
+    tmp32no1 >>= -tmp16;
+  } else {
+    tmp32no1 <<= tmp16;
+  }
+  *ptr_noiseEstQuantile = WebRtcSpl_SatW32ToW16(tmp32no1);
+}
+
+// Noise Estimation
+void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst,
+                                   uint16_t* magn,
+                                   uint32_t* noise,
+                                   int16_t* q_noise) {
+  int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
+  int16_t countProd, delta, zeros, frac;
+  int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
+  const int16_t log2_const = 22713;
+  const int16_t width_factor = 21845;
+
+  size_t i, s, offset;
+
+  tabind = inst->stages - inst->normData;
+  assert(tabind < 9);
+  assert(tabind > -9);
+  if (tabind < 0) {
+    logval = -WebRtcNsx_kLogTable[-tabind];
+  } else {
+    logval = WebRtcNsx_kLogTable[tabind];
+  }
+
+  int16x8_t logval_16x8 = vdupq_n_s16(logval);
+
+  // lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
+  // magn is in Q(-stages), and the real lmagn values are:
+  // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
+  // lmagn in Q8
+  for (i = 0; i < inst->magnLen; i++) {
+    if (magn[i]) {
+      zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
+      frac = (int16_t)((((uint32_t)magn[i] << zeros)
+                        & 0x7FFFFFFF) >> 23);
+      assert(frac < 256);
+      // log2(magn(i))
+      log2 = (int16_t)(((31 - zeros) << 8)
+                       + WebRtcNsx_kLogTableFrac[frac]);
+      // log2(magn(i))*log(2)
+      lmagn[i] = (int16_t)((log2 * log2_const) >> 15);
+      // + log(2^stages)
+      lmagn[i] += logval;
+    } else {
+      lmagn[i] = logval;
+    }
+  }
+
+  int16x4_t Q3_16x4  = vdup_n_s16(3);
+  int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8);
+  int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor);
+
+  int16_t factor = FACTOR_Q7;
+  if (inst->blockIndex < END_STARTUP_LONG)
+    factor = FACTOR_Q7_STARTUP;
+
+  // Loop over simultaneous estimates
+  for (s = 0; s < SIMULT; s++) {
+    offset = s * inst->magnLen;
+
+    // Get counter values from state
+    counter = inst->noiseEstCounter[s];
+    assert(counter < 201);
+    countDiv = WebRtcNsx_kCounterDiv[counter];
+    countProd = (int16_t)(counter * countDiv);
+
+    // quant_est(...)
+    int16_t deltaBuff[8];
+    int16x4_t tmp16x4_0;
+    int16x4_t tmp16x4_1;
+    int16x4_t countDiv_16x4 = vdup_n_s16(countDiv);
+    int16x8_t countProd_16x8 = vdupq_n_s16(countProd);
+    int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv);
+    int16x8_t prod16x8 = vqrdmulhq_s16(WIDTHFACTOR_16x8, tmp16x8_0);
+    int16x8_t tmp16x8_1;
+    int16x8_t tmp16x8_2;
+    int16x8_t tmp16x8_3;
+    uint16x8_t tmp16x8_4;
+    int32x4_t tmp32x4;
+
+    for (i = 0; i + 7 < inst->magnLen; i += 8) {
+      // Compute delta.
+      // Smaller step size during startup. This prevents from using
+      // unrealistic values causing overflow.
+      tmp16x8_0 = vdupq_n_s16(factor);
+      vst1q_s16(deltaBuff, tmp16x8_0);
+
+      int j;
+      for (j = 0; j < 8; j++) {
+        if (inst->noiseEstDensity[offset + i + j] > 512) {
+          // Get values for deltaBuff by shifting intead of dividing.
+          int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i + j]);
+          deltaBuff[j] = (int16_t)(FACTOR_Q16 >> (14 - factor));
+        }
+      }
+
+      // Update log quantile estimate
+
+      // tmp16 = (int16_t)((delta * countDiv) >> 14);
+      tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4);
+      tmp16x4_1 = vshrn_n_s32(tmp32x4, 14);
+      tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4);
+      tmp16x4_0 = vshrn_n_s32(tmp32x4, 14);
+      tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // Keep for several lines.
+
+      // prepare for the "if" branch
+      // tmp16 += 2;
+      // tmp16_1 = (Word16)(tmp16>>2);
+      tmp16x8_1 = vrshrq_n_s16(tmp16x8_0, 2);
+
+      // inst->noiseEstLogQuantile[offset+i] + tmp16_1;
+      tmp16x8_2 = vld1q_s16(&inst->noiseEstLogQuantile[offset + i]); // Keep
+      tmp16x8_1 = vaddq_s16(tmp16x8_2, tmp16x8_1); // Keep for several lines
+
+      // Prepare for the "else" branch
+      // tmp16 += 1;
+      // tmp16_1 = (Word16)(tmp16>>1);
+      tmp16x8_0 = vrshrq_n_s16(tmp16x8_0, 1);
+
+      // tmp16_2 = (int16_t)((tmp16_1 * 3) >> 1);
+      tmp32x4 = vmull_s16(vget_low_s16(tmp16x8_0), Q3_16x4);
+      tmp16x4_1 = vshrn_n_s32(tmp32x4, 1);
+
+      // tmp16_2 = (int16_t)((tmp16_1 * 3) >> 1);
+      tmp32x4 = vmull_s16(vget_high_s16(tmp16x8_0), Q3_16x4);
+      tmp16x4_0 = vshrn_n_s32(tmp32x4, 1);
+
+      // inst->noiseEstLogQuantile[offset + i] - tmp16_2;
+      tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep
+      tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0);
+
+      // logval is the smallest fixed point representation we can have. Values
+      // below that will correspond to values in the interval [0, 1], which
+      // can't possibly occur.
+      tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8);
+
+      // Do the if-else branches:
+      tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines
+      tmp16x8_4 = vcgtq_s16(tmp16x8_3, tmp16x8_2);
+      tmp16x8_2 = vbslq_s16(tmp16x8_4, tmp16x8_1, tmp16x8_0);
+      vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2);
+
+      // Update density estimate
+      // tmp16_1 + tmp16_2
+      tmp16x8_1 = vld1q_s16(&inst->noiseEstDensity[offset + i]);
+      tmp16x8_0 = vqrdmulhq_s16(tmp16x8_1, countProd_16x8);
+      tmp16x8_0 = vaddq_s16(tmp16x8_0, prod16x8);
+
+      // lmagn[i] - inst->noiseEstLogQuantile[offset + i]
+      tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2);
+      tmp16x8_3 = vabsq_s16(tmp16x8_3);
+      tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3);
+      tmp16x8_1 = vbslq_s16(tmp16x8_4, tmp16x8_0, tmp16x8_1);
+      vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1);
+    }  // End loop over magnitude spectrum
+
+    // Last iteration over magnitude spectrum:
+    // compute delta
+    if (inst->noiseEstDensity[offset + i] > 512) {
+      // Get values for deltaBuff by shifting intead of dividing.
+      int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]);
+      delta = (int16_t)(FACTOR_Q16 >> (14 - factor));
+    } else {
+      delta = FACTOR_Q7;
+      if (inst->blockIndex < END_STARTUP_LONG) {
+        // Smaller step size during startup. This prevents from using
+        // unrealistic values causing overflow.
+        delta = FACTOR_Q7_STARTUP;
+      }
+    }
+    // update log quantile estimate
+    tmp16 = (int16_t)((delta * countDiv) >> 14);
+    if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
+      // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
+      // CounterDiv=1/(inst->counter[s]+1) in Q15
+      tmp16 += 2;
+      inst->noiseEstLogQuantile[offset + i] += tmp16 / 4;
+    } else {
+      tmp16 += 1;
+      // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
+      // TODO(bjornv): investigate why we need to truncate twice.
+      tmp16no2 = (int16_t)((tmp16 / 2) * 3 / 2);
+      inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
+      if (inst->noiseEstLogQuantile[offset + i] < logval) {
+        // logval is the smallest fixed point representation we can have.
+        // Values below that will correspond to values in the interval
+        // [0, 1], which can't possibly occur.
+        inst->noiseEstLogQuantile[offset + i] = logval;
+      }
+    }
+
+    // update density estimate
+    if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
+        < WIDTH_Q8) {
+      tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                   inst->noiseEstDensity[offset + i], countProd, 15);
+      tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                   width_factor, countDiv, 15);
+      inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
+    }
+
+
+    if (counter >= END_STARTUP_LONG) {
+      inst->noiseEstCounter[s] = 0;
+      if (inst->blockIndex >= END_STARTUP_LONG) {
+        UpdateNoiseEstimateNeon(inst, offset);
+      }
+    }
+    inst->noiseEstCounter[s]++;
+
+  }  // end loop over simultaneous estimates
+
+  // Sequentially update the noise during startup
+  if (inst->blockIndex < END_STARTUP_LONG) {
+    UpdateNoiseEstimateNeon(inst, offset);
+  }
+
+  for (i = 0; i < inst->magnLen; i++) {
+    noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise)
+  }
+  (*q_noise) = (int16_t)inst->qNoise;
+}
+
+// Filter the data in the frequency domain, and create spectrum.
+void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst,
+                                   int16_t* freq_buf) {
+  assert(inst->magnLen % 8 == 1);
+  assert(inst->anaLen2 % 16 == 0);
+
+  // (1) Filtering.
+
+  // Fixed point C code for the next block is as follows:
+  // for (i = 0; i < inst->magnLen; i++) {
+  //   inst->real[i] = (int16_t)((inst->real[i] *
+  //      (int16_t)(inst->noiseSupFilter[i])) >> 14);  // Q(normData-stages)
+  //   inst->imag[i] = (int16_t)((inst->imag[i] *
+  //      (int16_t)(inst->noiseSupFilter[i])) >> 14);  // Q(normData-stages)
+  // }
+
+  int16_t* preal = &inst->real[0];
+  int16_t* pimag = &inst->imag[0];
+  int16_t* pns_filter = (int16_t*)&inst->noiseSupFilter[0];
+  int16_t* pimag_end = pimag + inst->magnLen - 4;
+
+  while (pimag < pimag_end) {
+    int16x8_t real = vld1q_s16(preal);
+    int16x8_t imag = vld1q_s16(pimag);
+    int16x8_t ns_filter = vld1q_s16(pns_filter);
+
+    int32x4_t tmp_r_0 = vmull_s16(vget_low_s16(real), vget_low_s16(ns_filter));
+    int32x4_t tmp_i_0 = vmull_s16(vget_low_s16(imag), vget_low_s16(ns_filter));
+    int32x4_t tmp_r_1 = vmull_s16(vget_high_s16(real),
+                                  vget_high_s16(ns_filter));
+    int32x4_t tmp_i_1 = vmull_s16(vget_high_s16(imag),
+                                  vget_high_s16(ns_filter));
+
+    int16x4_t result_r_0 = vshrn_n_s32(tmp_r_0, 14);
+    int16x4_t result_i_0 = vshrn_n_s32(tmp_i_0, 14);
+    int16x4_t result_r_1 = vshrn_n_s32(tmp_r_1, 14);
+    int16x4_t result_i_1 = vshrn_n_s32(tmp_i_1, 14);
+
+    vst1q_s16(preal, vcombine_s16(result_r_0, result_r_1));
+    vst1q_s16(pimag, vcombine_s16(result_i_0, result_i_1));
+    preal += 8;
+    pimag += 8;
+    pns_filter += 8;
+  }
+
+  // Filter the last element
+  *preal = (int16_t)((*preal * *pns_filter) >> 14);
+  *pimag = (int16_t)((*pimag * *pns_filter) >> 14);
+
+  // (2) Create spectrum.
+
+  // Fixed point C code for the rest of the function is as follows:
+  // freq_buf[0] = inst->real[0];
+  // freq_buf[1] = -inst->imag[0];
+  // for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
+  //   freq_buf[j] = inst->real[i];
+  //   freq_buf[j + 1] = -inst->imag[i];
+  // }
+  // freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
+  // freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
+
+  preal = &inst->real[0];
+  pimag = &inst->imag[0];
+  pimag_end = pimag + inst->anaLen2;
+  int16_t * freq_buf_start = freq_buf;
+  while (pimag < pimag_end) {
+    // loop unroll
+    int16x8x2_t real_imag_0;
+    int16x8x2_t real_imag_1;
+    real_imag_0.val[1] = vld1q_s16(pimag);
+    real_imag_0.val[0] = vld1q_s16(preal);
+    preal += 8;
+    pimag += 8;
+    real_imag_1.val[1] = vld1q_s16(pimag);
+    real_imag_1.val[0] = vld1q_s16(preal);
+    preal += 8;
+    pimag += 8;
+
+    real_imag_0.val[1] = vnegq_s16(real_imag_0.val[1]);
+    real_imag_1.val[1] = vnegq_s16(real_imag_1.val[1]);
+    vst2q_s16(freq_buf_start, real_imag_0);
+    freq_buf_start += 16;
+    vst2q_s16(freq_buf_start, real_imag_1);
+    freq_buf_start += 16;
+  }
+  freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
+  freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
+}
+
+// For the noise supress process, synthesis, read out fully processed segment,
+// and update synthesis buffer.
+void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst,
+                                   int16_t* out_frame,
+                                   int16_t gain_factor) {
+  assert(inst->anaLen % 16 == 0);
+  assert(inst->blockLen10ms % 16 == 0);
+
+  int16_t* preal_start = inst->real;
+  const int16_t* pwindow = inst->window;
+  int16_t* preal_end = preal_start + inst->anaLen;
+  int16_t* psynthesis_buffer = inst->synthesisBuffer;
+
+  while (preal_start < preal_end) {
+    // Loop unroll.
+    int16x8_t window_0 = vld1q_s16(pwindow);
+    int16x8_t real_0 = vld1q_s16(preal_start);
+    int16x8_t synthesis_buffer_0 = vld1q_s16(psynthesis_buffer);
+
+    int16x8_t window_1 = vld1q_s16(pwindow + 8);
+    int16x8_t real_1 = vld1q_s16(preal_start + 8);
+    int16x8_t synthesis_buffer_1 = vld1q_s16(psynthesis_buffer + 8);
+
+    int32x4_t tmp32a_0_low = vmull_s16(vget_low_s16(real_0),
+                                       vget_low_s16(window_0));
+    int32x4_t tmp32a_0_high = vmull_s16(vget_high_s16(real_0),
+                                        vget_high_s16(window_0));
+
+    int32x4_t tmp32a_1_low = vmull_s16(vget_low_s16(real_1),
+                                       vget_low_s16(window_1));
+    int32x4_t tmp32a_1_high = vmull_s16(vget_high_s16(real_1),
+                                        vget_high_s16(window_1));
+
+    int16x4_t tmp16a_0_low = vqrshrn_n_s32(tmp32a_0_low, 14);
+    int16x4_t tmp16a_0_high = vqrshrn_n_s32(tmp32a_0_high, 14);
+
+    int16x4_t tmp16a_1_low = vqrshrn_n_s32(tmp32a_1_low, 14);
+    int16x4_t tmp16a_1_high = vqrshrn_n_s32(tmp32a_1_high, 14);
+
+    int32x4_t tmp32b_0_low = vmull_n_s16(tmp16a_0_low, gain_factor);
+    int32x4_t tmp32b_0_high = vmull_n_s16(tmp16a_0_high, gain_factor);
+
+    int32x4_t tmp32b_1_low = vmull_n_s16(tmp16a_1_low, gain_factor);
+    int32x4_t tmp32b_1_high = vmull_n_s16(tmp16a_1_high, gain_factor);
+
+    int16x4_t tmp16b_0_low = vqrshrn_n_s32(tmp32b_0_low, 13);
+    int16x4_t tmp16b_0_high = vqrshrn_n_s32(tmp32b_0_high, 13);
+
+    int16x4_t tmp16b_1_low = vqrshrn_n_s32(tmp32b_1_low, 13);
+    int16x4_t tmp16b_1_high = vqrshrn_n_s32(tmp32b_1_high, 13);
+
+    synthesis_buffer_0 = vqaddq_s16(vcombine_s16(tmp16b_0_low, tmp16b_0_high),
+                                    synthesis_buffer_0);
+    synthesis_buffer_1 = vqaddq_s16(vcombine_s16(tmp16b_1_low, tmp16b_1_high),
+                                    synthesis_buffer_1);
+    vst1q_s16(psynthesis_buffer, synthesis_buffer_0);
+    vst1q_s16(psynthesis_buffer + 8, synthesis_buffer_1);
+
+    pwindow += 16;
+    preal_start += 16;
+    psynthesis_buffer += 16;
+  }
+
+  // Read out fully processed segment.
+  int16_t * p_start = inst->synthesisBuffer;
+  int16_t * p_end = inst->synthesisBuffer + inst->blockLen10ms;
+  int16_t * p_frame = out_frame;
+  while (p_start < p_end) {
+    int16x8_t frame_0 = vld1q_s16(p_start);
+    vst1q_s16(p_frame, frame_0);
+    p_start += 8;
+    p_frame += 8;
+  }
+
+  // Update synthesis buffer.
+  int16_t* p_start_src = inst->synthesisBuffer + inst->blockLen10ms;
+  int16_t* p_end_src = inst->synthesisBuffer + inst->anaLen;
+  int16_t* p_start_dst = inst->synthesisBuffer;
+  while (p_start_src < p_end_src) {
+    int16x8_t frame = vld1q_s16(p_start_src);
+    vst1q_s16(p_start_dst, frame);
+    p_start_src += 8;
+    p_start_dst += 8;
+  }
+
+  p_start = inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms;
+  p_end = p_start + inst->blockLen10ms;
+  int16x8_t zero = vdupq_n_s16(0);
+  for (;p_start < p_end; p_start += 8) {
+    vst1q_s16(p_start, zero);
+  }
+}
+
+// Update analysis buffer for lower band, and window data before FFT.
+void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst,
+                                  int16_t* out,
+                                  int16_t* new_speech) {
+  assert(inst->blockLen10ms % 16 == 0);
+  assert(inst->anaLen % 16 == 0);
+
+  // For lower band update analysis buffer.
+  // memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
+  //     (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer));
+  int16_t* p_start_src = inst->analysisBuffer + inst->blockLen10ms;
+  int16_t* p_end_src = inst->analysisBuffer + inst->anaLen;
+  int16_t* p_start_dst = inst->analysisBuffer;
+  while (p_start_src < p_end_src) {
+    int16x8_t frame = vld1q_s16(p_start_src);
+    vst1q_s16(p_start_dst, frame);
+
+    p_start_src += 8;
+    p_start_dst += 8;
+  }
+
+  // memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms,
+  //     new_speech, inst->blockLen10ms * sizeof(*inst->analysisBuffer));
+  p_start_src = new_speech;
+  p_end_src = new_speech + inst->blockLen10ms;
+  p_start_dst = inst->analysisBuffer + inst->anaLen - inst->blockLen10ms;
+  while (p_start_src < p_end_src) {
+    int16x8_t frame = vld1q_s16(p_start_src);
+    vst1q_s16(p_start_dst, frame);
+
+    p_start_src += 8;
+    p_start_dst += 8;
+  }
+
+  // Window data before FFT.
+  int16_t* p_start_window = (int16_t*) inst->window;
+  int16_t* p_start_buffer = inst->analysisBuffer;
+  int16_t* p_start_out = out;
+  const int16_t* p_end_out = out + inst->anaLen;
+
+  // Load the first element to reduce pipeline bubble.
+  int16x8_t window = vld1q_s16(p_start_window);
+  int16x8_t buffer = vld1q_s16(p_start_buffer);
+  p_start_window += 8;
+  p_start_buffer += 8;
+
+  while (p_start_out < p_end_out) {
+    // Unroll loop.
+    int32x4_t tmp32_low = vmull_s16(vget_low_s16(window), vget_low_s16(buffer));
+    int32x4_t tmp32_high = vmull_s16(vget_high_s16(window),
+                                     vget_high_s16(buffer));
+    window = vld1q_s16(p_start_window);
+    buffer = vld1q_s16(p_start_buffer);
+
+    int16x4_t result_low = vrshrn_n_s32(tmp32_low, 14);
+    int16x4_t result_high = vrshrn_n_s32(tmp32_high, 14);
+    vst1q_s16(p_start_out, vcombine_s16(result_low, result_high));
+
+    p_start_buffer += 8;
+    p_start_window += 8;
+    p_start_out += 8;
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_defines.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_defines.h
new file mode 100644
index 00000000..862dc3ca
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_defines.h
@@ -0,0 +1,64 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_
+
+#define ANAL_BLOCKL_MAX         256 /* Max analysis block length */
+#define HALF_ANAL_BLOCKL        129 /* Half max analysis block length + 1 */
+#define NUM_HIGH_BANDS_MAX      2   /* Max number of high bands */
+#define SIMULT                  3
+#define END_STARTUP_LONG        200
+#define END_STARTUP_SHORT       50
+#define FACTOR_Q16              2621440 /* 40 in Q16 */
+#define FACTOR_Q7               5120 /* 40 in Q7 */
+#define FACTOR_Q7_STARTUP       1024 /* 8 in Q7 */
+#define WIDTH_Q8                3 /* 0.01 in Q8 (or 25 ) */
+
+/* PARAMETERS FOR NEW METHOD */
+#define DD_PR_SNR_Q11           2007 /* ~= Q11(0.98) DD update of prior SNR */
+#define ONE_MINUS_DD_PR_SNR_Q11 41 /* DD update of prior SNR */
+#define SPECT_FLAT_TAVG_Q14     4915 /* (0.30) tavg parameter for spectral flatness measure */
+#define SPECT_DIFF_TAVG_Q8      77 /* (0.30) tavg parameter for spectral flatness measure */
+#define PRIOR_UPDATE_Q14        1638 /* Q14(0.1) Update parameter of prior model */
+#define NOISE_UPDATE_Q8         26 /* 26 ~= Q8(0.1) Update parameter for noise */
+
+/* Probability threshold for noise state in speech/noise likelihood. */
+#define ONE_MINUS_PROB_RANGE_Q8 205 /* 205 ~= Q8(0.8) */
+#define HIST_PAR_EST            1000 /* Histogram size for estimation of parameters */
+
+/* FEATURE EXTRACTION CONFIG  */
+/* Bin size of histogram */
+#define BIN_SIZE_LRT            10
+/* Scale parameters: multiply dominant peaks of the histograms by scale factor to obtain. */
+/* Thresholds for prior model */
+#define FACTOR_1_LRT_DIFF       6 /* For LRT and spectral difference (5 times bigger) */
+/* For spectral_flatness: used when noise is flatter than speech (10 times bigger). */
+#define FACTOR_2_FLAT_Q10       922
+/* Peak limit for spectral flatness (varies between 0 and 1) */
+#define THRES_PEAK_FLAT         24 /* * 2 * BIN_SIZE_FLAT_FX */
+/* Limit on spacing of two highest peaks in histogram: spacing determined by bin size. */
+#define LIM_PEAK_SPACE_FLAT_DIFF    4 /* * 2 * BIN_SIZE_DIFF_FX */
+/* Limit on relevance of second peak */
+#define LIM_PEAK_WEIGHT_FLAT_DIFF   2
+#define THRES_FLUCT_LRT         10240 /* = 20 * inst->modelUpdate; fluctuation limit of LRT feat. */
+/* Limit on the max and min values for the feature thresholds */
+#define MAX_FLAT_Q10            38912 /*  * 2 * BIN_SIZE_FLAT_FX */
+#define MIN_FLAT_Q10            4096 /*  * 2 * BIN_SIZE_FLAT_FX */
+#define MAX_DIFF                100 /* * 2 * BIN_SIZE_DIFF_FX */
+#define MIN_DIFF                16 /* * 2 * BIN_SIZE_DIFF_FX */
+/* Criteria of weight of histogram peak  to accept/reject feature */
+#define THRES_WEIGHT_FLAT_DIFF  154 /*(int)(0.3*(inst->modelUpdate)) for flatness and difference */
+
+#define STAT_UPDATES            9 /* Update every 512 = 1 << 9 block */
+#define ONE_MINUS_GAMMA_PAUSE_Q8    13 /* ~= Q8(0.05) Update for conservative noise estimate */
+#define GAMMA_NOISE_TRANS_AND_SPEECH_Q8 3 /* ~= Q8(0.01) Update for transition and noise region */
+
+#endif /* WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ */
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/windows_private.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/windows_private.h
new file mode 100644
index 00000000..44c2e846
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/windows_private.h
@@ -0,0 +1,574 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_
+
+// Hanning window for 4ms 16kHz
+static const float kHanning64w128[128] = {
+  0.00000000000000f, 0.02454122852291f, 0.04906767432742f,
+  0.07356456359967f, 0.09801714032956f, 0.12241067519922f,
+  0.14673047445536f, 0.17096188876030f, 0.19509032201613f,
+  0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
+  0.29028467725446f, 0.31368174039889f, 0.33688985339222f,
+  0.35989503653499f, 0.38268343236509f, 0.40524131400499f,
+  0.42755509343028f, 0.44961132965461f, 0.47139673682600f,
+  0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
+  0.55557023301960f, 0.57580819141785f, 0.59569930449243f,
+  0.61523159058063f, 0.63439328416365f, 0.65317284295378f,
+  0.67155895484702f, 0.68954054473707f, 0.70710678118655f,
+  0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
+  0.77301045336274f, 0.78834642762661f, 0.80320753148064f,
+  0.81758481315158f, 0.83146961230255f, 0.84485356524971f,
+  0.85772861000027f, 0.87008699110871f, 0.88192126434835f,
+  0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
+  0.92387953251129f, 0.93299279883474f, 0.94154406518302f,
+  0.94952818059304f, 0.95694033573221f, 0.96377606579544f,
+  0.97003125319454f, 0.97570213003853f, 0.98078528040323f,
+  0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
+  0.99518472667220f, 0.99729045667869f, 0.99879545620517f,
+  0.99969881869620f, 1.00000000000000f,
+  0.99969881869620f, 0.99879545620517f, 0.99729045667869f,
+  0.99518472667220f, 0.99247953459871f, 0.98917650996478f,
+  0.98527764238894f, 0.98078528040323f, 0.97570213003853f,
+  0.97003125319454f, 0.96377606579544f, 0.95694033573221f,
+  0.94952818059304f, 0.94154406518302f, 0.93299279883474f,
+  0.92387953251129f, 0.91420975570353f, 0.90398929312344f,
+  0.89322430119552f, 0.88192126434835f, 0.87008699110871f,
+  0.85772861000027f, 0.84485356524971f, 0.83146961230255f,
+  0.81758481315158f, 0.80320753148064f, 0.78834642762661f,
+  0.77301045336274f, 0.75720884650648f, 0.74095112535496f,
+  0.72424708295147f, 0.70710678118655f, 0.68954054473707f,
+  0.67155895484702f, 0.65317284295378f, 0.63439328416365f,
+  0.61523159058063f, 0.59569930449243f, 0.57580819141785f,
+  0.55557023301960f, 0.53499761988710f, 0.51410274419322f,
+  0.49289819222978f, 0.47139673682600f, 0.44961132965461f,
+  0.42755509343028f, 0.40524131400499f, 0.38268343236509f,
+  0.35989503653499f, 0.33688985339222f, 0.31368174039889f,
+  0.29028467725446f, 0.26671275747490f, 0.24298017990326f,
+  0.21910124015687f, 0.19509032201613f, 0.17096188876030f,
+  0.14673047445536f, 0.12241067519922f, 0.09801714032956f,
+  0.07356456359967f, 0.04906767432742f, 0.02454122852291f
+};
+
+
+
+// hybrib Hanning & flat window
+static const float kBlocks80w128[128] = {
+  (float)0.00000000, (float)0.03271908, (float)0.06540313, (float)0.09801714, (float)0.13052619,
+  (float)0.16289547, (float)0.19509032, (float)0.22707626, (float)0.25881905, (float)0.29028468,
+  (float)0.32143947, (float)0.35225005, (float)0.38268343, (float)0.41270703, (float)0.44228869,
+  (float)0.47139674, (float)0.50000000, (float)0.52806785, (float)0.55557023, (float)0.58247770,
+  (float)0.60876143, (float)0.63439328, (float)0.65934582, (float)0.68359230, (float)0.70710678,
+  (float)0.72986407, (float)0.75183981, (float)0.77301045, (float)0.79335334, (float)0.81284668,
+  (float)0.83146961, (float)0.84920218, (float)0.86602540, (float)0.88192126, (float)0.89687274,
+  (float)0.91086382, (float)0.92387953, (float)0.93590593, (float)0.94693013, (float)0.95694034,
+  (float)0.96592583, (float)0.97387698, (float)0.98078528, (float)0.98664333, (float)0.99144486,
+  (float)0.99518473, (float)0.99785892, (float)0.99946459, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)0.99946459, (float)0.99785892, (float)0.99518473, (float)0.99144486,
+  (float)0.98664333, (float)0.98078528, (float)0.97387698, (float)0.96592583, (float)0.95694034,
+  (float)0.94693013, (float)0.93590593, (float)0.92387953, (float)0.91086382, (float)0.89687274,
+  (float)0.88192126, (float)0.86602540, (float)0.84920218, (float)0.83146961, (float)0.81284668,
+  (float)0.79335334, (float)0.77301045, (float)0.75183981, (float)0.72986407, (float)0.70710678,
+  (float)0.68359230, (float)0.65934582, (float)0.63439328, (float)0.60876143, (float)0.58247770,
+  (float)0.55557023, (float)0.52806785, (float)0.50000000, (float)0.47139674, (float)0.44228869,
+  (float)0.41270703, (float)0.38268343, (float)0.35225005, (float)0.32143947, (float)0.29028468,
+  (float)0.25881905, (float)0.22707626, (float)0.19509032, (float)0.16289547, (float)0.13052619,
+  (float)0.09801714, (float)0.06540313, (float)0.03271908
+};
+
+// hybrib Hanning & flat window
+static const float kBlocks160w256[256] = {
+  (float)0.00000000, (float)0.01636173, (float)0.03271908, (float)0.04906767, (float)0.06540313,
+  (float)0.08172107, (float)0.09801714, (float)0.11428696, (float)0.13052619, (float)0.14673047,
+  (float)0.16289547, (float)0.17901686, (float)0.19509032, (float)0.21111155, (float)0.22707626,
+  (float)0.24298018, (float)0.25881905, (float)0.27458862, (float)0.29028468, (float)0.30590302,
+  (float)0.32143947, (float)0.33688985, (float)0.35225005, (float)0.36751594, (float)0.38268343,
+  (float)0.39774847, (float)0.41270703, (float)0.42755509, (float)0.44228869, (float)0.45690388,
+  (float)0.47139674, (float)0.48576339, (float)0.50000000, (float)0.51410274, (float)0.52806785,
+  (float)0.54189158, (float)0.55557023, (float)0.56910015, (float)0.58247770, (float)0.59569930,
+  (float)0.60876143, (float)0.62166057, (float)0.63439328, (float)0.64695615, (float)0.65934582,
+  (float)0.67155895, (float)0.68359230, (float)0.69544264, (float)0.70710678, (float)0.71858162,
+  (float)0.72986407, (float)0.74095113, (float)0.75183981, (float)0.76252720, (float)0.77301045,
+  (float)0.78328675, (float)0.79335334, (float)0.80320753, (float)0.81284668, (float)0.82226822,
+  (float)0.83146961, (float)0.84044840, (float)0.84920218, (float)0.85772861, (float)0.86602540,
+  (float)0.87409034, (float)0.88192126, (float)0.88951608, (float)0.89687274, (float)0.90398929,
+  (float)0.91086382, (float)0.91749450, (float)0.92387953, (float)0.93001722, (float)0.93590593,
+  (float)0.94154407, (float)0.94693013, (float)0.95206268, (float)0.95694034, (float)0.96156180,
+  (float)0.96592583, (float)0.97003125, (float)0.97387698, (float)0.97746197, (float)0.98078528,
+  (float)0.98384601, (float)0.98664333, (float)0.98917651, (float)0.99144486, (float)0.99344778,
+  (float)0.99518473, (float)0.99665524, (float)0.99785892, (float)0.99879546, (float)0.99946459,
+  (float)0.99986614, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)0.99986614, (float)0.99946459, (float)0.99879546, (float)0.99785892,
+  (float)0.99665524, (float)0.99518473, (float)0.99344778, (float)0.99144486, (float)0.98917651,
+  (float)0.98664333, (float)0.98384601, (float)0.98078528, (float)0.97746197, (float)0.97387698,
+  (float)0.97003125, (float)0.96592583, (float)0.96156180, (float)0.95694034, (float)0.95206268,
+  (float)0.94693013, (float)0.94154407, (float)0.93590593, (float)0.93001722, (float)0.92387953,
+  (float)0.91749450, (float)0.91086382, (float)0.90398929, (float)0.89687274, (float)0.88951608,
+  (float)0.88192126, (float)0.87409034, (float)0.86602540, (float)0.85772861, (float)0.84920218,
+  (float)0.84044840, (float)0.83146961, (float)0.82226822, (float)0.81284668, (float)0.80320753,
+  (float)0.79335334, (float)0.78328675, (float)0.77301045, (float)0.76252720, (float)0.75183981,
+  (float)0.74095113, (float)0.72986407, (float)0.71858162, (float)0.70710678, (float)0.69544264,
+  (float)0.68359230, (float)0.67155895, (float)0.65934582, (float)0.64695615, (float)0.63439328,
+  (float)0.62166057, (float)0.60876143, (float)0.59569930, (float)0.58247770, (float)0.56910015,
+  (float)0.55557023, (float)0.54189158, (float)0.52806785, (float)0.51410274, (float)0.50000000,
+  (float)0.48576339, (float)0.47139674, (float)0.45690388, (float)0.44228869, (float)0.42755509,
+  (float)0.41270703, (float)0.39774847, (float)0.38268343, (float)0.36751594, (float)0.35225005,
+  (float)0.33688985, (float)0.32143947, (float)0.30590302, (float)0.29028468, (float)0.27458862,
+  (float)0.25881905, (float)0.24298018, (float)0.22707626, (float)0.21111155, (float)0.19509032,
+  (float)0.17901686, (float)0.16289547, (float)0.14673047, (float)0.13052619, (float)0.11428696,
+  (float)0.09801714, (float)0.08172107, (float)0.06540313, (float)0.04906767, (float)0.03271908,
+  (float)0.01636173
+};
+
+// hybrib Hanning & flat window: for 20ms
+static const float kBlocks320w512[512] = {
+  (float)0.00000000, (float)0.00818114, (float)0.01636173, (float)0.02454123, (float)0.03271908,
+  (float)0.04089475, (float)0.04906767, (float)0.05723732, (float)0.06540313, (float)0.07356456,
+  (float)0.08172107, (float)0.08987211, (float)0.09801714, (float)0.10615561, (float)0.11428696,
+  (float)0.12241068, (float)0.13052619, (float)0.13863297, (float)0.14673047, (float)0.15481816,
+  (float)0.16289547, (float)0.17096189, (float)0.17901686, (float)0.18705985, (float)0.19509032,
+  (float)0.20310773, (float)0.21111155, (float)0.21910124, (float)0.22707626, (float)0.23503609,
+  (float)0.24298018, (float)0.25090801, (float)0.25881905, (float)0.26671276, (float)0.27458862,
+  (float)0.28244610, (float)0.29028468, (float)0.29810383, (float)0.30590302, (float)0.31368174,
+  (float)0.32143947, (float)0.32917568, (float)0.33688985, (float)0.34458148, (float)0.35225005,
+  (float)0.35989504, (float)0.36751594, (float)0.37511224, (float)0.38268343, (float)0.39022901,
+  (float)0.39774847, (float)0.40524131, (float)0.41270703, (float)0.42014512, (float)0.42755509,
+  (float)0.43493645, (float)0.44228869, (float)0.44961133, (float)0.45690388, (float)0.46416584,
+  (float)0.47139674, (float)0.47859608, (float)0.48576339, (float)0.49289819, (float)0.50000000,
+  (float)0.50706834, (float)0.51410274, (float)0.52110274, (float)0.52806785, (float)0.53499762,
+  (float)0.54189158, (float)0.54874927, (float)0.55557023, (float)0.56235401, (float)0.56910015,
+  (float)0.57580819, (float)0.58247770, (float)0.58910822, (float)0.59569930, (float)0.60225052,
+  (float)0.60876143, (float)0.61523159, (float)0.62166057, (float)0.62804795, (float)0.63439328,
+  (float)0.64069616, (float)0.64695615, (float)0.65317284, (float)0.65934582, (float)0.66547466,
+  (float)0.67155895, (float)0.67759830, (float)0.68359230, (float)0.68954054, (float)0.69544264,
+  (float)0.70129818, (float)0.70710678, (float)0.71286806, (float)0.71858162, (float)0.72424708,
+  (float)0.72986407, (float)0.73543221, (float)0.74095113, (float)0.74642045, (float)0.75183981,
+  (float)0.75720885, (float)0.76252720, (float)0.76779452, (float)0.77301045, (float)0.77817464,
+  (float)0.78328675, (float)0.78834643, (float)0.79335334, (float)0.79830715, (float)0.80320753,
+  (float)0.80805415, (float)0.81284668, (float)0.81758481, (float)0.82226822, (float)0.82689659,
+  (float)0.83146961, (float)0.83598698, (float)0.84044840, (float)0.84485357, (float)0.84920218,
+  (float)0.85349396, (float)0.85772861, (float)0.86190585, (float)0.86602540, (float)0.87008699,
+  (float)0.87409034, (float)0.87803519, (float)0.88192126, (float)0.88574831, (float)0.88951608,
+  (float)0.89322430, (float)0.89687274, (float)0.90046115, (float)0.90398929, (float)0.90745693,
+  (float)0.91086382, (float)0.91420976, (float)0.91749450, (float)0.92071783, (float)0.92387953,
+  (float)0.92697940, (float)0.93001722, (float)0.93299280, (float)0.93590593, (float)0.93875641,
+  (float)0.94154407, (float)0.94426870, (float)0.94693013, (float)0.94952818, (float)0.95206268,
+  (float)0.95453345, (float)0.95694034, (float)0.95928317, (float)0.96156180, (float)0.96377607,
+  (float)0.96592583, (float)0.96801094, (float)0.97003125, (float)0.97198664, (float)0.97387698,
+  (float)0.97570213, (float)0.97746197, (float)0.97915640, (float)0.98078528, (float)0.98234852,
+  (float)0.98384601, (float)0.98527764, (float)0.98664333, (float)0.98794298, (float)0.98917651,
+  (float)0.99034383, (float)0.99144486, (float)0.99247953, (float)0.99344778, (float)0.99434953,
+  (float)0.99518473, (float)0.99595331, (float)0.99665524, (float)0.99729046, (float)0.99785892,
+  (float)0.99836060, (float)0.99879546, (float)0.99916346, (float)0.99946459, (float)0.99969882,
+  (float)0.99986614, (float)0.99996653, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
+  (float)1.00000000, (float)0.99996653, (float)0.99986614, (float)0.99969882, (float)0.99946459,
+  (float)0.99916346, (float)0.99879546, (float)0.99836060, (float)0.99785892, (float)0.99729046,
+  (float)0.99665524, (float)0.99595331, (float)0.99518473, (float)0.99434953, (float)0.99344778,
+  (float)0.99247953, (float)0.99144486, (float)0.99034383, (float)0.98917651, (float)0.98794298,
+  (float)0.98664333, (float)0.98527764, (float)0.98384601, (float)0.98234852, (float)0.98078528,
+  (float)0.97915640, (float)0.97746197, (float)0.97570213, (float)0.97387698, (float)0.97198664,
+  (float)0.97003125, (float)0.96801094, (float)0.96592583, (float)0.96377607, (float)0.96156180,
+  (float)0.95928317, (float)0.95694034, (float)0.95453345, (float)0.95206268, (float)0.94952818,
+  (float)0.94693013, (float)0.94426870, (float)0.94154407, (float)0.93875641, (float)0.93590593,
+  (float)0.93299280, (float)0.93001722, (float)0.92697940, (float)0.92387953, (float)0.92071783,
+  (float)0.91749450, (float)0.91420976, (float)0.91086382, (float)0.90745693, (float)0.90398929,
+  (float)0.90046115, (float)0.89687274, (float)0.89322430, (float)0.88951608, (float)0.88574831,
+  (float)0.88192126, (float)0.87803519, (float)0.87409034, (float)0.87008699, (float)0.86602540,
+  (float)0.86190585, (float)0.85772861, (float)0.85349396, (float)0.84920218, (float)0.84485357,
+  (float)0.84044840, (float)0.83598698, (float)0.83146961, (float)0.82689659, (float)0.82226822,
+  (float)0.81758481, (float)0.81284668, (float)0.80805415, (float)0.80320753, (float)0.79830715,
+  (float)0.79335334, (float)0.78834643, (float)0.78328675, (float)0.77817464, (float)0.77301045,
+  (float)0.76779452, (float)0.76252720, (float)0.75720885, (float)0.75183981, (float)0.74642045,
+  (float)0.74095113, (float)0.73543221, (float)0.72986407, (float)0.72424708, (float)0.71858162,
+  (float)0.71286806, (float)0.70710678, (float)0.70129818, (float)0.69544264, (float)0.68954054,
+  (float)0.68359230, (float)0.67759830, (float)0.67155895, (float)0.66547466, (float)0.65934582,
+  (float)0.65317284, (float)0.64695615, (float)0.64069616, (float)0.63439328, (float)0.62804795,
+  (float)0.62166057, (float)0.61523159, (float)0.60876143, (float)0.60225052, (float)0.59569930,
+  (float)0.58910822, (float)0.58247770, (float)0.57580819, (float)0.56910015, (float)0.56235401,
+  (float)0.55557023, (float)0.54874927, (float)0.54189158, (float)0.53499762, (float)0.52806785,
+  (float)0.52110274, (float)0.51410274, (float)0.50706834, (float)0.50000000, (float)0.49289819,
+  (float)0.48576339, (float)0.47859608, (float)0.47139674, (float)0.46416584, (float)0.45690388,
+  (float)0.44961133, (float)0.44228869, (float)0.43493645, (float)0.42755509, (float)0.42014512,
+  (float)0.41270703, (float)0.40524131, (float)0.39774847, (float)0.39022901, (float)0.38268343,
+  (float)0.37511224, (float)0.36751594, (float)0.35989504, (float)0.35225005, (float)0.34458148,
+  (float)0.33688985, (float)0.32917568, (float)0.32143947, (float)0.31368174, (float)0.30590302,
+  (float)0.29810383, (float)0.29028468, (float)0.28244610, (float)0.27458862, (float)0.26671276,
+  (float)0.25881905, (float)0.25090801, (float)0.24298018, (float)0.23503609, (float)0.22707626,
+  (float)0.21910124, (float)0.21111155, (float)0.20310773, (float)0.19509032, (float)0.18705985,
+  (float)0.17901686, (float)0.17096189, (float)0.16289547, (float)0.15481816, (float)0.14673047,
+  (float)0.13863297, (float)0.13052619, (float)0.12241068, (float)0.11428696, (float)0.10615561,
+  (float)0.09801714, (float)0.08987211, (float)0.08172107, (float)0.07356456, (float)0.06540313,
+  (float)0.05723732, (float)0.04906767, (float)0.04089475, (float)0.03271908, (float)0.02454123,
+  (float)0.01636173, (float)0.00818114
+};
+
+
+// Hanning window: for 15ms at 16kHz with symmetric zeros
+static const float kBlocks240w512[512] = {
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00654494, (float)0.01308960, (float)0.01963369,
+  (float)0.02617695, (float)0.03271908, (float)0.03925982, (float)0.04579887, (float)0.05233596,
+  (float)0.05887080, (float)0.06540313, (float)0.07193266, (float)0.07845910, (float)0.08498218,
+  (float)0.09150162, (float)0.09801714, (float)0.10452846, (float)0.11103531, (float)0.11753740,
+  (float)0.12403446, (float)0.13052620, (float)0.13701233, (float)0.14349262, (float)0.14996676,
+  (float)0.15643448, (float)0.16289547, (float)0.16934951, (float)0.17579629, (float)0.18223552,
+  (float)0.18866697, (float)0.19509032, (float)0.20150533, (float)0.20791170, (float)0.21430916,
+  (float)0.22069745, (float)0.22707628, (float)0.23344538, (float)0.23980446, (float)0.24615330,
+  (float)0.25249159, (float)0.25881904, (float)0.26513544, (float)0.27144045, (float)0.27773386,
+  (float)0.28401536, (float)0.29028466, (float)0.29654160, (float)0.30278578, (float)0.30901700,
+  (float)0.31523499, (float)0.32143945, (float)0.32763019, (float)0.33380687, (float)0.33996925,
+  (float)0.34611708, (float)0.35225007, (float)0.35836795, (float)0.36447051, (float)0.37055743,
+  (float)0.37662852, (float)0.38268346, (float)0.38872197, (float)0.39474389, (float)0.40074885,
+  (float)0.40673664, (float)0.41270703, (float)0.41865975, (float)0.42459452, (float)0.43051112,
+  (float)0.43640924, (float)0.44228873, (float)0.44814920, (float)0.45399052, (float)0.45981237,
+  (float)0.46561453, (float)0.47139674, (float)0.47715878, (float)0.48290035, (float)0.48862126,
+  (float)0.49432120, (float)0.50000000, (float)0.50565743, (float)0.51129311, (float)0.51690692,
+  (float)0.52249855, (float)0.52806789, (float)0.53361452, (float)0.53913832, (float)0.54463905,
+  (float)0.55011642, (float)0.55557024, (float)0.56100029, (float)0.56640625, (float)0.57178795,
+  (float)0.57714522, (float)0.58247769, (float)0.58778524, (float)0.59306765, (float)0.59832460,
+  (float)0.60355598, (float)0.60876143, (float)0.61394083, (float)0.61909395, (float)0.62422055,
+  (float)0.62932038, (float)0.63439333, (float)0.63943899, (float)0.64445734, (float)0.64944810,
+  (float)0.65441096, (float)0.65934587, (float)0.66425246, (float)0.66913062, (float)0.67398012,
+  (float)0.67880076, (float)0.68359232, (float)0.68835455, (float)0.69308740, (float)0.69779050,
+  (float)0.70246369, (float)0.70710677, (float)0.71171963, (float)0.71630198, (float)0.72085363,
+  (float)0.72537440, (float)0.72986406, (float)0.73432255, (float)0.73874950, (float)0.74314487,
+  (float)0.74750835, (float)0.75183982, (float)0.75613910, (float)0.76040596, (float)0.76464027,
+  (float)0.76884186, (float)0.77301043, (float)0.77714598, (float)0.78124821, (float)0.78531694,
+  (float)0.78935206, (float)0.79335338, (float)0.79732066, (float)0.80125386, (float)0.80515265,
+  (float)0.80901700, (float)0.81284672, (float)0.81664157, (float)0.82040149, (float)0.82412618,
+  (float)0.82781565, (float)0.83146966, (float)0.83508795, (float)0.83867061, (float)0.84221727,
+  (float)0.84572780, (float)0.84920216, (float)0.85264021, (float)0.85604161, (float)0.85940641,
+  (float)0.86273444, (float)0.86602545, (float)0.86927933, (float)0.87249607, (float)0.87567532,
+  (float)0.87881714, (float)0.88192129, (float)0.88498765, (float)0.88801610, (float)0.89100653,
+  (float)0.89395881, (float)0.89687276, (float)0.89974827, (float)0.90258533, (float)0.90538365,
+  (float)0.90814316, (float)0.91086388, (float)0.91354549, (float)0.91618794, (float)0.91879123,
+  (float)0.92135513, (float)0.92387950, (float)0.92636442, (float)0.92880958, (float)0.93121493,
+  (float)0.93358046, (float)0.93590593, (float)0.93819135, (float)0.94043654, (float)0.94264150,
+  (float)0.94480604, (float)0.94693011, (float)0.94901365, (float)0.95105654, (float)0.95305866,
+  (float)0.95501995, (float)0.95694035, (float)0.95881975, (float)0.96065807, (float)0.96245527,
+  (float)0.96421117, (float)0.96592581, (float)0.96759909, (float)0.96923089, (float)0.97082120,
+  (float)0.97236991, (float)0.97387701, (float)0.97534233, (float)0.97676587, (float)0.97814763,
+  (float)0.97948742, (float)0.98078531, (float)0.98204112, (float)0.98325491, (float)0.98442656,
+  (float)0.98555607, (float)0.98664331, (float)0.98768836, (float)0.98869103, (float)0.98965138,
+  (float)0.99056935, (float)0.99144489, (float)0.99227792, (float)0.99306846, (float)0.99381649,
+  (float)0.99452192, (float)0.99518472, (float)0.99580491, (float)0.99638247, (float)0.99691731,
+  (float)0.99740952, (float)0.99785894, (float)0.99826562, (float)0.99862951, (float)0.99895066,
+  (float)0.99922901, (float)0.99946457, (float)0.99965733, (float)0.99980724, (float)0.99991435,
+  (float)0.99997860, (float)1.00000000, (float)0.99997860, (float)0.99991435, (float)0.99980724,
+  (float)0.99965733, (float)0.99946457, (float)0.99922901, (float)0.99895066, (float)0.99862951,
+  (float)0.99826562, (float)0.99785894, (float)0.99740946, (float)0.99691731, (float)0.99638247,
+  (float)0.99580491, (float)0.99518472, (float)0.99452192, (float)0.99381644, (float)0.99306846,
+  (float)0.99227792, (float)0.99144489, (float)0.99056935, (float)0.98965138, (float)0.98869103,
+  (float)0.98768836, (float)0.98664331, (float)0.98555607, (float)0.98442656, (float)0.98325491,
+  (float)0.98204112, (float)0.98078525, (float)0.97948742, (float)0.97814757, (float)0.97676587,
+  (float)0.97534227, (float)0.97387695, (float)0.97236991, (float)0.97082120, (float)0.96923089,
+  (float)0.96759909, (float)0.96592581, (float)0.96421117, (float)0.96245521, (float)0.96065807,
+  (float)0.95881969, (float)0.95694029, (float)0.95501995, (float)0.95305860, (float)0.95105648,
+  (float)0.94901365, (float)0.94693011, (float)0.94480604, (float)0.94264150, (float)0.94043654,
+  (float)0.93819129, (float)0.93590593, (float)0.93358046, (float)0.93121493, (float)0.92880952,
+  (float)0.92636436, (float)0.92387950, (float)0.92135507, (float)0.91879123, (float)0.91618794,
+  (float)0.91354543, (float)0.91086382, (float)0.90814310, (float)0.90538365, (float)0.90258527,
+  (float)0.89974827, (float)0.89687276, (float)0.89395875, (float)0.89100647, (float)0.88801610,
+  (float)0.88498759, (float)0.88192123, (float)0.87881714, (float)0.87567532, (float)0.87249595,
+  (float)0.86927933, (float)0.86602539, (float)0.86273432, (float)0.85940641, (float)0.85604161,
+  (float)0.85264009, (float)0.84920216, (float)0.84572780, (float)0.84221715, (float)0.83867055,
+  (float)0.83508795, (float)0.83146954, (float)0.82781565, (float)0.82412612, (float)0.82040137,
+  (float)0.81664157, (float)0.81284660, (float)0.80901700, (float)0.80515265, (float)0.80125374,
+  (float)0.79732066, (float)0.79335332, (float)0.78935200, (float)0.78531694, (float)0.78124815,
+  (float)0.77714586, (float)0.77301049, (float)0.76884180, (float)0.76464021, (float)0.76040596,
+  (float)0.75613904, (float)0.75183970, (float)0.74750835, (float)0.74314481, (float)0.73874938,
+  (float)0.73432249, (float)0.72986400, (float)0.72537428, (float)0.72085363, (float)0.71630186,
+  (float)0.71171951, (float)0.70710677, (float)0.70246363, (float)0.69779032, (float)0.69308734,
+  (float)0.68835449, (float)0.68359220, (float)0.67880070, (float)0.67398006, (float)0.66913044,
+  (float)0.66425240, (float)0.65934575, (float)0.65441096, (float)0.64944804, (float)0.64445722,
+  (float)0.63943905, (float)0.63439327, (float)0.62932026, (float)0.62422055, (float)0.61909389,
+  (float)0.61394072, (float)0.60876143, (float)0.60355592, (float)0.59832448, (float)0.59306765,
+  (float)0.58778518, (float)0.58247757, (float)0.57714522, (float)0.57178789, (float)0.56640613,
+  (float)0.56100023, (float)0.55557019, (float)0.55011630, (float)0.54463905, (float)0.53913826,
+  (float)0.53361434, (float)0.52806783, (float)0.52249849, (float)0.51690674, (float)0.51129305,
+  (float)0.50565726, (float)0.50000006, (float)0.49432117, (float)0.48862115, (float)0.48290038,
+  (float)0.47715873, (float)0.47139663, (float)0.46561456, (float)0.45981231, (float)0.45399037,
+  (float)0.44814920, (float)0.44228864, (float)0.43640912, (float)0.43051112, (float)0.42459446,
+  (float)0.41865960, (float)0.41270703, (float)0.40673658, (float)0.40074870, (float)0.39474386,
+  (float)0.38872188, (float)0.38268328, (float)0.37662849, (float)0.37055734, (float)0.36447033,
+  (float)0.35836792, (float)0.35224995, (float)0.34611690, (float)0.33996922, (float)0.33380675,
+  (float)0.32763001, (float)0.32143945, (float)0.31523487, (float)0.30901679, (float)0.30278572,
+  (float)0.29654145, (float)0.29028472, (float)0.28401530, (float)0.27773371, (float)0.27144048,
+  (float)0.26513538, (float)0.25881892, (float)0.25249159, (float)0.24615324, (float)0.23980433,
+  (float)0.23344538, (float)0.22707619, (float)0.22069728, (float)0.21430916, (float)0.20791161,
+  (float)0.20150517, (float)0.19509031, (float)0.18866688, (float)0.18223536, (float)0.17579627,
+  (float)0.16934940, (float)0.16289529, (float)0.15643445, (float)0.14996666, (float)0.14349243,
+  (float)0.13701232, (float)0.13052608, (float)0.12403426, (float)0.11753736, (float)0.11103519,
+  (float)0.10452849, (float)0.09801710, (float)0.09150149, (float)0.08498220, (float)0.07845904,
+  (float)0.07193252, (float)0.06540315, (float)0.05887074, (float)0.05233581, (float)0.04579888,
+  (float)0.03925974, (float)0.03271893, (float)0.02617695, (float)0.01963361, (float)0.01308943,
+  (float)0.00654493, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000
+};
+
+
+// Hanning window: for 30ms with 1024 fft with symmetric zeros at 16kHz
+static const float kBlocks480w1024[1024] = {
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00327249, (float)0.00654494,
+  (float)0.00981732, (float)0.01308960, (float)0.01636173, (float)0.01963369, (float)0.02290544,
+  (float)0.02617695, (float)0.02944817, (float)0.03271908, (float)0.03598964, (float)0.03925982,
+  (float)0.04252957, (float)0.04579887, (float)0.04906768, (float)0.05233596, (float)0.05560368,
+  (float)0.05887080, (float)0.06213730, (float)0.06540313, (float)0.06866825, (float)0.07193266,
+  (float)0.07519628, (float)0.07845910, (float)0.08172107, (float)0.08498218, (float)0.08824237,
+  (float)0.09150162, (float)0.09475989, (float)0.09801714, (float)0.10127335, (float)0.10452846,
+  (float)0.10778246, (float)0.11103531, (float)0.11428697, (float)0.11753740, (float)0.12078657,
+  (float)0.12403446, (float)0.12728101, (float)0.13052620, (float)0.13376999, (float)0.13701233,
+  (float)0.14025325, (float)0.14349262, (float)0.14673047, (float)0.14996676, (float)0.15320145,
+  (float)0.15643448, (float)0.15966582, (float)0.16289547, (float)0.16612339, (float)0.16934951,
+  (float)0.17257382, (float)0.17579629, (float)0.17901687, (float)0.18223552, (float)0.18545224,
+  (float)0.18866697, (float)0.19187967, (float)0.19509032, (float)0.19829889, (float)0.20150533,
+  (float)0.20470962, (float)0.20791170, (float)0.21111156, (float)0.21430916, (float)0.21750447,
+  (float)0.22069745, (float)0.22388805, (float)0.22707628, (float)0.23026206, (float)0.23344538,
+  (float)0.23662618, (float)0.23980446, (float)0.24298020, (float)0.24615330, (float)0.24932377,
+  (float)0.25249159, (float)0.25565669, (float)0.25881904, (float)0.26197866, (float)0.26513544,
+  (float)0.26828939, (float)0.27144045, (float)0.27458861, (float)0.27773386, (float)0.28087610,
+  (float)0.28401536, (float)0.28715158, (float)0.29028466, (float)0.29341471, (float)0.29654160,
+  (float)0.29966527, (float)0.30278578, (float)0.30590302, (float)0.30901700, (float)0.31212768,
+  (float)0.31523499, (float)0.31833893, (float)0.32143945, (float)0.32453656, (float)0.32763019,
+  (float)0.33072028, (float)0.33380687, (float)0.33688986, (float)0.33996925, (float)0.34304500,
+  (float)0.34611708, (float)0.34918544, (float)0.35225007, (float)0.35531089, (float)0.35836795,
+  (float)0.36142117, (float)0.36447051, (float)0.36751595, (float)0.37055743, (float)0.37359497,
+  (float)0.37662852, (float)0.37965801, (float)0.38268346, (float)0.38570479, (float)0.38872197,
+  (float)0.39173502, (float)0.39474389, (float)0.39774847, (float)0.40074885, (float)0.40374491,
+  (float)0.40673664, (float)0.40972406, (float)0.41270703, (float)0.41568562, (float)0.41865975,
+  (float)0.42162940, (float)0.42459452, (float)0.42755508, (float)0.43051112, (float)0.43346250,
+  (float)0.43640924, (float)0.43935132, (float)0.44228873, (float)0.44522133, (float)0.44814920,
+  (float)0.45107228, (float)0.45399052, (float)0.45690390, (float)0.45981237, (float)0.46271592,
+  (float)0.46561453, (float)0.46850815, (float)0.47139674, (float)0.47428030, (float)0.47715878,
+  (float)0.48003215, (float)0.48290035, (float)0.48576337, (float)0.48862126, (float)0.49147385,
+  (float)0.49432120, (float)0.49716330, (float)0.50000000, (float)0.50283140, (float)0.50565743,
+  (float)0.50847799, (float)0.51129311, (float)0.51410276, (float)0.51690692, (float)0.51970553,
+  (float)0.52249855, (float)0.52528602, (float)0.52806789, (float)0.53084403, (float)0.53361452,
+  (float)0.53637928, (float)0.53913832, (float)0.54189163, (float)0.54463905, (float)0.54738063,
+  (float)0.55011642, (float)0.55284631, (float)0.55557024, (float)0.55828828, (float)0.56100029,
+  (float)0.56370628, (float)0.56640625, (float)0.56910014, (float)0.57178795, (float)0.57446963,
+  (float)0.57714522, (float)0.57981455, (float)0.58247769, (float)0.58513463, (float)0.58778524,
+  (float)0.59042960, (float)0.59306765, (float)0.59569931, (float)0.59832460, (float)0.60094351,
+  (float)0.60355598, (float)0.60616195, (float)0.60876143, (float)0.61135441, (float)0.61394083,
+  (float)0.61652070, (float)0.61909395, (float)0.62166059, (float)0.62422055, (float)0.62677383,
+  (float)0.62932038, (float)0.63186020, (float)0.63439333, (float)0.63691956, (float)0.63943899,
+  (float)0.64195162, (float)0.64445734, (float)0.64695615, (float)0.64944810, (float)0.65193301,
+  (float)0.65441096, (float)0.65688187, (float)0.65934587, (float)0.66180271, (float)0.66425246,
+  (float)0.66669512, (float)0.66913062, (float)0.67155898, (float)0.67398012, (float)0.67639405,
+  (float)0.67880076, (float)0.68120021, (float)0.68359232, (float)0.68597710, (float)0.68835455,
+  (float)0.69072467, (float)0.69308740, (float)0.69544262, (float)0.69779050, (float)0.70013082,
+  (float)0.70246369, (float)0.70478904, (float)0.70710677, (float)0.70941699, (float)0.71171963,
+  (float)0.71401459, (float)0.71630198, (float)0.71858168, (float)0.72085363, (float)0.72311789,
+  (float)0.72537440, (float)0.72762316, (float)0.72986406, (float)0.73209721, (float)0.73432255,
+  (float)0.73653996, (float)0.73874950, (float)0.74095118, (float)0.74314487, (float)0.74533057,
+  (float)0.74750835, (float)0.74967808, (float)0.75183982, (float)0.75399351, (float)0.75613910,
+  (float)0.75827658, (float)0.76040596, (float)0.76252723, (float)0.76464027, (float)0.76674515,
+  (float)0.76884186, (float)0.77093029, (float)0.77301043, (float)0.77508241, (float)0.77714598,
+  (float)0.77920127, (float)0.78124821, (float)0.78328675, (float)0.78531694, (float)0.78733873,
+  (float)0.78935206, (float)0.79135692, (float)0.79335338, (float)0.79534125, (float)0.79732066,
+  (float)0.79929149, (float)0.80125386, (float)0.80320752, (float)0.80515265, (float)0.80708915,
+  (float)0.80901700, (float)0.81093621, (float)0.81284672, (float)0.81474853, (float)0.81664157,
+  (float)0.81852591, (float)0.82040149, (float)0.82226825, (float)0.82412618, (float)0.82597536,
+  (float)0.82781565, (float)0.82964706, (float)0.83146966, (float)0.83328325, (float)0.83508795,
+  (float)0.83688378, (float)0.83867061, (float)0.84044838, (float)0.84221727, (float)0.84397703,
+  (float)0.84572780, (float)0.84746957, (float)0.84920216, (float)0.85092574, (float)0.85264021,
+  (float)0.85434544, (float)0.85604161, (float)0.85772866, (float)0.85940641, (float)0.86107504,
+  (float)0.86273444, (float)0.86438453, (float)0.86602545, (float)0.86765707, (float)0.86927933,
+  (float)0.87089235, (float)0.87249607, (float)0.87409031, (float)0.87567532, (float)0.87725097,
+  (float)0.87881714, (float)0.88037390, (float)0.88192129, (float)0.88345921, (float)0.88498765,
+  (float)0.88650668, (float)0.88801610, (float)0.88951612, (float)0.89100653, (float)0.89248741,
+  (float)0.89395881, (float)0.89542055, (float)0.89687276, (float)0.89831537, (float)0.89974827,
+  (float)0.90117162, (float)0.90258533, (float)0.90398932, (float)0.90538365, (float)0.90676826,
+  (float)0.90814316, (float)0.90950841, (float)0.91086388, (float)0.91220951, (float)0.91354549,
+  (float)0.91487163, (float)0.91618794, (float)0.91749454, (float)0.91879123, (float)0.92007810,
+  (float)0.92135513, (float)0.92262226, (float)0.92387950, (float)0.92512691, (float)0.92636442,
+  (float)0.92759192, (float)0.92880958, (float)0.93001723, (float)0.93121493, (float)0.93240267,
+  (float)0.93358046, (float)0.93474817, (float)0.93590593, (float)0.93705362, (float)0.93819135,
+  (float)0.93931901, (float)0.94043654, (float)0.94154406, (float)0.94264150, (float)0.94372880,
+  (float)0.94480604, (float)0.94587320, (float)0.94693011, (float)0.94797695, (float)0.94901365,
+  (float)0.95004016, (float)0.95105654, (float)0.95206273, (float)0.95305866, (float)0.95404440,
+  (float)0.95501995, (float)0.95598525, (float)0.95694035, (float)0.95788521, (float)0.95881975,
+  (float)0.95974404, (float)0.96065807, (float)0.96156180, (float)0.96245527, (float)0.96333838,
+  (float)0.96421117, (float)0.96507370, (float)0.96592581, (float)0.96676767, (float)0.96759909,
+  (float)0.96842021, (float)0.96923089, (float)0.97003126, (float)0.97082120, (float)0.97160077,
+  (float)0.97236991, (float)0.97312868, (float)0.97387701, (float)0.97461486, (float)0.97534233,
+  (float)0.97605932, (float)0.97676587, (float)0.97746199, (float)0.97814763, (float)0.97882277,
+  (float)0.97948742, (float)0.98014158, (float)0.98078531, (float)0.98141843, (float)0.98204112,
+  (float)0.98265332, (float)0.98325491, (float)0.98384601, (float)0.98442656, (float)0.98499662,
+  (float)0.98555607, (float)0.98610497, (float)0.98664331, (float)0.98717111, (float)0.98768836,
+  (float)0.98819500, (float)0.98869103, (float)0.98917651, (float)0.98965138, (float)0.99011570,
+  (float)0.99056935, (float)0.99101239, (float)0.99144489, (float)0.99186671, (float)0.99227792,
+  (float)0.99267852, (float)0.99306846, (float)0.99344778, (float)0.99381649, (float)0.99417448,
+  (float)0.99452192, (float)0.99485862, (float)0.99518472, (float)0.99550015, (float)0.99580491,
+  (float)0.99609905, (float)0.99638247, (float)0.99665523, (float)0.99691731, (float)0.99716878,
+  (float)0.99740952, (float)0.99763954, (float)0.99785894, (float)0.99806762, (float)0.99826562,
+  (float)0.99845290, (float)0.99862951, (float)0.99879545, (float)0.99895066, (float)0.99909520,
+  (float)0.99922901, (float)0.99935216, (float)0.99946457, (float)0.99956632, (float)0.99965733,
+  (float)0.99973762, (float)0.99980724, (float)0.99986613, (float)0.99991435, (float)0.99995178,
+  (float)0.99997860, (float)0.99999464, (float)1.00000000, (float)0.99999464, (float)0.99997860,
+  (float)0.99995178, (float)0.99991435, (float)0.99986613, (float)0.99980724, (float)0.99973762,
+  (float)0.99965733, (float)0.99956632, (float)0.99946457, (float)0.99935216, (float)0.99922901,
+  (float)0.99909520, (float)0.99895066, (float)0.99879545, (float)0.99862951, (float)0.99845290,
+  (float)0.99826562, (float)0.99806762, (float)0.99785894, (float)0.99763954, (float)0.99740946,
+  (float)0.99716872, (float)0.99691731, (float)0.99665523, (float)0.99638247, (float)0.99609905,
+  (float)0.99580491, (float)0.99550015, (float)0.99518472, (float)0.99485862, (float)0.99452192,
+  (float)0.99417448, (float)0.99381644, (float)0.99344778, (float)0.99306846, (float)0.99267852,
+  (float)0.99227792, (float)0.99186671, (float)0.99144489, (float)0.99101239, (float)0.99056935,
+  (float)0.99011564, (float)0.98965138, (float)0.98917651, (float)0.98869103, (float)0.98819494,
+  (float)0.98768836, (float)0.98717111, (float)0.98664331, (float)0.98610497, (float)0.98555607,
+  (float)0.98499656, (float)0.98442656, (float)0.98384601, (float)0.98325491, (float)0.98265326,
+  (float)0.98204112, (float)0.98141843, (float)0.98078525, (float)0.98014158, (float)0.97948742,
+  (float)0.97882277, (float)0.97814757, (float)0.97746193, (float)0.97676587, (float)0.97605932,
+  (float)0.97534227, (float)0.97461486, (float)0.97387695, (float)0.97312862, (float)0.97236991,
+  (float)0.97160077, (float)0.97082120, (float)0.97003126, (float)0.96923089, (float)0.96842015,
+  (float)0.96759909, (float)0.96676761, (float)0.96592581, (float)0.96507365, (float)0.96421117,
+  (float)0.96333838, (float)0.96245521, (float)0.96156180, (float)0.96065807, (float)0.95974404,
+  (float)0.95881969, (float)0.95788515, (float)0.95694029, (float)0.95598525, (float)0.95501995,
+  (float)0.95404440, (float)0.95305860, (float)0.95206267, (float)0.95105648, (float)0.95004016,
+  (float)0.94901365, (float)0.94797695, (float)0.94693011, (float)0.94587314, (float)0.94480604,
+  (float)0.94372880, (float)0.94264150, (float)0.94154406, (float)0.94043654, (float)0.93931895,
+  (float)0.93819129, (float)0.93705362, (float)0.93590593, (float)0.93474817, (float)0.93358046,
+  (float)0.93240267, (float)0.93121493, (float)0.93001723, (float)0.92880952, (float)0.92759192,
+  (float)0.92636436, (float)0.92512691, (float)0.92387950, (float)0.92262226, (float)0.92135507,
+  (float)0.92007804, (float)0.91879123, (float)0.91749448, (float)0.91618794, (float)0.91487157,
+  (float)0.91354543, (float)0.91220951, (float)0.91086382, (float)0.90950835, (float)0.90814310,
+  (float)0.90676820, (float)0.90538365, (float)0.90398932, (float)0.90258527, (float)0.90117157,
+  (float)0.89974827, (float)0.89831525, (float)0.89687276, (float)0.89542055, (float)0.89395875,
+  (float)0.89248741, (float)0.89100647, (float)0.88951600, (float)0.88801610, (float)0.88650662,
+  (float)0.88498759, (float)0.88345915, (float)0.88192123, (float)0.88037384, (float)0.87881714,
+  (float)0.87725091, (float)0.87567532, (float)0.87409031, (float)0.87249595, (float)0.87089223,
+  (float)0.86927933, (float)0.86765701, (float)0.86602539, (float)0.86438447, (float)0.86273432,
+  (float)0.86107504, (float)0.85940641, (float)0.85772860, (float)0.85604161, (float)0.85434544,
+  (float)0.85264009, (float)0.85092574, (float)0.84920216, (float)0.84746951, (float)0.84572780,
+  (float)0.84397697, (float)0.84221715, (float)0.84044844, (float)0.83867055, (float)0.83688372,
+  (float)0.83508795, (float)0.83328319, (float)0.83146954, (float)0.82964706, (float)0.82781565,
+  (float)0.82597530, (float)0.82412612, (float)0.82226813, (float)0.82040137, (float)0.81852591,
+  (float)0.81664157, (float)0.81474847, (float)0.81284660, (float)0.81093609, (float)0.80901700,
+  (float)0.80708915, (float)0.80515265, (float)0.80320752, (float)0.80125374, (float)0.79929143,
+  (float)0.79732066, (float)0.79534125, (float)0.79335332, (float)0.79135686, (float)0.78935200,
+  (float)0.78733861, (float)0.78531694, (float)0.78328675, (float)0.78124815, (float)0.77920121,
+  (float)0.77714586, (float)0.77508223, (float)0.77301049, (float)0.77093029, (float)0.76884180,
+  (float)0.76674509, (float)0.76464021, (float)0.76252711, (float)0.76040596, (float)0.75827658,
+  (float)0.75613904, (float)0.75399339, (float)0.75183970, (float)0.74967796, (float)0.74750835,
+  (float)0.74533057, (float)0.74314481, (float)0.74095106, (float)0.73874938, (float)0.73653996,
+  (float)0.73432249, (float)0.73209721, (float)0.72986400, (float)0.72762305, (float)0.72537428,
+  (float)0.72311789, (float)0.72085363, (float)0.71858162, (float)0.71630186, (float)0.71401453,
+  (float)0.71171951, (float)0.70941705, (float)0.70710677, (float)0.70478898, (float)0.70246363,
+  (float)0.70013070, (float)0.69779032, (float)0.69544268, (float)0.69308734, (float)0.69072461,
+  (float)0.68835449, (float)0.68597704, (float)0.68359220, (float)0.68120021, (float)0.67880070,
+  (float)0.67639399, (float)0.67398006, (float)0.67155886, (float)0.66913044, (float)0.66669512,
+  (float)0.66425240, (float)0.66180259, (float)0.65934575, (float)0.65688181, (float)0.65441096,
+  (float)0.65193301, (float)0.64944804, (float)0.64695609, (float)0.64445722, (float)0.64195150,
+  (float)0.63943905, (float)0.63691956, (float)0.63439327, (float)0.63186014, (float)0.62932026,
+  (float)0.62677372, (float)0.62422055, (float)0.62166059, (float)0.61909389, (float)0.61652064,
+  (float)0.61394072, (float)0.61135429, (float)0.60876143, (float)0.60616189, (float)0.60355592,
+  (float)0.60094339, (float)0.59832448, (float)0.59569913, (float)0.59306765, (float)0.59042960,
+  (float)0.58778518, (float)0.58513451, (float)0.58247757, (float)0.57981461, (float)0.57714522,
+  (float)0.57446963, (float)0.57178789, (float)0.56910002, (float)0.56640613, (float)0.56370628,
+  (float)0.56100023, (float)0.55828822, (float)0.55557019, (float)0.55284619, (float)0.55011630,
+  (float)0.54738069, (float)0.54463905, (float)0.54189152, (float)0.53913826, (float)0.53637916,
+  (float)0.53361434, (float)0.53084403, (float)0.52806783, (float)0.52528596, (float)0.52249849,
+  (float)0.51970541, (float)0.51690674, (float)0.51410276, (float)0.51129305, (float)0.50847787,
+  (float)0.50565726, (float)0.50283122, (float)0.50000006, (float)0.49716327, (float)0.49432117,
+  (float)0.49147379, (float)0.48862115, (float)0.48576325, (float)0.48290038, (float)0.48003212,
+  (float)0.47715873, (float)0.47428021, (float)0.47139663, (float)0.46850798, (float)0.46561456,
+  (float)0.46271589, (float)0.45981231, (float)0.45690379, (float)0.45399037, (float)0.45107210,
+  (float)0.44814920, (float)0.44522130, (float)0.44228864, (float)0.43935123, (float)0.43640912,
+  (float)0.43346232, (float)0.43051112, (float)0.42755505, (float)0.42459446, (float)0.42162928,
+  (float)0.41865960, (float)0.41568545, (float)0.41270703, (float)0.40972400, (float)0.40673658,
+  (float)0.40374479, (float)0.40074870, (float)0.39774850, (float)0.39474386, (float)0.39173496,
+  (float)0.38872188, (float)0.38570464, (float)0.38268328, (float)0.37965804, (float)0.37662849,
+  (float)0.37359491, (float)0.37055734, (float)0.36751580, (float)0.36447033, (float)0.36142117,
+  (float)0.35836792, (float)0.35531086, (float)0.35224995, (float)0.34918529, (float)0.34611690,
+  (float)0.34304500, (float)0.33996922, (float)0.33688980, (float)0.33380675, (float)0.33072016,
+  (float)0.32763001, (float)0.32453656, (float)0.32143945, (float)0.31833887, (float)0.31523487,
+  (float)0.31212750, (float)0.30901679, (float)0.30590302, (float)0.30278572, (float)0.29966521,
+  (float)0.29654145, (float)0.29341453, (float)0.29028472, (float)0.28715155, (float)0.28401530,
+  (float)0.28087601, (float)0.27773371, (float)0.27458847, (float)0.27144048, (float)0.26828936,
+  (float)0.26513538, (float)0.26197854, (float)0.25881892, (float)0.25565651, (float)0.25249159,
+  (float)0.24932374, (float)0.24615324, (float)0.24298008, (float)0.23980433, (float)0.23662600,
+  (float)0.23344538, (float)0.23026201, (float)0.22707619, (float)0.22388794, (float)0.22069728,
+  (float)0.21750426, (float)0.21430916, (float)0.21111152, (float)0.20791161, (float)0.20470949,
+  (float)0.20150517, (float)0.19829892, (float)0.19509031, (float)0.19187963, (float)0.18866688,
+  (float)0.18545210, (float)0.18223536, (float)0.17901689, (float)0.17579627, (float)0.17257376,
+  (float)0.16934940, (float)0.16612324, (float)0.16289529, (float)0.15966584, (float)0.15643445,
+  (float)0.15320137, (float)0.14996666, (float)0.14673033, (float)0.14349243, (float)0.14025325,
+  (float)0.13701232, (float)0.13376991, (float)0.13052608, (float)0.12728085, (float)0.12403426,
+  (float)0.12078657, (float)0.11753736, (float)0.11428688, (float)0.11103519, (float)0.10778230,
+  (float)0.10452849, (float)0.10127334, (float)0.09801710, (float)0.09475980, (float)0.09150149,
+  (float)0.08824220, (float)0.08498220, (float)0.08172106, (float)0.07845904, (float)0.07519618,
+  (float)0.07193252, (float)0.06866808, (float)0.06540315, (float)0.06213728, (float)0.05887074,
+  (float)0.05560357, (float)0.05233581, (float)0.04906749, (float)0.04579888, (float)0.04252954,
+  (float)0.03925974, (float)0.03598953, (float)0.03271893, (float)0.02944798, (float)0.02617695,
+  (float)0.02290541, (float)0.01963361, (float)0.01636161, (float)0.01308943, (float)0.00981712,
+  (float)0.00654493, (float)0.00327244, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000,
+  (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000
+};
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.c b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.c
new file mode 100644
index 00000000..f9f3dc24
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.c
@@ -0,0 +1,684 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/utility/delay_estimator.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Number of right shifts for scaling is linearly depending on number of bits in
+// the far-end binary spectrum.
+static const int kShiftsAtZero = 13;  // Right shifts at zero binary spectrum.
+static const int kShiftsLinearSlope = 3;
+
+static const int32_t kProbabilityOffset = 1024;  // 2 in Q9.
+static const int32_t kProbabilityLowerLimit = 8704;  // 17 in Q9.
+static const int32_t kProbabilityMinSpread = 2816;  // 5.5 in Q9.
+
+// Robust validation settings
+static const float kHistogramMax = 3000.f;
+static const float kLastHistogramMax = 250.f;
+static const float kMinHistogramThreshold = 1.5f;
+static const int kMinRequiredHits = 10;
+static const int kMaxHitsWhenPossiblyNonCausal = 10;
+static const int kMaxHitsWhenPossiblyCausal = 1000;
+static const float kQ14Scaling = 1.f / (1 << 14);  // Scaling by 2^14 to get Q0.
+static const float kFractionSlope = 0.05f;
+static const float kMinFractionWhenPossiblyCausal = 0.5f;
+static const float kMinFractionWhenPossiblyNonCausal = 0.25f;
+
+// Counts and returns number of bits of a 32-bit word.
+static int BitCount(uint32_t u32) {
+  uint32_t tmp = u32 - ((u32 >> 1) & 033333333333) -
+      ((u32 >> 2) & 011111111111);
+  tmp = ((tmp + (tmp >> 3)) & 030707070707);
+  tmp = (tmp + (tmp >> 6));
+  tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077;
+
+  return ((int) tmp);
+}
+
+// Compares the |binary_vector| with all rows of the |binary_matrix| and counts
+// per row the number of times they have the same value.
+//
+// Inputs:
+//      - binary_vector     : binary "vector" stored in a long
+//      - binary_matrix     : binary "matrix" stored as a vector of long
+//      - matrix_size       : size of binary "matrix"
+//
+// Output:
+//      - bit_counts        : "Vector" stored as a long, containing for each
+//                            row the number of times the matrix row and the
+//                            input vector have the same value
+//
+static void BitCountComparison(uint32_t binary_vector,
+                               const uint32_t* binary_matrix,
+                               int matrix_size,
+                               int32_t* bit_counts) {
+  int n = 0;
+
+  // Compare |binary_vector| with all rows of the |binary_matrix|
+  for (; n < matrix_size; n++) {
+    bit_counts[n] = (int32_t) BitCount(binary_vector ^ binary_matrix[n]);
+  }
+}
+
+// Collects necessary statistics for the HistogramBasedValidation().  This
+// function has to be called prior to calling HistogramBasedValidation().  The
+// statistics updated and used by the HistogramBasedValidation() are:
+//  1. the number of |candidate_hits|, which states for how long we have had the
+//     same |candidate_delay|
+//  2. the |histogram| of candidate delays over time.  This histogram is
+//     weighted with respect to a reliability measure and time-varying to cope
+//     with possible delay shifts.
+// For further description see commented code.
+//
+// Inputs:
+//  - candidate_delay   : The delay to validate.
+//  - valley_depth_q14  : The cost function has a valley/minimum at the
+//                        |candidate_delay| location.  |valley_depth_q14| is the
+//                        cost function difference between the minimum and
+//                        maximum locations.  The value is in the Q14 domain.
+//  - valley_level_q14  : Is the cost function value at the minimum, in Q14.
+static void UpdateRobustValidationStatistics(BinaryDelayEstimator* self,
+                                             int candidate_delay,
+                                             int32_t valley_depth_q14,
+                                             int32_t valley_level_q14) {
+  const float valley_depth = valley_depth_q14 * kQ14Scaling;
+  float decrease_in_last_set = valley_depth;
+  const int max_hits_for_slow_change = (candidate_delay < self->last_delay) ?
+      kMaxHitsWhenPossiblyNonCausal : kMaxHitsWhenPossiblyCausal;
+  int i = 0;
+
+  assert(self->history_size == self->farend->history_size);
+  // Reset |candidate_hits| if we have a new candidate.
+  if (candidate_delay != self->last_candidate_delay) {
+    self->candidate_hits = 0;
+    self->last_candidate_delay = candidate_delay;
+  }
+  self->candidate_hits++;
+
+  // The |histogram| is updated differently across the bins.
+  // 1. The |candidate_delay| histogram bin is increased with the
+  //    |valley_depth|, which is a simple measure of how reliable the
+  //    |candidate_delay| is.  The histogram is not increased above
+  //    |kHistogramMax|.
+  self->histogram[candidate_delay] += valley_depth;
+  if (self->histogram[candidate_delay] > kHistogramMax) {
+    self->histogram[candidate_delay] = kHistogramMax;
+  }
+  // 2. The histogram bins in the neighborhood of |candidate_delay| are
+  //    unaffected.  The neighborhood is defined as x + {-2, -1, 0, 1}.
+  // 3. The histogram bins in the neighborhood of |last_delay| are decreased
+  //    with |decrease_in_last_set|.  This value equals the difference between
+  //    the cost function values at the locations |candidate_delay| and
+  //    |last_delay| until we reach |max_hits_for_slow_change| consecutive hits
+  //    at the |candidate_delay|.  If we exceed this amount of hits the
+  //    |candidate_delay| is a "potential" candidate and we start decreasing
+  //    these histogram bins more rapidly with |valley_depth|.
+  if (self->candidate_hits < max_hits_for_slow_change) {
+    decrease_in_last_set = (self->mean_bit_counts[self->compare_delay] -
+        valley_level_q14) * kQ14Scaling;
+  }
+  // 4. All other bins are decreased with |valley_depth|.
+  // TODO(bjornv): Investigate how to make this loop more efficient.  Split up
+  // the loop?  Remove parts that doesn't add too much.
+  for (i = 0; i < self->history_size; ++i) {
+    int is_in_last_set = (i >= self->last_delay - 2) &&
+        (i <= self->last_delay + 1) && (i != candidate_delay);
+    int is_in_candidate_set = (i >= candidate_delay - 2) &&
+        (i <= candidate_delay + 1);
+    self->histogram[i] -= decrease_in_last_set * is_in_last_set +
+        valley_depth * (!is_in_last_set && !is_in_candidate_set);
+    // 5. No histogram bin can go below 0.
+    if (self->histogram[i] < 0) {
+      self->histogram[i] = 0;
+    }
+  }
+}
+
+// Validates the |candidate_delay|, estimated in WebRtc_ProcessBinarySpectrum(),
+// based on a mix of counting concurring hits with a modified histogram
+// of recent delay estimates.  In brief a candidate is valid (returns 1) if it
+// is the most likely according to the histogram.  There are a couple of
+// exceptions that are worth mentioning:
+//  1. If the |candidate_delay| < |last_delay| it can be that we are in a
+//     non-causal state, breaking a possible echo control algorithm.  Hence, we
+//     open up for a quicker change by allowing the change even if the
+//     |candidate_delay| is not the most likely one according to the histogram.
+//  2. There's a minimum number of hits (kMinRequiredHits) and the histogram
+//     value has to reached a minimum (kMinHistogramThreshold) to be valid.
+//  3. The action is also depending on the filter length used for echo control.
+//     If the delay difference is larger than what the filter can capture, we
+//     also move quicker towards a change.
+// For further description see commented code.
+//
+// Input:
+//  - candidate_delay     : The delay to validate.
+//
+// Return value:
+//  - is_histogram_valid  : 1 - The |candidate_delay| is valid.
+//                          0 - Otherwise.
+static int HistogramBasedValidation(const BinaryDelayEstimator* self,
+                                    int candidate_delay) {
+  float fraction = 1.f;
+  float histogram_threshold = self->histogram[self->compare_delay];
+  const int delay_difference = candidate_delay - self->last_delay;
+  int is_histogram_valid = 0;
+
+  // The histogram based validation of |candidate_delay| is done by comparing
+  // the |histogram| at bin |candidate_delay| with a |histogram_threshold|.
+  // This |histogram_threshold| equals a |fraction| of the |histogram| at bin
+  // |last_delay|.  The |fraction| is a piecewise linear function of the
+  // |delay_difference| between the |candidate_delay| and the |last_delay|
+  // allowing for a quicker move if
+  //  i) a potential echo control filter can not handle these large differences.
+  // ii) keeping |last_delay| instead of updating to |candidate_delay| could
+  //     force an echo control into a non-causal state.
+  // We further require the histogram to have reached a minimum value of
+  // |kMinHistogramThreshold|.  In addition, we also require the number of
+  // |candidate_hits| to be more than |kMinRequiredHits| to remove spurious
+  // values.
+
+  // Calculate a comparison histogram value (|histogram_threshold|) that is
+  // depending on the distance between the |candidate_delay| and |last_delay|.
+  // TODO(bjornv): How much can we gain by turning the fraction calculation
+  // into tables?
+  if (delay_difference > self->allowed_offset) {
+    fraction = 1.f - kFractionSlope * (delay_difference - self->allowed_offset);
+    fraction = (fraction > kMinFractionWhenPossiblyCausal ? fraction :
+        kMinFractionWhenPossiblyCausal);
+  } else if (delay_difference < 0) {
+    fraction = kMinFractionWhenPossiblyNonCausal -
+        kFractionSlope * delay_difference;
+    fraction = (fraction > 1.f ? 1.f : fraction);
+  }
+  histogram_threshold *= fraction;
+  histogram_threshold = (histogram_threshold > kMinHistogramThreshold ?
+      histogram_threshold : kMinHistogramThreshold);
+
+  is_histogram_valid =
+      (self->histogram[candidate_delay] >= histogram_threshold) &&
+      (self->candidate_hits > kMinRequiredHits);
+
+  return is_histogram_valid;
+}
+
+// Performs a robust validation of the |candidate_delay| estimated in
+// WebRtc_ProcessBinarySpectrum().  The algorithm takes the
+// |is_instantaneous_valid| and the |is_histogram_valid| and combines them
+// into a robust validation.  The HistogramBasedValidation() has to be called
+// prior to this call.
+// For further description on how the combination is done, see commented code.
+//
+// Inputs:
+//  - candidate_delay         : The delay to validate.
+//  - is_instantaneous_valid  : The instantaneous validation performed in
+//                              WebRtc_ProcessBinarySpectrum().
+//  - is_histogram_valid      : The histogram based validation.
+//
+// Return value:
+//  - is_robust               : 1 - The candidate_delay is valid according to a
+//                                  combination of the two inputs.
+//                            : 0 - Otherwise.
+static int RobustValidation(const BinaryDelayEstimator* self,
+                            int candidate_delay,
+                            int is_instantaneous_valid,
+                            int is_histogram_valid) {
+  int is_robust = 0;
+
+  // The final robust validation is based on the two algorithms; 1) the
+  // |is_instantaneous_valid| and 2) the histogram based with result stored in
+  // |is_histogram_valid|.
+  //   i) Before we actually have a valid estimate (|last_delay| == -2), we say
+  //      a candidate is valid if either algorithm states so
+  //      (|is_instantaneous_valid| OR |is_histogram_valid|).
+  is_robust = (self->last_delay < 0) &&
+      (is_instantaneous_valid || is_histogram_valid);
+  //  ii) Otherwise, we need both algorithms to be certain
+  //      (|is_instantaneous_valid| AND |is_histogram_valid|)
+  is_robust |= is_instantaneous_valid && is_histogram_valid;
+  // iii) With one exception, i.e., the histogram based algorithm can overrule
+  //      the instantaneous one if |is_histogram_valid| = 1 and the histogram
+  //      is significantly strong.
+  is_robust |= is_histogram_valid &&
+      (self->histogram[candidate_delay] > self->last_delay_histogram);
+
+  return is_robust;
+}
+
+void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) {
+
+  if (self == NULL) {
+    return;
+  }
+
+  free(self->binary_far_history);
+  self->binary_far_history = NULL;
+
+  free(self->far_bit_counts);
+  self->far_bit_counts = NULL;
+
+  free(self);
+}
+
+BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend(
+    int history_size) {
+  BinaryDelayEstimatorFarend* self = NULL;
+
+  if (history_size > 1) {
+    // Sanity conditions fulfilled.
+    self = malloc(sizeof(BinaryDelayEstimatorFarend));
+  }
+  if (self == NULL) {
+    return NULL;
+  }
+
+  self->history_size = 0;
+  self->binary_far_history = NULL;
+  self->far_bit_counts = NULL;
+  if (WebRtc_AllocateFarendBufferMemory(self, history_size) == 0) {
+    WebRtc_FreeBinaryDelayEstimatorFarend(self);
+    self = NULL;
+  }
+  return self;
+}
+
+int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self,
+                                      int history_size) {
+  assert(self != NULL);
+  // (Re-)Allocate memory for history buffers.
+  self->binary_far_history =
+      realloc(self->binary_far_history,
+              history_size * sizeof(*self->binary_far_history));
+  self->far_bit_counts = realloc(self->far_bit_counts,
+                                 history_size * sizeof(*self->far_bit_counts));
+  if ((self->binary_far_history == NULL) || (self->far_bit_counts == NULL)) {
+    history_size = 0;
+  }
+  // Fill with zeros if we have expanded the buffers.
+  if (history_size > self->history_size) {
+    int size_diff = history_size - self->history_size;
+    memset(&self->binary_far_history[self->history_size],
+           0,
+           sizeof(*self->binary_far_history) * size_diff);
+    memset(&self->far_bit_counts[self->history_size],
+           0,
+           sizeof(*self->far_bit_counts) * size_diff);
+  }
+  self->history_size = history_size;
+
+  return self->history_size;
+}
+
+void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) {
+  assert(self != NULL);
+  memset(self->binary_far_history, 0, sizeof(uint32_t) * self->history_size);
+  memset(self->far_bit_counts, 0, sizeof(int) * self->history_size);
+}
+
+void WebRtc_SoftResetBinaryDelayEstimatorFarend(
+    BinaryDelayEstimatorFarend* self, int delay_shift) {
+  int abs_shift = abs(delay_shift);
+  int shift_size = 0;
+  int dest_index = 0;
+  int src_index = 0;
+  int padding_index = 0;
+
+  assert(self != NULL);
+  shift_size = self->history_size - abs_shift;
+  assert(shift_size > 0);
+  if (delay_shift == 0) {
+    return;
+  } else if (delay_shift > 0) {
+    dest_index = abs_shift;
+  } else if (delay_shift < 0) {
+    src_index = abs_shift;
+    padding_index = shift_size;
+  }
+
+  // Shift and zero pad buffers.
+  memmove(&self->binary_far_history[dest_index],
+          &self->binary_far_history[src_index],
+          sizeof(*self->binary_far_history) * shift_size);
+  memset(&self->binary_far_history[padding_index], 0,
+         sizeof(*self->binary_far_history) * abs_shift);
+  memmove(&self->far_bit_counts[dest_index],
+          &self->far_bit_counts[src_index],
+          sizeof(*self->far_bit_counts) * shift_size);
+  memset(&self->far_bit_counts[padding_index], 0,
+         sizeof(*self->far_bit_counts) * abs_shift);
+}
+
+void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* handle,
+                                 uint32_t binary_far_spectrum) {
+  assert(handle != NULL);
+  // Shift binary spectrum history and insert current |binary_far_spectrum|.
+  memmove(&(handle->binary_far_history[1]), &(handle->binary_far_history[0]),
+          (handle->history_size - 1) * sizeof(uint32_t));
+  handle->binary_far_history[0] = binary_far_spectrum;
+
+  // Shift history of far-end binary spectrum bit counts and insert bit count
+  // of current |binary_far_spectrum|.
+  memmove(&(handle->far_bit_counts[1]), &(handle->far_bit_counts[0]),
+          (handle->history_size - 1) * sizeof(int));
+  handle->far_bit_counts[0] = BitCount(binary_far_spectrum);
+}
+
+void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self) {
+
+  if (self == NULL) {
+    return;
+  }
+
+  free(self->mean_bit_counts);
+  self->mean_bit_counts = NULL;
+
+  free(self->bit_counts);
+  self->bit_counts = NULL;
+
+  free(self->binary_near_history);
+  self->binary_near_history = NULL;
+
+  free(self->histogram);
+  self->histogram = NULL;
+
+  // BinaryDelayEstimator does not have ownership of |farend|, hence we do not
+  // free the memory here. That should be handled separately by the user.
+  self->farend = NULL;
+
+  free(self);
+}
+
+BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator(
+    BinaryDelayEstimatorFarend* farend, int max_lookahead) {
+  BinaryDelayEstimator* self = NULL;
+
+  if ((farend != NULL) && (max_lookahead >= 0)) {
+    // Sanity conditions fulfilled.
+    self = malloc(sizeof(BinaryDelayEstimator));
+  }
+  if (self == NULL) {
+    return NULL;
+  }
+
+  self->farend = farend;
+  self->near_history_size = max_lookahead + 1;
+  self->history_size = 0;
+  self->robust_validation_enabled = 0;  // Disabled by default.
+  self->allowed_offset = 0;
+
+  self->lookahead = max_lookahead;
+
+  // Allocate memory for spectrum and history buffers.
+  self->mean_bit_counts = NULL;
+  self->bit_counts = NULL;
+  self->histogram = NULL;
+  self->binary_near_history =
+      malloc((max_lookahead + 1) * sizeof(*self->binary_near_history));
+  if (self->binary_near_history == NULL ||
+      WebRtc_AllocateHistoryBufferMemory(self, farend->history_size) == 0) {
+    WebRtc_FreeBinaryDelayEstimator(self);
+    self = NULL;
+  }
+
+  return self;
+}
+
+int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self,
+                                       int history_size) {
+  BinaryDelayEstimatorFarend* far = self->farend;
+  // (Re-)Allocate memory for spectrum and history buffers.
+  if (history_size != far->history_size) {
+    // Only update far-end buffers if we need.
+    history_size = WebRtc_AllocateFarendBufferMemory(far, history_size);
+  }
+  // The extra array element in |mean_bit_counts| and |histogram| is a dummy
+  // element only used while |last_delay| == -2, i.e., before we have a valid
+  // estimate.
+  self->mean_bit_counts =
+      realloc(self->mean_bit_counts,
+              (history_size + 1) * sizeof(*self->mean_bit_counts));
+  self->bit_counts =
+      realloc(self->bit_counts, history_size * sizeof(*self->bit_counts));
+  self->histogram =
+      realloc(self->histogram, (history_size + 1) * sizeof(*self->histogram));
+
+  if ((self->mean_bit_counts == NULL) ||
+      (self->bit_counts == NULL) ||
+      (self->histogram == NULL)) {
+    history_size = 0;
+  }
+  // Fill with zeros if we have expanded the buffers.
+  if (history_size > self->history_size) {
+    int size_diff = history_size - self->history_size;
+    memset(&self->mean_bit_counts[self->history_size],
+           0,
+           sizeof(*self->mean_bit_counts) * size_diff);
+    memset(&self->bit_counts[self->history_size],
+           0,
+           sizeof(*self->bit_counts) * size_diff);
+    memset(&self->histogram[self->history_size],
+           0,
+           sizeof(*self->histogram) * size_diff);
+  }
+  self->history_size = history_size;
+
+  return self->history_size;
+}
+
+void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self) {
+  int i = 0;
+  assert(self != NULL);
+
+  memset(self->bit_counts, 0, sizeof(int32_t) * self->history_size);
+  memset(self->binary_near_history,
+         0,
+         sizeof(uint32_t) * self->near_history_size);
+  for (i = 0; i <= self->history_size; ++i) {
+    self->mean_bit_counts[i] = (20 << 9);  // 20 in Q9.
+    self->histogram[i] = 0.f;
+  }
+  self->minimum_probability = kMaxBitCountsQ9;  // 32 in Q9.
+  self->last_delay_probability = (int) kMaxBitCountsQ9;  // 32 in Q9.
+
+  // Default return value if we're unable to estimate. -1 is used for errors.
+  self->last_delay = -2;
+
+  self->last_candidate_delay = -2;
+  self->compare_delay = self->history_size;
+  self->candidate_hits = 0;
+  self->last_delay_histogram = 0.f;
+}
+
+int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self,
+                                         int delay_shift) {
+  int lookahead = 0;
+  assert(self != NULL);
+  lookahead = self->lookahead;
+  self->lookahead -= delay_shift;
+  if (self->lookahead < 0) {
+    self->lookahead = 0;
+  }
+  if (self->lookahead > self->near_history_size - 1) {
+    self->lookahead = self->near_history_size - 1;
+  }
+  return lookahead - self->lookahead;
+}
+
+int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self,
+                                 uint32_t binary_near_spectrum) {
+  int i = 0;
+  int candidate_delay = -1;
+  int valid_candidate = 0;
+
+  int32_t value_best_candidate = kMaxBitCountsQ9;
+  int32_t value_worst_candidate = 0;
+  int32_t valley_depth = 0;
+
+  assert(self != NULL);
+  if (self->farend->history_size != self->history_size) {
+    // Non matching history sizes.
+    return -1;
+  }
+  if (self->near_history_size > 1) {
+    // If we apply lookahead, shift near-end binary spectrum history. Insert
+    // current |binary_near_spectrum| and pull out the delayed one.
+    memmove(&(self->binary_near_history[1]), &(self->binary_near_history[0]),
+            (self->near_history_size - 1) * sizeof(uint32_t));
+    self->binary_near_history[0] = binary_near_spectrum;
+    binary_near_spectrum = self->binary_near_history[self->lookahead];
+  }
+
+  // Compare with delayed spectra and store the |bit_counts| for each delay.
+  BitCountComparison(binary_near_spectrum, self->farend->binary_far_history,
+                     self->history_size, self->bit_counts);
+
+  // Update |mean_bit_counts|, which is the smoothed version of |bit_counts|.
+  for (i = 0; i < self->history_size; i++) {
+    // |bit_counts| is constrained to [0, 32], meaning we can smooth with a
+    // factor up to 2^26. We use Q9.
+    int32_t bit_count = (self->bit_counts[i] << 9);  // Q9.
+
+    // Update |mean_bit_counts| only when far-end signal has something to
+    // contribute. If |far_bit_counts| is zero the far-end signal is weak and
+    // we likely have a poor echo condition, hence don't update.
+    if (self->farend->far_bit_counts[i] > 0) {
+      // Make number of right shifts piecewise linear w.r.t. |far_bit_counts|.
+      int shifts = kShiftsAtZero;
+      shifts -= (kShiftsLinearSlope * self->farend->far_bit_counts[i]) >> 4;
+      WebRtc_MeanEstimatorFix(bit_count, shifts, &(self->mean_bit_counts[i]));
+    }
+  }
+
+  // Find |candidate_delay|, |value_best_candidate| and |value_worst_candidate|
+  // of |mean_bit_counts|.
+  for (i = 0; i < self->history_size; i++) {
+    if (self->mean_bit_counts[i] < value_best_candidate) {
+      value_best_candidate = self->mean_bit_counts[i];
+      candidate_delay = i;
+    }
+    if (self->mean_bit_counts[i] > value_worst_candidate) {
+      value_worst_candidate = self->mean_bit_counts[i];
+    }
+  }
+  valley_depth = value_worst_candidate - value_best_candidate;
+
+  // The |value_best_candidate| is a good indicator on the probability of
+  // |candidate_delay| being an accurate delay (a small |value_best_candidate|
+  // means a good binary match). In the following sections we make a decision
+  // whether to update |last_delay| or not.
+  // 1) If the difference bit counts between the best and the worst delay
+  //    candidates is too small we consider the situation to be unreliable and
+  //    don't update |last_delay|.
+  // 2) If the situation is reliable we update |last_delay| if the value of the
+  //    best candidate delay has a value less than
+  //     i) an adaptive threshold |minimum_probability|, or
+  //    ii) this corresponding value |last_delay_probability|, but updated at
+  //        this time instant.
+
+  // Update |minimum_probability|.
+  if ((self->minimum_probability > kProbabilityLowerLimit) &&
+      (valley_depth > kProbabilityMinSpread)) {
+    // The "hard" threshold can't be lower than 17 (in Q9).
+    // The valley in the curve also has to be distinct, i.e., the
+    // difference between |value_worst_candidate| and |value_best_candidate| has
+    // to be large enough.
+    int32_t threshold = value_best_candidate + kProbabilityOffset;
+    if (threshold < kProbabilityLowerLimit) {
+      threshold = kProbabilityLowerLimit;
+    }
+    if (self->minimum_probability > threshold) {
+      self->minimum_probability = threshold;
+    }
+  }
+  // Update |last_delay_probability|.
+  // We use a Markov type model, i.e., a slowly increasing level over time.
+  self->last_delay_probability++;
+  // Validate |candidate_delay|.  We have a reliable instantaneous delay
+  // estimate if
+  //  1) The valley is distinct enough (|valley_depth| > |kProbabilityOffset|)
+  // and
+  //  2) The depth of the valley is deep enough
+  //      (|value_best_candidate| < |minimum_probability|)
+  //     and deeper than the best estimate so far
+  //      (|value_best_candidate| < |last_delay_probability|)
+  valid_candidate = ((valley_depth > kProbabilityOffset) &&
+      ((value_best_candidate < self->minimum_probability) ||
+          (value_best_candidate < self->last_delay_probability)));
+
+  UpdateRobustValidationStatistics(self, candidate_delay, valley_depth,
+                                   value_best_candidate);
+  if (self->robust_validation_enabled) {
+    int is_histogram_valid = HistogramBasedValidation(self, candidate_delay);
+    valid_candidate = RobustValidation(self, candidate_delay, valid_candidate,
+                                       is_histogram_valid);
+
+  }
+  if (valid_candidate) {
+    if (candidate_delay != self->last_delay) {
+      self->last_delay_histogram =
+          (self->histogram[candidate_delay] > kLastHistogramMax ?
+              kLastHistogramMax : self->histogram[candidate_delay]);
+      // Adjust the histogram if we made a change to |last_delay|, though it was
+      // not the most likely one according to the histogram.
+      if (self->histogram[candidate_delay] <
+          self->histogram[self->compare_delay]) {
+        self->histogram[self->compare_delay] = self->histogram[candidate_delay];
+      }
+    }
+    self->last_delay = candidate_delay;
+    if (value_best_candidate < self->last_delay_probability) {
+      self->last_delay_probability = value_best_candidate;
+    }
+    self->compare_delay = self->last_delay;
+  }
+
+  return self->last_delay;
+}
+
+int WebRtc_binary_last_delay(BinaryDelayEstimator* self) {
+  assert(self != NULL);
+  return self->last_delay;
+}
+
+float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self) {
+  float quality = 0;
+  assert(self != NULL);
+
+  if (self->robust_validation_enabled) {
+    // Simply a linear function of the histogram height at delay estimate.
+    quality = self->histogram[self->compare_delay] / kHistogramMax;
+  } else {
+    // Note that |last_delay_probability| states how deep the minimum of the
+    // cost function is, so it is rather an error probability.
+    quality = (float) (kMaxBitCountsQ9 - self->last_delay_probability) /
+        kMaxBitCountsQ9;
+    if (quality < 0) {
+      quality = 0;
+    }
+  }
+  return quality;
+}
+
+void WebRtc_MeanEstimatorFix(int32_t new_value,
+                             int factor,
+                             int32_t* mean_value) {
+  int32_t diff = new_value - *mean_value;
+
+  // mean_new = mean_value + ((new_value - mean_value) >> factor);
+  if (diff < 0) {
+    diff = -((-diff) >> factor);
+  } else {
+    diff = (diff >> factor);
+  }
+  *mean_value += diff;
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.h b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.h
new file mode 100644
index 00000000..65c3f034
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.h
@@ -0,0 +1,251 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Performs delay estimation on binary converted spectra.
+// The return value is  0 - OK and -1 - Error, unless otherwise stated.
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
+
+#include "webrtc/typedefs.h"
+
+static const int32_t kMaxBitCountsQ9 = (32 << 9);  // 32 matching bits in Q9.
+
+typedef struct {
+  // Pointer to bit counts.
+  int* far_bit_counts;
+  // Binary history variables.
+  uint32_t* binary_far_history;
+  int history_size;
+} BinaryDelayEstimatorFarend;
+
+typedef struct {
+  // Pointer to bit counts.
+  int32_t* mean_bit_counts;
+  // Array only used locally in ProcessBinarySpectrum() but whose size is
+  // determined at run-time.
+  int32_t* bit_counts;
+
+  // Binary history variables.
+  uint32_t* binary_near_history;
+  int near_history_size;
+  int history_size;
+
+  // Delay estimation variables.
+  int32_t minimum_probability;
+  int last_delay_probability;
+
+  // Delay memory.
+  int last_delay;
+
+  // Robust validation
+  int robust_validation_enabled;
+  int allowed_offset;
+  int last_candidate_delay;
+  int compare_delay;
+  int candidate_hits;
+  float* histogram;
+  float last_delay_histogram;
+
+  // For dynamically changing the lookahead when using SoftReset...().
+  int lookahead;
+
+  // Far-end binary spectrum history buffer etc.
+  BinaryDelayEstimatorFarend* farend;
+} BinaryDelayEstimator;
+
+// Releases the memory allocated by
+// WebRtc_CreateBinaryDelayEstimatorFarend(...).
+// Input:
+//    - self              : Pointer to the binary delay estimation far-end
+//                          instance which is the return value of
+//                          WebRtc_CreateBinaryDelayEstimatorFarend().
+//
+void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self);
+
+// Allocates the memory needed by the far-end part of the binary delay
+// estimation. The memory needs to be initialized separately through
+// WebRtc_InitBinaryDelayEstimatorFarend(...).
+//
+// Inputs:
+//      - history_size    : Size of the far-end binary spectrum history.
+//
+// Return value:
+//      - BinaryDelayEstimatorFarend*
+//                        : Created |handle|. If the memory can't be allocated
+//                          or if any of the input parameters are invalid NULL
+//                          is returned.
+//
+BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend(
+    int history_size);
+
+// Re-allocates the buffers.
+//
+// Inputs:
+//      - self            : Pointer to the binary estimation far-end instance
+//                          which is the return value of
+//                          WebRtc_CreateBinaryDelayEstimatorFarend().
+//      - history_size    : Size of the far-end binary spectrum history.
+//
+// Return value:
+//      - history_size    : The history size allocated.
+int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self,
+                                      int history_size);
+
+// Initializes the delay estimation far-end instance created with
+// WebRtc_CreateBinaryDelayEstimatorFarend(...).
+//
+// Input:
+//    - self              : Pointer to the delay estimation far-end instance.
+//
+// Output:
+//    - self              : Initialized far-end instance.
+//
+void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self);
+
+// Soft resets the delay estimation far-end instance created with
+// WebRtc_CreateBinaryDelayEstimatorFarend(...).
+//
+// Input:
+//    - delay_shift   : The amount of blocks to shift history buffers.
+//
+void WebRtc_SoftResetBinaryDelayEstimatorFarend(
+    BinaryDelayEstimatorFarend* self, int delay_shift);
+
+// Adds the binary far-end spectrum to the internal far-end history buffer. This
+// spectrum is used as reference when calculating the delay using
+// WebRtc_ProcessBinarySpectrum().
+//
+// Inputs:
+//    - self                  : Pointer to the delay estimation far-end
+//                              instance.
+//    - binary_far_spectrum   : Far-end binary spectrum.
+//
+// Output:
+//    - self                  : Updated far-end instance.
+//
+void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* self,
+                                 uint32_t binary_far_spectrum);
+
+// Releases the memory allocated by WebRtc_CreateBinaryDelayEstimator(...).
+//
+// Note that BinaryDelayEstimator utilizes BinaryDelayEstimatorFarend, but does
+// not take ownership of it, hence the BinaryDelayEstimator has to be torn down
+// before the far-end.
+//
+// Input:
+//    - self              : Pointer to the binary delay estimation instance
+//                          which is the return value of
+//                          WebRtc_CreateBinaryDelayEstimator().
+//
+void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self);
+
+// Allocates the memory needed by the binary delay estimation. The memory needs
+// to be initialized separately through WebRtc_InitBinaryDelayEstimator(...).
+//
+// See WebRtc_CreateDelayEstimator(..) in delay_estimator_wrapper.c for detailed
+// description.
+BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator(
+    BinaryDelayEstimatorFarend* farend, int max_lookahead);
+
+// Re-allocates |history_size| dependent buffers. The far-end buffers will be
+// updated at the same time if needed.
+//
+// Input:
+//      - self            : Pointer to the binary estimation instance which is
+//                          the return value of
+//                          WebRtc_CreateBinaryDelayEstimator().
+//      - history_size    : Size of the history buffers.
+//
+// Return value:
+//      - history_size    : The history size allocated.
+int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self,
+                                       int history_size);
+
+// Initializes the delay estimation instance created with
+// WebRtc_CreateBinaryDelayEstimator(...).
+//
+// Input:
+//    - self              : Pointer to the delay estimation instance.
+//
+// Output:
+//    - self              : Initialized instance.
+//
+void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self);
+
+// Soft resets the delay estimation instance created with
+// WebRtc_CreateBinaryDelayEstimator(...).
+//
+// Input:
+//    - delay_shift   : The amount of blocks to shift history buffers.
+//
+// Return value:
+//    - actual_shifts : The actual number of shifts performed.
+//
+int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self,
+                                         int delay_shift);
+
+// Estimates and returns the delay between the binary far-end and binary near-
+// end spectra. It is assumed the binary far-end spectrum has been added using
+// WebRtc_AddBinaryFarSpectrum() prior to this call. The value will be offset by
+// the lookahead (i.e. the lookahead should be subtracted from the returned
+// value).
+//
+// Inputs:
+//    - self                  : Pointer to the delay estimation instance.
+//    - binary_near_spectrum  : Near-end binary spectrum of the current block.
+//
+// Output:
+//    - self                  : Updated instance.
+//
+// Return value:
+//    - delay                 :  >= 0 - Calculated delay value.
+//                              -2    - Insufficient data for estimation.
+//
+int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self,
+                                 uint32_t binary_near_spectrum);
+
+// Returns the last calculated delay updated by the function
+// WebRtc_ProcessBinarySpectrum(...).
+//
+// Input:
+//    - self                  : Pointer to the delay estimation instance.
+//
+// Return value:
+//    - delay                 :  >= 0 - Last calculated delay value
+//                              -2    - Insufficient data for estimation.
+//
+int WebRtc_binary_last_delay(BinaryDelayEstimator* self);
+
+// Returns the estimation quality of the last calculated delay updated by the
+// function WebRtc_ProcessBinarySpectrum(...). The estimation quality is a value
+// in the interval [0, 1].  The higher the value, the better the quality.
+//
+// Return value:
+//    - delay_quality         :  >= 0 - Estimation quality of last calculated
+//                                      delay value.
+float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self);
+
+// Updates the |mean_value| recursively with a step size of 2^-|factor|. This
+// function is used internally in the Binary Delay Estimator as well as the
+// Fixed point wrapper.
+//
+// Inputs:
+//    - new_value             : The new value the mean should be updated with.
+//    - factor                : The step size, in number of right shifts.
+//
+// Input/Output:
+//    - mean_value            : Pointer to the mean value.
+//
+void WebRtc_MeanEstimatorFix(int32_t new_value,
+                             int factor,
+                             int32_t* mean_value);
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_internal.h b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_internal.h
new file mode 100644
index 00000000..fd11028f
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_internal.h
@@ -0,0 +1,48 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Header file including the delay estimator handle used for testing.
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_
+
+#include "webrtc/modules/audio_processing/utility/delay_estimator.h"
+#include "webrtc/typedefs.h"
+
+typedef union {
+  float float_;
+  int32_t int32_;
+} SpectrumType;
+
+typedef struct {
+  // Pointers to mean values of spectrum.
+  SpectrumType* mean_far_spectrum;
+  // |mean_far_spectrum| initialization indicator.
+  int far_spectrum_initialized;
+
+  int spectrum_size;
+
+  // Far-end part of binary spectrum based delay estimation.
+  BinaryDelayEstimatorFarend* binary_farend;
+} DelayEstimatorFarend;
+
+typedef struct {
+  // Pointers to mean values of spectrum.
+  SpectrumType* mean_near_spectrum;
+  // |mean_near_spectrum| initialization indicator.
+  int near_spectrum_initialized;
+
+  int spectrum_size;
+
+  // Binary spectrum based delay estimator
+  BinaryDelayEstimator* binary_handle;
+} DelayEstimator;
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc
new file mode 100644
index 00000000..4ebe0e61
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc
@@ -0,0 +1,621 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+extern "C" {
+#include "webrtc/modules/audio_processing/utility/delay_estimator.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_internal.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+}
+#include "webrtc/typedefs.h"
+
+namespace {
+
+enum { kSpectrumSize = 65 };
+// Delay history sizes.
+enum { kMaxDelay = 100 };
+enum { kLookahead = 10 };
+enum { kHistorySize = kMaxDelay + kLookahead };
+// Length of binary spectrum sequence.
+enum { kSequenceLength = 400 };
+
+const int kDifferentHistorySize = 3;
+const int kDifferentLookahead = 1;
+
+const int kEnable[] = { 0, 1 };
+const size_t kSizeEnable = sizeof(kEnable) / sizeof(*kEnable);
+
+class DelayEstimatorTest : public ::testing::Test {
+ protected:
+  DelayEstimatorTest();
+  virtual void SetUp();
+  virtual void TearDown();
+
+  void Init();
+  void InitBinary();
+  void VerifyDelay(BinaryDelayEstimator* binary_handle, int offset, int delay);
+  void RunBinarySpectra(BinaryDelayEstimator* binary1,
+                        BinaryDelayEstimator* binary2,
+                        int near_offset, int lookahead_offset, int far_offset);
+  void RunBinarySpectraTest(int near_offset, int lookahead_offset,
+                            int ref_robust_validation, int robust_validation);
+
+  void* handle_;
+  DelayEstimator* self_;
+  void* farend_handle_;
+  DelayEstimatorFarend* farend_self_;
+  BinaryDelayEstimator* binary_;
+  BinaryDelayEstimatorFarend* binary_farend_;
+  int spectrum_size_;
+  // Dummy input spectra.
+  float far_f_[kSpectrumSize];
+  float near_f_[kSpectrumSize];
+  uint16_t far_u16_[kSpectrumSize];
+  uint16_t near_u16_[kSpectrumSize];
+  uint32_t binary_spectrum_[kSequenceLength + kHistorySize];
+};
+
+DelayEstimatorTest::DelayEstimatorTest()
+    : handle_(NULL),
+      self_(NULL),
+      farend_handle_(NULL),
+      farend_self_(NULL),
+      binary_(NULL),
+      binary_farend_(NULL),
+      spectrum_size_(kSpectrumSize) {
+  // Dummy input data are set with more or less arbitrary non-zero values.
+  memset(far_f_, 1, sizeof(far_f_));
+  memset(near_f_, 2, sizeof(near_f_));
+  memset(far_u16_, 1, sizeof(far_u16_));
+  memset(near_u16_, 2, sizeof(near_u16_));
+  // Construct a sequence of binary spectra used to verify delay estimate. The
+  // |kSequenceLength| has to be long enough for the delay estimation to leave
+  // the initialized state.
+  binary_spectrum_[0] = 1;
+  for (int i = 1; i < (kSequenceLength + kHistorySize); i++) {
+    binary_spectrum_[i] = 3 * binary_spectrum_[i - 1];
+  }
+}
+
+void DelayEstimatorTest::SetUp() {
+  farend_handle_ = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize,
+                                                     kHistorySize);
+  ASSERT_TRUE(farend_handle_ != NULL);
+  farend_self_ = reinterpret_cast<DelayEstimatorFarend*>(farend_handle_);
+  handle_ = WebRtc_CreateDelayEstimator(farend_handle_, kLookahead);
+  ASSERT_TRUE(handle_ != NULL);
+  self_ = reinterpret_cast<DelayEstimator*>(handle_);
+  binary_farend_ = WebRtc_CreateBinaryDelayEstimatorFarend(kHistorySize);
+  ASSERT_TRUE(binary_farend_ != NULL);
+  binary_ = WebRtc_CreateBinaryDelayEstimator(binary_farend_, kLookahead);
+  ASSERT_TRUE(binary_ != NULL);
+}
+
+void DelayEstimatorTest::TearDown() {
+  WebRtc_FreeDelayEstimator(handle_);
+  handle_ = NULL;
+  self_ = NULL;
+  WebRtc_FreeDelayEstimatorFarend(farend_handle_);
+  farend_handle_ = NULL;
+  farend_self_ = NULL;
+  WebRtc_FreeBinaryDelayEstimator(binary_);
+  binary_ = NULL;
+  WebRtc_FreeBinaryDelayEstimatorFarend(binary_farend_);
+  binary_farend_ = NULL;
+}
+
+void DelayEstimatorTest::Init() {
+  // Initialize Delay Estimator
+  EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_));
+  EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_));
+  // Verify initialization.
+  EXPECT_EQ(0, farend_self_->far_spectrum_initialized);
+  EXPECT_EQ(0, self_->near_spectrum_initialized);
+  EXPECT_EQ(-2, WebRtc_last_delay(handle_));  // Delay in initial state.
+  EXPECT_FLOAT_EQ(0, WebRtc_last_delay_quality(handle_));  // Zero quality.
+}
+
+void DelayEstimatorTest::InitBinary() {
+  // Initialize Binary Delay Estimator (far-end part).
+  WebRtc_InitBinaryDelayEstimatorFarend(binary_farend_);
+  // Initialize Binary Delay Estimator
+  WebRtc_InitBinaryDelayEstimator(binary_);
+  // Verify initialization. This does not guarantee a complete check, since
+  // |last_delay| may be equal to -2 before initialization if done on the fly.
+  EXPECT_EQ(-2, binary_->last_delay);
+}
+
+void DelayEstimatorTest::VerifyDelay(BinaryDelayEstimator* binary_handle,
+                                     int offset, int delay) {
+  // Verify that we WebRtc_binary_last_delay() returns correct delay.
+  EXPECT_EQ(delay, WebRtc_binary_last_delay(binary_handle));
+
+  if (delay != -2) {
+    // Verify correct delay estimate. In the non-causal case the true delay
+    // is equivalent with the |offset|.
+    EXPECT_EQ(offset, delay);
+  }
+}
+
+void DelayEstimatorTest::RunBinarySpectra(BinaryDelayEstimator* binary1,
+                                          BinaryDelayEstimator* binary2,
+                                          int near_offset,
+                                          int lookahead_offset,
+                                          int far_offset) {
+  int different_validations = binary1->robust_validation_enabled ^
+      binary2->robust_validation_enabled;
+  WebRtc_InitBinaryDelayEstimatorFarend(binary_farend_);
+  WebRtc_InitBinaryDelayEstimator(binary1);
+  WebRtc_InitBinaryDelayEstimator(binary2);
+  // Verify initialization. This does not guarantee a complete check, since
+  // |last_delay| may be equal to -2 before initialization if done on the fly.
+  EXPECT_EQ(-2, binary1->last_delay);
+  EXPECT_EQ(-2, binary2->last_delay);
+  for (int i = kLookahead; i < (kSequenceLength + kLookahead); i++) {
+    WebRtc_AddBinaryFarSpectrum(binary_farend_,
+                                binary_spectrum_[i + far_offset]);
+    int delay_1 = WebRtc_ProcessBinarySpectrum(binary1, binary_spectrum_[i]);
+    int delay_2 =
+        WebRtc_ProcessBinarySpectrum(binary2,
+                                     binary_spectrum_[i - near_offset]);
+
+    VerifyDelay(binary1, far_offset + kLookahead, delay_1);
+    VerifyDelay(binary2,
+                far_offset + kLookahead + lookahead_offset + near_offset,
+                delay_2);
+    // Expect the two delay estimates to be offset by |lookahead_offset| +
+    // |near_offset| when we have left the initial state.
+    if ((delay_1 != -2) && (delay_2 != -2)) {
+      EXPECT_EQ(delay_1, delay_2 - lookahead_offset - near_offset);
+    }
+    // For the case of identical signals |delay_1| and |delay_2| should match
+    // all the time, unless one of them has robust validation turned on.  In
+    // that case the robust validation leaves the initial state faster.
+    if ((near_offset == 0) && (lookahead_offset == 0)) {
+      if  (!different_validations) {
+        EXPECT_EQ(delay_1, delay_2);
+      } else {
+        if (binary1->robust_validation_enabled) {
+          EXPECT_GE(delay_1, delay_2);
+        } else {
+          EXPECT_GE(delay_2, delay_1);
+        }
+      }
+    }
+  }
+  // Verify that we have left the initialized state.
+  EXPECT_NE(-2, WebRtc_binary_last_delay(binary1));
+  EXPECT_LT(0, WebRtc_binary_last_delay_quality(binary1));
+  EXPECT_NE(-2, WebRtc_binary_last_delay(binary2));
+  EXPECT_LT(0, WebRtc_binary_last_delay_quality(binary2));
+}
+
+void DelayEstimatorTest::RunBinarySpectraTest(int near_offset,
+                                              int lookahead_offset,
+                                              int ref_robust_validation,
+                                              int robust_validation) {
+  BinaryDelayEstimator* binary2 =
+      WebRtc_CreateBinaryDelayEstimator(binary_farend_,
+                                        kLookahead + lookahead_offset);
+  // Verify the delay for both causal and non-causal systems. For causal systems
+  // the delay is equivalent with a positive |offset| of the far-end sequence.
+  // For non-causal systems the delay is equivalent with a negative |offset| of
+  // the far-end sequence.
+  binary_->robust_validation_enabled = ref_robust_validation;
+  binary2->robust_validation_enabled = robust_validation;
+  for (int offset = -kLookahead;
+      offset < kMaxDelay - lookahead_offset - near_offset;
+      offset++) {
+    RunBinarySpectra(binary_, binary2, near_offset, lookahead_offset, offset);
+  }
+  WebRtc_FreeBinaryDelayEstimator(binary2);
+  binary2 = NULL;
+  binary_->robust_validation_enabled = 0;  // Reset reference.
+}
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfWrapper) {
+  // In this test we verify correct error returns on invalid API calls.
+
+  // WebRtc_CreateDelayEstimatorFarend() and WebRtc_CreateDelayEstimator()
+  // should return a NULL pointer on invalid input values.
+  // Make sure we have a non-NULL value at start, so we can detect NULL after
+  // create failure.
+  void* handle = farend_handle_;
+  handle = WebRtc_CreateDelayEstimatorFarend(33, kHistorySize);
+  EXPECT_TRUE(handle == NULL);
+  handle = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, 1);
+  EXPECT_TRUE(handle == NULL);
+
+  handle = handle_;
+  handle = WebRtc_CreateDelayEstimator(NULL, kLookahead);
+  EXPECT_TRUE(handle == NULL);
+  handle = WebRtc_CreateDelayEstimator(farend_handle_, -1);
+  EXPECT_TRUE(handle == NULL);
+
+  // WebRtc_InitDelayEstimatorFarend() and WebRtc_InitDelayEstimator() should
+  // return -1 if we have a NULL pointer as |handle|.
+  EXPECT_EQ(-1, WebRtc_InitDelayEstimatorFarend(NULL));
+  EXPECT_EQ(-1, WebRtc_InitDelayEstimator(NULL));
+
+  // WebRtc_AddFarSpectrumFloat() should return -1 if we have:
+  // 1) NULL pointer as |handle|.
+  // 2) NULL pointer as far-end spectrum.
+  // 3) Incorrect spectrum size.
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(NULL, far_f_, spectrum_size_));
+  // Use |farend_handle_| which is properly created at SetUp().
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(farend_handle_, NULL,
+                                           spectrum_size_));
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_,
+                                           spectrum_size_ + 1));
+
+  // WebRtc_AddFarSpectrumFix() should return -1 if we have:
+  // 1) NULL pointer as |handle|.
+  // 2) NULL pointer as far-end spectrum.
+  // 3) Incorrect spectrum size.
+  // 4) Too high precision in far-end spectrum (Q-domain > 15).
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(NULL, far_u16_, spectrum_size_, 0));
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, NULL, spectrum_size_,
+                                         0));
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+                                         spectrum_size_ + 1, 0));
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+                                         spectrum_size_, 16));
+
+  // WebRtc_set_history_size() should return -1 if:
+  // 1) |handle| is a NULL.
+  // 2) |history_size| <= 1.
+  EXPECT_EQ(-1, WebRtc_set_history_size(NULL, 1));
+  EXPECT_EQ(-1, WebRtc_set_history_size(handle_, 1));
+  // WebRtc_history_size() should return -1 if:
+  // 1) NULL pointer input.
+  EXPECT_EQ(-1, WebRtc_history_size(NULL));
+  // 2) there is a mismatch between history size.
+  void* tmp_handle = WebRtc_CreateDelayEstimator(farend_handle_, kHistorySize);
+  EXPECT_EQ(0, WebRtc_InitDelayEstimator(tmp_handle));
+  EXPECT_EQ(kDifferentHistorySize,
+            WebRtc_set_history_size(tmp_handle, kDifferentHistorySize));
+  EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(tmp_handle));
+  EXPECT_EQ(kHistorySize, WebRtc_set_history_size(handle_, kHistorySize));
+  EXPECT_EQ(-1, WebRtc_history_size(tmp_handle));
+
+  // WebRtc_set_lookahead() should return -1 if we try a value outside the
+  /// buffer.
+  EXPECT_EQ(-1, WebRtc_set_lookahead(handle_, kLookahead + 1));
+  EXPECT_EQ(-1, WebRtc_set_lookahead(handle_, -1));
+
+  // WebRtc_set_allowed_offset() should return -1 if we have:
+  // 1) NULL pointer as |handle|.
+  // 2) |allowed_offset| < 0.
+  EXPECT_EQ(-1, WebRtc_set_allowed_offset(NULL, 0));
+  EXPECT_EQ(-1, WebRtc_set_allowed_offset(handle_, -1));
+
+  EXPECT_EQ(-1, WebRtc_get_allowed_offset(NULL));
+
+  // WebRtc_enable_robust_validation() should return -1 if we have:
+  // 1) NULL pointer as |handle|.
+  // 2) Incorrect |enable| value (not 0 or 1).
+  EXPECT_EQ(-1, WebRtc_enable_robust_validation(NULL, kEnable[0]));
+  EXPECT_EQ(-1, WebRtc_enable_robust_validation(handle_, -1));
+  EXPECT_EQ(-1, WebRtc_enable_robust_validation(handle_, 2));
+
+  // WebRtc_is_robust_validation_enabled() should return -1 if we have NULL
+  // pointer as |handle|.
+  EXPECT_EQ(-1, WebRtc_is_robust_validation_enabled(NULL));
+
+  // WebRtc_DelayEstimatorProcessFloat() should return -1 if we have:
+  // 1) NULL pointer as |handle|.
+  // 2) NULL pointer as near-end spectrum.
+  // 3) Incorrect spectrum size.
+  // 4) Non matching history sizes if multiple delay estimators using the same
+  //    far-end reference.
+  EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(NULL, near_f_,
+                                                  spectrum_size_));
+  // Use |handle_| which is properly created at SetUp().
+  EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(handle_, NULL,
+                                                  spectrum_size_));
+  EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(handle_, near_f_,
+                                                  spectrum_size_ + 1));
+  // |tmp_handle| is already in a non-matching state.
+  EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(tmp_handle,
+                                                  near_f_,
+                                                  spectrum_size_));
+
+  // WebRtc_DelayEstimatorProcessFix() should return -1 if we have:
+  // 1) NULL pointer as |handle|.
+  // 2) NULL pointer as near-end spectrum.
+  // 3) Incorrect spectrum size.
+  // 4) Too high precision in near-end spectrum (Q-domain > 15).
+  // 5) Non matching history sizes if multiple delay estimators using the same
+  //    far-end reference.
+  EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(NULL, near_u16_, spectrum_size_,
+                                                0));
+  EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, NULL, spectrum_size_,
+                                                0));
+  EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+                                                spectrum_size_ + 1, 0));
+  EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+                                                spectrum_size_, 16));
+  // |tmp_handle| is already in a non-matching state.
+  EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(tmp_handle,
+                                                near_u16_,
+                                                spectrum_size_,
+                                                0));
+  WebRtc_FreeDelayEstimator(tmp_handle);
+
+  // WebRtc_last_delay() should return -1 if we have a NULL pointer as |handle|.
+  EXPECT_EQ(-1, WebRtc_last_delay(NULL));
+
+  // Free any local memory if needed.
+  WebRtc_FreeDelayEstimator(handle);
+}
+
+TEST_F(DelayEstimatorTest, VerifyAllowedOffset) {
+  // Is set to zero by default.
+  EXPECT_EQ(0, WebRtc_get_allowed_offset(handle_));
+  for (int i = 1; i >= 0; i--) {
+    EXPECT_EQ(0, WebRtc_set_allowed_offset(handle_, i));
+    EXPECT_EQ(i, WebRtc_get_allowed_offset(handle_));
+    Init();
+    // Unaffected over a reset.
+    EXPECT_EQ(i, WebRtc_get_allowed_offset(handle_));
+  }
+}
+
+TEST_F(DelayEstimatorTest, VerifyEnableRobustValidation) {
+  // Disabled by default.
+  EXPECT_EQ(0, WebRtc_is_robust_validation_enabled(handle_));
+  for (size_t i = 0; i < kSizeEnable; ++i) {
+    EXPECT_EQ(0, WebRtc_enable_robust_validation(handle_, kEnable[i]));
+    EXPECT_EQ(kEnable[i], WebRtc_is_robust_validation_enabled(handle_));
+    Init();
+    // Unaffected over a reset.
+    EXPECT_EQ(kEnable[i], WebRtc_is_robust_validation_enabled(handle_));
+  }
+}
+
+TEST_F(DelayEstimatorTest, InitializedSpectrumAfterProcess) {
+  // In this test we verify that the mean spectra are initialized after first
+  // time we call WebRtc_AddFarSpectrum() and Process() respectively. The test
+  // also verifies the state is not left for zero spectra.
+  const float kZerosFloat[kSpectrumSize] = { 0.0 };
+  const uint16_t kZerosU16[kSpectrumSize] = { 0 };
+
+  // For floating point operations, process one frame and verify initialization
+  // flag.
+  Init();
+  EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, kZerosFloat,
+                                          spectrum_size_));
+  EXPECT_EQ(0, farend_self_->far_spectrum_initialized);
+  EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_,
+                                           spectrum_size_));
+  EXPECT_EQ(1, farend_self_->far_spectrum_initialized);
+  EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFloat(handle_, kZerosFloat,
+                                                  spectrum_size_));
+  EXPECT_EQ(0, self_->near_spectrum_initialized);
+  EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFloat(handle_, near_f_,
+                                                  spectrum_size_));
+  EXPECT_EQ(1, self_->near_spectrum_initialized);
+
+  // For fixed point operations, process one frame and verify initialization
+  // flag.
+  Init();
+  EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, kZerosU16,
+                                        spectrum_size_, 0));
+  EXPECT_EQ(0, farend_self_->far_spectrum_initialized);
+  EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+                                         spectrum_size_, 0));
+  EXPECT_EQ(1, farend_self_->far_spectrum_initialized);
+  EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFix(handle_, kZerosU16,
+                                                spectrum_size_, 0));
+  EXPECT_EQ(0, self_->near_spectrum_initialized);
+  EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+                                                spectrum_size_, 0));
+  EXPECT_EQ(1, self_->near_spectrum_initialized);
+}
+
+TEST_F(DelayEstimatorTest, CorrectLastDelay) {
+  // In this test we verify that we get the correct last delay upon valid call.
+  // We simply process the same data until we leave the initialized state
+  // (|last_delay| = -2). Then we compare the Process() output with the
+  // last_delay() call.
+
+  // TODO(bjornv): Update quality values for robust validation.
+  int last_delay = 0;
+  // Floating point operations.
+  Init();
+  for (int i = 0; i < 200; i++) {
+    EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_,
+                                            spectrum_size_));
+    last_delay = WebRtc_DelayEstimatorProcessFloat(handle_, near_f_,
+                                                   spectrum_size_);
+    if (last_delay != -2) {
+      EXPECT_EQ(last_delay, WebRtc_last_delay(handle_));
+      if (!WebRtc_is_robust_validation_enabled(handle_)) {
+        EXPECT_FLOAT_EQ(7203.f / kMaxBitCountsQ9,
+                        WebRtc_last_delay_quality(handle_));
+      }
+      break;
+    }
+  }
+  // Verify that we have left the initialized state.
+  EXPECT_NE(-2, WebRtc_last_delay(handle_));
+  EXPECT_LT(0, WebRtc_last_delay_quality(handle_));
+
+  // Fixed point operations.
+  Init();
+  for (int i = 0; i < 200; i++) {
+    EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+                                          spectrum_size_, 0));
+    last_delay = WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+                                                 spectrum_size_, 0);
+    if (last_delay != -2) {
+      EXPECT_EQ(last_delay, WebRtc_last_delay(handle_));
+      if (!WebRtc_is_robust_validation_enabled(handle_)) {
+        EXPECT_FLOAT_EQ(7203.f / kMaxBitCountsQ9,
+                        WebRtc_last_delay_quality(handle_));
+      }
+      break;
+    }
+  }
+  // Verify that we have left the initialized state.
+  EXPECT_NE(-2, WebRtc_last_delay(handle_));
+  EXPECT_LT(0, WebRtc_last_delay_quality(handle_));
+}
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfBinaryEstimatorFarend) {
+  // In this test we verify correct output on invalid API calls to the Binary
+  // Delay Estimator (far-end part).
+
+  BinaryDelayEstimatorFarend* binary = binary_farend_;
+  // WebRtc_CreateBinaryDelayEstimatorFarend() should return -1 if the input
+  // history size is less than 2. This is to make sure the buffer shifting
+  // applies properly.
+  // Make sure we have a non-NULL value at start, so we can detect NULL after
+  // create failure.
+  binary = WebRtc_CreateBinaryDelayEstimatorFarend(1);
+  EXPECT_TRUE(binary == NULL);
+}
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfBinaryEstimator) {
+  // In this test we verify correct output on invalid API calls to the Binary
+  // Delay Estimator.
+
+  BinaryDelayEstimator* binary_handle = binary_;
+  // WebRtc_CreateBinaryDelayEstimator() should return -1 if we have a NULL
+  // pointer as |binary_farend| or invalid input values. Upon failure, the
+  // |binary_handle| should be NULL.
+  // Make sure we have a non-NULL value at start, so we can detect NULL after
+  // create failure.
+  binary_handle = WebRtc_CreateBinaryDelayEstimator(NULL, kLookahead);
+  EXPECT_TRUE(binary_handle == NULL);
+  binary_handle = WebRtc_CreateBinaryDelayEstimator(binary_farend_, -1);
+  EXPECT_TRUE(binary_handle == NULL);
+}
+
+TEST_F(DelayEstimatorTest, MeanEstimatorFix) {
+  // In this test we verify that we update the mean value in correct direction
+  // only. With "direction" we mean increase or decrease.
+
+  int32_t mean_value = 4000;
+  int32_t mean_value_before = mean_value;
+  int32_t new_mean_value = mean_value * 2;
+
+  // Increasing |mean_value|.
+  WebRtc_MeanEstimatorFix(new_mean_value, 10, &mean_value);
+  EXPECT_LT(mean_value_before, mean_value);
+  EXPECT_GT(new_mean_value, mean_value);
+
+  // Decreasing |mean_value|.
+  new_mean_value = mean_value / 2;
+  mean_value_before = mean_value;
+  WebRtc_MeanEstimatorFix(new_mean_value, 10, &mean_value);
+  EXPECT_GT(mean_value_before, mean_value);
+  EXPECT_LT(new_mean_value, mean_value);
+}
+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearSameSpectrum) {
+  // In this test we verify that we get the correct delay estimates if we shift
+  // the signal accordingly. We create two Binary Delay Estimators and feed them
+  // with the same signals, so they should output the same results.
+  // We verify both causal and non-causal delays.
+  // For these noise free signals, the robust validation should not have an
+  // impact, hence we turn robust validation on/off for both reference and
+  // delayed near end.
+
+  for (size_t i = 0; i < kSizeEnable; ++i) {
+    for (size_t j = 0; j < kSizeEnable; ++j) {
+      RunBinarySpectraTest(0, 0, kEnable[i], kEnable[j]);
+    }
+  }
+}
+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearDifferentSpectrum) {
+  // In this test we use the same setup as above, but we now feed the two Binary
+  // Delay Estimators with different signals, so they should output different
+  // results.
+  // For these noise free signals, the robust validation should not have an
+  // impact, hence we turn robust validation on/off for both reference and
+  // delayed near end.
+
+  const int kNearOffset = 1;
+  for (size_t i = 0; i < kSizeEnable; ++i) {
+    for (size_t j = 0; j < kSizeEnable; ++j) {
+      RunBinarySpectraTest(kNearOffset, 0, kEnable[i], kEnable[j]);
+    }
+  }
+}
+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearDifferentLookahead) {
+  // In this test we use the same setup as above, feeding the two Binary
+  // Delay Estimators with the same signals. The difference is that we create
+  // them with different lookahead.
+  // For these noise free signals, the robust validation should not have an
+  // impact, hence we turn robust validation on/off for both reference and
+  // delayed near end.
+
+  const int kLookaheadOffset = 1;
+  for (size_t i = 0; i < kSizeEnable; ++i) {
+    for (size_t j = 0; j < kSizeEnable; ++j) {
+      RunBinarySpectraTest(0, kLookaheadOffset, kEnable[i], kEnable[j]);
+    }
+  }
+}
+
+TEST_F(DelayEstimatorTest, AllowedOffsetNoImpactWhenRobustValidationDisabled) {
+  // The same setup as in ExactDelayEstimateMultipleNearSameSpectrum with the
+  // difference that |allowed_offset| is set for the reference binary delay
+  // estimator.
+
+  binary_->allowed_offset = 10;
+  RunBinarySpectraTest(0, 0, 0, 0);
+  binary_->allowed_offset = 0;  // Reset reference.
+}
+
+TEST_F(DelayEstimatorTest, VerifyLookaheadAtCreate) {
+  void* farend_handle = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize,
+                                                          kMaxDelay);
+  ASSERT_TRUE(farend_handle != NULL);
+  void* handle = WebRtc_CreateDelayEstimator(farend_handle, kLookahead);
+  ASSERT_TRUE(handle != NULL);
+  EXPECT_EQ(kLookahead, WebRtc_lookahead(handle));
+  WebRtc_FreeDelayEstimator(handle);
+  WebRtc_FreeDelayEstimatorFarend(farend_handle);
+}
+
+TEST_F(DelayEstimatorTest, VerifyLookaheadIsSetAndKeptAfterInit) {
+  EXPECT_EQ(kLookahead, WebRtc_lookahead(handle_));
+  EXPECT_EQ(kDifferentLookahead,
+            WebRtc_set_lookahead(handle_, kDifferentLookahead));
+  EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_));
+  EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_));
+  EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_));
+  EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_));
+  EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_));
+}
+
+TEST_F(DelayEstimatorTest, VerifyHistorySizeAtCreate) {
+  EXPECT_EQ(kHistorySize, WebRtc_history_size(handle_));
+}
+
+TEST_F(DelayEstimatorTest, VerifyHistorySizeIsSetAndKeptAfterInit) {
+  EXPECT_EQ(kHistorySize, WebRtc_history_size(handle_));
+  EXPECT_EQ(kDifferentHistorySize,
+            WebRtc_set_history_size(handle_, kDifferentHistorySize));
+  EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_));
+  EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_));
+  EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_));
+  EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_));
+  EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_));
+}
+
+// TODO(bjornv): Add tests for SoftReset...(...).
+
+}  // namespace
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.c b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.c
new file mode 100644
index 00000000..270588f3
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.c
@@ -0,0 +1,485 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/utility/delay_estimator.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_internal.h"
+#include "webrtc/system_wrappers/interface/compile_assert_c.h"
+
+// Only bit |kBandFirst| through bit |kBandLast| are processed and
+// |kBandFirst| - |kBandLast| must be < 32.
+enum { kBandFirst = 12 };
+enum { kBandLast = 43 };
+
+static __inline uint32_t SetBit(uint32_t in, int pos) {
+  uint32_t mask = (1 << pos);
+  uint32_t out = (in | mask);
+
+  return out;
+}
+
+// Calculates the mean recursively. Same version as WebRtc_MeanEstimatorFix(),
+// but for float.
+//
+// Inputs:
+//    - new_value             : New additional value.
+//    - scale                 : Scale for smoothing (should be less than 1.0).
+//
+// Input/Output:
+//    - mean_value            : Pointer to the mean value for updating.
+//
+static void MeanEstimatorFloat(float new_value,
+                               float scale,
+                               float* mean_value) {
+  assert(scale < 1.0f);
+  *mean_value += (new_value - *mean_value) * scale;
+}
+
+// Computes the binary spectrum by comparing the input |spectrum| with a
+// |threshold_spectrum|. Float and fixed point versions.
+//
+// Inputs:
+//      - spectrum            : Spectrum of which the binary spectrum should be
+//                              calculated.
+//      - threshold_spectrum  : Threshold spectrum with which the input
+//                              spectrum is compared.
+// Return:
+//      - out                 : Binary spectrum.
+//
+static uint32_t BinarySpectrumFix(const uint16_t* spectrum,
+                                  SpectrumType* threshold_spectrum,
+                                  int q_domain,
+                                  int* threshold_initialized) {
+  int i = kBandFirst;
+  uint32_t out = 0;
+
+  assert(q_domain < 16);
+
+  if (!(*threshold_initialized)) {
+    // Set the |threshold_spectrum| to half the input |spectrum| as starting
+    // value. This speeds up the convergence.
+    for (i = kBandFirst; i <= kBandLast; i++) {
+      if (spectrum[i] > 0) {
+        // Convert input spectrum from Q(|q_domain|) to Q15.
+        int32_t spectrum_q15 = ((int32_t) spectrum[i]) << (15 - q_domain);
+        threshold_spectrum[i].int32_ = (spectrum_q15 >> 1);
+        *threshold_initialized = 1;
+      }
+    }
+  }
+  for (i = kBandFirst; i <= kBandLast; i++) {
+    // Convert input spectrum from Q(|q_domain|) to Q15.
+    int32_t spectrum_q15 = ((int32_t) spectrum[i]) << (15 - q_domain);
+    // Update the |threshold_spectrum|.
+    WebRtc_MeanEstimatorFix(spectrum_q15, 6, &(threshold_spectrum[i].int32_));
+    // Convert |spectrum| at current frequency bin to a binary value.
+    if (spectrum_q15 > threshold_spectrum[i].int32_) {
+      out = SetBit(out, i - kBandFirst);
+    }
+  }
+
+  return out;
+}
+
+static uint32_t BinarySpectrumFloat(const float* spectrum,
+                                    SpectrumType* threshold_spectrum,
+                                    int* threshold_initialized) {
+  int i = kBandFirst;
+  uint32_t out = 0;
+  const float kScale = 1 / 64.0;
+
+  if (!(*threshold_initialized)) {
+    // Set the |threshold_spectrum| to half the input |spectrum| as starting
+    // value. This speeds up the convergence.
+    for (i = kBandFirst; i <= kBandLast; i++) {
+      if (spectrum[i] > 0.0f) {
+        threshold_spectrum[i].float_ = (spectrum[i] / 2);
+        *threshold_initialized = 1;
+      }
+    }
+  }
+
+  for (i = kBandFirst; i <= kBandLast; i++) {
+    // Update the |threshold_spectrum|.
+    MeanEstimatorFloat(spectrum[i], kScale, &(threshold_spectrum[i].float_));
+    // Convert |spectrum| at current frequency bin to a binary value.
+    if (spectrum[i] > threshold_spectrum[i].float_) {
+      out = SetBit(out, i - kBandFirst);
+    }
+  }
+
+  return out;
+}
+
+void WebRtc_FreeDelayEstimatorFarend(void* handle) {
+  DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+
+  if (handle == NULL) {
+    return;
+  }
+
+  free(self->mean_far_spectrum);
+  self->mean_far_spectrum = NULL;
+
+  WebRtc_FreeBinaryDelayEstimatorFarend(self->binary_farend);
+  self->binary_farend = NULL;
+
+  free(self);
+}
+
+void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size) {
+  DelayEstimatorFarend* self = NULL;
+
+  // Check if the sub band used in the delay estimation is small enough to fit
+  // the binary spectra in a uint32_t.
+  COMPILE_ASSERT(kBandLast - kBandFirst < 32);
+
+  if (spectrum_size >= kBandLast) {
+    self = malloc(sizeof(DelayEstimatorFarend));
+  }
+
+  if (self != NULL) {
+    int memory_fail = 0;
+
+    // Allocate memory for the binary far-end spectrum handling.
+    self->binary_farend = WebRtc_CreateBinaryDelayEstimatorFarend(history_size);
+    memory_fail |= (self->binary_farend == NULL);
+
+    // Allocate memory for spectrum buffers.
+    self->mean_far_spectrum = malloc(spectrum_size * sizeof(SpectrumType));
+    memory_fail |= (self->mean_far_spectrum == NULL);
+
+    self->spectrum_size = spectrum_size;
+
+    if (memory_fail) {
+      WebRtc_FreeDelayEstimatorFarend(self);
+      self = NULL;
+    }
+  }
+
+  return self;
+}
+
+int WebRtc_InitDelayEstimatorFarend(void* handle) {
+  DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+
+  if (self == NULL) {
+    return -1;
+  }
+
+  // Initialize far-end part of binary delay estimator.
+  WebRtc_InitBinaryDelayEstimatorFarend(self->binary_farend);
+
+  // Set averaged far and near end spectra to zero.
+  memset(self->mean_far_spectrum, 0,
+         sizeof(SpectrumType) * self->spectrum_size);
+  // Reset initialization indicators.
+  self->far_spectrum_initialized = 0;
+
+  return 0;
+}
+
+void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift) {
+  DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+  assert(self != NULL);
+  WebRtc_SoftResetBinaryDelayEstimatorFarend(self->binary_farend, delay_shift);
+}
+
+int WebRtc_AddFarSpectrumFix(void* handle,
+                             const uint16_t* far_spectrum,
+                             int spectrum_size,
+                             int far_q) {
+  DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+  uint32_t binary_spectrum = 0;
+
+  if (self == NULL) {
+    return -1;
+  }
+  if (far_spectrum == NULL) {
+    // Empty far end spectrum.
+    return -1;
+  }
+  if (spectrum_size != self->spectrum_size) {
+    // Data sizes don't match.
+    return -1;
+  }
+  if (far_q > 15) {
+    // If |far_q| is larger than 15 we cannot guarantee no wrap around.
+    return -1;
+  }
+
+  // Get binary spectrum.
+  binary_spectrum = BinarySpectrumFix(far_spectrum, self->mean_far_spectrum,
+                                      far_q, &(self->far_spectrum_initialized));
+  WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum);
+
+  return 0;
+}
+
+int WebRtc_AddFarSpectrumFloat(void* handle,
+                               const float* far_spectrum,
+                               int spectrum_size) {
+  DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle;
+  uint32_t binary_spectrum = 0;
+
+  if (self == NULL) {
+    return -1;
+  }
+  if (far_spectrum == NULL) {
+    // Empty far end spectrum.
+    return -1;
+  }
+  if (spectrum_size != self->spectrum_size) {
+    // Data sizes don't match.
+    return -1;
+  }
+
+  // Get binary spectrum.
+  binary_spectrum = BinarySpectrumFloat(far_spectrum, self->mean_far_spectrum,
+                                        &(self->far_spectrum_initialized));
+  WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum);
+
+  return 0;
+}
+
+void WebRtc_FreeDelayEstimator(void* handle) {
+  DelayEstimator* self = (DelayEstimator*) handle;
+
+  if (handle == NULL) {
+    return;
+  }
+
+  free(self->mean_near_spectrum);
+  self->mean_near_spectrum = NULL;
+
+  WebRtc_FreeBinaryDelayEstimator(self->binary_handle);
+  self->binary_handle = NULL;
+
+  free(self);
+}
+
+void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead) {
+  DelayEstimator* self = NULL;
+  DelayEstimatorFarend* farend = (DelayEstimatorFarend*) farend_handle;
+
+  if (farend_handle != NULL) {
+    self = malloc(sizeof(DelayEstimator));
+  }
+
+  if (self != NULL) {
+    int memory_fail = 0;
+
+    // Allocate memory for the farend spectrum handling.
+    self->binary_handle =
+        WebRtc_CreateBinaryDelayEstimator(farend->binary_farend, max_lookahead);
+    memory_fail |= (self->binary_handle == NULL);
+
+    // Allocate memory for spectrum buffers.
+    self->mean_near_spectrum = malloc(farend->spectrum_size *
+                                      sizeof(SpectrumType));
+    memory_fail |= (self->mean_near_spectrum == NULL);
+
+    self->spectrum_size = farend->spectrum_size;
+
+    if (memory_fail) {
+      WebRtc_FreeDelayEstimator(self);
+      self = NULL;
+    }
+  }
+
+  return self;
+}
+
+int WebRtc_InitDelayEstimator(void* handle) {
+  DelayEstimator* self = (DelayEstimator*) handle;
+
+  if (self == NULL) {
+    return -1;
+  }
+
+  // Initialize binary delay estimator.
+  WebRtc_InitBinaryDelayEstimator(self->binary_handle);
+
+  // Set averaged far and near end spectra to zero.
+  memset(self->mean_near_spectrum, 0,
+         sizeof(SpectrumType) * self->spectrum_size);
+  // Reset initialization indicators.
+  self->near_spectrum_initialized = 0;
+
+  return 0;
+}
+
+int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift) {
+  DelayEstimator* self = (DelayEstimator*) handle;
+  assert(self != NULL);
+  return WebRtc_SoftResetBinaryDelayEstimator(self->binary_handle, delay_shift);
+}
+
+int WebRtc_set_history_size(void* handle, int history_size) {
+  DelayEstimator* self = handle;
+
+  if ((self == NULL) || (history_size <= 1)) {
+    return -1;
+  }
+  return WebRtc_AllocateHistoryBufferMemory(self->binary_handle, history_size);
+}
+
+int WebRtc_history_size(const void* handle) {
+  const DelayEstimator* self = handle;
+
+  if (self == NULL) {
+    return -1;
+  }
+  if (self->binary_handle->farend->history_size !=
+      self->binary_handle->history_size) {
+    // Non matching history sizes.
+    return -1;
+  }
+  return self->binary_handle->history_size;
+}
+
+int WebRtc_set_lookahead(void* handle, int lookahead) {
+  DelayEstimator* self = (DelayEstimator*) handle;
+  assert(self != NULL);
+  assert(self->binary_handle != NULL);
+  if ((lookahead > self->binary_handle->near_history_size - 1) ||
+      (lookahead < 0)) {
+    return -1;
+  }
+  self->binary_handle->lookahead = lookahead;
+  return self->binary_handle->lookahead;
+}
+
+int WebRtc_lookahead(void* handle) {
+  DelayEstimator* self = (DelayEstimator*) handle;
+  assert(self != NULL);
+  assert(self->binary_handle != NULL);
+  return self->binary_handle->lookahead;
+}
+
+int WebRtc_set_allowed_offset(void* handle, int allowed_offset) {
+  DelayEstimator* self = (DelayEstimator*) handle;
+
+  if ((self == NULL) || (allowed_offset < 0)) {
+    return -1;
+  }
+  self->binary_handle->allowed_offset = allowed_offset;
+  return 0;
+}
+
+int WebRtc_get_allowed_offset(const void* handle) {
+  const DelayEstimator* self = (const DelayEstimator*) handle;
+
+  if (self == NULL) {
+    return -1;
+  }
+  return self->binary_handle->allowed_offset;
+}
+
+int WebRtc_enable_robust_validation(void* handle, int enable) {
+  DelayEstimator* self = (DelayEstimator*) handle;
+
+  if (self == NULL) {
+    return -1;
+  }
+  if ((enable < 0) || (enable > 1)) {
+    return -1;
+  }
+  assert(self->binary_handle != NULL);
+  self->binary_handle->robust_validation_enabled = enable;
+  return 0;
+}
+
+int WebRtc_is_robust_validation_enabled(const void* handle) {
+  const DelayEstimator* self = (const DelayEstimator*) handle;
+
+  if (self == NULL) {
+    return -1;
+  }
+  return self->binary_handle->robust_validation_enabled;
+}
+
+int WebRtc_DelayEstimatorProcessFix(void* handle,
+                                    const uint16_t* near_spectrum,
+                                    int spectrum_size,
+                                    int near_q) {
+  DelayEstimator* self = (DelayEstimator*) handle;
+  uint32_t binary_spectrum = 0;
+
+  if (self == NULL) {
+    return -1;
+  }
+  if (near_spectrum == NULL) {
+    // Empty near end spectrum.
+    return -1;
+  }
+  if (spectrum_size != self->spectrum_size) {
+    // Data sizes don't match.
+    return -1;
+  }
+  if (near_q > 15) {
+    // If |near_q| is larger than 15 we cannot guarantee no wrap around.
+    return -1;
+  }
+
+  // Get binary spectra.
+  binary_spectrum = BinarySpectrumFix(near_spectrum,
+                                      self->mean_near_spectrum,
+                                      near_q,
+                                      &(self->near_spectrum_initialized));
+
+  return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum);
+}
+
+int WebRtc_DelayEstimatorProcessFloat(void* handle,
+                                      const float* near_spectrum,
+                                      int spectrum_size) {
+  DelayEstimator* self = (DelayEstimator*) handle;
+  uint32_t binary_spectrum = 0;
+
+  if (self == NULL) {
+    return -1;
+  }
+  if (near_spectrum == NULL) {
+    // Empty near end spectrum.
+    return -1;
+  }
+  if (spectrum_size != self->spectrum_size) {
+    // Data sizes don't match.
+    return -1;
+  }
+
+  // Get binary spectrum.
+  binary_spectrum = BinarySpectrumFloat(near_spectrum, self->mean_near_spectrum,
+                                        &(self->near_spectrum_initialized));
+
+  return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum);
+}
+
+int WebRtc_last_delay(void* handle) {
+  DelayEstimator* self = (DelayEstimator*) handle;
+
+  if (self == NULL) {
+    return -1;
+  }
+
+  return WebRtc_binary_last_delay(self->binary_handle);
+}
+
+float WebRtc_last_delay_quality(void* handle) {
+  DelayEstimator* self = (DelayEstimator*) handle;
+  assert(self != NULL);
+  return WebRtc_binary_last_delay_quality(self->binary_handle);
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h
new file mode 100644
index 00000000..fdadebeb
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h
@@ -0,0 +1,244 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Performs delay estimation on block by block basis.
+// The return value is  0 - OK and -1 - Error, unless otherwise stated.
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_
+
+#include "webrtc/typedefs.h"
+
+// Releases the memory allocated by WebRtc_CreateDelayEstimatorFarend(...)
+void WebRtc_FreeDelayEstimatorFarend(void* handle);
+
+// Allocates the memory needed by the far-end part of the delay estimation. The
+// memory needs to be initialized separately through
+// WebRtc_InitDelayEstimatorFarend(...).
+//
+// Inputs:
+//  - spectrum_size     : Size of the spectrum used both in far-end and
+//                        near-end. Used to allocate memory for spectrum
+//                        specific buffers.
+//  - history_size      : The far-end history buffer size. A change in buffer
+//                        size can be forced with WebRtc_set_history_size().
+//                        Note that the maximum delay which can be estimated is
+//                        determined together with WebRtc_set_lookahead().
+//
+// Return value:
+//  - void*             : Created |handle|. If the memory can't be allocated or
+//                        if any of the input parameters are invalid NULL is
+//                        returned.
+void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size);
+
+// Initializes the far-end part of the delay estimation instance returned by
+// WebRtc_CreateDelayEstimatorFarend(...)
+int WebRtc_InitDelayEstimatorFarend(void* handle);
+
+// Soft resets the far-end part of the delay estimation instance returned by
+// WebRtc_CreateDelayEstimatorFarend(...).
+// Input:
+//      - delay_shift   : The amount of blocks to shift history buffers.
+void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift);
+
+// Adds the far-end spectrum to the far-end history buffer. This spectrum is
+// used as reference when calculating the delay using
+// WebRtc_ProcessSpectrum().
+//
+// Inputs:
+//    - far_spectrum    : Far-end spectrum.
+//    - spectrum_size   : The size of the data arrays (same for both far- and
+//                        near-end).
+//    - far_q           : The Q-domain of the far-end data.
+//
+// Output:
+//    - handle          : Updated far-end instance.
+//
+int WebRtc_AddFarSpectrumFix(void* handle,
+                             const uint16_t* far_spectrum,
+                             int spectrum_size,
+                             int far_q);
+
+// See WebRtc_AddFarSpectrumFix() for description.
+int WebRtc_AddFarSpectrumFloat(void* handle,
+                               const float* far_spectrum,
+                               int spectrum_size);
+
+// Releases the memory allocated by WebRtc_CreateDelayEstimator(...)
+void WebRtc_FreeDelayEstimator(void* handle);
+
+// Allocates the memory needed by the delay estimation. The memory needs to be
+// initialized separately through WebRtc_InitDelayEstimator(...).
+//
+// Inputs:
+//      - farend_handle : Pointer to the far-end part of the delay estimation
+//                        instance created prior to this call using
+//                        WebRtc_CreateDelayEstimatorFarend().
+//
+//                        Note that WebRtc_CreateDelayEstimator does not take
+//                        ownership of |farend_handle|, which has to be torn
+//                        down properly after this instance.
+//
+//      - max_lookahead : Maximum amount of non-causal lookahead allowed. The
+//                        actual amount of lookahead used can be controlled by
+//                        WebRtc_set_lookahead(...). The default |lookahead| is
+//                        set to |max_lookahead| at create time. Use
+//                        WebRtc_set_lookahead(...) before start if a different
+//                        value is desired.
+//
+//                        Using lookahead can detect cases in which a near-end
+//                        signal occurs before the corresponding far-end signal.
+//                        It will delay the estimate for the current block by an
+//                        equal amount, and the returned values will be offset
+//                        by it.
+//
+//                        A value of zero is the typical no-lookahead case.
+//                        This also represents the minimum delay which can be
+//                        estimated.
+//
+//                        Note that the effective range of delay estimates is
+//                        [-|lookahead|,... ,|history_size|-|lookahead|)
+//                        where |history_size| is set through
+//                        WebRtc_set_history_size().
+//
+// Return value:
+//      - void*         : Created |handle|. If the memory can't be allocated or
+//                        if any of the input parameters are invalid NULL is
+//                        returned.
+void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead);
+
+// Initializes the delay estimation instance returned by
+// WebRtc_CreateDelayEstimator(...)
+int WebRtc_InitDelayEstimator(void* handle);
+
+// Soft resets the delay estimation instance returned by
+// WebRtc_CreateDelayEstimator(...)
+// Input:
+//      - delay_shift   : The amount of blocks to shift history buffers.
+//
+// Return value:
+//      - actual_shifts : The actual number of shifts performed.
+int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift);
+
+// Sets the effective |history_size| used. Valid values from 2. We simply need
+// at least two delays to compare to perform an estimate. If |history_size| is
+// changed, buffers are reallocated filling in with zeros if necessary.
+// Note that changing the |history_size| affects both buffers in far-end and
+// near-end. Hence it is important to change all DelayEstimators that use the
+// same reference far-end, to the same |history_size| value.
+// Inputs:
+//  - handle            : Pointer to the delay estimation instance.
+//  - history_size      : Effective history size to be used.
+// Return value:
+//  - new_history_size  : The new history size used. If the memory was not able
+//                        to be allocated 0 is returned.
+int WebRtc_set_history_size(void* handle, int history_size);
+
+// Returns the history_size currently used.
+// Input:
+//      - handle        : Pointer to the delay estimation instance.
+int WebRtc_history_size(const void* handle);
+
+// Sets the amount of |lookahead| to use. Valid values are [0, max_lookahead]
+// where |max_lookahead| was set at create time through
+// WebRtc_CreateDelayEstimator(...).
+//
+// Input:
+//      - handle        : Pointer to the delay estimation instance.
+//      - lookahead     : The amount of lookahead to be used.
+//
+// Return value:
+//      - new_lookahead : The actual amount of lookahead set, unless |handle| is
+//                        a NULL pointer or |lookahead| is invalid, for which an
+//                        error is returned.
+int WebRtc_set_lookahead(void* handle, int lookahead);
+
+// Returns the amount of lookahead we currently use.
+// Input:
+//      - handle        : Pointer to the delay estimation instance.
+int WebRtc_lookahead(void* handle);
+
+// Sets the |allowed_offset| used in the robust validation scheme.  If the
+// delay estimator is used in an echo control component, this parameter is
+// related to the filter length.  In principle |allowed_offset| should be set to
+// the echo control filter length minus the expected echo duration, i.e., the
+// delay offset the echo control can handle without quality regression.  The
+// default value, used if not set manually, is zero.  Note that |allowed_offset|
+// has to be non-negative.
+// Inputs:
+//  - handle            : Pointer to the delay estimation instance.
+//  - allowed_offset    : The amount of delay offset, measured in partitions,
+//                        the echo control filter can handle.
+int WebRtc_set_allowed_offset(void* handle, int allowed_offset);
+
+// Returns the |allowed_offset| in number of partitions.
+int WebRtc_get_allowed_offset(const void* handle);
+
+// Enables/Disables a robust validation functionality in the delay estimation.
+// This is by default set to disabled at create time.  The state is preserved
+// over a reset.
+// Inputs:
+//      - handle        : Pointer to the delay estimation instance.
+//      - enable        : Enable (1) or disable (0) this feature.
+int WebRtc_enable_robust_validation(void* handle, int enable);
+
+// Returns 1 if robust validation is enabled and 0 if disabled.
+int WebRtc_is_robust_validation_enabled(const void* handle);
+
+// Estimates and returns the delay between the far-end and near-end blocks. The
+// value will be offset by the lookahead (i.e. the lookahead should be
+// subtracted from the returned value).
+// Inputs:
+//      - handle        : Pointer to the delay estimation instance.
+//      - near_spectrum : Pointer to the near-end spectrum data of the current
+//                        block.
+//      - spectrum_size : The size of the data arrays (same for both far- and
+//                        near-end).
+//      - near_q        : The Q-domain of the near-end data.
+//
+// Output:
+//      - handle        : Updated instance.
+//
+// Return value:
+//      - delay         :  >= 0 - Calculated delay value.
+//                        -1    - Error.
+//                        -2    - Insufficient data for estimation.
+int WebRtc_DelayEstimatorProcessFix(void* handle,
+                                    const uint16_t* near_spectrum,
+                                    int spectrum_size,
+                                    int near_q);
+
+// See WebRtc_DelayEstimatorProcessFix() for description.
+int WebRtc_DelayEstimatorProcessFloat(void* handle,
+                                      const float* near_spectrum,
+                                      int spectrum_size);
+
+// Returns the last calculated delay updated by the function
+// WebRtc_DelayEstimatorProcess(...).
+//
+// Input:
+//      - handle        : Pointer to the delay estimation instance.
+//
+// Return value:
+//      - delay         : >= 0  - Last calculated delay value.
+//                        -1    - Error.
+//                        -2    - Insufficient data for estimation.
+int WebRtc_last_delay(void* handle);
+
+// Returns the estimation quality/probability of the last calculated delay
+// updated by the function WebRtc_DelayEstimatorProcess(...). The estimation
+// quality is a value in the interval [0, 1]. The higher the value, the better
+// the quality.
+//
+// Return value:
+//      - delay_quality : >= 0  - Estimation quality of last calculated delay.
+float WebRtc_last_delay_quality(void* handle);
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_
diff --git a/third_party/webrtc/src/webrtc/system_wrappers/interface/compile_assert_c.h b/third_party/webrtc/src/webrtc/system_wrappers/interface/compile_assert_c.h
new file mode 100644
index 00000000..dbb5292d
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/system_wrappers/interface/compile_assert_c.h
@@ -0,0 +1,24 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_H_
+
+#ifdef __cplusplus
+#error "Only use this for C files. For C++, use static_assert."
+#endif
+
+// Use this macro to verify at compile time that certain restrictions are met.
+// The argument is the boolean expression to evaluate.
+// Example:
+//   COMPILE_ASSERT(sizeof(foo) < 128);
+#define COMPILE_ASSERT(expression) switch (0) {case 0: case expression:;}
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_H_
diff --git a/third_party/webrtc/src/webrtc/system_wrappers/interface/cpu_features_wrapper.h b/third_party/webrtc/src/webrtc/system_wrappers/interface/cpu_features_wrapper.h
new file mode 100644
index 00000000..5697c491
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/system_wrappers/interface/cpu_features_wrapper.h
@@ -0,0 +1,51 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_CPU_FEATURES_WRAPPER_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_CPU_FEATURES_WRAPPER_H_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#include "webrtc/typedefs.h"
+
+// List of features in x86.
+typedef enum {
+  kSSE2,
+  kSSE3
+} CPUFeature;
+
+// List of features in ARM.
+enum {
+  kCPUFeatureARMv7       = (1 << 0),
+  kCPUFeatureVFPv3       = (1 << 1),
+  kCPUFeatureNEON        = (1 << 2),
+  kCPUFeatureLDREXSTREX  = (1 << 3)
+};
+
+typedef int (*WebRtc_CPUInfo)(CPUFeature feature);
+
+// Returns true if the CPU supports the feature.
+extern WebRtc_CPUInfo WebRtc_GetCPUInfo;
+
+// No CPU feature is available => straight C path.
+extern WebRtc_CPUInfo WebRtc_GetCPUInfoNoASM;
+
+// Return the features in an ARM device.
+// It detects the features in the hardware platform, and returns supported
+// values in the above enum definition as a bitmask.
+extern uint64_t WebRtc_GetCPUFeaturesARM(void);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  // extern "C"
+#endif
+
+#endif // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_CPU_FEATURES_WRAPPER_H_
diff --git a/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features.cc b/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features.cc
new file mode 100644
index 00000000..b924d773
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features.cc
@@ -0,0 +1,72 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Parts of this file derived from Chromium's base/cpu.cc.
+
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+
+#if defined(WEBRTC_ARCH_X86_FAMILY) && defined(_MSC_VER)
+#include <intrin.h>
+#endif
+
+#include "webrtc/typedefs.h"
+
+// No CPU feature is available => straight C path.
+int GetCPUInfoNoASM(CPUFeature feature) {
+  (void)feature;
+  return 0;
+}
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#ifndef _MSC_VER
+// Intrinsic for "cpuid".
+#if defined(__pic__) && defined(__i386__)
+static inline void __cpuid(int cpu_info[4], int info_type) {
+  __asm__ volatile(
+    "mov %%ebx, %%edi\n"
+    "cpuid\n"
+    "xchg %%edi, %%ebx\n"
+    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+    : "a"(info_type));
+}
+#else
+static inline void __cpuid(int cpu_info[4], int info_type) {
+  __asm__ volatile(
+    "cpuid\n"
+    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+    : "a"(info_type));
+}
+#endif
+#endif  // _MSC_VER
+#endif  // WEBRTC_ARCH_X86_FAMILY
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+// Actual feature detection for x86.
+static int GetCPUInfo(CPUFeature feature) {
+  int cpu_info[4];
+  __cpuid(cpu_info, 1);
+  if (feature == kSSE2) {
+    return 0 != (cpu_info[3] & 0x04000000);
+  }
+  if (feature == kSSE3) {
+    return 0 != (cpu_info[2] & 0x00000001);
+  }
+  return 0;
+}
+#else
+// Default to straight C for other platforms.
+static int GetCPUInfo(CPUFeature feature) {
+  (void)feature;
+  return 0;
+}
+#endif
+
+WebRtc_CPUInfo WebRtc_GetCPUInfo = GetCPUInfo;
+WebRtc_CPUInfo WebRtc_GetCPUInfoNoASM = GetCPUInfoNoASM;
diff --git a/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features_android.c b/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features_android.c
new file mode 100644
index 00000000..0cb3a6c5
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features_android.c
@@ -0,0 +1,15 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cpu-features.h>
+
+uint64_t WebRtc_GetCPUFeaturesARM(void) {
+  return android_getCpuFeatures();
+}
diff --git a/third_party/webrtc/src/webrtc/typedefs.h b/third_party/webrtc/src/webrtc/typedefs.h
new file mode 100644
index 00000000..3034c7e7
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/typedefs.h
@@ -0,0 +1,122 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This file contains platform-specific typedefs and defines.
+// Much of it is derived from Chromium's build/build_config.h.
+
+#ifndef WEBRTC_TYPEDEFS_H_
+#define WEBRTC_TYPEDEFS_H_
+
+// Processor architecture detection.  For more info on what's defined, see:
+//   http://msdn.microsoft.com/en-us/library/b0084kay.aspx
+//   http://www.agner.org/optimize/calling_conventions.pdf
+//   or with gcc, run: "echo | gcc -E -dM -"
+#if defined(_M_X64) || defined(__x86_64__)
+#define WEBRTC_ARCH_X86_FAMILY
+#define WEBRTC_ARCH_X86_64
+#define WEBRTC_ARCH_64_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__aarch64__)
+#define WEBRTC_ARCH_64_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(_M_IX86) || defined(__i386__)
+#define WEBRTC_ARCH_X86_FAMILY
+#define WEBRTC_ARCH_X86
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__ARMEL__)
+// TODO(ajm): We'd prefer to control platform defines here, but this is
+// currently provided by the Android makefiles. Commented to avoid duplicate
+// definition warnings.
+//#define WEBRTC_ARCH_ARM
+// TODO(ajm): Chromium uses the following two defines. Should we switch?
+//#define WEBRTC_ARCH_ARM_FAMILY
+//#define WEBRTC_ARCH_ARMEL
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__MIPSEL__)
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__pnacl__)
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#else
+#error Please add support for your architecture in typedefs.h
+#endif
+
+#if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN))
+#error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN
+#endif
+
+// TODO(zhongwei.yao): WEBRTC_CPU_DETECTION is only used in one place; we should
+// probably just remove it.
+#if (defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE2__)) || \
+    defined(WEBRTC_DETECT_NEON)
+#define WEBRTC_CPU_DETECTION
+#endif
+
+#if !defined(_MSC_VER)
+#include <stdint.h>
+#else
+// Define C99 equivalent types, since pre-2010 MSVC doesn't provide stdint.h.
+typedef signed char         int8_t;
+typedef signed short        int16_t;
+typedef signed int          int32_t;
+typedef __int64             int64_t;
+typedef unsigned char       uint8_t;
+typedef unsigned short      uint16_t;
+typedef unsigned int        uint32_t;
+typedef unsigned __int64    uint64_t;
+#endif
+
+// Annotate a function indicating the caller must examine the return value.
+// Use like:
+//   int foo() WARN_UNUSED_RESULT;
+// TODO(ajm): Hack to avoid multiple definitions until the base/ of webrtc and
+// libjingle are merged.
+#if !defined(WARN_UNUSED_RESULT)
+#if defined(__GNUC__)
+#define WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#else
+#define WARN_UNUSED_RESULT
+#endif
+#endif  // WARN_UNUSED_RESULT
+
+// Put after a variable that might not be used, to prevent compiler warnings:
+//   int result ATTRIBUTE_UNUSED = DoSomething();
+//   assert(result == 17);
+#ifndef ATTRIBUTE_UNUSED
+#if defined(__GNUC__) || defined(__clang__)
+#define ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+#define ATTRIBUTE_UNUSED
+#endif
+#endif
+
+// Macro to be used for switch-case fallthrough (required for enabling
+// -Wimplicit-fallthrough warning on Clang).
+#ifndef FALLTHROUGH
+#if defined(__clang__)
+#define FALLTHROUGH() [[clang::fallthrough]]
+#else
+#define FALLTHROUGH() do { } while (0)
+#endif
+#endif
+
+// Annotate a function that will not return control flow to the caller.
+#if defined(_MSC_VER)
+#define NO_RETURN __declspec(noreturn)
+#elif defined(__GNUC__)
+#define NO_RETURN __attribute__((noreturn))
+#else
+#define NO_RETURN
+#endif
+
+#endif  // WEBRTC_TYPEDEFS_H_
author	Liong Sauw Ming <ming@teluu.com>	2016-08-25 01:36:33 +0000
committer	Liong Sauw Ming <ming@teluu.com>	2016-08-25 01:36:33 +0000
commit	78d67a9205358ec4f5c38fa4191f7042d3983047 (patch)
tree	8af5992cbaec463de76ed49c8538fc7a69ff398a /third_party/webrtc/src/webrtc
parent	e9fc0d90805002cbca667c7c1d8c275adc458bc6 (diff)