diff options
author | Liong Sauw Ming <ming@teluu.com> | 2016-08-25 01:36:33 +0000 |
---|---|---|
committer | Liong Sauw Ming <ming@teluu.com> | 2016-08-25 01:36:33 +0000 |
commit | 78d67a9205358ec4f5c38fa4191f7042d3983047 (patch) | |
tree | 8af5992cbaec463de76ed49c8538fc7a69ff398a /third_party/webrtc/src/webrtc | |
parent | e9fc0d90805002cbca667c7c1d8c275adc458bc6 (diff) |
Re #1954: Add WebRTC to third party component
* Add build config for GNU build systems
git-svn-id: http://svn.pjsip.org/repos/pjproject/trunk@5428 74dad513-b988-da41-8d7b-12977e46ad98
Diffstat (limited to 'third_party/webrtc/src/webrtc')
117 files changed, 36706 insertions, 0 deletions
diff --git a/third_party/webrtc/src/webrtc/common_audio/fft4g.c b/third_party/webrtc/src/webrtc/common_audio/fft4g.c new file mode 100644 index 00000000..9cf7b9f6 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/fft4g.c @@ -0,0 +1,1332 @@ +/* + * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html + * Copyright Takuya OOURA, 1996-2001 + * + * You may use, copy, modify and distribute this code for any purpose (include + * commercial use) and without fee. Please refer to this package when you modify + * this code. + * + * Changes: + * Trivial type modifications by the WebRTC authors. + */ + +/* +Fast Fourier/Cosine/Sine Transform + dimension :one + data length :power of 2 + decimation :frequency + radix :4, 2 + data :inplace + table :use +functions + cdft: Complex Discrete Fourier Transform + rdft: Real Discrete Fourier Transform + ddct: Discrete Cosine Transform + ddst: Discrete Sine Transform + dfct: Cosine Transform of RDFT (Real Symmetric DFT) + dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) +function prototypes + void cdft(int, int, float *, int *, float *); + void rdft(size_t, int, float *, size_t *, float *); + void ddct(int, int, float *, int *, float *); + void ddst(int, int, float *, int *, float *); + void dfct(int, float *, float *, int *, float *); + void dfst(int, float *, float *, int *, float *); + + +-------- Complex DFT (Discrete Fourier Transform) -------- + [definition] + <case1> + X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n + <case2> + X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n + (notes: sum_j=0^n-1 is a summation from j=0 to n-1) + [usage] + <case1> + ip[0] = 0; // first time only + cdft(2*n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + cdft(2*n, -1, a, ip, w); + [parameters] + 2*n :data length (int) + n >= 1, n = power of 2 + a[0...2*n-1] :input/output data (float *) + input data + a[2*j] = Re(x[j]), + a[2*j+1] = Im(x[j]), 0<=j<n + output data + a[2*k] = Re(X[k]), + a[2*k+1] = Im(X[k]), 0<=k<n + 
ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + strictly, + length of ip >= + 2+(1<<(int)(log(n+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n/2-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + cdft(2*n, -1, a, ip, w); + is + cdft(2*n, 1, a, ip, w); + for (j = 0; j <= 2 * n - 1; j++) { + a[j] *= 1.0 / n; + } + . + + +-------- Real DFT / Inverse of Real DFT -------- + [definition] + <case1> RDFT + R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 + I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2 + <case2> IRDFT (excluding scale) + a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + + sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + + sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n + [usage] + <case1> + ip[0] = 0; // first time only + rdft(n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + rdft(n, -1, a, ip, w); + [parameters] + n :data length (size_t) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (float *) + <case1> + output data + a[2*k] = R[k], 0<=k<n/2 + a[2*k+1] = I[k], 0<k<n/2 + a[1] = R[n/2] + <case2> + input data + a[2*j] = R[j], 0<=j<n/2 + a[2*j+1] = I[j], 0<j<n/2 + a[1] = R[n/2] + ip[0...*] :work area for bit reversal (size_t *) + length of ip >= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n/2-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + rdft(n, 1, a, ip, w); + is + rdft(n, -1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . 
+ + +-------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- + [definition] + <case1> IDCT (excluding scale) + C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n + <case2> DCT + C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n + [usage] + <case1> + ip[0] = 0; // first time only + ddct(n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + ddct(n, -1, a, ip, w); + [parameters] + n :data length (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (float *) + output data + a[k] = C[k], 0<=k<n + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/4-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + ddct(n, -1, a, ip, w); + is + a[0] *= 0.5; + ddct(n, 1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- DST (Discrete Sine Transform) / Inverse of DST -------- + [definition] + <case1> IDST (excluding scale) + S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n + <case2> DST + S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n + [usage] + <case1> + ip[0] = 0; // first time only + ddst(n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + ddst(n, -1, a, ip, w); + [parameters] + n :data length (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (float *) + <case1> + input data + a[j] = A[j], 0<j<n + a[0] = A[n] + output data + a[k] = S[k], 0<=k<n + <case2> + output data + a[k] = S[k], 0<k<n + a[0] = S[n] + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/4-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. 
+ [remark] + Inverse of + ddst(n, -1, a, ip, w); + is + a[0] *= 0.5; + ddst(n, 1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- Cosine Transform of RDFT (Real Symmetric DFT) -------- + [definition] + C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n + [usage] + ip[0] = 0; // first time only + dfct(n, a, t, ip, w); + [parameters] + n :data length - 1 (int) + n >= 2, n = power of 2 + a[0...n] :input/output data (float *) + output data + a[k] = C[k], 0<=k<=n + t[0...n/2] :work area (float *) + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/4) + strictly, + length of ip >= + 2+(1<<(int)(log(n/4+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/8-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + a[0] *= 0.5; + a[n] *= 0.5; + dfct(n, a, t, ip, w); + is + a[0] *= 0.5; + a[n] *= 0.5; + dfct(n, a, t, ip, w); + for (j = 0; j <= n; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- + [definition] + S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n + [usage] + ip[0] = 0; // first time only + dfst(n, a, t, ip, w); + [parameters] + n :data length + 1 (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (float *) + output data + a[k] = S[k], 0<k<n + (a[0] is used for work area) + t[0...n/2-1] :work area (float *) + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/4) + strictly, + length of ip >= + 2+(1<<(int)(log(n/4+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/8-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + dfst(n, a, t, ip, w); + is + dfst(n, a, t, ip, w); + for (j = 1; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +Appendix : + The cos/sin table is recalculated when the larger table required. + w[] and ip[] are compatible with all routines. 
+*/ + +#include <stddef.h> + +static void makewt(size_t nw, size_t *ip, float *w); +static void makect(size_t nc, size_t *ip, float *c); +static void bitrv2(size_t n, size_t *ip, float *a); +#if 0 // Not used. +static void bitrv2conj(int n, int *ip, float *a); +#endif +static void cftfsub(size_t n, float *a, float *w); +static void cftbsub(size_t n, float *a, float *w); +static void cft1st(size_t n, float *a, float *w); +static void cftmdl(size_t n, size_t l, float *a, float *w); +static void rftfsub(size_t n, float *a, size_t nc, float *c); +static void rftbsub(size_t n, float *a, size_t nc, float *c); +#if 0 // Not used. +static void dctsub(int n, float *a, int nc, float *c) +static void dstsub(int n, float *a, int nc, float *c) +#endif + + +#if 0 // Not used. +void WebRtc_cdft(int n, int isgn, float *a, int *ip, float *w) +{ + if (n > (ip[0] << 2)) { + makewt(n >> 2, ip, w); + } + if (n > 4) { + if (isgn >= 0) { + bitrv2(n, ip + 2, a); + cftfsub(n, a, w); + } else { + bitrv2conj(n, ip + 2, a); + cftbsub(n, a, w); + } + } else if (n == 4) { + cftfsub(n, a, w); + } +} +#endif + + +void WebRtc_rdft(size_t n, int isgn, float *a, size_t *ip, float *w) +{ + size_t nw, nc; + float xi; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 2)) { + nc = n >> 2; + makect(nc, ip, w + nw); + } + if (isgn >= 0) { + if (n > 4) { + bitrv2(n, ip + 2, a); + cftfsub(n, a, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, w); + } + xi = a[0] - a[1]; + a[0] += a[1]; + a[1] = xi; + } else { + a[1] = 0.5f * (a[0] - a[1]); + a[0] -= a[1]; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + bitrv2(n, ip + 2, a); + cftbsub(n, a, w); + } else if (n == 4) { + cftfsub(n, a, w); + } + } +} + +#if 0 // Not used. 
+static void ddct(int n, int isgn, float *a, int *ip, float *w) +{ + int j, nw, nc; + float xr; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + if (isgn < 0) { + xr = a[n - 1]; + for (j = n - 2; j >= 2; j -= 2) { + a[j + 1] = a[j] - a[j - 1]; + a[j] += a[j - 1]; + } + a[1] = a[0] - xr; + a[0] += xr; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + bitrv2(n, ip + 2, a); + cftbsub(n, a, w); + } else if (n == 4) { + cftfsub(n, a, w); + } + } + dctsub(n, a, nc, w + nw); + if (isgn >= 0) { + if (n > 4) { + bitrv2(n, ip + 2, a); + cftfsub(n, a, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, w); + } + xr = a[0] - a[1]; + a[0] += a[1]; + for (j = 2; j < n; j += 2) { + a[j - 1] = a[j] - a[j + 1]; + a[j] += a[j + 1]; + } + a[n - 1] = xr; + } +} + + +static void ddst(int n, int isgn, float *a, int *ip, float *w) +{ + int j, nw, nc; + float xr; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + if (isgn < 0) { + xr = a[n - 1]; + for (j = n - 2; j >= 2; j -= 2) { + a[j + 1] = -a[j] - a[j - 1]; + a[j] -= a[j - 1]; + } + a[1] = a[0] + xr; + a[0] -= xr; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + bitrv2(n, ip + 2, a); + cftbsub(n, a, w); + } else if (n == 4) { + cftfsub(n, a, w); + } + } + dstsub(n, a, nc, w + nw); + if (isgn >= 0) { + if (n > 4) { + bitrv2(n, ip + 2, a); + cftfsub(n, a, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, w); + } + xr = a[0] - a[1]; + a[0] += a[1]; + for (j = 2; j < n; j += 2) { + a[j - 1] = -a[j] - a[j + 1]; + a[j] -= a[j + 1]; + } + a[n - 1] = -xr; + } +} + + +static void dfct(int n, float *a, float *t, int *ip, float *w) +{ + int j, k, l, m, mh, nw, nc; + float xr, xi, yr, yi; + + nw = ip[0]; + if (n > (nw << 3)) { + nw = n >> 3; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 1)) { + nc = n >> 
1; + makect(nc, ip, w + nw); + } + m = n >> 1; + yi = a[m]; + xi = a[0] + a[n]; + a[0] -= a[n]; + t[0] = xi - yi; + t[m] = xi + yi; + if (n > 2) { + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + xr = a[j] - a[n - j]; + xi = a[j] + a[n - j]; + yr = a[k] - a[n - k]; + yi = a[k] + a[n - k]; + a[j] = xr; + a[k] = yr; + t[j] = xi - yi; + t[k] = xi + yi; + } + t[mh] = a[mh] + a[n - mh]; + a[mh] -= a[n - mh]; + dctsub(m, a, nc, w + nw); + if (m > 4) { + bitrv2(m, ip + 2, a); + cftfsub(m, a, w); + rftfsub(m, a, nc, w + nw); + } else if (m == 4) { + cftfsub(m, a, w); + } + a[n - 1] = a[0] - a[1]; + a[1] = a[0] + a[1]; + for (j = m - 2; j >= 2; j -= 2) { + a[2 * j + 1] = a[j] + a[j + 1]; + a[2 * j - 1] = a[j] - a[j + 1]; + } + l = 2; + m = mh; + while (m >= 2) { + dctsub(m, t, nc, w + nw); + if (m > 4) { + bitrv2(m, ip + 2, t); + cftfsub(m, t, w); + rftfsub(m, t, nc, w + nw); + } else if (m == 4) { + cftfsub(m, t, w); + } + a[n - l] = t[0] - t[1]; + a[l] = t[0] + t[1]; + k = 0; + for (j = 2; j < m; j += 2) { + k += l << 2; + a[k - l] = t[j] - t[j + 1]; + a[k + l] = t[j] + t[j + 1]; + } + l <<= 1; + mh = m >> 1; + for (j = 0; j < mh; j++) { + k = m - j; + t[j] = t[m + k] - t[m + j]; + t[k] = t[m + k] + t[m + j]; + } + t[mh] = t[m + mh]; + m = mh; + } + a[l] = t[0]; + a[n] = t[2] - t[1]; + a[0] = t[2] + t[1]; + } else { + a[1] = a[0]; + a[2] = t[0]; + a[0] = t[1]; + } +} + +static void dfst(int n, float *a, float *t, int *ip, float *w) +{ + int j, k, l, m, mh, nw, nc; + float xr, xi, yr, yi; + + nw = ip[0]; + if (n > (nw << 3)) { + nw = n >> 3; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 1)) { + nc = n >> 1; + makect(nc, ip, w + nw); + } + if (n > 2) { + m = n >> 1; + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + xr = a[j] + a[n - j]; + xi = a[j] - a[n - j]; + yr = a[k] + a[n - k]; + yi = a[k] - a[n - k]; + a[j] = xr; + a[k] = yr; + t[j] = xi + yi; + t[k] = xi - yi; + } + t[0] = a[mh] - a[n - mh]; + a[mh] += a[n - mh]; + a[0] = a[m]; + 
dstsub(m, a, nc, w + nw); + if (m > 4) { + bitrv2(m, ip + 2, a); + cftfsub(m, a, w); + rftfsub(m, a, nc, w + nw); + } else if (m == 4) { + cftfsub(m, a, w); + } + a[n - 1] = a[1] - a[0]; + a[1] = a[0] + a[1]; + for (j = m - 2; j >= 2; j -= 2) { + a[2 * j + 1] = a[j] - a[j + 1]; + a[2 * j - 1] = -a[j] - a[j + 1]; + } + l = 2; + m = mh; + while (m >= 2) { + dstsub(m, t, nc, w + nw); + if (m > 4) { + bitrv2(m, ip + 2, t); + cftfsub(m, t, w); + rftfsub(m, t, nc, w + nw); + } else if (m == 4) { + cftfsub(m, t, w); + } + a[n - l] = t[1] - t[0]; + a[l] = t[0] + t[1]; + k = 0; + for (j = 2; j < m; j += 2) { + k += l << 2; + a[k - l] = -t[j] - t[j + 1]; + a[k + l] = t[j] - t[j + 1]; + } + l <<= 1; + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + t[j] = t[m + k] + t[m + j]; + t[k] = t[m + k] - t[m + j]; + } + t[0] = t[m + mh]; + m = mh; + } + a[l] = t[0]; + } + a[0] = 0; +} +#endif // Not used. + + +/* -------- initializing routines -------- */ + + +#include <math.h> + +static void makewt(size_t nw, size_t *ip, float *w) +{ + size_t j, nwh; + float delta, x, y; + + ip[0] = nw; + ip[1] = 1; + if (nw > 2) { + nwh = nw >> 1; + delta = atanf(1.0f) / nwh; + w[0] = 1; + w[1] = 0; + w[nwh] = (float)cos(delta * nwh); + w[nwh + 1] = w[nwh]; + if (nwh > 2) { + for (j = 2; j < nwh; j += 2) { + x = (float)cos(delta * j); + y = (float)sin(delta * j); + w[j] = x; + w[j + 1] = y; + w[nw - j] = y; + w[nw - j + 1] = x; + } + bitrv2(nw, ip + 2, w); + } + } +} + + +static void makect(size_t nc, size_t *ip, float *c) +{ + size_t j, nch; + float delta; + + ip[1] = nc; + if (nc > 1) { + nch = nc >> 1; + delta = atanf(1.0f) / nch; + c[0] = (float)cos(delta * nch); + c[nch] = 0.5f * c[0]; + for (j = 1; j < nch; j++) { + c[j] = 0.5f * (float)cos(delta * j); + c[nc - j] = 0.5f * (float)sin(delta * j); + } + } +} + + +/* -------- child routines -------- */ + + +static void bitrv2(size_t n, size_t *ip, float *a) +{ + size_t j, j1, k, k1, l, m, m2; + float xr, xi, yr, yi; + + ip[0] = 0; + l = 
n; + m = 1; + while ((m << 3) < l) { + l >>= 1; + for (j = 0; j < m; j++) { + ip[m + j] = ip[j] + l; + } + m <<= 1; + } + m2 = 2 * m; + if ((m << 3) == l) { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 -= m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + j1 = 2 * k + m2 + ip[k]; + k1 = j1 + m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + } else { + for (k = 1; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + } + } +} + +#if 0 // Not used. 
+static void bitrv2conj(int n, int *ip, float *a) +{ + int j, j1, k, k1, l, m, m2; + float xr, xi, yr, yi; + + ip[0] = 0; + l = n; + m = 1; + while ((m << 3) < l) { + l >>= 1; + for (j = 0; j < m; j++) { + ip[m + j] = ip[j] + l; + } + m <<= 1; + } + m2 = 2 * m; + if ((m << 3) == l) { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 -= m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + k1 = 2 * k + ip[k]; + a[k1 + 1] = -a[k1 + 1]; + j1 = k1 + m2; + k1 = j1 + m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + k1 += m2; + a[k1 + 1] = -a[k1 + 1]; + } + } else { + a[1] = -a[1]; + a[m2 + 1] = -a[m2 + 1]; + for (k = 1; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + k1 = 2 * k + ip[k]; + a[k1 + 1] = -a[k1 + 1]; + a[k1 + m2 + 1] = -a[k1 + m2 + 1]; + } + } +} +#endif + +static void cftfsub(size_t n, float *a, float *w) +{ + size_t j, j1, j2, j3, l; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + l = 2; + if (n > 8) { + cft1st(n, a, w); + l = 8; + while ((l << 2) < n) { + 
cftmdl(n, l, a, w); + l <<= 2; + } + } + if ((l << 2) == n) { + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } + } else { + for (j = 0; j < l; j += 2) { + j1 = j + l; + x0r = a[j] - a[j1]; + x0i = a[j + 1] - a[j1 + 1]; + a[j] += a[j1]; + a[j + 1] += a[j1 + 1]; + a[j1] = x0r; + a[j1 + 1] = x0i; + } + } +} + + +static void cftbsub(size_t n, float *a, float *w) +{ + size_t j, j1, j2, j3, l; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + l = 2; + if (n > 8) { + cft1st(n, a, w); + l = 8; + while ((l << 2) < n) { + cftmdl(n, l, a, w); + l <<= 2; + } + } + if ((l << 2) == n) { + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = -a[j + 1] - a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = -a[j + 1] + a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i - x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i + x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i - x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i + x3r; + } + } else { + for (j = 0; j < l; j += 2) { + j1 = j + l; + x0r = a[j] - a[j1]; + x0i = -a[j + 1] + a[j1 + 1]; + a[j] += a[j1]; + a[j + 1] = -a[j + 1] - a[j1 + 1]; + a[j1] = x0r; + a[j1 + 1] = x0i; + } + } +} + + +static void cft1st(size_t n, float *a, float *w) +{ + size_t j, k1, k2; + float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + x0r = a[0] + a[2]; + x0i = a[1] + a[3]; + x1r = a[0] - a[2]; + x1i = a[1] - a[3]; + x2r = a[4] + a[6]; + x2i = a[5] + a[7]; + x3r = a[4] - 
a[6]; + x3i = a[5] - a[7]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[4] = x0r - x2r; + a[5] = x0i - x2i; + a[2] = x1r - x3i; + a[3] = x1i + x3r; + a[6] = x1r + x3i; + a[7] = x1i - x3r; + wk1r = w[2]; + x0r = a[8] + a[10]; + x0i = a[9] + a[11]; + x1r = a[8] - a[10]; + x1i = a[9] - a[11]; + x2r = a[12] + a[14]; + x2i = a[13] + a[15]; + x3r = a[12] - a[14]; + x3i = a[13] - a[15]; + a[8] = x0r + x2r; + a[9] = x0i + x2i; + a[12] = x2i - x0i; + a[13] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[10] = wk1r * (x0r - x0i); + a[11] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[14] = wk1r * (x0i - x0r); + a[15] = wk1r * (x0i + x0r); + k1 = 0; + for (j = 16; j < n; j += 16) { + k1 += 2; + k2 = 2 * k1; + wk2r = w[k1]; + wk2i = w[k1 + 1]; + wk1r = w[k2]; + wk1i = w[k2 + 1]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + x0r = a[j] + a[j + 2]; + x0i = a[j + 1] + a[j + 3]; + x1r = a[j] - a[j + 2]; + x1i = a[j + 1] - a[j + 3]; + x2r = a[j + 4] + a[j + 6]; + x2i = a[j + 5] + a[j + 7]; + x3r = a[j + 4] - a[j + 6]; + x3i = a[j + 5] - a[j + 7]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 4] = wk2r * x0r - wk2i * x0i; + a[j + 5] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 2] = wk1r * x0r - wk1i * x0i; + a[j + 3] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 6] = wk3r * x0r - wk3i * x0i; + a[j + 7] = wk3r * x0i + wk3i * x0r; + wk1r = w[k2 + 2]; + wk1i = w[k2 + 3]; + wk3r = wk1r - 2 * wk2r * wk1i; + wk3i = 2 * wk2r * wk1r - wk1i; + x0r = a[j + 8] + a[j + 10]; + x0i = a[j + 9] + a[j + 11]; + x1r = a[j + 8] - a[j + 10]; + x1i = a[j + 9] - a[j + 11]; + x2r = a[j + 12] + a[j + 14]; + x2i = a[j + 13] + a[j + 15]; + x3r = a[j + 12] - a[j + 14]; + x3i = a[j + 13] - a[j + 15]; + a[j + 8] = x0r + x2r; + a[j + 9] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 12] = -wk2i * x0r - wk2r * x0i; + a[j + 13] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = 
x1i + x3r; + a[j + 10] = wk1r * x0r - wk1i * x0i; + a[j + 11] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 14] = wk3r * x0r - wk3i * x0i; + a[j + 15] = wk3r * x0i + wk3i * x0r; + } +} + + +static void cftmdl(size_t n, size_t l, float *a, float *w) +{ + size_t j, j1, j2, j3, k, k1, k2, m, m2; + float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + m = l << 2; + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } + wk1r = w[2]; + for (j = m; j < l + m; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x2i - x0i; + a[j2 + 1] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * (x0r - x0i); + a[j1 + 1] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[j3] = wk1r * (x0i - x0r); + a[j3 + 1] = wk1r * (x0i + x0r); + } + k1 = 0; + m2 = 2 * m; + for (k = m2; k < n; k += m2) { + k1 += 2; + k2 = 2 * k1; + wk2r = w[k1]; + wk2i = w[k1 + 1]; + wk1r = w[k2]; + wk1i = w[k2 + 1]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + for (j = k; j < l + k; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 
+ 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2] = wk2r * x0r - wk2i * x0i; + a[j2 + 1] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * x0r - wk1i * x0i; + a[j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r - wk3i * x0i; + a[j3 + 1] = wk3r * x0i + wk3i * x0r; + } + wk1r = w[k2 + 2]; + wk1i = w[k2 + 3]; + wk3r = wk1r - 2 * wk2r * wk1i; + wk3i = 2 * wk2r * wk1r - wk1i; + for (j = k + m; j < l + (k + m); j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2] = -wk2i * x0r - wk2r * x0i; + a[j2 + 1] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * x0r - wk1i * x0i; + a[j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r - wk3i * x0i; + a[j3 + 1] = wk3r * x0i + wk3i * x0r; + } + } +} + + +static void rftfsub(size_t n, float *a, size_t nc, float *c) +{ + size_t j, k, kk, ks, m; + float wkr, wki, xr, xi, yr, yi; + + m = n >> 1; + ks = 2 * nc / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = 0.5f - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr - wki * xi; + yi = wkr * xi + wki * xr; + a[j] -= yr; + a[j + 1] -= yi; + a[k] += yr; + a[k + 1] -= yi; + } +} + + +static void rftbsub(size_t n, float *a, size_t nc, float *c) +{ + size_t j, k, kk, ks, m; + float wkr, wki, xr, xi, yr, yi; + + a[1] = -a[1]; + m = n >> 1; + ks = 2 * nc / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = 0.5f - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr + wki * xi; + yi = 
wkr * xi - wki * xr; + a[j] -= yr; + a[j + 1] = yi - a[j + 1]; + a[k] += yr; + a[k + 1] = yi - a[k + 1]; + } + a[m + 1] = -a[m + 1]; +} + +#if 0 // Not used. +static void dctsub(int n, float *a, int nc, float *c) +{ + int j, k, kk, ks, m; + float wkr, wki, xr; + + m = n >> 1; + ks = nc / n; + kk = 0; + for (j = 1; j < m; j++) { + k = n - j; + kk += ks; + wkr = c[kk] - c[nc - kk]; + wki = c[kk] + c[nc - kk]; + xr = wki * a[j] - wkr * a[k]; + a[j] = wkr * a[j] + wki * a[k]; + a[k] = xr; + } + a[m] *= c[0]; +} + + +static void dstsub(int n, float *a, int nc, float *c) +{ + int j, k, kk, ks, m; + float wkr, wki, xr; + + m = n >> 1; + ks = nc / n; + kk = 0; + for (j = 1; j < m; j++) { + k = n - j; + kk += ks; + wkr = c[kk] - c[nc - kk]; + wki = c[kk] + c[nc - kk]; + xr = wki * a[k] - wkr * a[j]; + a[k] = wkr * a[k] + wki * a[j]; + a[j] = xr; + } + a[m] *= c[0]; +} +#endif // Not used. diff --git a/third_party/webrtc/src/webrtc/common_audio/fft4g.h b/third_party/webrtc/src/webrtc/common_audio/fft4g.h new file mode 100644 index 00000000..6dd792f6 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/fft4g.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_COMMON_AUDIO_FFT4G_H_ +#define WEBRTC_COMMON_AUDIO_FFT4G_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +// Refer to fft4g.c for documentation. 
+void WebRtc_rdft(size_t n, int isgn, float *a, size_t *ip, float *w); + +#if defined(__cplusplus) +} +#endif + +#endif // WEBRTC_COMMON_AUDIO_FFT4G_H_ diff --git a/third_party/webrtc/src/webrtc/common_audio/ring_buffer.c b/third_party/webrtc/src/webrtc/common_audio/ring_buffer.c new file mode 100644 index 00000000..60fb5dff --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/ring_buffer.c @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// A ring buffer to hold arbitrary data. Provides no thread safety. Unless +// otherwise specified, functions return 0 on success and -1 on error. + +#include "webrtc/common_audio/ring_buffer.h" + +#include <stddef.h> // size_t +#include <stdlib.h> +#include <string.h> + +enum Wrap { + SAME_WRAP, + DIFF_WRAP +}; + +struct RingBuffer { + size_t read_pos; + size_t write_pos; + size_t element_count; + size_t element_size; + enum Wrap rw_wrap; + char* data; +}; + +// Get address of region(s) from which we can read data. +// If the region is contiguous, |data_ptr_bytes_2| will be zero. +// If non-contiguous, |data_ptr_bytes_2| will be the size in bytes of the second +// region. Returns room available to be read or |element_count|, whichever is +// smaller. +static size_t GetBufferReadRegions(RingBuffer* buf, + size_t element_count, + void** data_ptr_1, + size_t* data_ptr_bytes_1, + void** data_ptr_2, + size_t* data_ptr_bytes_2) { + + const size_t readable_elements = WebRtc_available_read(buf); + const size_t read_elements = (readable_elements < element_count ? 
+ readable_elements : element_count); + const size_t margin = buf->element_count - buf->read_pos; + + // Check to see if read is not contiguous. + if (read_elements > margin) { + // Write data in two blocks that wrap the buffer. + *data_ptr_1 = buf->data + buf->read_pos * buf->element_size; + *data_ptr_bytes_1 = margin * buf->element_size; + *data_ptr_2 = buf->data; + *data_ptr_bytes_2 = (read_elements - margin) * buf->element_size; + } else { + *data_ptr_1 = buf->data + buf->read_pos * buf->element_size; + *data_ptr_bytes_1 = read_elements * buf->element_size; + *data_ptr_2 = NULL; + *data_ptr_bytes_2 = 0; + } + + return read_elements; +} + +RingBuffer* WebRtc_CreateBuffer(size_t element_count, size_t element_size) { + RingBuffer* self = NULL; + if (element_count == 0 || element_size == 0) { + return NULL; + } + + self = malloc(sizeof(RingBuffer)); + if (!self) { + return NULL; + } + + self->data = malloc(element_count * element_size); + if (!self->data) { + free(self); + self = NULL; + return NULL; + } + + self->element_count = element_count; + self->element_size = element_size; + WebRtc_InitBuffer(self); + + return self; +} + +void WebRtc_InitBuffer(RingBuffer* self) { + self->read_pos = 0; + self->write_pos = 0; + self->rw_wrap = SAME_WRAP; + + // Initialize buffer to zeros + memset(self->data, 0, self->element_count * self->element_size); +} + +void WebRtc_FreeBuffer(void* handle) { + RingBuffer* self = (RingBuffer*)handle; + if (!self) { + return; + } + + free(self->data); + free(self); +} + +size_t WebRtc_ReadBuffer(RingBuffer* self, + void** data_ptr, + void* data, + size_t element_count) { + + if (self == NULL) { + return 0; + } + if (data == NULL) { + return 0; + } + + { + void* buf_ptr_1 = NULL; + void* buf_ptr_2 = NULL; + size_t buf_ptr_bytes_1 = 0; + size_t buf_ptr_bytes_2 = 0; + const size_t read_count = GetBufferReadRegions(self, + element_count, + &buf_ptr_1, + &buf_ptr_bytes_1, + &buf_ptr_2, + &buf_ptr_bytes_2); + + if (buf_ptr_bytes_2 > 0) { + // 
We have a wrap around when reading the buffer. Copy the buffer data to + // |data| and point to it. + memcpy(data, buf_ptr_1, buf_ptr_bytes_1); + memcpy(((char*) data) + buf_ptr_bytes_1, buf_ptr_2, buf_ptr_bytes_2); + buf_ptr_1 = data; + } else if (!data_ptr) { + // No wrap, but a memcpy was requested. + memcpy(data, buf_ptr_1, buf_ptr_bytes_1); + } + if (data_ptr) { + // |buf_ptr_1| == |data| in the case of a wrap. + *data_ptr = buf_ptr_1; + } + + // Update read position + WebRtc_MoveReadPtr(self, (int) read_count); + + return read_count; + } +} + +size_t WebRtc_WriteBuffer(RingBuffer* self, + const void* data, + size_t element_count) { + if (!self) { + return 0; + } + if (!data) { + return 0; + } + + { + const size_t free_elements = WebRtc_available_write(self); + const size_t write_elements = (free_elements < element_count ? free_elements + : element_count); + size_t n = write_elements; + const size_t margin = self->element_count - self->write_pos; + + if (write_elements > margin) { + // Buffer wrap around when writing. + memcpy(self->data + self->write_pos * self->element_size, + data, margin * self->element_size); + self->write_pos = 0; + n -= margin; + self->rw_wrap = DIFF_WRAP; + } + memcpy(self->data + self->write_pos * self->element_size, + ((const char*) data) + ((write_elements - n) * self->element_size), + n * self->element_size); + self->write_pos += n; + + return write_elements; + } +} + +int WebRtc_MoveReadPtr(RingBuffer* self, int element_count) { + if (!self) { + return 0; + } + + { + // We need to be able to take care of negative changes, hence use "int" + // instead of "size_t". 
+ const int free_elements = (int) WebRtc_available_write(self); + const int readable_elements = (int) WebRtc_available_read(self); + int read_pos = (int) self->read_pos; + + if (element_count > readable_elements) { + element_count = readable_elements; + } + if (element_count < -free_elements) { + element_count = -free_elements; + } + + read_pos += element_count; + if (read_pos > (int) self->element_count) { + // Buffer wrap around. Restart read position and wrap indicator. + read_pos -= (int) self->element_count; + self->rw_wrap = SAME_WRAP; + } + if (read_pos < 0) { + // Buffer wrap around. Restart read position and wrap indicator. + read_pos += (int) self->element_count; + self->rw_wrap = DIFF_WRAP; + } + + self->read_pos = (size_t) read_pos; + + return element_count; + } +} + +size_t WebRtc_available_read(const RingBuffer* self) { + if (!self) { + return 0; + } + + if (self->rw_wrap == SAME_WRAP) { + return self->write_pos - self->read_pos; + } else { + return self->element_count - self->read_pos + self->write_pos; + } +} + +size_t WebRtc_available_write(const RingBuffer* self) { + if (!self) { + return 0; + } + + return self->element_count - WebRtc_available_read(self); +} diff --git a/third_party/webrtc/src/webrtc/common_audio/ring_buffer.h b/third_party/webrtc/src/webrtc/common_audio/ring_buffer.h new file mode 100644 index 00000000..4125c48d --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/ring_buffer.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// A ring buffer to hold arbitrary data. Provides no thread safety. 
Unless +// otherwise specified, functions return 0 on success and -1 on error. + +#ifndef WEBRTC_COMMON_AUDIO_RING_BUFFER_H_ +#define WEBRTC_COMMON_AUDIO_RING_BUFFER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stddef.h> // size_t + +typedef struct RingBuffer RingBuffer; + +// Creates and initializes the buffer. Returns NULL on failure. +RingBuffer* WebRtc_CreateBuffer(size_t element_count, size_t element_size); +void WebRtc_InitBuffer(RingBuffer* handle); +void WebRtc_FreeBuffer(void* handle); + +// Reads data from the buffer. The |data_ptr| will point to the address where +// it is located. If all |element_count| data are feasible to read without +// buffer wrap around |data_ptr| will point to the location in the buffer. +// Otherwise, the data will be copied to |data| (memory allocation done by the +// user) and |data_ptr| points to the address of |data|. |data_ptr| is only +// guaranteed to be valid until the next call to WebRtc_WriteBuffer(). +// +// To force a copying to |data|, pass a NULL |data_ptr|. +// +// Returns number of elements read. +size_t WebRtc_ReadBuffer(RingBuffer* handle, + void** data_ptr, + void* data, + size_t element_count); + +// Writes |data| to buffer and returns the number of elements written. +size_t WebRtc_WriteBuffer(RingBuffer* handle, const void* data, + size_t element_count); + +// Moves the buffer read position and returns the number of elements moved. +// Positive |element_count| moves the read position towards the write position, +// that is, flushing the buffer. Negative |element_count| moves the read +// position away from the the write position, that is, stuffing the buffer. +// Returns number of elements moved. +int WebRtc_MoveReadPtr(RingBuffer* handle, int element_count); + +// Returns number of available elements to read. +size_t WebRtc_available_read(const RingBuffer* handle); + +// Returns number of available elements for write. 
+size_t WebRtc_available_write(const RingBuffer* handle); + +#ifdef __cplusplus +} +#endif + +#endif // WEBRTC_COMMON_AUDIO_RING_BUFFER_H_ diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c new file mode 100644 index 00000000..f99dd62b --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_AutoCorrToReflCoef(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +void WebRtcSpl_AutoCorrToReflCoef(const int32_t *R, int use_order, int16_t *K) +{ + int i, n; + int16_t tmp; + const int32_t *rptr; + int32_t L_num, L_den; + int16_t *acfptr, *pptr, *wptr, *p1ptr, *w1ptr, ACF[WEBRTC_SPL_MAX_LPC_ORDER], + P[WEBRTC_SPL_MAX_LPC_ORDER], W[WEBRTC_SPL_MAX_LPC_ORDER]; + + // Initialize loop and pointers. + acfptr = ACF; + rptr = R; + pptr = P; + p1ptr = &P[1]; + w1ptr = &W[1]; + wptr = w1ptr; + + // First loop; n=0. Determine shifting. + tmp = WebRtcSpl_NormW32(*R); + *acfptr = (int16_t)((*rptr++ << tmp) >> 16); + *pptr++ = *acfptr++; + + // Initialize ACF, P and W. + for (i = 1; i <= use_order; i++) + { + *acfptr = (int16_t)((*rptr++ << tmp) >> 16); + *wptr++ = *acfptr; + *pptr++ = *acfptr++; + } + + // Compute reflection coefficients. 
+ for (n = 1; n <= use_order; n++, K++) + { + tmp = WEBRTC_SPL_ABS_W16(*p1ptr); + if (*P < tmp) + { + for (i = n; i <= use_order; i++) + *K++ = 0; + + return; + } + + // Division: WebRtcSpl_div(tmp, *P) + *K = 0; + if (tmp != 0) + { + L_num = tmp; + L_den = *P; + i = 15; + while (i--) + { + (*K) <<= 1; + L_num <<= 1; + if (L_num >= L_den) + { + L_num -= L_den; + (*K)++; + } + } + if (*p1ptr > 0) + *K = -*K; + } + + // Last iteration; don't do Schur recursion. + if (n == use_order) + return; + + // Schur recursion. + pptr = P; + wptr = w1ptr; + tmp = (int16_t)(((int32_t)*p1ptr * (int32_t)*K + 16384) >> 15); + *pptr = WebRtcSpl_AddSatW16(*pptr, tmp); + pptr++; + for (i = 1; i <= use_order - n; i++) + { + tmp = (int16_t)(((int32_t)*wptr * (int32_t)*K + 16384) >> 15); + *pptr = WebRtcSpl_AddSatW16(*(pptr + 1), tmp); + pptr++; + tmp = (int16_t)(((int32_t)*pptr * (int32_t)*K + 16384) >> 15); + *wptr = WebRtcSpl_AddSatW16(*wptr, tmp); + wptr++; + } + } +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_correlation.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_correlation.c new file mode 100644 index 00000000..fda4fffe --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_correlation.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +#include <assert.h> + +size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector, + size_t in_vector_length, + size_t order, + int32_t* result, + int* scale) { + int32_t sum = 0; + size_t i = 0, j = 0; + int16_t smax = 0; + int scaling = 0; + + assert(order <= in_vector_length); + + // Find the maximum absolute value of the samples. + smax = WebRtcSpl_MaxAbsValueW16(in_vector, in_vector_length); + + // In order to avoid overflow when computing the sum we should scale the + // samples so that (in_vector_length * smax * smax) will not overflow. + if (smax == 0) { + scaling = 0; + } else { + // Number of bits in the sum loop. + int nbits = WebRtcSpl_GetSizeInBits((uint32_t)in_vector_length); + // Number of bits to normalize smax. + int t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax)); + + if (t > nbits) { + scaling = 0; + } else { + scaling = nbits - t; + } + } + + // Perform the actual correlation calculation. + for (i = 0; i < order + 1; i++) { + sum = 0; + /* Unroll the loop to improve performance. */ + for (j = 0; i + j + 3 < in_vector_length; j += 4) { + sum += (in_vector[j + 0] * in_vector[i + j + 0]) >> scaling; + sum += (in_vector[j + 1] * in_vector[i + j + 1]) >> scaling; + sum += (in_vector[j + 2] * in_vector[i + j + 2]) >> scaling; + sum += (in_vector[j + 3] * in_vector[i + j + 3]) >> scaling; + } + for (; j < in_vector_length - i; j++) { + sum += (in_vector[j] * in_vector[i + j]) >> scaling; + } + *result++ = sum; + } + + *scale = scaling; + return order + 1; +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c new file mode 100644 index 00000000..c8bd2dc4 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +/* Tables for data buffer indexes that are bit reversed and thus need to be + * swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap + * operations, while index_7[{1, 3, 5, ...}] are for the right side of the + * operation. Same for index_8. + */ + +/* Indexes for the case of stages == 7. */ +static const int16_t index_7[112] = { + 1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104, + 12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52, + 23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98, + 37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70, + 51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69, + 81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125, + 103, 115, 111, 123 +}; + +/* Indexes for the case of stages == 8. 
*/ +static const int16_t index_8[240] = { + 1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80, + 11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20, + 40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184, + 30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41, + 148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76, + 51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62, + 124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82, + 75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87, + 234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101, + 166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142, + 115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131, + 193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201, + 149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171, + 213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227, + 203, 211, 207, 243, 215, 235, 223, 251, 239, 247 +}; + +void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) { + /* For any specific value of stages, we know exactly the indexes that are + * bit reversed. Currently (Feb. 2012) in WebRTC the only possible values of + * stages are 7 and 8, so we use tables to save unnecessary iterations and + * calculations for these two cases. + */ + if (stages == 7 || stages == 8) { + int m = 0; + int length = 112; + const int16_t* index = index_7; + + if (stages == 8) { + length = 240; + index = index_8; + } + + /* Decimation in time. Swap the elements with bit-reversed indexes. */ + for (m = 0; m < length; m += 2) { + /* We declare a int32_t* type pointer, to load both the 16-bit real + * and imaginary elements from complex_data in one instruction, reducing + * complexity. 
+ */ + int32_t* complex_data_ptr = (int32_t*)complex_data; + int32_t temp = 0; + + temp = complex_data_ptr[index[m]]; /* Real and imaginary */ + complex_data_ptr[index[m]] = complex_data_ptr[index[m + 1]]; + complex_data_ptr[index[m + 1]] = temp; + } + } + else { + int m = 0, mr = 0, l = 0; + int n = 1 << stages; + int nn = n - 1; + + /* Decimation in time - re-order data */ + for (m = 1; m <= nn; ++m) { + int32_t* complex_data_ptr = (int32_t*)complex_data; + int32_t temp = 0; + + /* Find out indexes that are bit-reversed. */ + l = n; + do { + l >>= 1; + } while (l > nn - mr); + mr = (mr & (l - 1)) + l; + + if (mr <= m) { + continue; + } + + /* Swap the elements with bit-reversed indexes. + * This is similar to the loop in the stages == 7 or 8 cases. + */ + temp = complex_data_ptr[m]; /* Real and imaginary */ + complex_data_ptr[m] = complex_data_ptr[mr]; + complex_data_ptr[mr] = temp; + } + } +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S new file mode 100644 index 00000000..e7f8a819 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S @@ -0,0 +1,119 @@ +@ +@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. +@ +@ Use of this source code is governed by a BSD-style license +@ that can be found in the LICENSE file in the root of the source +@ tree. An additional intellectual property rights grant can be found +@ in the file PATENTS. All contributing project authors may +@ be found in the AUTHORS file in the root of the source tree. +@ + +@ This file contains the function WebRtcSpl_ComplexBitReverse(), optimized +@ for ARMv5 platforms. +@ Reference C code is in file complex_bit_reverse.c. Bit-exact. 

#include "webrtc/system_wrappers/interface/asm_defines.h"

@ Register use, as established by the code below:
@   r0 - base address of complex_data (all loads/stores are relative to it).
@   r1 - stages on entry; reused as n in the generic path.
@ Table path (stages == 7 or 8): r3 walks the table, r4 is the end-of-table
@ address, and each table entry is used directly as a byte offset from r0.
@ Generic path: r3 = m, r4 = mr, r5 = address of element m - 1, r6 = nn.

GLOBAL_FUNCTION WebRtcSpl_ComplexBitReverse
.align  2
DEFINE_FUNCTION WebRtcSpl_ComplexBitReverse
  push {r4-r7}

  cmp r1, #7
  adr r3, index_7                 @ Table pointer.
  mov r4, #112                    @ Number of iterations.
  beq PRE_LOOP_STAGES_7_OR_8

  cmp r1, #8
  adr r3, index_8                 @ Table pointer.
  mov r4, #240                    @ Number of iterations.
  beq PRE_LOOP_STAGES_7_OR_8

  mov r3, #1                      @ Initialize m.
  mov r1, r3, asl r1              @ n = 1 << stages;
  subs r6, r1, #1                 @ nn = n - 1;
  ble END

  mov r5, r0                      @ &complex_data
  mov r4, #0                      @ mr

LOOP_GENERIC:
  rsb r12, r4, r6                 @ l > nn - mr
  mov r2, r1                      @ n

LOOP_SHIFT:
  asr r2, #1                      @ l >>= 1;
  cmp r2, r12
  bgt LOOP_SHIFT

  sub r12, r2, #1
  and r4, r12, r4
  add r4, r2                      @ mr = (mr & (l - 1)) + l;
  cmp r4, r3                      @ mr <= m ?
  ble UPDATE_REGISTERS

  mov r12, r4, asl #2
  ldr r7, [r5, #4]                @ complex_data[2 * m, 2 * m + 1].
                                  @   Offset 4 due to m incrementing from 1.
  ldr r2, [r0, r12]               @ complex_data[2 * mr, 2 * mr + 1].
  str r7, [r0, r12]
  str r2, [r5, #4]

UPDATE_REGISTERS:
  add r3, r3, #1
  add r5, #4
  cmp r3, r1
  bne LOOP_GENERIC

  b END

PRE_LOOP_STAGES_7_OR_8:
  add r4, r3, r4, asl #1          @ r4 = table address + 2 * entry count.

LOOP_STAGES_7_OR_8:
  ldrsh r2, [r3], #2              @ index[m]
  ldrsh r5, [r3], #2              @ index[m + 1]
  ldr r1, [r0, r2]                @ complex_data[index[m], index[m] + 1]
  ldr r12, [r0, r5]               @ complex_data[index[m + 1], index[m + 1] + 1]
  cmp r3, r4
  str r1, [r0, r5]
  str r12, [r0, r2]
  bne LOOP_STAGES_7_OR_8

END:
  pop {r4-r7}
  bx lr

@ The index tables. Note the values are doubles of the actual indexes for 16-bit
@ elements, different from the generic C code. It actually provides byte offsets
@ for the indexes.

.align  2
index_7:  @ Indexes for stages == 7.
+ .short 4, 256, 8, 128, 12, 384, 16, 64, 20, 320, 24, 192, 28, 448, 36, 288 + .short 40, 160, 44, 416, 48, 96, 52, 352, 56, 224, 60, 480, 68, 272, 72, 144 + .short 76, 400, 84, 336, 88, 208, 92, 464, 100, 304, 104, 176, 108, 432, 116 + .short 368, 120, 240, 124, 496, 132, 264, 140, 392, 148, 328, 152, 200, 156 + .short 456, 164, 296, 172, 424, 180, 360, 184, 232, 188, 488, 196, 280, 204 + .short 408, 212, 344, 220, 472, 228, 312, 236, 440, 244, 376, 252, 504, 268 + .short 388, 276, 324, 284, 452, 300, 420, 308, 356, 316, 484, 332, 404, 348 + .short 468, 364, 436, 380, 500, 412, 460, 444, 492 + +index_8: @ Indexes for stages == 8. + .short 4, 512, 8, 256, 12, 768, 16, 128, 20, 640, 24, 384, 28, 896, 32, 64 + .short 36, 576, 40, 320, 44, 832, 48, 192, 52, 704, 56, 448, 60, 960, 68, 544 + .short 72, 288, 76, 800, 80, 160, 84, 672, 88, 416, 92, 928, 100, 608, 104 + .short 352, 108, 864, 112, 224, 116, 736, 120, 480, 124, 992, 132, 528, 136 + .short 272, 140, 784, 148, 656, 152, 400, 156, 912, 164, 592, 168, 336, 172 + .short 848, 176, 208, 180, 720, 184, 464, 188, 976, 196, 560, 200, 304, 204 + .short 816, 212, 688, 216, 432, 220, 944, 228, 624, 232, 368, 236, 880, 244 + .short 752, 248, 496, 252, 1008, 260, 520, 268, 776, 276, 648, 280, 392, 284 + .short 904, 292, 584, 296, 328, 300, 840, 308, 712, 312, 456, 316, 968, 324 + .short 552, 332, 808, 340, 680, 344, 424, 348, 936, 356, 616, 364, 872, 372 + .short 744, 376, 488, 380, 1000, 388, 536, 396, 792, 404, 664, 412, 920, 420 + .short 600, 428, 856, 436, 728, 440, 472, 444, 984, 452, 568, 460, 824, 468 + .short 696, 476, 952, 484, 632, 492, 888, 500, 760, 508, 1016, 524, 772, 532 + .short 644, 540, 900, 548, 580, 556, 836, 564, 708, 572, 964, 588, 804, 596 + .short 676, 604, 932, 620, 868, 628, 740, 636, 996, 652, 788, 668, 916, 684 + .short 852, 692, 724, 700, 980, 716, 820, 732, 948, 748, 884, 764, 1012, 796 + .short 908, 812, 844, 828, 972, 860, 940, 892, 1004, 956, 988 diff --git 
a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_mips.c new file mode 100644 index 00000000..583fe4f6 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_mips.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +static int16_t coefTable_7[] = { + 4, 256, 8, 128, 12, 384, 16, 64, + 20, 320, 24, 192, 28, 448, 36, 288, + 40, 160, 44, 416, 48, 96, 52, 352, + 56, 224, 60, 480, 68, 272, 72, 144, + 76, 400, 84, 336, 88, 208, 92, 464, + 100, 304, 104, 176, 108, 432, 116, 368, + 120, 240, 124, 496, 132, 264, 140, 392, + 148, 328, 152, 200, 156, 456, 164, 296, + 172, 424, 180, 360, 184, 232, 188, 488, + 196, 280, 204, 408, 212, 344, 220, 472, + 228, 312, 236, 440, 244, 376, 252, 504, + 268, 388, 276, 324, 284, 452, 300, 420, + 308, 356, 316, 484, 332, 404, 348, 468, + 364, 436, 380, 500, 412, 460, 444, 492 +}; + +static int16_t coefTable_8[] = { + 4, 512, 8, 256, 12, 768, 16, 128, + 20, 640, 24, 384, 28, 896, 32, 64, + 36, 576, 40, 320, 44, 832, 48, 192, + 52, 704, 56, 448, 60, 960, 68, 544, + 72, 288, 76, 800, 80, 160, 84, 672, + 88, 416, 92, 928, 100, 608, 104, 352, + 108, 864, 112, 224, 116, 736, 120, 480, + 124, 992, 132, 528, 136, 272, 140, 784, + 148, 656, 152, 400, 156, 912, 164, 592, + 168, 336, 172, 848, 176, 208, 180, 720, + 184, 464, 188, 976, 196, 560, 200, 304, + 204, 816, 212, 688, 216, 432, 220, 944, + 228, 624, 232, 368, 
236, 880, 244, 752, + 248, 496, 252, 1008, 260, 520, 268, 776, + 276, 648, 280, 392, 284, 904, 292, 584, + 296, 328, 300, 840, 308, 712, 312, 456, + 316, 968, 324, 552, 332, 808, 340, 680, + 344, 424, 348, 936, 356, 616, 364, 872, + 372, 744, 376, 488, 380, 1000, 388, 536, + 396, 792, 404, 664, 412, 920, 420, 600, + 428, 856, 436, 728, 440, 472, 444, 984, + 452, 568, 460, 824, 468, 696, 476, 952, + 484, 632, 492, 888, 500, 760, 508, 1016, + 524, 772, 532, 644, 540, 900, 548, 580, + 556, 836, 564, 708, 572, 964, 588, 804, + 596, 676, 604, 932, 620, 868, 628, 740, + 636, 996, 652, 788, 668, 916, 684, 852, + 692, 724, 700, 980, 716, 820, 732, 948, + 748, 884, 764, 1012, 796, 908, 812, 844, + 828, 972, 860, 940, 892, 1004, 956, 988 +}; + +void WebRtcSpl_ComplexBitReverse(int16_t frfi[], int stages) { + int l; + int16_t tr, ti; + int32_t tmp1, tmp2, tmp3, tmp4; + int32_t* ptr_i; + int32_t* ptr_j; + + if (stages == 8) { + int16_t* pcoeftable_8 = coefTable_8; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[l], $zero, 120 \n\t" + "1: \n\t" + "addiu %[l], %[l], -4 \n\t" + "lh %[tr], 0(%[pcoeftable_8]) \n\t" + "lh %[ti], 2(%[pcoeftable_8]) \n\t" + "lh %[tmp3], 4(%[pcoeftable_8]) \n\t" + "lh %[tmp4], 6(%[pcoeftable_8]) \n\t" + "addu %[ptr_i], %[frfi], %[tr] \n\t" + "addu %[ptr_j], %[frfi], %[ti] \n\t" + "addu %[tr], %[frfi], %[tmp3] \n\t" + "addu %[ti], %[frfi], %[tmp4] \n\t" + "ulw %[tmp1], 0(%[ptr_i]) \n\t" + "ulw %[tmp2], 0(%[ptr_j]) \n\t" + "ulw %[tmp3], 0(%[tr]) \n\t" + "ulw %[tmp4], 0(%[ti]) \n\t" + "usw %[tmp1], 0(%[ptr_j]) \n\t" + "usw %[tmp2], 0(%[ptr_i]) \n\t" + "usw %[tmp4], 0(%[tr]) \n\t" + "usw %[tmp3], 0(%[ti]) \n\t" + "lh %[tmp1], 8(%[pcoeftable_8]) \n\t" + "lh %[tmp2], 10(%[pcoeftable_8]) \n\t" + "lh %[tr], 12(%[pcoeftable_8]) \n\t" + "lh %[ti], 14(%[pcoeftable_8]) \n\t" + "addu %[ptr_i], %[frfi], %[tmp1] \n\t" + "addu %[ptr_j], %[frfi], %[tmp2] \n\t" + "addu %[tr], %[frfi], %[tr] \n\t" + "addu %[ti], %[frfi], %[ti] \n\t" + "ulw 
%[tmp1], 0(%[ptr_i]) \n\t" + "ulw %[tmp2], 0(%[ptr_j]) \n\t" + "ulw %[tmp3], 0(%[tr]) \n\t" + "ulw %[tmp4], 0(%[ti]) \n\t" + "usw %[tmp1], 0(%[ptr_j]) \n\t" + "usw %[tmp2], 0(%[ptr_i]) \n\t" + "usw %[tmp4], 0(%[tr]) \n\t" + "usw %[tmp3], 0(%[ti]) \n\t" + "bgtz %[l], 1b \n\t" + " addiu %[pcoeftable_8], %[pcoeftable_8], 16 \n\t" + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i), + [ptr_j] "=&r" (ptr_j), [tr] "=&r" (tr), [l] "=&r" (l), + [tmp3] "=&r" (tmp3), [pcoeftable_8] "+r" (pcoeftable_8), + [ti] "=&r" (ti), [tmp4] "=&r" (tmp4) + : [frfi] "r" (frfi) + : "memory" + ); + } else if (stages == 7) { + int16_t* pcoeftable_7 = coefTable_7; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[l], $zero, 56 \n\t" + "1: \n\t" + "addiu %[l], %[l], -4 \n\t" + "lh %[tr], 0(%[pcoeftable_7]) \n\t" + "lh %[ti], 2(%[pcoeftable_7]) \n\t" + "lh %[tmp3], 4(%[pcoeftable_7]) \n\t" + "lh %[tmp4], 6(%[pcoeftable_7]) \n\t" + "addu %[ptr_i], %[frfi], %[tr] \n\t" + "addu %[ptr_j], %[frfi], %[ti] \n\t" + "addu %[tr], %[frfi], %[tmp3] \n\t" + "addu %[ti], %[frfi], %[tmp4] \n\t" + "ulw %[tmp1], 0(%[ptr_i]) \n\t" + "ulw %[tmp2], 0(%[ptr_j]) \n\t" + "ulw %[tmp3], 0(%[tr]) \n\t" + "ulw %[tmp4], 0(%[ti]) \n\t" + "usw %[tmp1], 0(%[ptr_j]) \n\t" + "usw %[tmp2], 0(%[ptr_i]) \n\t" + "usw %[tmp4], 0(%[tr]) \n\t" + "usw %[tmp3], 0(%[ti]) \n\t" + "lh %[tmp1], 8(%[pcoeftable_7]) \n\t" + "lh %[tmp2], 10(%[pcoeftable_7]) \n\t" + "lh %[tr], 12(%[pcoeftable_7]) \n\t" + "lh %[ti], 14(%[pcoeftable_7]) \n\t" + "addu %[ptr_i], %[frfi], %[tmp1] \n\t" + "addu %[ptr_j], %[frfi], %[tmp2] \n\t" + "addu %[tr], %[frfi], %[tr] \n\t" + "addu %[ti], %[frfi], %[ti] \n\t" + "ulw %[tmp1], 0(%[ptr_i]) \n\t" + "ulw %[tmp2], 0(%[ptr_j]) \n\t" + "ulw %[tmp3], 0(%[tr]) \n\t" + "ulw %[tmp4], 0(%[ti]) \n\t" + "usw %[tmp1], 0(%[ptr_j]) \n\t" + "usw %[tmp2], 0(%[ptr_i]) \n\t" + "usw %[tmp4], 0(%[tr]) \n\t" + "usw %[tmp3], 0(%[ti]) \n\t" + "bgtz %[l], 1b \n\t" + " addiu 
%[pcoeftable_7], %[pcoeftable_7], 16 \n\t" + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i), + [ptr_j] "=&r" (ptr_j), [ti] "=&r" (ti), [tr] "=&r" (tr), + [l] "=&r" (l), [pcoeftable_7] "+r" (pcoeftable_7), + [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4) + : [frfi] "r" (frfi) + : "memory" + ); + } +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft.c new file mode 100644 index 00000000..97ebacc4 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft.c @@ -0,0 +1,298 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_ComplexFFT(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/complex_fft_tables.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +#define CFFTSFT 14 +#define CFFTRND 1 +#define CFFTRND2 16384 + +#define CIFFTSFT 14 +#define CIFFTRND 1 + + +int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) +{ + int i, j, l, k, istep, n, m; + int16_t wr, wi; + int32_t tr32, ti32, qr32, qi32; + + /* The 1024-value is a constant given from the size of kSinTable1024[], + * and should not be changed depending on the input parameter 'stages' + */ + n = 1 << stages; + if (n > 1024) + return -1; + + l = 1; + k = 10 - 1; /* Constant for given kSinTable1024[]. 
Do not change + depending on the input parameter 'stages' */ + + if (mode == 0) + { + // mode==0: Low-complexity and Low-accuracy mode + while (l < n) + { + istep = l << 1; + + for (m = 0; m < l; ++m) + { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = -kSinTable1024[j]; + + for (i = m; i < n; i += istep) + { + j = i + l; + + tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; + + ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; + + qr32 = (int32_t)frfi[2 * i]; + qi32 = (int32_t)frfi[2 * i + 1]; + frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1); + frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1); + frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1); + frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1); + } + } + + --k; + l = istep; + + } + + } else + { + // mode==1: High-complexity and High-accuracy mode + while (l < n) + { + istep = l << 1; + + for (m = 0; m < l; ++m) + { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. 
It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = -kSinTable1024[j];
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                int32_t wri = 0;
+                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
+                    "r"((int32_t)wr), "r"((int32_t)wi));
+#endif
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                    register int32_t frfi_r;
+                    __asm __volatile(
+                        "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd],"
+                        " lsl #16\n\t"
+                        "smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
+                        "smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
+                        :[frfi_r]"=&r"(frfi_r),
+                         [tr32]"=&r"(tr32),
+                         [ti32]"=r"(ti32)
+                        :[frfi_even]"r"((int32_t)frfi[2*j]),
+                         [frfi_odd]"r"((int32_t)frfi[2*j +1]),
+                         [wri]"r"(wri),
+                         [cfftrnd]"r"(CFFTRND));
+#else
+                    tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND;
+
+                    ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND;
+#endif
+
+                    tr32 >>= 15 - CFFTSFT;
+                    ti32 >>= 15 - CFFTSFT;
+
+                    qr32 = ((int32_t)frfi[2 * i]) << CFFTSFT;
+                    qi32 = ((int32_t)frfi[2 * i + 1]) << CFFTSFT;
+
+                    frfi[2 * j] = (int16_t)(
+                        (qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * j + 1] = (int16_t)(
+                        (qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * i] = (int16_t)(
+                        (qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * i + 1] = (int16_t)(
+                        (qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT));
+                }
+            }
+
+            --k;
+            l = istep;
+        }
+    }
+    return 0;
+}
+
+/*
+ * In-place inverse complex FFT butterflies on interleaved 16-bit data.
+ *
+ * frfi   : 2 * (1 << stages) values; element 2*i is the real part and
+ *          element 2*i+1 the imaginary part of point i.  Overwritten.
+ * stages : log2 of the number of complex points, 0..10 (the upper bound
+ *          comes from the size of kSinTable1024[]).
+ * mode   : 0 selects the low-complexity path, any other value the
+ *          high-accuracy path with rounding.
+ *
+ * Before every stage the largest absolute value in frfi decides whether
+ * that stage shifts its output right by 0, 1 or 2 bits.
+ *
+ * Returns the total number of such right shifts applied over all stages,
+ * or -1 when 'stages' is outside 0..10.
+ */
+int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode)
+{
+    size_t i, j, l, istep, n, m;
+    int k, scale, shift;
+    int16_t wr, wi;
+    int32_t tr32, ti32, qr32, qi32;
+    int32_t tmp32, round2;
+
+    /* Reject the request before shifting: a negative or too large shift
+     * count is undefined behaviour, and more than 1024 points would index
+     * past kSinTable1024[].  The 1024 limit is a constant given from the
+     * size of kSinTable1024[], and should not be changed depending on the
+     * input parameter 'stages'.
+     */
+    if (stages < 0 || stages > 10)
+        return -1;
+    n = (size_t)1 << stages;
+
+    scale = 0;
+
+    l = 1;
+    k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
+                   depending on the input parameter 'stages' */
+
+    while (l < n)
+    {
+        // variable scaling, depending upon data
+        shift = 0;
+        round2 = 8192;
+
+        tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n);
+        if (tmp32 > 13573)
+        {
+            shift++;
+            scale++;
+            round2 <<= 1;
+        }
+        if (tmp32 > 27146)
+        {
+            shift++;
+            scale++;
+            round2 <<= 1;
+        }
+
+        istep = l << 1;
+
+        if (mode == 0)
+        {
+            // mode==0: Low-complexity and Low-accuracy mode
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = kSinTable1024[j];
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+                    tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
+
+                    ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
+
+                    qr32 = (int32_t)frfi[2 * i];
+                    qi32 = (int32_t)frfi[2 * i + 1];
+                    frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift);
+                    frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift);
+                    frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift);
+                    frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift);
+                }
+            }
+        } else
+        {
+            // mode==1: High-complexity and High-accuracy mode
+
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = kSinTable1024[j];
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                int32_t wri = 0;
+                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
+                    "r"((int32_t)wr), "r"((int32_t)wi));
+#endif
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                    register int32_t frfi_r;
+                    __asm __volatile(
+                        "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t"
+                        "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
+                        "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
+                        :[frfi_r]"=&r"(frfi_r),
+                         [tr32]"=&r"(tr32),
+                         [ti32]"=r"(ti32)
+                        :[frfi_even]"r"((int32_t)frfi[2*j]),
+                         [frfi_odd]"r"((int32_t)frfi[2*j +1]),
+                         [wri]"r"(wri),
+                         [cifftrnd]"r"(CIFFTRND)
+                    );
+#else
+
+                    tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND;
+
+                    ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND;
+#endif
+                    tr32 >>= 15 - CIFFTSFT;
+                    ti32 >>= 15 - CIFFTSFT;
+
+                    qr32 = ((int32_t)frfi[2 * i]) << CIFFTSFT;
+                    qi32 = ((int32_t)frfi[2 * i + 1]) << CIFFTSFT;
+
+                    frfi[2 * j] = (int16_t)(
+                        (qr32 - tr32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * j + 1] = (int16_t)(
+                        (qi32 - ti32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * i] = (int16_t)(
+                        (qr32 + tr32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * i + 1] = (int16_t)(
+                        (qi32 + ti32 + round2) >> (shift + CIFFTSFT));
+                }
+            }
+
+        }
+        --k;
+        l = istep;
+    }
+    return scale;
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_mips.c
new file mode 100644
index 00000000..34c4f232
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_mips.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "webrtc/common_audio/signal_processing/complex_fft_tables.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#define CFFTSFT 14
+#define CFFTRND 1
+#define CFFTRND2 16384
+
+#define CIFFTSFT 14
+#define CIFFTRND 1
+
+/*
+ * MIPS inline-assembly forward complex FFT butterflies, in place.
+ *
+ * frfi   : 2 * (1 << stages) interleaved 16-bit values (re, im, re, im...).
+ * stages : log2 of the number of complex points, 0..10.
+ * mode   : accepted for interface compatibility; this implementation
+ *          does not reference it.
+ *
+ * Returns 0 on success, -1 when 'stages' is outside 0..10.
+ */
+int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) {
+  int i = 0;
+  int l = 0;
+  int k = 0;
+  int istep = 0;
+  int n = 0;
+  int m = 0;
+  int32_t wr = 0, wi = 0;
+  int32_t tmp1 = 0;
+  int32_t tmp2 = 0;
+  int32_t tmp3 = 0;
+  int32_t tmp4 = 0;
+  int32_t tmp5 = 0;
+  int32_t tmp6 = 0;
+  int32_t tmp = 0;
+  int16_t* ptr_j = NULL;
+  int16_t* ptr_i = NULL;
+
+  /* Validate before shifting: a negative or oversized shift count is
+   * undefined, and more than 1024 points exceeds kSinTable1024[]. */
+  if (stages < 0 || stages > 10) {
+    return -1;
+  }
+  n = 1 << stages;
+  /* The assembly loops below test their condition at the bottom, so they
+   * would run one butterfly on frfi[2..3] even for a single point.  A
+   * one-point transform leaves the data unchanged. */
+  if (n < 2) {
+    return 0;
+  }
+
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"
+
+    "addiu %[k], $zero, 10 \n\t"
+    "addiu %[l], $zero, 1 \n\t"
+   "3: \n\t"
+    "sll %[istep], %[l], 1 \n\t"
+    "move %[m], $zero \n\t"
+    "sll %[tmp], %[l], 2 \n\t"
+    "move %[i], $zero \n\t"
+   "2: \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "sllv %[tmp3], %[m], %[k] \n\t"
+    "addiu %[tmp2], %[tmp3], 512 \n\t"
+    "addiu %[m], %[m], 1 \n\t"
+    "lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t"
+    "lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+    "sllv %[tmp3], %[m], %[k] \n\t"
+    "addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t"
+    "addiu %[ptr_i], %[ptr_j], 512 \n\t"
+    "addiu %[m], %[m], 1 \n\t"
+    "lh %[wi], 0(%[ptr_j]) \n\t"
+    "lh %[wr], 0(%[ptr_i]) \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+   "1: \n\t"
+    "sll %[tmp1], %[i], 2 \n\t"
+    "addu %[ptr_i], %[frfi], %[tmp1] \n\t"
+    "addu %[ptr_j], %[ptr_i], %[tmp] \n\t"
+    "lh %[tmp6], 0(%[ptr_i]) \n\t"
+    "lh %[tmp5], 2(%[ptr_i]) \n\t"
+    "lh %[tmp3], 0(%[ptr_j]) \n\t"
+    "lh %[tmp4], 2(%[ptr_j]) \n\t"
+    "addu %[i], %[i], %[istep] \n\t"
+#if defined(MIPS_DSP_R2_LE)
+    "mult %[wr], %[tmp3] \n\t"
+    "madd %[wi], %[tmp4] \n\t"
+    "mult $ac1, %[wr], %[tmp4] \n\t"
+    "msub $ac1, %[wi], %[tmp3] \n\t"
+    "mflo %[tmp1] \n\t"
+    "mflo %[tmp2], $ac1 \n\t"
+    "sll %[tmp6], %[tmp6], 14 \n\t"
+    "sll %[tmp5], %[tmp5], 14 \n\t"
+    "shra_r.w %[tmp1], %[tmp1], 1 \n\t"
+    "shra_r.w %[tmp2], %[tmp2], 1 \n\t"
+    "subu %[tmp4], %[tmp6], %[tmp1] \n\t"
+    "addu %[tmp1], %[tmp6], %[tmp1] \n\t"
+    "addu %[tmp6], %[tmp5], %[tmp2] \n\t"
+    "subu %[tmp5], %[tmp5], %[tmp2] \n\t"
+    "shra_r.w %[tmp1], %[tmp1], 15 \n\t"
+    "shra_r.w %[tmp6], %[tmp6], 15 \n\t"
+    "shra_r.w %[tmp4], %[tmp4], 15 \n\t"
+    "shra_r.w %[tmp5], %[tmp5], 15 \n\t"
+#else // #if defined(MIPS_DSP_R2_LE)
+    "mul %[tmp2], %[wr], %[tmp4] \n\t"
+    "mul %[tmp1], %[wr], %[tmp3] \n\t"
+    "mul %[tmp4], %[wi], %[tmp4] \n\t"
+    "mul %[tmp3], %[wi], %[tmp3] \n\t"
+    "sll %[tmp6], %[tmp6], 14 \n\t"
+    "sll %[tmp5], %[tmp5], 14 \n\t"
+    "addiu %[tmp6], %[tmp6], 16384 \n\t"
+    "addiu %[tmp5], %[tmp5], 16384 \n\t"
+    "addu %[tmp1], %[tmp1], %[tmp4] \n\t"
+    "subu %[tmp2], %[tmp2], %[tmp3] \n\t"
+    "addiu %[tmp1], %[tmp1], 1 \n\t"
+    "addiu %[tmp2], %[tmp2], 1 \n\t"
+    "sra %[tmp1], %[tmp1], 1 \n\t"
+    "sra %[tmp2], %[tmp2], 1 \n\t"
+    "subu %[tmp4], %[tmp6], %[tmp1] \n\t"
+    "addu %[tmp1], %[tmp6], %[tmp1] \n\t"
+    "addu %[tmp6], %[tmp5], %[tmp2] \n\t"
+    "subu %[tmp5], %[tmp5], %[tmp2] \n\t"
+    "sra %[tmp4], %[tmp4], 15 \n\t"
+    "sra %[tmp1], %[tmp1], 15 \n\t"
+    "sra %[tmp6], %[tmp6], 15 \n\t"
+    "sra %[tmp5], %[tmp5], 15 \n\t"
+#endif // #if defined(MIPS_DSP_R2_LE)
+    "sh %[tmp1], 0(%[ptr_i]) \n\t"
+    "sh %[tmp6], 2(%[ptr_i]) \n\t"
+    "sh %[tmp4], 0(%[ptr_j]) \n\t"
+    "blt %[i], %[n], 1b \n\t"
+    " sh %[tmp5], 2(%[ptr_j]) \n\t"
+    "blt %[m], %[l], 2b \n\t"
+    " addu %[i], $zero, %[m] \n\t"
+    "move %[l], %[istep] \n\t"
+    "blt %[l], %[n], 3b \n\t"
+    " addiu %[k], %[k], -1 \n\t"
+
+    ".set pop \n\t"
+
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+      [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+      [ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [wi] "=&r" (wi), [wr] "=&r" (wr),
+      [m] "=&r" (m), [istep] "=&r" (istep), [l] "=&r" (l), [k] "=&r" (k),
+      [ptr_j] "=&r" (ptr_j), [tmp] "=&r" (tmp)
+    : [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024)
+    : "hi", "lo", "memory"
+#if defined(MIPS_DSP_R2_LE)
+    , "$ac1hi", "$ac1lo"
+#endif // #if defined(MIPS_DSP_R2_LE)
+  );
+
+  return 0;
+}
+
+/*
+ * MIPS inline-assembly inverse complex FFT butterflies, in place.
+ *
+ * frfi   : 2 * (1 << stages) interleaved 16-bit values (re, im, re, im...).
+ * stages : log2 of the number of complex points, 0..10.
+ * mode   : accepted for interface compatibility; this implementation
+ *          does not reference it.
+ *
+ * Each stage first scans frfi for its largest absolute value and adds
+ * 0, 1 or 2 to the stage's right shift (thresholds 13573 and 27146).
+ *
+ * Returns the accumulated number of extra right shifts ('scale'), or -1
+ * when 'stages' is outside 0..10.
+ */
+int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) {
+  int i = 0, l = 0, k = 0;
+  int istep = 0, n = 0, m = 0;
+  int scale = 0, shift = 0;
+  int32_t wr = 0, wi = 0;
+  int32_t tmp1 = 0, tmp2 = 0, tmp3 = 0, tmp4 = 0;
+  int32_t tmp5 = 0, tmp6 = 0, tmp = 0, tempMax = 0, round2 = 0;
+  int16_t* ptr_j = NULL;
+  int16_t* ptr_i = NULL;
+
+  /* Validate before shifting: a negative or oversized shift count is
+   * undefined, and more than 1024 points exceeds kSinTable1024[]. */
+  if (stages < 0 || stages > 10) {
+    return -1;
+  }
+  n = 1 << stages;
+  /* The max-value scan below reads four halfwords per iteration and the
+   * butterfly loops test at the bottom, so a single point (two halfwords)
+   * would be over-read and over-written.  Nothing needs to be done for
+   * one point; no scaling is applied. */
+  if (n < 2) {
+    return scale;
+  }
+
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"
+
+    "addiu %[k], $zero, 10 \n\t"
+    "addiu %[l], $zero, 1 \n\t"
+    "move %[scale], $zero \n\t"
+   "3: \n\t"
+    "addiu %[shift], $zero, 14 \n\t"
+    "addiu %[round2], $zero, 8192 \n\t"
+    "move %[ptr_i], %[frfi] \n\t"
+    "move %[tempMax], $zero \n\t"
+    "addu %[i], %[n], %[n] \n\t"
+   "5: \n\t"
+    "lh %[tmp1], 0(%[ptr_i]) \n\t"
+    "lh %[tmp2], 2(%[ptr_i]) \n\t"
+    "lh %[tmp3], 4(%[ptr_i]) \n\t"
+    "lh %[tmp4], 6(%[ptr_i]) \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "absq_s.w %[tmp1], %[tmp1] \n\t"
+    "absq_s.w %[tmp2], %[tmp2] \n\t"
+    "absq_s.w %[tmp3], %[tmp3] \n\t"
+    "absq_s.w %[tmp4], %[tmp4] \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+    "slt %[tmp5], %[tmp1], $zero \n\t"
+    "subu %[tmp6], $zero, %[tmp1] \n\t"
+    "movn %[tmp1], %[tmp6], %[tmp5] \n\t"
+    "slt %[tmp5], %[tmp2], $zero \n\t"
+    "subu %[tmp6], $zero, %[tmp2] \n\t"
+    "movn %[tmp2], %[tmp6], %[tmp5] \n\t"
+    "slt %[tmp5], %[tmp3], $zero \n\t"
+    "subu %[tmp6], $zero, %[tmp3] \n\t"
+    "movn %[tmp3], %[tmp6], %[tmp5] \n\t"
+    "slt %[tmp5], %[tmp4], $zero \n\t"
+    "subu %[tmp6], $zero, %[tmp4] \n\t"
+    "movn %[tmp4], %[tmp6], %[tmp5] \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+    "slt %[tmp5], %[tempMax], %[tmp1] \n\t"
+    "movn %[tempMax], %[tmp1], %[tmp5] \n\t"
+    "addiu %[i], %[i], -4 \n\t"
+    "slt %[tmp5], %[tempMax], %[tmp2] \n\t"
+    "movn %[tempMax], %[tmp2], %[tmp5] \n\t"
+    "slt %[tmp5], %[tempMax], %[tmp3] \n\t"
+    "movn %[tempMax], %[tmp3], %[tmp5] \n\t"
+    "slt %[tmp5], %[tempMax], %[tmp4] \n\t"
+    "movn %[tempMax], %[tmp4], %[tmp5] \n\t"
+    "bgtz %[i], 5b \n\t"
+    " addiu %[ptr_i], %[ptr_i], 8 \n\t"
+    "addiu %[tmp1], $zero, 13573 \n\t"
+    "addiu %[tmp2], $zero, 27146 \n\t"
+#if !defined(MIPS32_R2_LE)
+    "sll %[tempMax], %[tempMax], 16 \n\t"
+    "sra %[tempMax], %[tempMax], 16 \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+    "seh %[tempMax] \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+    "slt %[tmp1], %[tmp1], %[tempMax] \n\t"
+    "slt %[tmp2], %[tmp2], %[tempMax] \n\t"
+    "addu %[tmp1], %[tmp1], %[tmp2] \n\t"
+    "addu %[shift], %[shift], %[tmp1] \n\t"
+    "addu %[scale], %[scale], %[tmp1] \n\t"
+    "sllv %[round2], %[round2], %[tmp1] \n\t"
+    "sll %[istep], %[l], 1 \n\t"
+    "move %[m], $zero \n\t"
+    "sll %[tmp], %[l], 2 \n\t"
+   "2: \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "sllv %[tmp3], %[m], %[k] \n\t"
+    "addiu %[tmp2], %[tmp3], 512 \n\t"
+    "addiu %[m], %[m], 1 \n\t"
+    "lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t"
+    "lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+    "sllv %[tmp3], %[m], %[k] \n\t"
+    "addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t"
+    "addiu %[ptr_i], %[ptr_j], 512 \n\t"
+    "addiu %[m], %[m], 1 \n\t"
+    "lh %[wi], 0(%[ptr_j]) \n\t"
+    "lh %[wr], 0(%[ptr_i]) \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+   "1: \n\t"
+    "sll %[tmp1], %[i], 2 \n\t"
+    "addu %[ptr_i], %[frfi], %[tmp1] \n\t"
+    "addu %[ptr_j], %[ptr_i], %[tmp] \n\t"
+    "lh %[tmp3], 0(%[ptr_j]) \n\t"
+    "lh %[tmp4], 2(%[ptr_j]) \n\t"
+    "lh %[tmp6], 0(%[ptr_i]) \n\t"
+    "lh %[tmp5], 2(%[ptr_i]) \n\t"
+    "addu %[i], %[i], %[istep] \n\t"
+#if defined(MIPS_DSP_R2_LE)
+    "mult %[wr], %[tmp3] \n\t"
+    "msub %[wi], %[tmp4] \n\t"
+    "mult $ac1, %[wr], %[tmp4] \n\t"
+    "madd $ac1, %[wi], %[tmp3] \n\t"
+    "mflo %[tmp1] \n\t"
+    "mflo %[tmp2], $ac1 \n\t"
+    "sll %[tmp6], %[tmp6], 14 \n\t"
+    "sll %[tmp5], %[tmp5], 14 \n\t"
+    "shra_r.w %[tmp1], %[tmp1], 1 \n\t"
+    "shra_r.w %[tmp2], %[tmp2], 1 \n\t"
+    "addu %[tmp6], %[tmp6], %[round2] \n\t"
+    "addu %[tmp5], %[tmp5], %[round2] \n\t"
+    "subu %[tmp4], %[tmp6], %[tmp1] \n\t"
+    "addu %[tmp1], %[tmp6], %[tmp1] \n\t"
+    "addu %[tmp6], %[tmp5], %[tmp2] \n\t"
+    "subu %[tmp5], %[tmp5], %[tmp2] \n\t"
+    "srav %[tmp4], %[tmp4], %[shift] \n\t"
+    "srav %[tmp1], %[tmp1], %[shift] \n\t"
+    "srav %[tmp6], %[tmp6], %[shift] \n\t"
+    "srav %[tmp5], %[tmp5], %[shift] \n\t"
+#else // #if defined(MIPS_DSP_R2_LE)
+    "mul %[tmp1], %[wr], %[tmp3] \n\t"
+    "mul %[tmp2], %[wr], %[tmp4] \n\t"
+    "mul %[tmp4], %[wi], %[tmp4] \n\t"
+    "mul %[tmp3], %[wi], %[tmp3] \n\t"
+    "sll %[tmp6], %[tmp6], 14 \n\t"
+    "sll %[tmp5], %[tmp5], 14 \n\t"
+    "sub %[tmp1], %[tmp1], %[tmp4] \n\t"
+    "addu %[tmp2], %[tmp2], %[tmp3] \n\t"
+    "addiu %[tmp1], %[tmp1], 1 \n\t"
+    "addiu %[tmp2], %[tmp2], 1 \n\t"
+    "sra %[tmp2], %[tmp2], 1 \n\t"
+    "sra %[tmp1], %[tmp1], 1 \n\t"
+    "addu %[tmp6], %[tmp6], %[round2] \n\t"
+    "addu %[tmp5], %[tmp5], %[round2] \n\t"
+    "subu %[tmp4], %[tmp6], %[tmp1] \n\t"
+    "addu %[tmp1], %[tmp6], %[tmp1] \n\t"
+    "addu %[tmp6], %[tmp5], %[tmp2] \n\t"
+    "subu %[tmp5], %[tmp5], %[tmp2] \n\t"
+    "sra %[tmp4], %[tmp4], %[shift] \n\t"
+    "sra %[tmp1], %[tmp1], %[shift] \n\t"
+    "sra %[tmp6], %[tmp6], %[shift] \n\t"
+    "sra %[tmp5], %[tmp5], %[shift] \n\t"
+#endif // #if defined(MIPS_DSP_R2_LE)
+    "sh %[tmp1], 0(%[ptr_i]) \n\t"
+    "sh %[tmp6], 2(%[ptr_i]) \n\t"
+    "sh %[tmp4], 0(%[ptr_j]) \n\t"
+    "blt %[i], %[n], 1b \n\t"
+    " sh %[tmp5], 2(%[ptr_j]) \n\t"
+    "blt %[m], %[l], 2b \n\t"
+    " addu %[i], $zero, %[m] \n\t"
+    "move %[l], %[istep] \n\t"
+    "blt %[l], %[n], 3b \n\t"
+    " addiu %[k], %[k], -1 \n\t"
+
+    ".set pop \n\t"
+
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+      [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+      [ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [m] "=&r" (m), [tmp] "=&r" (tmp),
+      [istep] "=&r" (istep), [wi] "=&r" (wi), [wr] "=&r" (wr), [l] "=&r" (l),
+      [k] "=&r" (k), [round2] "=&r" (round2), [ptr_j] "=&r" (ptr_j),
+      [shift] "=&r" (shift), [scale] "=&r" (scale), [tempMax] "=&r" (tempMax)
+    : [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024)
+    : "hi", "lo", "memory"
+#if defined(MIPS_DSP_R2_LE)
+    , "$ac1hi", "$ac1lo"
+#endif // #if defined(MIPS_DSP_R2_LE)
+  );
+
+  return scale;
+
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_tables.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_tables.h
new file mode 100644
index 00000000..ca7b7fe3
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_tables.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_ +#define WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_ + +#include "webrtc/typedefs.h" + +static const int16_t kSinTable1024[] = { + 0, 201, 402, 603, 804, 1005, 1206, 1406, + 1607, 1808, 2009, 2209, 2410, 2610, 2811, 3011, + 3211, 3411, 3611, 3811, 4011, 4210, 4409, 4608, + 4807, 5006, 5205, 5403, 5601, 5799, 5997, 6195, + 6392, 6589, 6786, 6982, 7179, 7375, 7571, 7766, + 7961, 8156, 8351, 8545, 8739, 8932, 9126, 9319, + 9511, 9703, 9895, 10087, 10278, 10469, 10659, 10849, + 11038, 11227, 11416, 11604, 11792, 11980, 12166, 12353, + 12539, 12724, 12909, 13094, 13278, 13462, 13645, 13827, + 14009, 14191, 14372, 14552, 14732, 14911, 15090, 15268, + 15446, 15623, 15799, 15975, 16150, 16325, 16499, 16672, + 16845, 17017, 17189, 17360, 17530, 17699, 17868, 18036, + 18204, 18371, 18537, 18702, 18867, 19031, 19194, 19357, + 19519, 19680, 19840, 20000, 20159, 20317, 20474, 20631, + 20787, 20942, 21096, 21249, 21402, 21554, 21705, 21855, + 22004, 22153, 22301, 22448, 22594, 22739, 22883, 23027, + 23169, 23311, 23452, 23592, 23731, 23869, 24006, 24143, + 24278, 24413, 24546, 24679, 24811, 24942, 25072, 25201, + 25329, 25456, 25582, 25707, 25831, 25954, 26077, 26198, + 26318, 26437, 26556, 26673, 26789, 26905, 27019, 27132, + 27244, 27355, 27466, 27575, 27683, 27790, 27896, 28001, + 28105, 28208, 28309, 28410, 28510, 28608, 28706, 28802, + 28897, 28992, 29085, 29177, 29268, 29358, 29446, 29534, + 29621, 29706, 29790, 29873, 29955, 30036, 30116, 30195, + 30272, 30349, 30424, 30498, 30571, 30643, 30713, 30783, + 30851, 30918, 30984, 31049, 31113, 31175, 31236, 31297, + 31356, 31413, 31470, 31525, 31580, 31633, 31684, 31735, + 31785, 31833, 31880, 31926, 31970, 32014, 32056, 32097, + 32137, 32176, 32213, 32249, 32284, 32318, 32350, 32382, + 32412, 32441, 32468, 32495, 32520, 32544, 32567, 32588, + 32609, 32628, 32646, 32662, 32678, 32692, 32705, 32717, + 32727, 32736, 32744, 32751, 32757, 
32761, 32764, 32766, + 32767, 32766, 32764, 32761, 32757, 32751, 32744, 32736, + 32727, 32717, 32705, 32692, 32678, 32662, 32646, 32628, + 32609, 32588, 32567, 32544, 32520, 32495, 32468, 32441, + 32412, 32382, 32350, 32318, 32284, 32249, 32213, 32176, + 32137, 32097, 32056, 32014, 31970, 31926, 31880, 31833, + 31785, 31735, 31684, 31633, 31580, 31525, 31470, 31413, + 31356, 31297, 31236, 31175, 31113, 31049, 30984, 30918, + 30851, 30783, 30713, 30643, 30571, 30498, 30424, 30349, + 30272, 30195, 30116, 30036, 29955, 29873, 29790, 29706, + 29621, 29534, 29446, 29358, 29268, 29177, 29085, 28992, + 28897, 28802, 28706, 28608, 28510, 28410, 28309, 28208, + 28105, 28001, 27896, 27790, 27683, 27575, 27466, 27355, + 27244, 27132, 27019, 26905, 26789, 26673, 26556, 26437, + 26318, 26198, 26077, 25954, 25831, 25707, 25582, 25456, + 25329, 25201, 25072, 24942, 24811, 24679, 24546, 24413, + 24278, 24143, 24006, 23869, 23731, 23592, 23452, 23311, + 23169, 23027, 22883, 22739, 22594, 22448, 22301, 22153, + 22004, 21855, 21705, 21554, 21402, 21249, 21096, 20942, + 20787, 20631, 20474, 20317, 20159, 20000, 19840, 19680, + 19519, 19357, 19194, 19031, 18867, 18702, 18537, 18371, + 18204, 18036, 17868, 17699, 17530, 17360, 17189, 17017, + 16845, 16672, 16499, 16325, 16150, 15975, 15799, 15623, + 15446, 15268, 15090, 14911, 14732, 14552, 14372, 14191, + 14009, 13827, 13645, 13462, 13278, 13094, 12909, 12724, + 12539, 12353, 12166, 11980, 11792, 11604, 11416, 11227, + 11038, 10849, 10659, 10469, 10278, 10087, 9895, 9703, + 9511, 9319, 9126, 8932, 8739, 8545, 8351, 8156, + 7961, 7766, 7571, 7375, 7179, 6982, 6786, 6589, + 6392, 6195, 5997, 5799, 5601, 5403, 5205, 5006, + 4807, 4608, 4409, 4210, 4011, 3811, 3611, 3411, + 3211, 3011, 2811, 2610, 2410, 2209, 2009, 1808, + 1607, 1406, 1206, 1005, 804, 603, 402, 201, + 0, -201, -402, -603, -804, -1005, -1206, -1406, + -1607, -1808, -2009, -2209, -2410, -2610, -2811, -3011, + -3211, -3411, -3611, -3811, -4011, -4210, -4409, -4608, + -4807, 
-5006, -5205, -5403, -5601, -5799, -5997, -6195, + -6392, -6589, -6786, -6982, -7179, -7375, -7571, -7766, + -7961, -8156, -8351, -8545, -8739, -8932, -9126, -9319, + -9511, -9703, -9895, -10087, -10278, -10469, -10659, -10849, + -11038, -11227, -11416, -11604, -11792, -11980, -12166, -12353, + -12539, -12724, -12909, -13094, -13278, -13462, -13645, -13827, + -14009, -14191, -14372, -14552, -14732, -14911, -15090, -15268, + -15446, -15623, -15799, -15975, -16150, -16325, -16499, -16672, + -16845, -17017, -17189, -17360, -17530, -17699, -17868, -18036, + -18204, -18371, -18537, -18702, -18867, -19031, -19194, -19357, + -19519, -19680, -19840, -20000, -20159, -20317, -20474, -20631, + -20787, -20942, -21096, -21249, -21402, -21554, -21705, -21855, + -22004, -22153, -22301, -22448, -22594, -22739, -22883, -23027, + -23169, -23311, -23452, -23592, -23731, -23869, -24006, -24143, + -24278, -24413, -24546, -24679, -24811, -24942, -25072, -25201, + -25329, -25456, -25582, -25707, -25831, -25954, -26077, -26198, + -26318, -26437, -26556, -26673, -26789, -26905, -27019, -27132, + -27244, -27355, -27466, -27575, -27683, -27790, -27896, -28001, + -28105, -28208, -28309, -28410, -28510, -28608, -28706, -28802, + -28897, -28992, -29085, -29177, -29268, -29358, -29446, -29534, + -29621, -29706, -29790, -29873, -29955, -30036, -30116, -30195, + -30272, -30349, -30424, -30498, -30571, -30643, -30713, -30783, + -30851, -30918, -30984, -31049, -31113, -31175, -31236, -31297, + -31356, -31413, -31470, -31525, -31580, -31633, -31684, -31735, + -31785, -31833, -31880, -31926, -31970, -32014, -32056, -32097, + -32137, -32176, -32213, -32249, -32284, -32318, -32350, -32382, + -32412, -32441, -32468, -32495, -32520, -32544, -32567, -32588, + -32609, -32628, -32646, -32662, -32678, -32692, -32705, -32717, + -32727, -32736, -32744, -32751, -32757, -32761, -32764, -32766, + -32767, -32766, -32764, -32761, -32757, -32751, -32744, -32736, + -32727, -32717, -32705, -32692, -32678, -32662, 
-32646, -32628, + -32609, -32588, -32567, -32544, -32520, -32495, -32468, -32441, + -32412, -32382, -32350, -32318, -32284, -32249, -32213, -32176, + -32137, -32097, -32056, -32014, -31970, -31926, -31880, -31833, + -31785, -31735, -31684, -31633, -31580, -31525, -31470, -31413, + -31356, -31297, -31236, -31175, -31113, -31049, -30984, -30918, + -30851, -30783, -30713, -30643, -30571, -30498, -30424, -30349, + -30272, -30195, -30116, -30036, -29955, -29873, -29790, -29706, + -29621, -29534, -29446, -29358, -29268, -29177, -29085, -28992, + -28897, -28802, -28706, -28608, -28510, -28410, -28309, -28208, + -28105, -28001, -27896, -27790, -27683, -27575, -27466, -27355, + -27244, -27132, -27019, -26905, -26789, -26673, -26556, -26437, + -26318, -26198, -26077, -25954, -25831, -25707, -25582, -25456, + -25329, -25201, -25072, -24942, -24811, -24679, -24546, -24413, + -24278, -24143, -24006, -23869, -23731, -23592, -23452, -23311, + -23169, -23027, -22883, -22739, -22594, -22448, -22301, -22153, + -22004, -21855, -21705, -21554, -21402, -21249, -21096, -20942, + -20787, -20631, -20474, -20317, -20159, -20000, -19840, -19680, + -19519, -19357, -19194, -19031, -18867, -18702, -18537, -18371, + -18204, -18036, -17868, -17699, -17530, -17360, -17189, -17017, + -16845, -16672, -16499, -16325, -16150, -15975, -15799, -15623, + -15446, -15268, -15090, -14911, -14732, -14552, -14372, -14191, + -14009, -13827, -13645, -13462, -13278, -13094, -12909, -12724, + -12539, -12353, -12166, -11980, -11792, -11604, -11416, -11227, + -11038, -10849, -10659, -10469, -10278, -10087, -9895, -9703, + -9511, -9319, -9126, -8932, -8739, -8545, -8351, -8156, + -7961, -7766, -7571, -7375, -7179, -6982, -6786, -6589, + -6392, -6195, -5997, -5799, -5601, -5403, -5205, -5006, + -4807, -4608, -4409, -4210, -4011, -3811, -3611, -3411, + -3211, -3011, -2811, -2610, -2410, -2209, -2009, -1808, + -1607, -1406, -1206, -1005, -804, -603, -402, -201 +}; + +#endif // 
WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/copy_set_operations.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/copy_set_operations.c
new file mode 100644
index 00000000..9d7cf47e
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/copy_set_operations.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the implementation of functions
+ * WebRtcSpl_MemSetW16()
+ * WebRtcSpl_MemSetW32()
+ * WebRtcSpl_MemCpyReversedOrder()
+ * WebRtcSpl_CopyFromEndW16()
+ * WebRtcSpl_ZerosArrayW16()
+ * WebRtcSpl_ZerosArrayW32()
+ *
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include <string.h>
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+
+/* Store set_value into ptr[0] .. ptr[length - 1]. */
+void WebRtcSpl_MemSetW16(int16_t *ptr, int16_t set_value, size_t length)
+{
+    size_t idx;
+
+    for (idx = 0; idx < length; idx++)
+    {
+        ptr[idx] = set_value;
+    }
+}
+
+/* Store set_value into ptr[0] .. ptr[length - 1]. */
+void WebRtcSpl_MemSetW32(int32_t *ptr, int32_t set_value, size_t length)
+{
+    size_t idx;
+
+    for (idx = 0; idx < length; idx++)
+    {
+        ptr[idx] = set_value;
+    }
+}
+
+/*
+ * Copy 'length' values while reversing their order: source is read going
+ * upwards from source[0], dest is written going downwards from dest[0],
+ * so dest must point at the LAST element of the destination range.
+ */
+void WebRtcSpl_MemCpyReversedOrder(int16_t* dest,
+                                   int16_t* source,
+                                   size_t length)
+{
+    size_t remaining = length;
+
+    while (remaining > 0)
+    {
+        *dest-- = *source++;
+        remaining--;
+    }
+}
+
+/* Copy the last 'samples' values of vector_in[0..length-1] to vector_out. */
+void WebRtcSpl_CopyFromEndW16(const int16_t *vector_in,
+                              size_t length,
+                              size_t samples,
+                              int16_t *vector_out)
+{
+    const int16_t *tail = &vector_in[length - samples];
+
+    WEBRTC_SPL_MEMCPY_W16(vector_out, tail, samples);
+}
+
+/* Set vector[0..length-1] to zero. */
+void WebRtcSpl_ZerosArrayW16(int16_t *vector, size_t length)
+{
+    WebRtcSpl_MemSetW16(vector, 0, length);
+}
+
+/* Set vector[0..length-1] to zero. */
+void WebRtcSpl_ZerosArrayW32(int32_t *vector, size_t length)
+{
+    WebRtcSpl_MemSetW32(vector, 0, length);
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation.c
new file mode 100644
index 00000000..d7c9f2b9
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* C version of WebRtcSpl_CrossCorrelation() for generic platforms. 
*/
+/*
+ * For each of the dim_cross_correlation lags, sums
+ * (seq1[j] * seq2[j]) >> right_shifts over j = 0 .. dim_seq - 1 and stores
+ * the sum in cross_correlation[lag]; seq2 is advanced by step_seq2
+ * elements (which may be negative) between lags.
+ */
+void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
+                                 const int16_t* seq1,
+                                 const int16_t* seq2,
+                                 size_t dim_seq,
+                                 size_t dim_cross_correlation,
+                                 int right_shifts,
+                                 int step_seq2) {
+  const int16_t* shifted = seq2;
+  size_t lag;
+
+  for (lag = 0; lag < dim_cross_correlation; lag++) {
+    int32_t acc = 0;
+    size_t n;
+
+    for (n = 0; n < dim_seq; n++) {
+      acc += (seq1[n] * shifted[n]) >> right_shifts;
+    }
+    cross_correlation[lag] = acc;
+    shifted += step_seq2;
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_mips.c
new file mode 100644
index 00000000..b2364026
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_mips.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/*
+ * MIPS inline-assembly cross-correlation.
+ *
+ * For each of the dim_cross_correlation lags, sums
+ * (seq1[j] * seq2[j]) >> right_shifts over j = 0 .. dim_seq - 1 and stores
+ * the sum in the next element of cross_correlation; seq2 is advanced by
+ * step_seq2 elements between lags.  Even and odd dim_seq use separate
+ * assembly paths: both process two samples per iteration, the odd path
+ * adds the last sample afterwards.
+ */
+void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation,
+                                     const int16_t* seq1,
+                                     const int16_t* seq2,
+                                     size_t dim_seq,
+                                     size_t dim_cross_correlation,
+                                     int right_shifts,
+                                     int step_seq2) {
+
+  int32_t t0 = 0, t1 = 0, t2 = 0, t3 = 0, sum = 0;
+  int16_t *pseq2 = NULL;
+  int16_t *pseq1 = NULL;
+  int16_t *pseq1_0 = (int16_t*)&seq1[0];
+  int16_t *pseq2_0 = (int16_t*)&seq2[0];
+  int k = 0;
+
+  /* The assembly loops test their counters at the bottom, so they always
+   * run once.  Handle the empty cases here, matching the generic C
+   * implementation: no lags means nothing is written, and an empty
+   * sequence gives an all-zero correlation. */
+  if (dim_cross_correlation == 0) {
+    return;
+  }
+  if (dim_seq == 0) {
+    size_t lag;
+    for (lag = 0; lag < dim_cross_correlation; lag++) {
+      cross_correlation[lag] = 0;
+    }
+    return;
+  }
+
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"
+    "sll %[step_seq2], %[step_seq2], 1 \n\t"
+    "andi %[t0], %[dim_seq], 1 \n\t"
+    "bgtz %[t0], 3f \n\t"
+    " nop \n\t"
+   "1: \n\t"
+    "move %[pseq1], %[pseq1_0] \n\t"
+    "move %[pseq2], %[pseq2_0] \n\t"
+    "sra %[k], %[dim_seq], 1 \n\t"
+    "addiu %[dim_cc], %[dim_cc], -1 \n\t"
+    "xor %[sum], %[sum], %[sum] \n\t"
+   "2: \n\t"
+    "lh %[t0], 0(%[pseq1]) \n\t"
+    "lh %[t1], 0(%[pseq2]) \n\t"
+    "lh %[t2], 2(%[pseq1]) \n\t"
+    "lh %[t3], 2(%[pseq2]) \n\t"
+    "mul %[t0], %[t0], %[t1] \n\t"
+    "addiu %[k], %[k], -1 \n\t"
+    "mul %[t2], %[t2], %[t3] \n\t"
+    "addiu %[pseq1], %[pseq1], 4 \n\t"
+    "addiu %[pseq2], %[pseq2], 4 \n\t"
+    "srav %[t0], %[t0], %[right_shifts] \n\t"
+    "addu %[sum], %[sum], %[t0] \n\t"
+    "srav %[t2], %[t2], %[right_shifts] \n\t"
+    "bgtz %[k], 2b \n\t"
+    " addu %[sum], %[sum], %[t2] \n\t"
+    "addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
+    "sw %[sum], 0(%[cc]) \n\t"
+    "bgtz %[dim_cc], 1b \n\t"
+    " addiu %[cc], %[cc], 4 \n\t"
+    "b 6f \n\t"
+    " nop \n\t"
+   "3: \n\t"
+    "move %[pseq1], %[pseq1_0] \n\t"
+    "move %[pseq2], %[pseq2_0] \n\t"
+    "sra %[k], %[dim_seq], 1 \n\t"
+    "addiu %[dim_cc], %[dim_cc], -1 \n\t"
+    "beqz %[k], 5f \n\t"
+    " xor %[sum], %[sum], %[sum] \n\t"
+   "4: \n\t"
+    "lh %[t0], 0(%[pseq1]) \n\t"
+    "lh %[t1], 0(%[pseq2]) \n\t"
+    "lh %[t2], 2(%[pseq1]) \n\t"
+    "lh %[t3], 2(%[pseq2]) \n\t"
+    "mul %[t0], %[t0], %[t1] \n\t"
+    "addiu %[k], %[k], -1 \n\t"
+    "mul %[t2], %[t2], %[t3] \n\t"
+    "addiu %[pseq1], %[pseq1], 4 \n\t"
+    "addiu %[pseq2], %[pseq2], 4 \n\t"
+    "srav %[t0], %[t0], %[right_shifts] \n\t"
+    "addu %[sum], %[sum], %[t0] \n\t"
+    "srav %[t2], %[t2], %[right_shifts] \n\t"
+    "bgtz %[k], 4b \n\t"
+    " addu %[sum], %[sum], %[t2] \n\t"
+   "5: \n\t"
+    "lh %[t0], 0(%[pseq1]) \n\t"
+    "lh %[t1], 0(%[pseq2]) \n\t"
+    "mul %[t0], %[t0], %[t1] \n\t"
+    "srav %[t0], %[t0], %[right_shifts] \n\t"
+    "addu %[sum], %[sum], %[t0] \n\t"
+    "addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
+    "sw %[sum], 0(%[cc]) \n\t"
+    "bgtz %[dim_cc], 3b \n\t"
+    " addiu %[cc], %[cc], 4 \n\t"
+   "6: \n\t"
+    ".set pop \n\t"
+    : [step_seq2] "+r" (step_seq2), [t0] "=&r" (t0), [t1] "=&r" (t1),
+      [t2] "=&r" (t2), [t3] "=&r" (t3), [pseq1] "=&r" (pseq1),
+      [pseq2] "=&r" (pseq2), [pseq1_0] "+r" (pseq1_0), [pseq2_0] "+r" (pseq2_0),
+      [k] "=&r" (k), [dim_cc] "+r" (dim_cross_correlation), [sum] "=&r" (sum),
+      [cc] "+r" (cross_correlation)
+    : [dim_seq] "r" (dim_seq), [right_shifts] "r" (right_shifts)
+    : "hi", "lo", "memory"
+  );
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c
new file mode 100644
index 00000000..918b6715
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <arm_neon.h>
+
+/*
+ * Dot product of vector1 and vector2 over 'length' samples, accumulated
+ * in 64 bits, shifted right by 'scaling' and stored as 32 bits in
+ * *cross_correlation.  Full groups of eight samples go through NEON; the
+ * remaining 0..7 samples are accumulated in scalar code.
+ */
+static inline void DotProductWithScaleNeon(int32_t* cross_correlation,
+                                           const int16_t* vector1,
+                                           const int16_t* vector2,
+                                           size_t length,
+                                           int scaling) {
+  const size_t blocks = length >> 3;
+  const size_t leftover = length & 7;
+  size_t n;
+  int64x2_t acc_lo = vdupq_n_s64(0);
+  int64x2_t acc_hi = vdupq_n_s64(0);
+
+  for (n = 0; n < blocks; n++) {
+    int16x8_t a = vld1q_s16(vector1);
+    int16x8_t b = vld1q_s16(vector2);
+    /* The low-half product is the same on both architectures. */
+    int32x4_t prod_lo = vmull_s16(vget_low_s16(a), vget_low_s16(b));
+#if defined(WEBRTC_ARCH_ARM64)
+    int32x4_t prod_hi = vmull_high_s16(a, b);
+#else
+    int32x4_t prod_hi = vmull_s16(vget_high_s16(a), vget_high_s16(b));
+#endif
+    acc_lo = vpadalq_s32(acc_lo, prod_lo);
+    acc_hi = vpadalq_s32(acc_hi, prod_hi);
+    vector1 += 8;
+    vector2 += 8;
+  }
+
+  // Calculate the rest of the samples.
+  int64_t tail_sum = 0;
+  for (n = 0; n < leftover; n++) {
+    tail_sum += WEBRTC_SPL_MUL_16_16(vector1[n], vector2[n]);
+  }
+
+  acc_lo = vaddq_s64(acc_lo, acc_hi);
+#if defined(WEBRTC_ARCH_ARM64)
+  int64_t total = vaddvq_s64(acc_lo);
+  *cross_correlation = (int32_t)((total + tail_sum) >> scaling);
+#else
+  /* vshl with a negative count performs the right shift. */
+  int64x1_t neg_shift = vdup_n_s64(-scaling);
+  int64x1_t total = vadd_s64(vget_low_s64(acc_lo), vget_high_s64(acc_lo));
+  total = vadd_s64(total, vdup_n_s64(tail_sum));
+  total = vshl_s64(total, neg_shift);
+  vst1_lane_s32(cross_correlation, vreinterpret_s32_s64(total), 0);
+#endif
+}
+
+/* NEON version of WebRtcSpl_CrossCorrelation() for ARM32/64 platforms. 
*/
+/*
+ * Writes dim_cross_correlation values: value i is the scaled dot product
+ * (see DotProductWithScaleNeon) of seq1 with seq2 displaced by
+ * i * step_seq2 elements, over dim_seq samples.
+ */
+void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
+                                    const int16_t* seq1,
+                                    const int16_t* seq2,
+                                    size_t dim_seq,
+                                    size_t dim_cross_correlation,
+                                    int right_shifts,
+                                    int step_seq2) {
+  size_t i = 0;
+  /* Advance a running pointer instead of computing seq2 + step_seq2 * i:
+   * that product mixes int with size_t, so a negative step would be
+   * converted to a huge unsigned offset.  Stepping the pointer keeps the
+   * arithmetic signed, as the generic C implementation does. */
+  const int16_t* seq2_ptr = seq2;
+
+  for (i = 0; i < dim_cross_correlation; i++) {
+    DotProductWithScaleNeon(cross_correlation,
+                            seq1,
+                            seq2_ptr,
+                            dim_seq,
+                            right_shifts);
+    cross_correlation++;
+    seq2_ptr += step_seq2;
+  }
+}
diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/division_operations.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/division_operations.c
new file mode 100644
index 00000000..eaa06a1f
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/division_operations.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree. 
/*
 * This file contains implementations of the divisions
 * WebRtcSpl_DivU32U16()
 * WebRtcSpl_DivW32W16()
 * WebRtcSpl_DivW32W16ResW16()
 * WebRtcSpl_DivResultInQ31()
 * WebRtcSpl_DivW32HiLow()
 *
 * The description header can be found in signal_processing_library.h
 *
 */

/*
 * Unsigned division of a 32-bit numerator by a 16-bit denominator.
 * Returns 0xFFFFFFFF when |den| is zero.
 */
uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den)
{
    // Guard against division with 0
    if (den == 0)
    {
        return (uint32_t)0xFFFFFFFF;
    }
    return (uint32_t)(num / den);
}

/*
 * Signed division of a 32-bit numerator by a 16-bit denominator.
 * Returns 0x7FFFFFFF when |den| is zero, and also when the quotient is not
 * representable (most negative numerator divided by -1).
 */
int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den)
{
    // Guard against division with 0
    if (den == 0)
    {
        return (int32_t)0x7FFFFFFF;
    }
    // (-2^31) / -1 overflows int32_t, which is undefined behavior and traps
    // on some CPUs. Saturate instead.
    if (den == -1 && num == (-0x7FFFFFFF - 1))
    {
        return (int32_t)0x7FFFFFFF;
    }
    return (int32_t)(num / den);
}

/*
 * Signed division of a 32-bit numerator by a 16-bit denominator, with the
 * quotient converted to 16 bits. Returns 0x7FFF when |den| is zero, and also
 * for the overflowing case (most negative numerator divided by -1).
 * Quotients that do not fit in 16 bits are converted by the cast, as before.
 */
int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den)
{
    // Guard against division with 0
    if (den == 0)
    {
        return (int16_t)0x7FFF;
    }
    // Same overflow guard as in WebRtcSpl_DivW32W16().
    if (den == -1 && num == (-0x7FFFFFFF - 1))
    {
        return (int16_t)0x7FFF;
    }
    return (int16_t)(num / den);
}

/*
 * Computes num / den as a 31-bit fraction (Q31) by bit-by-bit restoring
 * division on the magnitudes; the sign is applied afterwards.
 *
 * The quotient is only meaningful when |num| < |den|. When the magnitude of
 * |num| is at least that of |den| (including den == 0) the 31 result bits
 * cannot represent the quotient and the value returned is unspecified.
 *
 * The arithmetic is done on unsigned magnitudes: with signed operands the
 * doubling of the running remainder overflows as soon as the remainder
 * reaches 2^30, which produced wrong quotients for large denominators, and
 * negating the most negative value overflowed as well.
 */
int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den)
{
    uint32_t remainder = (uint32_t)num;
    uint32_t divisor = (uint32_t)den;
    uint32_t quotient = 0;
    int negative = 0;
    int k;

    if (num == 0)
        return 0;

    if (num < 0)
    {
        negative = !negative;
        remainder = 0u - remainder;
    }
    if (den < 0)
    {
        negative = !negative;
        divisor = 0u - divisor;
    }

    // One quotient bit per iteration, most significant bit first.
    for (k = 0; k < 31; k++)
    {
        quotient <<= 1;
        remainder <<= 1;
        if (remainder >= divisor)
        {
            remainder -= divisor;
            quotient++;
        }
    }

    // |quotient| has at most 31 bits, so it always fits in int32_t.
    return negative ? -(int32_t)quotient : (int32_t)quotient;
}

/*
 * Divides |num| by a denominator given in split form (|den_hi| holds the
 * upper 16 bits, |den_low| the following 15 bits) using a 16-bit reciprocal
 * estimate refined by one Newton-Raphson step. The result is in Q31.
 */
int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low)
{
    int16_t approx, tmp_hi, tmp_low, num_hi, num_low;
    int32_t tmpW32;

    approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi);
    // result in Q14 (Note: 3FFFFFFF = 0.5 in Q30)

    // tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30)
    tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1);
    // tmpW32 = den * approx

    tmpW32 = (int32_t)0x7fffffffL - tmpW32; // result in Q30 (tmpW32 = 2.0-(den*approx))

    // Store tmpW32 in hi and low format
    tmp_hi = (int16_t)(tmpW32 >> 16);
    tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);

    // tmpW32 = 1/den in Q29
    tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1;

    // 1/den in hi and low format
    tmp_hi = (int16_t)(tmpW32 >> 16);
    tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);

    // Store num in hi and low format
    num_hi = (int16_t)(num >> 16);
    num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1);

    // num * (1/den) by 32 bit multiplication (result in Q28)

    tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) +
        (num_low * tmp_hi >> 15);

    // Put result in Q31 (convert from Q28). The left shift by 3 is done on
    // the unsigned representation because tmpW32 is negative for negative
    // numerators and left-shifting a negative value is undefined.
    tmpW32 = (int32_t)((uint32_t)tmpW32 << 3);

    return tmpW32;
}
// TODO(Bjornv): Change the function parameter order to WebRTC code style.
// C version of WebRtcSpl_DownsampleFast() for generic platforms.
//
// Filters |data_in| with the Q12 FIR |coefficients| and keeps every
// |factor|-th output sample, starting at input position |delay|. Each output
// is rounded, shifted down to Q0 and saturated to 16 bits.
//
// Returns 0 on success and -1 when the arguments cannot be processed:
// |data_out_length| or |coefficients_length| is zero, |factor| is not
// positive, or |data_in_length| is smaller than the last input position
// needed (delay + factor * (data_out_length - 1) + 1).
int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
                              size_t data_in_length,
                              int16_t* data_out,
                              size_t data_out_length,
                              const int16_t* __restrict coefficients,
                              size_t coefficients_length,
                              int factor,
                              size_t delay) {
  size_t i = 0;
  size_t j = 0;
  size_t step = 0;
  size_t endpos = 0;
  int32_t out_s32 = 0;

  // Return error if any of the running conditions doesn't meet.
  // A zero |factor| would never advance the loop below while it keeps
  // writing through |data_out|; a negative one wraps around as size_t.
  if (data_out_length == 0 || coefficients_length == 0 || factor <= 0) {
    return -1;
  }

  step = (size_t)factor;
  endpos = delay + step * (data_out_length - 1) + 1;
  if (data_in_length < endpos) {
    return -1;
  }

  for (i = delay; i < endpos; i += step) {
    out_s32 = 2048;  // Round value, 0.5 in Q12.

    // NOTE(review): for j > i the index refers to samples located before
    // |data_in|; presumably callers provide filter history there - confirm.
    for (j = 0; j < coefficients_length; j++) {
      out_s32 += coefficients[j] * data_in[i - j];  // Q12.
    }

    out_s32 >>= 12;  // Q0.

    // Saturate and store the output.
    *data_out++ = WebRtcSpl_SatW32ToW16(out_s32);
  }

  return 0;
}
+ if (data_out_length == 0 || coefficients_length == 0 + || data_in_length < endpos) { + return -1; + } +#if defined(MIPS_DSP_R2_LE) + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "subu %[i], %[endpos], %[delay] \n\t" + "sll %[factor_2], %[factor], 1 \n\t" + "1: \n\t" + "move %[p_data_in], %[p_data_in_0] \n\t" + "mult $zero, $zero \n\t" + "move %[p_coefs], %[p_coefs_0] \n\t" + "sra %[j], %[coef_length], 2 \n\t" + "beq %[j], $zero, 3f \n\t" + " andi %[k], %[coef_length], 3 \n\t" + "2: \n\t" + "lwl %[tmp1], 1(%[p_data_in]) \n\t" + "lwl %[tmp2], 3(%[p_coefs]) \n\t" + "lwl %[tmp3], -3(%[p_data_in]) \n\t" + "lwl %[tmp4], 7(%[p_coefs]) \n\t" + "lwr %[tmp1], -2(%[p_data_in]) \n\t" + "lwr %[tmp2], 0(%[p_coefs]) \n\t" + "lwr %[tmp3], -6(%[p_data_in]) \n\t" + "lwr %[tmp4], 4(%[p_coefs]) \n\t" + "packrl.ph %[tmp1], %[tmp1], %[tmp1] \n\t" + "packrl.ph %[tmp3], %[tmp3], %[tmp3] \n\t" + "dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t" + "dpa.w.ph $ac0, %[tmp3], %[tmp4] \n\t" + "addiu %[j], %[j], -1 \n\t" + "addiu %[p_data_in], %[p_data_in], -8 \n\t" + "bgtz %[j], 2b \n\t" + " addiu %[p_coefs], %[p_coefs], 8 \n\t" + "3: \n\t" + "beq %[k], $zero, 5f \n\t" + " nop \n\t" + "4: \n\t" + "lhu %[tmp1], 0(%[p_data_in]) \n\t" + "lhu %[tmp2], 0(%[p_coefs]) \n\t" + "addiu %[p_data_in], %[p_data_in], -2 \n\t" + "addiu %[k], %[k], -1 \n\t" + "dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t" + "bgtz %[k], 4b \n\t" + " addiu %[p_coefs], %[p_coefs], 2 \n\t" + "5: \n\t" + "extr_r.w %[out_s32], $ac0, 12 \n\t" + "addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t" + "subu %[i], %[i], %[factor] \n\t" + "shll_s.w %[out_s32], %[out_s32], 16 \n\t" + "sra %[out_s32], %[out_s32], 16 \n\t" + "sh %[out_s32], 0(%[data_out]) \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[data_out], %[data_out], 2 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), + [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in), + [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients), + [j] 
"=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2), + [i] "=&r" (i), [k] "=&r" (k) + : [coef_length] "r" (coefficients_length), [data_out] "r" (data_out), + [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos), + [delay] "r" (delay), [factor] "r" (factor) + : "memory", "hi", "lo" + ); +#else // #if defined(MIPS_DSP_R2_LE) + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "sll %[factor_2], %[factor], 1 \n\t" + "subu %[i], %[endpos], %[delay] \n\t" + "1: \n\t" + "move %[p_data_in], %[p_data_in_0] \n\t" + "addiu %[out_s32], $zero, 2048 \n\t" + "move %[p_coefs], %[p_coefs_0] \n\t" + "sra %[j], %[coef_length], 1 \n\t" + "beq %[j], $zero, 3f \n\t" + " andi %[k], %[coef_length], 1 \n\t" + "2: \n\t" + "lh %[tmp1], 0(%[p_data_in]) \n\t" + "lh %[tmp2], 0(%[p_coefs]) \n\t" + "lh %[tmp3], -2(%[p_data_in]) \n\t" + "lh %[tmp4], 2(%[p_coefs]) \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "addiu %[p_coefs], %[p_coefs], 4 \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "addiu %[j], %[j], -1 \n\t" + "addiu %[p_data_in], %[p_data_in], -4 \n\t" + "addu %[tmp1], %[tmp1], %[tmp3] \n\t" + "bgtz %[j], 2b \n\t" + " addu %[out_s32], %[out_s32], %[tmp1] \n\t" + "3: \n\t" + "beq %[k], $zero, 4f \n\t" + " nop \n\t" + "lh %[tmp1], 0(%[p_data_in]) \n\t" + "lh %[tmp2], 0(%[p_coefs]) \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "addu %[out_s32], %[out_s32], %[tmp1] \n\t" + "4: \n\t" + "sra %[out_s32], %[out_s32], 12 \n\t" + "addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[out_s32], %[out_s32], 16 \n\t" + "sra %[out_s32], %[out_s32], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "slt %[tmp1], %[max_16], %[out_s32] \n\t" + "movn %[out_s32], %[max_16], %[tmp1] \n\t" + "slt %[tmp1], %[out_s32], %[min_16] \n\t" + "movn %[out_s32], %[min_16], %[tmp1] \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "subu %[i], %[i], %[factor] \n\t" + "sh %[out_s32], 0(%[data_out]) \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[data_out], 
%[data_out], 2 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), + [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in), [k] "=&r" (k), + [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients), + [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2), + [i] "=&r" (i) + : [coef_length] "r" (coefficients_length), [data_out] "r" (data_out), + [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos), +#if !defined(MIPS_DSP_R1_LE) + [max_16] "r" (max_16), [min_16] "r" (min_16), +#endif // #if !defined(MIPS_DSP_R1_LE) + [delay] "r" (delay), [factor] "r" (factor) + : "memory", "hi", "lo" + ); +#endif // #if defined(MIPS_DSP_R2_LE) + return 0; +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_neon.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_neon.c new file mode 100644 index 00000000..58732dab --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_neon.c @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +#include <arm_neon.h> + +// NEON intrinsics version of WebRtcSpl_DownsampleFast() +// for ARM 32-bit/64-bit platforms. 
// Filters |data_in| with the Q12 FIR |coefficients| and keeps every
// |factor|-th output sample, starting at input position |delay|. Outputs are
// produced eight at a time with NEON (specialised for factor 2 and 4), and
// the remaining (data_out_length & 7) outputs are computed in scalar code.
//
// Returns 0 on success and -1 when |data_out_length| or
// |coefficients_length| is zero, |factor| is not positive, or
// |data_in_length| is smaller than the last input position needed.
int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
                                 size_t data_in_length,
                                 int16_t* data_out,
                                 size_t data_out_length,
                                 const int16_t* __restrict coefficients,
                                 size_t coefficients_length,
                                 int factor,
                                 size_t delay) {
  size_t i = 0;
  size_t j = 0;
  int32_t out_s32 = 0;
  size_t endpos = 0;
  size_t res = 0;
  size_t endpos1 = 0;

  // Return error if any of the running conditions doesn't meet.
  // A non-positive |factor| would stall (or wrap) the loops below.
  if (data_out_length == 0 || coefficients_length == 0 || factor <= 0) {
    return -1;
  }

  endpos = delay + factor * (data_out_length - 1) + 1;
  if (data_in_length < endpos) {
    return -1;
  }

  // Number of outputs left for the scalar loop, and the input position at
  // which the vector loops stop.
  res = data_out_length & 0x7;
  endpos1 = endpos - factor * res;

  // First part, unroll the loop 8 times, with 3 subcases
  // (factor == 2, 4, others).
  switch (factor) {
    case 2: {
      for (i = delay; i < endpos1; i += 16) {
        // Round value, 0.5 in Q12.
        int32x4_t out32x4_0 = vdupq_n_s32(2048);
        int32x4_t out32x4_1 = vdupq_n_s32(2048);

#if defined(WEBRTC_ARCH_ARM64)
        // Unroll the loop 2 times. The bound is written as j + 1 < length so
        // that it cannot wrap around for small coefficient counts.
        for (j = 0; j + 1 < coefficients_length; j += 2) {
          int32x2_t coeff32 = vld1_dup_s32((const int32_t*)&coefficients[j]);
          int16x4_t coeff16x4 = vreinterpret_s16_s32(coeff32);
          int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j - 1]);

          // Mul and accumulate low 64-bit data.
          int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
          int16x4_t in16x4_1 = vget_low_s16(in16x8x2.val[1]);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 1);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_1, coeff16x4, 0);

          // Mul and accumulate high 64-bit data.
          // TODO: vget_high_s16 need extra cost on ARM64. This could be
          // replaced by vmlal_high_lane_s16. But for the interface of
          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
          // This issue need to be tracked in the future.
          int16x4_t in16x4_2 = vget_high_s16(in16x8x2.val[0]);
          int16x4_t in16x4_3 = vget_high_s16(in16x8x2.val[1]);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_2, coeff16x4, 1);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 0);
        }

        for (; j < coefficients_length; j++) {
          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
          int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]);

          // Mul and accumulate low 64-bit data.
          int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);

          // Mul and accumulate high 64-bit data.
          // TODO: vget_high_s16 need extra cost on ARM64. This could be
          // replaced by vmlal_high_lane_s16. But for the interface of
          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
          // This issue need to be tracked in the future.
          int16x4_t in16x4_1 = vget_high_s16(in16x8x2.val[0]);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
        }
#else
        // On ARMv7, the loop unrolling 2 times results in performance
        // regression.
        for (j = 0; j < coefficients_length; j++) {
          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
          int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]);

          // Mul and accumulate.
          int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
          int16x4_t in16x4_1 = vget_high_s16(in16x8x2.val[0]);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
        }
#endif

        // Saturate and store the output.
        int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
        int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
        vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
        data_out += 8;
      }
      break;
    }
    case 4: {
      for (i = delay; i < endpos1; i += 32) {
        // Round value, 0.5 in Q12.
        int32x4_t out32x4_0 = vdupq_n_s32(2048);
        int32x4_t out32x4_1 = vdupq_n_s32(2048);

        // Unroll the loop 4 times. The bound must not be written as
        // j < coefficients_length - 3: that subtraction wraps around for
        // fewer than three coefficients and the loop would then read far
        // outside both arrays.
        for (j = 0; j + 3 < coefficients_length; j += 4) {
          int16x4_t coeff16x4 = vld1_s16(&coefficients[j]);
          int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j - 3]);

          // Mul and accumulate low 64-bit data.
          int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]);
          int16x4_t in16x4_2 = vget_low_s16(in16x8x4.val[1]);
          int16x4_t in16x4_4 = vget_low_s16(in16x8x4.val[2]);
          int16x4_t in16x4_6 = vget_low_s16(in16x8x4.val[3]);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 3);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_2, coeff16x4, 2);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_4, coeff16x4, 1);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_6, coeff16x4, 0);

          // Mul and accumulate high 64-bit data.
          // TODO: vget_high_s16 need extra cost on ARM64. This could be
          // replaced by vmlal_high_lane_s16. But for the interface of
          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
          // This issue need to be tracked in the future.
          int16x4_t in16x4_1 = vget_high_s16(in16x8x4.val[0]);
          int16x4_t in16x4_3 = vget_high_s16(in16x8x4.val[1]);
          int16x4_t in16x4_5 = vget_high_s16(in16x8x4.val[2]);
          int16x4_t in16x4_7 = vget_high_s16(in16x8x4.val[3]);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 3);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 2);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_5, coeff16x4, 1);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_7, coeff16x4, 0);
        }

        for (; j < coefficients_length; j++) {
          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
          int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j]);

          // Mul and accumulate low 64-bit data.
          int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]);
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);

          // Mul and accumulate high 64-bit data.
          // TODO: vget_high_s16 need extra cost on ARM64. This could be
          // replaced by vmlal_high_lane_s16. But for the interface of
          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
          // This issue need to be tracked in the future.
          int16x4_t in16x4_1 = vget_high_s16(in16x8x4.val[0]);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
        }

        // Saturate and store the output.
        int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
        int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
        vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
        data_out += 8;
      }
      break;
    }
    default: {
      for (i = delay; i < endpos1; i += factor * 8) {
        // Round value, 0.5 in Q12.
        int32x4_t out32x4_0 = vdupq_n_s32(2048);
        int32x4_t out32x4_1 = vdupq_n_s32(2048);

        for (j = 0; j < coefficients_length; j++) {
          // Gather the eight input samples, |factor| apart, one lane each.
          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
          int16x4_t in16x4_0 = vld1_dup_s16(&data_in[i - j]);
          in16x4_0 = vld1_lane_s16(&data_in[i + factor - j], in16x4_0, 1);
          in16x4_0 = vld1_lane_s16(&data_in[i + factor * 2 - j], in16x4_0, 2);
          in16x4_0 = vld1_lane_s16(&data_in[i + factor * 3 - j], in16x4_0, 3);
          int16x4_t in16x4_1 = vld1_dup_s16(&data_in[i + factor * 4 - j]);
          in16x4_1 = vld1_lane_s16(&data_in[i + factor * 5 - j], in16x4_1, 1);
          in16x4_1 = vld1_lane_s16(&data_in[i + factor * 6 - j], in16x4_1, 2);
          in16x4_1 = vld1_lane_s16(&data_in[i + factor * 7 - j], in16x4_1, 3);

          // Mul and accumulate.
          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
        }

        // Saturate and store the output.
        int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
        int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
        vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
        data_out += 8;
      }
      break;
    }
  }

  // Second part, do the rest iterations (if any).
  for (; i < endpos; i += factor) {
    out_s32 = 2048;  // Round value, 0.5 in Q12.

    for (j = 0; j < coefficients_length; j++) {
      out_s32 = WebRtc_MulAccumW16(coefficients[j], data_in[i - j], out_s32);
    }

    // Saturate and store the output.
    out_s32 >>= 12;
    *data_out++ = WebRtcSpl_SatW32ToW16(out_s32);
  }

  return 0;
}
/*
 * This file contains the function WebRtcSpl_FilterAR().
 * The description header can be found in signal_processing_library.h
 *
 */

/*
 * Runs the all-pole (AR) filter with Q12 coefficients |a| over the
 * |x_length| input samples in |x|. Each output is kept with extended
 * precision: |filtered| receives the rounded 16-bit value and |filtered_low|
 * the remainder that rounding removed. Outputs older than the current call
 * are read from |state| / |state_low|, newest sample last; both state
 * buffers are updated before returning.
 *
 * Returns |x_length|.
 *
 * NOTE(review): |state_low| is indexed with |state_length|, and
 * |state_low_length| / |filtered_low_length| are never read - presumably
 * both state buffers are required to have the same length; confirm against
 * the callers.
 */
size_t WebRtcSpl_FilterAR(const int16_t* a,
                          size_t a_length,
                          const int16_t* x,
                          size_t x_length,
                          int16_t* state,
                          size_t state_length,
                          int16_t* state_low,
                          size_t state_low_length,
                          int16_t* filtered,
                          int16_t* filtered_low,
                          size_t filtered_low_length)
{
    int32_t o;
    int32_t oLOW;
    size_t i, j, stop;

    for (i = 0; i < x_length; i++)
    {
        // Calculate filtered[i] and filtered_low[i]
        o = (int32_t)x[i] * 4096;  // Input sample in Q12.
        oLOW = (int32_t)0;

        // Taps whose history is already available in this call's output.
        stop = (i < a_length) ? i + 1 : a_length;
        for (j = 1; j < stop; j++)
        {
            o -= a[j] * filtered[i - j];
            oLOW -= a[j] * filtered_low[i - j];
        }
        // Remaining taps reach back into the saved state; tap i + 1 uses the
        // newest state sample, stored last.
        for (j = i + 1; j < a_length; j++)
        {
            o -= a[j] * state[state_length + i - j];
            oLOW -= a[j] * state_low[state_length + i - j];
        }

        o += (oLOW >> 12);
        filtered[i] = (int16_t)((o + (int32_t)2048) >> 12);
        // Low part: what is left of |o| after removing the rounded output.
        filtered_low[i] = (int16_t)(o - ((int32_t)filtered[i] * 4096));
    }

    // Save the filter state
    if (x_length >= state_length)
    {
        WebRtcSpl_CopyFromEndW16(filtered, x_length, a_length - 1, state);
        WebRtcSpl_CopyFromEndW16(filtered_low, x_length, a_length - 1, state_low);
    } else
    {
        // Fewer new samples than state entries: shift the old state down and
        // append the new outputs at the end.
        for (i = 0; i < state_length - x_length; i++)
        {
            state[i] = state[i + x_length];
            state_low[i] = state_low[i + x_length];
        }
        for (i = 0; i < x_length; i++)
        {
            state[state_length - x_length + i] = filtered[i];
            // The low parts belong in |state_low|; storing them in |state|
            // would overwrite the value written on the previous line and
            // leave |state_low| stale.
            state_low[state_length - x_length + i] = filtered_low[i];
        }
    }

    return x_length;
}
// Applies an all-pole (AR) filter with Q12 |coefficients| to |data_in| and
// writes |data_length| samples to |data_out|.
//
// Every output is coefficients[0] * data_in[n] minus the sum of
// coefficients[k] * data_out[n - k] for k = 1 .. coefficients_length - 1,
// saturated, rounded and shifted down by 12 bits. For n < k the feedback
// terms are read from in front of |data_out|.
//
// Both |data_length| > 0 and |coefficients_length| > 1 are asserted.
void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
                               int16_t* data_out,
                               const int16_t* __restrict coefficients,
                               size_t coefficients_length,
                               size_t data_length) {
  size_t n;

  assert(data_length > 0);
  assert(coefficients_length > 1);

  for (n = 0; n < data_length; n++) {
    int32_t feedback = 0;
    int32_t acc;
    size_t tap = coefficients_length;

    // Feedback taps, from the oldest output sample to the most recent one.
    while (--tap > 0) {
      feedback += coefficients[tap] * data_out[n - tap];
    }

    acc = coefficients[0] * data_in[n];
    acc -= feedback;

    // Saturate and store the output.
    acc = WEBRTC_SPL_SAT(134215679, acc, -134217728);
    data_out[n] = (int16_t)((acc + 2048) >> 12);
  }
}
+@ r5: data_out[j] as multiplication inputs +@ r6: Calculated value for output data_out[]; interation counter for inner loop +@ r7: Partial sum of a filtering multiplication results +@ r8: Partial sum of a filtering multiplication results +@ r9: &data_out[], for filtering input; data_in[i] +@ r10: coefficients[j] +@ r11: Scratch +@ r12: &coefficients[j] + +#include "webrtc/system_wrappers/interface/asm_defines.h" + +GLOBAL_FUNCTION WebRtcSpl_FilterARFastQ12 +.align 2 +DEFINE_FUNCTION WebRtcSpl_FilterARFastQ12 + push {r4-r11} + + ldrsh r12, [sp, #32] @ data_length + subs r4, r12, #1 + beq ODD_LENGTH @ jump if data_length == 1 + +LOOP_LENGTH: + add r12, r2, r3, lsl #1 + sub r12, #4 @ &coefficients[coefficients_length - 2] + sub r9, r1, r3, lsl #1 + add r9, #2 @ &data_out[i - coefficients_length + 1] + ldr r5, [r9], #4 @ data_out[i - coefficients_length + {1,2}] + + mov r7, #0 @ sum1 + mov r8, #0 @ sum2 + subs r6, r3, #3 @ Iteration counter for inner loop. + beq ODD_A_LENGTH @ branch if coefficients_length == 3 + blt POST_LOOP_A_LENGTH @ branch if coefficients_length == 2 + +LOOP_A_LENGTH: + ldr r10, [r12], #-4 @ coefficients[j - 1], coefficients[j] + subs r6, #2 + smlatt r8, r10, r5, r8 @ sum2 += coefficients[j] * data_out[i - j + 1]; + smlatb r7, r10, r5, r7 @ sum1 += coefficients[j] * data_out[i - j]; + smlabt r7, r10, r5, r7 @ coefficients[j - 1] * data_out[i - j + 1]; + ldr r5, [r9], #4 @ data_out[i - j + 2], data_out[i - j + 3] + smlabb r8, r10, r5, r8 @ coefficients[j - 1] * data_out[i - j + 2]; + bgt LOOP_A_LENGTH + blt POST_LOOP_A_LENGTH + +ODD_A_LENGTH: + ldrsh r10, [r12, #2] @ Filter coefficients coefficients[2] + sub r12, #2 @ &coefficients[0] + smlabb r7, r10, r5, r7 @ sum1 += coefficients[2] * data_out[i - 2]; + smlabt r8, r10, r5, r8 @ sum2 += coefficients[2] * data_out[i - 1]; + ldr r5, [r9, #-2] @ data_out[i - 1], data_out[i] + +POST_LOOP_A_LENGTH: + ldr r10, [r12] @ coefficients[0], coefficients[1] + smlatb r7, r10, r5, r7 @ sum1 += coefficients[1] * 
data_out[i - 1]; + + ldr r9, [r0], #4 @ data_in[i], data_in[i + 1] + smulbb r6, r10, r9 @ output1 = coefficients[0] * data_in[i]; + sub r6, r7 @ output1 -= sum1; + + sbfx r11, r6, #12, #16 + ssat r7, #16, r6, asr #12 + cmp r7, r11 + addeq r6, r6, #2048 + ssat r6, #16, r6, asr #12 + strh r6, [r1], #2 @ Store data_out[i] + + smlatb r8, r10, r6, r8 @ sum2 += coefficients[1] * data_out[i]; + smulbt r6, r10, r9 @ output2 = coefficients[0] * data_in[i + 1]; + sub r6, r8 @ output1 -= sum1; + + sbfx r11, r6, #12, #16 + ssat r7, #16, r6, asr #12 + cmp r7, r11 + addeq r6, r6, #2048 + ssat r6, #16, r6, asr #12 + strh r6, [r1], #2 @ Store data_out[i + 1] + + subs r4, #2 + bgt LOOP_LENGTH + blt END @ For even data_length, it's done. Jump to END. + +@ Process i = data_length -1, for the case of an odd length. +ODD_LENGTH: + add r12, r2, r3, lsl #1 + sub r12, #4 @ &coefficients[coefficients_length - 2] + sub r9, r1, r3, lsl #1 + add r9, #2 @ &data_out[i - coefficients_length + 1] + mov r7, #0 @ sum1 + mov r8, #0 @ sum1 + subs r6, r3, #2 @ inner loop counter + beq EVEN_A_LENGTH @ branch if coefficients_length == 2 + +LOOP2_A_LENGTH: + ldr r10, [r12], #-4 @ coefficients[j - 1], coefficients[j] + ldr r5, [r9], #4 @ data_out[i - j], data_out[i - j + 1] + subs r6, #2 + smlatb r7, r10, r5, r7 @ sum1 += coefficients[j] * data_out[i - j]; + smlabt r8, r10, r5, r8 @ coefficients[j - 1] * data_out[i - j + 1]; + bgt LOOP2_A_LENGTH + addlt r12, #2 + blt POST_LOOP2_A_LENGTH + +EVEN_A_LENGTH: + ldrsh r10, [r12, #2] @ Filter coefficients coefficients[1] + ldrsh r5, [r9] @ data_out[i - 1] + smlabb r7, r10, r5, r7 @ sum1 += coefficients[1] * data_out[i - 1]; + +POST_LOOP2_A_LENGTH: + ldrsh r10, [r12] @ Filter coefficients coefficients[0] + ldrsh r9, [r0] @ data_in[i] + smulbb r6, r10, r9 @ output1 = coefficients[0] * data_in[i]; + sub r6, r7 @ output1 -= sum1; + sub r6, r8 @ output1 -= sum1; + sbfx r8, r6, #12, #16 + ssat r7, #16, r6, asr #12 + cmp r7, r8 + addeq r6, r6, #2048 + ssat r6, #16, r6, 
asr #12 + strh r6, [r1] @ Store the data_out[i] + +END: + pop {r4-r11} + bx lr + +@Reference C code: +@ +@void WebRtcSpl_FilterARFastQ12(int16_t* data_in, +@ int16_t* data_out, +@ int16_t* __restrict coefficients, +@ size_t coefficients_length, +@ size_t data_length) { +@ size_t i = 0; +@ size_t j = 0; +@ +@ assert(data_length > 0); +@ assert(coefficients_length > 1); +@ +@ for (i = 0; i < data_length - 1; i += 2) { +@ int32_t output1 = 0; +@ int32_t sum1 = 0; +@ int32_t output2 = 0; +@ int32_t sum2 = 0; +@ +@ for (j = coefficients_length - 1; j > 2; j -= 2) { +@ sum1 += coefficients[j] * data_out[i - j]; +@ sum1 += coefficients[j - 1] * data_out[i - j + 1]; +@ sum2 += coefficients[j] * data_out[i - j + 1]; +@ sum2 += coefficients[j - 1] * data_out[i - j + 2]; +@ } +@ +@ if (j == 2) { +@ sum1 += coefficients[2] * data_out[i - 2]; +@ sum2 += coefficients[2] * data_out[i - 1]; +@ } +@ +@ sum1 += coefficients[1] * data_out[i - 1]; +@ output1 = coefficients[0] * data_in[i]; +@ output1 -= sum1; +@ // Saturate and store the output. +@ output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728); +@ data_out[i] = (int16_t)((output1 + 2048) >> 12); +@ +@ sum2 += coefficients[1] * data_out[i]; +@ output2 = coefficients[0] * data_in[i + 1]; +@ output2 -= sum2; +@ // Saturate and store the output. +@ output2 = WEBRTC_SPL_SAT(134215679, output2, -134217728); +@ data_out[i + 1] = (int16_t)((output2 + 2048) >> 12); +@ } +@ +@ if (i == data_length - 1) { +@ int32_t output1 = 0; +@ int32_t sum1 = 0; +@ +@ for (j = coefficients_length - 1; j > 1; j -= 2) { +@ sum1 += coefficients[j] * data_out[i - j]; +@ sum1 += coefficients[j - 1] * data_out[i - j + 1]; +@ } +@ +@ if (j == 1) { +@ sum1 += coefficients[1] * data_out[i - 1]; +@ } +@ +@ output1 = coefficients[0] * data_in[i]; +@ output1 -= sum1; +@ // Saturate and store the output. 
// MIPS inline-assembly implementation of the Q12 auto-regressive filter.
//
// For every sample n in [0, data_length) the assembly computes
//   acc = coefficients[0] * data_in[n]
//         - sum(coefficients[k] * data_out[n - k]),  k = 1..coefficients_length-1
// then rounds |acc| down by 12 bits, saturates it to 16 bits and stores it in
// data_out[n].
//
// The code reads data_out[-(coefficients_length - 1)] .. data_out[-1] as filter
// state, so the caller must provide that history in front of |data_out|.
//
// Two code paths exist, selected once by the parity of
// (coefficients_length - 1): labels 1/2 handle an even number of feedback
// taps, labels 3/4 handle an odd number and add one extra multiply-subtract
// after the pairwise loop.
//
// NOTE(review): addresses are kept in plain |int| variables, which presumably
// relies on 32-bit pointers (MIPS32) - confirm before building for other ABIs.
// NOTE(review): in the odd path the pairwise loop at label 4 is a do-while
// that exits only once j <= 0 after "j -= 2"; starting from an odd j this
// looks like it runs (j + 1) / 2 times and is then followed by one more
// msub, i.e. more terms than there are feedback taps. Verify against the C
// reference with an even coefficients_length before relying on this path.
void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
                               int16_t* data_out,
                               const int16_t* __restrict coefficients,
                               size_t coefficients_length,
                               size_t data_length) {
  int r0, r1, r2, r3;
  int coef0, offset;
  int i, j, k;
  int coefptr, outptr, tmpout, inptr;
#if !defined(MIPS_DSP_R1_LE)
  // 16-bit saturation bounds, only needed when the DSP saturating shift
  // (shll_s.w) is not available.
  int max16 = 0x7FFF;
  int min16 = 0xFFFF8000;
#endif  // #if !defined(MIPS_DSP_R1_LE)

  assert(data_length > 0);
  assert(coefficients_length > 1);

  __asm __volatile (
    ".set push \n\t"
    // noreorder: the instruction after each branch is its delay slot and is
    // executed whether or not the branch is taken.
    ".set noreorder \n\t"
    // Setup: i = remaining samples, coef0 = coefficients[0],
    // j = number of feedback taps, k = parity of j,
    // offset = j * sizeof(int16_t).
    "addiu %[i], %[data_length], 0 \n\t"
    "lh %[coef0], 0(%[coefficients]) \n\t"
    "addiu %[j], %[coefficients_length], -1 \n\t"
    "andi %[k], %[j], 1 \n\t"
    "sll %[offset], %[j], 1 \n\t"
    // outptr -> oldest output sample used by the first iteration.
    "subu %[outptr], %[data_out], %[offset] \n\t"
    "addiu %[inptr], %[data_in], 0 \n\t"
    "bgtz %[k], 3f \n\t"
    // Delay slot: coefptr -> coefficients[coefficients_length - 1].
    " addu %[coefptr], %[coefficients], %[offset] \n\t"
    // ---- Path for an even number of feedback taps ----
   "1: \n\t"
    // Start the accumulator (hi/lo) with coefficients[0] * data_in[n].
    "lh %[r0], 0(%[inptr]) \n\t"
    "addiu %[i], %[i], -1 \n\t"
    "addiu %[tmpout], %[outptr], 0 \n\t"
    "mult %[r0], %[coef0] \n\t"
   "2: \n\t"
    // Subtract two taps per iteration: outputs are walked forwards while the
    // coefficients are walked backwards.
    "lh %[r0], 0(%[tmpout]) \n\t"
    "lh %[r1], 0(%[coefptr]) \n\t"
    "lh %[r2], 2(%[tmpout]) \n\t"
    "lh %[r3], -2(%[coefptr]) \n\t"
    "addiu %[tmpout], %[tmpout], 4 \n\t"
    "msub %[r0], %[r1] \n\t"
    "msub %[r2], %[r3] \n\t"
    "addiu %[j], %[j], -2 \n\t"
    "bgtz %[j], 2b \n\t"
    " addiu %[coefptr], %[coefptr], -4 \n\t"
#if defined(MIPS_DSP_R1_LE)
    // DSP: read the accumulator shifted right by 12 with rounding.
    "extr_r.w %[r0], $ac0, 12 \n\t"
#else  // #if defined(MIPS_DSP_R1_LE)
    "mflo %[r0] \n\t"
#endif  // #if defined(MIPS_DSP_R1_LE)
    // Rewind the coefficient pointer and tap counter for the next sample.
    "addu %[coefptr], %[coefficients], %[offset] \n\t"
    "addiu %[inptr], %[inptr], 2 \n\t"
    "addiu %[j], %[coefficients_length], -1 \n\t"
#if defined(MIPS_DSP_R1_LE)
    // Saturating shift up by 16 followed by an arithmetic shift back clamps
    // the value to the int16_t range.
    "shll_s.w %[r0], %[r0], 16 \n\t"
    "sra %[r0], %[r0], 16 \n\t"
#else  // #if defined(MIPS_DSP_R1_LE)
    // Round (add 2048, shift right 12) and clamp to [min16, max16] by hand.
    "addiu %[r0], %[r0], 2048 \n\t"
    "sra %[r0], %[r0], 12 \n\t"
    "slt %[r1], %[max16], %[r0] \n\t"
    "movn %[r0], %[max16], %[r1] \n\t"
    "slt %[r1], %[r0], %[min16] \n\t"
    "movn %[r0], %[min16], %[r1] \n\t"
#endif  // #if defined(MIPS_DSP_R1_LE)
    // tmpout has advanced to the slot of the sample being produced.
    "sh %[r0], 0(%[tmpout]) \n\t"
    "bgtz %[i], 1b \n\t"
    // Delay slot: slide the history window forward by one sample.
    " addiu %[outptr], %[outptr], 2 \n\t"
    "b 5f \n\t"
    " nop \n\t"
    // ---- Path for an odd number of feedback taps ----
   "3: \n\t"
    "lh %[r0], 0(%[inptr]) \n\t"
    "addiu %[i], %[i], -1 \n\t"
    "addiu %[tmpout], %[outptr], 0 \n\t"
    "mult %[r0], %[coef0] \n\t"
   "4: \n\t"
    "lh %[r0], 0(%[tmpout]) \n\t"
    "lh %[r1], 0(%[coefptr]) \n\t"
    "lh %[r2], 2(%[tmpout]) \n\t"
    "lh %[r3], -2(%[coefptr]) \n\t"
    "addiu %[tmpout], %[tmpout], 4 \n\t"
    "msub %[r0], %[r1] \n\t"
    "msub %[r2], %[r3] \n\t"
    "addiu %[j], %[j], -2 \n\t"
    "bgtz %[j], 4b \n\t"
    " addiu %[coefptr], %[coefptr], -4 \n\t"
    // One additional tap after the pairwise loop.
    "lh %[r0], 0(%[tmpout]) \n\t"
    "lh %[r1], 0(%[coefptr]) \n\t"
    "msub %[r0], %[r1] \n\t"
#if defined(MIPS_DSP_R1_LE)
    "extr_r.w %[r0], $ac0, 12 \n\t"
#else  // #if defined(MIPS_DSP_R1_LE)
    "mflo %[r0] \n\t"
#endif  // #if defined(MIPS_DSP_R1_LE)
    "addu %[coefptr], %[coefficients], %[offset] \n\t"
    "addiu %[inptr], %[inptr], 2 \n\t"
    "addiu %[j], %[coefficients_length], -1 \n\t"
#if defined(MIPS_DSP_R1_LE)
    "shll_s.w %[r0], %[r0], 16 \n\t"
    "sra %[r0], %[r0], 16 \n\t"
#else  // #if defined(MIPS_DSP_R1_LE)
    "addiu %[r0], %[r0], 2048 \n\t"
    "sra %[r0], %[r0], 12 \n\t"
    "slt %[r1], %[max16], %[r0] \n\t"
    "movn %[r0], %[max16], %[r1] \n\t"
    "slt %[r1], %[r0], %[min16] \n\t"
    "movn %[r0], %[min16], %[r1] \n\t"
#endif  // #if defined(MIPS_DSP_R1_LE)
    // Here tmpout was not advanced past the extra tap, hence the +2 offset.
    "sh %[r0], 2(%[tmpout]) \n\t"
    "bgtz %[i], 3b \n\t"
    " addiu %[outptr], %[outptr], 2 \n\t"
   "5: \n\t"
    ".set pop \n\t"
    // All scratch operands are early-clobber outputs so they never share a
    // register with an input operand.
    : [i] "=&r" (i), [j] "=&r" (j), [k] "=&r" (k), [r0] "=&r" (r0),
      [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
      [coef0] "=&r" (coef0), [offset] "=&r" (offset),
      [outptr] "=&r" (outptr), [inptr] "=&r" (inptr),
      [coefptr] "=&r" (coefptr), [tmpout] "=&r" (tmpout)
    : [coefficients] "r" (coefficients), [data_length] "r" (data_length),
      [coefficients_length] "r" (coefficients_length),
#if !defined(MIPS_DSP_R1_LE)
      [max16] "r" (max16), [min16] "r" (min16),
#endif
      [data_out] "r" (data_out), [data_in] "r" (data_in)
    // hi/lo hold the multiply accumulator; "memory" because data_out is
    // written through a register the compiler cannot track.
    : "hi", "lo", "memory"
  );
}
/*
 * Moving-average (FIR) filter with Q12 coefficients.
 *
 * For every i in [0, length):
 *   acc        = sum(B[j] * in_ptr[i - j]),  j = 0..B_length-1
 *   out_ptr[i] = (saturate(acc) + 2048) >> 12
 *
 * Input:
 *      - in_ptr    : Input samples. The filter state lives in front of the
 *                    buffer: in_ptr[-(B_length - 1)] .. in_ptr[-1] are read.
 *      - B         : Filter coefficients in Q12.
 *      - B_length  : Number of coefficients in |B|.
 *      - length    : Number of samples to produce.
 *
 * Output:
 *      - out_ptr   : |length| filtered samples.
 *
 * The accumulator is a plain int32_t; the products are summed without
 * intermediate overflow checks.
 */
void WebRtcSpl_FilterMAFastQ12(const int16_t* in_ptr,
                               int16_t* out_ptr,
                               const int16_t* B,
                               size_t B_length,
                               size_t length)
{
    /* 2^27 = 134217728 corresponds to 32768 in Q12. The upper bound is
     * 2^27 - 2049 so that adding the rounding constant 2048 still yields at
     * most 32767 after the shift. */
    const int32_t sat_max = (int32_t)134215679;
    const int32_t sat_min = (int32_t)-134217728;
    size_t i, j;

    for (i = 0; i < length; i++)
    {
        /* Newest input sample for this output. Older samples are reached by
         * stepping the pointer backwards; computing "i - j" in size_t would
         * wrap around whenever j > i. */
        const int16_t* newest = in_ptr + i;
        int32_t o = 0;

        for (j = 0; j < B_length; j++)
        {
            o += B[j] * *(newest - j);
        }

        /* Saturate the output to the Q12 image of the int16_t range. */
        if (o > sat_max)
        {
            o = sat_max;
        }
        else if (o < sat_min)
        {
            o = sat_min;
        }

        /* Round and convert from Q12 back to Q0. */
        *out_ptr++ = (int16_t)((o + (int32_t)2048) >> 12);
    }
}
// Hanning table with 256 entries
static const int16_t kHanningTable[] = {
    1,      2,      6,     10,     15,     22,     30,     39,
   50,     62,     75,     89,    104,    121,    138,    157,
  178,    199,    222,    246,    271,    297,    324,    353,
  383,    413,    446,    479,    513,    549,    586,    624,
  663,    703,    744,    787,    830,    875,    920,    967,
 1015,   1064,   1114,   1165,   1218,   1271,   1325,   1381,
 1437,   1494,   1553,   1612,   1673,   1734,   1796,   1859,
 1924,   1989,   2055,   2122,   2190,   2259,   2329,   2399,
 2471,   2543,   2617,   2691,   2765,   2841,   2918,   2995,
 3073,   3152,   3232,   3312,   3393,   3475,   3558,   3641,
 3725,   3809,   3895,   3980,   4067,   4154,   4242,   4330,
 4419,   4509,   4599,   4689,   4781,   4872,   4964,   5057,
 5150,   5244,   5338,   5432,   5527,   5622,   5718,   5814,
 5910,   6007,   6104,   6202,   6299,   6397,   6495,   6594,
 6693,   6791,   6891,   6990,   7090,   7189,   7289,   7389,
 7489,   7589,   7690,   7790,   7890,   7991,   8091,   8192,
 8293,   8393,   8494,   8594,   8694,   8795,   8895,   8995,
 9095,   9195,   9294,   9394,   9493,   9593,   9691,   9790,
 9889,   9987,  10085,  10182,  10280,  10377,  10474,  10570,
10666,  10762,  10857,  10952,  11046,  11140,  11234,  11327,
11420,  11512,  11603,  11695,  11785,  11875,  11965,  12054,
12142,  12230,  12317,  12404,  12489,  12575,  12659,  12743,
12826,  12909,  12991,  13072,  13152,  13232,  13311,  13389,
13466,  13543,  13619,  13693,  13767,  13841,  13913,  13985,
14055,  14125,  14194,  14262,  14329,  14395,  14460,  14525,
14588,  14650,  14711,  14772,  14831,  14890,  14947,  15003,
15059,  15113,  15166,  15219,  15270,  15320,  15369,  15417,
15464,  15509,  15554,  15597,  15640,  15681,  15721,  15760,
15798,  15835,  15871,  15905,  15938,  15971,  16001,  16031,
16060,  16087,  16113,  16138,  16162,  16185,  16206,  16227,
16246,  16263,  16280,  16295,  16309,  16322,  16334,  16345,
16354,  16362,  16369,  16374,  16378,  16382,  16383,  16384
};

// Fills |v| with |size| samples taken from kHanningTable.
//
// A fixed-point position is advanced by a constant step for every output
// sample and its upper bits (position >> 22) select the table entry. The step
// is WebRtcSpl_DivW32W16(0x40000000, (int16_t)size); note that |size| is
// narrowed to int16_t for that call. The start offset depends on whether
// |size| is below 513.
void WebRtcSpl_GetHanningWindow(int16_t *v, size_t size)
{
    const int32_t step =
        WebRtcSpl_DivW32W16((int32_t)0x40000000, (int16_t)size);
    int32_t position =
        (size < 513) ? (int32_t)-0x200000 : (int32_t)-0x100000;
    size_t n;

    for (n = 0; n < size; n++)
    {
        position += step;
        v[n] = kHanningTable[position >> 22];
    }
}

// ---- get_scaling_square.c ----

// Returns a shift count derived from the largest magnitude in |in_vector| and
// from |times|: with t = WebRtcSpl_NormW32(max * max) and
// nbits = WebRtcSpl_GetSizeInBits(times), the result is 0 when the maximum is
// 0 or when t > nbits, and nbits - t otherwise.
//
// Input:
//      - in_vector         : Samples to inspect.
//      - in_vector_length  : Number of samples in |in_vector|.
//      - times             : Count whose bit size bounds the result.
int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
                                   size_t in_vector_length,
                                   size_t times)
{
    const int16_t bits_for_times = WebRtcSpl_GetSizeInBits((uint32_t)times);
    int16_t peak = -1;
    int16_t headroom;
    size_t k;

    // Track the largest magnitude; each magnitude is stored back into 16 bits.
    for (k = 0; k < in_vector_length; k++)
    {
        const int16_t sample = in_vector[k];
        const int16_t magnitude = (int16_t)((sample > 0) ? sample : -sample);

        if (magnitude > peak)
        {
            peak = magnitude;
        }
    }
    headroom = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(peak, peak));

    if (peak == 0)
    {
        return 0; // Since norm(0) returns 0
    }
    return (headroom > bits_for_times) ? 0 : bits_for_times - headroom;
}
// Multiplies |in| element-wise with a window that is read backwards.
//
//   out[i] = (in[i] * win[-i]) >> right_shifts,  i = 0..vector_length-1
//
// |win| must point at the LAST window element to use. Elements are addressed
// relative to |win| instead of walking a pointer, so no pointer below the
// first window element used is ever formed.
void WebRtcSpl_ReverseOrderMultArrayElements(int16_t *out, const int16_t *in,
                                             const int16_t *win,
                                             size_t vector_length,
                                             int16_t right_shifts)
{
    size_t i;

    for (i = 0; i < vector_length; i++)
    {
        out[i] = (int16_t)((in[i] * *(win - i)) >> right_shifts);
    }
}

// Element-wise product of |in| and |win|:
//   out[i] = (in[i] * win[i]) >> right_shifts
void WebRtcSpl_ElementwiseVectorMult(int16_t *out, const int16_t *in,
                                     const int16_t *win, size_t vector_length,
                                     int16_t right_shifts)
{
    size_t i;

    for (i = 0; i < vector_length; i++)
    {
        out[i] = (int16_t)((in[i] * win[i]) >> right_shifts);
    }
}

// Element-wise sum of |in1| and |in2|:
//   out[i] = (in1[i] + in2[i]) >> right_shifts
void WebRtcSpl_AddVectorsAndShift(int16_t *out, const int16_t *in1,
                                  const int16_t *in2, size_t vector_length,
                                  int16_t right_shifts)
{
    size_t i;

    for (i = 0; i < vector_length; i++)
    {
        out[i] = (int16_t)((in1[i] + in2[i]) >> right_shifts);
    }
}

// Accumulates an affine transform of |in| into |out|:
//   out[i] += (in[i] * gain + add_constant) >> right_shifts
void WebRtcSpl_AddAffineVectorToVector(int16_t *out, int16_t *in,
                                       int16_t gain, int32_t add_constant,
                                       int16_t right_shifts,
                                       size_t vector_length)
{
    size_t i;

    for (i = 0; i < vector_length; i++)
    {
        out[i] += (int16_t)((in[i] * gain + add_constant) >> right_shifts);
    }
}

// Writes an affine transform of |in| to |out|:
//   out[i] = (in[i] * gain + add_constant) >> right_shifts
void WebRtcSpl_AffineTransformVector(int16_t *out, int16_t *in,
                                     int16_t gain, int32_t add_constant,
                                     int16_t right_shifts, size_t vector_length)
{
    size_t i;

    for (i = 0; i < vector_length; i++)
    {
        out[i] = (int16_t)((in[i] * gain + add_constant) >> right_shifts);
    }
}
+enum {kMaxFFTOrder = 10}; + +struct RealFFT; + +#ifdef __cplusplus +extern "C" { +#endif + +struct RealFFT* WebRtcSpl_CreateRealFFT(int order); +void WebRtcSpl_FreeRealFFT(struct RealFFT* self); + +// Compute an FFT for a real-valued signal of length of 2^order, +// where 1 < order <= MAX_FFT_ORDER. Transform length is determined by the +// specification structure, which must be initialized prior to calling the FFT +// function with WebRtcSpl_CreateRealFFT(). +// The relationship between the input and output sequences can +// be expressed in terms of the DFT, i.e.: +// x[n] = (2^(-scalefactor)/N) . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N) +// n=0,1,2,...N-1 +// N=2^order. +// The conjugate-symmetric output sequence is represented using a CCS vector, +// which is of length N+2, and is organized as follows: +// Index: 0 1 2 3 4 5 . . . N-2 N-1 N N+1 +// Component: R0 0 R1 I1 R2 I2 . . . R[N/2-1] I[N/2-1] R[N/2] 0 +// where R[n] and I[n], respectively, denote the real and imaginary components +// for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length. +// Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to +// the foldover frequency. +// +// Input Arguments: +// self - pointer to preallocated and initialized FFT specification structure. +// real_data_in - the input signal. For an ARM Neon platform, it must be +// aligned on a 32-byte boundary. +// +// Output Arguments: +// complex_data_out - the output complex signal with (2^order + 2) 16-bit +// elements. For an ARM Neon platform, it must be different +// from real_data_in, and aligned on a 32-byte boundary. +// +// Return Value: +// 0 - FFT calculation is successful. +// -1 - Error with bad arguments (NULL pointers). +int WebRtcSpl_RealForwardFFT(struct RealFFT* self, + const int16_t* real_data_in, + int16_t* complex_data_out); + +// Compute the inverse FFT for a conjugate-symmetric input sequence of length of +// 2^order, where 1 < order <= MAX_FFT_ORDER. 
Transform length is determined by +// the specification structure, which must be initialized prior to calling the +// FFT function with WebRtcSpl_CreateRealFFT(). +// For a transform of length M, the input sequence is represented using a packed +// CCS vector of length M+2, which is explained in the comments for +// WebRtcSpl_RealForwardFFTC above. +// +// Input Arguments: +// self - pointer to preallocated and initialized FFT specification structure. +// complex_data_in - the input complex signal with (2^order + 2) 16-bit +// elements. For an ARM Neon platform, it must be aligned on +// a 32-byte boundary. +// +// Output Arguments: +// real_data_out - the output real signal. For an ARM Neon platform, it must +// be different to complex_data_in, and aligned on a 32-byte +// boundary. +// +// Return Value: +// 0 or a positive number - a value that the elements in the |real_data_out| +// should be shifted left with in order to get +// correct physical values. +// -1 - Error with bad arguments (NULL pointers). +int WebRtcSpl_RealInverseFFT(struct RealFFT* self, + const int16_t* complex_data_in, + int16_t* real_data_out); + +#ifdef __cplusplus +} +#endif + +#endif // WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/signal_processing_library.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/signal_processing_library.h new file mode 100644 index 00000000..2e96883e --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/signal_processing_library.h @@ -0,0 +1,1645 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This header file includes all of the fix point signal processing library (SPL) function + * descriptions and declarations. + * For specific function calls, see bottom of file. + */ + +#ifndef WEBRTC_SPL_SIGNAL_PROCESSING_LIBRARY_H_ +#define WEBRTC_SPL_SIGNAL_PROCESSING_LIBRARY_H_ + +#include <string.h> +#include "webrtc/typedefs.h" + +// Macros specific for the fixed point implementation +#define WEBRTC_SPL_WORD16_MAX 32767 +#define WEBRTC_SPL_WORD16_MIN -32768 +#define WEBRTC_SPL_WORD32_MAX (int32_t)0x7fffffff +#define WEBRTC_SPL_WORD32_MIN (int32_t)0x80000000 +#define WEBRTC_SPL_MAX_LPC_ORDER 14 +#define WEBRTC_SPL_MIN(A, B) (A < B ? A : B) // Get min value +#define WEBRTC_SPL_MAX(A, B) (A > B ? A : B) // Get max value +// TODO(kma/bjorn): For the next two macros, investigate how to correct the code +// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN. +#define WEBRTC_SPL_ABS_W16(a) \ + (((int16_t)a >= 0) ? ((int16_t)a) : -((int16_t)a)) +#define WEBRTC_SPL_ABS_W32(a) \ + (((int32_t)a >= 0) ? 
((int32_t)a) : -((int32_t)a)) + +#define WEBRTC_SPL_MUL(a, b) \ + ((int32_t) ((int32_t)(a) * (int32_t)(b))) +#define WEBRTC_SPL_UMUL(a, b) \ + ((uint32_t) ((uint32_t)(a) * (uint32_t)(b))) +#define WEBRTC_SPL_UMUL_32_16(a, b) \ + ((uint32_t) ((uint32_t)(a) * (uint16_t)(b))) +#define WEBRTC_SPL_MUL_16_U16(a, b) \ + ((int32_t)(int16_t)(a) * (uint16_t)(b)) + +#ifndef WEBRTC_ARCH_ARM_V7 +// For ARMv7 platforms, these are inline functions in spl_inl_armv7.h +#ifndef MIPS32_LE +// For MIPS platforms, these are inline functions in spl_inl_mips.h +#define WEBRTC_SPL_MUL_16_16(a, b) \ + ((int32_t) (((int16_t)(a)) * ((int16_t)(b)))) +#define WEBRTC_SPL_MUL_16_32_RSFT16(a, b) \ + (WEBRTC_SPL_MUL_16_16(a, b >> 16) \ + + ((WEBRTC_SPL_MUL_16_16(a, (b & 0xffff) >> 1) + 0x4000) >> 15)) +#endif +#endif + +#define WEBRTC_SPL_MUL_16_32_RSFT11(a, b) \ + ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) << 5) \ + + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x0200) >> 10)) +#define WEBRTC_SPL_MUL_16_32_RSFT14(a, b) \ + ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) << 2) \ + + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x1000) >> 13)) +#define WEBRTC_SPL_MUL_16_32_RSFT15(a, b) \ + ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) << 1) \ + + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x2000) >> 14)) + +#define WEBRTC_SPL_MUL_16_16_RSFT(a, b, c) \ + (WEBRTC_SPL_MUL_16_16(a, b) >> (c)) + +#define WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(a, b, c) \ + ((WEBRTC_SPL_MUL_16_16(a, b) + ((int32_t) \ + (((int32_t)1) << ((c) - 1)))) >> (c)) + +// C + the 32 most significant bits of A * B +#define WEBRTC_SPL_SCALEDIFF32(A, B, C) \ + (C + (B >> 16) * A + (((uint32_t)(0x0000FFFF & B) * A) >> 16)) + +#define WEBRTC_SPL_SAT(a, b, c) (b > a ? a : b < c ? c : b) + +// Shifting with negative numbers allowed +// Positive means left shift +#define WEBRTC_SPL_SHIFT_W32(x, c) \ + (((c) >= 0) ? 
((x) << (c)) : ((x) >> (-(c)))) + +// Shifting with negative numbers not allowed +// We cannot do casting here due to signed/unsigned problem +#define WEBRTC_SPL_LSHIFT_W32(x, c) ((x) << (c)) + +#define WEBRTC_SPL_RSHIFT_U32(x, c) ((uint32_t)(x) >> (c)) + +#define WEBRTC_SPL_RAND(a) \ + ((int16_t)((((int16_t)a * 18816) >> 7) & 0x00007fff)) + +#ifdef __cplusplus +extern "C" { +#endif + +#define WEBRTC_SPL_MEMCPY_W16(v1, v2, length) \ + memcpy(v1, v2, (length) * sizeof(int16_t)) + +// inline functions: +#include "webrtc/common_audio/signal_processing/include/spl_inl.h" + +// Initialize SPL. Currently it contains only function pointer initialization. +// If the underlying platform is known to be ARM-Neon (WEBRTC_HAS_NEON defined), +// the pointers will be assigned to code optimized for Neon; otherwise +// if run-time Neon detection (WEBRTC_DETECT_NEON) is enabled, the pointers +// will be assigned to either Neon code or generic C code; otherwise, generic C +// code will be assigned. +// Note that this function MUST be called in any application that uses SPL +// functions. +void WebRtcSpl_Init(); + +int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector, + size_t in_vector_length, + size_t times); + +// Copy and set operations. Implementation in copy_set_operations.c. +// Descriptions at bottom of file. +void WebRtcSpl_MemSetW16(int16_t* vector, + int16_t set_value, + size_t vector_length); +void WebRtcSpl_MemSetW32(int32_t* vector, + int32_t set_value, + size_t vector_length); +void WebRtcSpl_MemCpyReversedOrder(int16_t* out_vector, + int16_t* in_vector, + size_t vector_length); +void WebRtcSpl_CopyFromEndW16(const int16_t* in_vector, + size_t in_vector_length, + size_t samples, + int16_t* out_vector); +void WebRtcSpl_ZerosArrayW16(int16_t* vector, + size_t vector_length); +void WebRtcSpl_ZerosArrayW32(int32_t* vector, + size_t vector_length); +// End: Copy and set operations. + + +// Minimum and maximum operation functions and their pointers. 
+// Implementation in min_max_operations.c. + +// Returns the largest absolute value in a signed 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum absolute value in vector. +typedef int16_t (*MaxAbsValueW16)(const int16_t* vector, size_t length); +extern MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16; +int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length); +#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON) +int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length); +#endif + +// Returns the largest absolute value in a signed 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum absolute value in vector. +typedef int32_t (*MaxAbsValueW32)(const int32_t* vector, size_t length); +extern MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32; +int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length); +#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON) +int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length); +#endif +#if defined(MIPS_DSP_R1_LE) +int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length); +#endif + +// Returns the maximum value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum sample value in |vector|. 
+typedef int16_t (*MaxValueW16)(const int16_t* vector, size_t length); +extern MaxValueW16 WebRtcSpl_MaxValueW16; +int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length); +#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON) +int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length); +#endif + +// Returns the maximum value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum sample value in |vector|. +typedef int32_t (*MaxValueW32)(const int32_t* vector, size_t length); +extern MaxValueW32 WebRtcSpl_MaxValueW32; +int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length); +#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON) +int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length); +#endif + +// Returns the minimum value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Minimum sample value in |vector|. +typedef int16_t (*MinValueW16)(const int16_t* vector, size_t length); +extern MinValueW16 WebRtcSpl_MinValueW16; +int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length); +#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON) +int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length); +#endif + +// Returns the minimum value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Minimum sample value in |vector|. 
+typedef int32_t (*MinValueW32)(const int32_t* vector, size_t length); +extern MinValueW32 WebRtcSpl_MinValueW32; +int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length); +#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON) +int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length); +#endif + +// Returns the vector index to the largest absolute value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the maximum absolute value in vector. +// If there are multiple equal maxima, return the index of the +// first. -32768 will always have precedence over 32767 (despite +// -32768 presenting an int16 absolute value of 32767). +size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length); + +// Returns the vector index to the maximum sample value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the maximum value in vector (if multiple +// indexes have the maximum, return the first). +size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length); + +// Returns the vector index to the maximum sample value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the maximum value in vector (if multiple +// indexes have the maximum, return the first). +size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length); + +// Returns the vector index to the minimum sample value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the mimimum value in vector (if multiple +// indexes have the minimum, return the first). 
+size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length); + +// Returns the vector index to the minimum sample value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the mimimum value in vector (if multiple +// indexes have the minimum, return the first). +size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length); + +// End: Minimum and maximum operations. + + +// Vector scaling operations. Implementation in vector_scaling_operations.c. +// Description at bottom of file. +void WebRtcSpl_VectorBitShiftW16(int16_t* out_vector, + size_t vector_length, + const int16_t* in_vector, + int16_t right_shifts); +void WebRtcSpl_VectorBitShiftW32(int32_t* out_vector, + size_t vector_length, + const int32_t* in_vector, + int16_t right_shifts); +void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out_vector, + size_t vector_length, + const int32_t* in_vector, + int right_shifts); +void WebRtcSpl_ScaleVector(const int16_t* in_vector, + int16_t* out_vector, + int16_t gain, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_ScaleVectorWithSat(const int16_t* in_vector, + int16_t* out_vector, + int16_t gain, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_ScaleAndAddVectors(const int16_t* in_vector1, + int16_t gain1, int right_shifts1, + const int16_t* in_vector2, + int16_t gain2, int right_shifts2, + int16_t* out_vector, + size_t vector_length); + +// The functions (with related pointer) perform the vector operation: +// out_vector[k] = ((scale1 * in_vector1[k]) + (scale2 * in_vector2[k]) +// + round_value) >> right_shifts, +// where round_value = (1 << right_shifts) >> 1. 
+// +// Input: +// - in_vector1 : Input vector 1 +// - in_vector1_scale : Gain to be used for vector 1 +// - in_vector2 : Input vector 2 +// - in_vector2_scale : Gain to be used for vector 2 +// - right_shifts : Number of right bit shifts to be applied +// - length : Number of elements in the input vectors +// +// Output: +// - out_vector : Output vector +// Return value : 0 if OK, -1 if (in_vector1 == NULL +// || in_vector2 == NULL || out_vector == NULL +// || length <= 0 || right_shift < 0). +typedef int (*ScaleAndAddVectorsWithRound)(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + size_t length); +extern ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound; +int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + size_t length); +#if defined(MIPS_DSP_R1_LE) +int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + size_t length); +#endif +// End: Vector scaling operations. + +// iLBC specific functions. Implementations in ilbc_specific_functions.c. +// Description at bottom of file. 
+void WebRtcSpl_ReverseOrderMultArrayElements(int16_t* out_vector, + const int16_t* in_vector, + const int16_t* window, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_ElementwiseVectorMult(int16_t* out_vector, + const int16_t* in_vector, + const int16_t* window, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_AddVectorsAndShift(int16_t* out_vector, + const int16_t* in_vector1, + const int16_t* in_vector2, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_AddAffineVectorToVector(int16_t* out_vector, + int16_t* in_vector, + int16_t gain, + int32_t add_constant, + int16_t right_shifts, + size_t vector_length); +void WebRtcSpl_AffineTransformVector(int16_t* out_vector, + int16_t* in_vector, + int16_t gain, + int32_t add_constant, + int16_t right_shifts, + size_t vector_length); +// End: iLBC specific functions. + +// Signal processing operations. + +// A 32-bit fix-point implementation of auto-correlation computation +// +// Input: +// - in_vector : Vector to calculate autocorrelation upon +// - in_vector_length : Length (in samples) of |vector| +// - order : The order up to which the autocorrelation should be +// calculated +// +// Output: +// - result : auto-correlation values (values should be seen +// relative to each other since the absolute values +// might have been down shifted to avoid overflow) +// +// - scale : The number of left shifts required to obtain the +// auto-correlation in Q0 +// +// Return value : Number of samples in |result|, i.e. 
(order+1) +size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector, + size_t in_vector_length, + size_t order, + int32_t* result, + int* scale); + +// A 32-bit fix-point implementation of the Levinson-Durbin algorithm that +// does NOT use the 64 bit class +// +// Input: +// - auto_corr : Vector with autocorrelation values of length >= |order|+1 +// - order : The LPC filter order (support up to order 20) +// +// Output: +// - lpc_coef : lpc_coef[0..order] LPC coefficients in Q12 +// - refl_coef : refl_coef[0...order-1]| Reflection coefficients in Q15 +// +// Return value : 1 for stable 0 for unstable +int16_t WebRtcSpl_LevinsonDurbin(const int32_t* auto_corr, + int16_t* lpc_coef, + int16_t* refl_coef, + size_t order); + +// Converts reflection coefficients |refl_coef| to LPC coefficients |lpc_coef|. +// This version is a 16 bit operation. +// +// NOTE: The 16 bit refl_coef -> lpc_coef conversion might result in a +// "slightly unstable" filter (i.e., a pole just outside the unit circle) in +// "rare" cases even if the reflection coefficients are stable. +// +// Input: +// - refl_coef : Reflection coefficients in Q15 that should be converted +// to LPC coefficients +// - use_order : Number of coefficients in |refl_coef| +// +// Output: +// - lpc_coef : LPC coefficients in Q12 +void WebRtcSpl_ReflCoefToLpc(const int16_t* refl_coef, + int use_order, + int16_t* lpc_coef); + +// Converts LPC coefficients |lpc_coef| to reflection coefficients |refl_coef|. +// This version is a 16 bit operation. +// The conversion is implemented by the step-down algorithm. +// +// Input: +// - lpc_coef : LPC coefficients in Q12, that should be converted to +// reflection coefficients +// - use_order : Number of coefficients in |lpc_coef| +// +// Output: +// - refl_coef : Reflection coefficients in Q15. 
+void WebRtcSpl_LpcToReflCoef(int16_t* lpc_coef, + int use_order, + int16_t* refl_coef); + +// Calculates reflection coefficients (16 bit) from auto-correlation values +// +// Input: +// - auto_corr : Auto-correlation values +// - use_order : Number of coefficients wanted to be calculated +// +// Output: +// - refl_coef : Reflection coefficients in Q15. +void WebRtcSpl_AutoCorrToReflCoef(const int32_t* auto_corr, + int use_order, + int16_t* refl_coef); + +// The functions (with related pointer) calculate the cross-correlation between +// two sequences |seq1| and |seq2|. +// |seq1| is fixed and |seq2| slides as the pointer is increased with the +// amount |step_seq2|. Note the arguments should obey the relationship: +// |dim_seq| - 1 + |step_seq2| * (|dim_cross_correlation| - 1) < +// buffer size of |seq2| +// +// Input: +// - seq1 : First sequence (fixed throughout the correlation) +// - seq2 : Second sequence (slides |step_seq2| for each +// new correlation) +// - dim_seq : Number of samples to use in the cross-correlation +// - dim_cross_correlation : Number of cross-correlations to calculate (the +// start position for |seq2| is updated for each +// new one) +// - right_shifts : Number of right bit shifts to use. This will +// become the output Q-domain. +// - step_seq2 : How many (positive or negative) steps the +// |seq2| pointer should be updated for each new +// cross-correlation value. 
+// +// Output: +// - cross_correlation : The cross-correlation in Q(-right_shifts) +typedef void (*CrossCorrelation)(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2); +extern CrossCorrelation WebRtcSpl_CrossCorrelation; +void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2); +#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON) +void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2); +#endif +#if defined(MIPS32_LE) +void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2); +#endif + +// Creates (the first half of) a Hanning window. Size must be at least 1 and +// at most 512. +// +// Input: +// - size : Length of the requested Hanning window (1 to 512) +// +// Output: +// - window : Hanning vector in Q14. +void WebRtcSpl_GetHanningWindow(int16_t* window, size_t size); + +// Calculates y[k] = sqrt(1 - x[k]^2) for each element of the input vector +// |in_vector|. Input and output values are in Q15. +// +// Inputs: +// - in_vector : Values to calculate sqrt(1 - x^2) of +// - vector_length : Length of vector |in_vector| +// +// Output: +// - out_vector : Output values in Q15 +void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t* in_vector, + size_t vector_length, + int16_t* out_vector); +// End: Signal processing operations. + +// Randomization functions. Implementations collected in +// randomization_functions.c and descriptions at bottom of this file. 
+int16_t WebRtcSpl_RandU(uint32_t* seed); +int16_t WebRtcSpl_RandN(uint32_t* seed); +int16_t WebRtcSpl_RandUArray(int16_t* vector, + int16_t vector_length, + uint32_t* seed); +// End: Randomization functions. + +// Math functions +int32_t WebRtcSpl_Sqrt(int32_t value); +int32_t WebRtcSpl_SqrtFloor(int32_t value); + +// Divisions. Implementations collected in division_operations.c and +// descriptions at bottom of this file. +uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den); +int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den); +int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den); +int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den); +int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low); +// End: Divisions. + +int32_t WebRtcSpl_Energy(int16_t* vector, + size_t vector_length, + int* scale_factor); + +// Calculates the dot product between two (int16_t) vectors. +// +// Input: +// - vector1 : Vector 1 +// - vector2 : Vector 2 +// - length : Number of samples used in the dot product +// - scaling : The number of right bit shifts to apply on each term +// during calculation to avoid overflow, i.e., the +// output will be in Q(-|scaling|) +// +// Return value : The dot product in Q(-scaling) +int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1, + const int16_t* vector2, + size_t length, + int scaling); + +// Filter operations. +size_t WebRtcSpl_FilterAR(const int16_t* ar_coef, + size_t ar_coef_length, + const int16_t* in_vector, + size_t in_vector_length, + int16_t* filter_state, + size_t filter_state_length, + int16_t* filter_state_low, + size_t filter_state_low_length, + int16_t* out_vector, + int16_t* out_vector_low, + size_t out_vector_low_length); + +// WebRtcSpl_FilterMAFastQ12(...) +// +// Performs a MA filtering on a vector in Q12 +// +// Input: +// - in_vector : Input samples (state in positions +// in_vector[-order] .. 
in_vector[-1]) +// - ma_coef : Filter coefficients (in Q12) +// - ma_coef_length : Number of B coefficients (order+1) +// - vector_length : Number of samples to be filtered +// +// Output: +// - out_vector : Filtered samples +// +void WebRtcSpl_FilterMAFastQ12(const int16_t* in_vector, + int16_t* out_vector, + const int16_t* ma_coef, + size_t ma_coef_length, + size_t vector_length); + +// Performs a AR filtering on a vector in Q12 +// Input: +// - data_in : Input samples +// - data_out : State information in positions +// data_out[-order] .. data_out[-1] +// - coefficients : Filter coefficients (in Q12) +// - coefficients_length: Number of coefficients (order+1) +// - data_length : Number of samples to be filtered +// Output: +// - data_out : Filtered samples +void WebRtcSpl_FilterARFastQ12(const int16_t* data_in, + int16_t* data_out, + const int16_t* __restrict coefficients, + size_t coefficients_length, + size_t data_length); + +// The functions (with related pointer) perform a MA down sampling filter +// on a vector. +// Input: +// - data_in : Input samples (state in positions +// data_in[-order] .. data_in[-1]) +// - data_in_length : Number of samples in |data_in| to be filtered. 
+// This must be at least +// |delay| + |factor|*(|data_out_length|-1) + 1 +// - data_out_length : Number of down sampled samples desired +// - coefficients : Filter coefficients (in Q12) +// - coefficients_length: Number of coefficients (order+1) +// - factor : Decimation factor +// - delay : Delay of filter (compensated for in |data_out|) +// Output: +// - data_out : Filtered samples +// Return value : 0 if OK, -1 if |data_in| is too short +typedef int (*DownsampleFast)(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay); +extern DownsampleFast WebRtcSpl_DownsampleFast; +int WebRtcSpl_DownsampleFastC(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay); +#if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON) +int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay); +#endif +#if defined(MIPS32_LE) +int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay); +#endif + +// End: Filter operations. + +// FFT operations + +int WebRtcSpl_ComplexFFT(int16_t vector[], int stages, int mode); +int WebRtcSpl_ComplexIFFT(int16_t vector[], int stages, int mode); + +// Treat a 16-bit complex data buffer |complex_data| as an array of 32-bit +// values, and swap elements whose indexes are bit-reverses of each other. 
+// +// Input: +// - complex_data : Complex data buffer containing 2^|stages| real +// elements interleaved with 2^|stages| imaginary +// elements: [Re Im Re Im Re Im....] +// - stages : Number of FFT stages. Must be at least 3 and at most +// 10, since the table WebRtcSpl_kSinTable1024[] is 1024 +// elements long. +// +// Output: +// - complex_data : The complex data buffer. + +void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages); + +// End: FFT operations + +/************************************************************ + * + * RESAMPLING FUNCTIONS AND THEIR STRUCTS ARE DEFINED BELOW + * + ************************************************************/ + +/******************************************************************* + * resample.c + * + * Includes the following resampling combinations + * 22 kHz -> 16 kHz + * 16 kHz -> 22 kHz + * 22 kHz -> 8 kHz + * 8 kHz -> 22 kHz + * + ******************************************************************/ + +// state structure for 22 -> 16 resampler +typedef struct { + int32_t S_22_44[8]; + int32_t S_44_32[8]; + int32_t S_32_16[8]; +} WebRtcSpl_State22khzTo16khz; + +void WebRtcSpl_Resample22khzTo16khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State22khzTo16khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state); + +// state structure for 16 -> 22 resampler +typedef struct { + int32_t S_16_32[8]; + int32_t S_32_22[8]; +} WebRtcSpl_State16khzTo22khz; + +void WebRtcSpl_Resample16khzTo22khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State16khzTo22khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state); + +// state structure for 22 -> 8 resampler +typedef struct { + int32_t S_22_22[16]; + int32_t S_22_16[8]; + int32_t S_16_8[8]; +} WebRtcSpl_State22khzTo8khz; + +void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, int16_t* out, + WebRtcSpl_State22khzTo8khz* state, + int32_t* tmpmem); + 
+void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state); + +// state structure for 8 -> 22 resampler +typedef struct { + int32_t S_8_16[8]; + int32_t S_16_11[8]; + int32_t S_11_22[8]; +} WebRtcSpl_State8khzTo22khz; + +void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, int16_t* out, + WebRtcSpl_State8khzTo22khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state); + +/******************************************************************* + * resample_fractional.c + * Functions for internal use in the other resample functions + * + * Includes the following resampling combinations + * 48 kHz -> 32 kHz + * 32 kHz -> 24 kHz + * 44 kHz -> 32 kHz + * + ******************************************************************/ + +void WebRtcSpl_Resample48khzTo32khz(const int32_t* In, int32_t* Out, size_t K); + +void WebRtcSpl_Resample32khzTo24khz(const int32_t* In, int32_t* Out, size_t K); + +void WebRtcSpl_Resample44khzTo32khz(const int32_t* In, int32_t* Out, size_t K); + +/******************************************************************* + * resample_48khz.c + * + * Includes the following resampling combinations + * 48 kHz -> 16 kHz + * 16 kHz -> 48 kHz + * 48 kHz -> 8 kHz + * 8 kHz -> 48 kHz + * + ******************************************************************/ + +typedef struct { + int32_t S_48_48[16]; + int32_t S_48_32[8]; + int32_t S_32_16[8]; +} WebRtcSpl_State48khzTo16khz; + +void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out, + WebRtcSpl_State48khzTo16khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state); + +typedef struct { + int32_t S_16_32[8]; + int32_t S_32_24[8]; + int32_t S_24_48[8]; +} WebRtcSpl_State16khzTo48khz; + +void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out, + WebRtcSpl_State16khzTo48khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* 
state); + +typedef struct { + int32_t S_48_24[8]; + int32_t S_24_24[16]; + int32_t S_24_16[8]; + int32_t S_16_8[8]; +} WebRtcSpl_State48khzTo8khz; + +void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out, + WebRtcSpl_State48khzTo8khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state); + +typedef struct { + int32_t S_8_16[8]; + int32_t S_16_12[8]; + int32_t S_12_24[8]; + int32_t S_24_48[8]; +} WebRtcSpl_State8khzTo48khz; + +void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out, + WebRtcSpl_State8khzTo48khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state); + +/******************************************************************* + * resample_by_2.c + * + * Includes down and up sampling by a factor of two. + * + ******************************************************************/ + +void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len, + int16_t* out, int32_t* filtState); + +void WebRtcSpl_UpsampleBy2(const int16_t* in, size_t len, + int16_t* out, int32_t* filtState); + +/************************************************************ + * END OF RESAMPLING FUNCTIONS + ************************************************************/ +void WebRtcSpl_AnalysisQMF(const int16_t* in_data, + size_t in_data_length, + int16_t* low_band, + int16_t* high_band, + int32_t* filter_state1, + int32_t* filter_state2); +void WebRtcSpl_SynthesisQMF(const int16_t* low_band, + const int16_t* high_band, + size_t band_length, + int16_t* out_data, + int32_t* filter_state1, + int32_t* filter_state2); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // WEBRTC_SPL_SIGNAL_PROCESSING_LIBRARY_H_ + +// +// WebRtcSpl_AddSatW16(...) +// WebRtcSpl_AddSatW32(...) +// +// Returns the result of a saturated 16-bit, respectively 32-bit, addition of +// the numbers specified by the |var1| and |var2| parameters. 
+// +// Input: +// - var1 : Input variable 1 +// - var2 : Input variable 2 +// +// Return value : Added and saturated value +// + +// +// WebRtcSpl_SubSatW16(...) +// WebRtcSpl_SubSatW32(...) +// +// Returns the result of a saturated 16-bit, respectively 32-bit, subtraction +// of the numbers specified by the |var1| and |var2| parameters. +// +// Input: +// - var1 : Input variable 1 +// - var2 : Input variable 2 +// +// Return value : Subtracted and saturated value +// + +// +// WebRtcSpl_GetSizeInBits(...) +// +// Returns the # of bits that are needed at the most to represent the number +// specified by the |value| parameter. +// +// Input: +// - value : Input value +// +// Return value : Number of bits needed to represent |value| +// + +// +// WebRtcSpl_NormW32(...) +// +// Norm returns the # of left shifts required to 32-bit normalize the 32-bit +// signed number specified by the |value| parameter. +// +// Input: +// - value : Input value +// +// Return value : Number of bit shifts needed to 32-bit normalize |value| +// + +// +// WebRtcSpl_NormW16(...) +// +// Norm returns the # of left shifts required to 16-bit normalize the 16-bit +// signed number specified by the |value| parameter. +// +// Input: +// - value : Input value +// +// Return value : Number of bit shifts needed to 16-bit normalize |value| +// + +// +// WebRtcSpl_NormU32(...) +// +// Norm returns the # of left shifts required to 32-bit normalize the unsigned +// 32-bit number specified by the |value| parameter. +// +// Input: +// - value : Input value +// +// Return value : Number of bit shifts needed to 32-bit normalize |value| +// + +// +// WebRtcSpl_GetScalingSquare(...) +// +// Returns the # of bits required to scale the samples specified in the +// |in_vector| parameter so that, if the squares of the samples are added the +// # of times specified by the |times| parameter, the 32-bit addition will not +// overflow (result in int32_t). 
+// +// Input: +// - in_vector : Input vector to check scaling on +// - in_vector_length : Samples in |in_vector| +// - times : Number of additions to be performed +// +// Return value : Number of right bit shifts needed to avoid +// overflow in the addition calculation +// + +// +// WebRtcSpl_MemSetW16(...) +// +// Sets all the values in the int16_t vector |vector| of length +// |vector_length| to the specified value |set_value| +// +// Input: +// - vector : Pointer to the int16_t vector +// - set_value : Value specified +// - vector_length : Length of vector +// + +// +// WebRtcSpl_MemSetW32(...) +// +// Sets all the values in the int32_t vector |vector| of length +// |vector_length| to the specified value |set_value| +// +// Input: +// - vector : Pointer to the int32_t vector +// - set_value : Value specified +// - vector_length : Length of vector +// + +// +// WebRtcSpl_MemCpyReversedOrder(...) +// +// Copies all the values from the source int16_t vector |in_vector| to a +// destination int16_t vector |out_vector|. It is done in reversed order, +// meaning that the first sample of |in_vector| is copied to the last sample of +// the |out_vector|. The procedure continues until the last sample of +// |in_vector| has been copied to the first sample of |out_vector|. This +// creates a reversed vector. Used in e.g. prediction in iLBC. +// +// Input: +// - in_vector : Pointer to the first sample in a int16_t vector +// of length |vector_length| +// - vector_length : Number of elements to copy +// +// Output: +// - out_vector : Pointer to the last sample in a int16_t vector +// of length |vector_length| +// + +// +// WebRtcSpl_CopyFromEndW16(...) +// +// Copies the rightmost |samples| of |in_vector| (of length |in_vector_length|) +// to the vector |out_vector|. 
+// +// Input: +// - in_vector : Input vector +// - in_vector_length : Number of samples in |in_vector| +// - samples : Number of samples to extract (from right side) +// from |in_vector| +// +// Output: +// - out_vector : Vector with the requested samples +// + +// +// WebRtcSpl_ZerosArrayW16(...) +// WebRtcSpl_ZerosArrayW32(...) +// +// Inserts the value "zero" in all positions of a w16 and a w32 vector +// respectively. +// +// Input: +// - vector_length : Number of samples in vector +// +// Output: +// - vector : Vector containing all zeros +// + +// +// WebRtcSpl_VectorBitShiftW16(...) +// WebRtcSpl_VectorBitShiftW32(...) +// +// Bit shifts all the values in a vector up or downwards. Different calls for +// int16_t and int32_t vectors respectively. +// +// Input: +// - vector_length : Length of vector +// - in_vector : Pointer to the vector that should be bit shifted +// - right_shifts : Number of right bit shifts (negative value gives left +// shifts) +// +// Output: +// - out_vector : Pointer to the result vector (can be the same as +// |in_vector|) +// + +// +// WebRtcSpl_VectorBitShiftW32ToW16(...) +// +// Bit shifts all the values in a int32_t vector up or downwards and +// stores the result as an int16_t vector. The function will saturate the +// signal if needed, before storing in the output vector. +// +// Input: +// - vector_length : Length of vector +// - in_vector : Pointer to the vector that should be bit shifted +// - right_shifts : Number of right bit shifts (negative value gives left +// shifts) +// +// Output: +// - out_vector : Pointer to the result vector (can be the same as +// |in_vector|) +// + +// +// WebRtcSpl_ScaleVector(...) 
+// +// Performs the vector operation: +// out_vector[k] = (gain*in_vector[k])>>right_shifts +// +// Input: +// - in_vector : Input vector +// - gain : Scaling gain +// - vector_length : Elements in the |in_vector| +// - right_shifts : Number of right bit shifts applied +// +// Output: +// - out_vector : Output vector (can be the same as |in_vector|) +// + +// +// WebRtcSpl_ScaleVectorWithSat(...) +// +// Performs the vector operation: +// out_vector[k] = SATURATE( (gain*in_vector[k])>>right_shifts ) +// +// Input: +// - in_vector : Input vector +// - gain : Scaling gain +// - vector_length : Elements in the |in_vector| +// - right_shifts : Number of right bit shifts applied +// +// Output: +// - out_vector : Output vector (can be the same as |in_vector|) +// + +// +// WebRtcSpl_ScaleAndAddVectors(...) +// +// Performs the vector operation: +// out_vector[k] = (gain1*in_vector1[k])>>right_shifts1 +// + (gain2*in_vector2[k])>>right_shifts2 +// +// Input: +// - in_vector1 : Input vector 1 +// - gain1 : Gain to be used for vector 1 +// - right_shifts1 : Right bit shift to be used for vector 1 +// - in_vector2 : Input vector 2 +// - gain2 : Gain to be used for vector 2 +// - right_shifts2 : Right bit shift to be used for vector 2 +// - vector_length : Elements in the input vectors +// +// Output: +// - out_vector : Output vector +// + +// +// WebRtcSpl_ReverseOrderMultArrayElements(...) +// +// Performs the vector operation: +// out_vector[n] = (in_vector[n]*window[-n])>>right_shifts +// +// Input: +// - in_vector : Input vector +// - window : Window vector (should be reversed). The pointer +// should be set to the last value in the vector +// - right_shifts : Number of right bit shift to be applied after the +// multiplication +// - vector_length : Number of elements in |in_vector| +// +// Output: +// - out_vector : Output vector (can be same as |in_vector|) +// + +// +// WebRtcSpl_ElementwiseVectorMult(...) 
+// +// Performs the vector operation: +// out_vector[n] = (in_vector[n]*window[n])>>right_shifts +// +// Input: +// - in_vector : Input vector +// - window : Window vector. +// - right_shifts : Number of right bit shifts to be applied after the +// multiplication +// - vector_length : Number of elements in |in_vector| +// +// Output: +// - out_vector : Output vector (can be same as |in_vector|) +// + +// +// WebRtcSpl_AddVectorsAndShift(...) +// +// Performs the vector operation: +// out_vector[k] = (in_vector1[k] + in_vector2[k])>>right_shifts +// +// Input: +// - in_vector1 : Input vector 1 +// - in_vector2 : Input vector 2 +// - right_shifts : Number of right bit shifts to be applied after the +// addition +// - vector_length : Number of elements in |in_vector1| and |in_vector2| +// +// Output: +// - out_vector : Output vector (can be same as |in_vector1|) +// + +// +// WebRtcSpl_AddAffineVectorToVector(...) +// +// Adds an affine transformed vector to another vector |out_vector|, i.e, +// performs +// out_vector[k] += (in_vector[k]*gain+add_constant)>>right_shifts +// +// Input: +// - in_vector : Input vector +// - gain : Gain value, used to multiply the in vector with +// - add_constant : Constant value to add (usually 1<<(right_shifts-1), +// but others can be used as well) +// - right_shifts : Number of right bit shifts (0-16) +// - vector_length : Number of samples in |in_vector| and |out_vector| +// +// Output: +// - out_vector : Vector with the output +// + +// +// WebRtcSpl_AffineTransformVector(...) 
+// +// Affine transforms a vector, i.e, performs +// out_vector[k] = (in_vector[k]*gain+add_constant)>>right_shifts +// +// Input: +// - in_vector : Input vector +// - gain : Gain value, used to multiply the in vector with +// - add_constant : Constant value to add (usually 1<<(right_shifts-1), +// but others can be used as well +// - right_shifts : Number of right bit shifts (0-16) +// - vector_length : Number of samples in |in_vector| and |out_vector| +// +// Output: +// - out_vector : Vector with the output +// + +// +// WebRtcSpl_IncreaseSeed(...) +// +// Increases the seed (and returns the new value) +// +// Input: +// - seed : Seed for random calculation +// +// Output: +// - seed : Updated seed value +// +// Return value : The new seed value +// + +// +// WebRtcSpl_RandU(...) +// +// Produces a uniformly distributed value in the int16_t range +// +// Input: +// - seed : Seed for random calculation +// +// Output: +// - seed : Updated seed value +// +// Return value : Uniformly distributed value in the range +// [Word16_MIN...Word16_MAX] +// + +// +// WebRtcSpl_RandN(...) +// +// Produces a normal distributed value in the int16_t range +// +// Input: +// - seed : Seed for random calculation +// +// Output: +// - seed : Updated seed value +// +// Return value : N(0,1) value in the Q13 domain +// + +// +// WebRtcSpl_RandUArray(...) +// +// Produces a uniformly distributed vector with elements in the int16_t +// range +// +// Input: +// - vector_length : Samples wanted in the vector +// - seed : Seed for random calculation +// +// Output: +// - vector : Vector with the uniform values +// - seed : Updated seed value +// +// Return value : Number of samples in vector, i.e., |vector_length| +// + +// +// WebRtcSpl_Sqrt(...) +// +// Returns the square root of the input value |value|. The precision of this +// function is integer precision, i.e., sqrt(8) gives 2 as answer. +// If |value| is a negative number then 0 is returned. 
+// +// Algorithm: +// +// A sixth order Taylor Series expansion is used here to compute the square +// root of a number y^0.5 = (1+x)^0.5 +// where +// x = y-1 +// = 1+(x/2)-0.5*((x/2)^2)+0.5*((x/2)^3)-0.625*((x/2)^4)+0.875*((x/2)^5) +// 0.5 <= x < 1 +// +// Input: +// - value : Value to calculate sqrt of +// +// Return value : Result of the sqrt calculation +// + +// +// WebRtcSpl_SqrtFloor(...) +// +// Returns the square root of the input value |value|. The precision of this +// function is rounding down integer precision, i.e., sqrt(8) gives 2 as answer. +// If |value| is a negative number then 0 is returned. +// +// Algorithm: +// +// An iterative 4 cycle/bit routine +// +// Input: +// - value : Value to calculate sqrt of +// +// Return value : Result of the sqrt calculation +// + +// +// WebRtcSpl_DivU32U16(...) +// +// Divides a uint32_t |num| by a uint16_t |den|. +// +// If |den|==0, (uint32_t)0xFFFFFFFF is returned. +// +// Input: +// - num : Numerator +// - den : Denominator +// +// Return value : Result of the division (as a uint32_t), i.e., the +// integer part of num/den. +// + +// +// WebRtcSpl_DivW32W16(...) +// +// Divides a int32_t |num| by a int16_t |den|. +// +// If |den|==0, (int32_t)0x7FFFFFFF is returned. +// +// Input: +// - num : Numerator +// - den : Denominator +// +// Return value : Result of the division (as a int32_t), i.e., the +// integer part of num/den. +// + +// +// WebRtcSpl_DivW32W16ResW16(...) +// +// Divides a int32_t |num| by a int16_t |den|, assuming that the +// result is less than 32768, otherwise an unpredictable result will occur. +// +// If |den|==0, (int16_t)0x7FFF is returned. +// +// Input: +// - num : Numerator +// - den : Denominator +// +// Return value : Result of the division (as a int16_t), i.e., the +// integer part of num/den. +// + +// +// WebRtcSpl_DivResultInQ31(...) 
+// +// Divides a int32_t |num| by a int32_t |den|, assuming that the +// absolute value of the denominator is larger than the numerator, otherwise +// an unpredictable result will occur. +// +// Input: +// - num : Numerator +// - den : Denominator +// +// Return value : Result of the division in Q31. +// + +// +// WebRtcSpl_DivW32HiLow(...) +// +// Divides a int32_t |num| by a denominator in hi, low format. The +// absolute value of the denominator has to be larger than (or equal to) the +// numerator. +// +// Input: +// - num : Numerator +// - den_hi : High part of denominator +// - den_low : Low part of denominator +// +// Return value : Divided value in Q31 +// + +// +// WebRtcSpl_Energy(...) +// +// Calculates the energy of a vector +// +// Input: +// - vector : Vector which the energy should be calculated on +// - vector_length : Number of samples in vector +// +// Output: +// - scale_factor : Number of left bit shifts needed to get the physical +// energy value, i.e., to get the Q0 value +// +// Return value : Energy value in Q(-|scale_factor|) +// + +// +// WebRtcSpl_FilterAR(...) +// +// Performs a 32-bit AR filtering on a vector in Q12 +// +// Input: +// - ar_coef : AR-coefficient vector (values in Q12), +// ar_coef[0] must be 4096. +// - ar_coef_length : Number of coefficients in |ar_coef|. +// - in_vector : Vector to be filtered. +// - in_vector_length : Number of samples in |in_vector|. +// - filter_state : Current state (higher part) of the filter. +// - filter_state_length : Length (in samples) of |filter_state|. +// - filter_state_low : Current state (lower part) of the filter. +// - filter_state_low_length : Length (in samples) of |filter_state_low|. +// - out_vector_low_length : Maximum length (in samples) of +// |out_vector_low|. +// +// Output: +// - filter_state : Updated state (upper part) vector. +// - filter_state_low : Updated state (lower part) vector. +// - out_vector : Vector containing the upper part of the +// filtered values. 
+// - out_vector_low : Vector containing the lower part of the +// filtered values. +// +// Return value : Number of samples in the |out_vector|. +// + +// +// WebRtcSpl_ComplexIFFT(...) +// +// Complex Inverse FFT +// +// Computes an inverse complex 2^|stages|-point FFT on the input vector, which +// is in bit-reversed order. The original content of the vector is destroyed in +// the process, since the input is overwritten by the output, normal-ordered, +// FFT vector. With X as the input complex vector, y as the output complex +// vector and with M = 2^|stages|, the following is computed: +// +// M-1 +// y(k) = sum[X(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]] +// i=0 +// +// The implementations are optimized for speed, not for code size. It uses the +// decimation-in-time algorithm with radix-2 butterfly technique. +// +// Input: +// - vector : In pointer to complex vector containing 2^|stages| +// real elements interleaved with 2^|stages| imaginary +// elements. +// [ReImReImReIm....] +// The elements are in Q(-scale) domain, see more on Return +// Value below. +// +// - stages : Number of FFT stages. Must be at least 3 and at most 10, +// since the table WebRtcSpl_kSinTable1024[] is 1024 +// elements long. +// +// - mode : This parameter gives the user to choose how the FFT +// should work. +// mode==0: Low-complexity and Low-accuracy mode +// mode==1: High-complexity and High-accuracy mode +// +// Output: +// - vector : Out pointer to the FFT vector (the same as input). +// +// Return Value : The scale value that tells the number of left bit shifts +// that the elements in the |vector| should be shifted with +// in order to get Q0 values, i.e. the physically correct +// values. The scale parameter is always 0 or positive, +// except if N>1024 (|stages|>10), which returns a scale +// value of -1, indicating error. +// + +// +// WebRtcSpl_ComplexFFT(...) 
+// +// Complex FFT +// +// Computes a complex 2^|stages|-point FFT on the input vector, which is in +// bit-reversed order. The original content of the vector is destroyed in +// the process, since the input is overwritten by the output, normal-ordered, +// FFT vector. With x as the input complex vector, Y as the output complex +// vector and with M = 2^|stages|, the following is computed: +// +// M-1 +// Y(k) = 1/M * sum[x(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]] +// i=0 +// +// The implementations are optimized for speed, not for code size. It uses the +// decimation-in-time algorithm with radix-2 butterfly technique. +// +// This routine prevents overflow by scaling by 2 before each FFT stage. This is +// a fixed scaling, for proper normalization - there will be log2(n) passes, so +// this results in an overall factor of 1/n, distributed to maximize arithmetic +// accuracy. +// +// Input: +// - vector : In pointer to complex vector containing 2^|stages| real +// elements interleaved with 2^|stages| imaginary elements. +// [ReImReImReIm....] +// The output is in the Q0 domain. +// +// - stages : Number of FFT stages. Must be at least 3 and at most 10, +// since the table WebRtcSpl_kSinTable1024[] is 1024 +// elements long. +// +// - mode : This parameter gives the user to choose how the FFT +// should work. +// mode==0: Low-complexity and Low-accuracy mode +// mode==1: High-complexity and High-accuracy mode +// +// Output: +// - vector : The output FFT vector is in the Q0 domain. +// +// Return value : The scale parameter is always 0, except if N>1024, +// which returns a scale value of -1, indicating error. +// + +// +// WebRtcSpl_AnalysisQMF(...) +// +// Splits a 0-2*F Hz signal into two sub bands: 0-F Hz and F-2*F Hz. The +// current version has F = 8000, therefore, a super-wideband audio signal is +// split to lower-band 0-8 kHz and upper-band 8-16 kHz. 
+// +// Input: +// - in_data : Wide band speech signal, 320 samples (10 ms) +// +// Input & Output: +// - filter_state1 : Filter state for first All-pass filter +// - filter_state2 : Filter state for second All-pass filter +// +// Output: +// - low_band : Lower-band signal 0-8 kHz band, 160 samples (10 ms) +// - high_band : Upper-band signal 8-16 kHz band (flipped in frequency +// domain), 160 samples (10 ms) +// + +// +// WebRtcSpl_SynthesisQMF(...) +// +// Combines the two sub bands (0-F and F-2*F Hz) into a signal of 0-2*F +// Hz, (current version has F = 8000 Hz). So the filter combines lower-band +// (0-8 kHz) and upper-band (8-16 kHz) channels to obtain super-wideband 0-16 +// kHz audio. +// +// Input: +// - low_band : The signal with the 0-8 kHz band, 160 samples (10 ms) +// - high_band : The signal with the 8-16 kHz band, 160 samples (10 ms) +// +// Input & Output: +// - filter_state1 : Filter state for first All-pass filter +// - filter_state2 : Filter state for second All-pass filter +// +// Output: +// - out_data : Super-wideband speech signal, 0-16 kHz +// + +// int16_t WebRtcSpl_SatW32ToW16(...) +// +// This function saturates a 32-bit word into a 16-bit word. +// +// Input: +// - value32 : The value of a 32-bit word. +// +// Output: +// - out16 : the saturated 16-bit word. +// + +// int32_t WebRtc_MulAccumW16(...) +// +// This function multiplies a 16-bit word by a 16-bit word, and accumulates this +// value to a 32-bit integer. +// +// Input: +// - a : The value of the first 16-bit word. +// - b : The value of the second 16-bit word. +// - c : The value of a 32-bit integer. +// +// Return Value: The value of a * b + c. 
+// diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl.h new file mode 100644 index 00000000..d3cc6dee --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl.h @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +// This header file includes the inline functions in +// the fix point signal processing library. + +#ifndef WEBRTC_SPL_SPL_INL_H_ +#define WEBRTC_SPL_SPL_INL_H_ + +#ifdef WEBRTC_ARCH_ARM_V7 +#include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h" +#else + +#if defined(MIPS32_LE) +#include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h" +#endif + +#if !defined(MIPS_DSP_R1_LE) +static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) { + int16_t out16 = (int16_t) value32; + + if (value32 > 32767) + out16 = 32767; + else if (value32 < -32768) + out16 = -32768; + + return out16; +} + +static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) { + int32_t l_sum; + + // Perform long addition + l_sum = l_var1 + l_var2; + + if (l_var1 < 0) { // Check for underflow. + if ((l_var2 < 0) && (l_sum >= 0)) { + l_sum = (int32_t)0x80000000; + } + } else { // Check for overflow. + if ((l_var2 > 0) && (l_sum < 0)) { + l_sum = (int32_t)0x7FFFFFFF; + } + } + + return l_sum; +} + +static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) { + int32_t l_diff; + + // Perform subtraction. + l_diff = l_var1 - l_var2; + + if (l_var1 < 0) { // Check for underflow. 
+ if ((l_var2 > 0) && (l_diff > 0)) { + l_diff = (int32_t)0x80000000; + } + } else { // Check for overflow. + if ((l_var2 < 0) && (l_diff < 0)) { + l_diff = (int32_t)0x7FFFFFFF; + } + } + + return l_diff; +} + +static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) { + return WebRtcSpl_SatW32ToW16((int32_t) a + (int32_t) b); +} + +static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) { + return WebRtcSpl_SatW32ToW16((int32_t) var1 - (int32_t) var2); +} +#endif // #if !defined(MIPS_DSP_R1_LE) + +#if !defined(MIPS32_LE) +static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) { + int16_t bits; + + if (0xFFFF0000 & n) { + bits = 16; + } else { + bits = 0; + } + if (0x0000FF00 & (n >> bits)) bits += 8; + if (0x000000F0 & (n >> bits)) bits += 4; + if (0x0000000C & (n >> bits)) bits += 2; + if (0x00000002 & (n >> bits)) bits += 1; + if (0x00000001 & (n >> bits)) bits += 1; + + return bits; +} + +static __inline int16_t WebRtcSpl_NormW32(int32_t a) { + int16_t zeros; + + if (a == 0) { + return 0; + } + else if (a < 0) { + a = ~a; + } + + if (!(0xFFFF8000 & a)) { + zeros = 16; + } else { + zeros = 0; + } + if (!(0xFF800000 & (a << zeros))) zeros += 8; + if (!(0xF8000000 & (a << zeros))) zeros += 4; + if (!(0xE0000000 & (a << zeros))) zeros += 2; + if (!(0xC0000000 & (a << zeros))) zeros += 1; + + return zeros; +} + +static __inline int16_t WebRtcSpl_NormU32(uint32_t a) { + int16_t zeros; + + if (a == 0) return 0; + + if (!(0xFFFF0000 & a)) { + zeros = 16; + } else { + zeros = 0; + } + if (!(0xFF000000 & (a << zeros))) zeros += 8; + if (!(0xF0000000 & (a << zeros))) zeros += 4; + if (!(0xC0000000 & (a << zeros))) zeros += 2; + if (!(0x80000000 & (a << zeros))) zeros += 1; + + return zeros; +} + +static __inline int16_t WebRtcSpl_NormW16(int16_t a) { + int16_t zeros; + + if (a == 0) { + return 0; + } + else if (a < 0) { + a = ~a; + } + + if (!(0xFF80 & a)) { + zeros = 8; + } else { + zeros = 0; + } + if (!(0xF800 & (a << zeros))) zeros += 4; 
+ if (!(0xE000 & (a << zeros))) zeros += 2; + if (!(0xC000 & (a << zeros))) zeros += 1; + + return zeros; +} + +static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) { + return (a * b + c); +} +#endif // #if !defined(MIPS32_LE) + +#endif // WEBRTC_ARCH_ARM_V7 + +#endif // WEBRTC_SPL_SPL_INL_H_ diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h new file mode 100644 index 00000000..27188011 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* This header file includes the inline functions for ARM processors in + * the fix point signal processing library. + */ + +#ifndef WEBRTC_SPL_SPL_INL_ARMV7_H_ +#define WEBRTC_SPL_SPL_INL_ARMV7_H_ + +/* TODO(kma): Replace some assembly code with GCC intrinsics + * (e.g. __builtin_clz). + */ + +/* This function produces result that is not bit exact with that by the generic + * C version in some cases, although the former is at least as accurate as the + * later. + */ +static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a, int32_t b) { + int32_t tmp = 0; + __asm __volatile ("smulwb %0, %1, %2":"=r"(tmp):"r"(b), "r"(a)); + return tmp; +} + +static __inline int32_t WEBRTC_SPL_MUL_16_16(int16_t a, int16_t b) { + int32_t tmp = 0; + __asm __volatile ("smulbb %0, %1, %2":"=r"(tmp):"r"(a), "r"(b)); + return tmp; +} + +// TODO(kma): add unit test. 
+static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) { + int32_t tmp = 0; + __asm __volatile ("smlabb %0, %1, %2, %3":"=r"(tmp):"r"(a), "r"(b), "r"(c)); + return tmp; +} + +static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) { + int32_t s_sum = 0; + + __asm __volatile ("qadd16 %0, %1, %2":"=r"(s_sum):"r"(a), "r"(b)); + + return (int16_t) s_sum; +} + +static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) { + int32_t l_sum = 0; + + __asm __volatile ("qadd %0, %1, %2":"=r"(l_sum):"r"(l_var1), "r"(l_var2)); + + return l_sum; +} + +static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) { + int32_t l_sub = 0; + + __asm __volatile ("qsub %0, %1, %2":"=r"(l_sub):"r"(l_var1), "r"(l_var2)); + + return l_sub; +} + +static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) { + int32_t s_sub = 0; + + __asm __volatile ("qsub16 %0, %1, %2":"=r"(s_sub):"r"(var1), "r"(var2)); + + return (int16_t)s_sub; +} + +static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) { + int32_t tmp = 0; + + __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(n)); + + return (int16_t)(32 - tmp); +} + +static __inline int16_t WebRtcSpl_NormW32(int32_t a) { + int32_t tmp = 0; + + if (a == 0) { + return 0; + } + else if (a < 0) { + a ^= 0xFFFFFFFF; + } + + __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a)); + + return (int16_t)(tmp - 1); +} + +static __inline int16_t WebRtcSpl_NormU32(uint32_t a) { + int tmp = 0; + + if (a == 0) return 0; + + __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a)); + + return (int16_t)tmp; +} + +static __inline int16_t WebRtcSpl_NormW16(int16_t a) { + int32_t tmp = 0; + int32_t a_32 = a; + + if (a_32 == 0) { + return 0; + } + else if (a_32 < 0) { + a_32 ^= 0xFFFFFFFF; + } + + __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a_32)); + + return (int16_t)(tmp - 17); +} + +// TODO(kma): add unit test. 
+static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) { + int32_t out = 0; + + __asm __volatile ("ssat %0, #16, %1" : "=r"(out) : "r"(value32)); + + return (int16_t)out; +} + +#endif // WEBRTC_SPL_SPL_INL_ARMV7_H_ diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_mips.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_mips.h new file mode 100644 index 00000000..cd04bddc --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_mips.h @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +// This header file includes the inline functions in +// the fix point signal processing library. 
+ +#ifndef WEBRTC_SPL_SPL_INL_MIPS_H_ +#define WEBRTC_SPL_SPL_INL_MIPS_H_ + +static __inline int32_t WEBRTC_SPL_MUL_16_16(int32_t a, + int32_t b) { + int32_t value32 = 0; + int32_t a1 = 0, b1 = 0; + + __asm __volatile( +#if defined(MIPS32_R2_LE) + "seh %[a1], %[a] \n\t" + "seh %[b1], %[b] \n\t" +#else + "sll %[a1], %[a], 16 \n\t" + "sll %[b1], %[b], 16 \n\t" + "sra %[a1], %[a1], 16 \n\t" + "sra %[b1], %[b1], 16 \n\t" +#endif + "mul %[value32], %[a1], %[b1] \n\t" + : [value32] "=r" (value32), [a1] "=&r" (a1), [b1] "=&r" (b1) + : [a] "r" (a), [b] "r" (b) + : "hi", "lo" + ); + return value32; +} + +static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a, + int32_t b) { + int32_t value32 = 0, b1 = 0, b2 = 0; + int32_t a1 = 0; + + __asm __volatile( +#if defined(MIPS32_R2_LE) + "seh %[a1], %[a] \n\t" +#else + "sll %[a1], %[a], 16 \n\t" + "sra %[a1], %[a1], 16 \n\t" +#endif + "andi %[b2], %[b], 0xFFFF \n\t" + "sra %[b1], %[b], 16 \n\t" + "sra %[b2], %[b2], 1 \n\t" + "mul %[value32], %[a1], %[b1] \n\t" + "mul %[b2], %[a1], %[b2] \n\t" + "addiu %[b2], %[b2], 0x4000 \n\t" + "sra %[b2], %[b2], 15 \n\t" + "addu %[value32], %[value32], %[b2] \n\t" + : [value32] "=&r" (value32), [b1] "=&r" (b1), [b2] "=&r" (b2), + [a1] "=&r" (a1) + : [a] "r" (a), [b] "r" (b) + : "hi", "lo" + ); + return value32; +} + +#if defined(MIPS_DSP_R1_LE) +static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) { + __asm __volatile( + "shll_s.w %[value32], %[value32], 16 \n\t" + "sra %[value32], %[value32], 16 \n\t" + : [value32] "+r" (value32) + : + ); + int16_t out16 = (int16_t)value32; + return out16; +} + +static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) { + int32_t value32 = 0; + + __asm __volatile( + "addq_s.ph %[value32], %[a], %[b] \n\t" + : [value32] "=r" (value32) + : [a] "r" (a), [b] "r" (b) + ); + return (int16_t)value32; +} + +static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) { + int32_t l_sum; + + __asm __volatile( + "addq_s.w %[l_sum], 
%[l_var1], %[l_var2] \n\t" + : [l_sum] "=r" (l_sum) + : [l_var1] "r" (l_var1), [l_var2] "r" (l_var2) + ); + + return l_sum; +} + +static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) { + int32_t value32; + + __asm __volatile( + "subq_s.ph %[value32], %[var1], %[var2] \n\t" + : [value32] "=r" (value32) + : [var1] "r" (var1), [var2] "r" (var2) + ); + + return (int16_t)value32; +} + +static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) { + int32_t l_diff; + + __asm __volatile( + "subq_s.w %[l_diff], %[l_var1], %[l_var2] \n\t" + : [l_diff] "=r" (l_diff) + : [l_var1] "r" (l_var1), [l_var2] "r" (l_var2) + ); + + return l_diff; +} +#endif + +static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) { + int bits = 0; + int i32 = 32; + + __asm __volatile( + "clz %[bits], %[n] \n\t" + "subu %[bits], %[i32], %[bits] \n\t" + : [bits] "=&r" (bits) + : [n] "r" (n), [i32] "r" (i32) + ); + + return (int16_t)bits; +} + +static __inline int16_t WebRtcSpl_NormW32(int32_t a) { + int zeros = 0; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "bnez %[a], 1f \n\t" + " sra %[zeros], %[a], 31 \n\t" + "b 2f \n\t" + " move %[zeros], $zero \n\t" + "1: \n\t" + "xor %[zeros], %[a], %[zeros] \n\t" + "clz %[zeros], %[zeros] \n\t" + "addiu %[zeros], %[zeros], -1 \n\t" + "2: \n\t" + ".set pop \n\t" + : [zeros]"=&r"(zeros) + : [a] "r" (a) + ); + + return (int16_t)zeros; +} + +static __inline int16_t WebRtcSpl_NormU32(uint32_t a) { + int zeros = 0; + + __asm __volatile( + "clz %[zeros], %[a] \n\t" + : [zeros] "=r" (zeros) + : [a] "r" (a) + ); + + return (int16_t)(zeros & 0x1f); +} + +static __inline int16_t WebRtcSpl_NormW16(int16_t a) { + int zeros = 0; + int a0 = a << 16; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "bnez %[a0], 1f \n\t" + " sra %[zeros], %[a0], 31 \n\t" + "b 2f \n\t" + " move %[zeros], $zero \n\t" + "1: \n\t" + "xor %[zeros], %[a0], %[zeros] \n\t" + "clz %[zeros], %[zeros] \n\t" + "addiu %[zeros], 
%[zeros], -1 \n\t" + "2: \n\t" + ".set pop \n\t" + : [zeros]"=&r"(zeros) + : [a0] "r" (a0) + ); + + return (int16_t)zeros; +} + +static __inline int32_t WebRtc_MulAccumW16(int16_t a, + int16_t b, + int32_t c) { + int32_t res = 0, c1 = 0; + __asm __volatile( +#if defined(MIPS32_R2_LE) + "seh %[a], %[a] \n\t" + "seh %[b], %[b] \n\t" +#else + "sll %[a], %[a], 16 \n\t" + "sll %[b], %[b], 16 \n\t" + "sra %[a], %[a], 16 \n\t" + "sra %[b], %[b], 16 \n\t" +#endif + "mul %[res], %[a], %[b] \n\t" + "addu %[c1], %[c], %[res] \n\t" + : [c1] "=r" (c1), [res] "=&r" (res) + : [a] "r" (a), [b] "r" (b), [c] "r" (c) + : "hi", "lo" + ); + return (c1); +} + +#endif // WEBRTC_SPL_SPL_INL_MIPS_H_ diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/levinson_durbin.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/levinson_durbin.c new file mode 100644 index 00000000..d46e5513 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/levinson_durbin.c @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_LevinsonDurbin(). 
+ * The description header can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +#define SPL_LEVINSON_MAXORDER 20 + +int16_t WebRtcSpl_LevinsonDurbin(const int32_t* R, int16_t* A, int16_t* K, + size_t order) +{ + size_t i, j; + // Auto-correlation coefficients in high precision + int16_t R_hi[SPL_LEVINSON_MAXORDER + 1], R_low[SPL_LEVINSON_MAXORDER + 1]; + // LPC coefficients in high precision + int16_t A_hi[SPL_LEVINSON_MAXORDER + 1], A_low[SPL_LEVINSON_MAXORDER + 1]; + // LPC coefficients for next iteration + int16_t A_upd_hi[SPL_LEVINSON_MAXORDER + 1], A_upd_low[SPL_LEVINSON_MAXORDER + 1]; + // Reflection coefficient in high precision + int16_t K_hi, K_low; + // Prediction gain Alpha in high precision and with scale factor + int16_t Alpha_hi, Alpha_low, Alpha_exp; + int16_t tmp_hi, tmp_low; + int32_t temp1W32, temp2W32, temp3W32; + int16_t norm; + + // Normalize the autocorrelation R[0]...R[order+1] + + norm = WebRtcSpl_NormW32(R[0]); + + for (i = 0; i <= order; ++i) + { + temp1W32 = WEBRTC_SPL_LSHIFT_W32(R[i], norm); + // Put R in hi and low format + R_hi[i] = (int16_t)(temp1W32 >> 16); + R_low[i] = (int16_t)((temp1W32 - ((int32_t)R_hi[i] << 16)) >> 1); + } + + // K = A[1] = -R[1] / R[0] + + temp2W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)R_hi[1],16) + + WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[1],1); // R[1] in Q31 + temp3W32 = WEBRTC_SPL_ABS_W32(temp2W32); // abs R[1] + temp1W32 = WebRtcSpl_DivW32HiLow(temp3W32, R_hi[0], R_low[0]); // abs(R[1])/R[0] in Q31 + // Put back the sign on R[1] + if (temp2W32 > 0) + { + temp1W32 = -temp1W32; + } + + // Put K in hi and low format + K_hi = (int16_t)(temp1W32 >> 16); + K_low = (int16_t)((temp1W32 - ((int32_t)K_hi << 16)) >> 1); + + // Store first reflection coefficient + K[0] = K_hi; + + temp1W32 >>= 4; // A[1] in Q27. 
+ + // Put A[1] in hi and low format + A_hi[1] = (int16_t)(temp1W32 >> 16); + A_low[1] = (int16_t)((temp1W32 - ((int32_t)A_hi[1] << 16)) >> 1); + + // Alpha = R[0] * (1-K^2) + + temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) << 1; // = k^2 in Q31 + + temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0 + temp1W32 = (int32_t)0x7fffffffL - temp1W32; // temp1W32 = (1 - K[0]*K[0]) in Q31 + + // Store temp1W32 = 1 - K[0]*K[0] on hi and low format + tmp_hi = (int16_t)(temp1W32 >> 16); + tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1); + + // Calculate Alpha in Q31 + temp1W32 = (R_hi[0] * tmp_hi + (R_hi[0] * tmp_low >> 15) + + (R_low[0] * tmp_hi >> 15)) << 1; + + // Normalize Alpha and put it in hi and low format + + Alpha_exp = WebRtcSpl_NormW32(temp1W32); + temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, Alpha_exp); + Alpha_hi = (int16_t)(temp1W32 >> 16); + Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1); + + // Perform the iterative calculations in the Levinson-Durbin algorithm + + for (i = 2; i <= order; i++) + { + /* ---- + temp1W32 = R[i] + > R[j]*A[i-j] + / + ---- + j=1..i-1 + */ + + temp1W32 = 0; + + for (j = 1; j < i; j++) + { + // temp1W32 is in Q31 + temp1W32 += (R_hi[j] * A_hi[i - j] << 1) + + (((R_hi[j] * A_low[i - j] >> 15) + + (R_low[j] * A_hi[i - j] >> 15)) << 1); + } + + temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, 4); + temp1W32 += (WEBRTC_SPL_LSHIFT_W32((int32_t)R_hi[i], 16) + + WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[i], 1)); + + // K = -temp1W32 / Alpha + temp2W32 = WEBRTC_SPL_ABS_W32(temp1W32); // abs(temp1W32) + temp3W32 = WebRtcSpl_DivW32HiLow(temp2W32, Alpha_hi, Alpha_low); // abs(temp1W32)/Alpha + + // Put the sign of temp1W32 back again + if (temp1W32 > 0) + { + temp3W32 = -temp3W32; + } + + // Use the Alpha shifts from earlier to de-normalize + norm = WebRtcSpl_NormW32(temp3W32); + if ((Alpha_exp <= norm) || (temp3W32 == 0)) + { + temp3W32 = WEBRTC_SPL_LSHIFT_W32(temp3W32, Alpha_exp); + } else + { + if (temp3W32 
> 0) + { + temp3W32 = (int32_t)0x7fffffffL; + } else + { + temp3W32 = (int32_t)0x80000000L; + } + } + + // Put K on hi and low format + K_hi = (int16_t)(temp3W32 >> 16); + K_low = (int16_t)((temp3W32 - ((int32_t)K_hi << 16)) >> 1); + + // Store Reflection coefficient in Q15 + K[i - 1] = K_hi; + + // Test for unstable filter. + // If unstable return 0 and let the user decide what to do in that case + + if ((int32_t)WEBRTC_SPL_ABS_W16(K_hi) > (int32_t)32750) + { + return 0; // Unstable filter + } + + /* + Compute updated LPC coefficient: Anew[i] + Anew[j]= A[j] + K*A[i-j] for j=1..i-1 + Anew[i]= K + */ + + for (j = 1; j < i; j++) + { + // temp1W32 = A[j] in Q27 + temp1W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)A_hi[j],16) + + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[j],1); + + // temp1W32 += K*A[i-j] in Q27 + temp1W32 += (K_hi * A_hi[i - j] + (K_hi * A_low[i - j] >> 15) + + (K_low * A_hi[i - j] >> 15)) << 1; + + // Put Anew in hi and low format + A_upd_hi[j] = (int16_t)(temp1W32 >> 16); + A_upd_low[j] = (int16_t)( + (temp1W32 - ((int32_t)A_upd_hi[j] << 16)) >> 1); + } + + // temp3W32 = K in Q27 (Convert from Q31 to Q27) + temp3W32 >>= 4; + + // Store Anew in hi and low format + A_upd_hi[i] = (int16_t)(temp3W32 >> 16); + A_upd_low[i] = (int16_t)( + (temp3W32 - ((int32_t)A_upd_hi[i] << 16)) >> 1); + + // Alpha = Alpha * (1-K^2) + + temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) << 1; // K*K in Q31 + + temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0 + temp1W32 = (int32_t)0x7fffffffL - temp1W32; // 1 - K*K in Q31 + + // Convert 1- K^2 in hi and low format + tmp_hi = (int16_t)(temp1W32 >> 16); + tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1); + + // Calculate Alpha = Alpha * (1-K^2) in Q31 + temp1W32 = (Alpha_hi * tmp_hi + (Alpha_hi * tmp_low >> 15) + + (Alpha_low * tmp_hi >> 15)) << 1; + + // Normalize Alpha and store it on hi and low format + + norm = WebRtcSpl_NormW32(temp1W32); + temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, norm); + + Alpha_hi = 
(int16_t)(temp1W32 >> 16); + Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1); + + // Update the total normalization of Alpha + Alpha_exp = Alpha_exp + norm; + + // Update A[] + + for (j = 1; j <= i; j++) + { + A_hi[j] = A_upd_hi[j]; + A_low[j] = A_upd_low[j]; + } + } + + /* + Set A[0] to 1.0 and store the A[i] i=1...order in Q12 + (Convert from Q27 and use rounding) + */ + + A[0] = 4096; + + for (i = 1; i <= order; i++) + { + // temp1W32 in Q27 + temp1W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)A_hi[i], 16) + + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[i], 1); + // Round and store upper word + A[i] = (int16_t)(((temp1W32 << 1) + 32768) >> 16); + } + return 1; // Stable filters +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/lpc_to_refl_coef.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/lpc_to_refl_coef.c new file mode 100644 index 00000000..edcebd4e --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/lpc_to_refl_coef.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_LpcToReflCoef(). 
+ * The description header can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +#define SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER 50 + +void WebRtcSpl_LpcToReflCoef(int16_t* a16, int use_order, int16_t* k16) +{ + int m, k; + int32_t tmp32[SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER]; + int32_t tmp_inv_denom32; + int16_t tmp_inv_denom16; + + k16[use_order - 1] = a16[use_order] << 3; // Q12<<3 => Q15 + for (m = use_order - 1; m > 0; m--) + { + // (1 - k^2) in Q30 + tmp_inv_denom32 = 1073741823 - k16[m] * k16[m]; + // (1 - k^2) in Q15 + tmp_inv_denom16 = (int16_t)(tmp_inv_denom32 >> 15); + + for (k = 1; k <= m; k++) + { + // tmp[k] = (a[k] - RC[m] * a[m-k+1]) / (1.0 - RC[m]*RC[m]); + + // [Q12<<16 - (Q15*Q12)<<1] = [Q28 - Q28] = Q28 + tmp32[k] = (a16[k] << 16) - (k16[m] * a16[m - k + 1] << 1); + + tmp32[k] = WebRtcSpl_DivW32W16(tmp32[k], tmp_inv_denom16); //Q28/Q15 = Q13 + } + + for (k = 1; k < m; k++) + { + a16[k] = (int16_t)(tmp32[k] >> 1); // Q13>>1 => Q12 + } + + tmp32[m] = WEBRTC_SPL_SAT(8191, tmp32[m], -8191); + k16[m - 1] = (int16_t)WEBRTC_SPL_LSHIFT_W32(tmp32[m], 2); //Q13<<2 => Q15 + } + return; +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations.c new file mode 100644 index 00000000..4a962f86 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/* + * This file contains the implementation of functions + * WebRtcSpl_MaxAbsValueW16C() + * WebRtcSpl_MaxAbsValueW32C() + * WebRtcSpl_MaxValueW16C() + * WebRtcSpl_MaxValueW32C() + * WebRtcSpl_MinValueW16C() + * WebRtcSpl_MinValueW32C() + * WebRtcSpl_MaxAbsIndexW16() + * WebRtcSpl_MaxIndexW16() + * WebRtcSpl_MaxIndexW32() + * WebRtcSpl_MinIndexW16() + * WebRtcSpl_MinIndexW32() + * + */ + +#include <assert.h> +#include <stdlib.h> + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +// TODO(bjorn/kma): Consolidate function pairs (e.g. combine +// WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.) +// TODO(kma): Move the next six functions into min_max_operations_c.c. + +// Maximum absolute value of word16 vector. C version for generic platforms. +int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) { + size_t i = 0; + int absolute = 0, maximum = 0; + + assert(length > 0); + + for (i = 0; i < length; i++) { + absolute = abs((int)vector[i]); + + if (absolute > maximum) { + maximum = absolute; + } + } + + // Guard the case for abs(-32768). + if (maximum > WEBRTC_SPL_WORD16_MAX) { + maximum = WEBRTC_SPL_WORD16_MAX; + } + + return (int16_t)maximum; +} + +// Maximum absolute value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) { + // Use uint32_t for the local variables, to accommodate the return value + // of abs(0x80000000), which is 0x80000000. + + uint32_t absolute = 0, maximum = 0; + size_t i = 0; + + assert(length > 0); + + for (i = 0; i < length; i++) { + absolute = abs((int)vector[i]); + if (absolute > maximum) { + maximum = absolute; + } + } + + maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); + + return (int32_t)maximum; +} + +// Maximum value of word16 vector. C version for generic platforms. 
+int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) { + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + size_t i = 0; + + assert(length > 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) + maximum = vector[i]; + } + return maximum; +} + +// Maximum value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) { + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + size_t i = 0; + + assert(length > 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) + maximum = vector[i]; + } + return maximum; +} + +// Minimum value of word16 vector. C version for generic platforms. +int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) { + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + size_t i = 0; + + assert(length > 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) + minimum = vector[i]; + } + return minimum; +} + +// Minimum value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) { + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + size_t i = 0; + + assert(length > 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) + minimum = vector[i]; + } + return minimum; +} + +// Index of maximum absolute value in a word16 vector. +size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) { + // Use type int for local variables, to accomodate the value of abs(-32768). + + size_t i = 0, index = 0; + int absolute = 0, maximum = 0; + + assert(length > 0); + + for (i = 0; i < length; i++) { + absolute = abs((int)vector[i]); + + if (absolute > maximum) { + maximum = absolute; + index = i; + } + } + + return index; +} + +// Index of maximum value in a word16 vector. 
+size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) { + size_t i = 0, index = 0; + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + + assert(length > 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) { + maximum = vector[i]; + index = i; + } + } + + return index; +} + +// Index of maximum value in a word32 vector. +size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) { + size_t i = 0, index = 0; + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + + assert(length > 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) { + maximum = vector[i]; + index = i; + } + } + + return index; +} + +// Index of minimum value in a word16 vector. +size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) { + size_t i = 0, index = 0; + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + + assert(length > 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) { + minimum = vector[i]; + index = i; + } + } + + return index; +} + +// Index of minimum value in a word32 vector. +size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) { + size_t i = 0, index = 0; + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + + assert(length > 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) { + minimum = vector[i]; + index = i; + } + } + + return index; +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_mips.c new file mode 100644 index 00000000..28de45b3 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_mips.c @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This file contains the implementation of function + * WebRtcSpl_MaxAbsValueW16() + * + * The description header can be found in signal_processing_library.h. + * + */ + +#include <assert.h> + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +// Maximum absolute value of word16 vector. +int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length) { + int32_t totMax = 0; + int32_t tmp32_0, tmp32_1, tmp32_2, tmp32_3; + size_t i, loop_size; + + assert(length > 0); + +#if defined(MIPS_DSP_R1) + const int32_t* tmpvec32 = (int32_t*)vector; + loop_size = length >> 4; + + for (i = 0; i < loop_size; i++) { + __asm__ volatile ( + "lw %[tmp32_0], 0(%[tmpvec32]) \n\t" + "lw %[tmp32_1], 4(%[tmpvec32]) \n\t" + "lw %[tmp32_2], 8(%[tmpvec32]) \n\t" + "lw %[tmp32_3], 12(%[tmpvec32]) \n\t" + + "absq_s.ph %[tmp32_0], %[tmp32_0] \n\t" + "absq_s.ph %[tmp32_1], %[tmp32_1] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" + "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" + + "lw %[tmp32_0], 16(%[tmpvec32]) \n\t" + "absq_s.ph %[tmp32_2], %[tmp32_2] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_1] \n\t" + "pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t" + + "lw %[tmp32_1], 20(%[tmpvec32]) \n\t" + "absq_s.ph %[tmp32_3], %[tmp32_3] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_2] \n\t" + "pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t" + + "lw %[tmp32_2], 24(%[tmpvec32]) \n\t" + "cmp.lt.ph %[totMax], %[tmp32_3] \n\t" + "pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t" + + "lw %[tmp32_3], 28(%[tmpvec32]) \n\t" + "absq_s.ph %[tmp32_0], %[tmp32_0] \n\t" + "absq_s.ph %[tmp32_1], %[tmp32_1] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" + "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" + + "absq_s.ph %[tmp32_2], %[tmp32_2] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_1] \n\t" + "pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t" + "absq_s.ph %[tmp32_3], %[tmp32_3] 
\n\t" + "cmp.lt.ph %[totMax], %[tmp32_2] \n\t" + "pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t" + + "cmp.lt.ph %[totMax], %[tmp32_3] \n\t" + "pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t" + + "addiu %[tmpvec32], %[tmpvec32], 32 \n\t" + : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), + [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3), + [totMax] "+r" (totMax), [tmpvec32] "+r" (tmpvec32) + : + : "memory" + ); + } + __asm__ volatile ( + "rotr %[tmp32_0], %[totMax], 16 \n\t" + "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" + "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" + "packrl.ph %[totMax], $0, %[totMax] \n\t" + : [tmp32_0] "=&r" (tmp32_0), [totMax] "+r" (totMax) + : + ); + loop_size = length & 0xf; + for (i = 0; i < loop_size; i++) { + __asm__ volatile ( + "lh %[tmp32_0], 0(%[tmpvec32]) \n\t" + "addiu %[tmpvec32], %[tmpvec32], 2 \n\t" + "absq_s.w %[tmp32_0], %[tmp32_0] \n\t" + "slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t" + : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), + [tmpvec32] "+r" (tmpvec32), [totMax] "+r" (totMax) + : + : "memory" + ); + } +#else // #if defined(MIPS_DSP_R1) + int32_t v16MaxMax = WEBRTC_SPL_WORD16_MAX; + int32_t r, r1, r2, r3; + const int16_t* tmpvector = vector; + loop_size = length >> 4; + for (i = 0; i < loop_size; i++) { + __asm__ volatile ( + "lh %[tmp32_0], 0(%[tmpvector]) \n\t" + "lh %[tmp32_1], 2(%[tmpvector]) \n\t" + "lh %[tmp32_2], 4(%[tmpvector]) \n\t" + "lh %[tmp32_3], 6(%[tmpvector]) \n\t" + + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" + + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" 
+ + "lh %[tmp32_0], 8(%[tmpvector]) \n\t" + "lh %[tmp32_1], 10(%[tmpvector]) \n\t" + "lh %[tmp32_2], 12(%[tmpvector]) \n\t" + "lh %[tmp32_3], 14(%[tmpvector]) \n\t" + + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" + + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" + + "lh %[tmp32_0], 16(%[tmpvector]) \n\t" + "lh %[tmp32_1], 18(%[tmpvector]) \n\t" + "lh %[tmp32_2], 20(%[tmpvector]) \n\t" + "lh %[tmp32_3], 22(%[tmpvector]) \n\t" + + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" + + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" + + "lh %[tmp32_0], 24(%[tmpvector]) \n\t" + "lh %[tmp32_1], 26(%[tmpvector]) \n\t" + "lh %[tmp32_2], 28(%[tmpvector]) \n\t" + "lh %[tmp32_3], 30(%[tmpvector]) \n\t" + + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" + + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" + + "addiu %[tmpvector], %[tmpvector], 32 
\n\t" + : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), + [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3), + [totMax] "+r" (totMax), [r] "=&r" (r), [tmpvector] "+r" (tmpvector), + [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3) + : + : "memory" + ); + } + loop_size = length & 0xf; + for (i = 0; i < loop_size; i++) { + __asm__ volatile ( + "lh %[tmp32_0], 0(%[tmpvector]) \n\t" + "addiu %[tmpvector], %[tmpvector], 2 \n\t" + "abs %[tmp32_0], %[tmp32_0] \n\t" + "slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t" + : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), + [tmpvector] "+r" (tmpvector), [totMax] "+r" (totMax) + : + : "memory" + ); + } + + __asm__ volatile ( + "slt %[r], %[v16MaxMax], %[totMax] \n\t" + "movn %[totMax], %[v16MaxMax], %[r] \n\t" + : [totMax] "+r" (totMax), [r] "=&r" (r) + : [v16MaxMax] "r" (v16MaxMax) + ); +#endif // #if defined(MIPS_DSP_R1) + return (int16_t)totMax; +} + +#if defined(MIPS_DSP_R1_LE) +// Maximum absolute value of word32 vector. Version for MIPS platform. +int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length) { + // Use uint32_t for the local variables, to accommodate the return value + // of abs(0x80000000), which is 0x80000000. 
+ + uint32_t absolute = 0, maximum = 0; + int tmp1 = 0, max_value = 0x7fffffff; + + assert(length > 0); + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lw %[absolute], 0(%[vector]) \n\t" + "absq_s.w %[absolute], %[absolute] \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[maximum], %[absolute] \n\t" + "movn %[maximum], %[absolute], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 4 \n\t" + "slt %[tmp1], %[max_value], %[maximum] \n\t" + "movn %[maximum], %[max_value], %[tmp1] \n\t" + + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [absolute] "+r" (absolute) + : [vector] "r" (vector), [length] "r" (length), [max_value] "r" (max_value) + : "memory" + ); + + return (int32_t)maximum; +} +#endif // #if defined(MIPS_DSP_R1_LE) + +// Maximum value of word16 vector. Version for MIPS platform. +int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length) { + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + int tmp1; + int16_t value; + + assert(length > 0); + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lh %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[maximum], %[value] \n\t" + "movn %[maximum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 2 \n\t" + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value) + : [vector] "r" (vector), [length] "r" (length) + : "memory" + ); + + return maximum; +} + +// Maximum value of word32 vector. Version for MIPS platform. 
+int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length) { + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + int tmp1, value; + + assert(length > 0); + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lw %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[maximum], %[value] \n\t" + "movn %[maximum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 4 \n\t" + + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value) + : [vector] "r" (vector), [length] "r" (length) + : "memory" + ); + + return maximum; +} + +// Minimum value of word16 vector. Version for MIPS platform. +int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length) { + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + int tmp1; + int16_t value; + + assert(length > 0); + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lh %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[value], %[minimum] \n\t" + "movn %[minimum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 2 \n\t" + + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value) + : [vector] "r" (vector), [length] "r" (length) + : "memory" + ); + + return minimum; +} + +// Minimum value of word32 vector. Version for MIPS platform. 
+int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length) { + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + int tmp1, value; + + assert(length > 0); + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lw %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[value], %[minimum] \n\t" + "movn %[minimum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 4 \n\t" + + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value) + : [vector] "r" (vector), [length] "r" (length) + : "memory" + ); + + return minimum; +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_neon.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_neon.c new file mode 100644 index 00000000..6fbbf94e --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_neon.c @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> +#include <assert.h> +#include <stdlib.h> + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +// Maximum absolute value of word16 vector. C version for generic platforms. 
+int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) { + int absolute = 0, maximum = 0; + + assert(length > 0); + + const int16_t* p_start = vector; + size_t rest = length & 7; + const int16_t* p_end = vector + length - rest; + + int16x8_t v; + uint16x8_t max_qv; + max_qv = vdupq_n_u16(0); + + while (p_start < p_end) { + v = vld1q_s16(p_start); + // Note vabs doesn't change the value of -32768. + v = vabsq_s16(v); + // Use u16 so we don't lose the value -32768. + max_qv = vmaxq_u16(max_qv, vreinterpretq_u16_s16(v)); + p_start += 8; + } + +#ifdef WEBRTC_ARCH_ARM64 + maximum = (int)vmaxvq_u16(max_qv); +#else + uint16x4_t max_dv; + max_dv = vmax_u16(vget_low_u16(max_qv), vget_high_u16(max_qv)); + max_dv = vpmax_u16(max_dv, max_dv); + max_dv = vpmax_u16(max_dv, max_dv); + + maximum = (int)vget_lane_u16(max_dv, 0); +#endif + + p_end = vector + length; + while (p_start < p_end) { + absolute = abs((int)(*p_start)); + + if (absolute > maximum) { + maximum = absolute; + } + p_start++; + } + + // Guard the case for abs(-32768). + if (maximum > WEBRTC_SPL_WORD16_MAX) { + maximum = WEBRTC_SPL_WORD16_MAX; + } + + return (int16_t)maximum; +} + +// Maximum absolute value of word32 vector. NEON intrinsics version for +// ARM 32-bit/64-bit platforms. +int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) { + // Use uint32_t for the local variables, to accommodate the return value + // of abs(0x80000000), which is 0x80000000. + + uint32_t absolute = 0, maximum = 0; + size_t i = 0; + size_t residual = length & 0x7; + + assert(length > 0); + + const int32_t* p_start = vector; + uint32x4_t max32x4_0 = vdupq_n_u32(0); + uint32x4_t max32x4_1 = vdupq_n_u32(0); + + // First part, unroll the loop 8 times. 
+ for (i = 0; i < length - residual; i += 8) { + int32x4_t in32x4_0 = vld1q_s32(p_start); + p_start += 4; + int32x4_t in32x4_1 = vld1q_s32(p_start); + p_start += 4; + in32x4_0 = vabsq_s32(in32x4_0); + in32x4_1 = vabsq_s32(in32x4_1); + // vabs doesn't change the value of 0x80000000. + // Use u32 so we don't lose the value 0x80000000. + max32x4_0 = vmaxq_u32(max32x4_0, vreinterpretq_u32_s32(in32x4_0)); + max32x4_1 = vmaxq_u32(max32x4_1, vreinterpretq_u32_s32(in32x4_1)); + } + + uint32x4_t max32x4 = vmaxq_u32(max32x4_0, max32x4_1); +#if defined(WEBRTC_ARCH_ARM64) + maximum = vmaxvq_u32(max32x4); +#else + uint32x2_t max32x2 = vmax_u32(vget_low_u32(max32x4), vget_high_u32(max32x4)); + max32x2 = vpmax_u32(max32x2, max32x2); + + maximum = vget_lane_u32(max32x2, 0); +#endif + + // Second part, do the remaining iterations (if any). + for (i = residual; i > 0; i--) { + absolute = abs((int)(*p_start)); + if (absolute > maximum) { + maximum = absolute; + } + p_start++; + } + + // Guard against the case for 0x80000000. + maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); + + return (int32_t)maximum; +} + +// Maximum value of word16 vector. NEON intrinsics version for +// ARM 32-bit/64-bit platforms. +int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) { + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + size_t i = 0; + size_t residual = length & 0x7; + + assert(length > 0); + + const int16_t* p_start = vector; + int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); + + // First part, unroll the loop 8 times. 
+ for (i = 0; i < length - residual; i += 8) { + int16x8_t in16x8 = vld1q_s16(p_start); + max16x8 = vmaxq_s16(max16x8, in16x8); + p_start += 8; + } + +#if defined(WEBRTC_ARCH_ARM64) + maximum = vmaxvq_s16(max16x8); +#else + int16x4_t max16x4 = vmax_s16(vget_low_s16(max16x8), vget_high_s16(max16x8)); + max16x4 = vpmax_s16(max16x4, max16x4); + max16x4 = vpmax_s16(max16x4, max16x4); + + maximum = vget_lane_s16(max16x4, 0); +#endif + + // Second part, do the remaining iterations (if any). + for (i = residual; i > 0; i--) { + if (*p_start > maximum) + maximum = *p_start; + p_start++; + } + return maximum; +} + +// Maximum value of word32 vector. NEON intrinsics version for +// ARM 32-bit/64-bit platforms. +int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) { + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + size_t i = 0; + size_t residual = length & 0x7; + + assert(length > 0); + + const int32_t* p_start = vector; + int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); + int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); + + // First part, unroll the loop 8 times. + for (i = 0; i < length - residual; i += 8) { + int32x4_t in32x4_0 = vld1q_s32(p_start); + p_start += 4; + int32x4_t in32x4_1 = vld1q_s32(p_start); + p_start += 4; + max32x4_0 = vmaxq_s32(max32x4_0, in32x4_0); + max32x4_1 = vmaxq_s32(max32x4_1, in32x4_1); + } + + int32x4_t max32x4 = vmaxq_s32(max32x4_0, max32x4_1); +#if defined(WEBRTC_ARCH_ARM64) + maximum = vmaxvq_s32(max32x4); +#else + int32x2_t max32x2 = vmax_s32(vget_low_s32(max32x4), vget_high_s32(max32x4)); + max32x2 = vpmax_s32(max32x2, max32x2); + + maximum = vget_lane_s32(max32x2, 0); +#endif + + // Second part, do the remaining iterations (if any). + for (i = residual; i > 0; i--) { + if (*p_start > maximum) + maximum = *p_start; + p_start++; + } + return maximum; +} + +// Minimum value of word16 vector. NEON intrinsics version for +// ARM 32-bit/64-bit platforms. 
+int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) { + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + size_t i = 0; + size_t residual = length & 0x7; + + assert(length > 0); + + const int16_t* p_start = vector; + int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); + + // First part, unroll the loop 8 times. + for (i = 0; i < length - residual; i += 8) { + int16x8_t in16x8 = vld1q_s16(p_start); + min16x8 = vminq_s16(min16x8, in16x8); + p_start += 8; + } + +#if defined(WEBRTC_ARCH_ARM64) + minimum = vminvq_s16(min16x8); +#else + int16x4_t min16x4 = vmin_s16(vget_low_s16(min16x8), vget_high_s16(min16x8)); + min16x4 = vpmin_s16(min16x4, min16x4); + min16x4 = vpmin_s16(min16x4, min16x4); + + minimum = vget_lane_s16(min16x4, 0); +#endif + + // Second part, do the remaining iterations (if any). + for (i = residual; i > 0; i--) { + if (*p_start < minimum) + minimum = *p_start; + p_start++; + } + return minimum; +} + +// Minimum value of word32 vector. NEON intrinsics version for +// ARM 32-bit/64-bit platforms. +int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) { + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + size_t i = 0; + size_t residual = length & 0x7; + + assert(length > 0); + + const int32_t* p_start = vector; + int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); + int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); + + // First part, unroll the loop 8 times. 
+ for (i = 0; i < length - residual; i += 8) { + int32x4_t in32x4_0 = vld1q_s32(p_start); + p_start += 4; + int32x4_t in32x4_1 = vld1q_s32(p_start); + p_start += 4; + min32x4_0 = vminq_s32(min32x4_0, in32x4_0); + min32x4_1 = vminq_s32(min32x4_1, in32x4_1); + } + + int32x4_t min32x4 = vminq_s32(min32x4_0, min32x4_1); +#if defined(WEBRTC_ARCH_ARM64) + minimum = vminvq_s32(min32x4); +#else + int32x2_t min32x2 = vmin_s32(vget_low_s32(min32x4), vget_high_s32(min32x4)); + min32x2 = vpmin_s32(min32x2, min32x2); + + minimum = vget_lane_s32(min32x2, 0); +#endif + + // Second part, do the remaining iterations (if any). + for (i = residual; i > 0; i--) { + if (*p_start < minimum) + minimum = *p_start; + p_start++; + } + return minimum; +} + diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/randomization_functions.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/randomization_functions.c new file mode 100644 index 00000000..73f24093 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/randomization_functions.c @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +/* + * This file contains implementations of the randomization functions + * WebRtcSpl_RandU() + * WebRtcSpl_RandN() + * WebRtcSpl_RandUArray() + * + * The description header can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +static const uint32_t kMaxSeedUsed = 0x80000000; + +static const int16_t kRandNTable[] = { + 9178, -7260, 40, 10189, 4894, -3531, -13779, 14764, + -4008, -8884, -8990, 1008, 7368, 5184, 3251, -5817, + -9786, 5963, 1770, 8066, -7135, 10772, -2298, 1361, + 6484, 2241, -8633, 792, 199, -3344, 6553, -10079, + -15040, 95, 11608, -12469, 14161, -4176, 2476, 6403, + 13685, -16005, 6646, 2239, 10916, -3004, -602, -3141, + 2142, 14144, -5829, 5305, 8209, 4713, 2697, -5112, + 16092, -1210, -2891, -6631, -5360, -11878, -6781, -2739, + -6392, 536, 10923, 10872, 5059, -4748, -7770, 5477, + 38, -1025, -2892, 1638, 6304, 14375, -11028, 1553, + -1565, 10762, -393, 4040, 5257, 12310, 6554, -4799, + 4899, -6354, 1603, -1048, -2220, 8247, -186, -8944, + -12004, 2332, 4801, -4933, 6371, 131, 8614, -5927, + -8287, -22760, 4033, -15162, 3385, 3246, 3153, -5250, + 3766, 784, 6494, -62, 3531, -1582, 15572, 662, + -3952, -330, -3196, 669, 7236, -2678, -6569, 23319, + -8645, -741, 14830, -15976, 4903, 315, -11342, 10311, + 1858, -7777, 2145, 5436, 5677, -113, -10033, 826, + -1353, 17210, 7768, 986, -1471, 8291, -4982, 8207, + -14911, -6255, -2449, -11881, -7059, -11703, -4338, 8025, + 7538, -2823, -12490, 9470, -1613, -2529, -10092, -7807, + 9480, 6970, -12844, 5123, 3532, 4816, 4803, -8455, + -5045, 14032, -4378, -1643, 5756, -11041, -2732, -16618, + -6430, -18375, -3320, 6098, 5131, -4269, -8840, 2482, + -7048, 1547, -21890, -6505, -7414, -424, -11722, 7955, + 1653, -17299, 1823, 473, -9232, 3337, 1111, 873, + 4018, -8982, 9889, 3531, -11763, -3799, 7373, -4539, + 3231, 7054, -8537, 7616, 6244, 16635, 447, -2915, + 13967, 705, -2669, -1520, -1771, -16188, 5956, 
5117, + 6371, -9936, -1448, 2480, 5128, 7550, -8130, 5236, + 8213, -6443, 7707, -1950, -13811, 7218, 7031, -3883, + 67, 5731, -2874, 13480, -3743, 9298, -3280, 3552, + -4425, -18, -3785, -9988, -5357, 5477, -11794, 2117, + 1416, -9935, 3376, 802, -5079, -8243, 12652, 66, + 3653, -2368, 6781, -21895, -7227, 2487, 7839, -385, + 6646, -7016, -4658, 5531, -1705, 834, 129, 3694, + -1343, 2238, -22640, -6417, -11139, 11301, -2945, -3494, + -5626, 185, -3615, -2041, -7972, -3106, -60, -23497, + -1566, 17064, 3519, 2518, 304, -6805, -10269, 2105, + 1936, -426, -736, -8122, -1467, 4238, -6939, -13309, + 360, 7402, -7970, 12576, 3287, 12194, -6289, -16006, + 9171, 4042, -9193, 9123, -2512, 6388, -4734, -8739, + 1028, -5406, -1696, 5889, -666, -4736, 4971, 3565, + 9362, -6292, 3876, -3652, -19666, 7523, -4061, 391, + -11773, 7502, -3763, 4929, -9478, 13278, 2805, 4496, + 7814, 16419, 12455, -14773, 2127, -2746, 3763, 4847, + 3698, 6978, 4751, -6957, -3581, -45, 6252, 1513, + -4797, -7925, 11270, 16188, -2359, -5269, 9376, -10777, + 7262, 20031, -6515, -2208, -5353, 8085, -1341, -1303, + 7333, 5576, 3625, 5763, -7931, 9833, -3371, -10305, + 6534, -13539, -9971, 997, 8464, -4064, -1495, 1857, + 13624, 5458, 9490, -11086, -4524, 12022, -550, -198, + 408, -8455, -7068, 10289, 9712, -3366, 9028, -7621, + -5243, 2362, 6909, 4672, -4933, -1799, 4709, -4563, + -62, -566, 1624, -7010, 14730, -17791, -3697, -2344, + -1741, 7099, -9509, -6855, -1989, 3495, -2289, 2031, + 12784, 891, 14189, -3963, -5683, 421, -12575, 1724, + -12682, -5970, -8169, 3143, -1824, -5488, -5130, 8536, + 12799, 794, 5738, 3459, -11689, -258, -3738, -3775, + -8742, 2333, 8312, -9383, 10331, 13119, 8398, 10644, + -19433, -6446, -16277, -11793, 16284, 9345, 15222, 15834, + 2009, -7349, 130, -14547, 338, -5998, 3337, 21492, + 2406, 7703, -951, 11196, -564, 3406, 2217, 4806, + 2374, -5797, 11839, 8940, -11874, 18213, 2855, 10492 +}; + +static uint32_t IncreaseSeed(uint32_t* seed) { + seed[0] = (seed[0] * 
((int32_t)69069) + 1) & (kMaxSeedUsed - 1); + return seed[0]; +} + +int16_t WebRtcSpl_RandU(uint32_t* seed) { + return (int16_t)(IncreaseSeed(seed) >> 16); +} + +int16_t WebRtcSpl_RandN(uint32_t* seed) { + return kRandNTable[IncreaseSeed(seed) >> 23]; +} + +// Creates an array of uniformly distributed variables. +int16_t WebRtcSpl_RandUArray(int16_t* vector, + int16_t vector_length, + uint32_t* seed) { + int i; + for (i = 0; i < vector_length; i++) { + vector[i] = WebRtcSpl_RandU(seed); + } + return vector_length; +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft.c new file mode 100644 index 00000000..92daae4d --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/signal_processing/include/real_fft.h" + +#include <stdlib.h> + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +struct RealFFT { + int order; +}; + +struct RealFFT* WebRtcSpl_CreateRealFFT(int order) { + struct RealFFT* self = NULL; + + if (order > kMaxFFTOrder || order < 0) { + return NULL; + } + + self = malloc(sizeof(struct RealFFT)); + if (self == NULL) { + return NULL; + } + self->order = order; + + return self; +} + +void WebRtcSpl_FreeRealFFT(struct RealFFT* self) { + if (self != NULL) { + free(self); + } +} + +// The C version FFT functions (i.e. 
WebRtcSpl_RealForwardFFT and +// WebRtcSpl_RealInverseFFT) are real-valued FFT wrappers for complex-valued +// FFT implementation in SPL. + +int WebRtcSpl_RealForwardFFT(struct RealFFT* self, + const int16_t* real_data_in, + int16_t* complex_data_out) { + int i = 0; + int j = 0; + int result = 0; + int n = 1 << self->order; + // The complex-value FFT implementation needs a buffer to hold 2^order + // 16-bit COMPLEX numbers, for both time and frequency data. + int16_t complex_buffer[2 << kMaxFFTOrder]; + + // Insert zeros to the imaginary parts for complex forward FFT input. + for (i = 0, j = 0; i < n; i += 1, j += 2) { + complex_buffer[j] = real_data_in[i]; + complex_buffer[j + 1] = 0; + }; + + WebRtcSpl_ComplexBitReverse(complex_buffer, self->order); + result = WebRtcSpl_ComplexFFT(complex_buffer, self->order, 1); + + // For real FFT output, use only the first N + 2 elements from + // complex forward FFT. + memcpy(complex_data_out, complex_buffer, sizeof(int16_t) * (n + 2)); + + return result; +} + +int WebRtcSpl_RealInverseFFT(struct RealFFT* self, + const int16_t* complex_data_in, + int16_t* real_data_out) { + int i = 0; + int j = 0; + int result = 0; + int n = 1 << self->order; + // Create the buffer specific to complex-valued FFT implementation. + int16_t complex_buffer[2 << kMaxFFTOrder]; + + // For n-point FFT, first copy the first n + 2 elements into complex + // FFT, then construct the remaining n - 2 elements by real FFT's + // conjugate-symmetric properties. + memcpy(complex_buffer, complex_data_in, sizeof(int16_t) * (n + 2)); + for (i = n + 2; i < 2 * n; i += 2) { + complex_buffer[i] = complex_data_in[2 * n - i]; + complex_buffer[i + 1] = -complex_data_in[2 * n - i + 1]; + } + + WebRtcSpl_ComplexBitReverse(complex_buffer, self->order); + result = WebRtcSpl_ComplexIFFT(complex_buffer, self->order, 1); + + // Strip out the imaginary parts of the complex inverse FFT output. 
+ for (i = 0, j = 0; i < n; i += 1, j += 2) { + real_data_out[i] = complex_buffer[j]; + } + + return result; +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft_unittest.cc b/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft_unittest.cc new file mode 100644 index 00000000..9bd35cd6 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft_unittest.cc @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/signal_processing/include/real_fft.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/test/testsupport/gtest_disable.h" +#include "webrtc/typedefs.h" + +#include "testing/gtest/include/gtest/gtest.h" + +namespace webrtc { +namespace { + +// FFT order. +const int kOrder = 5; +// Lengths for real FFT's time and frequency bufffers. +// For N-point FFT, the length requirements from API are N and N+2 respectively. +const int kTimeDataLength = 1 << kOrder; +const int kFreqDataLength = (1 << kOrder) + 2; +// For complex FFT's time and freq buffer. The implementation requires +// 2*N 16-bit words. +const int kComplexFftDataLength = 2 << kOrder; +// Reference data for time signal. 
+const int16_t kRefData[kTimeDataLength] = { + 11739, 6848, -8688, 31980, -30295, 25242, 27085, 19410, + -26299, 15607, -10791, 11778, -23819, 14498, -25772, 10076, + 1173, 6848, -8688, 31980, -30295, 2522, 27085, 19410, + -2629, 5607, -3, 1178, -23819, 1498, -25772, 10076 +}; + +class RealFFTTest : public ::testing::Test { + protected: + RealFFTTest() { + WebRtcSpl_Init(); + } +}; + +TEST_F(RealFFTTest, CreateFailsOnBadInput) { + RealFFT* fft = WebRtcSpl_CreateRealFFT(11); + EXPECT_TRUE(fft == NULL); + fft = WebRtcSpl_CreateRealFFT(-1); + EXPECT_TRUE(fft == NULL); +} + +TEST_F(RealFFTTest, RealAndComplexMatch) { + int i = 0; + int j = 0; + int16_t real_fft_time[kTimeDataLength] = {0}; + int16_t real_fft_freq[kFreqDataLength] = {0}; + // One common buffer for complex FFT's time and frequency data. + int16_t complex_fft_buff[kComplexFftDataLength] = {0}; + + // Prepare the inputs to forward FFT's. + memcpy(real_fft_time, kRefData, sizeof(kRefData)); + for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) { + complex_fft_buff[j] = kRefData[i]; + complex_fft_buff[j + 1] = 0; // Insert zero's to imaginary parts. + }; + + // Create and run real forward FFT. + RealFFT* fft = WebRtcSpl_CreateRealFFT(kOrder); + EXPECT_TRUE(fft != NULL); + EXPECT_EQ(0, WebRtcSpl_RealForwardFFT(fft, real_fft_time, real_fft_freq)); + + // Run complex forward FFT. + WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder); + EXPECT_EQ(0, WebRtcSpl_ComplexFFT(complex_fft_buff, kOrder, 1)); + + // Verify the results between complex and real forward FFT. + for (i = 0; i < kFreqDataLength; i++) { + EXPECT_EQ(real_fft_freq[i], complex_fft_buff[i]); + } + + // Prepare the inputs to inverse real FFT. + // We use whatever data in complex_fft_buff[] since we don't care + // about data contents. Only kFreqDataLength 16-bit words are copied + // from complex_fft_buff to real_fft_freq since remaining words (2nd half) + // are conjugate-symmetric to the first half in theory. 
+ memcpy(real_fft_freq, complex_fft_buff, sizeof(real_fft_freq)); + + // Run real inverse FFT. + int real_scale = WebRtcSpl_RealInverseFFT(fft, real_fft_freq, real_fft_time); + EXPECT_GE(real_scale, 0); + + // Run complex inverse FFT. + WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder); + int complex_scale = WebRtcSpl_ComplexIFFT(complex_fft_buff, kOrder, 1); + + // Verify the results between complex and real inverse FFT. + // They are not bit-exact, since complex IFFT doesn't produce + // exactly conjugate-symmetric data (between first and second half). + EXPECT_EQ(real_scale, complex_scale); + for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) { + EXPECT_LE(abs(real_fft_time[i] - complex_fft_buff[j]), 1); + } + + WebRtcSpl_FreeRealFFT(fft); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/refl_coef_to_lpc.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/refl_coef_to_lpc.c new file mode 100644 index 00000000..06a29b66 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/refl_coef_to_lpc.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_ReflCoefToLpc(). 
+ * The description header can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +void WebRtcSpl_ReflCoefToLpc(const int16_t *k, int use_order, int16_t *a) +{ + int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1]; + int16_t *aptr, *aptr2, *anyptr; + const int16_t *kptr; + int m, i; + + kptr = k; + *a = 4096; // i.e., (Word16_MAX >> 3)+1. + *any = *a; + a[1] = *k >> 3; + + for (m = 1; m < use_order; m++) + { + kptr++; + aptr = a; + aptr++; + aptr2 = &a[m]; + anyptr = any; + anyptr++; + + any[m + 1] = *kptr >> 3; + for (i = 0; i < m; i++) + { + *anyptr = *aptr + (int16_t)((*aptr2 * *kptr) >> 15); + anyptr++; + aptr++; + aptr2--; + } + + aptr = a; + anyptr = any; + for (i = 0; i < (m + 2); i++) + { + *aptr = *anyptr; + aptr++; + anyptr++; + } + } +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample.c new file mode 100644 index 00000000..45fe52aa --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample.c @@ -0,0 +1,505 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the resampling functions for 22 kHz. 
+ * The description header can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h" + +// Declaration of internally used functions +static void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In, int16_t *Out, + int32_t K); + +void WebRtcSpl_32khzTo22khzIntToInt(const int32_t *In, int32_t *Out, + int32_t K); + +// interpolation coefficients +static const int16_t kCoefficients32To22[5][9] = { + {127, -712, 2359, -6333, 23456, 16775, -3695, 945, -154}, + {-39, 230, -830, 2785, 32366, -2324, 760, -218, 38}, + {117, -663, 2222, -6133, 26634, 13070, -3174, 831, -137}, + {-77, 457, -1677, 5958, 31175, -4136, 1405, -408, 71}, + { 98, -560, 1900, -5406, 29240, 9423, -2480, 663, -110} +}; + +////////////////////// +// 22 kHz -> 16 kHz // +////////////////////// + +// number of subblocks; options: 1, 2, 4, 5, 10 +#define SUB_BLOCKS_22_16 5 + +// 22 -> 16 resampler +void WebRtcSpl_Resample22khzTo16khz(const int16_t* in, int16_t* out, + WebRtcSpl_State22khzTo16khz* state, int32_t* tmpmem) +{ + int k; + + // process two blocks of 10/SUB_BLOCKS_22_16 ms (to reduce temp buffer size) + for (k = 0; k < SUB_BLOCKS_22_16; k++) + { + ///// 22 --> 44 ///// + // int16_t in[220/SUB_BLOCKS_22_16] + // int32_t out[440/SUB_BLOCKS_22_16] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 220 / SUB_BLOCKS_22_16, tmpmem + 16, state->S_22_44); + + ///// 44 --> 32 ///// + // int32_t in[440/SUB_BLOCKS_22_16] + // int32_t out[320/SUB_BLOCKS_22_16] + ///// + // copy state to and from input array + tmpmem[8] = state->S_44_32[0]; + tmpmem[9] = state->S_44_32[1]; + tmpmem[10] = state->S_44_32[2]; + tmpmem[11] = state->S_44_32[3]; + tmpmem[12] = state->S_44_32[4]; + tmpmem[13] = state->S_44_32[5]; + tmpmem[14] = state->S_44_32[6]; + tmpmem[15] = state->S_44_32[7]; + state->S_44_32[0] = tmpmem[440 / SUB_BLOCKS_22_16 + 8]; + state->S_44_32[1] = tmpmem[440 / 
SUB_BLOCKS_22_16 + 9]; + state->S_44_32[2] = tmpmem[440 / SUB_BLOCKS_22_16 + 10]; + state->S_44_32[3] = tmpmem[440 / SUB_BLOCKS_22_16 + 11]; + state->S_44_32[4] = tmpmem[440 / SUB_BLOCKS_22_16 + 12]; + state->S_44_32[5] = tmpmem[440 / SUB_BLOCKS_22_16 + 13]; + state->S_44_32[6] = tmpmem[440 / SUB_BLOCKS_22_16 + 14]; + state->S_44_32[7] = tmpmem[440 / SUB_BLOCKS_22_16 + 15]; + + WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 40 / SUB_BLOCKS_22_16); + + ///// 32 --> 16 ///// + // int32_t in[320/SUB_BLOCKS_22_16] + // int32_t out[160/SUB_BLOCKS_22_16] + ///// + WebRtcSpl_DownBy2IntToShort(tmpmem, 320 / SUB_BLOCKS_22_16, out, state->S_32_16); + + // move input/output pointers 10/SUB_BLOCKS_22_16 ms seconds ahead + in += 220 / SUB_BLOCKS_22_16; + out += 160 / SUB_BLOCKS_22_16; + } +} + +// initialize state of 22 -> 16 resampler +void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state) +{ + int k; + for (k = 0; k < 8; k++) + { + state->S_22_44[k] = 0; + state->S_44_32[k] = 0; + state->S_32_16[k] = 0; + } +} + +////////////////////// +// 16 kHz -> 22 kHz // +////////////////////// + +// number of subblocks; options: 1, 2, 4, 5, 10 +#define SUB_BLOCKS_16_22 4 + +// 16 -> 22 resampler +void WebRtcSpl_Resample16khzTo22khz(const int16_t* in, int16_t* out, + WebRtcSpl_State16khzTo22khz* state, int32_t* tmpmem) +{ + int k; + + // process two blocks of 10/SUB_BLOCKS_16_22 ms (to reduce temp buffer size) + for (k = 0; k < SUB_BLOCKS_16_22; k++) + { + ///// 16 --> 32 ///// + // int16_t in[160/SUB_BLOCKS_16_22] + // int32_t out[320/SUB_BLOCKS_16_22] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 160 / SUB_BLOCKS_16_22, tmpmem + 8, state->S_16_32); + + ///// 32 --> 22 ///// + // int32_t in[320/SUB_BLOCKS_16_22] + // int32_t out[220/SUB_BLOCKS_16_22] + ///// + // copy state to and from input array + tmpmem[0] = state->S_32_22[0]; + tmpmem[1] = state->S_32_22[1]; + tmpmem[2] = state->S_32_22[2]; + tmpmem[3] = state->S_32_22[3]; + tmpmem[4] = state->S_32_22[4]; + 
tmpmem[5] = state->S_32_22[5]; + tmpmem[6] = state->S_32_22[6]; + tmpmem[7] = state->S_32_22[7]; + state->S_32_22[0] = tmpmem[320 / SUB_BLOCKS_16_22]; + state->S_32_22[1] = tmpmem[320 / SUB_BLOCKS_16_22 + 1]; + state->S_32_22[2] = tmpmem[320 / SUB_BLOCKS_16_22 + 2]; + state->S_32_22[3] = tmpmem[320 / SUB_BLOCKS_16_22 + 3]; + state->S_32_22[4] = tmpmem[320 / SUB_BLOCKS_16_22 + 4]; + state->S_32_22[5] = tmpmem[320 / SUB_BLOCKS_16_22 + 5]; + state->S_32_22[6] = tmpmem[320 / SUB_BLOCKS_16_22 + 6]; + state->S_32_22[7] = tmpmem[320 / SUB_BLOCKS_16_22 + 7]; + + WebRtcSpl_32khzTo22khzIntToShort(tmpmem, out, 20 / SUB_BLOCKS_16_22); + + // move input/output pointers 10/SUB_BLOCKS_16_22 ms seconds ahead + in += 160 / SUB_BLOCKS_16_22; + out += 220 / SUB_BLOCKS_16_22; + } +} + +// initialize state of 16 -> 22 resampler +void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state) +{ + int k; + for (k = 0; k < 8; k++) + { + state->S_16_32[k] = 0; + state->S_32_22[k] = 0; + } +} + +////////////////////// +// 22 kHz -> 8 kHz // +////////////////////// + +// number of subblocks; options: 1, 2, 5, 10 +#define SUB_BLOCKS_22_8 2 + +// 22 -> 8 resampler +void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, int16_t* out, + WebRtcSpl_State22khzTo8khz* state, int32_t* tmpmem) +{ + int k; + + // process two blocks of 10/SUB_BLOCKS_22_8 ms (to reduce temp buffer size) + for (k = 0; k < SUB_BLOCKS_22_8; k++) + { + ///// 22 --> 22 lowpass ///// + // int16_t in[220/SUB_BLOCKS_22_8] + // int32_t out[220/SUB_BLOCKS_22_8] + ///// + WebRtcSpl_LPBy2ShortToInt(in, 220 / SUB_BLOCKS_22_8, tmpmem + 16, state->S_22_22); + + ///// 22 --> 16 ///// + // int32_t in[220/SUB_BLOCKS_22_8] + // int32_t out[160/SUB_BLOCKS_22_8] + ///// + // copy state to and from input array + tmpmem[8] = state->S_22_16[0]; + tmpmem[9] = state->S_22_16[1]; + tmpmem[10] = state->S_22_16[2]; + tmpmem[11] = state->S_22_16[3]; + tmpmem[12] = state->S_22_16[4]; + tmpmem[13] = state->S_22_16[5]; + tmpmem[14] = 
state->S_22_16[6]; + tmpmem[15] = state->S_22_16[7]; + state->S_22_16[0] = tmpmem[220 / SUB_BLOCKS_22_8 + 8]; + state->S_22_16[1] = tmpmem[220 / SUB_BLOCKS_22_8 + 9]; + state->S_22_16[2] = tmpmem[220 / SUB_BLOCKS_22_8 + 10]; + state->S_22_16[3] = tmpmem[220 / SUB_BLOCKS_22_8 + 11]; + state->S_22_16[4] = tmpmem[220 / SUB_BLOCKS_22_8 + 12]; + state->S_22_16[5] = tmpmem[220 / SUB_BLOCKS_22_8 + 13]; + state->S_22_16[6] = tmpmem[220 / SUB_BLOCKS_22_8 + 14]; + state->S_22_16[7] = tmpmem[220 / SUB_BLOCKS_22_8 + 15]; + + WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 20 / SUB_BLOCKS_22_8); + + ///// 16 --> 8 ///// + // int32_t in[160/SUB_BLOCKS_22_8] + // int32_t out[80/SUB_BLOCKS_22_8] + ///// + WebRtcSpl_DownBy2IntToShort(tmpmem, 160 / SUB_BLOCKS_22_8, out, state->S_16_8); + + // move input/output pointers 10/SUB_BLOCKS_22_8 ms seconds ahead + in += 220 / SUB_BLOCKS_22_8; + out += 80 / SUB_BLOCKS_22_8; + } +} + +// initialize state of 22 -> 8 resampler +void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state) +{ + int k; + for (k = 0; k < 8; k++) + { + state->S_22_22[k] = 0; + state->S_22_22[k + 8] = 0; + state->S_22_16[k] = 0; + state->S_16_8[k] = 0; + } +} + +////////////////////// +// 8 kHz -> 22 kHz // +////////////////////// + +// number of subblocks; options: 1, 2, 5, 10 +#define SUB_BLOCKS_8_22 2 + +// 8 -> 22 resampler +void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, int16_t* out, + WebRtcSpl_State8khzTo22khz* state, int32_t* tmpmem) +{ + int k; + + // process two blocks of 10/SUB_BLOCKS_8_22 ms (to reduce temp buffer size) + for (k = 0; k < SUB_BLOCKS_8_22; k++) + { + ///// 8 --> 16 ///// + // int16_t in[80/SUB_BLOCKS_8_22] + // int32_t out[160/SUB_BLOCKS_8_22] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 80 / SUB_BLOCKS_8_22, tmpmem + 18, state->S_8_16); + + ///// 16 --> 11 ///// + // int32_t in[160/SUB_BLOCKS_8_22] + // int32_t out[110/SUB_BLOCKS_8_22] + ///// + // copy state to and from input array + tmpmem[10] = state->S_16_11[0]; + 
tmpmem[11] = state->S_16_11[1]; + tmpmem[12] = state->S_16_11[2]; + tmpmem[13] = state->S_16_11[3]; + tmpmem[14] = state->S_16_11[4]; + tmpmem[15] = state->S_16_11[5]; + tmpmem[16] = state->S_16_11[6]; + tmpmem[17] = state->S_16_11[7]; + state->S_16_11[0] = tmpmem[160 / SUB_BLOCKS_8_22 + 10]; + state->S_16_11[1] = tmpmem[160 / SUB_BLOCKS_8_22 + 11]; + state->S_16_11[2] = tmpmem[160 / SUB_BLOCKS_8_22 + 12]; + state->S_16_11[3] = tmpmem[160 / SUB_BLOCKS_8_22 + 13]; + state->S_16_11[4] = tmpmem[160 / SUB_BLOCKS_8_22 + 14]; + state->S_16_11[5] = tmpmem[160 / SUB_BLOCKS_8_22 + 15]; + state->S_16_11[6] = tmpmem[160 / SUB_BLOCKS_8_22 + 16]; + state->S_16_11[7] = tmpmem[160 / SUB_BLOCKS_8_22 + 17]; + + WebRtcSpl_32khzTo22khzIntToInt(tmpmem + 10, tmpmem, 10 / SUB_BLOCKS_8_22); + + ///// 11 --> 22 ///// + // int32_t in[110/SUB_BLOCKS_8_22] + // int16_t out[220/SUB_BLOCKS_8_22] + ///// + WebRtcSpl_UpBy2IntToShort(tmpmem, 110 / SUB_BLOCKS_8_22, out, state->S_11_22); + + // move input/output pointers 10/SUB_BLOCKS_8_22 ms seconds ahead + in += 80 / SUB_BLOCKS_8_22; + out += 220 / SUB_BLOCKS_8_22; + } +} + +// initialize state of 8 -> 22 resampler +void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state) +{ + int k; + for (k = 0; k < 8; k++) + { + state->S_8_16[k] = 0; + state->S_16_11[k] = 0; + state->S_11_22[k] = 0; + } +} + +// compute two inner-products and store them to output array +static void WebRtcSpl_DotProdIntToInt(const int32_t* in1, const int32_t* in2, + const int16_t* coef_ptr, int32_t* out1, + int32_t* out2) +{ + int32_t tmp1 = 16384; + int32_t tmp2 = 16384; + int16_t coef; + + coef = coef_ptr[0]; + tmp1 += coef * in1[0]; + tmp2 += coef * in2[-0]; + + coef = coef_ptr[1]; + tmp1 += coef * in1[1]; + tmp2 += coef * in2[-1]; + + coef = coef_ptr[2]; + tmp1 += coef * in1[2]; + tmp2 += coef * in2[-2]; + + coef = coef_ptr[3]; + tmp1 += coef * in1[3]; + tmp2 += coef * in2[-3]; + + coef = coef_ptr[4]; + tmp1 += coef * in1[4]; + tmp2 += coef * in2[-4]; + + 
// Computes two 9-tap inner products with a shared coefficient set and stores
// them as rounded, saturated 16-bit samples.
//
//   *out1 = sat16((16384 + sum_{t=0..8} coef_ptr[t] * in1[t])  >> 15)
//   *out2 = sat16((16384 + sum_{t=0..8} coef_ptr[t] * in2[-t]) >> 15)
//
// in1 is read forwards (in1[0] .. in1[8]); in2 is read backwards
// (in2[0] down to in2[-8]), so in2 must point at the LAST sample of its
// 9-sample window.
//
// The sums are accumulated in 64 bits: the inputs are "normalized, not
// saturated" 32-bit values, and nine products of a 16-bit coefficient with
// such a value can exceed the int32_t range, which would be undefined
// behaviour before the saturation step ever runs. Results are unchanged
// whenever the 32-bit sum would not have overflowed.
static void WebRtcSpl_DotProdIntToShort(const int32_t* in1, const int32_t* in2,
                                        const int16_t* coef_ptr, int16_t* out1,
                                        int16_t* out2)
{
    // 16384 == 1 << 14: rounding offset for the final >> 15
    int64_t acc1 = 16384;
    int64_t acc2 = 16384;
    int tap;

    for (tap = 0; tap < 9; tap++)
    {
        const int64_t coef = coef_ptr[tap];

        acc1 += coef * in1[tap];
        acc2 += coef * in2[-tap];
    }

    // scale down (rounding offset already included) and saturate to int16_t
    acc1 >>= 15;
    if (acc1 > 32767)
    {
        acc1 = 32767;
    }
    if (acc1 < -32768)
    {
        acc1 = -32768;
    }

    acc2 >>= 15;
    if (acc2 > 32767)
    {
        acc2 = 32767;
    }
    if (acc2 < -32768)
    {
        acc2 = -32768;
    }

    *out1 = (int16_t)acc1;
    *out2 = (int16_t)acc2;
}
resampling (16 input samples -> 11 output samples); + // process in sub blocks of size 16 samples. + int32_t m; + + for (m = 0; m < K; m++) + { + // first output sample + Out[0] = ((int32_t)In[3] << 15) + (1 << 14); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[0], &In[22], kCoefficients32To22[0], &Out[1], &Out[10]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[2], &In[20], kCoefficients32To22[1], &Out[2], &Out[9]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[3], &In[19], kCoefficients32To22[2], &Out[3], &Out[8]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[5], &In[17], kCoefficients32To22[3], &Out[4], &Out[7]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[6], &In[16], kCoefficients32To22[4], &Out[5], &Out[6]); + + // update pointers + In += 16; + Out += 11; + } +} + +// Resampling ratio: 11/16 +// input: int32_t (normalized, not saturated) :: size 16 * K +// output: int16_t (saturated) :: size 11 * K +// K: Number of blocks + +void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In, + int16_t *Out, + int32_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (16 input samples -> 11 output samples); + // process in sub blocks of size 16 samples. 
+ int32_t tmp; + int32_t m; + + for (m = 0; m < K; m++) + { + // first output sample + tmp = In[3]; + if (tmp > (int32_t)0x00007FFF) + tmp = 0x00007FFF; + if (tmp < (int32_t)0xFFFF8000) + tmp = 0xFFFF8000; + Out[0] = (int16_t)tmp; + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[0], &In[22], kCoefficients32To22[0], &Out[1], &Out[10]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[2], &In[20], kCoefficients32To22[1], &Out[2], &Out[9]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[3], &In[19], kCoefficients32To22[2], &Out[3], &Out[8]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[5], &In[17], kCoefficients32To22[3], &Out[4], &Out[7]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[6], &In[16], kCoefficients32To22[4], &Out[5], &Out[6]); + + // update pointers + In += 16; + Out += 11; + } +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_48khz.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_48khz.c new file mode 100644 index 00000000..2220cc33 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_48khz.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains resampling functions between 48 kHz and nb/wb. 
+ * The description header can be found in signal_processing_library.h + * + */ + +#include <string.h> +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h" + +//////////////////////////// +///// 48 kHz -> 16 kHz ///// +//////////////////////////// + +// 48 -> 16 resampler +void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out, + WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem) +{ + ///// 48 --> 48(LP) ///// + // int16_t in[480] + // int32_t out[480] + ///// + WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48); + + ///// 48 --> 32 ///// + // int32_t in[480] + // int32_t out[320] + ///// + // copy state to and from input array + memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t)); + memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t)); + WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160); + + ///// 32 --> 16 ///// + // int32_t in[320] + // int16_t out[160] + ///// + WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16); +} + +// initialize state of 48 -> 16 resampler +void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state) +{ + memset(state->S_48_48, 0, 16 * sizeof(int32_t)); + memset(state->S_48_32, 0, 8 * sizeof(int32_t)); + memset(state->S_32_16, 0, 8 * sizeof(int32_t)); +} + +//////////////////////////// +///// 16 kHz -> 48 kHz ///// +//////////////////////////// + +// 16 -> 48 resampler +void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out, + WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem) +{ + ///// 16 --> 32 ///// + // int16_t in[160] + // int32_t out[320] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32); + + ///// 32 --> 24 ///// + // int32_t in[320] + // int32_t out[240] + // copy state to and from input array + ///// + memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t)); + memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t)); 
+ WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80); + + ///// 24 --> 48 ///// + // int32_t in[240] + // int16_t out[480] + ///// + WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); +} + +// initialize state of 16 -> 48 resampler +void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state) +{ + memset(state->S_16_32, 0, 8 * sizeof(int32_t)); + memset(state->S_32_24, 0, 8 * sizeof(int32_t)); + memset(state->S_24_48, 0, 8 * sizeof(int32_t)); +} + +//////////////////////////// +///// 48 kHz -> 8 kHz ///// +//////////////////////////// + +// 48 -> 8 resampler +void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out, + WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem) +{ + ///// 48 --> 24 ///// + // int16_t in[480] + // int32_t out[240] + ///// + WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24); + + ///// 24 --> 24(LP) ///// + // int32_t in[240] + // int32_t out[240] + ///// + WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24); + + ///// 24 --> 16 ///// + // int32_t in[240] + // int32_t out[160] + ///// + // copy state to and from input array + memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t)); + memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t)); + WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80); + + ///// 16 --> 8 ///// + // int32_t in[160] + // int16_t out[80] + ///// + WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8); +} + +// initialize state of 48 -> 8 resampler +void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state) +{ + memset(state->S_48_24, 0, 8 * sizeof(int32_t)); + memset(state->S_24_24, 0, 16 * sizeof(int32_t)); + memset(state->S_24_16, 0, 8 * sizeof(int32_t)); + memset(state->S_16_8, 0, 8 * sizeof(int32_t)); +} + +//////////////////////////// +///// 8 kHz -> 48 kHz ///// +//////////////////////////// + +// 8 -> 48 resampler +void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out, + WebRtcSpl_State8khzTo48khz* 
state, int32_t* tmpmem) +{ + ///// 8 --> 16 ///// + // int16_t in[80] + // int32_t out[160] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16); + + ///// 16 --> 12 ///// + // int32_t in[160] + // int32_t out[120] + ///// + // copy state to and from input array + memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t)); + memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t)); + WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40); + + ///// 12 --> 24 ///// + // int32_t in[120] + // int16_t out[240] + ///// + WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24); + + ///// 24 --> 48 ///// + // int32_t in[240] + // int16_t out[480] + ///// + WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); +} + +// initialize state of 8 -> 48 resampler +void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state) +{ + memset(state->S_8_16, 0, 8 * sizeof(int32_t)); + memset(state->S_16_12, 0, 8 * sizeof(int32_t)); + memset(state->S_12_24, 0, 8 * sizeof(int32_t)); + memset(state->S_24_48, 0, 8 * sizeof(int32_t)); +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2.c new file mode 100644 index 00000000..dcba82e3 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2.c @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the resampling by two functions. 
// Multiply two 32-bit values and accumulate to another input value.
// Return: state + ((diff * tbl_value) >> 16)
//
// Implemented with a single ARM "smlawb" instruction; operands are passed
// as (diff, tbl_value, state).
// NOTE(review): smlawb presumably uses only the bottom 16 bits of
// tbl_value -- confirm against the ARM instruction reference before
// passing coefficients that do not fit in 16 bits.

static __inline int32_t MUL_ACCUM_1(int32_t tbl_value,
                                    int32_t diff,
                                    int32_t state) {
  int32_t result;
  __asm __volatile ("smlawb %0, %1, %2, %3": "=r"(result): "r"(diff),
                                   "r"(tbl_value), "r"(state));
  return result;
}

// Multiply two 32-bit values and accumulate to another input value.
// Return: state + (((diff << 1) * tbl_value) >> 32)
//
// The reason to introduce this function is that, in case we can't use smlawb
// instruction (in MUL_ACCUM_1) due to input value range, we can still use
// smmla to save some cycles.
//
// Note that diff is doubled (diff << 1) before it is handed to the
// instruction.

static __inline int32_t MUL_ACCUM_2(int32_t tbl_value,
                                    int32_t diff,
                                    int32_t state) {
  int32_t result;
  __asm __volatile ("smmla %0, %1, %2, %3": "=r"(result): "r"(diff << 1),
                                  "r"(tbl_value), "r"(state));
  return result;
}
+static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528}; +static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255}; + +// Multiply a 32-bit value with a 16-bit value and accumulate to another input: +#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c) +#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c) + +#endif // WEBRTC_ARCH_ARM_V7 + + +// decimator +#if !defined(MIPS32_LE) +void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len, + int16_t* out, int32_t* filtState) { + int32_t tmp1, tmp2, diff, in32, out32; + size_t i; + + register int32_t state0 = filtState[0]; + register int32_t state1 = filtState[1]; + register int32_t state2 = filtState[2]; + register int32_t state3 = filtState[3]; + register int32_t state4 = filtState[4]; + register int32_t state5 = filtState[5]; + register int32_t state6 = filtState[6]; + register int32_t state7 = filtState[7]; + + for (i = (len >> 1); i > 0; i--) { + // lower allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); + state2 = tmp2; + + // upper allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); + state6 = tmp2; + + // add two allpass outputs, divide by two and round + out32 = (state3 + state7 + 1024) >> 11; + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + } + + filtState[0] = state0; + filtState[1] = state1; + filtState[2] = state2; + filtState[3] = state3; + filtState[4] 
= state4; + filtState[5] = state5; + filtState[6] = state6; + filtState[7] = state7; +} +#endif // #if defined(MIPS32_LE) + + +void WebRtcSpl_UpsampleBy2(const int16_t* in, size_t len, + int16_t* out, int32_t* filtState) { + int32_t tmp1, tmp2, diff, in32, out32; + size_t i; + + register int32_t state0 = filtState[0]; + register int32_t state1 = filtState[1]; + register int32_t state2 = filtState[2]; + register int32_t state3 = filtState[3]; + register int32_t state4 = filtState[4]; + register int32_t state5 = filtState[5]; + register int32_t state6 = filtState[6]; + register int32_t state7 = filtState[7]; + + for (i = len; i > 0; i--) { + // lower allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state2); + state2 = tmp2; + + // round; limit amplitude to prevent wrap-around; write to output array + out32 = (state3 + 512) >> 10; + *out++ = WebRtcSpl_SatW32ToW16(out32); + + // upper allpass filter + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state6); + state6 = tmp2; + + // round; limit amplitude to prevent wrap-around; write to output array + out32 = (state7 + 512) >> 10; + *out++ = WebRtcSpl_SatW32ToW16(out32); + } + + filtState[0] = state0; + filtState[1] = state1; + filtState[2] = state2; + filtState[3] = state3; + filtState[4] = state4; + filtState[5] = state5; + filtState[6] = state6; + filtState[7] = state7; +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c 
b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c new file mode 100644 index 00000000..085069c8 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c @@ -0,0 +1,679 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains some internal resampling functions. + * + */ + +#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h" + +// allpass filter coefficients. +static const int16_t kResampleAllpass[2][3] = { + {821, 6110, 12382}, + {3050, 9368, 15063} +}; + +// +// decimator +// input: int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN! 
+// output: int16_t (saturated) (of length len/2) +// state: filter state array; length = 8 + +void WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter (operates on even input samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // divide by two and store temporarily + in[i << 1] = (state[3] >> 1); + } + + in++; + + // upper allpass filter (operates on odd input samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // divide by two and store temporarily + in[i << 1] = (state[7] >> 1); + } + + in--; + + // combine allpass outputs + for (i = 0; i < len; i += 2) + { + // divide by two, add both allpass outputs and round + tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15; + tmp1 = (in[(i << 1) + 2] + in[(i << 1) + 3]) >> 15; + if (tmp0 > (int32_t)0x00007FFF) + tmp0 = 0x00007FFF; + if (tmp0 < (int32_t)0xFFFF8000) + tmp0 = 
0xFFFF8000; + out[i] = (int16_t)tmp0; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i + 1] = (int16_t)tmp1; + } +} + +// +// decimator +// input: int16_t +// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2) +// state: filter state array; length = 8 + +void WebRtcSpl_DownBy2ShortToInt(const int16_t *in, + int32_t len, + int32_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter (operates on even input samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // divide by two and store temporarily + out[i] = (state[3] >> 1); + } + + in++; + + // upper allpass filter (operates on odd input samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // divide by two and store temporarily + out[i] += (state[7] >> 1); + } + + in--; +} + 
+// +// interpolator +// input: int16_t +// output: int32_t (normalized, not saturated) (of length len*2) +// state: filter state array; length = 8 +void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, int32_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // scale down, round and store + out[i << 1] = state[7] >> 15; + } + + out++; + + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i] << 15) + (1 << 14); + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3] >> 15; + } +} + +// +// interpolator +// input: int32_t (shifted 15 positions to the left, + offset 16384) +// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len*2) +// state: filter state array; length = 8 +void WebRtcSpl_UpBy2IntToInt(const 
int32_t *in, int32_t len, int32_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // scale down, round and store + out[i << 1] = state[7]; + } + + out++; + + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3]; + } +} + +// +// interpolator +// input: int32_t (shifted 15 positions to the left, + offset 16384) +// output: int16_t (saturated) (of length len*2) +// state: filter state array; length = 8 +void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, int16_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 
14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // scale down, saturate and store + tmp1 = state[7] >> 15; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i << 1] = (int16_t)tmp1; + } + + out++; + + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, saturate and store + tmp1 = state[3] >> 15; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i << 1] = (int16_t)tmp1; + } +} + +// lowpass filter +// input: int16_t +// output: int32_t (normalized, not saturated) +// state: filter state array; length = 8 +void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out, + int32_t* state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter: odd input -> even output samples + in++; + // initial state of polyphase delay element + tmp0 = state[12]; + for (i = 0; i < len; i++) + { + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 
<< 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3] >> 1; + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + } + in--; + + // upper allpass filter: even input -> even output samples + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15; + } + + // switch to odd output samples + out++; + + // lower allpass filter: even input -> odd output samples + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[9]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[8] + diff * kResampleAllpass[1][0]; + state[8] = tmp0; + diff = tmp1 - state[10]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[9] + diff * kResampleAllpass[1][1]; + state[9] = tmp1; + diff = tmp0 - state[11]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[11] = state[10] + diff 
* kResampleAllpass[1][2]; + state[10] = tmp0; + + // scale down, round and store + out[i << 1] = state[11] >> 1; + } + + // upper allpass filter: odd input -> odd output samples + in++; + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[13]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[12] + diff * kResampleAllpass[0][0]; + state[12] = tmp0; + diff = tmp1 - state[14]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[13] + diff * kResampleAllpass[0][1]; + state[13] = tmp1; + diff = tmp0 - state[15]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[15] = state[14] + diff * kResampleAllpass[0][2]; + state[14] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15; + } +} + +// lowpass filter +// input: int32_t (shifted 15 positions to the left, + offset 16384) +// output: int32_t (normalized, not saturated) +// state: filter state array; length = 8 +void WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out, + int32_t* state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter: odd input -> even output samples + in++; + // initial state of polyphase delay element + tmp0 = state[12]; + for (i = 0; i < len; i++) + { + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3] >> 1; + tmp0 = 
in[i << 1]; + } + in--; + + // upper allpass filter: even input -> even output samples + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15; + } + + // switch to odd output samples + out++; + + // lower allpass filter: even input -> odd output samples + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[9]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[8] + diff * kResampleAllpass[1][0]; + state[8] = tmp0; + diff = tmp1 - state[10]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[9] + diff * kResampleAllpass[1][1]; + state[9] = tmp1; + diff = tmp0 - state[11]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[11] = state[10] + diff * kResampleAllpass[1][2]; + state[10] = tmp0; + + // scale down, round and store + out[i << 1] = state[11] >> 1; + } + + // upper allpass filter: odd input -> odd output samples + in++; + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[13]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[12] + diff * kResampleAllpass[0][0]; + state[12] = tmp0; + diff = tmp1 - state[14]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[13] + diff * kResampleAllpass[0][1]; + state[13] = tmp1; + diff = tmp0 - state[15]; 
+ // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[15] = state[14] + diff * kResampleAllpass[0][2]; + state[14] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15; + } +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.h b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.h new file mode 100644 index 00000000..5c9533ee --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This header file contains some internal resampling functions. 
+ * + */ + +#ifndef WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_ +#define WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_ + +#include "webrtc/typedefs.h" + +/******************************************************************* + * resample_by_2_internal.c + * Functions for internal use in the other resample functions + ******************************************************************/ +void WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out, + int32_t *state); + +void WebRtcSpl_DownBy2ShortToInt(const int16_t *in, int32_t len, + int32_t *out, int32_t *state); + +void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, + int32_t *out, int32_t *state); + +void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out, + int32_t *state); + +void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, + int16_t *out, int32_t *state); + +void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, + int32_t* out, int32_t* state); + +void WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out, + int32_t* state); + +#endif // WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_ diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_mips.c new file mode 100644 index 00000000..ec5fc8b3 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_mips.c @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the resampling by two functions. 
+ * The description header can be found in signal_processing_library.h + * + */ + +#if defined(MIPS32_LE) + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +// allpass filter coefficients. +static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528}; +static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255}; + +// Multiply a 32-bit value with a 16-bit value and accumulate to another input: +#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c) +#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c) + +// decimator +void WebRtcSpl_DownsampleBy2(const int16_t* in, + size_t len, + int16_t* out, + int32_t* filtState) { + int32_t out32; + size_t i, len1; + + register int32_t state0 = filtState[0]; + register int32_t state1 = filtState[1]; + register int32_t state2 = filtState[2]; + register int32_t state3 = filtState[3]; + register int32_t state4 = filtState[4]; + register int32_t state5 = filtState[5]; + register int32_t state6 = filtState[6]; + register int32_t state7 = filtState[7]; + +#if defined(MIPS_DSP_R2_LE) + int32_t k1Res0, k1Res1, k1Res2, k2Res0, k2Res1, k2Res2; + + k1Res0= 3284; + k1Res1= 24441; + k1Res2= 49528; + k2Res0= 12199; + k2Res1= 37471; + k2Res2= 60255; + len1 = (len >> 1); + + const int32_t* inw = (int32_t*)in; + int32_t tmp11, tmp12, tmp21, tmp22; + int32_t in322, in321; + int32_t diff1, diff2; + for (i = len1; i > 0; i--) { + __asm__ volatile ( + "lh %[in321], 0(%[inw]) \n\t" + "lh %[in322], 2(%[inw]) \n\t" + + "sll %[in321], %[in321], 10 \n\t" + "sll %[in322], %[in322], 10 \n\t" + + "addiu %[inw], %[inw], 4 \n\t" + + "subu %[diff1], %[in321], %[state1] \n\t" + "subu %[diff2], %[in322], %[state5] \n\t" + + : [in322] "=&r" (in322), [in321] "=&r" (in321), + [diff1] "=&r" (diff1), [diff2] "=r" (diff2), [inw] "+r" (inw) + : [state1] "r" (state1), [state5] "r" (state5) + : "memory" + ); + + __asm__ volatile ( + "mult $ac0, %[diff1], %[k2Res0] \n\t" + "mult $ac1, %[diff2], %[k1Res0] 
\n\t" + + "extr.w %[tmp11], $ac0, 16 \n\t" + "extr.w %[tmp12], $ac1, 16 \n\t" + + "addu %[tmp11], %[state0], %[tmp11] \n\t" + "addu %[tmp12], %[state4], %[tmp12] \n\t" + + "addiu %[state0], %[in321], 0 \n\t" + "addiu %[state4], %[in322], 0 \n\t" + + "subu %[diff1], %[tmp11], %[state2] \n\t" + "subu %[diff2], %[tmp12], %[state6] \n\t" + + "mult $ac0, %[diff1], %[k2Res1] \n\t" + "mult $ac1, %[diff2], %[k1Res1] \n\t" + + "extr.w %[tmp21], $ac0, 16 \n\t" + "extr.w %[tmp22], $ac1, 16 \n\t" + + "addu %[tmp21], %[state1], %[tmp21] \n\t" + "addu %[tmp22], %[state5], %[tmp22] \n\t" + + "addiu %[state1], %[tmp11], 0 \n\t" + "addiu %[state5], %[tmp12], 0 \n\t" + : [tmp22] "=r" (tmp22), [tmp21] "=&r" (tmp21), + [tmp11] "=&r" (tmp11), [state0] "+r" (state0), + [state1] "+r" (state1), + [state2] "+r" (state2), + [state4] "+r" (state4), [tmp12] "=&r" (tmp12), + [state6] "+r" (state6), [state5] "+r" (state5) + : [k1Res1] "r" (k1Res1), [k2Res1] "r" (k2Res1), [k2Res0] "r" (k2Res0), + [diff2] "r" (diff2), [diff1] "r" (diff1), [in322] "r" (in322), + [in321] "r" (in321), [k1Res0] "r" (k1Res0) + : "hi", "lo", "$ac1hi", "$ac1lo" + ); + + // upper allpass filter + __asm__ volatile ( + "subu %[diff1], %[tmp21], %[state3] \n\t" + "subu %[diff2], %[tmp22], %[state7] \n\t" + + "mult $ac0, %[diff1], %[k2Res2] \n\t" + "mult $ac1, %[diff2], %[k1Res2] \n\t" + "extr.w %[state3], $ac0, 16 \n\t" + "extr.w %[state7], $ac1, 16 \n\t" + "addu %[state3], %[state2], %[state3] \n\t" + "addu %[state7], %[state6], %[state7] \n\t" + + "addiu %[state2], %[tmp21], 0 \n\t" + "addiu %[state6], %[tmp22], 0 \n\t" + + // add two allpass outputs, divide by two and round + "addu %[out32], %[state3], %[state7] \n\t" + "addiu %[out32], %[out32], 1024 \n\t" + "sra %[out32], %[out32], 11 \n\t" + : [state3] "+r" (state3), [state6] "+r" (state6), + [state2] "+r" (state2), [diff2] "=&r" (diff2), + [out32] "=r" (out32), [diff1] "=&r" (diff1), [state7] "+r" (state7) + : [tmp22] "r" (tmp22), [tmp21] "r" (tmp21), + [k1Res2] "r" 
(k1Res2), [k2Res2] "r" (k2Res2) + : "hi", "lo", "$ac1hi", "$ac1lo" + ); + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + } +#else // #if defined(MIPS_DSP_R2_LE) + int32_t tmp1, tmp2, diff; + int32_t in32; + len1 = (len >> 1)/4; + for (i = len1; i > 0; i--) { + // lower allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); + state2 = tmp2; + + // upper allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); + state6 = tmp2; + + // add two allpass outputs, divide by two and round + out32 = (state3 + state7 + 1024) >> 11; + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + // lower allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); + state2 = tmp2; + + // upper allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); + state6 = tmp2; + + // add two allpass 
outputs, divide by two and round + out32 = (state3 + state7 + 1024) >> 11; + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + // lower allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); + state2 = tmp2; + + // upper allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); + state6 = tmp2; + + // add two allpass outputs, divide by two and round + out32 = (state3 + state7 + 1024) >> 11; + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + // lower allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); + state2 = tmp2; + + // upper allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); + state6 = tmp2; + + // add two allpass outputs, divide by two and round + out32 = (state3 + state7 + 1024) >> 11; + + // limit amplitude to prevent wrap-around, and write to 
output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + } +#endif // #if defined(MIPS_DSP_R2_LE) + __asm__ volatile ( + "sw %[state0], 0(%[filtState]) \n\t" + "sw %[state1], 4(%[filtState]) \n\t" + "sw %[state2], 8(%[filtState]) \n\t" + "sw %[state3], 12(%[filtState]) \n\t" + "sw %[state4], 16(%[filtState]) \n\t" + "sw %[state5], 20(%[filtState]) \n\t" + "sw %[state6], 24(%[filtState]) \n\t" + "sw %[state7], 28(%[filtState]) \n\t" + : + : [state0] "r" (state0), [state1] "r" (state1), [state2] "r" (state2), + [state3] "r" (state3), [state4] "r" (state4), [state5] "r" (state5), + [state6] "r" (state6), [state7] "r" (state7), [filtState] "r" (filtState) + : "memory" + ); +} + +#endif // #if defined(MIPS32_LE) diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_fractional.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_fractional.c new file mode 100644 index 00000000..6409fbac --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_fractional.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the resampling functions between 48, 44, 32 and 24 kHz. 
+ * The description headers can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +// interpolation coefficients +static const int16_t kCoefficients48To32[2][8] = { + {778, -2050, 1087, 23285, 12903, -3783, 441, 222}, + {222, 441, -3783, 12903, 23285, 1087, -2050, 778} +}; + +static const int16_t kCoefficients32To24[3][8] = { + {767, -2362, 2434, 24406, 10620, -3838, 721, 90}, + {386, -381, -2646, 19062, 19062, -2646, -381, 386}, + {90, 721, -3838, 10620, 24406, 2434, -2362, 767} +}; + +static const int16_t kCoefficients44To32[4][9] = { + {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138}, + {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91}, + {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53}, + {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126} +}; + +// Resampling ratio: 2/3 +// input: int32_t (normalized, not saturated) :: size 3 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K +// K: number of blocks + +void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (3 input samples -> 2 output samples); + // process in sub blocks of size 3 samples. 
+ int32_t tmp; + size_t m; + + for (m = 0; m < K; m++) + { + tmp = 1 << 14; + tmp += kCoefficients48To32[0][0] * In[0]; + tmp += kCoefficients48To32[0][1] * In[1]; + tmp += kCoefficients48To32[0][2] * In[2]; + tmp += kCoefficients48To32[0][3] * In[3]; + tmp += kCoefficients48To32[0][4] * In[4]; + tmp += kCoefficients48To32[0][5] * In[5]; + tmp += kCoefficients48To32[0][6] * In[6]; + tmp += kCoefficients48To32[0][7] * In[7]; + Out[0] = tmp; + + tmp = 1 << 14; + tmp += kCoefficients48To32[1][0] * In[1]; + tmp += kCoefficients48To32[1][1] * In[2]; + tmp += kCoefficients48To32[1][2] * In[3]; + tmp += kCoefficients48To32[1][3] * In[4]; + tmp += kCoefficients48To32[1][4] * In[5]; + tmp += kCoefficients48To32[1][5] * In[6]; + tmp += kCoefficients48To32[1][6] * In[7]; + tmp += kCoefficients48To32[1][7] * In[8]; + Out[1] = tmp; + + // update pointers + In += 3; + Out += 2; + } +} + +// Resampling ratio: 3/4 +// input: int32_t (normalized, not saturated) :: size 4 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K +// K: number of blocks + +void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (4 input samples -> 3 output samples); + // process in sub blocks of size 4 samples. 
+ size_t m; + int32_t tmp; + + for (m = 0; m < K; m++) + { + tmp = 1 << 14; + tmp += kCoefficients32To24[0][0] * In[0]; + tmp += kCoefficients32To24[0][1] * In[1]; + tmp += kCoefficients32To24[0][2] * In[2]; + tmp += kCoefficients32To24[0][3] * In[3]; + tmp += kCoefficients32To24[0][4] * In[4]; + tmp += kCoefficients32To24[0][5] * In[5]; + tmp += kCoefficients32To24[0][6] * In[6]; + tmp += kCoefficients32To24[0][7] * In[7]; + Out[0] = tmp; + + tmp = 1 << 14; + tmp += kCoefficients32To24[1][0] * In[1]; + tmp += kCoefficients32To24[1][1] * In[2]; + tmp += kCoefficients32To24[1][2] * In[3]; + tmp += kCoefficients32To24[1][3] * In[4]; + tmp += kCoefficients32To24[1][4] * In[5]; + tmp += kCoefficients32To24[1][5] * In[6]; + tmp += kCoefficients32To24[1][6] * In[7]; + tmp += kCoefficients32To24[1][7] * In[8]; + Out[1] = tmp; + + tmp = 1 << 14; + tmp += kCoefficients32To24[2][0] * In[2]; + tmp += kCoefficients32To24[2][1] * In[3]; + tmp += kCoefficients32To24[2][2] * In[4]; + tmp += kCoefficients32To24[2][3] * In[5]; + tmp += kCoefficients32To24[2][4] * In[6]; + tmp += kCoefficients32To24[2][5] * In[7]; + tmp += kCoefficients32To24[2][6] * In[8]; + tmp += kCoefficients32To24[2][7] * In[9]; + Out[2] = tmp; + + // update pointers + In += 4; + Out += 3; + } +} + +// +// fractional resampling filters +// Fout = 11/16 * Fin +// Fout = 8/11 * Fin +// + +// compute two inner-products and store them to output array +static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2, + const int16_t *coef_ptr, int32_t *out1, + int32_t *out2) +{ + int32_t tmp1 = 16384; + int32_t tmp2 = 16384; + int16_t coef; + + coef = coef_ptr[0]; + tmp1 += coef * in1[0]; + tmp2 += coef * in2[-0]; + + coef = coef_ptr[1]; + tmp1 += coef * in1[1]; + tmp2 += coef * in2[-1]; + + coef = coef_ptr[2]; + tmp1 += coef * in1[2]; + tmp2 += coef * in2[-2]; + + coef = coef_ptr[3]; + tmp1 += coef * in1[3]; + tmp2 += coef * in2[-3]; + + coef = coef_ptr[4]; + tmp1 += coef * in1[4]; + tmp2 += coef * 
in2[-4]; + + coef = coef_ptr[5]; + tmp1 += coef * in1[5]; + tmp2 += coef * in2[-5]; + + coef = coef_ptr[6]; + tmp1 += coef * in1[6]; + tmp2 += coef * in2[-6]; + + coef = coef_ptr[7]; + tmp1 += coef * in1[7]; + tmp2 += coef * in2[-7]; + + coef = coef_ptr[8]; + *out1 = tmp1 + coef * in1[8]; + *out2 = tmp2 + coef * in2[-8]; +} + +// Resampling ratio: 8/11 +// input: int32_t (normalized, not saturated) :: size 11 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 8 * K +// K: number of blocks + +void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (11 input samples -> 8 output samples); + // process in sub blocks of size 11 samples. + int32_t tmp; + size_t m; + + for (m = 0; m < K; m++) + { + tmp = 1 << 14; + + // first output sample + Out[0] = ((int32_t)In[3] << 15) + tmp; + + // sum and accumulate filter coefficients and input samples + tmp += kCoefficients44To32[3][0] * In[5]; + tmp += kCoefficients44To32[3][1] * In[6]; + tmp += kCoefficients44To32[3][2] * In[7]; + tmp += kCoefficients44To32[3][3] * In[8]; + tmp += kCoefficients44To32[3][4] * In[9]; + tmp += kCoefficients44To32[3][5] * In[10]; + tmp += kCoefficients44To32[3][6] * In[11]; + tmp += kCoefficients44To32[3][7] * In[12]; + tmp += kCoefficients44To32[3][8] * In[13]; + Out[4] = tmp; + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], &Out[7]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], &Out[6]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], &Out[5]); + + // update pointers + In += 11; + Out += 8; + } +} diff --git 
a/third_party/webrtc/src/webrtc/common_audio/signal_processing/signal_processing_unittest.cc b/third_party/webrtc/src/webrtc/common_audio/signal_processing/signal_processing_unittest.cc new file mode 100644 index 00000000..108f459c --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/signal_processing_unittest.cc @@ -0,0 +1,579 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +static const size_t kVector16Size = 9; +static const int16_t vector16[kVector16Size] = {1, -15511, 4323, 1963, + WEBRTC_SPL_WORD16_MAX, 0, WEBRTC_SPL_WORD16_MIN + 5, -3333, 345}; + +class SplTest : public testing::Test { + protected: + SplTest() { + WebRtcSpl_Init(); + } + virtual ~SplTest() { + } +}; + +TEST_F(SplTest, MacroTest) { + // Macros with inputs. 
+ int A = 10; + int B = 21; + int a = -3; + int b = WEBRTC_SPL_WORD32_MAX; + + EXPECT_EQ(10, WEBRTC_SPL_MIN(A, B)); + EXPECT_EQ(21, WEBRTC_SPL_MAX(A, B)); + + EXPECT_EQ(3, WEBRTC_SPL_ABS_W16(a)); + EXPECT_EQ(3, WEBRTC_SPL_ABS_W32(a)); + + EXPECT_EQ(-63, WEBRTC_SPL_MUL(a, B)); + EXPECT_EQ(-2147483645, WEBRTC_SPL_MUL(a, b)); + EXPECT_EQ(2147483651u, WEBRTC_SPL_UMUL(a, b)); + b = WEBRTC_SPL_WORD16_MAX >> 1; + EXPECT_EQ(4294918147u, WEBRTC_SPL_UMUL_32_16(a, b)); + EXPECT_EQ(-49149, WEBRTC_SPL_MUL_16_U16(a, b)); + + a = b; + b = -3; + + EXPECT_EQ(-1, WEBRTC_SPL_MUL_16_32_RSFT16(a, b)); + EXPECT_EQ(-1, WEBRTC_SPL_MUL_16_32_RSFT15(a, b)); + EXPECT_EQ(-3, WEBRTC_SPL_MUL_16_32_RSFT14(a, b)); + EXPECT_EQ(-24, WEBRTC_SPL_MUL_16_32_RSFT11(a, b)); + + EXPECT_EQ(-12288, WEBRTC_SPL_MUL_16_16_RSFT(a, b, 2)); + EXPECT_EQ(-12287, WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(a, b, 2)); + + EXPECT_EQ(21, WEBRTC_SPL_SAT(a, A, B)); + EXPECT_EQ(21, WEBRTC_SPL_SAT(a, B, A)); + + // Shifting with negative numbers allowed + int shift_amount = 1; // Workaround compiler warning using variable here. 
+ // Positive means left shift + EXPECT_EQ(32766, WEBRTC_SPL_SHIFT_W32(a, shift_amount)); + + // Shifting with negative numbers not allowed + // We cannot do casting here due to signed/unsigned problem + EXPECT_EQ(32766, WEBRTC_SPL_LSHIFT_W32(a, 1)); + + EXPECT_EQ(8191u, WEBRTC_SPL_RSHIFT_U32(a, 1)); + + EXPECT_EQ(1470, WEBRTC_SPL_RAND(A)); + + EXPECT_EQ(-49149, WEBRTC_SPL_MUL_16_16(a, b)); + EXPECT_EQ(1073676289, WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX, + WEBRTC_SPL_WORD16_MAX)); + EXPECT_EQ(1073709055, WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MAX, + WEBRTC_SPL_WORD32_MAX)); + EXPECT_EQ(1073741824, WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN, + WEBRTC_SPL_WORD32_MIN)); +#ifdef WEBRTC_ARCH_ARM_V7 + EXPECT_EQ(-1073741824, + WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN, + WEBRTC_SPL_WORD32_MAX)); +#else + EXPECT_EQ(-1073741823, + WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN, + WEBRTC_SPL_WORD32_MAX)); +#endif +} + +TEST_F(SplTest, InlineTest) { + int16_t a16 = 121; + int16_t b16 = -17; + int32_t a32 = 111121; + int32_t b32 = -1711; + + EXPECT_EQ(17, WebRtcSpl_GetSizeInBits(a32)); + + EXPECT_EQ(0, WebRtcSpl_NormW32(0)); + EXPECT_EQ(31, WebRtcSpl_NormW32(-1)); + EXPECT_EQ(0, WebRtcSpl_NormW32(WEBRTC_SPL_WORD32_MIN)); + EXPECT_EQ(14, WebRtcSpl_NormW32(a32)); + + EXPECT_EQ(0, WebRtcSpl_NormW16(0)); + EXPECT_EQ(15, WebRtcSpl_NormW16(-1)); + EXPECT_EQ(0, WebRtcSpl_NormW16(WEBRTC_SPL_WORD16_MIN)); + EXPECT_EQ(4, WebRtcSpl_NormW16(b32)); + for (int ii = 0; ii < 15; ++ii) { + int16_t value = 1 << ii; + EXPECT_EQ(14 - ii, WebRtcSpl_NormW16(value)); + EXPECT_EQ(15 - ii, WebRtcSpl_NormW16(-value)); + } + + EXPECT_EQ(0, WebRtcSpl_NormU32(0u)); + EXPECT_EQ(0, WebRtcSpl_NormU32(0xffffffff)); + EXPECT_EQ(15, WebRtcSpl_NormU32(static_cast<uint32_t>(a32))); + + EXPECT_EQ(104, WebRtcSpl_AddSatW16(a16, b16)); + EXPECT_EQ(138, WebRtcSpl_SubSatW16(a16, b16)); + + EXPECT_EQ(109410, WebRtcSpl_AddSatW32(a32, b32)); + EXPECT_EQ(112832, WebRtcSpl_SubSatW32(a32, b32)); + 
+ a32 = 0x80000000; + b32 = 0x80000000; + // Cast to signed int to avoid compiler complaint on gtest.h. + EXPECT_EQ(static_cast<int>(0x80000000), WebRtcSpl_AddSatW32(a32, b32)); + a32 = 0x7fffffff; + b32 = 0x7fffffff; + EXPECT_EQ(0x7fffffff, WebRtcSpl_AddSatW32(a32, b32)); + a32 = 0; + b32 = 0x80000000; + EXPECT_EQ(0x7fffffff, WebRtcSpl_SubSatW32(a32, b32)); + a32 = 0x7fffffff; + b32 = 0x80000000; + EXPECT_EQ(0x7fffffff, WebRtcSpl_SubSatW32(a32, b32)); + a32 = 0x80000000; + b32 = 0x7fffffff; + EXPECT_EQ(static_cast<int>(0x80000000), WebRtcSpl_SubSatW32(a32, b32)); +} + +TEST_F(SplTest, MathOperationsTest) { + int A = 1134567892; + int32_t num = 117; + int32_t den = -5; + uint16_t denU = 5; + EXPECT_EQ(33700, WebRtcSpl_Sqrt(A)); + EXPECT_EQ(33683, WebRtcSpl_SqrtFloor(A)); + + + EXPECT_EQ(-91772805, WebRtcSpl_DivResultInQ31(den, num)); + EXPECT_EQ(-23, WebRtcSpl_DivW32W16ResW16(num, (int16_t)den)); + EXPECT_EQ(-23, WebRtcSpl_DivW32W16(num, (int16_t)den)); + EXPECT_EQ(23u, WebRtcSpl_DivU32U16(num, denU)); + EXPECT_EQ(0, WebRtcSpl_DivW32HiLow(128, 0, 256)); +} + +TEST_F(SplTest, BasicArrayOperationsTest) { + const size_t kVectorSize = 4; + int B[] = {4, 12, 133, 1100}; + int16_t b16[kVectorSize]; + int32_t b32[kVectorSize]; + + int16_t bTmp16[kVectorSize]; + int32_t bTmp32[kVectorSize]; + + WebRtcSpl_MemSetW16(b16, 3, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(3, b16[kk]); + } + WebRtcSpl_ZerosArrayW16(b16, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(0, b16[kk]); + } + WebRtcSpl_MemSetW32(b32, 3, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(3, b32[kk]); + } + WebRtcSpl_ZerosArrayW32(b32, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(0, b32[kk]); + } + for (size_t kk = 0; kk < kVectorSize; ++kk) { + bTmp16[kk] = (int16_t)kk; + bTmp32[kk] = (int32_t)kk; + } + WEBRTC_SPL_MEMCPY_W16(b16, bTmp16, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { 
+ EXPECT_EQ(b16[kk], bTmp16[kk]); + } +// WEBRTC_SPL_MEMCPY_W32(b32, bTmp32, kVectorSize); +// for (int kk = 0; kk < kVectorSize; ++kk) { +// EXPECT_EQ(b32[kk], bTmp32[kk]); +// } + WebRtcSpl_CopyFromEndW16(b16, kVectorSize, 2, bTmp16); + for (size_t kk = 0; kk < 2; ++kk) { + EXPECT_EQ(static_cast<int16_t>(kk+2), bTmp16[kk]); + } + + for (size_t kk = 0; kk < kVectorSize; ++kk) { + b32[kk] = B[kk]; + b16[kk] = (int16_t)B[kk]; + } + WebRtcSpl_VectorBitShiftW32ToW16(bTmp16, kVectorSize, b32, 1); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((B[kk]>>1), bTmp16[kk]); + } + WebRtcSpl_VectorBitShiftW16(bTmp16, kVectorSize, b16, 1); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((B[kk]>>1), bTmp16[kk]); + } + WebRtcSpl_VectorBitShiftW32(bTmp32, kVectorSize, b32, 1); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((B[kk]>>1), bTmp32[kk]); + } + + WebRtcSpl_MemCpyReversedOrder(&bTmp16[3], b16, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(b16[3-kk], bTmp16[kk]); + } +} + +TEST_F(SplTest, MinMaxOperationsTest) { + const size_t kVectorSize = 17; + + // Vectors to test the cases where minimum values have to be caught + // outside of the unrolled loops in ARM-Neon. + int16_t vector16[kVectorSize] = {-1, 7485, 0, 3333, + -18283, 0, 12334, -29871, 988, -3333, + 345, -456, 222, 999, 888, 8774, WEBRTC_SPL_WORD16_MIN}; + int32_t vector32[kVectorSize] = {-1, 0, 283211, 3333, + 8712345, 0, -3333, 89345, -374585456, 222, 999, 122345334, + -12389756, -987329871, 888, -2, WEBRTC_SPL_WORD32_MIN}; + + EXPECT_EQ(WEBRTC_SPL_WORD16_MIN, + WebRtcSpl_MinValueW16(vector16, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD32_MIN, + WebRtcSpl_MinValueW32(vector32, kVectorSize)); + EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MinIndexW16(vector16, kVectorSize)); + EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MinIndexW32(vector32, kVectorSize)); + + // Test the cases where maximum values have to be caught + // outside of the unrolled loops in ARM-Neon. 
+ vector16[kVectorSize - 1] = WEBRTC_SPL_WORD16_MAX; + vector32[kVectorSize - 1] = WEBRTC_SPL_WORD32_MAX; + + EXPECT_EQ(WEBRTC_SPL_WORD16_MAX, + WebRtcSpl_MaxAbsValueW16(vector16, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD16_MAX, + WebRtcSpl_MaxValueW16(vector16, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD32_MAX, + WebRtcSpl_MaxAbsValueW32(vector32, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD32_MAX, + WebRtcSpl_MaxValueW32(vector32, kVectorSize)); + EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxAbsIndexW16(vector16, kVectorSize)); + EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxIndexW16(vector16, kVectorSize)); + EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxIndexW32(vector32, kVectorSize)); + + // Test the cases where multiple maximum and minimum values are present. + vector16[1] = WEBRTC_SPL_WORD16_MAX; + vector16[6] = WEBRTC_SPL_WORD16_MIN; + vector16[11] = WEBRTC_SPL_WORD16_MIN; + vector32[1] = WEBRTC_SPL_WORD32_MAX; + vector32[6] = WEBRTC_SPL_WORD32_MIN; + vector32[11] = WEBRTC_SPL_WORD32_MIN; + + EXPECT_EQ(WEBRTC_SPL_WORD16_MAX, + WebRtcSpl_MaxAbsValueW16(vector16, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD16_MAX, + WebRtcSpl_MaxValueW16(vector16, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD16_MIN, + WebRtcSpl_MinValueW16(vector16, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD32_MAX, + WebRtcSpl_MaxAbsValueW32(vector32, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD32_MAX, + WebRtcSpl_MaxValueW32(vector32, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD32_MIN, + WebRtcSpl_MinValueW32(vector32, kVectorSize)); + EXPECT_EQ(6u, WebRtcSpl_MaxAbsIndexW16(vector16, kVectorSize)); + EXPECT_EQ(1u, WebRtcSpl_MaxIndexW16(vector16, kVectorSize)); + EXPECT_EQ(1u, WebRtcSpl_MaxIndexW32(vector32, kVectorSize)); + EXPECT_EQ(6u, WebRtcSpl_MinIndexW16(vector16, kVectorSize)); + EXPECT_EQ(6u, WebRtcSpl_MinIndexW32(vector32, kVectorSize)); +} + +TEST_F(SplTest, VectorOperationsTest) { + const size_t kVectorSize = 4; + int B[] = {4, 12, 133, 1100}; + int16_t a16[kVectorSize]; + int16_t b16[kVectorSize]; + int16_t 
bTmp16[kVectorSize]; + + for (size_t kk = 0; kk < kVectorSize; ++kk) { + a16[kk] = B[kk]; + b16[kk] = B[kk]; + } + + WebRtcSpl_AffineTransformVector(bTmp16, b16, 3, 7, 2, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((B[kk]*3+7)>>2, bTmp16[kk]); + } + WebRtcSpl_ScaleAndAddVectorsWithRound(b16, 3, b16, 2, 2, bTmp16, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((B[kk]*3+B[kk]*2+2)>>2, bTmp16[kk]); + } + + WebRtcSpl_AddAffineVectorToVector(bTmp16, b16, 3, 7, 2, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(((B[kk]*3+B[kk]*2+2)>>2)+((b16[kk]*3+7)>>2), bTmp16[kk]); + } + + WebRtcSpl_ScaleVector(b16, bTmp16, 13, kVectorSize, 2); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((b16[kk]*13)>>2, bTmp16[kk]); + } + WebRtcSpl_ScaleVectorWithSat(b16, bTmp16, 13, kVectorSize, 2); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((b16[kk]*13)>>2, bTmp16[kk]); + } + WebRtcSpl_ScaleAndAddVectors(a16, 13, 2, b16, 7, 2, bTmp16, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(((a16[kk]*13)>>2)+((b16[kk]*7)>>2), bTmp16[kk]); + } + + WebRtcSpl_AddVectorsAndShift(bTmp16, a16, b16, kVectorSize, 2); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(B[kk] >> 1, bTmp16[kk]); + } + WebRtcSpl_ReverseOrderMultArrayElements(bTmp16, a16, &b16[3], kVectorSize, 2); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((a16[kk]*b16[3-kk])>>2, bTmp16[kk]); + } + WebRtcSpl_ElementwiseVectorMult(bTmp16, a16, b16, kVectorSize, 6); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((a16[kk]*b16[kk])>>6, bTmp16[kk]); + } + + WebRtcSpl_SqrtOfOneMinusXSquared(b16, kVectorSize, bTmp16); + for (size_t kk = 0; kk < kVectorSize - 1; ++kk) { + EXPECT_EQ(32767, bTmp16[kk]); + } + EXPECT_EQ(32749, bTmp16[kVectorSize - 1]); + + EXPECT_EQ(0, WebRtcSpl_GetScalingSquare(b16, kVectorSize, 1)); +} + +TEST_F(SplTest, EstimatorsTest) { + const size_t kOrder = 2; + 
const int32_t unstable_filter[] = { 4, 12, 133, 1100 }; + const int32_t stable_filter[] = { 1100, 133, 12, 4 }; + int16_t lpc[kOrder + 2] = { 0 }; + int16_t refl[kOrder + 2] = { 0 }; + int16_t lpc_result[] = { 4096, -497, 15, 0 }; + int16_t refl_result[] = { -3962, 123, 0, 0 }; + + EXPECT_EQ(0, WebRtcSpl_LevinsonDurbin(unstable_filter, lpc, refl, kOrder)); + EXPECT_EQ(1, WebRtcSpl_LevinsonDurbin(stable_filter, lpc, refl, kOrder)); + for (size_t i = 0; i < kOrder + 2; ++i) { + EXPECT_EQ(lpc_result[i], lpc[i]); + EXPECT_EQ(refl_result[i], refl[i]); + } +} + +TEST_F(SplTest, FilterTest) { + const size_t kVectorSize = 4; + const size_t kFilterOrder = 3; + int16_t A[] = {1, 2, 33, 100}; + int16_t A5[] = {1, 2, 33, 100, -5}; + int16_t B[] = {4, 12, 133, 110}; + int16_t data_in[kVectorSize]; + int16_t data_out[kVectorSize]; + int16_t bTmp16Low[kVectorSize]; + int16_t bState[kVectorSize]; + int16_t bStateLow[kVectorSize]; + + WebRtcSpl_ZerosArrayW16(bState, kVectorSize); + WebRtcSpl_ZerosArrayW16(bStateLow, kVectorSize); + + for (size_t kk = 0; kk < kVectorSize; ++kk) { + data_in[kk] = A[kk]; + data_out[kk] = 0; + } + + // MA filters. + // Note that the input data has |kFilterOrder| states before the actual + // data (one sample). + WebRtcSpl_FilterMAFastQ12(&data_in[kFilterOrder], data_out, B, + kFilterOrder + 1, 1); + EXPECT_EQ(0, data_out[0]); + // AR filters. + // Note that the output data has |kFilterOrder| states before the actual + // data (one sample). 
+ WebRtcSpl_FilterARFastQ12(data_in, &data_out[kFilterOrder], A, + kFilterOrder + 1, 1); + EXPECT_EQ(0, data_out[kFilterOrder]); + + EXPECT_EQ(kVectorSize, WebRtcSpl_FilterAR(A5, + 5, + data_in, + kVectorSize, + bState, + kVectorSize, + bStateLow, + kVectorSize, + data_out, + bTmp16Low, + kVectorSize)); +} + +TEST_F(SplTest, RandTest) { + const int kVectorSize = 4; + int16_t BU[] = {3653, 12446, 8525, 30691}; + int16_t b16[kVectorSize]; + uint32_t bSeed = 100000; + + EXPECT_EQ(7086, WebRtcSpl_RandU(&bSeed)); + EXPECT_EQ(31565, WebRtcSpl_RandU(&bSeed)); + EXPECT_EQ(-9786, WebRtcSpl_RandN(&bSeed)); + EXPECT_EQ(kVectorSize, WebRtcSpl_RandUArray(b16, kVectorSize, &bSeed)); + for (int kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(BU[kk], b16[kk]); + } +} + +TEST_F(SplTest, DotProductWithScaleTest) { + EXPECT_EQ(605362796, WebRtcSpl_DotProductWithScale(vector16, + vector16, kVector16Size, 2)); +} + +TEST_F(SplTest, CrossCorrelationTest) { + // Note the function arguments relation specified by API. + const size_t kCrossCorrelationDimension = 3; + const int kShift = 2; + const int kStep = 1; + const size_t kSeqDimension = 6; + + const int16_t kVector16[kVector16Size] = {1, 4323, 1963, + WEBRTC_SPL_WORD16_MAX, WEBRTC_SPL_WORD16_MIN + 5, -3333, -876, 8483, 142}; + int32_t vector32[kCrossCorrelationDimension] = {0}; + + WebRtcSpl_CrossCorrelation(vector32, vector16, kVector16, kSeqDimension, + kCrossCorrelationDimension, kShift, kStep); + + // WebRtcSpl_CrossCorrelationC() and WebRtcSpl_CrossCorrelationNeon() + // are not bit-exact.
+ const int32_t kExpected[kCrossCorrelationDimension] = + {-266947903, -15579555, -171282001}; + const int32_t* expected = kExpected; +#if !defined(MIPS32_LE) + const int32_t kExpectedNeon[kCrossCorrelationDimension] = + {-266947901, -15579553, -171281999}; + if (WebRtcSpl_CrossCorrelation != WebRtcSpl_CrossCorrelationC) { + expected = kExpectedNeon; + } +#endif + for (size_t i = 0; i < kCrossCorrelationDimension; ++i) { + EXPECT_EQ(expected[i], vector32[i]); + } +} + +TEST_F(SplTest, AutoCorrelationTest) { + int scale = 0; + int32_t vector32[kVector16Size]; + const int32_t expected[kVector16Size] = {302681398, 14223410, -121705063, + -85221647, -17104971, 61806945, 6644603, -669329, 43}; + + EXPECT_EQ(kVector16Size, + WebRtcSpl_AutoCorrelation(vector16, kVector16Size, + kVector16Size - 1, vector32, &scale)); + EXPECT_EQ(3, scale); + for (size_t i = 0; i < kVector16Size; ++i) { + EXPECT_EQ(expected[i], vector32[i]); + } +} + +TEST_F(SplTest, SignalProcessingTest) { + const size_t kVectorSize = 4; + int A[] = {1, 2, 33, 100}; + const int16_t kHanning[4] = { 2399, 8192, 13985, 16384 }; + int16_t b16[kVectorSize]; + + int16_t bTmp16[kVectorSize]; + + int bScale = 0; + + for (size_t kk = 0; kk < kVectorSize; ++kk) { + b16[kk] = A[kk]; + } + + // TODO(bjornv): Activate the Reflection Coefficient tests when refactoring. 
+// WebRtcSpl_ReflCoefToLpc(b16, kVectorSize, bTmp16); +//// for (int kk = 0; kk < kVectorSize; ++kk) { +//// EXPECT_EQ(aTmp16[kk], bTmp16[kk]); +//// } +// WebRtcSpl_LpcToReflCoef(bTmp16, kVectorSize, b16); +//// for (int kk = 0; kk < kVectorSize; ++kk) { +//// EXPECT_EQ(a16[kk], b16[kk]); +//// } +// WebRtcSpl_AutoCorrToReflCoef(b32, kVectorSize, bTmp16); +//// for (int kk = 0; kk < kVectorSize; ++kk) { +//// EXPECT_EQ(aTmp16[kk], bTmp16[kk]); +//// } + + WebRtcSpl_GetHanningWindow(bTmp16, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(kHanning[kk], bTmp16[kk]); + } + + for (size_t kk = 0; kk < kVectorSize; ++kk) { + b16[kk] = A[kk]; + } + EXPECT_EQ(11094 , WebRtcSpl_Energy(b16, kVectorSize, &bScale)); + EXPECT_EQ(0, bScale); +} + +TEST_F(SplTest, FFTTest) { + int16_t B[] = {1, 2, 33, 100, + 2, 3, 34, 101, + 3, 4, 35, 102, + 4, 5, 36, 103}; + + EXPECT_EQ(0, WebRtcSpl_ComplexFFT(B, 3, 1)); +// for (int kk = 0; kk < 16; ++kk) { +// EXPECT_EQ(A[kk], B[kk]); +// } + EXPECT_EQ(0, WebRtcSpl_ComplexIFFT(B, 3, 1)); +// for (int kk = 0; kk < 16; ++kk) { +// EXPECT_EQ(A[kk], B[kk]); +// } + WebRtcSpl_ComplexBitReverse(B, 3); + for (int kk = 0; kk < 16; ++kk) { + //EXPECT_EQ(A[kk], B[kk]); + } +} + +TEST_F(SplTest, Resample48WithSaturationTest) { + // The test resamples 3*kBlockSize number of samples to 2*kBlockSize number + // of samples. + const size_t kBlockSize = 16; + + // Saturated input vector of 48 samples. 
+ const int32_t kVectorSaturated[3 * kBlockSize + 7] = { + -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768, + -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768, + -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767 + }; + + // All values in |out_vector| should be |kRefValue32kHz|. + const int32_t kRefValue32kHz1 = -1077493760; + const int32_t kRefValue32kHz2 = 1077493645; + + // After bit shift with saturation, |out_vector_w16| is saturated. + + const int16_t kRefValue16kHz1 = -32768; + const int16_t kRefValue16kHz2 = 32767; + // Vector for storing output. + int32_t out_vector[2 * kBlockSize]; + int16_t out_vector_w16[2 * kBlockSize]; + + WebRtcSpl_Resample48khzTo32khz(kVectorSaturated, out_vector, kBlockSize); + WebRtcSpl_VectorBitShiftW32ToW16(out_vector_w16, 2 * kBlockSize, out_vector, + 15); + + // Comparing output values against references. The values at position + // 12-15 are skipped to account for the filter lag. + for (size_t i = 0; i < 12; ++i) { + EXPECT_EQ(kRefValue32kHz1, out_vector[i]); + EXPECT_EQ(kRefValue16kHz1, out_vector_w16[i]); + } + for (size_t i = 16; i < 2 * kBlockSize; ++i) { + EXPECT_EQ(kRefValue32kHz2, out_vector[i]); + EXPECT_EQ(kRefValue16kHz2, out_vector_w16[i]); + } +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_init.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_init.c new file mode 100644 index 00000000..73c2039e --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_init.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* The global function contained in this file initializes SPL function + * pointers, selecting NEON (ARM) or MIPS variants when built in and generic C otherwise. + * + * Some code came from common/rtcd.c in the WebM project. + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" + +/* Declare function pointers. */ +MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16; +MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32; +MaxValueW16 WebRtcSpl_MaxValueW16; +MaxValueW32 WebRtcSpl_MaxValueW32; +MinValueW16 WebRtcSpl_MinValueW16; +MinValueW32 WebRtcSpl_MinValueW32; +CrossCorrelation WebRtcSpl_CrossCorrelation; +DownsampleFast WebRtcSpl_DownsampleFast; +ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound; + +#if (defined(WEBRTC_DETECT_NEON) || !defined(WEBRTC_HAS_NEON)) && \ + !defined(MIPS32_LE) +/* Initialize function pointers to the generic C version. */ +static void InitPointersToC() { + WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C; + WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; + WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C; + WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C; + WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C; + WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C; + WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC; + WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC; + WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRoundC; +} +#endif + +#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON) +/* Initialize function pointers to the Neon version.
*/ +static void InitPointersToNeon() { + WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon; + WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon; + WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon; + WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon; + WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon; + WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon; + WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon; + WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon; + WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRoundC; +} +#endif + +#if defined(MIPS32_LE) +/* Initialize function pointers to the MIPS version. */ +static void InitPointersToMIPS() { + WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips; + WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips; + WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips; + WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips; + WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips; + WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips; + WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips; +#if defined(MIPS_DSP_R1_LE) + WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips; + WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRound_mips; +#else + WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; + WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRoundC; +#endif +} +#endif + +static void InitFunctionPointers(void) { +#if defined(WEBRTC_DETECT_NEON) + if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { + InitPointersToNeon(); + } else { + InitPointersToC(); + } +#elif defined(WEBRTC_HAS_NEON) + InitPointersToNeon(); +#elif defined(MIPS32_LE) + InitPointersToMIPS(); +#else + InitPointersToC(); +#endif /* WEBRTC_DETECT_NEON */ +} + +#if defined(WEBRTC_POSIX) +#include <pthread.h> + +static void once(void (*func)(void)) { + static pthread_once_t lock = PTHREAD_ONCE_INIT; + pthread_once(&lock, func); +} + +#elif 
defined(_WIN32) +#include <windows.h> + +static void once(void (*func)(void)) { + /* Didn't use InitializeCriticalSection() since there's no race-free context + * in which to execute it. + * + * TODO(kma): Change to different implementation (e.g. + * InterlockedCompareExchangePointer) to avoid issues similar to + * http://code.google.com/p/webm/issues/detail?id=467. + */ + static CRITICAL_SECTION lock = {(void *)((size_t)-1), -1, 0, 0, 0, 0}; + static int done = 0; + + EnterCriticalSection(&lock); + if (!done) { + func(); + done = 1; + } + LeaveCriticalSection(&lock); +} + +/* There's no fallback version as an #else block here to ensure thread safety. + * In case of neither pthread for WEBRTC_POSIX nor _WIN32 is present, build + * system should pick it up. + */ +#endif /* WEBRTC_POSIX */ + +void WebRtcSpl_Init() { + once(InitFunctionPointers); +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt.c new file mode 100644 index 00000000..24db4f82 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_Sqrt(). 
+ * The description header can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +#include <assert.h> + +int32_t WebRtcSpl_SqrtLocal(int32_t in); + +int32_t WebRtcSpl_SqrtLocal(int32_t in) +{ + + int16_t x_half, t16; + int32_t A, B, x2; + + /* The following block performs: + y=in/2 + x=y-2^30 + x_half=x/2^31 + t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4) + + 0.875*((x_half)^5) + */ + + B = in / 2; + + B = B - ((int32_t)0x40000000); // B = in/2 - 1/2 + x_half = (int16_t)(B >> 16); // x_half = x/2 = (in-1)/2 + B = B + ((int32_t)0x40000000); // B = 1 + x/2 + B = B + ((int32_t)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31) + + x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2; // x2 = (x/2)^2 + A = -x2; // A = -(x/2)^2 + B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2 + + A >>= 16; + A = A * A * 2; // A = (x/2)^4 + t16 = (int16_t)(A >> 16); + B += -20480 * t16 * 2; // B = B - 0.625*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + + A = x_half * t16 * 2; // A = (x/2)^5 + t16 = (int16_t)(A >> 16); + B += 28672 * t16 * 2; // B = B + 0.875*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5 + + t16 = (int16_t)(x2 >> 16); + A = x_half * t16 * 2; // A = (x/2)^3 + + B = B + (A >> 1); // B = B + 0.5*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5 + + B = B + ((int32_t)32768); // Round off bit + + return B; +} + +int32_t WebRtcSpl_Sqrt(int32_t value) +{ + /* + Algorithm: + + Six term Taylor Series is used here to compute the square root of a number + y^0.5 = (1+x)^0.5 where x = y-1 + = 1+(x/2)-0.5*((x/2)^2)+0.5*((x/2)^3)-0.625*((x/2)^4)+0.875*((x/2)^5) + 0.5 <= x < 1 + + Example of how the algorithm works, with ut=sqrt(in), and + with in=73632 and ut=271 (even shift value case): + + in=73632 + y= in/131072 + x=y-1 + t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) -
0.625*((x/2)^4) + 0.875*((x/2)^5) + ut=t*(1/sqrt(2))*512 + + or: + + in=73632 + in2=73632*2^14 + y= in2/2^31 + x=y-1 + t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5) + ut=t*(1/sqrt(2)) + ut2=ut*2^9 + + which gives: + + in = 73632 + in2 = 1206386688 + y = 0.56176757812500 + x = -0.43823242187500 + t = 0.74973506527313 + ut = 0.53014274874797 + ut2 = 2.714330873589594e+002 + + or: + + in=73632 + in2=73632*2^14 + y=in2/2 + x=y-2^30 + x_half=x/2^31 + t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4) + + 0.875*((x_half)^5) + ut=t*(1/sqrt(2)) + ut2=ut*2^9 + + which gives: + + in = 73632 + in2 = 1206386688 + y = 603193344 + x = -470548480 + x_half = -0.21911621093750 + t = 0.74973506527313 + ut = 0.53014274874797 + ut2 = 2.714330873589594e+002 + + */ + + int16_t x_norm, nshift, t16, sh; + int32_t A; + + int16_t k_sqrt_2 = 23170; // 1/sqrt2 (==5a82) + + A = value; + + if (A == 0) + return (int32_t)0; // sqrt(0) = 0 + + sh = WebRtcSpl_NormW32(A); // # shifts to normalize A + A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A + if (A < (WEBRTC_SPL_WORD32_MAX - 32767)) + { + A = A + ((int32_t)32768); // Round off bit + } else + { + A = WEBRTC_SPL_WORD32_MAX; + } + + x_norm = (int16_t)(A >> 16); // x_norm = AH + + nshift = (sh / 2); + assert(nshift >= 0); + + A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16); + A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16) + A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A) + + if (2 * nshift == sh) { + // Even shift value case + + t16 = (int16_t)(A >> 16); // t16 = AH + + A = k_sqrt_2 * t16 * 2; // A = 1/sqrt(2)*t16 + A = A + ((int32_t)32768); // Round off + A = A & ((int32_t)0x7fff0000); // Round off + + A >>= 15; // A = A>>15 + + } else + { + A >>= 16; // A = A>>16 + } + + A = A & ((int32_t)0x0000ffff); + A >>= nshift; // De-normalize the result.
+ + return A; +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor.c new file mode 100644 index 00000000..370307a0 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor.c @@ -0,0 +1,77 @@ +/* + * Written by Wilco Dijkstra, 1996. The following email exchange establishes the + * license. + * + * From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com> + * Date: Fri, Jun 24, 2011 at 3:20 AM + * Subject: Re: sqrt routine + * To: Kevin Ma <kma@google.com> + * Hi Kevin, + * Thanks for asking. Those routines are public domain (originally posted to + * comp.sys.arm a long time ago), so you can use them freely for any purpose. + * Cheers, + * Wilco + * + * ----- Original Message ----- + * From: "Kevin Ma" <kma@google.com> + * To: <Wilco.Dijkstra@ntlworld.com> + * Sent: Thursday, June 23, 2011 11:44 PM + * Subject: Fwd: sqrt routine + * Hi Wilco, + * I saw your sqrt routine from several web sites, including + * http://www.finesse.demon.co.uk/steven/sqrt.html. + * Just wonder if there's any copyright information with your Successive + * approximation routines, or if I can freely use it for any purpose. + * Thanks. + * Kevin + */ + +// Minor modifications in code style for WebRTC, 2012. + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +/* + * Algorithm: + * Successive approximation of the equation (root + delta) ^ 2 = N + * until delta < 1. If delta < 1 we have the integer part of SQRT (N). + * Use delta = 2^i for i = 15 .. 0. + * + * Output precision is 16 bits. Note for large input values (close to + * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word) + * contains the MSB information (a non-sign value). Do with caution + * if you need to cast the output to int16_t type. + * + * If the input value is negative, it returns 0. 
+ */ + +#define WEBRTC_SPL_SQRT_ITER(N) \ + try1 = root + (1 << (N)); \ + if (value >= try1 << (N)) \ + { \ + value -= try1 << (N); \ + root |= 2 << (N); \ + } + +int32_t WebRtcSpl_SqrtFloor(int32_t value) +{ + int32_t root = 0, try1; + + WEBRTC_SPL_SQRT_ITER (15); + WEBRTC_SPL_SQRT_ITER (14); + WEBRTC_SPL_SQRT_ITER (13); + WEBRTC_SPL_SQRT_ITER (12); + WEBRTC_SPL_SQRT_ITER (11); + WEBRTC_SPL_SQRT_ITER (10); + WEBRTC_SPL_SQRT_ITER ( 9); + WEBRTC_SPL_SQRT_ITER ( 8); + WEBRTC_SPL_SQRT_ITER ( 7); + WEBRTC_SPL_SQRT_ITER ( 6); + WEBRTC_SPL_SQRT_ITER ( 5); + WEBRTC_SPL_SQRT_ITER ( 4); + WEBRTC_SPL_SQRT_ITER ( 3); + WEBRTC_SPL_SQRT_ITER ( 2); + WEBRTC_SPL_SQRT_ITER ( 1); + WEBRTC_SPL_SQRT_ITER ( 0); + + return root >> 1; +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S new file mode 100644 index 00000000..f44ddd46 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S @@ -0,0 +1,110 @@ +@ +@ Written by Wilco Dijkstra, 1996. The following email exchange establishes the +@ license. +@ +@ From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com> +@ Date: Fri, Jun 24, 2011 at 3:20 AM +@ Subject: Re: sqrt routine +@ To: Kevin Ma <kma@google.com> +@ Hi Kevin, +@ Thanks for asking. Those routines are public domain (originally posted to +@ comp.sys.arm a long time ago), so you can use them freely for any purpose. +@ Cheers, +@ Wilco +@ +@ ----- Original Message ----- +@ From: "Kevin Ma" <kma@google.com> +@ To: <Wilco.Dijkstra@ntlworld.com> +@ Sent: Thursday, June 23, 2011 11:44 PM +@ Subject: Fwd: sqrt routine +@ Hi Wilco, +@ I saw your sqrt routine from several web sites, including +@ http://www.finesse.demon.co.uk/steven/sqrt.html. +@ Just wonder if there's any copyright information with your Successive +@ approximation routines, or if I can freely use it for any purpose. +@ Thanks. 
+@ Kevin + +@ Minor modifications in code style for WebRTC, 2012. +@ Output is bit-exact with the reference C code in spl_sqrt_floor.c. + +@ Input : r0 32 bit unsigned integer +@ Output: r0 = INT (SQRT (r0)), precision is 16 bits +@ Registers touched: r1, r2 + +#include "webrtc/system_wrappers/interface/asm_defines.h" + +GLOBAL_FUNCTION WebRtcSpl_SqrtFloor +.align 2 +DEFINE_FUNCTION WebRtcSpl_SqrtFloor + mov r1, #3 << 30 + mov r2, #1 << 30 + + @ unroll for i = 0 .. 15 + + cmp r0, r2, ror #2 * 0 + subhs r0, r0, r2, ror #2 * 0 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 1 + subhs r0, r0, r2, ror #2 * 1 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 2 + subhs r0, r0, r2, ror #2 * 2 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 3 + subhs r0, r0, r2, ror #2 * 3 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 4 + subhs r0, r0, r2, ror #2 * 4 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 5 + subhs r0, r0, r2, ror #2 * 5 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 6 + subhs r0, r0, r2, ror #2 * 6 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 7 + subhs r0, r0, r2, ror #2 * 7 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 8 + subhs r0, r0, r2, ror #2 * 8 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 9 + subhs r0, r0, r2, ror #2 * 9 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 10 + subhs r0, r0, r2, ror #2 * 10 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 11 + subhs r0, r0, r2, ror #2 * 11 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 12 + subhs r0, r0, r2, ror #2 * 12 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 13 + subhs r0, r0, r2, ror #2 * 13 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 14 + subhs r0, r0, r2, ror #2 * 14 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 15 + subhs r0, r0, r2, ror #2 * 15 + adc r2, r1, r2, lsl #1 + + bic r0, r2, #3 << 30 @ for rounding add: cmp r0, r2 adc r2, #1 + bx lr diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c 
b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c new file mode 100644 index 00000000..8716459b --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c @@ -0,0 +1,207 @@ +/* + * Written by Wilco Dijkstra, 1996. The following email exchange establishes the + * license. + * + * From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com> + * Date: Fri, Jun 24, 2011 at 3:20 AM + * Subject: Re: sqrt routine + * To: Kevin Ma <kma@google.com> + * Hi Kevin, + * Thanks for asking. Those routines are public domain (originally posted to + * comp.sys.arm a long time ago), so you can use them freely for any purpose. + * Cheers, + * Wilco + * + * ----- Original Message ----- + * From: "Kevin Ma" <kma@google.com> + * To: <Wilco.Dijkstra@ntlworld.com> + * Sent: Thursday, June 23, 2011 11:44 PM + * Subject: Fwd: sqrt routine + * Hi Wilco, + * I saw your sqrt routine from several web sites, including + * http://www.finesse.demon.co.uk/steven/sqrt.html. + * Just wonder if there's any copyright information with your Successive + * approximation routines, or if I can freely use it for any purpose. + * Thanks. + * Kevin + */ + +// Minor modifications in code style for WebRTC, 2012. +// Code optimizations for MIPS, 2013. + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +/* + * Algorithm: + * Successive approximation of the equation (root + delta) ^ 2 = N + * until delta < 1. If delta < 1 we have the integer part of SQRT (N). + * Use delta = 2^i for i = 15 .. 0. + * + * Output precision is 16 bits. Note for large input values (close to + * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word) + * contains the MSB information (a non-sign value). Do with caution + * if you need to cast the output to int16_t type. + * + * If the input value is negative, it returns 0. 
+ */ + + +int32_t WebRtcSpl_SqrtFloor(int32_t value) +{ + int32_t root = 0, tmp1, tmp2, tmp3, tmp4; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + + "lui %[tmp1], 0x4000 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "sub %[tmp3], %[value], %[tmp1] \n\t" + "lui %[tmp1], 0x1 \n\t" + "or %[tmp4], %[root], %[tmp1] \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x4000 \n\t" + "addu %[tmp1], %[tmp1], %[root] \n\t" + "sll %[tmp1], 14 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "subu %[tmp3], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x8000 \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x2000 \n\t" + "addu %[tmp1], %[tmp1], %[root] \n\t" + "sll %[tmp1], 13 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "subu %[tmp3], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x4000 \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x1000 \n\t" + "addu %[tmp1], %[tmp1], %[root] \n\t" + "sll %[tmp1], 12 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "subu %[tmp3], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x2000 \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x800 \n\t" + "addu %[tmp1], %[tmp1], %[root] \n\t" + "sll %[tmp1], 11 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "subu %[tmp3], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x1000 \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x400 \n\t" + "addu %[tmp1], %[tmp1], %[root] \n\t" + "sll %[tmp1], 10 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "subu %[tmp3], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x800 \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x200 \n\t" + "addu %[tmp1], %[tmp1], %[root] 
\n\t" + "sll %[tmp1], 9 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "subu %[tmp3], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x400 \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x100 \n\t" + "addu %[tmp1], %[tmp1], %[root] \n\t" + "sll %[tmp1], 8 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "subu %[tmp3], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x200 \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x80 \n\t" + "addu %[tmp1], %[tmp1], %[root] \n\t" + "sll %[tmp1], 7 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "subu %[tmp3], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x100 \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x40 \n\t" + "addu %[tmp1], %[tmp1], %[root] \n\t" + "sll %[tmp1], 6 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "subu %[tmp3], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x80 \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x20 \n\t" + "addu %[tmp1], %[tmp1], %[root] \n\t" + "sll %[tmp1], 5 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "subu %[tmp3], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x40 \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x10 \n\t" + "addu %[tmp1], %[tmp1], %[root] \n\t" + "sll %[tmp1], 4 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "subu %[tmp3], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x20 \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x8 \n\t" + "addu %[tmp1], %[tmp1], %[root] \n\t" + "sll %[tmp1], 3 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "subu %[tmp3], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x10 \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], 
%[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x4 \n\t" + "addu %[tmp1], %[tmp1], %[root] \n\t" + "sll %[tmp1], 2 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "subu %[tmp3], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x8 \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x2 \n\t" + "addu %[tmp1], %[tmp1], %[root] \n\t" + "sll %[tmp1], 1 \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "subu %[tmp3], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x4 \n\t" + "movz %[value], %[tmp3], %[tmp2] \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + "addiu %[tmp1], $0, 0x1 \n\t" + "addu %[tmp1], %[tmp1], %[root] \n\t" + "slt %[tmp2], %[value], %[tmp1] \n\t" + "ori %[tmp4], %[root], 0x2 \n\t" + "movz %[root], %[tmp4], %[tmp2] \n\t" + + ".set pop \n\t" + + : [root] "+r" (root), [value] "+r" (value), + [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), + [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4) + : + ); + + return root >> 1; +} + diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/splitting_filter.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/splitting_filter.c new file mode 100644 index 00000000..36fcf355 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/splitting_filter.c @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This file contains the splitting filter functions. + * + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +#include <assert.h> + +// Maximum number of samples in a low/high-band frame. 
// Maximum number of samples in a low/high-band frame; sizes the scratch
// buffers that WebRtcSpl_AnalysisQMF() and WebRtcSpl_SynthesisQMF() keep on
// the stack.
enum
{
    kMaxBandFrameLength = 320  // 10 ms at 64 kHz.
};

// QMF filter coefficients in Q16.
static const uint16_t WebRtcSpl_kAllPassFilter1[3] = {6418, 36982, 57261};
static const uint16_t WebRtcSpl_kAllPassFilter2[3] = {21333, 49062, 63010};

// Runs one first-order all-pass section over |length| samples (length >= 1):
//
//     y[n] = x[n-1] + a * (x[n] - y[n-1])
//
// |state[0]| holds x[-1] and |state[1]| holds y[-1] on entry; both are
// replaced by x[length-1] and y[length-1] on exit so the next call continues
// the same stream. |x| and |y| must not be the same buffer.
static void WebRtcSpl_AllPassStage(const int32_t* x, int32_t* y,
                                   size_t length, uint16_t coefficient,
                                   int32_t* state)
{
    size_t k;
    int32_t diff;

    // First sample: the "previous" input/output come from the stored state.
    // "diff" should be safe from wrap around since max values are 2^25.
    // diff = (x[0] - y[-1])
    diff = WebRtcSpl_SubSatW32(x[0], state[1]);
    // y[0] = x[-1] + a * (x[0] - y[-1])
    y[0] = WEBRTC_SPL_SCALEDIFF32(coefficient, diff, state[0]);

    // Remaining samples use the values produced/consumed one step earlier.
    for (k = 1; k < length; k++)
    {
        // diff = (x[n] - y[n-1])
        diff = WebRtcSpl_SubSatW32(x[k], y[k - 1]);
        // y[n] = x[n-1] + a * (x[n] - y[n-1])
        y[k] = WEBRTC_SPL_SCALEDIFF32(coefficient, diff, x[k - 1]);
    }

    // Update states.
    state[0] = x[length - 1];  // x[N-1], becomes x[-1] next time
    state[1] = y[length - 1];  // y[N-1], becomes y[-1] next time
}

///////////////////////////////////////////////////////////////////////////////////////////////
// WebRtcSpl_AllPassQMF(...)
//
// Allpass filter used by the analysis and synthesis parts of the QMF filter.
//
// Input:
//    - in_data             : Input data sequence (Q10). NOTE: used as scratch
//                            storage, so its contents are overwritten.
//    - data_length         : Length of data sequence (>2). A length of 0 is
//                            treated as a no-op.
//    - filter_coefficients : Filter coefficients (length 3, Q16)
//
// Input & Output:
//    - filter_state        : Filter state (length 6, Q10).
//
// Output:
//    - out_data            : Output data sequence (Q10), length equal to
//                            |data_length|
//

void WebRtcSpl_AllPassQMF(int32_t* in_data, size_t data_length,
                          int32_t* out_data, const uint16_t* filter_coefficients,
                          int32_t* filter_state)
{
    // The procedure is to filter the input with three first order all pass filters
    // (cascade operations).
    //
    //         a_3 + q^-1    a_2 + q^-1    a_1 + q^-1
    // y[n] =  -----------   -----------   -----------   x[n]
    //         1 + a_3q^-1   1 + a_2q^-1   1 + a_1q^-1
    //
    // The input vector |filter_coefficients| includes these three filter coefficients.
    // The filter state contains the in_data state, in_data[-1], followed by
    // the out_data state, out_data[-1]. This is repeated for each cascade.
    // The first cascade filter will filter the |in_data| and store the output in
    // |out_data|. The second will then take the |out_data| as input and make an
    // intermediate storage in |in_data|, to save memory. The third, and final, cascade
    // filter operation takes the |in_data| (which is the output from the previous cascade
    // filter) and store the output in |out_data|.
    // Note that the input vector values are changed during the process.

    // Nothing to filter; also avoids indexing element [data_length - 1] of an
    // empty sequence.
    if (data_length == 0)
    {
        return;
    }

    // Let y_i[n] indicate the output of cascade filter i (with filter coefficient a_i) at
    // vector position n. Then the final output will be y[n] = y_3[n]

    // First all-pass cascade; filter from in_data to out_data (y_1).
    WebRtcSpl_AllPassStage(in_data, out_data, data_length,
                           filter_coefficients[0], &filter_state[0]);

    // Second all-pass cascade; filter from out_data to in_data (y_2).
    WebRtcSpl_AllPassStage(out_data, in_data, data_length,
                           filter_coefficients[1], &filter_state[2]);

    // Third all-pass cascade; filter from in_data to out_data (y).
    WebRtcSpl_AllPassStage(in_data, out_data, data_length,
                           filter_coefficients[2], &filter_state[4]);
}

// Splits |in_data| (|in_data_length| samples, must be even and at most
// 2 * kMaxBandFrameLength) into a low band and a high band of
// |in_data_length| / 2 samples each. |filter_state1| and |filter_state2| are
// the six-element all-pass states carried between calls.
void WebRtcSpl_AnalysisQMF(const int16_t* in_data, size_t in_data_length,
                           int16_t* low_band, int16_t* high_band,
                           int32_t* filter_state1, int32_t* filter_state2)
{
    size_t i;
    int32_t tmp;
    int32_t half_in1[kMaxBandFrameLength];
    int32_t half_in2[kMaxBandFrameLength];
    int32_t filter1[kMaxBandFrameLength];
    int32_t filter2[kMaxBandFrameLength];
    const size_t band_length = in_data_length / 2;
    assert(in_data_length % 2 == 0);
    assert(band_length <= kMaxBandFrameLength);

    // Split even and odd samples. Also convert them to Q10.
    // Multiplication is used instead of a left shift because the samples may
    // be negative, and left-shifting a negative value is undefined in C.
    // NOTE(review): assumes WEBRTC_SPL_LSHIFT_W32(x, c) was a plain (x) << (c).
    for (i = 0; i < band_length; i++)
    {
        half_in2[i] = (int32_t)in_data[2 * i] * (1 << 10);
        half_in1[i] = (int32_t)in_data[2 * i + 1] * (1 << 10);
    }

    // All pass filter even and odd samples, independently.
    WebRtcSpl_AllPassQMF(half_in1, band_length, filter1,
                         WebRtcSpl_kAllPassFilter1, filter_state1);
    WebRtcSpl_AllPassQMF(half_in2, band_length, filter2,
                         WebRtcSpl_kAllPassFilter2, filter_state2);

    // Take the sum and difference of filtered version of odd and even
    // branches to get upper & lower band.
    for (i = 0; i < band_length; i++)
    {
        tmp = (filter1[i] + filter2[i] + 1024) >> 11;
        low_band[i] = WebRtcSpl_SatW32ToW16(tmp);

        tmp = (filter1[i] - filter2[i] + 1024) >> 11;
        high_band[i] = WebRtcSpl_SatW32ToW16(tmp);
    }
}

// Recombines |low_band| and |high_band| (|band_length| samples each, at most
// kMaxBandFrameLength) into 2 * |band_length| interleaved output samples in
// |out_data|. |filter_state1| and |filter_state2| are the six-element
// all-pass states carried between calls.
void WebRtcSpl_SynthesisQMF(const int16_t* low_band, const int16_t* high_band,
                            size_t band_length, int16_t* out_data,
                            int32_t* filter_state1, int32_t* filter_state2)
{
    int32_t tmp;
    int32_t half_in1[kMaxBandFrameLength];
    int32_t half_in2[kMaxBandFrameLength];
    int32_t filter1[kMaxBandFrameLength];
    int32_t filter2[kMaxBandFrameLength];
    size_t i;
    assert(band_length <= kMaxBandFrameLength);

    // Obtain the sum and difference channels out of upper and lower-band channels.
    // Also convert to Q10 domain (by multiplication: the sum/difference may be
    // negative, and left-shifting a negative value is undefined in C).
    for (i = 0; i < band_length; i++)
    {
        tmp = (int32_t)low_band[i] + (int32_t)high_band[i];
        half_in1[i] = tmp * (1 << 10);
        tmp = (int32_t)low_band[i] - (int32_t)high_band[i];
        half_in2[i] = tmp * (1 << 10);
    }

    // all-pass filter the sum and difference channels
    WebRtcSpl_AllPassQMF(half_in1, band_length, filter1,
                         WebRtcSpl_kAllPassFilter2, filter_state1);
    WebRtcSpl_AllPassQMF(half_in2, band_length, filter2,
                         WebRtcSpl_kAllPassFilter1, filter_state2);

    // The filtered signals are even and odd samples of the output. Combine
    // them. The signals are in Q10, so round them back to Q0 and take care of
    // saturation.
    for (i = 0; i < band_length; i++)
    {
        tmp = (filter2[i] + 512) >> 10;
        out_data[2 * i] = WebRtcSpl_SatW32ToW16(tmp);

        tmp = (filter1[i] + 512) >> 10;
        out_data[2 * i + 1] = WebRtcSpl_SatW32ToW16(tmp);
    }
}
// diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c new file mode 100644 index 00000000..ff78b522 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_SqrtOfOneMinusXSquared().
+ * The description header can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t *xQ15, size_t vector_length, + int16_t *yQ15) +{ + int32_t sq; + size_t m; + int16_t tmp; + + for (m = 0; m < vector_length; m++) + { + tmp = xQ15[m]; + sq = tmp * tmp; // x^2 in Q30 + sq = 1073741823 - sq; // 1-x^2, where 1 ~= 0.99999999906 is 1073741823 in Q30 + sq = WebRtcSpl_Sqrt(sq); // sqrt(1-x^2) in Q15 + yQ15[m] = (int16_t)sq; + } +} diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c new file mode 100644 index 00000000..fdefd067 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +/* + * This file contains implementations of the functions + * WebRtcSpl_VectorBitShiftW16() + * WebRtcSpl_VectorBitShiftW32() + * WebRtcSpl_VectorBitShiftW32ToW16() + * WebRtcSpl_ScaleVector() + * WebRtcSpl_ScaleVectorWithSat() + * WebRtcSpl_ScaleAndAddVectors() + * WebRtcSpl_ScaleAndAddVectorsWithRoundC() + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length, + const int16_t *in, int16_t right_shifts) +{ + size_t i; + + if (right_shifts > 0) + { + for (i = length; i > 0; i--) + { + (*res++) = ((*in++) >> right_shifts); + } + } else + { + for (i = length; i > 0; i--) + { + (*res++) = ((*in++) << (-right_shifts)); + } + } +} + +void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector, + size_t vector_length, + const int32_t *in_vector, + int16_t right_shifts) +{ + size_t i; + + if (right_shifts > 0) + { + for (i = vector_length; i > 0; i--) + { + (*out_vector++) = ((*in_vector++) >> right_shifts); + } + } else + { + for (i = vector_length; i > 0; i--) + { + (*out_vector++) = ((*in_vector++) << (-right_shifts)); + } + } +} + +void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length, + const int32_t* in, int right_shifts) { + size_t i; + int32_t tmp_w32; + + if (right_shifts >= 0) { + for (i = length; i > 0; i--) { + tmp_w32 = (*in++) >> right_shifts; + (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32); + } + } else { + int left_shifts = -right_shifts; + for (i = length; i > 0; i--) { + tmp_w32 = (*in++) << left_shifts; + (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32); + } + } +} + +void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector, + int16_t gain, size_t in_vector_length, + int16_t right_shifts) +{ + // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts + size_t i; + const int16_t *inptr; + int16_t *outptr; + + inptr = in_vector; + outptr = out_vector; + + for (i = 0; i < in_vector_length; i++) + { + *outptr++ = 
(int16_t)((*inptr++ * gain) >> right_shifts); + } +} + +void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector, + int16_t gain, size_t in_vector_length, + int16_t right_shifts) +{ + // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts + size_t i; + const int16_t *inptr; + int16_t *outptr; + + inptr = in_vector; + outptr = out_vector; + + for (i = 0; i < in_vector_length; i++) { + *outptr++ = WebRtcSpl_SatW32ToW16((*inptr++ * gain) >> right_shifts); + } +} + +void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1, + const int16_t *in2, int16_t gain2, int shift2, + int16_t *out, size_t vector_length) +{ + // Performs vector operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2 + size_t i; + const int16_t *in1ptr; + const int16_t *in2ptr; + int16_t *outptr; + + in1ptr = in1; + in2ptr = in2; + outptr = out; + + for (i = 0; i < vector_length; i++) + { + *outptr++ = (int16_t)((gain1 * *in1ptr++) >> shift1) + + (int16_t)((gain2 * *in2ptr++) >> shift2); + } +} + +// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms. 
/*
 * Scales two int16_t vectors, adds them with rounding and shifts the sum
 * down. For every i in [0, length):
 *
 *   out_vector[i] = (int16_t)((in_vector1[i] * in_vector1_scale +
 *                              in_vector2[i] * in_vector2_scale +
 *                              round_value) >> right_shifts)
 *
 * with round_value = (1 << right_shifts) >> 1. The result is truncated to
 * int16_t by the cast; it is not saturated.
 *
 * Returns 0 on success. Returns -1, without writing to |out_vector|, when any
 * of the three pointers is NULL, |length| is 0, or |right_shifts| is
 * negative. |right_shifts| must also be small enough that 1 << right_shifts
 * fits in an int; that upper bound is not checked here.
 */
int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
                                           int16_t in_vector1_scale,
                                           const int16_t* in_vector2,
                                           int16_t in_vector2_scale,
                                           int right_shifts,
                                           int16_t* out_vector,
                                           size_t length) {
  size_t i = 0;
  int round_value = 0;

  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
      length == 0 || right_shifts < 0) {
    return -1;
  }

  /* Computed only after validation: shifting by a negative count is
   * undefined behaviour, so this must not run before the check above. */
  round_value = (1 << right_shifts) >> 1;

  for (i = 0; i < length; i++) {
    out_vector[i] = (int16_t)((
        in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale
        + round_value) >> right_shifts);
  }

  return 0;
}
// diff --git a/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c b/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c new file mode 100644 index 00000000..dd73eeae --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + + +/* + * This file contains implementations of the functions + * WebRtcSpl_ScaleAndAddVectorsWithRound_mips() + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + size_t length) { + int16_t r0 = 0, r1 = 0; + int16_t *in1 = (int16_t*)in_vector1; + int16_t *in2 = (int16_t*)in_vector2; + int16_t *out = out_vector; + size_t i = 0; + int value32 = 0; + + if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL || + length == 0 || right_shifts < 0) { + return -1; + } + for (i = 0; i < length; i++) { + __asm __volatile ( + "lh %[r0], 0(%[in1]) \n\t" + "lh %[r1], 0(%[in2]) \n\t" + "mult %[r0], %[in_vector1_scale] \n\t" + "madd %[r1], %[in_vector2_scale] \n\t" + "extrv_r.w %[value32], $ac0, %[right_shifts] \n\t" + "addiu %[in1], %[in1], 2 \n\t" + "addiu %[in2], %[in2], 2 \n\t" + "sh %[value32], 0(%[out]) \n\t" + "addiu %[out], %[out], 2 \n\t" + : [value32] "=&r" (value32), [out] "+r" (out), [in1] "+r" (in1), + [in2] "+r" (in2), [r0] "=&r" (r0), [r1] "=&r" (r1) + : [in_vector1_scale] "r" (in_vector1_scale), + [in_vector2_scale] "r" (in_vector2_scale), + [right_shifts] "r" (right_shifts) + : "hi", "lo", "memory" + ); + } + return 0; +} diff --git a/third_party/webrtc/src/webrtc/common_audio/wav_file.h b/third_party/webrtc/src/webrtc/common_audio/wav_file.h new file mode 100644 index 00000000..2eadd3f7 --- /dev/null +++ b/third_party/webrtc/src/webrtc/common_audio/wav_file.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_COMMON_AUDIO_WAV_FILE_H_ +#define WEBRTC_COMMON_AUDIO_WAV_FILE_H_ + +#ifdef __cplusplus + +#include <stdint.h> +#include <cstddef> +#include <string> + +#include "webrtc/base/constructormagic.h" + +namespace webrtc { + +// Interface to provide access to WAV file parameters. +class WavFile { + public: + virtual ~WavFile() {} + + virtual int sample_rate() const = 0; + virtual int num_channels() const = 0; + virtual uint32_t num_samples() const = 0; +}; + +// Simple C++ class for writing 16-bit PCM WAV files. All error handling is +// by calls to RTC_CHECK(), making it unsuitable for anything but debug code. +class WavWriter final : public WavFile { + public: + // Open a new WAV file for writing. + WavWriter(const std::string& filename, int sample_rate, int num_channels); + + // Close the WAV file, after writing its header. + ~WavWriter(); + + // Write additional samples to the file. Each sample is in the range + // [-32768,32767], and there must be the previously specified number of + // interleaved channels. + void WriteSamples(const float* samples, size_t num_samples); + void WriteSamples(const int16_t* samples, size_t num_samples); + + int sample_rate() const override { return sample_rate_; } + int num_channels() const override { return num_channels_; } + uint32_t num_samples() const override { return num_samples_; } + + private: + void Close(); + const int sample_rate_; + const int num_channels_; + uint32_t num_samples_; // Total number of samples written to file. + FILE* file_handle_; // Output file, owned by this class + + RTC_DISALLOW_COPY_AND_ASSIGN(WavWriter); +}; + +// Follows the conventions of WavWriter. +class WavReader final : public WavFile { + public: + // Opens an existing WAV file for reading. + explicit WavReader(const std::string& filename); + + // Close the WAV file. 
+ ~WavReader(); + + // Returns the number of samples read. If this is less than requested, + // verifies that the end of the file was reached. + size_t ReadSamples(size_t num_samples, float* samples); + size_t ReadSamples(size_t num_samples, int16_t* samples); + + int sample_rate() const override { return sample_rate_; } + int num_channels() const override { return num_channels_; } + uint32_t num_samples() const override { return num_samples_; } + + private: + void Close(); + int sample_rate_; + int num_channels_; + uint32_t num_samples_; // Total number of samples in the file. + uint32_t num_samples_remaining_; + FILE* file_handle_; // Input file, owned by this class. + + RTC_DISALLOW_COPY_AND_ASSIGN(WavReader); +}; + +} // namespace webrtc + +extern "C" { +#endif // __cplusplus + +// C wrappers for the WavWriter class. +typedef struct rtc_WavWriter rtc_WavWriter; +rtc_WavWriter* rtc_WavOpen(const char* filename, + int sample_rate, + int num_channels); +void rtc_WavClose(rtc_WavWriter* wf); +void rtc_WavWriteSamples(rtc_WavWriter* wf, + const float* samples, + size_t num_samples); +int rtc_WavSampleRate(const rtc_WavWriter* wf); +int rtc_WavNumChannels(const rtc_WavWriter* wf); +uint32_t rtc_WavNumSamples(const rtc_WavWriter* wf); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // WEBRTC_COMMON_AUDIO_WAV_FILE_H_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_common.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_common.h new file mode 100644 index 00000000..1e24ca99 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_common.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_ + +#include "webrtc/typedefs.h" + +#ifdef _MSC_VER /* visual c++ */ +#define ALIGN16_BEG __declspec(align(16)) +#define ALIGN16_END +#else /* gcc or icc */ +#define ALIGN16_BEG +#define ALIGN16_END __attribute__((aligned(16))) +#endif + +extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65]; +extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65]; +extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65]; +extern const float WebRtcAec_kExtendedSmoothingCoefficients[2][2]; +extern const float WebRtcAec_kNormalSmoothingCoefficients[2][2]; +extern const float WebRtcAec_kMinFarendPSD; + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_ + diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.c new file mode 100644 index 00000000..b2162ac0 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.c @@ -0,0 +1,1929 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * The core AEC algorithm, which is presented with time-aligned signals. 
+ */ + +#include "webrtc/modules/audio_processing/aec/aec_core.h" + +#ifdef WEBRTC_AEC_DEBUG_DUMP +#include <stdio.h> +#endif + +#include <assert.h> +#include <math.h> +#include <stddef.h> // size_t +#include <stdlib.h> +#include <string.h> + +#include "webrtc/common_audio/ring_buffer.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/aec/aec_common.h" +#include "webrtc/modules/audio_processing/aec/aec_core_internal.h" +#include "webrtc/modules/audio_processing/aec/aec_rdft.h" +#include "webrtc/modules/audio_processing/logging/aec_logging.h" +#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" +#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" +#include "webrtc/typedefs.h" + + +// Buffer size (samples) +static const size_t kBufSizePartitions = 250; // 1 second of audio in 16 kHz. + +// Metrics +static const int subCountLen = 4; +static const int countLen = 50; +static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz. 
// Quantities to control H band scaling for SWB input
static const int flagHbandCn = 1;  // flag for adding comfort noise in H band
static const float cnScaleHband =
    (float)0.4;  // scale for comfort noise in H band
// Initial bin for averaging nlp gain in low band
static const int freqAvgIc = PART_LEN / 2;

// Square-root Hanning window, rising half only (index 0 -> 0.0, index 64 ->
// 1.0). The falling half is obtained by the callers in this file by indexing
// the table backwards (WebRtcAec_sqrtHanning[PART_LEN - i]).
//
// Matlab code to produce table:
// win = sqrt(hanning(63)); win = [0 ; win(1:32)];
// fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
//
// NOTE(review): the snippet above yields 33 values while the table holds 65;
// the stored entries match sin(pi * i / 128). The snippet looks stale -
// confirm before regenerating the table from it.
ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = {
    0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f,
    0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f,
    0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
    0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f,
    0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f,
    0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
    0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f,
    0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f,
    0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
    0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f,
    0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f,
    0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
    0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f,
    0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f,
    0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
    0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f,
    1.00000000000000f};

// Per-bin weight used by OverdriveAndSuppress() to blend the feedback gain
// hNlFb into bins whose gain exceeds it (0.0 at bin 0, 0.1 ... 0.4 above).
//
// Matlab code to produce table:
// weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65] = {
    0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f, 0.1845f, 0.1926f,
    0.2000f, 0.2069f, 0.2134f, 0.2195f, 0.2254f, 0.2309f, 0.2363f, 0.2414f,
    0.2464f, 0.2512f, 0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f,
    0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f, 0.3035f, 0.3070f,
    0.3104f, 0.3138f, 0.3171f, 0.3204f, 0.3236f, 0.3268f, 0.3299f, 0.3330f,
    0.3360f, 0.3390f, 0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f,
    0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f, 0.3752f, 0.3777f,
    0.3803f, 0.3828f, 0.3854f, 0.3878f, 0.3903f, 0.3928f, 0.3952f, 0.3976f,
    0.4000f};

// Per-bin multiplier (1.0 ... 2.0) applied to the smoothed overdrive in the
// exponent of the suppression gain in OverdriveAndSuppress().
//
// Matlab code to produce table:
// overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65] = {
    1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f, 1.3062f, 1.3307f,
    1.3536f, 1.3750f, 1.3953f, 1.4146f, 1.4330f, 1.4507f, 1.4677f, 1.4841f,
    1.5000f, 1.5154f, 1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f,
    1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f, 1.6847f, 1.6960f,
    1.7071f, 1.7181f, 1.7289f, 1.7395f, 1.7500f, 1.7603f, 1.7706f, 1.7806f,
    1.7906f, 1.8004f, 1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f,
    1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f, 1.9186f, 1.9270f,
    1.9354f, 1.9437f, 1.9520f, 1.9601f, 1.9682f, 1.9763f, 1.9843f, 1.9922f,
    2.0000f};

// Delay Agnostic AEC parameters, still under development and may change.
// kDelayQualityThresholdMax caps the adaptive quality threshold maintained by
// SignalBasedDelayCorrection().
static const float kDelayQualityThresholdMax = 0.07f;
static const float kDelayQualityThresholdMin = 0.01f;
static const int kInitialShiftOffset = 5;
#if !defined(WEBRTC_ANDROID)
// Delay correction is held off until |frame_count| reaches this value.
static const int kDelayCorrectionStart = 1500;  // 10 ms chunks
#endif

// Target suppression levels for nlp modes.
+// log{0.001, 0.00001, 0.00000001} +static const float kTargetSupp[3] = {-6.9f, -11.5f, -18.4f}; + +// Two sets of parameters, one for the extended filter mode. +static const float kExtendedMinOverDrive[3] = {3.0f, 6.0f, 15.0f}; +static const float kNormalMinOverDrive[3] = {1.0f, 2.0f, 5.0f}; +const float WebRtcAec_kExtendedSmoothingCoefficients[2][2] = {{0.9f, 0.1f}, + {0.92f, 0.08f}}; +const float WebRtcAec_kNormalSmoothingCoefficients[2][2] = {{0.9f, 0.1f}, + {0.93f, 0.07f}}; + +// Number of partitions forming the NLP's "preferred" bands. +enum { + kPrefBandSize = 24 +}; + +#ifdef WEBRTC_AEC_DEBUG_DUMP +extern int webrtc_aec_instance_count; +#endif + +WebRtcAecFilterFar WebRtcAec_FilterFar; +WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal; +WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; +WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress; +WebRtcAecComfortNoise WebRtcAec_ComfortNoise; +WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; + +__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { + return aRe * bRe - aIm * bIm; +} + +__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { + return aRe * bIm + aIm * bRe; +} + +static int CmpFloat(const void* a, const void* b) { + const float* da = (const float*)a; + const float* db = (const float*)b; + + return (*da > *db) - (*da < *db); +} + +static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) { + int i; + for (i = 0; i < aec->num_partitions; i++) { + int j; + int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int pos = i * PART_LEN1; + // Check for wrap + if (i + aec->xfBufBlockPos >= aec->num_partitions) { + xPos -= aec->num_partitions * (PART_LEN1); + } + + for (j = 0; j < PART_LEN1; j++) { + yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], + aec->xfBuf[1][xPos + j], + aec->wfBuf[0][pos + j], + aec->wfBuf[1][pos + j]); + yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], + aec->xfBuf[1][xPos + j], + aec->wfBuf[0][pos + j], + aec->wfBuf[1][pos + j]); + } + } 
+} + +static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) { + const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; + const float error_threshold = aec->extended_filter_enabled + ? kExtendedErrorThreshold + : aec->normal_error_threshold; + int i; + float abs_ef; + for (i = 0; i < (PART_LEN1); i++) { + ef[0][i] /= (aec->xPow[i] + 1e-10f); + ef[1][i] /= (aec->xPow[i] + 1e-10f); + abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); + + if (abs_ef > error_threshold) { + abs_ef = error_threshold / (abs_ef + 1e-10f); + ef[0][i] *= abs_ef; + ef[1][i] *= abs_ef; + } + + // Stepsize factor + ef[0][i] *= mu; + ef[1][i] *= mu; + } +} + +// Time-unconstrined filter adaptation. +// TODO(andrew): consider for a low-complexity mode. +// static void FilterAdaptationUnconstrained(AecCore* aec, float *fft, +// float ef[2][PART_LEN1]) { +// int i, j; +// for (i = 0; i < aec->num_partitions; i++) { +// int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); +// int pos; +// // Check for wrap +// if (i + aec->xfBufBlockPos >= aec->num_partitions) { +// xPos -= aec->num_partitions * PART_LEN1; +// } +// +// pos = i * PART_LEN1; +// +// for (j = 0; j < PART_LEN1; j++) { +// aec->wfBuf[0][pos + j] += MulRe(aec->xfBuf[0][xPos + j], +// -aec->xfBuf[1][xPos + j], +// ef[0][j], ef[1][j]); +// aec->wfBuf[1][pos + j] += MulIm(aec->xfBuf[0][xPos + j], +// -aec->xfBuf[1][xPos + j], +// ef[0][j], ef[1][j]); +// } +// } +//} + +static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) { + int i, j; + for (i = 0; i < aec->num_partitions; i++) { + int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); + int pos; + // Check for wrap + if (i + aec->xfBufBlockPos >= aec->num_partitions) { + xPos -= aec->num_partitions * PART_LEN1; + } + + pos = i * PART_LEN1; + + for (j = 0; j < PART_LEN; j++) { + + fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j], + -aec->xfBuf[1][xPos + j], + ef[0][j], + ef[1][j]); + fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j], + 
-aec->xfBuf[1][xPos + j], + ef[0][j], + ef[1][j]); + } + fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], + -aec->xfBuf[1][xPos + PART_LEN], + ef[0][PART_LEN], + ef[1][PART_LEN]); + + aec_rdft_inverse_128(fft); + memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); + + // fft scaling + { + float scale = 2.0f / PART_LEN2; + for (j = 0; j < PART_LEN; j++) { + fft[j] *= scale; + } + } + aec_rdft_forward_128(fft); + + aec->wfBuf[0][pos] += fft[0]; + aec->wfBuf[0][pos + PART_LEN] += fft[1]; + + for (j = 1; j < PART_LEN; j++) { + aec->wfBuf[0][pos + j] += fft[2 * j]; + aec->wfBuf[1][pos + j] += fft[2 * j + 1]; + } + } +} + +static void OverdriveAndSuppress(AecCore* aec, + float hNl[PART_LEN1], + const float hNlFb, + float efw[2][PART_LEN1]) { + int i; + for (i = 0; i < PART_LEN1; i++) { + // Weight subbands + if (hNl[i] > hNlFb) { + hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + + (1 - WebRtcAec_weightCurve[i]) * hNl[i]; + } + hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); + + // Suppress error signal + efw[0][i] *= hNl[i]; + efw[1][i] *= hNl[i]; + + // Ooura fft returns incorrect sign on imaginary component. It matters here + // because we are making an additive change with comfort noise. + efw[1][i] *= -1; + } +} + +static int PartitionDelay(const AecCore* aec) { + // Measures the energy in each filter partition and returns the partition with + // highest energy. + // TODO(bjornv): Spread computational cost by computing one partition per + // block? + float wfEnMax = 0; + int i; + int delay = 0; + + for (i = 0; i < aec->num_partitions; i++) { + int j; + int pos = i * PART_LEN1; + float wfEn = 0; + for (j = 0; j < PART_LEN1; j++) { + wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + + aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; + } + + if (wfEn > wfEnMax) { + wfEnMax = wfEn; + delay = i; + } + } + return delay; +} + +// Threshold to protect against the ill-effects of a zero far-end. 
// Used in SmoothedPSD() as the lower bound of the instantaneous far-end power.
const float WebRtcAec_kMinFarendPSD = 15;

// Updates the following smoothed Power Spectral Densities (PSD):
//  - sd  : near-end
//  - se  : residual echo
//  - sx  : far-end
//  - sde : cross-PSD of near-end and residual echo
//  - sxd : cross-PSD of near-end and far-end
//
// In addition to updating the PSDs, the filter diverge state is determined,
// upon which actions are taken: while diverged, |efw| is overwritten with
// |dfw|; in non-extended mode the filter |wfBuf| is cleared when the residual
// power greatly exceeds the near-end power.
//
// |efw|, |dfw| and |xfw| are the error, near-end and far-end spectra, stored as
// [0] = real parts and [1] = imaginary parts.
static void SmoothedPSD(AecCore* aec,
                        float efw[2][PART_LEN1],
                        float dfw[2][PART_LEN1],
                        float xfw[2][PART_LEN1]) {
  // Power estimate smoothing coefficients: [0] weights the previous estimate,
  // [1] weights the new sample.
  const float* ptrGCoh = aec->extended_filter_enabled
      ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
      : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
  int i;
  float sdSum = 0, seSum = 0;

  for (i = 0; i < PART_LEN1; i++) {
    aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
                 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
    aec->se[i] = ptrGCoh[0] * aec->se[i] +
                 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
    // We threshold here to protect against the ill-effects of a zero farend.
    // The threshold is not arbitrarily chosen, but balances protection and
    // adverse interaction with the algorithm's tuning.
    // TODO(bjornv): investigate further why this is so sensitive.
    aec->sx[i] =
        ptrGCoh[0] * aec->sx[i] +
        ptrGCoh[1] * WEBRTC_SPL_MAX(
            xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
            WebRtcAec_kMinFarendPSD);

    // Cross-PSDs are complex: index 0 holds the real, index 1 the imaginary
    // part.
    aec->sde[i][0] =
        ptrGCoh[0] * aec->sde[i][0] +
        ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
    aec->sde[i][1] =
        ptrGCoh[0] * aec->sde[i][1] +
        ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);

    aec->sxd[i][0] =
        ptrGCoh[0] * aec->sxd[i][0] +
        ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
    aec->sxd[i][1] =
        ptrGCoh[0] * aec->sxd[i][1] +
        ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);

    sdSum += aec->sd[i];
    seSum += aec->se[i];
  }

  // Divergent filter safeguard. Once diverged, the residual power is inflated
  // by 5% in the comparison, which gives the state some hysteresis.
  aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;

  // While diverged, fall back to the near-end spectrum as the error spectrum.
  if (aec->divergeState)
    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);

  // Reset if error is significantly larger than nearend (13 dB).
  if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
}

// Window time domain data to be used by the fft.
// Reads PART_LEN2 samples from |x| and writes PART_LEN2 samples to
// |x_windowed|: the first half is multiplied by the rising sqrt-Hanning table,
// the second half by the same table read backwards.
__inline static void WindowData(float* x_windowed, const float* x) {
  int i;
  for (i = 0; i < PART_LEN; i++) {
    x_windowed[i] = x[i] * WebRtcAec_sqrtHanning[i];
    x_windowed[PART_LEN + i] =
        x[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i];
  }
}

// Puts fft output data into a complex valued array.
// |data| uses the packed layout in which data[0] is the real value of bin 0,
// data[1] the real value of bin PART_LEN, and data[2 * i], data[2 * i + 1] the
// real and imaginary values of bin i. The imaginary parts of the two end bins
// are set to zero.
__inline static void StoreAsComplex(const float* data,
                                    float data_complex[2][PART_LEN1]) {
  int i;
  data_complex[0][0] = data[0];
  data_complex[1][0] = 0;
  for (i = 1; i < PART_LEN; i++) {
    data_complex[0][i] = data[2 * i];
    data_complex[1][i] = data[2 * i + 1];
  }
  data_complex[0][PART_LEN] = data[1];
  data_complex[1][PART_LEN] = 0;
}

// Computes the per-bin coherence between near-end and error (|cohde|) and
// between far-end and near-end (|cohxd|), PART_LEN1 values each.
//
// Outputs besides the coherence arrays:
//  - |xfw| is overwritten with the far-end spectrum at the estimated delay.
//  - |efw| receives the windowed error spectrum (SmoothedPSD() may replace it
//    with the near-end spectrum).
//  - |fft| is scratch space of PART_LEN2 floats.
static void SubbandCoherence(AecCore* aec,
                             float efw[2][PART_LEN1],
                             float xfw[2][PART_LEN1],
                             float* fft,
                             float* cohde,
                             float* cohxd) {
  float dfw[2][PART_LEN1];
  int i;

  // Re-estimate the dominant partition only when the caller's counter has
  // wrapped to zero.
  if (aec->delayEstCtr == 0)
    aec->delayIdx = PartitionDelay(aec);

  // Use delayed far.
  memcpy(xfw,
         aec->xfwBuf + aec->delayIdx * PART_LEN1,
         sizeof(xfw[0][0]) * 2 * PART_LEN1);

  // Windowed near fft
  WindowData(fft, aec->dBuf);
  aec_rdft_forward_128(fft);
  StoreAsComplex(fft, dfw);

  // Windowed error fft
  WindowData(fft, aec->eBuf);
  aec_rdft_forward_128(fft);
  StoreAsComplex(fft, efw);

  SmoothedPSD(aec, efw, dfw, xfw);

  // Subband coherence: |cross-PSD|^2 divided by the product of the two
  // auto-PSDs; the 1e-10f term avoids a division by zero.
  for (i = 0; i < PART_LEN1; i++) {
    cohde[i] =
        (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
        (aec->sd[i] * aec->se[i] + 1e-10f);
    cohxd[i] =
        (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
        (aec->sx[i] * aec->sd[i] + 1e-10f);
  }
}

// Writes to nlpGainHband[0] the mean of |lambda| over bins
// [freqAvgIc, PART_LEN1 - 1), i.e. the last bin is excluded.
static void GetHighbandGain(const float* lambda, float* nlpGainHband) {
  int i;

  nlpGainHband[0] = (float)0.0;
  for (i = freqAvgIc; i < PART_LEN1 - 1; i++) {
    nlpGainHband[0] += lambda[i];
  }
  nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc);
}

// Adds comfort noise to the error spectrum |efw| and, when more than one band
// is processed, fills |comfortNoiseHband| with noise for the higher band.
//
//  - |noisePow| : per-bin noise power; its square root scales the noise.
//  - |lambda|   : per-bin suppression gain; the noise is weighted by
//                 sqrt(max(1 - lambda^2, 0)).
//  - |comfortNoiseHband| is only written when aec->num_bands > 1 and
//    flagHbandCn == 1; otherwise it is left untouched.
static void ComfortNoise(AecCore* aec,
                         float efw[2][PART_LEN1],
                         complex_t* comfortNoiseHband,
                         const float* noisePow,
                         const float* lambda) {
  int i, num;
  float rand[PART_LEN];
  float noise, noiseAvg, tmp, tmpAvg;
  int16_t randW16[PART_LEN];
  complex_t u[PART_LEN1];

  const float pi2 = 6.28318530717959f;

  // Generate a uniform random array on [0 1]
  WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
  for (i = 0; i < PART_LEN; i++) {
    rand[i] = ((float)randW16[i]) / 32768;
  }

  // Reject LF noise
  u[0][0] = 0;
  u[0][1] = 0;
  for (i = 1; i < PART_LEN1; i++) {
    // Random phase for this bin.
    tmp = pi2 * rand[i - 1];

    noise = sqrtf(noisePow[i]);
    u[i][0] = noise * cosf(tmp);
    u[i][1] = -noise * sinf(tmp);
  }
  // The last bin is kept purely real.
  u[PART_LEN][1] = 0;

  for (i = 0; i < PART_LEN1; i++) {
    // This is the proper weighting to match the background noise power
    tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
    // tmp = 1 - lambda[i];
    efw[0][i] += tmp * u[i][0];
    efw[1][i] += tmp * u[i][1];
  }

  // For H band comfort noise
  // TODO: don't compute noise and "tmp" twice. Use the previous results.
  noiseAvg = 0.0;
  tmpAvg = 0.0;
  num = 0;
  if (aec->num_bands > 1 && flagHbandCn == 1) {

    // average noise scale
    // average over second half of freq spectrum (i.e., 4->8khz)
    // TODO: we shouldn't need num. We know how many elements we're summing.
    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
      num++;
      noiseAvg += sqrtf(noisePow[i]);
    }
    noiseAvg /= (float)num;

    // average nlp scale
    // average over second half of freq spectrum (i.e., 4->8khz)
    // TODO: we shouldn't need num. We know how many elements we're summing.
    num = 0;
    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
      num++;
      tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
    }
    tmpAvg /= (float)num;

    // Use average noise for H band
    // TODO: we should probably have a new random vector here.
    // Reject LF noise
    u[0][0] = 0;
    u[0][1] = 0;
    for (i = 1; i < PART_LEN1; i++) {
      // Same random phases as for the low band (|rand| is reused).
      tmp = pi2 * rand[i - 1];

      // Use average noise for H band
      u[i][0] = noiseAvg * (float)cos(tmp);
      u[i][1] = -noiseAvg * (float)sin(tmp);
    }
    u[PART_LEN][1] = 0;

    for (i = 0; i < PART_LEN1; i++) {
      // Use average NLP weight for H band
      comfortNoiseHband[i][0] = tmpAvg * u[i][0];
      comfortNoiseHband[i][1] = tmpAvg * u[i][1];
    }
  }
}

// Resets a PowerLevel tracker: all sums, counters and levels are zeroed and
// |minlevel| starts at a very large value so that the first positive frame
// level becomes the minimum.
static void InitLevel(PowerLevel* level) {
  const float kBigFloat = 1E17f;

  level->averagelevel = 0;
  level->framelevel = 0;
  level->minlevel = kBigFloat;
  level->frsum = 0;
  level->sfrsum = 0;
  level->frcounter = 0;
  level->sfrcounter = 0;
}

// Resets a Stats accumulator. |instant|, |average|, |max| and |himean| start at
// kOffsetLevel and |min| at -kOffsetLevel; sums and counters start at zero.
// NOTE(review): kOffsetLevel is presumably a large negative value so that the
// first sample replaces both |max| and |min| - confirm against its definition.
static void InitStats(Stats* stats) {
  stats->instant = kOffsetLevel;
  stats->average = kOffsetLevel;
  stats->max = kOffsetLevel;
  stats->min = kOffsetLevel * (-1);
  stats->sum = 0;
  stats->hisum = 0;
  stats->himean = kOffsetLevel;
  stats->counter = 0;
  stats->hicounter = 0;
}

// Resets the echo metrics state: the echo-state counter, the four power level
// trackers and the four statistics accumulators.
static void InitMetrics(AecCore* self) {
  self->stateCounter = 0;
  InitLevel(&self->farlevel);
  InitLevel(&self->nearlevel);
  InitLevel(&self->linoutlevel);
  InitLevel(&self->nlpoutlevel);

  InitStats(&self->erl);
  InitStats(&self->erle);
  InitStats(&self->aNlp);
  InitStats(&self->rerl);
}

// Adds the energy of one block, given as the spectrum |in| ([0] = real parts,
// [1] = imaginary parts), to |level| and refreshes the frame level, the
// minimum level and the average level whenever enough blocks have been
// collected.
static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]) {
  // Do the energy calculation in the frequency domain. The FFT is performed on
  // a segment of PART_LEN2 samples due to overlap, but we only want the energy
  // of half that data (the last PART_LEN samples). Parseval's relation states
  // that the energy is preserved according to
  //
  // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2
  //                           = ENERGY,
  //
  // where N = PART_LEN2. Since we are only interested in calculating the energy
  // for the last PART_LEN samples we approximate by calculating ENERGY and
  // divide by 2,
  //
  // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2
  //
  // Since we deal with real valued time domain signals we only store frequency
  // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we
  // need to add the contribution from the missing part in
  // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical
  // with the values in [1, PART_LEN-1], hence multiply those values by 2. This
  // is the values in the for loop below, but multiplication by 2 and division
  // by 2 cancel.

  // TODO(bjornv): Investigate reusing energy calculations performed at other
  // places in the code.
  int k = 1;  // The initial value is unused; the loop below reassigns |k|.
  // Imaginary parts are zero at end points and left out of the calculation.
  float energy = (in[0][0] * in[0][0]) / 2;
  energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2;

  for (k = 1; k < PART_LEN; k++) {
    energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]);
  }
  energy /= PART_LEN2;

  level->sfrsum += energy;
  level->sfrcounter++;

  // NOTE(review): with a '>' comparison and a counter that starts at zero,
  // subCountLen + 1 blocks are summed before the division by subCountLen
  // below; the same pattern applies to |frcounter| and countLen. Confirm
  // whether that is intended.
  if (level->sfrcounter > subCountLen) {
    level->framelevel = level->sfrsum / (subCountLen * PART_LEN);
    level->sfrsum = 0;
    level->sfrcounter = 0;
    if (level->framelevel > 0) {
      if (level->framelevel < level->minlevel) {
        level->minlevel = level->framelevel;  // New minimum.
      } else {
        level->minlevel *= (1 + 0.001f);  // Small increase.
      }
    }
    level->frcounter++;
    level->frsum += level->framelevel;
    if (level->frcounter > countLen) {
      level->averagelevel = level->frsum / countLen;
      level->frsum = 0;
      level->frcounter = 0;
    }
  }
}

// Updates the ERL, A_NLP and ERLE statistics from the tracked power levels.
// The statistics are only evaluated right after the far-end average level has
// been refreshed (farlevel.frcounter == 0), and only if echo was likely present
// for more than half of the window and the far-end is active relative to its
// minimum level. |stateCounter| is cleared at each such refresh.
static void UpdateMetrics(AecCore* aec) {
  float dtmp, dtmp2;

  const float actThresholdNoisy = 8.0f;
  const float actThresholdClean = 40.0f;
  const float safety = 0.99995f;
  const float noisyPower = 300000.0f;

  float actThreshold;
  float echo, suppressedEcho;

  if (aec->echoState) {  // Check if echo is likely present
    aec->stateCounter++;
  }

  if (aec->farlevel.frcounter == 0) {

    // Pick the activity threshold from the far-end minimum level.
    if (aec->farlevel.minlevel < noisyPower) {
      actThreshold = actThresholdClean;
    } else {
      actThreshold = actThresholdNoisy;
    }

    if ((aec->stateCounter > (0.5f * countLen * subCountLen)) &&
        (aec->farlevel.sfrcounter == 0)

        // Estimate in active far-end segments only
        &&
        (aec->farlevel.averagelevel >
         (actThreshold * aec->farlevel.minlevel))) {

      // Subtract noise power
      echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel;

      // ERL
      dtmp = 10 * (float)log10(aec->farlevel.averagelevel /
                                   aec->nearlevel.averagelevel +
                               1e-10f);
      // NOTE(review): |dtmp2| (the noise-compensated value) is computed here
      // but not used by the ERL statistics below.
      dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f);

      aec->erl.instant = dtmp;
      if (dtmp > aec->erl.max) {
        aec->erl.max = dtmp;
      }

      if (dtmp < aec->erl.min) {
        aec->erl.min = dtmp;
      }

      aec->erl.counter++;
      aec->erl.sum += dtmp;
      aec->erl.average = aec->erl.sum / aec->erl.counter;

      // Upper mean
      if (dtmp > aec->erl.average) {
        aec->erl.hicounter++;
        aec->erl.hisum += dtmp;
        aec->erl.himean = aec->erl.hisum / aec->erl.hicounter;
      }

      // A_NLP
      dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
                                   (2 * aec->linoutlevel.averagelevel) +
                               1e-10f);

      // subtract noise power
      suppressedEcho = 2 * (aec->linoutlevel.averagelevel -
                            safety * aec->linoutlevel.minlevel);

      dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);

      // NOTE(review): |instant| takes the noise-compensated |dtmp2| while
      // max/min/sum/average below use the uncompensated |dtmp|. The ERLE
      // section uses |dtmp2| for all of them - confirm which one is intended.
      aec->aNlp.instant = dtmp2;
      if (dtmp > aec->aNlp.max) {
        aec->aNlp.max = dtmp;
      }

      if (dtmp < aec->aNlp.min) {
        aec->aNlp.min = dtmp;
      }

      aec->aNlp.counter++;
      aec->aNlp.sum += dtmp;
      aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter;

      // Upper mean
      if (dtmp > aec->aNlp.average) {
        aec->aNlp.hicounter++;
        aec->aNlp.hisum += dtmp;
        aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter;
      }

      // ERLE

      // subtract noise power
      suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel -
                            safety * aec->nlpoutlevel.minlevel);

      dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
                                   (2 * aec->nlpoutlevel.averagelevel) +
                               1e-10f);
      dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);

      // The uncompensated value computed above is discarded; the
      // noise-compensated one is used for every ERLE statistic.
      dtmp = dtmp2;
      aec->erle.instant = dtmp;
      if (dtmp > aec->erle.max) {
        aec->erle.max = dtmp;
      }

      if (dtmp < aec->erle.min) {
        aec->erle.min = dtmp;
      }

      aec->erle.counter++;
      aec->erle.sum += dtmp;
      aec->erle.average = aec->erle.sum / aec->erle.counter;

      // Upper mean
      if (dtmp > aec->erle.average) {
        aec->erle.hicounter++;
        aec->erle.hisum += dtmp;
        aec->erle.himean = aec->erle.hisum / aec->erle.hicounter;
      }
    }

    aec->stateCounter = 0;
  }
}

// Condenses the delay histogram collected since the last call into
// |delay_median|, |delay_std| (both in ms) and |fraction_poor_delays|, then
// clears the histogram. All three are set to -1 when no delay values were
// collected.
static void UpdateDelayMetrics(AecCore* self) {
  int i = 0;
  int delay_values = 0;
  int median = 0;
  int lookahead = WebRtc_lookahead(self->delay_estimator);
  const int kMsPerBlock = PART_LEN / (self->mult * 8);
  int64_t l1_norm = 0;

  if (self->num_delay_values == 0) {
    // We have no new delay value data. Even though -1 is a valid |median| in
    // the sense that we allow negative values, it will practically never be
    // used since multiples of |kMsPerBlock| will always be returned.
    // We therefore use -1 to indicate in the logs that the delay estimator was
    // not able to estimate the delay.
    self->delay_median = -1;
    self->delay_std = -1;
    self->fraction_poor_delays = -1;
    return;
  }

  // Start value for median count down.
  delay_values = self->num_delay_values >> 1;
  // Get median of delay values since last update.
  for (i = 0; i < kHistorySizeBlocks; i++) {
    delay_values -= self->delay_histogram[i];
    if (delay_values < 0) {
      median = i;
      break;
    }
  }
  // Account for lookahead.
  self->delay_median = (median - lookahead) * kMsPerBlock;

  // Calculate the L1 norm, with median value as central moment.
  for (i = 0; i < kHistorySizeBlocks; i++) {
    l1_norm += abs(i - median) * self->delay_histogram[i];
  }
  // Mean absolute deviation, rounded to the nearest block, converted to ms.
  self->delay_std = (int)((l1_norm + self->num_delay_values / 2) /
      self->num_delay_values) * kMsPerBlock;

  // Determine fraction of delays that are out of bounds, that is, either
  // negative (anti-causal system) or larger than the AEC filter length.
  {
    int num_delays_out_of_bounds = self->num_delay_values;
    const int histogram_length = sizeof(self->delay_histogram) /
      sizeof(self->delay_histogram[0]);
    // Subtract every delay that falls inside [lookahead, lookahead +
    // num_partitions); whatever remains is out of bounds.
    for (i = lookahead; i < lookahead + self->num_partitions; ++i) {
      if (i < histogram_length)
        num_delays_out_of_bounds -= self->delay_histogram[i];
    }
    self->fraction_poor_delays = (float)num_delays_out_of_bounds /
        self->num_delay_values;
  }

  // Reset histogram.
  memset(self->delay_histogram, 0, sizeof(self->delay_histogram));
  self->num_delay_values = 0;

  return;
}

// Transforms the PART_LEN2 samples in |time_data| to the frequency domain and
// stores the result in |freq_data| ([0] = real parts, [1] = imaginary parts).
// When |window| is non-zero the sqrt-Hanning window is applied first.
// |time_data| is modified in place and holds the packed fft output on return.
static void TimeToFrequency(float time_data[PART_LEN2],
                            float freq_data[2][PART_LEN1],
                            int window) {
  int i = 0;

  // TODO(bjornv): Should we have a different function/wrapper for windowed FFT?
  if (window) {
    for (i = 0; i < PART_LEN; i++) {
      time_data[i] *= WebRtcAec_sqrtHanning[i];
      time_data[PART_LEN + i] *= WebRtcAec_sqrtHanning[PART_LEN - i];
    }
  }

  aec_rdft_forward_128(time_data);
  // Reorder. Slots 0 and 1 of the packed output hold the real values of bins 0
  // and PART_LEN; their imaginary parts are zero.
  freq_data[1][0] = 0;
  freq_data[1][PART_LEN] = 0;
  freq_data[0][0] = time_data[0];
  freq_data[0][PART_LEN] = time_data[1];
  for (i = 1; i < PART_LEN; i++) {
    freq_data[0][i] = time_data[2 * i];
    freq_data[1][i] = time_data[2 * i + 1];
  }
}

// Moves the read pointers of the far-end buffers by |elements| and returns the
// result of WebRtc_MoveReadPtr() for |far_buf|. As the name says, the system
// delay bookkeeping is not updated here.
static int MoveFarReadPtrWithoutSystemDelayUpdate(AecCore* self, int elements) {
  WebRtc_MoveReadPtr(self->far_buf_windowed, elements);
#ifdef WEBRTC_AEC_DEBUG_DUMP
  WebRtc_MoveReadPtr(self->far_time_buf, elements);
#endif
  return WebRtc_MoveReadPtr(self->far_buf, elements);
}

// Proposes a correction of the far-end buffer position from the delay
// estimator's last delay. Returns the correction (in the same units as the
// far-end buffer read count), or 0 when no correction should be applied.
// Also updates |previous_delay|, |shift_offset|, |delay_correction_count| and
// |delay_quality_threshold|.
static int SignalBasedDelayCorrection(AecCore* self) {
  int delay_correction = 0;
  int last_delay = -2;
  assert(self != NULL);
#if !defined(WEBRTC_ANDROID)
  // On desktops, turn on correction after |kDelayCorrectionStart| frames. This
  // is to let the delay estimation get a chance to converge. Also, if the
  // playout audio volume is low (or even muted) the delay estimation can return
  // a very large delay, which will break the AEC if it is applied.
  if (self->frame_count < kDelayCorrectionStart) {
    return 0;
  }
#endif

  // 1. Check for non-negative delay estimate. Note that the estimates we get
  //    from the delay estimation are not compensated for lookahead. Hence, a
  //    negative |last_delay| is an invalid one.
  // 2. Verify that there is a delay change. In addition, only allow a change
  //    if the delay is outside a certain region taking the AEC filter length
  //    into account.
  // TODO(bjornv): Investigate if we can remove the non-zero delay change check.
  // 3. Only allow delay correction if the delay estimation quality exceeds
  //    |delay_quality_threshold|.
  // 4. Finally, verify that the proposed |delay_correction| is feasible by
  //    comparing with the size of the far-end buffer.
  last_delay = WebRtc_last_delay(self->delay_estimator);
  if ((last_delay >= 0) &&
      (last_delay != self->previous_delay) &&
      (WebRtc_last_delay_quality(self->delay_estimator) >
           self->delay_quality_threshold)) {
    int delay = last_delay - WebRtc_lookahead(self->delay_estimator);
    // Allow for a slack in the actual delay, defined by a |lower_bound| and an
    // |upper_bound|. The adaptive echo cancellation filter is currently
    // |num_partitions| (of 64 samples) long. If the delay estimate is negative
    // or at least 3/4 of the filter length we open up for correction.
    const int lower_bound = 0;
    const int upper_bound = self->num_partitions * 3 / 4;
    const int do_correction = delay <= lower_bound || delay > upper_bound;
    if (do_correction == 1) {
      int available_read = (int)WebRtc_available_read(self->far_buf);
      // With |shift_offset| we gradually rely on the delay estimates. For
      // positive delays we reduce the correction by |shift_offset| to lower the
      // risk of pushing the AEC into a non causal state. For negative delays
      // we rely on the values up to a rounding error, hence compensate by 1
      // element to make sure to push the delay into the causal region.
      delay_correction = -delay;
      delay_correction += delay > self->shift_offset ? self->shift_offset : 1;
      // |shift_offset| shrinks by one per attempt but never drops below 1.
      self->shift_offset--;
      self->shift_offset = (self->shift_offset <= 1 ? 1 : self->shift_offset);
      if (delay_correction > available_read - self->mult - 1) {
        // There is not enough data in the buffer to perform this shift. Hence,
        // we do not rely on the delay estimate and do nothing.
        delay_correction = 0;
      } else {
        self->previous_delay = last_delay;
        ++self->delay_correction_count;
      }
    }
  }
  // Update the |delay_quality_threshold| once we have our first delay
  // correction. The threshold only ever increases and is capped at
  // kDelayQualityThresholdMax.
  if (self->delay_correction_count > 0) {
    float delay_quality = WebRtc_last_delay_quality(self->delay_estimator);
    delay_quality = (delay_quality > kDelayQualityThresholdMax ?
        kDelayQualityThresholdMax : delay_quality);
    self->delay_quality_threshold =
        (delay_quality > self->delay_quality_threshold ? delay_quality :
            self->delay_quality_threshold);
  }
  return delay_correction;
}

// Runs the non-linear processor (NLP) on one block.
//
// Steps: read the windowed far-end spectrum, compute the coherence measures,
// derive the per-bin suppression gains |hNl| and the overdrive, suppress the
// error spectrum, add comfort noise, transform back and overlap-add.
//
//  - |output|  : receives PART_LEN saturated output samples for the low band.
//  - |outputH| : receives PART_LEN samples for each of the (num_bands - 1)
//                higher bands; only written when aec->num_bands > 1.
//
// At the end the near-end, error and H band buffers are shifted by PART_LEN
// samples and the far-end spectrum history |xfwBuf| by one partition.
static void NonLinearProcessing(AecCore* aec,
                                float* output,
                                float* const* outputH) {
  float efw[2][PART_LEN1], xfw[2][PART_LEN1];
  complex_t comfortNoiseHband[PART_LEN1];
  float fft[PART_LEN2];
  float scale, dtmp;
  float nlpGainHband;
  int i;
  size_t j;

  // Coherence and non-linear filter
  float cohde[PART_LEN1], cohxd[PART_LEN1];
  float hNlDeAvg, hNlXdAvg;
  float hNl[PART_LEN1];
  float hNlPref[kPrefBandSize];
  float hNlFb = 0, hNlFbLow = 0;
  const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f;
  const int prefBandSize = kPrefBandSize / aec->mult;
  const int minPrefBand = 4 / aec->mult;
  // Minimum overdrive table for the active filter mode.
  const float* min_overdrive = aec->extended_filter_enabled
                                   ? kExtendedMinOverDrive
                                   : kNormalMinOverDrive;

  // Filter energy
  const int delayEstInterval = 10 * aec->mult;

  float* xfw_ptr = NULL;

  // The partition delay is re-estimated (in SubbandCoherence) whenever this
  // counter wraps to zero, i.e. once every |delayEstInterval| blocks.
  aec->delayEstCtr++;
  if (aec->delayEstCtr == delayEstInterval) {
    aec->delayEstCtr = 0;
  }

  // initialize comfort noise for H band
  memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband));
  nlpGainHband = (float)0.0;
  dtmp = (float)0.0;

  // We should always have at least one element stored in |far_buf|.
  assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
  // NLP
  WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1);

  // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
  // |xfwBuf|.
  // Buffer far.
  memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);

  WebRtcAec_SubbandCoherence(aec, efw, xfw, fft, cohde, cohxd);

  // Average (1 - far/near coherence) over the preferred bands.
  hNlXdAvg = 0;
  for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
    hNlXdAvg += cohxd[i];
  }
  hNlXdAvg /= prefBandSize;
  hNlXdAvg = 1 - hNlXdAvg;

  // Average near/error coherence over the preferred bands.
  hNlDeAvg = 0;
  for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
    hNlDeAvg += cohde[i];
  }
  hNlDeAvg /= prefBandSize;

  if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) {
    aec->hNlXdAvgMin = hNlXdAvg;
  }

  // Near-end state with hysteresis: set above (0.98, 0.9), cleared below
  // (0.95, 0.8), unchanged in between.
  if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) {
    aec->stNearState = 1;
  } else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) {
    aec->stNearState = 0;
  }

  if (aec->hNlXdAvgMin == 1) {
    // The tracked minimum has recovered to 1: treat as no echo and use the
    // minimum overdrive.
    aec->echoState = 0;
    aec->overDrive = min_overdrive[aec->nlp_mode];

    if (aec->stNearState == 1) {
      memcpy(hNl, cohde, sizeof(hNl));
      hNlFb = hNlDeAvg;
      hNlFbLow = hNlDeAvg;
    } else {
      for (i = 0; i < PART_LEN1; i++) {
        hNl[i] = 1 - cohxd[i];
      }
      hNlFb = hNlXdAvg;
      hNlFbLow = hNlXdAvg;
    }
  } else {

    if (aec->stNearState == 1) {
      aec->echoState = 0;
      memcpy(hNl, cohde, sizeof(hNl));
      hNlFb = hNlDeAvg;
      hNlFbLow = hNlDeAvg;
    } else {
      aec->echoState = 1;
      for (i = 0; i < PART_LEN1; i++) {
        hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]);
      }

      // Select an order statistic from the preferred bands.
      // TODO: Using quicksort now, but a selection algorithm may be preferred.
      memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize);
      qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat);
      hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))];
      hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))];
    }
  }

  // Track the local filter minimum to determine suppression overdrive.
  if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) {
    aec->hNlFbLocalMin = hNlFbLow;
    aec->hNlFbMin = hNlFbLow;
    aec->hNlNewMin = 1;
    aec->hNlMinCtr = 0;
  }
  // Both tracked minima drift slowly back towards 1.
  aec->hNlFbLocalMin =
      WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1);
  aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1);

  if (aec->hNlNewMin == 1) {
    aec->hNlMinCtr++;
  }
  // Two blocks after a new minimum, derive the overdrive from it, bounded
  // below by the minimum overdrive of the NLP mode.
  if (aec->hNlMinCtr == 2) {
    aec->hNlNewMin = 0;
    aec->hNlMinCtr = 0;
    aec->overDrive =
        WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] /
                           ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f),
                       min_overdrive[aec->nlp_mode]);
  }

  // Smooth the overdrive: slowly when it decreases, faster when it increases.
  if (aec->overDrive < aec->overDriveSm) {
    aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;
  } else {
    aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;
  }

  WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw);

  // Add comfort noise.
  WebRtcAec_ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);

  // TODO(bjornv): Investigate how to take the windowing below into account if
  // needed.
  if (aec->metricsMode == 1) {
    // Note that we have a scaling by two in the time domain |eBuf|.
    // In addition the time domain signal is windowed before transformation,
    // losing half the energy on the average. We take care of the first
    // scaling only in UpdateMetrics().
    UpdateLevel(&aec->nlpoutlevel, efw);
  }
  // Inverse error fft. Repack into the layout expected by the rdft routines
  // (slot 1 holds the real value of bin PART_LEN).
  fft[0] = efw[0][0];
  fft[1] = efw[0][PART_LEN];
  for (i = 1; i < PART_LEN; i++) {
    fft[2 * i] = efw[0][i];
    // Sign change required by Ooura fft.
    fft[2 * i + 1] = -efw[1][i];
  }
  aec_rdft_inverse_128(fft);

  // Overlap and add to obtain output.
  scale = 2.0f / PART_LEN2;
  for (i = 0; i < PART_LEN; i++) {
    fft[i] *= scale;  // fft scaling
    fft[i] = fft[i] * WebRtcAec_sqrtHanning[i] + aec->outBuf[i];

    // The windowed second half is saved for the next block's overlap-add.
    fft[PART_LEN + i] *= scale;  // fft scaling
    aec->outBuf[i] = fft[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i];

    // Saturate output to keep it in the allowed range.
    output[i] = WEBRTC_SPL_SAT(
        WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN);
  }

  // For H band
  if (aec->num_bands > 1) {

    // H band gain
    // average nlp over low band: average over second half of freq spectrum
    // (4->8khz)
    GetHighbandGain(hNl, &nlpGainHband);

    // Inverse comfort_noise
    if (flagHbandCn == 1) {
      fft[0] = comfortNoiseHband[0][0];
      fft[1] = comfortNoiseHband[PART_LEN][0];
      for (i = 1; i < PART_LEN; i++) {
        fft[2 * i] = comfortNoiseHband[i][0];
        fft[2 * i + 1] = comfortNoiseHband[i][1];
      }
      aec_rdft_inverse_128(fft);
      scale = 2.0f / PART_LEN2;
    }

    // compute gain factor
    for (j = 0; j < aec->num_bands - 1; ++j) {
      for (i = 0; i < PART_LEN; i++) {
        dtmp = aec->dBufH[j][i];
        dtmp = dtmp * nlpGainHband;  // for variable gain

        // add some comfort noise where Hband is attenuated
        // (only for the first higher band, so |fft| is scaled exactly once)
        if (flagHbandCn == 1 && j == 0) {
          fft[i] *= scale;  // fft scaling
          dtmp += cnScaleHband * fft[i];
        }

        // Saturate output to keep it in the allowed range.
        outputH[j][i] = WEBRTC_SPL_SAT(
            WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN);
      }
    }
  }

  // Copy the current block to the old position.
  memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);
  memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);

  // Copy the current block to the old position for H band
  for (j = 0; j < aec->num_bands - 1; ++j) {
    memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN);
  }

  // Shift the far-end spectrum history by one partition, dropping the oldest
  // entry; memmove is required because the regions overlap.
  memmove(aec->xfwBuf + PART_LEN1,
          aec->xfwBuf,
          sizeof(aec->xfwBuf) - sizeof(complex_t) * PART_LEN1);
}

static void ProcessBlock(AecCore* aec) {
  size_t i;
  float y[PART_LEN], e[PART_LEN];
  float scale;

  float fft[PART_LEN2];
  float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
  float df[2][PART_LEN1];
  float far_spectrum = 0.0f;
  float near_spectrum = 0.0f;
  float abs_far_spectrum[PART_LEN1];
  float abs_near_spectrum[PART_LEN1];

  const float gPow[2] = {0.9f, 0.1f};

  // Noise estimate constants.
  const int noiseInitBlocks = 500 * aec->mult;
  const float step = 0.1f;
  const float ramp = 1.0002f;
  const float gInitNoise[2] = {0.999f, 0.001f};

  float nearend[PART_LEN];
  float* nearend_ptr = NULL;
  float output[PART_LEN];
  float outputH[NUM_HIGH_BANDS_MAX][PART_LEN];
  float* outputH_ptr[NUM_HIGH_BANDS_MAX];
  for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
    outputH_ptr[i] = outputH[i];
  }

  float* xf_ptr = NULL;

  // Concatenate old and new nearend blocks.
+ for (i = 0; i < aec->num_bands - 1; ++i) { + WebRtc_ReadBuffer(aec->nearFrBufH[i], + (void**)&nearend_ptr, + nearend, + PART_LEN); + memcpy(aec->dBufH[i] + PART_LEN, nearend_ptr, sizeof(nearend)); + } + WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN); + memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend)); + + // ---------- Ooura fft ---------- + +#ifdef WEBRTC_AEC_DEBUG_DUMP + { + float farend[PART_LEN]; + float* farend_ptr = NULL; + WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1); + RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, farend_ptr, PART_LEN); + RTC_AEC_DEBUG_WAV_WRITE(aec->nearFile, nearend_ptr, PART_LEN); + } +#endif + + // We should always have at least one element stored in |far_buf|. + assert(WebRtc_available_read(aec->far_buf) > 0); + WebRtc_ReadBuffer(aec->far_buf, (void**)&xf_ptr, &xf[0][0], 1); + + // Near fft + memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); + TimeToFrequency(fft, df, 0); + + // Power smoothing + for (i = 0; i < PART_LEN1; i++) { + far_spectrum = (xf_ptr[i] * xf_ptr[i]) + + (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]); + aec->xPow[i] = + gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum; + // Calculate absolute spectra + abs_far_spectrum[i] = sqrtf(far_spectrum); + + near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i]; + aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum; + // Calculate absolute spectra + abs_near_spectrum[i] = sqrtf(near_spectrum); + } + + // Estimate noise power. Wait until dPow is more stable. + if (aec->noiseEstCtr > 50) { + for (i = 0; i < PART_LEN1; i++) { + if (aec->dPow[i] < aec->dMinPow[i]) { + aec->dMinPow[i] = + (aec->dPow[i] + step * (aec->dMinPow[i] - aec->dPow[i])) * ramp; + } else { + aec->dMinPow[i] *= ramp; + } + } + } + + // Smooth increasing noise power from zero at the start, + // to avoid a sudden burst of comfort noise. 
+ if (aec->noiseEstCtr < noiseInitBlocks) { + aec->noiseEstCtr++; + for (i = 0; i < PART_LEN1; i++) { + if (aec->dMinPow[i] > aec->dInitMinPow[i]) { + aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] + + gInitNoise[1] * aec->dMinPow[i]; + } else { + aec->dInitMinPow[i] = aec->dMinPow[i]; + } + } + aec->noisePow = aec->dInitMinPow; + } else { + aec->noisePow = aec->dMinPow; + } + + // Block wise delay estimation used for logging + if (aec->delay_logging_enabled) { + if (WebRtc_AddFarSpectrumFloat( + aec->delay_estimator_farend, abs_far_spectrum, PART_LEN1) == 0) { + int delay_estimate = WebRtc_DelayEstimatorProcessFloat( + aec->delay_estimator, abs_near_spectrum, PART_LEN1); + if (delay_estimate >= 0) { + // Update delay estimate buffer. + aec->delay_histogram[delay_estimate]++; + aec->num_delay_values++; + } + if (aec->delay_metrics_delivered == 1 && + aec->num_delay_values >= kDelayMetricsAggregationWindow) { + UpdateDelayMetrics(aec); + } + } + } + + // Update the xfBuf block position. + aec->xfBufBlockPos--; + if (aec->xfBufBlockPos == -1) { + aec->xfBufBlockPos = aec->num_partitions - 1; + } + + // Buffer xf + memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, + xf_ptr, + sizeof(float) * PART_LEN1); + memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, + &xf_ptr[PART_LEN1], + sizeof(float) * PART_LEN1); + + memset(yf, 0, sizeof(yf)); + + // Filter far + WebRtcAec_FilterFar(aec, yf); + + // Inverse fft to obtain echo estimate and error. 
+ fft[0] = yf[0][0]; + fft[1] = yf[0][PART_LEN]; + for (i = 1; i < PART_LEN; i++) { + fft[2 * i] = yf[0][i]; + fft[2 * i + 1] = yf[1][i]; + } + aec_rdft_inverse_128(fft); + + scale = 2.0f / PART_LEN2; + for (i = 0; i < PART_LEN; i++) { + y[i] = fft[PART_LEN + i] * scale; // fft scaling + } + + for (i = 0; i < PART_LEN; i++) { + e[i] = nearend_ptr[i] - y[i]; + } + + // Error fft + memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN); + memset(fft, 0, sizeof(float) * PART_LEN); + memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN); + // TODO(bjornv): Change to use TimeToFrequency(). + aec_rdft_forward_128(fft); + + ef[1][0] = 0; + ef[1][PART_LEN] = 0; + ef[0][0] = fft[0]; + ef[0][PART_LEN] = fft[1]; + for (i = 1; i < PART_LEN; i++) { + ef[0][i] = fft[2 * i]; + ef[1][i] = fft[2 * i + 1]; + } + + RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file, + &ef[0][0], + sizeof(ef[0][0]) * PART_LEN1 * 2); + + if (aec->metricsMode == 1) { + // Note that the first PART_LEN samples in fft (before transformation) are + // zero. Hence, the scaling by two in UpdateLevel() should not be + // performed. That scaling is taken care of in UpdateMetrics() instead. + UpdateLevel(&aec->linoutlevel, ef); + } + + // Scale error signal inversely with far power. + WebRtcAec_ScaleErrorSignal(aec, ef); + WebRtcAec_FilterAdaptation(aec, fft, ef); + NonLinearProcessing(aec, output, outputH_ptr); + + if (aec->metricsMode == 1) { + // Update power levels and echo metrics + UpdateLevel(&aec->farlevel, (float(*)[PART_LEN1])xf_ptr); + UpdateLevel(&aec->nearlevel, df); + UpdateMetrics(aec); + } + + // Store the output block. 
+ WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN); + // For high bands + for (i = 0; i < aec->num_bands - 1; ++i) { + WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN); + } + + RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, e, PART_LEN); + RTC_AEC_DEBUG_WAV_WRITE(aec->outFile, output, PART_LEN); +} + +AecCore* WebRtcAec_CreateAec() { + int i; + AecCore* aec = malloc(sizeof(AecCore)); + if (!aec) { + return NULL; + } + + aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float)); + if (!aec->nearFrBuf) { + WebRtcAec_FreeAec(aec); + return NULL; + } + + aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float)); + if (!aec->outFrBuf) { + WebRtcAec_FreeAec(aec); + return NULL; + } + + for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { + aec->nearFrBufH[i] = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(float)); + if (!aec->nearFrBufH[i]) { + WebRtcAec_FreeAec(aec); + return NULL; + } + aec->outFrBufH[i] = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(float)); + if (!aec->outFrBufH[i]) { + WebRtcAec_FreeAec(aec); + return NULL; + } + } + + // Create far-end buffers. 
+ aec->far_buf = + WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1); + if (!aec->far_buf) { + WebRtcAec_FreeAec(aec); + return NULL; + } + aec->far_buf_windowed = + WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1); + if (!aec->far_buf_windowed) { + WebRtcAec_FreeAec(aec); + return NULL; + } +#ifdef WEBRTC_AEC_DEBUG_DUMP + aec->instance_index = webrtc_aec_instance_count; + aec->far_time_buf = + WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN); + if (!aec->far_time_buf) { + WebRtcAec_FreeAec(aec); + return NULL; + } + aec->farFile = aec->nearFile = aec->outFile = aec->outLinearFile = NULL; + aec->debug_dump_count = 0; +#endif + aec->delay_estimator_farend = + WebRtc_CreateDelayEstimatorFarend(PART_LEN1, kHistorySizeBlocks); + if (aec->delay_estimator_farend == NULL) { + WebRtcAec_FreeAec(aec); + return NULL; + } + // We create the delay_estimator with the same amount of maximum lookahead as + // the delay history size (kHistorySizeBlocks) for symmetry reasons. + aec->delay_estimator = WebRtc_CreateDelayEstimator( + aec->delay_estimator_farend, kHistorySizeBlocks); + if (aec->delay_estimator == NULL) { + WebRtcAec_FreeAec(aec); + return NULL; + } +#ifdef WEBRTC_ANDROID + aec->delay_agnostic_enabled = 1; // DA-AEC enabled by default. + // DA-AEC assumes the system is causal from the beginning and will self adjust + // the lookahead when shifting is required. 
+ WebRtc_set_lookahead(aec->delay_estimator, 0); +#else + aec->delay_agnostic_enabled = 0; + WebRtc_set_lookahead(aec->delay_estimator, kLookaheadBlocks); +#endif + aec->extended_filter_enabled = 0; + + // Assembly optimization + WebRtcAec_FilterFar = FilterFar; + WebRtcAec_ScaleErrorSignal = ScaleErrorSignal; + WebRtcAec_FilterAdaptation = FilterAdaptation; + WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress; + WebRtcAec_ComfortNoise = ComfortNoise; + WebRtcAec_SubbandCoherence = SubbandCoherence; + +#if defined(WEBRTC_ARCH_X86_FAMILY) + if (WebRtc_GetCPUInfo(kSSE2)) { + WebRtcAec_InitAec_SSE2(); + } +#endif + +#if defined(MIPS_FPU_LE) + WebRtcAec_InitAec_mips(); +#endif + +#if defined(WEBRTC_HAS_NEON) + WebRtcAec_InitAec_neon(); +#elif defined(WEBRTC_DETECT_NEON) + if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { + WebRtcAec_InitAec_neon(); + } +#endif + + aec_rdft_init(); + + return aec; +} + +void WebRtcAec_FreeAec(AecCore* aec) { + int i; + if (aec == NULL) { + return; + } + + WebRtc_FreeBuffer(aec->nearFrBuf); + WebRtc_FreeBuffer(aec->outFrBuf); + + for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { + WebRtc_FreeBuffer(aec->nearFrBufH[i]); + WebRtc_FreeBuffer(aec->outFrBufH[i]); + } + + WebRtc_FreeBuffer(aec->far_buf); + WebRtc_FreeBuffer(aec->far_buf_windowed); +#ifdef WEBRTC_AEC_DEBUG_DUMP + WebRtc_FreeBuffer(aec->far_time_buf); +#endif + RTC_AEC_DEBUG_WAV_CLOSE(aec->farFile); + RTC_AEC_DEBUG_WAV_CLOSE(aec->nearFile); + RTC_AEC_DEBUG_WAV_CLOSE(aec->outFile); + RTC_AEC_DEBUG_WAV_CLOSE(aec->outLinearFile); + RTC_AEC_DEBUG_RAW_CLOSE(aec->e_fft_file); + + WebRtc_FreeDelayEstimator(aec->delay_estimator); + WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend); + + free(aec); +} + +int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { + int i; + + aec->sampFreq = sampFreq; + + if (sampFreq == 8000) { + aec->normal_mu = 0.6f; + aec->normal_error_threshold = 2e-6f; + aec->num_bands = 1; + } else { + aec->normal_mu = 0.5f; + aec->normal_error_threshold = 
1.5e-6f; + aec->num_bands = (size_t)(sampFreq / 16000); + } + + WebRtc_InitBuffer(aec->nearFrBuf); + WebRtc_InitBuffer(aec->outFrBuf); + for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { + WebRtc_InitBuffer(aec->nearFrBufH[i]); + WebRtc_InitBuffer(aec->outFrBufH[i]); + } + + // Initialize far-end buffers. + WebRtc_InitBuffer(aec->far_buf); + WebRtc_InitBuffer(aec->far_buf_windowed); +#ifdef WEBRTC_AEC_DEBUG_DUMP + WebRtc_InitBuffer(aec->far_time_buf); + { + int process_rate = sampFreq > 16000 ? 16000 : sampFreq; + RTC_AEC_DEBUG_WAV_REOPEN("aec_far", aec->instance_index, + aec->debug_dump_count, process_rate, + &aec->farFile ); + RTC_AEC_DEBUG_WAV_REOPEN("aec_near", aec->instance_index, + aec->debug_dump_count, process_rate, + &aec->nearFile); + RTC_AEC_DEBUG_WAV_REOPEN("aec_out", aec->instance_index, + aec->debug_dump_count, process_rate, + &aec->outFile ); + RTC_AEC_DEBUG_WAV_REOPEN("aec_out_linear", aec->instance_index, + aec->debug_dump_count, process_rate, + &aec->outLinearFile); + } + + RTC_AEC_DEBUG_RAW_OPEN("aec_e_fft", + aec->debug_dump_count, + &aec->e_fft_file); + + ++aec->debug_dump_count; +#endif + aec->system_delay = 0; + + if (WebRtc_InitDelayEstimatorFarend(aec->delay_estimator_farend) != 0) { + return -1; + } + if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) { + return -1; + } + aec->delay_logging_enabled = 0; + aec->delay_metrics_delivered = 0; + memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram)); + aec->num_delay_values = 0; + aec->delay_median = -1; + aec->delay_std = -1; + aec->fraction_poor_delays = -1.0f; + + aec->signal_delay_correction = 0; + aec->previous_delay = -2; // (-2): Uninitialized. + aec->delay_correction_count = 0; + aec->shift_offset = kInitialShiftOffset; + aec->delay_quality_threshold = kDelayQualityThresholdMin; + + aec->num_partitions = kNormalNumPartitions; + + // Update the delay estimator with filter length. We use half the + // |num_partitions| to take the echo path into account. 
In practice we say + // that the echo has a duration of maximum half |num_partitions|, which is not + // true, but serves as a crude measure. + WebRtc_set_allowed_offset(aec->delay_estimator, aec->num_partitions / 2); + // TODO(bjornv): I currently hard coded the enable. Once we've established + // that AECM has no performance regression, robust_validation will be enabled + // all the time and the APIs to turn it on/off will be removed. Hence, remove + // this line then. + WebRtc_enable_robust_validation(aec->delay_estimator, 1); + aec->frame_count = 0; + + // Default target suppression mode. + aec->nlp_mode = 1; + + // Sampling frequency multiplier w.r.t. 8 kHz. + // In case of multiple bands we process the lower band in 16 kHz, hence the + // multiplier is always 2. + if (aec->num_bands > 1) { + aec->mult = 2; + } else { + aec->mult = (short)aec->sampFreq / 8000; + } + + aec->farBufWritePos = 0; + aec->farBufReadPos = 0; + + aec->inSamples = 0; + aec->outSamples = 0; + aec->knownDelay = 0; + + // Initialize buffers + memset(aec->dBuf, 0, sizeof(aec->dBuf)); + memset(aec->eBuf, 0, sizeof(aec->eBuf)); + // For H bands + for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { + memset(aec->dBufH[i], 0, sizeof(aec->dBufH[i])); + } + + memset(aec->xPow, 0, sizeof(aec->xPow)); + memset(aec->dPow, 0, sizeof(aec->dPow)); + memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow)); + aec->noisePow = aec->dInitMinPow; + aec->noiseEstCtr = 0; + + // Initial comfort noise power + for (i = 0; i < PART_LEN1; i++) { + aec->dMinPow[i] = 1.0e6f; + } + + // Holds the last block written to + aec->xfBufBlockPos = 0; + // TODO: Investigate need for these initializations. Deleting them doesn't + // change the output at all and yields 0.4% overall speedup. 
+ memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); + memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); + memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1); + memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1); + memset( + aec->xfwBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); + memset(aec->se, 0, sizeof(float) * PART_LEN1); + + // To prevent numerical instability in the first block. + for (i = 0; i < PART_LEN1; i++) { + aec->sd[i] = 1; + } + for (i = 0; i < PART_LEN1; i++) { + aec->sx[i] = 1; + } + + memset(aec->hNs, 0, sizeof(aec->hNs)); + memset(aec->outBuf, 0, sizeof(float) * PART_LEN); + + aec->hNlFbMin = 1; + aec->hNlFbLocalMin = 1; + aec->hNlXdAvgMin = 1; + aec->hNlNewMin = 0; + aec->hNlMinCtr = 0; + aec->overDrive = 2; + aec->overDriveSm = 2; + aec->delayIdx = 0; + aec->stNearState = 0; + aec->echoState = 0; + aec->divergeState = 0; + + aec->seed = 777; + aec->delayEstCtr = 0; + + // Metrics disabled by default + aec->metricsMode = 0; + InitMetrics(aec); + + return 0; +} + +void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) { + float fft[PART_LEN2]; + float xf[2][PART_LEN1]; + + // Check if the buffer is full, and in that case flush the oldest data. + if (WebRtc_available_write(aec->far_buf) < 1) { + WebRtcAec_MoveFarReadPtr(aec, 1); + } + // Convert far-end partition to the frequency domain without windowing. + memcpy(fft, farend, sizeof(float) * PART_LEN2); + TimeToFrequency(fft, xf, 0); + WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1); + + // Convert far-end partition to the frequency domain with windowing. 
+ memcpy(fft, farend, sizeof(float) * PART_LEN2); + TimeToFrequency(fft, xf, 1); + WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1); +} + +int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) { + int elements_moved = MoveFarReadPtrWithoutSystemDelayUpdate(aec, elements); + aec->system_delay -= elements_moved * PART_LEN; + return elements_moved; +} + +void WebRtcAec_ProcessFrames(AecCore* aec, + const float* const* nearend, + size_t num_bands, + size_t num_samples, + int knownDelay, + float* const* out) { + size_t i, j; + int out_elements = 0; + + aec->frame_count++; + // For each frame the process is as follows: + // 1) If the system_delay indicates on being too small for processing a + // frame we stuff the buffer with enough data for 10 ms. + // 2 a) Adjust the buffer to the system delay, by moving the read pointer. + // b) Apply signal based delay correction, if we have detected poor AEC + // performance. + // 3) TODO(bjornv): Investigate if we need to add this: + // If we can't move read pointer due to buffer size limitations we + // flush/stuff the buffer. + // 4) Process as many partitions as possible. + // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN + // samples. Even though we will have data left to process (we work with + // partitions) we consider updating a whole frame, since that's the + // amount of data we input and output in audio_processing. + // 6) Update the outputs. + + // The AEC has two different delay estimation algorithms built in. The + // first relies on delay input values from the user and the amount of + // shifted buffer elements is controlled by |knownDelay|. This delay will + // give a guess on how much we need to shift far-end buffers to align with + // the near-end signal. The other delay estimation algorithm uses the + // far- and near-end signals to find the offset between them. 
This one + // (called "signal delay") is then used to fine tune the alignment, or + // simply compensate for errors in the system based one. + // Note that the two algorithms operate independently. Currently, we only + // allow one algorithm to be turned on. + + assert(aec->num_bands == num_bands); + + for (j = 0; j < num_samples; j+= FRAME_LEN) { + // TODO(bjornv): Change the near-end buffer handling to be the same as for + // far-end, that is, with a near_pre_buf. + // Buffer the near-end frame. + WebRtc_WriteBuffer(aec->nearFrBuf, &nearend[0][j], FRAME_LEN); + // For H band + for (i = 1; i < num_bands; ++i) { + WebRtc_WriteBuffer(aec->nearFrBufH[i - 1], &nearend[i][j], FRAME_LEN); + } + + // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we + // have enough far-end data for that by stuffing the buffer if the + // |system_delay| indicates others. + if (aec->system_delay < FRAME_LEN) { + // We don't have enough data so we rewind 10 ms. + WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1)); + } + + if (!aec->delay_agnostic_enabled) { + // 2 a) Compensate for a possible change in the system delay. + + // TODO(bjornv): Investigate how we should round the delay difference; + // right now we know that incoming |knownDelay| is underestimated when + // it's less than |aec->knownDelay|. We therefore, round (-32) in that + // direction. In the other direction, we don't have this situation, but + // might flush one partition too little. This can cause non-causality, + // which should be investigated. Maybe, allow for a non-symmetric + // rounding, like -16. + int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN; + int moved_elements = + MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements); + aec->knownDelay -= moved_elements * PART_LEN; + } else { + // 2 b) Apply signal based delay correction. 
+ int move_elements = SignalBasedDelayCorrection(aec); + int moved_elements = + MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements); + int far_near_buffer_diff = WebRtc_available_read(aec->far_buf) - + WebRtc_available_read(aec->nearFrBuf) / PART_LEN; + WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements); + WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend, + moved_elements); + aec->signal_delay_correction += moved_elements; + // If we rely on reported system delay values only, a buffer underrun here + // can never occur since we've taken care of that in 1) above. Here, we + // apply signal based delay correction and can therefore end up with + // buffer underruns since the delay estimation can be wrong. We therefore + // stuff the buffer with enough elements if needed. + if (far_near_buffer_diff < 0) { + WebRtcAec_MoveFarReadPtr(aec, far_near_buffer_diff); + } + } + + // 4) Process as many blocks as possible. + while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) { + ProcessBlock(aec); + } + + // 5) Update system delay with respect to the entire frame. + aec->system_delay -= FRAME_LEN; + + // 6) Update output frame. + // Stuff the out buffer if we have less than a frame to output. + // This should only happen for the first frame. + out_elements = (int)WebRtc_available_read(aec->outFrBuf); + if (out_elements < FRAME_LEN) { + WebRtc_MoveReadPtr(aec->outFrBuf, out_elements - FRAME_LEN); + for (i = 0; i < num_bands - 1; ++i) { + WebRtc_MoveReadPtr(aec->outFrBufH[i], out_elements - FRAME_LEN); + } + } + // Obtain an output frame. + WebRtc_ReadBuffer(aec->outFrBuf, NULL, &out[0][j], FRAME_LEN); + // For H bands. 
+ for (i = 1; i < num_bands; ++i) { + WebRtc_ReadBuffer(aec->outFrBufH[i - 1], NULL, &out[i][j], FRAME_LEN); + } + } +} + +int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std, + float* fraction_poor_delays) { + assert(self != NULL); + assert(median != NULL); + assert(std != NULL); + + if (self->delay_logging_enabled == 0) { + // Logging disabled. + return -1; + } + + if (self->delay_metrics_delivered == 0) { + UpdateDelayMetrics(self); + self->delay_metrics_delivered = 1; + } + *median = self->delay_median; + *std = self->delay_std; + *fraction_poor_delays = self->fraction_poor_delays; + + return 0; +} + +int WebRtcAec_echo_state(AecCore* self) { return self->echoState; } + +void WebRtcAec_GetEchoStats(AecCore* self, + Stats* erl, + Stats* erle, + Stats* a_nlp) { + assert(erl != NULL); + assert(erle != NULL); + assert(a_nlp != NULL); + *erl = self->erl; + *erle = self->erle; + *a_nlp = self->aNlp; +} + +#ifdef WEBRTC_AEC_DEBUG_DUMP +void* WebRtcAec_far_time_buf(AecCore* self) { return self->far_time_buf; } +#endif + +void WebRtcAec_SetConfigCore(AecCore* self, + int nlp_mode, + int metrics_mode, + int delay_logging) { + assert(nlp_mode >= 0 && nlp_mode < 3); + self->nlp_mode = nlp_mode; + self->metricsMode = metrics_mode; + if (self->metricsMode) { + InitMetrics(self); + } + // Turn on delay logging if it is either set explicitly or if delay agnostic + // AEC is enabled (which requires delay estimates). + self->delay_logging_enabled = delay_logging || self->delay_agnostic_enabled; + if (self->delay_logging_enabled) { + memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); + } +} + +void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable) { + self->delay_agnostic_enabled = enable; +} + +int WebRtcAec_delay_agnostic_enabled(AecCore* self) { + return self->delay_agnostic_enabled; +} + +void WebRtcAec_enable_extended_filter(AecCore* self, int enable) { + self->extended_filter_enabled = enable; + self->num_partitions = enable ? 
kExtendedNumPartitions : kNormalNumPartitions; + // Update the delay estimator with filter length. See InitAEC() for details. + WebRtc_set_allowed_offset(self->delay_estimator, self->num_partitions / 2); +} + +int WebRtcAec_extended_filter_enabled(AecCore* self) { + return self->extended_filter_enabled; +} + +int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; } + +void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { + assert(delay >= 0); + self->system_delay = delay; +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.h new file mode 100644 index 00000000..241f0775 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * Specifies the interface for the AEC core. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_ + +#include <stddef.h> + +#include "webrtc/typedefs.h" + +#define FRAME_LEN 80 +#define PART_LEN 64 // Length of partition +#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients +#define PART_LEN2 (PART_LEN * 2) // Length of partition * 2 +#define NUM_HIGH_BANDS_MAX 2 // Max number of high bands + +typedef float complex_t[2]; +// For performance reasons, some arrays of complex numbers are replaced by twice +// as long arrays of float, all the real parts followed by all the imaginary +// ones (complex_t[SIZE] -> float[2][SIZE]). 
This allows SIMD optimizations and +// is better than two arrays (one for the real parts and one for the imaginary +// parts) as this other way would require two pointers instead of one and cause +// extra register spilling. This also allows the offsets to be calculated at +// compile time. + +// Metrics +enum { + kOffsetLevel = -100 +}; + +typedef struct Stats { + float instant; + float average; + float min; + float max; + float sum; + float hisum; + float himean; + int counter; + int hicounter; +} Stats; + +typedef struct AecCore AecCore; + +AecCore* WebRtcAec_CreateAec(); // Returns NULL on error. +void WebRtcAec_FreeAec(AecCore* aec); +int WebRtcAec_InitAec(AecCore* aec, int sampFreq); +void WebRtcAec_InitAec_SSE2(void); +#if defined(MIPS_FPU_LE) +void WebRtcAec_InitAec_mips(void); +#endif +#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON) +void WebRtcAec_InitAec_neon(void); +#endif + +void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend); +void WebRtcAec_ProcessFrames(AecCore* aec, + const float* const* nearend, + size_t num_bands, + size_t num_samples, + int knownDelay, + float* const* out); + +// A helper function to call WebRtc_MoveReadPtr() for all far-end buffers. +// Returns the number of elements moved, and adjusts |system_delay| by the +// corresponding amount, i.e. elements moved times PART_LEN (not ms). +int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements); + +// Calculates the median, standard deviation and amount of poor values among the +// delay estimates aggregated up to the first call to the function. After that +// first call the metrics are aggregated and updated every second. With poor +// values we mean values that most likely will cause the AEC to perform poorly. +// TODO(bjornv): Consider changing tests and tools to handle a constant +// aggregation window throughout the session instead.
+int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std, + float* fraction_poor_delays); + +// Returns the echo state (1: echo, 0: no echo). +int WebRtcAec_echo_state(AecCore* self); + +// Gets statistics of the echo metrics ERL, ERLE, A_NLP. +void WebRtcAec_GetEchoStats(AecCore* self, + Stats* erl, + Stats* erle, + Stats* a_nlp); +#ifdef WEBRTC_AEC_DEBUG_DUMP +void* WebRtcAec_far_time_buf(AecCore* self); +#endif + +// Sets local configuration modes. |nlp_mode| must be in the range [0, 2]. +void WebRtcAec_SetConfigCore(AecCore* self, + int nlp_mode, + int metrics_mode, + int delay_logging); + +// Enables or disables delay agnostic mode. Non-zero enables, zero disables. +void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable); + +// Returns non-zero if delay agnostic (i.e., signal based delay estimation) is +// enabled and zero if disabled. +int WebRtcAec_delay_agnostic_enabled(AecCore* self); + +// Enables or disables extended filter mode. Non-zero enables, zero disables. +void WebRtcAec_enable_extended_filter(AecCore* self, int enable); + +// Returns non-zero if extended filter mode is enabled and zero if disabled. +int WebRtcAec_extended_filter_enabled(AecCore* self); + +// Returns the current |system_delay|, i.e., the buffered difference between +// far-end and near-end. +int WebRtcAec_system_delay(AecCore* self); + +// Sets the |system_delay| to |delay|. Note that if the value is changed +// improperly, there can be a performance regression. So it should be used with +// care. +void WebRtcAec_SetSystemDelay(AecCore* self, int delay); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_internal.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_internal.h new file mode 100644 index 00000000..2de02837 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_internal.h @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_ + +#include "webrtc/common_audio/ring_buffer.h" +#include "webrtc/common_audio/wav_file.h" +#include "webrtc/modules/audio_processing/aec/aec_common.h" +#include "webrtc/modules/audio_processing/aec/aec_core.h" +#include "webrtc/typedefs.h" + +// Number of partitions for the extended filter mode. The first one is an enum +// to be used in array declarations, as it represents the maximum filter length. +enum { + kExtendedNumPartitions = 32 +}; +static const int kNormalNumPartitions = 12; + +// Delay estimator constants, used for logging and delay compensation +// if reported delays are disabled. +enum { + kLookaheadBlocks = 15 +}; +enum { + // 500 ms for 16 kHz which is equivalent with the limit of reported delays. + kHistorySizeBlocks = 125 +}; + +// Extended filter adaptation parameters. +// TODO(ajm): No narrowband tuning yet.
+static const float kExtendedMu = 0.4f; +static const float kExtendedErrorThreshold = 1.0e-6f; + +typedef struct PowerLevel { + float sfrsum; + int sfrcounter; + float framelevel; + float frsum; + int frcounter; + float minlevel; + float averagelevel; +} PowerLevel; + +struct AecCore { + int farBufWritePos, farBufReadPos; + + int knownDelay; + int inSamples, outSamples; + int delayEstCtr; + + RingBuffer* nearFrBuf; + RingBuffer* outFrBuf; + + RingBuffer* nearFrBufH[NUM_HIGH_BANDS_MAX]; + RingBuffer* outFrBufH[NUM_HIGH_BANDS_MAX]; + + float dBuf[PART_LEN2]; // nearend + float eBuf[PART_LEN2]; // error + + float dBufH[NUM_HIGH_BANDS_MAX][PART_LEN2]; // nearend + + float xPow[PART_LEN1]; + float dPow[PART_LEN1]; + float dMinPow[PART_LEN1]; + float dInitMinPow[PART_LEN1]; + float* noisePow; + + float xfBuf[2][kExtendedNumPartitions * PART_LEN1]; // farend fft buffer + float wfBuf[2][kExtendedNumPartitions * PART_LEN1]; // filter fft + complex_t sde[PART_LEN1]; // cross-psd of nearend and error + complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend + // Farend windowed fft buffer. + complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1]; + + float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near, error psd + float hNs[PART_LEN1]; + float hNlFbMin, hNlFbLocalMin; + float hNlXdAvgMin; + int hNlNewMin, hNlMinCtr; + float overDrive, overDriveSm; + int nlp_mode; + float outBuf[PART_LEN]; + int delayIdx; + + short stNearState, echoState; + short divergeState; + + int xfBufBlockPos; + + RingBuffer* far_buf; + RingBuffer* far_buf_windowed; + int system_delay; // Current system delay buffered in AEC. 
+ + int mult; // sampling frequency multiple + int sampFreq; + size_t num_bands; + uint32_t seed; + + float normal_mu; // stepsize + float normal_error_threshold; // error threshold + + int noiseEstCtr; + + PowerLevel farlevel; + PowerLevel nearlevel; + PowerLevel linoutlevel; + PowerLevel nlpoutlevel; + + int metricsMode; + int stateCounter; + Stats erl; + Stats erle; + Stats aNlp; + Stats rerl; + + // Quantities to control H band scaling for SWB input + int freq_avg_ic; // initial bin for averaging nlp gain + int flag_Hband_cn; // for comfort noise + float cn_scale_Hband; // scale for comfort noise in H band + + int delay_metrics_delivered; + int delay_histogram[kHistorySizeBlocks]; + int num_delay_values; + int delay_median; + int delay_std; + float fraction_poor_delays; + int delay_logging_enabled; + void* delay_estimator_farend; + void* delay_estimator; + // Variables associated with delay correction through signal based delay + // estimation feedback. + int signal_delay_correction; + int previous_delay; + int delay_correction_count; + int shift_offset; + float delay_quality_threshold; + int frame_count; + + // 0 = delay agnostic mode (signal based delay correction) disabled. + // Otherwise enabled. + int delay_agnostic_enabled; + // 1 = extended filter mode enabled, 0 = disabled. + int extended_filter_enabled; + // Runtime selection of number of filter partitions. + int num_partitions; + +#ifdef WEBRTC_AEC_DEBUG_DUMP + // Sequence number of this AEC instance, so that different instances can + // choose different dump file names. + int instance_index; + + // Number of times we've restarted dumping; used to pick new dump file names + // each time. 
+ int debug_dump_count; + + RingBuffer* far_time_buf; + rtc_WavWriter* farFile; + rtc_WavWriter* nearFile; + rtc_WavWriter* outFile; + rtc_WavWriter* outLinearFile; + FILE* e_fft_file; +#endif +}; + +typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]); +extern WebRtcAecFilterFar WebRtcAec_FilterFar; +typedef void (*WebRtcAecScaleErrorSignal)(AecCore* aec, float ef[2][PART_LEN1]); +extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal; +typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec, + float* fft, + float ef[2][PART_LEN1]); +extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; +typedef void (*WebRtcAecOverdriveAndSuppress)(AecCore* aec, + float hNl[PART_LEN1], + const float hNlFb, + float efw[2][PART_LEN1]); +extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress; + +typedef void (*WebRtcAecComfortNoise)(AecCore* aec, + float efw[2][PART_LEN1], + complex_t* comfortNoiseHband, + const float* noisePow, + const float* lambda); +extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise; + +typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec, + float efw[2][PART_LEN1], + float xfw[2][PART_LEN1], + float* fft, + float* cohde, + float* cohxd); +extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_mips.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_mips.c new file mode 100644 index 00000000..bb33087a --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_mips.c @@ -0,0 +1,774 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * The core AEC algorithm, which is presented with time-aligned signals. + */ + +#include "webrtc/modules/audio_processing/aec/aec_core.h" + +#include <math.h> + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/aec/aec_core_internal.h" +#include "webrtc/modules/audio_processing/aec/aec_rdft.h" + +static const int flagHbandCn = 1; // flag for adding comfort noise in H band +extern const float WebRtcAec_weightCurve[65]; +extern const float WebRtcAec_overDriveCurve[65]; + +void WebRtcAec_ComfortNoise_mips(AecCore* aec, + float efw[2][PART_LEN1], + complex_t* comfortNoiseHband, + const float* noisePow, + const float* lambda) { + int i, num; + float rand[PART_LEN]; + float noise, noiseAvg, tmp, tmpAvg; + int16_t randW16[PART_LEN]; + complex_t u[PART_LEN1]; + + const float pi2 = 6.28318530717959f; + const float pi2t = pi2 / 32768; + + // Generate a uniform random array on [0 1] + WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); + + int16_t* randWptr = randW16; + float randTemp, randTemp2, randTemp3, randTemp4; + int32_t tmp1s, tmp2s, tmp3s, tmp4s; + + for (i = 0; i < PART_LEN; i+=4) { + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[tmp1s], 0(%[randWptr]) \n\t" + "lh %[tmp2s], 2(%[randWptr]) \n\t" + "lh %[tmp3s], 4(%[randWptr]) \n\t" + "lh %[tmp4s], 6(%[randWptr]) \n\t" + "mtc1 %[tmp1s], %[randTemp] \n\t" + "mtc1 %[tmp2s], %[randTemp2] \n\t" + "mtc1 %[tmp3s], %[randTemp3] \n\t" + "mtc1 %[tmp4s], %[randTemp4] \n\t" + "cvt.s.w %[randTemp], %[randTemp] \n\t" + "cvt.s.w %[randTemp2], %[randTemp2] \n\t" + "cvt.s.w %[randTemp3], %[randTemp3] \n\t" + "cvt.s.w %[randTemp4], %[randTemp4] \n\t" + "addiu %[randWptr], %[randWptr], 8 \n\t" + "mul.s %[randTemp], %[randTemp], %[pi2t] \n\t" + "mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t" + "mul.s %[randTemp3], 
%[randTemp3], %[pi2t] \n\t" + "mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t" + ".set pop \n\t" + : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp), + [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3), + [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s), + [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s), + [tmp4s] "=&r" (tmp4s) + : [pi2t] "f" (pi2t) + : "memory" + ); + + u[i+1][0] = cosf(randTemp); + u[i+1][1] = sinf(randTemp); + u[i+2][0] = cosf(randTemp2); + u[i+2][1] = sinf(randTemp2); + u[i+3][0] = cosf(randTemp3); + u[i+3][1] = sinf(randTemp3); + u[i+4][0] = cosf(randTemp4); + u[i+4][1] = sinf(randTemp4); + } + + // Reject LF noise + float* u_ptr = &u[1][0]; + float noise2, noise3, noise4; + float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f; + + u[0][0] = 0; + u[0][1] = 0; + for (i = 1; i < PART_LEN1; i+=4) { + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lwc1 %[noise], 4(%[noisePow]) \n\t" + "lwc1 %[noise2], 8(%[noisePow]) \n\t" + "lwc1 %[noise3], 12(%[noisePow]) \n\t" + "lwc1 %[noise4], 16(%[noisePow]) \n\t" + "sqrt.s %[noise], %[noise] \n\t" + "sqrt.s %[noise2], %[noise2] \n\t" + "sqrt.s %[noise3], %[noise3] \n\t" + "sqrt.s %[noise4], %[noise4] \n\t" + "lwc1 %[tmp1f], 0(%[u_ptr]) \n\t" + "lwc1 %[tmp2f], 4(%[u_ptr]) \n\t" + "lwc1 %[tmp3f], 8(%[u_ptr]) \n\t" + "lwc1 %[tmp4f], 12(%[u_ptr]) \n\t" + "lwc1 %[tmp5f], 16(%[u_ptr]) \n\t" + "lwc1 %[tmp6f], 20(%[u_ptr]) \n\t" + "lwc1 %[tmp7f], 24(%[u_ptr]) \n\t" + "lwc1 %[tmp8f], 28(%[u_ptr]) \n\t" + "addiu %[noisePow], %[noisePow], 16 \n\t" + "mul.s %[tmp1f], %[tmp1f], %[noise] \n\t" + "mul.s %[tmp2f], %[tmp2f], %[noise] \n\t" + "mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t" + "mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t" + "mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t" + "mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t" + "swc1 %[tmp1f], 0(%[u_ptr]) \n\t" + "swc1 %[tmp3f], 8(%[u_ptr]) \n\t" + "mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t" + "mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t" + 
"neg.s %[tmp2f] \n\t" + "neg.s %[tmp4f] \n\t" + "neg.s %[tmp6f] \n\t" + "neg.s %[tmp8f] \n\t" + "swc1 %[tmp5f], 16(%[u_ptr]) \n\t" + "swc1 %[tmp7f], 24(%[u_ptr]) \n\t" + "swc1 %[tmp2f], 4(%[u_ptr]) \n\t" + "swc1 %[tmp4f], 12(%[u_ptr]) \n\t" + "swc1 %[tmp6f], 20(%[u_ptr]) \n\t" + "swc1 %[tmp8f], 28(%[u_ptr]) \n\t" + "addiu %[u_ptr], %[u_ptr], 32 \n\t" + ".set pop \n\t" + : [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow), + [noise] "=&f" (noise), [noise2] "=&f" (noise2), + [noise3] "=&f" (noise3), [noise4] "=&f" (noise4), + [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), + [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f), + [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f), + [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f) + : + : "memory" + ); + } + u[PART_LEN][1] = 0; + noisePow -= PART_LEN; + + u_ptr = &u[0][0]; + float* u_ptr_end = &u[PART_LEN][0]; + float* efw_ptr_0 = &efw[0][0]; + float* efw_ptr_1 = &efw[1][0]; + float tmp9f, tmp10f; + const float tmp1c = 1.0; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "1: \n\t" + "lwc1 %[tmp1f], 0(%[lambda]) \n\t" + "lwc1 %[tmp6f], 4(%[lambda]) \n\t" + "addiu %[lambda], %[lambda], 8 \n\t" + "c.lt.s %[tmp1f], %[tmp1c] \n\t" + "bc1f 4f \n\t" + " nop \n\t" + "c.lt.s %[tmp6f], %[tmp1c] \n\t" + "bc1f 3f \n\t" + " nop \n\t" + "2: \n\t" + "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t" + "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t" + "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t" + "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t" + "sqrt.s %[tmp1f], %[tmp1f] \n\t" + "sqrt.s %[tmp6f], %[tmp6f] \n\t" + "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" + "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t" + "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" + "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t" + "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" + "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t" + "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" + "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t" + "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t" + "mul.s %[tmp3f], %[tmp1f], 
%[tmp5f] \n\t" + "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t" + "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t" + "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t" + "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t" + "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t" + "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t" + "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t" + "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" + "swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" + "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" + "b 5f \n\t" + " swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" + "3: \n\t" + "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t" + "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t" + "sqrt.s %[tmp1f], %[tmp1f] \n\t" + "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" + "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t" + "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" + "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t" + "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t" + "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t" + "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t" + "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" + "b 5f \n\t" + " swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" + "4: \n\t" + "c.lt.s %[tmp6f], %[tmp1c] \n\t" + "bc1f 5f \n\t" + " nop \n\t" + "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t" + "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t" + "sqrt.s %[tmp6f], %[tmp6f] \n\t" + "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" + "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t" + "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" + "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t" + "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t" + "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t" + 
"add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t" + "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" + "swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" + "5: \n\t" + "addiu %[u_ptr], %[u_ptr], 16 \n\t" + "addiu %[efw_ptr_0], %[efw_ptr_0], 8 \n\t" + "bne %[u_ptr], %[u_ptr_end], 1b \n\t" + " addiu %[efw_ptr_1], %[efw_ptr_1], 8 \n\t" + ".set pop \n\t" + : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr), + [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1), + [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f), + [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f), + [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f), + [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f) + : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end) + : "memory" + ); + + lambda -= PART_LEN; + tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0)); + //tmp = 1 - lambda[i]; + efw[0][PART_LEN] += tmp * u[PART_LEN][0]; + efw[1][PART_LEN] += tmp * u[PART_LEN][1]; + + // For H band comfort noise + // TODO: don't compute noise and "tmp" twice. Use the previous results. + noiseAvg = 0.0; + tmpAvg = 0.0; + num = 0; + if ((aec->sampFreq == 32000 || aec->sampFreq == 48000) && flagHbandCn == 1) { + for (i = 0; i < PART_LEN; i++) { + rand[i] = ((float)randW16[i]) / 32768; + } + + // average noise scale + // average over second half of freq spectrum (i.e., 4->8khz) + // TODO: we shouldn't need num. We know how many elements we're summing. + for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { + num++; + noiseAvg += sqrtf(noisePow[i]); + } + noiseAvg /= (float)num; + + // average nlp scale + // average over second half of freq spectrum (i.e., 4->8khz) + // TODO: we shouldn't need num. We know how many elements we're summing. 
+ num = 0; + for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { + num++; + tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); + } + tmpAvg /= (float)num; + + // Use average noise for H band + // TODO: we should probably have a new random vector here. + // Reject LF noise + u[0][0] = 0; + u[0][1] = 0; + for (i = 1; i < PART_LEN1; i++) { + tmp = pi2 * rand[i - 1]; + + // Use average noise for H band + u[i][0] = noiseAvg * (float)cos(tmp); + u[i][1] = -noiseAvg * (float)sin(tmp); + } + u[PART_LEN][1] = 0; + + for (i = 0; i < PART_LEN1; i++) { + // Use average NLP weight for H band + comfortNoiseHband[i][0] = tmpAvg * u[i][0]; + comfortNoiseHband[i][1] = tmpAvg * u[i][1]; + } + } +} + +void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) { + int i; + for (i = 0; i < aec->num_partitions; i++) { + int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int pos = i * PART_LEN1; + // Check for wrap + if (i + aec->xfBufBlockPos >= aec->num_partitions) { + xPos -= aec->num_partitions * (PART_LEN1); + } + float* yf0 = yf[0]; + float* yf1 = yf[1]; + float* aRe = aec->xfBuf[0] + xPos; + float* aIm = aec->xfBuf[1] + xPos; + float* bRe = aec->wfBuf[0] + pos; + float* bIm = aec->wfBuf[1] + pos; + float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13; + int len = PART_LEN1 >> 1; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "1: \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[bRe]) \n\t" + "lwc1 %[f2], 0(%[bIm]) \n\t" + "lwc1 %[f3], 0(%[aIm]) \n\t" + "lwc1 %[f4], 4(%[aRe]) \n\t" + "lwc1 %[f5], 4(%[bRe]) \n\t" + "lwc1 %[f6], 4(%[bIm]) \n\t" + "mul.s %[f8], %[f0], %[f1] \n\t" + "mul.s %[f0], %[f0], %[f2] \n\t" + "mul.s %[f9], %[f4], %[f5] \n\t" + "mul.s %[f4], %[f4], %[f6] \n\t" + "lwc1 %[f7], 4(%[aIm]) \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f12], %[f2], %[f3] \n\t" + "mul.s %[f1], %[f3], %[f1] \n\t" + "mul.s %[f11], %[f6], %[f7] \n\t" + "addiu %[aRe], %[aRe], 8 \n\t" + "addiu %[aIm], %[aIm], 8 \n\t" + "addiu %[len], 
%[len], -1 \n\t" + "sub.s %[f8], %[f8], %[f12] \n\t" + "mul.s %[f12], %[f7], %[f5] \n\t" + "lwc1 %[f2], 0(%[yf0]) \n\t" + "add.s %[f1], %[f0], %[f1] \n\t" + "lwc1 %[f3], 0(%[yf1]) \n\t" + "sub.s %[f9], %[f9], %[f11] \n\t" + "lwc1 %[f6], 4(%[yf0]) \n\t" + "add.s %[f4], %[f4], %[f12] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "addiu %[aRe], %[aRe], 8 \n\t" + "addiu %[aIm], %[aIm], 8 \n\t" + "addiu %[len], %[len], -1 \n\t" + "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t" + "lwc1 %[f2], 0(%[yf0]) \n\t" + "madd.s %[f1], %[f0], %[f3], %[f1] \n\t" + "lwc1 %[f3], 0(%[yf1]) \n\t" + "nmsub.s %[f9], %[f9], %[f6], %[f7] \n\t" + "lwc1 %[f6], 4(%[yf0]) \n\t" + "madd.s %[f4], %[f4], %[f7], %[f5] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "lwc1 %[f5], 4(%[yf1]) \n\t" + "add.s %[f2], %[f2], %[f8] \n\t" + "addiu %[bRe], %[bRe], 8 \n\t" + "addiu %[bIm], %[bIm], 8 \n\t" + "add.s %[f3], %[f3], %[f1] \n\t" + "add.s %[f6], %[f6], %[f9] \n\t" + "add.s %[f5], %[f5], %[f4] \n\t" + "swc1 %[f2], 0(%[yf0]) \n\t" + "swc1 %[f3], 0(%[yf1]) \n\t" + "swc1 %[f6], 4(%[yf0]) \n\t" + "swc1 %[f5], 4(%[yf1]) \n\t" + "addiu %[yf0], %[yf0], 8 \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[yf1], %[yf1], 8 \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[bRe]) \n\t" + "lwc1 %[f2], 0(%[bIm]) \n\t" + "lwc1 %[f3], 0(%[aIm]) \n\t" + "mul.s %[f8], %[f0], %[f1] \n\t" + "mul.s %[f0], %[f0], %[f2] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f12], %[f2], %[f3] \n\t" + "mul.s %[f1], %[f3], %[f1] \n\t" + "sub.s %[f8], %[f8], %[f12] \n\t" + "lwc1 %[f2], 0(%[yf0]) \n\t" + "add.s %[f1], %[f0], %[f1] \n\t" + "lwc1 %[f3], 0(%[yf1]) \n\t" +#else // #if !defined(MIPS32_R2_LE) + "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t" + "lwc1 %[f2], 0(%[yf0]) \n\t" + "madd.s %[f1], %[f0], %[f3], %[f1] \n\t" + "lwc1 %[f3], 0(%[yf1]) \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "add.s %[f2], %[f2], %[f8] \n\t" + "add.s %[f3], %[f3], %[f1] \n\t" + "swc1 %[f2], 0(%[yf0]) \n\t" + "swc1 %[f3], 0(%[yf1]) \n\t" + ".set pop \n\t" + : [f0] "=&f" 
(f0), [f1] "=&f" (f1), [f2] "=&f" (f2), + [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), + [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), + [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), + [f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe), + [aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm), + [yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len) + : + : "memory" + ); + } +} + +void WebRtcAec_FilterAdaptation_mips(AecCore* aec, + float* fft, + float ef[2][PART_LEN1]) { + int i; + for (i = 0; i < aec->num_partitions; i++) { + int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); + int pos; + // Check for wrap + if (i + aec->xfBufBlockPos >= aec->num_partitions) { + xPos -= aec->num_partitions * PART_LEN1; + } + + pos = i * PART_LEN1; + float* aRe = aec->xfBuf[0] + xPos; + float* aIm = aec->xfBuf[1] + xPos; + float* bRe = ef[0]; + float* bIm = ef[1]; + float* fft_tmp; + + float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12; + int len = PART_LEN >> 1; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[fft_tmp], %[fft], 0 \n\t" + "1: \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[bRe]) \n\t" + "lwc1 %[f2], 0(%[bIm]) \n\t" + "lwc1 %[f4], 4(%[aRe]) \n\t" + "lwc1 %[f5], 4(%[bRe]) \n\t" + "lwc1 %[f6], 4(%[bIm]) \n\t" + "addiu %[aRe], %[aRe], 8 \n\t" + "addiu %[bRe], %[bRe], 8 \n\t" + "mul.s %[f8], %[f0], %[f1] \n\t" + "mul.s %[f0], %[f0], %[f2] \n\t" + "lwc1 %[f3], 0(%[aIm]) \n\t" + "mul.s %[f9], %[f4], %[f5] \n\t" + "lwc1 %[f7], 4(%[aIm]) \n\t" + "mul.s %[f4], %[f4], %[f6] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f10], %[f3], %[f2] \n\t" + "mul.s %[f1], %[f3], %[f1] \n\t" + "mul.s %[f11], %[f7], %[f6] \n\t" + "mul.s %[f5], %[f7], %[f5] \n\t" + "addiu %[aIm], %[aIm], 8 \n\t" + "addiu %[bIm], %[bIm], 8 \n\t" + "addiu %[len], %[len], -1 \n\t" + "add.s %[f8], %[f8], %[f10] \n\t" + "sub.s %[f1], %[f0], %[f1] \n\t" + "add.s %[f9], %[f9], %[f11] \n\t" + "sub.s %[f5], %[f4], %[f5] \n\t" +#else // #if 
!defined(MIPS32_R2_LE) + "addiu %[aIm], %[aIm], 8 \n\t" + "addiu %[bIm], %[bIm], 8 \n\t" + "addiu %[len], %[len], -1 \n\t" + "madd.s %[f8], %[f8], %[f3], %[f2] \n\t" + "nmsub.s %[f1], %[f0], %[f3], %[f1] \n\t" + "madd.s %[f9], %[f9], %[f7], %[f6] \n\t" + "nmsub.s %[f5], %[f4], %[f7], %[f5] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[f8], 0(%[fft_tmp]) \n\t" + "swc1 %[f1], 4(%[fft_tmp]) \n\t" + "swc1 %[f9], 8(%[fft_tmp]) \n\t" + "swc1 %[f5], 12(%[fft_tmp]) \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[fft_tmp], %[fft_tmp], 16 \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[bRe]) \n\t" + "lwc1 %[f2], 0(%[bIm]) \n\t" + "lwc1 %[f3], 0(%[aIm]) \n\t" + "mul.s %[f8], %[f0], %[f1] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f10], %[f3], %[f2] \n\t" + "add.s %[f8], %[f8], %[f10] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "madd.s %[f8], %[f8], %[f3], %[f2] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[f8], 4(%[fft]) \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), + [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), + [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), + [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), + [f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm), + [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp), + [len] "+r" (len) + : [fft] "r" (fft) + : "memory" + ); + + aec_rdft_inverse_128(fft); + memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); + + // fft scaling + { + float scale = 2.0f / PART_LEN2; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[fft_tmp], %[fft], 0 \n\t" + "addiu %[len], $zero, 8 \n\t" + "1: \n\t" + "addiu %[len], %[len], -1 \n\t" + "lwc1 %[f0], 0(%[fft_tmp]) \n\t" + "lwc1 %[f1], 4(%[fft_tmp]) \n\t" + "lwc1 %[f2], 8(%[fft_tmp]) \n\t" + "lwc1 %[f3], 12(%[fft_tmp]) \n\t" + "mul.s %[f0], %[f0], %[scale] \n\t" + "mul.s %[f1], %[f1], %[scale] \n\t" + "mul.s %[f2], %[f2], %[scale] \n\t" + "mul.s %[f3], %[f3], %[scale] \n\t" + "lwc1 %[f4], 
16(%[fft_tmp]) \n\t" + "lwc1 %[f5], 20(%[fft_tmp]) \n\t" + "lwc1 %[f6], 24(%[fft_tmp]) \n\t" + "lwc1 %[f7], 28(%[fft_tmp]) \n\t" + "mul.s %[f4], %[f4], %[scale] \n\t" + "mul.s %[f5], %[f5], %[scale] \n\t" + "mul.s %[f6], %[f6], %[scale] \n\t" + "mul.s %[f7], %[f7], %[scale] \n\t" + "swc1 %[f0], 0(%[fft_tmp]) \n\t" + "swc1 %[f1], 4(%[fft_tmp]) \n\t" + "swc1 %[f2], 8(%[fft_tmp]) \n\t" + "swc1 %[f3], 12(%[fft_tmp]) \n\t" + "swc1 %[f4], 16(%[fft_tmp]) \n\t" + "swc1 %[f5], 20(%[fft_tmp]) \n\t" + "swc1 %[f6], 24(%[fft_tmp]) \n\t" + "swc1 %[f7], 28(%[fft_tmp]) \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[fft_tmp], %[fft_tmp], 32 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), + [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), + [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len), + [fft_tmp] "=&r" (fft_tmp) + : [scale] "f" (scale), [fft] "r" (fft) + : "memory" + ); + } + aec_rdft_forward_128(fft); + aRe = aec->wfBuf[0] + pos; + aIm = aec->wfBuf[1] + pos; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[fft_tmp], %[fft], 0 \n\t" + "addiu %[len], $zero, 31 \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[fft_tmp]) \n\t" + "lwc1 %[f2], 256(%[aRe]) \n\t" + "lwc1 %[f3], 4(%[fft_tmp]) \n\t" + "lwc1 %[f4], 4(%[aRe]) \n\t" + "lwc1 %[f5], 8(%[fft_tmp]) \n\t" + "lwc1 %[f6], 4(%[aIm]) \n\t" + "lwc1 %[f7], 12(%[fft_tmp]) \n\t" + "add.s %[f0], %[f0], %[f1] \n\t" + "add.s %[f2], %[f2], %[f3] \n\t" + "add.s %[f4], %[f4], %[f5] \n\t" + "add.s %[f6], %[f6], %[f7] \n\t" + "addiu %[fft_tmp], %[fft_tmp], 16 \n\t" + "swc1 %[f0], 0(%[aRe]) \n\t" + "swc1 %[f2], 256(%[aRe]) \n\t" + "swc1 %[f4], 4(%[aRe]) \n\t" + "addiu %[aRe], %[aRe], 8 \n\t" + "swc1 %[f6], 4(%[aIm]) \n\t" + "addiu %[aIm], %[aIm], 8 \n\t" + "1: \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[fft_tmp]) \n\t" + "lwc1 %[f2], 0(%[aIm]) \n\t" + "lwc1 %[f3], 4(%[fft_tmp]) \n\t" + "lwc1 %[f4], 4(%[aRe]) \n\t" + "lwc1 %[f5], 8(%[fft_tmp]) \n\t" + "lwc1 %[f6], 
4(%[aIm]) \n\t" + "lwc1 %[f7], 12(%[fft_tmp]) \n\t" + "add.s %[f0], %[f0], %[f1] \n\t" + "add.s %[f2], %[f2], %[f3] \n\t" + "add.s %[f4], %[f4], %[f5] \n\t" + "add.s %[f6], %[f6], %[f7] \n\t" + "addiu %[len], %[len], -1 \n\t" + "addiu %[fft_tmp], %[fft_tmp], 16 \n\t" + "swc1 %[f0], 0(%[aRe]) \n\t" + "swc1 %[f2], 0(%[aIm]) \n\t" + "swc1 %[f4], 4(%[aRe]) \n\t" + "addiu %[aRe], %[aRe], 8 \n\t" + "swc1 %[f6], 4(%[aIm]) \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[aIm], %[aIm], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), + [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), + [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len), + [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm) + : [fft] "r" (fft) + : "memory" + ); + } +} + +void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec, + float hNl[PART_LEN1], + const float hNlFb, + float efw[2][PART_LEN1]) { + int i; + const float one = 1.0; + float* p_hNl; + float* p_efw0; + float* p_efw1; + float* p_WebRtcAec_wC; + float temp1, temp2, temp3, temp4; + + p_hNl = &hNl[0]; + p_efw0 = &efw[0][0]; + p_efw1 = &efw[1][0]; + p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0]; + + for (i = 0; i < PART_LEN1; i++) { + // Weight subbands + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lwc1 %[temp1], 0(%[p_hNl]) \n\t" + "lwc1 %[temp2], 0(%[p_wC]) \n\t" + "c.lt.s %[hNlFb], %[temp1] \n\t" + "bc1f 1f \n\t" + " mul.s %[temp3], %[temp2], %[hNlFb] \n\t" + "sub.s %[temp4], %[one], %[temp2] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[temp1], %[temp1], %[temp4] \n\t" + "add.s %[temp1], %[temp3], %[temp1] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "madd.s %[temp1], %[temp3], %[temp1], %[temp4] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[temp1], 0(%[p_hNl]) \n\t" + "1: \n\t" + "addiu %[p_wC], %[p_wC], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3), + [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC) + : [hNlFb] "f" 
(hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl) + : "memory" + ); + + hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); + + __asm __volatile ( + "lwc1 %[temp1], 0(%[p_hNl]) \n\t" + "lwc1 %[temp3], 0(%[p_efw1]) \n\t" + "lwc1 %[temp2], 0(%[p_efw0]) \n\t" + "addiu %[p_hNl], %[p_hNl], 4 \n\t" + "mul.s %[temp3], %[temp3], %[temp1] \n\t" + "mul.s %[temp2], %[temp2], %[temp1] \n\t" + "addiu %[p_efw0], %[p_efw0], 4 \n\t" + "addiu %[p_efw1], %[p_efw1], 4 \n\t" + "neg.s %[temp4], %[temp3] \n\t" + "swc1 %[temp2], -4(%[p_efw0]) \n\t" + "swc1 %[temp4], -4(%[p_efw1]) \n\t" + : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3), + [temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1), + [p_hNl] "+r" (p_hNl) + : + : "memory" + ); + } +} + +void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) { + const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; + const float error_threshold = aec->extended_filter_enabled + ? 
kExtendedErrorThreshold + : aec->normal_error_threshold; + int len = (PART_LEN1); + float* ef0 = ef[0]; + float* ef1 = ef[1]; + float* xPow = aec->xPow; + float fac1 = 1e-10f; + float err_th2 = error_threshold * error_threshold; + float f0, f1, f2; +#if !defined(MIPS32_R2_LE) + float f3; +#endif + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "1: \n\t" + "lwc1 %[f0], 0(%[xPow]) \n\t" + "lwc1 %[f1], 0(%[ef0]) \n\t" + "lwc1 %[f2], 0(%[ef1]) \n\t" + "add.s %[f0], %[f0], %[fac1] \n\t" + "div.s %[f1], %[f1], %[f0] \n\t" + "div.s %[f2], %[f2], %[f0] \n\t" + "mul.s %[f0], %[f1], %[f1] \n\t" +#if defined(MIPS32_R2_LE) + "madd.s %[f0], %[f0], %[f2], %[f2] \n\t" +#else + "mul.s %[f3], %[f2], %[f2] \n\t" + "add.s %[f0], %[f0], %[f3] \n\t" +#endif + "c.le.s %[f0], %[err_th2] \n\t" + "nop \n\t" + "bc1t 2f \n\t" + " nop \n\t" + "sqrt.s %[f0], %[f0] \n\t" + "add.s %[f0], %[f0], %[fac1] \n\t" + "div.s %[f0], %[err_th], %[f0] \n\t" + "mul.s %[f1], %[f1], %[f0] \n\t" + "mul.s %[f2], %[f2], %[f0] \n\t" + "2: \n\t" + "mul.s %[f1], %[f1], %[mu] \n\t" + "mul.s %[f2], %[f2], %[mu] \n\t" + "swc1 %[f1], 0(%[ef0]) \n\t" + "swc1 %[f2], 0(%[ef1]) \n\t" + "addiu %[len], %[len], -1 \n\t" + "addiu %[xPow], %[xPow], 4 \n\t" + "addiu %[ef0], %[ef0], 4 \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[ef1], %[ef1], 4 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), +#if !defined(MIPS32_R2_LE) + [f3] "=&f" (f3), +#endif + [xPow] "+r" (xPow), [ef0] "+r" (ef0), [ef1] "+r" (ef1), + [len] "+r" (len) + : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu), + [err_th] "f" (error_threshold) + : "memory" + ); +} + +void WebRtcAec_InitAec_mips(void) { + WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips; + WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips; + WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips; + WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips; + WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips; +} + 
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_neon.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_neon.c new file mode 100644 index 00000000..9a677aaa --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_neon.c @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * The core AEC algorithm, neon version of speed-critical functions. + * + * Based on aec_core_sse2.c. + */ + +#include <arm_neon.h> +#include <math.h> +#include <string.h> // memset + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/aec/aec_common.h" +#include "webrtc/modules/audio_processing/aec/aec_core_internal.h" +#include "webrtc/modules/audio_processing/aec/aec_rdft.h" + +enum { kShiftExponentIntoTopMantissa = 8 }; +enum { kFloatExponentShift = 23 }; + +__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { + return aRe * bRe - aIm * bIm; +} + +__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { + return aRe * bIm + aIm * bRe; +} + +static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) { + int i; + const int num_partitions = aec->num_partitions; + for (i = 0; i < num_partitions; i++) { + int j; + int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int pos = i * PART_LEN1; + // Check for wrap + if (i + aec->xfBufBlockPos >= num_partitions) { + xPos -= num_partitions * PART_LEN1; + } + + // vectorized code (four at once) + for (j = 0; j + 3 < PART_LEN1; j += 4) { + const float32x4_t xfBuf_re = 
vld1q_f32(&aec->xfBuf[0][xPos + j]); + const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]); + const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]); + const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]); + const float32x4_t yf_re = vld1q_f32(&yf[0][j]); + const float32x4_t yf_im = vld1q_f32(&yf[1][j]); + const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re); + const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im); + const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im); + const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re); + const float32x4_t g = vaddq_f32(yf_re, e); + const float32x4_t h = vaddq_f32(yf_im, f); + vst1q_f32(&yf[0][j], g); + vst1q_f32(&yf[1][j], h); + } + // scalar code for the remaining items. + for (; j < PART_LEN1; j++) { + yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], + aec->xfBuf[1][xPos + j], + aec->wfBuf[0][pos + j], + aec->wfBuf[1][pos + j]); + yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], + aec->xfBuf[1][xPos + j], + aec->wfBuf[0][pos + j], + aec->wfBuf[1][pos + j]); + } + } +} + +// ARM64's arm_neon.h has already defined vdivq_f32 vsqrtq_f32. +#if !defined (WEBRTC_ARCH_ARM64) +static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) { + int i; + float32x4_t x = vrecpeq_f32(b); + // from arm documentation + // The Newton-Raphson iteration: + // x[n+1] = x[n] * (2 - d * x[n]) + // converges to (1/d) if x0 is the result of VRECPE applied to d. + // + // Note: The precision did not improve after 2 iterations. + for (i = 0; i < 2; i++) { + x = vmulq_f32(vrecpsq_f32(b, x), x); + } + // a/b = a*(1/b) + return vmulq_f32(a, x); +} + +static float32x4_t vsqrtq_f32(float32x4_t s) { + int i; + float32x4_t x = vrsqrteq_f32(s); + + // Code to handle sqrt(0). + // If the input to sqrtf() is zero, a zero will be returned. + // If the input to vrsqrteq_f32() is zero, positive infinity is returned. 
+ const uint32x4_t vec_p_inf = vdupq_n_u32(0x7F800000); + // check for divide by zero + const uint32x4_t div_by_zero = vceqq_u32(vec_p_inf, vreinterpretq_u32_f32(x)); + // zero out the positive infinity results + x = vreinterpretq_f32_u32(vandq_u32(vmvnq_u32(div_by_zero), + vreinterpretq_u32_f32(x))); + // from arm documentation + // The Newton-Raphson iteration: + // x[n+1] = x[n] * (3 - d * (x[n] * x[n])) / 2) + // converges to (1/√d) if x0 is the result of VRSQRTE applied to d. + // + // Note: The precision did not improve after 2 iterations. + for (i = 0; i < 2; i++) { + x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x); + } + // sqrt(s) = s * 1/sqrt(s) + return vmulq_f32(s, x);; +} +#endif // WEBRTC_ARCH_ARM64 + +static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) { + const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; + const float error_threshold = aec->extended_filter_enabled ? + kExtendedErrorThreshold : aec->normal_error_threshold; + const float32x4_t k1e_10f = vdupq_n_f32(1e-10f); + const float32x4_t kMu = vmovq_n_f32(mu); + const float32x4_t kThresh = vmovq_n_f32(error_threshold); + int i; + // vectorized code (four at once) + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const float32x4_t xPow = vld1q_f32(&aec->xPow[i]); + const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]); + const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]); + const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f); + float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus); + float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus); + const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re); + const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im); + const float32x4_t absEf = vsqrtq_f32(ef_sum2); + const uint32x4_t bigger = vcgtq_f32(absEf, kThresh); + const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f); + const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus); + uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv)); + 
uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv)); + uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger), + vreinterpretq_u32_f32(ef_re)); + uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger), + vreinterpretq_u32_f32(ef_im)); + ef_re_if = vandq_u32(bigger, ef_re_if); + ef_im_if = vandq_u32(bigger, ef_im_if); + ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if); + ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if); + ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu); + ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu); + vst1q_f32(&ef[0][i], ef_re); + vst1q_f32(&ef[1][i], ef_im); + } + // scalar code for the remaining items. + for (; i < PART_LEN1; i++) { + float abs_ef; + ef[0][i] /= (aec->xPow[i] + 1e-10f); + ef[1][i] /= (aec->xPow[i] + 1e-10f); + abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); + + if (abs_ef > error_threshold) { + abs_ef = error_threshold / (abs_ef + 1e-10f); + ef[0][i] *= abs_ef; + ef[1][i] *= abs_ef; + } + + // Stepsize factor + ef[0][i] *= mu; + ef[1][i] *= mu; + } +} + +static void FilterAdaptationNEON(AecCore* aec, + float* fft, + float ef[2][PART_LEN1]) { + int i; + const int num_partitions = aec->num_partitions; + for (i = 0; i < num_partitions; i++) { + int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int pos = i * PART_LEN1; + int j; + // Check for wrap + if (i + aec->xfBufBlockPos >= num_partitions) { + xPos -= num_partitions * PART_LEN1; + } + + // Process the whole array... + for (j = 0; j < PART_LEN; j += 4) { + // Load xfBuf and ef. + const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]); + const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]); + const float32x4_t ef_re = vld1q_f32(&ef[0][j]); + const float32x4_t ef_im = vld1q_f32(&ef[1][j]); + // Calculate the product of conjugate(xfBuf) by ef. 
+ // re(conjugate(a) * b) = aRe * bRe + aIm * bIm + // im(conjugate(a) * b)= aRe * bIm - aIm * bRe + const float32x4_t a = vmulq_f32(xfBuf_re, ef_re); + const float32x4_t e = vmlaq_f32(a, xfBuf_im, ef_im); + const float32x4_t c = vmulq_f32(xfBuf_re, ef_im); + const float32x4_t f = vmlsq_f32(c, xfBuf_im, ef_re); + // Interleave real and imaginary parts. + const float32x4x2_t g_n_h = vzipq_f32(e, f); + // Store + vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]); + vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]); + } + // ... and fixup the first imaginary entry. + fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], + -aec->xfBuf[1][xPos + PART_LEN], + ef[0][PART_LEN], + ef[1][PART_LEN]); + + aec_rdft_inverse_128(fft); + memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); + + // fft scaling + { + const float scale = 2.0f / PART_LEN2; + const float32x4_t scale_ps = vmovq_n_f32(scale); + for (j = 0; j < PART_LEN; j += 4) { + const float32x4_t fft_ps = vld1q_f32(&fft[j]); + const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps); + vst1q_f32(&fft[j], fft_scale); + } + } + aec_rdft_forward_128(fft); + + { + const float wt1 = aec->wfBuf[1][pos]; + aec->wfBuf[0][pos + PART_LEN] += fft[1]; + for (j = 0; j < PART_LEN; j += 4) { + float32x4_t wtBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]); + float32x4_t wtBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]); + const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]); + const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]); + const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4); + wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]); + wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]); + + vst1q_f32(&aec->wfBuf[0][pos + j], wtBuf_re); + vst1q_f32(&aec->wfBuf[1][pos + j], wtBuf_im); + } + aec->wfBuf[1][pos] = wt1; + } + } +} + +static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) { + // a^b = exp2(b * log2(a)) + // exp2(x) and log2(x) are calculated using polynomial approximations. + float32x4_t log2_a, b_log2_a, a_exp_b; + + // Calculate log2(x), x = a. 
+ { + // To calculate log2(x), we decompose x like this: + // x = y * 2^n + // n is an integer + // y is in the [1.0, 2.0) range + // + // log2(x) = log2(y) + n + // n can be evaluated by playing with float representation. + // log2(y) in a small range can be approximated, this code uses an order + // five polynomial approximation. The coefficients have been + // estimated with the Remez algorithm and the resulting + // polynomial has a maximum relative error of 0.00086%. + + // Compute n. + // This is done by masking the exponent, shifting it into the top bit of + // the mantissa, putting eight into the biased exponent (to shift/ + // compensate the fact that the exponent has been shifted in the top/ + // fractional part and finally getting rid of the implicit leading one + // from the mantissa by substracting it out. + const uint32x4_t vec_float_exponent_mask = vdupq_n_u32(0x7F800000); + const uint32x4_t vec_eight_biased_exponent = vdupq_n_u32(0x43800000); + const uint32x4_t vec_implicit_leading_one = vdupq_n_u32(0x43BF8000); + const uint32x4_t two_n = vandq_u32(vreinterpretq_u32_f32(a), + vec_float_exponent_mask); + const uint32x4_t n_1 = vshrq_n_u32(two_n, kShiftExponentIntoTopMantissa); + const uint32x4_t n_0 = vorrq_u32(n_1, vec_eight_biased_exponent); + const float32x4_t n = + vsubq_f32(vreinterpretq_f32_u32(n_0), + vreinterpretq_f32_u32(vec_implicit_leading_one)); + // Compute y. + const uint32x4_t vec_mantissa_mask = vdupq_n_u32(0x007FFFFF); + const uint32x4_t vec_zero_biased_exponent_is_one = vdupq_n_u32(0x3F800000); + const uint32x4_t mantissa = vandq_u32(vreinterpretq_u32_f32(a), + vec_mantissa_mask); + const float32x4_t y = + vreinterpretq_f32_u32(vorrq_u32(mantissa, + vec_zero_biased_exponent_is_one)); + // Approximate log2(y) ~= (y - 1) * pol5(y). 
+ // pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0 + const float32x4_t C5 = vdupq_n_f32(-3.4436006e-2f); + const float32x4_t C4 = vdupq_n_f32(3.1821337e-1f); + const float32x4_t C3 = vdupq_n_f32(-1.2315303f); + const float32x4_t C2 = vdupq_n_f32(2.5988452f); + const float32x4_t C1 = vdupq_n_f32(-3.3241990f); + const float32x4_t C0 = vdupq_n_f32(3.1157899f); + float32x4_t pol5_y = C5; + pol5_y = vmlaq_f32(C4, y, pol5_y); + pol5_y = vmlaq_f32(C3, y, pol5_y); + pol5_y = vmlaq_f32(C2, y, pol5_y); + pol5_y = vmlaq_f32(C1, y, pol5_y); + pol5_y = vmlaq_f32(C0, y, pol5_y); + const float32x4_t y_minus_one = + vsubq_f32(y, vreinterpretq_f32_u32(vec_zero_biased_exponent_is_one)); + const float32x4_t log2_y = vmulq_f32(y_minus_one, pol5_y); + + // Combine parts. + log2_a = vaddq_f32(n, log2_y); + } + + // b * log2(a) + b_log2_a = vmulq_f32(b, log2_a); + + // Calculate exp2(x), x = b * log2(a). + { + // To calculate 2^x, we decompose x like this: + // x = n + y + // n is an integer, the value of x - 0.5 rounded down, therefore + // y is in the [0.5, 1.5) range + // + // 2^x = 2^n * 2^y + // 2^n can be evaluated by playing with float representation. + // 2^y in a small range can be approximated, this code uses an order two + // polynomial approximation. The coefficients have been estimated + // with the Remez algorithm and the resulting polynomial has a + // maximum relative error of 0.17%. + // To avoid over/underflow, we reduce the range of input to ]-127, 129]. + const float32x4_t max_input = vdupq_n_f32(129.f); + const float32x4_t min_input = vdupq_n_f32(-126.99999f); + const float32x4_t x_min = vminq_f32(b_log2_a, max_input); + const float32x4_t x_max = vmaxq_f32(x_min, min_input); + // Compute n. + const float32x4_t half = vdupq_n_f32(0.5f); + const float32x4_t x_minus_half = vsubq_f32(x_max, half); + const int32x4_t x_minus_half_floor = vcvtq_s32_f32(x_minus_half); + + // Compute 2^n. 
+ const int32x4_t float_exponent_bias = vdupq_n_s32(127); + const int32x4_t two_n_exponent = + vaddq_s32(x_minus_half_floor, float_exponent_bias); + const float32x4_t two_n = + vreinterpretq_f32_s32(vshlq_n_s32(two_n_exponent, kFloatExponentShift)); + // Compute y. + const float32x4_t y = vsubq_f32(x_max, vcvtq_f32_s32(x_minus_half_floor)); + + // Approximate 2^y ~= C2 * y^2 + C1 * y + C0. + const float32x4_t C2 = vdupq_n_f32(3.3718944e-1f); + const float32x4_t C1 = vdupq_n_f32(6.5763628e-1f); + const float32x4_t C0 = vdupq_n_f32(1.0017247f); + float32x4_t exp2_y = C2; + exp2_y = vmlaq_f32(C1, y, exp2_y); + exp2_y = vmlaq_f32(C0, y, exp2_y); + + // Combine parts. + a_exp_b = vmulq_f32(exp2_y, two_n); + } + + return a_exp_b; +} + +static void OverdriveAndSuppressNEON(AecCore* aec, + float hNl[PART_LEN1], + const float hNlFb, + float efw[2][PART_LEN1]) { + int i; + const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb); + const float32x4_t vec_one = vdupq_n_f32(1.0f); + const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f); + const float32x4_t vec_overDriveSm = vmovq_n_f32(aec->overDriveSm); + + // vectorized code (four at once) + for (i = 0; i + 3 < PART_LEN1; i += 4) { + // Weight subbands + float32x4_t vec_hNl = vld1q_f32(&hNl[i]); + const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]); + const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb); + const float32x4_t vec_weightCurve_hNlFb = vmulq_f32(vec_weightCurve, + vec_hNlFb); + const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve); + const float32x4_t vec_one_weightCurve_hNl = vmulq_f32(vec_one_weightCurve, + vec_hNl); + const uint32x4_t vec_if0 = vandq_u32(vmvnq_u32(bigger), + vreinterpretq_u32_f32(vec_hNl)); + const float32x4_t vec_one_weightCurve_add = + vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl); + const uint32x4_t vec_if1 = + vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add)); + + vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1)); + 
+ { + const float32x4_t vec_overDriveCurve = + vld1q_f32(&WebRtcAec_overDriveCurve[i]); + const float32x4_t vec_overDriveSm_overDriveCurve = + vmulq_f32(vec_overDriveSm, vec_overDriveCurve); + vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve); + vst1q_f32(&hNl[i], vec_hNl); + } + + // Suppress error signal + { + float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]); + float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]); + vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl); + vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl); + + // Ooura fft returns incorrect sign on imaginary component. It matters + // here because we are making an additive change with comfort noise. + vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one); + vst1q_f32(&efw[0][i], vec_efw_re); + vst1q_f32(&efw[1][i], vec_efw_im); + } + } + + // scalar code for the remaining items. + for (; i < PART_LEN1; i++) { + // Weight subbands + if (hNl[i] > hNlFb) { + hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + + (1 - WebRtcAec_weightCurve[i]) * hNl[i]; + } + + hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); + + // Suppress error signal + efw[0][i] *= hNl[i]; + efw[1][i] *= hNl[i]; + + // Ooura fft returns incorrect sign on imaginary component. It matters + // here because we are making an additive change with comfort noise. + efw[1][i] *= -1; + } +} + +static int PartitionDelay(const AecCore* aec) { + // Measures the energy in each filter partition and returns the partition with + // highest energy. + // TODO(bjornv): Spread computational cost by computing one partition per + // block? 
+ float wfEnMax = 0; + int i; + int delay = 0; + + for (i = 0; i < aec->num_partitions; i++) { + int j; + int pos = i * PART_LEN1; + float wfEn = 0; + float32x4_t vec_wfEn = vdupq_n_f32(0.0f); + // vectorized code (four at once) + for (j = 0; j + 3 < PART_LEN1; j += 4) { + const float32x4_t vec_wfBuf0 = vld1q_f32(&aec->wfBuf[0][pos + j]); + const float32x4_t vec_wfBuf1 = vld1q_f32(&aec->wfBuf[1][pos + j]); + vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0); + vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1); + } + { + float32x2_t vec_total; + // A B C D + vec_total = vpadd_f32(vget_low_f32(vec_wfEn), vget_high_f32(vec_wfEn)); + // A+B C+D + vec_total = vpadd_f32(vec_total, vec_total); + // A+B+C+D A+B+C+D + wfEn = vget_lane_f32(vec_total, 0); + } + + // scalar code for the remaining items. + for (; j < PART_LEN1; j++) { + wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + + aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; + } + + if (wfEn > wfEnMax) { + wfEnMax = wfEn; + delay = i; + } + } + return delay; +} + +// Updates the following smoothed Power Spectral Densities (PSD): +// - sd : near-end +// - se : residual echo +// - sx : far-end +// - sde : cross-PSD of near-end and residual echo +// - sxd : cross-PSD of near-end and far-end +// +// In addition to updating the PSDs, also the filter diverge state is determined +// upon actions are taken. +static void SmoothedPSD(AecCore* aec, + float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], + float xfw[2][PART_LEN1]) { + // Power estimate smoothing coefficients. + const float* ptrGCoh = aec->extended_filter_enabled + ? 
WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1] + : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1]; + int i; + float sdSum = 0, seSum = 0; + const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD); + float32x4_t vec_sdSum = vdupq_n_f32(0.0f); + float32x4_t vec_seSum = vdupq_n_f32(0.0f); + + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]); + const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]); + const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]); + const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]); + const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]); + const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]); + float32x4_t vec_sd = vmulq_n_f32(vld1q_f32(&aec->sd[i]), ptrGCoh[0]); + float32x4_t vec_se = vmulq_n_f32(vld1q_f32(&aec->se[i]), ptrGCoh[0]); + float32x4_t vec_sx = vmulq_n_f32(vld1q_f32(&aec->sx[i]), ptrGCoh[0]); + float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0); + float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0); + float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0); + + vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1); + vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1); + vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1); + vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15); + vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]); + vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]); + vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]); + + vst1q_f32(&aec->sd[i], vec_sd); + vst1q_f32(&aec->se[i], vec_se); + vst1q_f32(&aec->sx[i], vec_sx); + + { + float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]); + float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0); + float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1); + vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]); + vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]); + vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1); + vec_dfwefw0110 = 
vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0); + vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]); + vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]); + vst2q_f32(&aec->sde[i][0], vec_sde); + } + + { + float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]); + float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0); + float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1); + vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]); + vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]); + vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1); + vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0); + vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]); + vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]); + vst2q_f32(&aec->sxd[i][0], vec_sxd); + } + + vec_sdSum = vaddq_f32(vec_sdSum, vec_sd); + vec_seSum = vaddq_f32(vec_seSum, vec_se); + } + { + float32x2_t vec_sdSum_total; + float32x2_t vec_seSum_total; + // A B C D + vec_sdSum_total = vpadd_f32(vget_low_f32(vec_sdSum), + vget_high_f32(vec_sdSum)); + vec_seSum_total = vpadd_f32(vget_low_f32(vec_seSum), + vget_high_f32(vec_seSum)); + // A+B C+D + vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total); + vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total); + // A+B+C+D A+B+C+D + sdSum = vget_lane_f32(vec_sdSum_total, 0); + seSum = vget_lane_f32(vec_seSum_total, 0); + } + + // scalar code for the remaining items. + for (; i < PART_LEN1; i++) { + aec->sd[i] = ptrGCoh[0] * aec->sd[i] + + ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); + aec->se[i] = ptrGCoh[0] * aec->se[i] + + ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); + // We threshold here to protect against the ill-effects of a zero farend. + // The threshold is not arbitrarily chosen, but balances protection and + // adverse interaction with the algorithm's tuning. 
+ // TODO(bjornv): investigate further why this is so sensitive. + aec->sx[i] = + ptrGCoh[0] * aec->sx[i] + + ptrGCoh[1] * WEBRTC_SPL_MAX( + xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], + WebRtcAec_kMinFarendPSD); + + aec->sde[i][0] = + ptrGCoh[0] * aec->sde[i][0] + + ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); + aec->sde[i][1] = + ptrGCoh[0] * aec->sde[i][1] + + ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); + + aec->sxd[i][0] = + ptrGCoh[0] * aec->sxd[i][0] + + ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); + aec->sxd[i][1] = + ptrGCoh[0] * aec->sxd[i][1] + + ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); + + sdSum += aec->sd[i]; + seSum += aec->se[i]; + } + + // Divergent filter safeguard. + aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; + + if (aec->divergeState) + memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); + + // Reset if error is significantly larger than nearend (13 dB). + if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) + memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); +} + +// Window time domain data to be used by the fft. +__inline static void WindowData(float* x_windowed, const float* x) { + int i; + for (i = 0; i < PART_LEN; i += 4) { + const float32x4_t vec_Buf1 = vld1q_f32(&x[i]); + const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]); + const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]); + // A B C D + float32x4_t vec_sqrtHanning_rev = + vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]); + // B A D C + vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev); + // D C B A + vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev), + vget_low_f32(vec_sqrtHanning_rev)); + vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning)); + vst1q_f32(&x_windowed[PART_LEN + i], + vmulq_f32(vec_Buf2, vec_sqrtHanning_rev)); + } +} + +// Puts fft output data into a complex valued array. 
+__inline static void StoreAsComplex(const float* data, + float data_complex[2][PART_LEN1]) { + int i; + for (i = 0; i < PART_LEN; i += 4) { + const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]); + vst1q_f32(&data_complex[0][i], vec_data.val[0]); + vst1q_f32(&data_complex[1][i], vec_data.val[1]); + } + // fix beginning/end values + data_complex[1][0] = 0; + data_complex[1][PART_LEN] = 0; + data_complex[0][0] = data[0]; + data_complex[0][PART_LEN] = data[1]; +} + +static void SubbandCoherenceNEON(AecCore* aec, + float efw[2][PART_LEN1], + float xfw[2][PART_LEN1], + float* fft, + float* cohde, + float* cohxd) { + float dfw[2][PART_LEN1]; + int i; + + if (aec->delayEstCtr == 0) + aec->delayIdx = PartitionDelay(aec); + + // Use delayed far. + memcpy(xfw, + aec->xfwBuf + aec->delayIdx * PART_LEN1, + sizeof(xfw[0][0]) * 2 * PART_LEN1); + + // Windowed near fft + WindowData(fft, aec->dBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, dfw); + + // Windowed error fft + WindowData(fft, aec->eBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, efw); + + SmoothedPSD(aec, efw, dfw, xfw); + + { + const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f); + + // Subband coherence + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]); + const float32x4_t vec_se = vld1q_f32(&aec->se[i]); + const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]); + const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se); + const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx); + float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]); + float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]); + float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]); + float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]); + vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]); + vec_cohde = vdivq_f32(vec_cohde, vec_sdse); + vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]); + vec_cohxd = 
vdivq_f32(vec_cohxd, vec_sdsx); + + vst1q_f32(&cohde[i], vec_cohde); + vst1q_f32(&cohxd[i], vec_cohxd); + } + } + // scalar code for the remaining items. + for (; i < PART_LEN1; i++) { + cohde[i] = + (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) / + (aec->sd[i] * aec->se[i] + 1e-10f); + cohxd[i] = + (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) / + (aec->sx[i] * aec->sd[i] + 1e-10f); + } +} + +void WebRtcAec_InitAec_neon(void) { + WebRtcAec_FilterFar = FilterFarNEON; + WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; + WebRtcAec_FilterAdaptation = FilterAdaptationNEON; + WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; + WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; +} + diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_sse2.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_sse2.c new file mode 100644 index 00000000..b1bffcbb --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_sse2.c @@ -0,0 +1,731 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * The core AEC algorithm, SSE2 version of speed-critical functions. 
+ */ + +#include <emmintrin.h> +#include <math.h> +#include <string.h> // memset + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/aec/aec_common.h" +#include "webrtc/modules/audio_processing/aec/aec_core_internal.h" +#include "webrtc/modules/audio_processing/aec/aec_rdft.h" + +__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { + return aRe * bRe - aIm * bIm; +} + +__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { + return aRe * bIm + aIm * bRe; +} + +static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) { + int i; + const int num_partitions = aec->num_partitions; + for (i = 0; i < num_partitions; i++) { + int j; + int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int pos = i * PART_LEN1; + // Check for wrap + if (i + aec->xfBufBlockPos >= num_partitions) { + xPos -= num_partitions * (PART_LEN1); + } + + // vectorized code (four at once) + for (j = 0; j + 3 < PART_LEN1; j += 4) { + const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); + const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); + const __m128 wfBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); + const __m128 wfBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); + const __m128 yf_re = _mm_loadu_ps(&yf[0][j]); + const __m128 yf_im = _mm_loadu_ps(&yf[1][j]); + const __m128 a = _mm_mul_ps(xfBuf_re, wfBuf_re); + const __m128 b = _mm_mul_ps(xfBuf_im, wfBuf_im); + const __m128 c = _mm_mul_ps(xfBuf_re, wfBuf_im); + const __m128 d = _mm_mul_ps(xfBuf_im, wfBuf_re); + const __m128 e = _mm_sub_ps(a, b); + const __m128 f = _mm_add_ps(c, d); + const __m128 g = _mm_add_ps(yf_re, e); + const __m128 h = _mm_add_ps(yf_im, f); + _mm_storeu_ps(&yf[0][j], g); + _mm_storeu_ps(&yf[1][j], h); + } + // scalar code for the remaining items. 
+ for (; j < PART_LEN1; j++) { + yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], + aec->xfBuf[1][xPos + j], + aec->wfBuf[0][pos + j], + aec->wfBuf[1][pos + j]); + yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], + aec->xfBuf[1][xPos + j], + aec->wfBuf[0][pos + j], + aec->wfBuf[1][pos + j]); + } + } +} + +static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) { + const __m128 k1e_10f = _mm_set1_ps(1e-10f); + const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu) + : _mm_set1_ps(aec->normal_mu); + const __m128 kThresh = aec->extended_filter_enabled + ? _mm_set1_ps(kExtendedErrorThreshold) + : _mm_set1_ps(aec->normal_error_threshold); + + int i; + // vectorized code (four at once) + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const __m128 xPow = _mm_loadu_ps(&aec->xPow[i]); + const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); + const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); + + const __m128 xPowPlus = _mm_add_ps(xPow, k1e_10f); + __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus); + __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus); + const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re); + const __m128 ef_im2 = _mm_mul_ps(ef_im, ef_im); + const __m128 ef_sum2 = _mm_add_ps(ef_re2, ef_im2); + const __m128 absEf = _mm_sqrt_ps(ef_sum2); + const __m128 bigger = _mm_cmpgt_ps(absEf, kThresh); + __m128 absEfPlus = _mm_add_ps(absEf, k1e_10f); + const __m128 absEfInv = _mm_div_ps(kThresh, absEfPlus); + __m128 ef_re_if = _mm_mul_ps(ef_re, absEfInv); + __m128 ef_im_if = _mm_mul_ps(ef_im, absEfInv); + ef_re_if = _mm_and_ps(bigger, ef_re_if); + ef_im_if = _mm_and_ps(bigger, ef_im_if); + ef_re = _mm_andnot_ps(bigger, ef_re); + ef_im = _mm_andnot_ps(bigger, ef_im); + ef_re = _mm_or_ps(ef_re, ef_re_if); + ef_im = _mm_or_ps(ef_im, ef_im_if); + ef_re = _mm_mul_ps(ef_re, kMu); + ef_im = _mm_mul_ps(ef_im, kMu); + + _mm_storeu_ps(&ef[0][i], ef_re); + _mm_storeu_ps(&ef[1][i], ef_im); + } + // scalar code for the remaining items. 
+ { + const float mu = + aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; + const float error_threshold = aec->extended_filter_enabled + ? kExtendedErrorThreshold + : aec->normal_error_threshold; + for (; i < (PART_LEN1); i++) { + float abs_ef; + ef[0][i] /= (aec->xPow[i] + 1e-10f); + ef[1][i] /= (aec->xPow[i] + 1e-10f); + abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); + + if (abs_ef > error_threshold) { + abs_ef = error_threshold / (abs_ef + 1e-10f); + ef[0][i] *= abs_ef; + ef[1][i] *= abs_ef; + } + + // Stepsize factor + ef[0][i] *= mu; + ef[1][i] *= mu; + } + } +} + +static void FilterAdaptationSSE2(AecCore* aec, + float* fft, + float ef[2][PART_LEN1]) { + int i, j; + const int num_partitions = aec->num_partitions; + for (i = 0; i < num_partitions; i++) { + int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); + int pos = i * PART_LEN1; + // Check for wrap + if (i + aec->xfBufBlockPos >= num_partitions) { + xPos -= num_partitions * PART_LEN1; + } + + // Process the whole array... + for (j = 0; j < PART_LEN; j += 4) { + // Load xfBuf and ef. + const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); + const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); + const __m128 ef_re = _mm_loadu_ps(&ef[0][j]); + const __m128 ef_im = _mm_loadu_ps(&ef[1][j]); + // Calculate the product of conjugate(xfBuf) by ef. + // re(conjugate(a) * b) = aRe * bRe + aIm * bIm + // im(conjugate(a) * b)= aRe * bIm - aIm * bRe + const __m128 a = _mm_mul_ps(xfBuf_re, ef_re); + const __m128 b = _mm_mul_ps(xfBuf_im, ef_im); + const __m128 c = _mm_mul_ps(xfBuf_re, ef_im); + const __m128 d = _mm_mul_ps(xfBuf_im, ef_re); + const __m128 e = _mm_add_ps(a, b); + const __m128 f = _mm_sub_ps(c, d); + // Interleave real and imaginary parts. + const __m128 g = _mm_unpacklo_ps(e, f); + const __m128 h = _mm_unpackhi_ps(e, f); + // Store + _mm_storeu_ps(&fft[2 * j + 0], g); + _mm_storeu_ps(&fft[2 * j + 4], h); + } + // ... and fixup the first imaginary entry. 
+ fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], + -aec->xfBuf[1][xPos + PART_LEN], + ef[0][PART_LEN], + ef[1][PART_LEN]); + + aec_rdft_inverse_128(fft); + memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); + + // fft scaling + { + float scale = 2.0f / PART_LEN2; + const __m128 scale_ps = _mm_load_ps1(&scale); + for (j = 0; j < PART_LEN; j += 4) { + const __m128 fft_ps = _mm_loadu_ps(&fft[j]); + const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps); + _mm_storeu_ps(&fft[j], fft_scale); + } + } + aec_rdft_forward_128(fft); + + { + float wt1 = aec->wfBuf[1][pos]; + aec->wfBuf[0][pos + PART_LEN] += fft[1]; + for (j = 0; j < PART_LEN; j += 4) { + __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); + __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); + const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]); + const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]); + const __m128 fft_re = + _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0)); + const __m128 fft_im = + _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1)); + wtBuf_re = _mm_add_ps(wtBuf_re, fft_re); + wtBuf_im = _mm_add_ps(wtBuf_im, fft_im); + _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re); + _mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im); + } + aec->wfBuf[1][pos] = wt1; + } + } +} + +static __m128 mm_pow_ps(__m128 a, __m128 b) { + // a^b = exp2(b * log2(a)) + // exp2(x) and log2(x) are calculated using polynomial approximations. + __m128 log2_a, b_log2_a, a_exp_b; + + // Calculate log2(x), x = a. + { + // To calculate log2(x), we decompose x like this: + // x = y * 2^n + // n is an integer + // y is in the [1.0, 2.0) range + // + // log2(x) = log2(y) + n + // n can be evaluated by playing with float representation. + // log2(y) in a small range can be approximated, this code uses an order + // five polynomial approximation. The coefficients have been + // estimated with the Remez algorithm and the resulting + // polynomial has a maximum relative error of 0.00086%. + + // Compute n. 
+ // This is done by masking the exponent, shifting it into the top bit of + // the mantissa, putting eight into the biased exponent (to shift/ + // compensate the fact that the exponent has been shifted in the top/ + // fractional part and finally getting rid of the implicit leading one + // from the mantissa by substracting it out. + static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = { + 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000}; + static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = { + 0x43800000, 0x43800000, 0x43800000, 0x43800000}; + static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = { + 0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000}; + static const int shift_exponent_into_top_mantissa = 8; + const __m128 two_n = _mm_and_ps(a, *((__m128*)float_exponent_mask)); + const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32( + _mm_castps_si128(two_n), shift_exponent_into_top_mantissa)); + const __m128 n_0 = _mm_or_ps(n_1, *((__m128*)eight_biased_exponent)); + const __m128 n = _mm_sub_ps(n_0, *((__m128*)implicit_leading_one)); + + // Compute y. + static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = { + 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF}; + static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = { + 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000}; + const __m128 mantissa = _mm_and_ps(a, *((__m128*)mantissa_mask)); + const __m128 y = + _mm_or_ps(mantissa, *((__m128*)zero_biased_exponent_is_one)); + + // Approximate log2(y) ~= (y - 1) * pol5(y). 
+ // pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0 + static const ALIGN16_BEG float ALIGN16_END C5[4] = { + -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f}; + static const ALIGN16_BEG float ALIGN16_END + C4[4] = {3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f}; + static const ALIGN16_BEG float ALIGN16_END + C3[4] = {-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f}; + static const ALIGN16_BEG float ALIGN16_END + C2[4] = {2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f}; + static const ALIGN16_BEG float ALIGN16_END + C1[4] = {-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f}; + static const ALIGN16_BEG float ALIGN16_END + C0[4] = {3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f}; + const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128*)C5)); + const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128*)C4)); + const __m128 pol5_y_2 = _mm_mul_ps(pol5_y_1, y); + const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128*)C3)); + const __m128 pol5_y_4 = _mm_mul_ps(pol5_y_3, y); + const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128*)C2)); + const __m128 pol5_y_6 = _mm_mul_ps(pol5_y_5, y); + const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128*)C1)); + const __m128 pol5_y_8 = _mm_mul_ps(pol5_y_7, y); + const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128*)C0)); + const __m128 y_minus_one = + _mm_sub_ps(y, *((__m128*)zero_biased_exponent_is_one)); + const __m128 log2_y = _mm_mul_ps(y_minus_one, pol5_y); + + // Combine parts. + log2_a = _mm_add_ps(n, log2_y); + } + + // b * log2(a) + b_log2_a = _mm_mul_ps(b, log2_a); + + // Calculate exp2(x), x = b * log2(a). + { + // To calculate 2^x, we decompose x like this: + // x = n + y + // n is an integer, the value of x - 0.5 rounded down, therefore + // y is in the [0.5, 1.5) range + // + // 2^x = 2^n * 2^y + // 2^n can be evaluated by playing with float representation. + // 2^y in a small range can be approximated, this code uses an order two + // polynomial approximation. 
The coefficients have been estimated + // with the Remez algorithm and the resulting polynomial has a + // maximum relative error of 0.17%. + + // To avoid over/underflow, we reduce the range of input to ]-127, 129]. + static const ALIGN16_BEG float max_input[4] ALIGN16_END = {129.f, 129.f, + 129.f, 129.f}; + static const ALIGN16_BEG float min_input[4] ALIGN16_END = { + -126.99999f, -126.99999f, -126.99999f, -126.99999f}; + const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128*)max_input)); + const __m128 x_max = _mm_max_ps(x_min, *((__m128*)min_input)); + // Compute n. + static const ALIGN16_BEG float half[4] ALIGN16_END = {0.5f, 0.5f, + 0.5f, 0.5f}; + const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128*)half)); + const __m128i x_minus_half_floor = _mm_cvtps_epi32(x_minus_half); + // Compute 2^n. + static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = { + 127, 127, 127, 127}; + static const int float_exponent_shift = 23; + const __m128i two_n_exponent = + _mm_add_epi32(x_minus_half_floor, *((__m128i*)float_exponent_bias)); + const __m128 two_n = + _mm_castsi128_ps(_mm_slli_epi32(two_n_exponent, float_exponent_shift)); + // Compute y. + const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor)); + // Approximate 2^y ~= C2 * y^2 + C1 * y + C0. + static const ALIGN16_BEG float C2[4] ALIGN16_END = { + 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f}; + static const ALIGN16_BEG float C1[4] ALIGN16_END = { + 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f}; + static const ALIGN16_BEG float C0[4] ALIGN16_END = {1.0017247f, 1.0017247f, + 1.0017247f, 1.0017247f}; + const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128*)C2)); + const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128*)C1)); + const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y); + const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128*)C0)); + + // Combine parts. 
+ a_exp_b = _mm_mul_ps(exp2_y, two_n); + } + return a_exp_b; +} + +static void OverdriveAndSuppressSSE2(AecCore* aec, + float hNl[PART_LEN1], + const float hNlFb, + float efw[2][PART_LEN1]) { + int i; + const __m128 vec_hNlFb = _mm_set1_ps(hNlFb); + const __m128 vec_one = _mm_set1_ps(1.0f); + const __m128 vec_minus_one = _mm_set1_ps(-1.0f); + const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm); + // vectorized code (four at once) + for (i = 0; i + 3 < PART_LEN1; i += 4) { + // Weight subbands + __m128 vec_hNl = _mm_loadu_ps(&hNl[i]); + const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]); + const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb); + const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb); + const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve); + const __m128 vec_one_weightCurve_hNl = + _mm_mul_ps(vec_one_weightCurve, vec_hNl); + const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl); + const __m128 vec_if1 = _mm_and_ps( + bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl)); + vec_hNl = _mm_or_ps(vec_if0, vec_if1); + + { + const __m128 vec_overDriveCurve = + _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]); + const __m128 vec_overDriveSm_overDriveCurve = + _mm_mul_ps(vec_overDriveSm, vec_overDriveCurve); + vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve); + _mm_storeu_ps(&hNl[i], vec_hNl); + } + + // Suppress error signal + { + __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]); + __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]); + vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl); + vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl); + + // Ooura fft returns incorrect sign on imaginary component. It matters + // here because we are making an additive change with comfort noise. + vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one); + _mm_storeu_ps(&efw[0][i], vec_efw_re); + _mm_storeu_ps(&efw[1][i], vec_efw_im); + } + } + // scalar code for the remaining items. 
+ for (; i < PART_LEN1; i++) { + // Weight subbands + if (hNl[i] > hNlFb) { + hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + + (1 - WebRtcAec_weightCurve[i]) * hNl[i]; + } + hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); + + // Suppress error signal + efw[0][i] *= hNl[i]; + efw[1][i] *= hNl[i]; + + // Ooura fft returns incorrect sign on imaginary component. It matters + // here because we are making an additive change with comfort noise. + efw[1][i] *= -1; + } +} + +__inline static void _mm_add_ps_4x1(__m128 sum, float *dst) { + // A+B C+D + sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2))); + // A+B+C+D A+B+C+D + sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1))); + _mm_store_ss(dst, sum); +} +static int PartitionDelay(const AecCore* aec) { + // Measures the energy in each filter partition and returns the partition with + // highest energy. + // TODO(bjornv): Spread computational cost by computing one partition per + // block? + float wfEnMax = 0; + int i; + int delay = 0; + + for (i = 0; i < aec->num_partitions; i++) { + int j; + int pos = i * PART_LEN1; + float wfEn = 0; + __m128 vec_wfEn = _mm_set1_ps(0.0f); + // vectorized code (four at once) + for (j = 0; j + 3 < PART_LEN1; j += 4) { + const __m128 vec_wfBuf0 = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); + const __m128 vec_wfBuf1 = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); + vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf0, vec_wfBuf0)); + vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf1, vec_wfBuf1)); + } + _mm_add_ps_4x1(vec_wfEn, &wfEn); + + // scalar code for the remaining items. 
+ for (; j < PART_LEN1; j++) { + wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + + aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; + } + + if (wfEn > wfEnMax) { + wfEnMax = wfEn; + delay = i; + } + } + return delay; +} + +// Updates the following smoothed Power Spectral Densities (PSD): +// - sd : near-end +// - se : residual echo +// - sx : far-end +// - sde : cross-PSD of near-end and residual echo +// - sxd : cross-PSD of near-end and far-end +// +// In addition to updating the PSDs, also the filter diverge state is determined +// upon actions are taken. +static void SmoothedPSD(AecCore* aec, + float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], + float xfw[2][PART_LEN1]) { + // Power estimate smoothing coefficients. + const float* ptrGCoh = aec->extended_filter_enabled + ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1] + : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1]; + int i; + float sdSum = 0, seSum = 0; + const __m128 vec_15 = _mm_set1_ps(WebRtcAec_kMinFarendPSD); + const __m128 vec_GCoh0 = _mm_set1_ps(ptrGCoh[0]); + const __m128 vec_GCoh1 = _mm_set1_ps(ptrGCoh[1]); + __m128 vec_sdSum = _mm_set1_ps(0.0f); + __m128 vec_seSum = _mm_set1_ps(0.0f); + + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const __m128 vec_dfw0 = _mm_loadu_ps(&dfw[0][i]); + const __m128 vec_dfw1 = _mm_loadu_ps(&dfw[1][i]); + const __m128 vec_efw0 = _mm_loadu_ps(&efw[0][i]); + const __m128 vec_efw1 = _mm_loadu_ps(&efw[1][i]); + const __m128 vec_xfw0 = _mm_loadu_ps(&xfw[0][i]); + const __m128 vec_xfw1 = _mm_loadu_ps(&xfw[1][i]); + __m128 vec_sd = _mm_mul_ps(_mm_loadu_ps(&aec->sd[i]), vec_GCoh0); + __m128 vec_se = _mm_mul_ps(_mm_loadu_ps(&aec->se[i]), vec_GCoh0); + __m128 vec_sx = _mm_mul_ps(_mm_loadu_ps(&aec->sx[i]), vec_GCoh0); + __m128 vec_dfw_sumsq = _mm_mul_ps(vec_dfw0, vec_dfw0); + __m128 vec_efw_sumsq = _mm_mul_ps(vec_efw0, vec_efw0); + __m128 vec_xfw_sumsq = _mm_mul_ps(vec_xfw0, vec_xfw0); + vec_dfw_sumsq = _mm_add_ps(vec_dfw_sumsq, _mm_mul_ps(vec_dfw1, 
vec_dfw1)); + vec_efw_sumsq = _mm_add_ps(vec_efw_sumsq, _mm_mul_ps(vec_efw1, vec_efw1)); + vec_xfw_sumsq = _mm_add_ps(vec_xfw_sumsq, _mm_mul_ps(vec_xfw1, vec_xfw1)); + vec_xfw_sumsq = _mm_max_ps(vec_xfw_sumsq, vec_15); + vec_sd = _mm_add_ps(vec_sd, _mm_mul_ps(vec_dfw_sumsq, vec_GCoh1)); + vec_se = _mm_add_ps(vec_se, _mm_mul_ps(vec_efw_sumsq, vec_GCoh1)); + vec_sx = _mm_add_ps(vec_sx, _mm_mul_ps(vec_xfw_sumsq, vec_GCoh1)); + _mm_storeu_ps(&aec->sd[i], vec_sd); + _mm_storeu_ps(&aec->se[i], vec_se); + _mm_storeu_ps(&aec->sx[i], vec_sx); + + { + const __m128 vec_3210 = _mm_loadu_ps(&aec->sde[i][0]); + const __m128 vec_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]); + __m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654, + _MM_SHUFFLE(2, 0, 2, 0)); + __m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654, + _MM_SHUFFLE(3, 1, 3, 1)); + __m128 vec_dfwefw0011 = _mm_mul_ps(vec_dfw0, vec_efw0); + __m128 vec_dfwefw0110 = _mm_mul_ps(vec_dfw0, vec_efw1); + vec_a = _mm_mul_ps(vec_a, vec_GCoh0); + vec_b = _mm_mul_ps(vec_b, vec_GCoh0); + vec_dfwefw0011 = _mm_add_ps(vec_dfwefw0011, + _mm_mul_ps(vec_dfw1, vec_efw1)); + vec_dfwefw0110 = _mm_sub_ps(vec_dfwefw0110, + _mm_mul_ps(vec_dfw1, vec_efw0)); + vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwefw0011, vec_GCoh1)); + vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwefw0110, vec_GCoh1)); + _mm_storeu_ps(&aec->sde[i][0], _mm_unpacklo_ps(vec_a, vec_b)); + _mm_storeu_ps(&aec->sde[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b)); + } + + { + const __m128 vec_3210 = _mm_loadu_ps(&aec->sxd[i][0]); + const __m128 vec_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]); + __m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654, + _MM_SHUFFLE(2, 0, 2, 0)); + __m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654, + _MM_SHUFFLE(3, 1, 3, 1)); + __m128 vec_dfwxfw0011 = _mm_mul_ps(vec_dfw0, vec_xfw0); + __m128 vec_dfwxfw0110 = _mm_mul_ps(vec_dfw0, vec_xfw1); + vec_a = _mm_mul_ps(vec_a, vec_GCoh0); + vec_b = _mm_mul_ps(vec_b, vec_GCoh0); + vec_dfwxfw0011 = _mm_add_ps(vec_dfwxfw0011, + 
_mm_mul_ps(vec_dfw1, vec_xfw1)); + vec_dfwxfw0110 = _mm_sub_ps(vec_dfwxfw0110, + _mm_mul_ps(vec_dfw1, vec_xfw0)); + vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwxfw0011, vec_GCoh1)); + vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwxfw0110, vec_GCoh1)); + _mm_storeu_ps(&aec->sxd[i][0], _mm_unpacklo_ps(vec_a, vec_b)); + _mm_storeu_ps(&aec->sxd[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b)); + } + + vec_sdSum = _mm_add_ps(vec_sdSum, vec_sd); + vec_seSum = _mm_add_ps(vec_seSum, vec_se); + } + + _mm_add_ps_4x1(vec_sdSum, &sdSum); + _mm_add_ps_4x1(vec_seSum, &seSum); + + for (; i < PART_LEN1; i++) { + aec->sd[i] = ptrGCoh[0] * aec->sd[i] + + ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); + aec->se[i] = ptrGCoh[0] * aec->se[i] + + ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); + // We threshold here to protect against the ill-effects of a zero farend. + // The threshold is not arbitrarily chosen, but balances protection and + // adverse interaction with the algorithm's tuning. + // TODO(bjornv): investigate further why this is so sensitive. + aec->sx[i] = + ptrGCoh[0] * aec->sx[i] + + ptrGCoh[1] * WEBRTC_SPL_MAX( + xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], + WebRtcAec_kMinFarendPSD); + + aec->sde[i][0] = + ptrGCoh[0] * aec->sde[i][0] + + ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); + aec->sde[i][1] = + ptrGCoh[0] * aec->sde[i][1] + + ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); + + aec->sxd[i][0] = + ptrGCoh[0] * aec->sxd[i][0] + + ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); + aec->sxd[i][1] = + ptrGCoh[0] * aec->sxd[i][1] + + ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); + + sdSum += aec->sd[i]; + seSum += aec->se[i]; + } + + // Divergent filter safeguard. + aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; + + if (aec->divergeState) + memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); + + // Reset if error is significantly larger than nearend (13 dB). 
+ if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) + memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); +} + +// Window time domain data to be used by the fft. +__inline static void WindowData(float* x_windowed, const float* x) { + int i; + for (i = 0; i < PART_LEN; i += 4) { + const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]); + const __m128 vec_Buf2 = _mm_loadu_ps(&x[PART_LEN + i]); + const __m128 vec_sqrtHanning = _mm_load_ps(&WebRtcAec_sqrtHanning[i]); + // A B C D + __m128 vec_sqrtHanning_rev = + _mm_loadu_ps(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]); + // D C B A + vec_sqrtHanning_rev = + _mm_shuffle_ps(vec_sqrtHanning_rev, vec_sqrtHanning_rev, + _MM_SHUFFLE(0, 1, 2, 3)); + _mm_storeu_ps(&x_windowed[i], _mm_mul_ps(vec_Buf1, vec_sqrtHanning)); + _mm_storeu_ps(&x_windowed[PART_LEN + i], + _mm_mul_ps(vec_Buf2, vec_sqrtHanning_rev)); + } +} + +// Puts fft output data into a complex valued array. +__inline static void StoreAsComplex(const float* data, + float data_complex[2][PART_LEN1]) { + int i; + for (i = 0; i < PART_LEN; i += 4) { + const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]); + const __m128 vec_fft4 = _mm_loadu_ps(&data[2 * i + 4]); + const __m128 vec_a = _mm_shuffle_ps(vec_fft0, vec_fft4, + _MM_SHUFFLE(2, 0, 2, 0)); + const __m128 vec_b = _mm_shuffle_ps(vec_fft0, vec_fft4, + _MM_SHUFFLE(3, 1, 3, 1)); + _mm_storeu_ps(&data_complex[0][i], vec_a); + _mm_storeu_ps(&data_complex[1][i], vec_b); + } + // fix beginning/end values + data_complex[1][0] = 0; + data_complex[1][PART_LEN] = 0; + data_complex[0][0] = data[0]; + data_complex[0][PART_LEN] = data[1]; +} + +static void SubbandCoherenceSSE2(AecCore* aec, + float efw[2][PART_LEN1], + float xfw[2][PART_LEN1], + float* fft, + float* cohde, + float* cohxd) { + float dfw[2][PART_LEN1]; + int i; + + if (aec->delayEstCtr == 0) + aec->delayIdx = PartitionDelay(aec); + + // Use delayed far. 
+ memcpy(xfw, + aec->xfwBuf + aec->delayIdx * PART_LEN1, + sizeof(xfw[0][0]) * 2 * PART_LEN1); + + // Windowed near fft + WindowData(fft, aec->dBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, dfw); + + // Windowed error fft + WindowData(fft, aec->eBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, efw); + + SmoothedPSD(aec, efw, dfw, xfw); + + { + const __m128 vec_1eminus10 = _mm_set1_ps(1e-10f); + + // Subband coherence + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const __m128 vec_sd = _mm_loadu_ps(&aec->sd[i]); + const __m128 vec_se = _mm_loadu_ps(&aec->se[i]); + const __m128 vec_sx = _mm_loadu_ps(&aec->sx[i]); + const __m128 vec_sdse = _mm_add_ps(vec_1eminus10, + _mm_mul_ps(vec_sd, vec_se)); + const __m128 vec_sdsx = _mm_add_ps(vec_1eminus10, + _mm_mul_ps(vec_sd, vec_sx)); + const __m128 vec_sde_3210 = _mm_loadu_ps(&aec->sde[i][0]); + const __m128 vec_sde_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]); + const __m128 vec_sxd_3210 = _mm_loadu_ps(&aec->sxd[i][0]); + const __m128 vec_sxd_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]); + const __m128 vec_sde_0 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654, + _MM_SHUFFLE(2, 0, 2, 0)); + const __m128 vec_sde_1 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654, + _MM_SHUFFLE(3, 1, 3, 1)); + const __m128 vec_sxd_0 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654, + _MM_SHUFFLE(2, 0, 2, 0)); + const __m128 vec_sxd_1 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654, + _MM_SHUFFLE(3, 1, 3, 1)); + __m128 vec_cohde = _mm_mul_ps(vec_sde_0, vec_sde_0); + __m128 vec_cohxd = _mm_mul_ps(vec_sxd_0, vec_sxd_0); + vec_cohde = _mm_add_ps(vec_cohde, _mm_mul_ps(vec_sde_1, vec_sde_1)); + vec_cohde = _mm_div_ps(vec_cohde, vec_sdse); + vec_cohxd = _mm_add_ps(vec_cohxd, _mm_mul_ps(vec_sxd_1, vec_sxd_1)); + vec_cohxd = _mm_div_ps(vec_cohxd, vec_sdsx); + _mm_storeu_ps(&cohde[i], vec_cohde); + _mm_storeu_ps(&cohxd[i], vec_cohxd); + } + + // scalar code for the remaining items. 
+ for (; i < PART_LEN1; i++) { + cohde[i] = + (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) / + (aec->sd[i] * aec->se[i] + 1e-10f); + cohxd[i] = + (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) / + (aec->sx[i] * aec->sd[i] + 1e-10f); + } + } +} + +void WebRtcAec_InitAec_SSE2(void) { + WebRtcAec_FilterFar = FilterFarSSE2; + WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; + WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; + WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; + WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.c new file mode 100644 index 00000000..2c3cff2d --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.c @@ -0,0 +1,589 @@ +/* + * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html + * Copyright Takuya OOURA, 1996-2001 + * + * You may use, copy, modify and distribute this code for any purpose (include + * commercial use) and without fee. Please refer to this package when you modify + * this code. + * + * Changes by the WebRTC authors: + * - Trivial type modifications. + * - Minimal code subset to do rdft of length 128. + * - Optimizations because of known length. + * + * All changes are covered by the WebRTC license and IP grant: + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/aec/aec_rdft.h" + +#include <math.h> + +#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" +#include "webrtc/typedefs.h" + +// These tables used to be computed at run-time. 
For example, refer to: +// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/aec/aec_rdft.c?r=6564 +// to see the initialization code. +const float rdft_w[64] = { + 1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f, + 0.9238795638f, 0.3826834559f, 0.3826834559f, 0.9238795638f, + 0.9807852507f, 0.1950903237f, 0.5555702448f, 0.8314695954f, + 0.8314695954f, 0.5555702448f, 0.1950903237f, 0.9807852507f, + 0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f, + 0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f, + 0.9569403529f, 0.2902846634f, 0.4713967443f, 0.8819212914f, + 0.7730104327f, 0.6343933344f, 0.0980171412f, 0.9951847196f, + 0.7071067691f, 0.4993977249f, 0.4975923598f, 0.4945882559f, + 0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f, + 0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f, + 0.4157347977f, 0.4016037583f, 0.3865052164f, 0.3704755902f, + 0.3535533845f, 0.3357794881f, 0.3171966672f, 0.2978496552f, + 0.2777851224f, 0.2570513785f, 0.2356983721f, 0.2137775421f, + 0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f, + 0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f, +}; +const float rdft_wk3ri_first[16] = { + 1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f, + 0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f, + 0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f, + 0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f, +}; +const float rdft_wk3ri_second[16] = { + -0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f, + -0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f, + -0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f, + -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = { + 1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f, + 0.923879564f, 0.923879564f, 0.382683456f, 0.382683456f, + 0.980785251f, 0.980785251f, 0.555570245f, 0.555570245f, + 
0.831469595f, 0.831469595f, 0.195090324f, 0.195090324f, + 0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f, + 0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f, + 0.956940353f, 0.956940353f, 0.471396744f, 0.471396744f, + 0.773010433f, 0.773010433f, 0.098017141f, 0.098017141f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = { + 1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f, + 0.707106769f, 0.707106769f, -0.707106769f, -0.707106769f, + 0.923879564f, 0.923879564f, -0.382683456f, -0.382683456f, + 0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f, + 0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f, + 0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f, + 0.831469595f, 0.831469595f, -0.555570245f, -0.555570245f, + 0.195090324f, 0.195090324f, -0.980785251f, -0.980785251f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = { + 1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f, + 0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f, + 0.831469536f, 0.831469536f, -0.980785251f, -0.980785251f, + -0.195090353f, -0.195090353f, -0.555570245f, -0.555570245f, + 0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f, + 0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f, + 0.634393334f, 0.634393334f, -0.995184720f, -0.995184720f, + -0.471396863f, -0.471396863f, -0.290284693f, -0.290284693f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = { + -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, + -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f, + -0.195090324f, 0.195090324f, -0.831469595f, 0.831469595f, + -0.555570245f, 0.555570245f, -0.980785251f, 0.980785251f, + -0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f, + -0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f, + -0.290284663f, 0.290284663f, -0.881921291f, 0.881921291f, + -0.634393334f, 0.634393334f, -0.995184720f, 0.995184720f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = { + -0.000000000f, 0.000000000f, 
-1.000000000f, 1.000000000f, + -0.707106769f, 0.707106769f, -0.707106769f, 0.707106769f, + -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f, + -0.923879564f, 0.923879564f, -0.382683456f, 0.382683456f, + -0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f, + -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f, + -0.555570245f, 0.555570245f, -0.831469595f, 0.831469595f, + -0.980785251f, 0.980785251f, -0.195090324f, 0.195090324f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = { + -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, + -0.923879564f, 0.923879564f, 0.382683456f, -0.382683456f, + -0.555570245f, 0.555570245f, -0.195090353f, 0.195090353f, + -0.980785251f, 0.980785251f, 0.831469536f, -0.831469536f, + -0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f, + -0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f, + -0.773010492f, 0.773010492f, 0.098017156f, -0.098017156f, + -0.881921172f, 0.881921172f, 0.956940353f, -0.956940353f, +}; +ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = { + 0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f, +}; + +static void bitrv2_128_C(float* a) { + /* + Following things have been attempted but are no faster: + (a) Storing the swap indexes in a LUT (index calculations are done + for 'free' while waiting on memory/L1). + (b) Consolidate the load/store of two consecutive floats by a 64 bit + integer (execution is memory/L1 bound). + (c) Do a mix of floats and 64 bit integer to maximize register + utilization (execution is memory/L1 bound). + (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5). + (e) Hard-coding of the offsets to completely eliminates index + calculations. 
+ */ + + unsigned int j, j1, k, k1; + float xr, xi, yr, yi; + + static const int ip[4] = {0, 64, 32, 96}; + for (k = 0; k < 4; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + j1 += 8; + k1 += 16; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + j1 += 8; + k1 -= 8; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + j1 += 8; + k1 += 16; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + } + j1 = 2 * k + 8 + ip[k]; + k1 = j1 + 8; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + } +} + +static void cft1st_128_C(float* a) { + const int n = 128; + int j, k1, k2; + float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + // The processing of the first set of elements was simplified in C to avoid + // some operations (multiplication by zero or one, addition of two elements + // multiplied by the same weight, ...). 
+ x0r = a[0] + a[2]; + x0i = a[1] + a[3]; + x1r = a[0] - a[2]; + x1i = a[1] - a[3]; + x2r = a[4] + a[6]; + x2i = a[5] + a[7]; + x3r = a[4] - a[6]; + x3i = a[5] - a[7]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[4] = x0r - x2r; + a[5] = x0i - x2i; + a[2] = x1r - x3i; + a[3] = x1i + x3r; + a[6] = x1r + x3i; + a[7] = x1i - x3r; + wk1r = rdft_w[2]; + x0r = a[8] + a[10]; + x0i = a[9] + a[11]; + x1r = a[8] - a[10]; + x1i = a[9] - a[11]; + x2r = a[12] + a[14]; + x2i = a[13] + a[15]; + x3r = a[12] - a[14]; + x3i = a[13] - a[15]; + a[8] = x0r + x2r; + a[9] = x0i + x2i; + a[12] = x2i - x0i; + a[13] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[10] = wk1r * (x0r - x0i); + a[11] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[14] = wk1r * (x0i - x0r); + a[15] = wk1r * (x0i + x0r); + k1 = 0; + for (j = 16; j < n; j += 16) { + k1 += 2; + k2 = 2 * k1; + wk2r = rdft_w[k1 + 0]; + wk2i = rdft_w[k1 + 1]; + wk1r = rdft_w[k2 + 0]; + wk1i = rdft_w[k2 + 1]; + wk3r = rdft_wk3ri_first[k1 + 0]; + wk3i = rdft_wk3ri_first[k1 + 1]; + x0r = a[j + 0] + a[j + 2]; + x0i = a[j + 1] + a[j + 3]; + x1r = a[j + 0] - a[j + 2]; + x1i = a[j + 1] - a[j + 3]; + x2r = a[j + 4] + a[j + 6]; + x2i = a[j + 5] + a[j + 7]; + x3r = a[j + 4] - a[j + 6]; + x3i = a[j + 5] - a[j + 7]; + a[j + 0] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 4] = wk2r * x0r - wk2i * x0i; + a[j + 5] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 2] = wk1r * x0r - wk1i * x0i; + a[j + 3] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 6] = wk3r * x0r - wk3i * x0i; + a[j + 7] = wk3r * x0i + wk3i * x0r; + wk1r = rdft_w[k2 + 2]; + wk1i = rdft_w[k2 + 3]; + wk3r = rdft_wk3ri_second[k1 + 0]; + wk3i = rdft_wk3ri_second[k1 + 1]; + x0r = a[j + 8] + a[j + 10]; + x0i = a[j + 9] + a[j + 11]; + x1r = a[j + 8] - a[j + 10]; + x1i = a[j + 9] - a[j + 11]; + x2r = a[j + 12] + a[j + 14]; + x2i = a[j + 13] + a[j + 15]; + x3r = a[j + 12] - a[j + 14]; + x3i = 
a[j + 13] - a[j + 15]; + a[j + 8] = x0r + x2r; + a[j + 9] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 12] = -wk2i * x0r - wk2r * x0i; + a[j + 13] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 10] = wk1r * x0r - wk1i * x0i; + a[j + 11] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 14] = wk3r * x0r - wk3i * x0i; + a[j + 15] = wk3r * x0i + wk3i * x0r; + } +} + +static void cftmdl_128_C(float* a) { + const int l = 8; + const int n = 128; + const int m = 32; + int j0, j1, j2, j3, k, k1, k2, m2; + float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + for (j0 = 0; j0 < l; j0 += 2) { + j1 = j0 + 8; + j2 = j0 + 16; + j3 = j0 + 24; + x0r = a[j0 + 0] + a[j1 + 0]; + x0i = a[j0 + 1] + a[j1 + 1]; + x1r = a[j0 + 0] - a[j1 + 0]; + x1i = a[j0 + 1] - a[j1 + 1]; + x2r = a[j2 + 0] + a[j3 + 0]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2 + 0] - a[j3 + 0]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j0 + 0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j2 + 0] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1 + 0] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3 + 0] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } + wk1r = rdft_w[2]; + for (j0 = m; j0 < l + m; j0 += 2) { + j1 = j0 + 8; + j2 = j0 + 16; + j3 = j0 + 24; + x0r = a[j0 + 0] + a[j1 + 0]; + x0i = a[j0 + 1] + a[j1 + 1]; + x1r = a[j0 + 0] - a[j1 + 0]; + x1i = a[j0 + 1] - a[j1 + 1]; + x2r = a[j2 + 0] + a[j3 + 0]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2 + 0] - a[j3 + 0]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j0 + 0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j2 + 0] = x2i - x0i; + a[j2 + 1] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1 + 0] = wk1r * (x0r - x0i); + a[j1 + 1] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[j3 + 0] = wk1r * (x0i - x0r); + a[j3 + 1] = wk1r * (x0i + x0r); + } + k1 = 0; + m2 = 2 * m; + for (k = m2; k < n; k += m2) { + k1 += 2; + k2 = 2 * k1; + wk2r = rdft_w[k1 + 0]; + wk2i = rdft_w[k1 + 1]; + wk1r = 
rdft_w[k2 + 0]; + wk1i = rdft_w[k2 + 1]; + wk3r = rdft_wk3ri_first[k1 + 0]; + wk3i = rdft_wk3ri_first[k1 + 1]; + for (j0 = k; j0 < l + k; j0 += 2) { + j1 = j0 + 8; + j2 = j0 + 16; + j3 = j0 + 24; + x0r = a[j0 + 0] + a[j1 + 0]; + x0i = a[j0 + 1] + a[j1 + 1]; + x1r = a[j0 + 0] - a[j1 + 0]; + x1i = a[j0 + 1] - a[j1 + 1]; + x2r = a[j2 + 0] + a[j3 + 0]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2 + 0] - a[j3 + 0]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j0 + 0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2 + 0] = wk2r * x0r - wk2i * x0i; + a[j2 + 1] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1 + 0] = wk1r * x0r - wk1i * x0i; + a[j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3 + 0] = wk3r * x0r - wk3i * x0i; + a[j3 + 1] = wk3r * x0i + wk3i * x0r; + } + wk1r = rdft_w[k2 + 2]; + wk1i = rdft_w[k2 + 3]; + wk3r = rdft_wk3ri_second[k1 + 0]; + wk3i = rdft_wk3ri_second[k1 + 1]; + for (j0 = k + m; j0 < l + (k + m); j0 += 2) { + j1 = j0 + 8; + j2 = j0 + 16; + j3 = j0 + 24; + x0r = a[j0 + 0] + a[j1 + 0]; + x0i = a[j0 + 1] + a[j1 + 1]; + x1r = a[j0 + 0] - a[j1 + 0]; + x1i = a[j0 + 1] - a[j1 + 1]; + x2r = a[j2 + 0] + a[j3 + 0]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2 + 0] - a[j3 + 0]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j0 + 0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2 + 0] = -wk2i * x0r - wk2r * x0i; + a[j2 + 1] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1 + 0] = wk1r * x0r - wk1i * x0i; + a[j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3 + 0] = wk3r * x0r - wk3i * x0i; + a[j3 + 1] = wk3r * x0i + wk3i * x0r; + } + } +} + +static void cftfsub_128_C(float* a) { + int j, j1, j2, j3, l; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + cft1st_128(a); + cftmdl_128(a); + l = 32; + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = 
a[j] - a[j1];
+    x1i = a[j + 1] - a[j1 + 1];
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i + x2i;
+    a[j2] = x0r - x2r;
+    a[j2 + 1] = x0i - x2i;
+    a[j1] = x1r - x3i;
+    a[j1 + 1] = x1i + x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i - x3r;
+  }
+}
+
+// Backward complex FFT stage on the 128-float buffer |a|, in place.
+// Runs the cft1st_128 and cftmdl_128 passes through their dispatch pointers,
+// then a final radix-4 pass that combines a[j], a[j + 32], a[j + 64] and
+// a[j + 96] (plus the following imaginary slots) for j = 0, 2, ..., 30.
+// The imaginary terms of the first pair are formed with a negated a[j + 1].
+static void cftbsub_128_C(float* a) {
+  enum { kQuarter = 32 };
+  int i;
+
+  cft1st_128(a);
+  cftmdl_128(a);
+
+  for (i = 0; i < kQuarter; i += 2) {
+    float* const q0 = a + i;
+    float* const q1 = q0 + kQuarter;
+    float* const q2 = q1 + kQuarter;
+    float* const q3 = q2 + kQuarter;
+    // All eight inputs are read before any output is written.
+    const float s01r = q0[0] + q1[0];
+    const float s01i = -q0[1] - q1[1];
+    const float d01r = q0[0] - q1[0];
+    const float d01i = -q0[1] + q1[1];
+    const float s23r = q2[0] + q3[0];
+    const float s23i = q2[1] + q3[1];
+    const float d23r = q2[0] - q3[0];
+    const float d23i = q2[1] - q3[1];
+
+    q0[0] = s01r + s23r;
+    q0[1] = s01i - s23i;
+    q2[0] = s01r - s23r;
+    q2[1] = s01i + s23i;
+    q1[0] = d01r - d23i;
+    q1[1] = d01i - d23r;
+    q3[0] = d01r + d23i;
+    q3[1] = d01i + d23r;
+  }
+}
+
+// Forward real-transform fix-up on the 128-float buffer |a|, in place.
+// For i = 1..31 the pair a[2i], a[2i + 1] and its mirror a[128 - 2i],
+// a[128 - 2i + 1] are adjusted using coefficients read from rdft_w + 32.
+// a[0], a[1], a[64] and a[65] are not touched.
+static void rftfsub_128_C(float* a) {
+  const float* const coef = rdft_w + 32;
+  int i;
+
+  for (i = 1; i < 32; ++i) {
+    float* const lo = a + 2 * i;
+    float* const hi = a + 128 - 2 * i;
+    const float wkr = 0.5f - coef[32 - i];
+    const float wki = coef[i];
+    const float xr = lo[0] - hi[0];
+    const float xi = lo[1] + hi[1];
+    const float yr = wkr * xr - wki * xi;
+    const float yi = wkr * xi + wki * xr;
+
+    lo[0] -= yr;
+    lo[1] -= yi;
+    hi[0] += yr;
+    hi[1] -= yi;
+  }
+}
+
+// Backward counterpart of rftfsub_128_C: same index pairing and coefficient
+// table, with the rotation direction reversed and the imaginary outputs
+// written as (yi - old value). a[1] is negated before the loop and a[65]
+// after it; the loop itself never touches either element.
+static void rftbsub_128_C(float* a) {
+  const float* const coef = rdft_w + 32;
+  int i;
+
+  a[1] = -a[1];
+  for (i = 1; i < 32; ++i) {
+    float* const lo = a + 2 * i;
+    float* const hi = a + 128 - 2 * i;
+    const float wkr = 0.5f - coef[32 - i];
+    const float wki = coef[i];
+    const float xr = lo[0] - hi[0];
+    const float xi = lo[1] + hi[1];
+    const float yr = wkr * xr + wki * xi;
+    const float yi = wkr * xi - wki * xr;
+
+    lo[0] = lo[0] - yr;
+    lo[1] = yi - lo[1];
+    hi[0] = yr + hi[0];
+    hi[1] = yi - hi[1];
+  }
+  a[65] = -a[65];
+}
+
+void aec_rdft_forward_128(float*
a) {
+  float xi;
+  bitrv2_128(a);
+  cftfsub_128(a);
+  rftfsub_128(a);
+  xi = a[0] - a[1];
+  a[0] += a[1];
+  a[1] = xi;
+}
+
+// Inverse 128-point real transform of |a|, in place.
+// The first two statements reverse the a[0]/a[1] sum/difference packing done
+// at the end of aec_rdft_forward_128(); the remaining passes run through the
+// dispatch pointers, so aec_rdft_init() must have been called beforehand.
+void aec_rdft_inverse_128(float* a) {
+  a[1] = 0.5f * (a[0] - a[1]);
+  a[0] -= a[1];
+  rftbsub_128(a);
+  bitrv2_128(a);
+  cftbsub_128(a);
+}
+
+// code path selection
+// One dispatch pointer per transform pass. They are defined here without an
+// initializer and only become callable once aec_rdft_init() has run.
+RftSub128 cft1st_128;
+RftSub128 cftmdl_128;
+RftSub128 rftfsub_128;
+RftSub128 rftbsub_128;
+RftSub128 cftfsub_128;
+RftSub128 cftbsub_128;
+RftSub128 bitrv2_128;
+
+// Binds every dispatch pointer to its portable C routine, then calls the
+// platform-specific init functions that apply to this build. Those functions
+// are defined outside this file; presumably they replace some or all of the
+// pointers with optimized variants (verify against their definitions).
+void aec_rdft_init(void) {
+  cft1st_128 = cft1st_128_C;
+  cftmdl_128 = cftmdl_128_C;
+  rftfsub_128 = rftfsub_128_C;
+  rftbsub_128 = rftbsub_128_C;
+  cftfsub_128 = cftfsub_128_C;
+  cftbsub_128 = cftbsub_128_C;
+  bitrv2_128 = bitrv2_128_C;
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+  // x86: SSE2 availability is checked at run time.
+  if (WebRtc_GetCPUInfo(kSSE2)) {
+    aec_rdft_init_sse2();
+  }
+#endif
+#if defined(MIPS_FPU_LE)
+  aec_rdft_init_mips();
+#endif
+#if defined(WEBRTC_HAS_NEON)
+  // NEON guaranteed at build time: no run-time check.
+  aec_rdft_init_neon();
+#elif defined(WEBRTC_DETECT_NEON)
+  // NEON optional: consult the CPU feature bits first.
+  if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
+    aec_rdft_init_neon();
+  }
+#endif
+}
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.h
new file mode 100644
index 00000000..18eb7a5c
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
+
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+
+// These intrinsics were unavailable before VS 2008.
+// The fallbacks below reinterpret the register contents through a pointer
+// cast; they are only compiled for MSVC older than version 15.00.
+// TODO(andrew): move to a common file.
+#if defined(_MSC_VER) && _MSC_VER < 1500
+#include <emmintrin.h>
+static __inline __m128 _mm_castsi128_ps(__m128i a) { return *(__m128*)&a; }
+static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; }
+#endif
+
+// Constants shared by all paths (C, SSE2, NEON, MIPS).
+extern const float rdft_w[64];
+// Constants used by the C and MIPS paths.
+extern const float rdft_wk3ri_first[16];
+extern const float rdft_wk3ri_second[16];
+// Constants used by SSE2 and NEON but initialized in the C path.
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32];
+extern ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4];
+
+// code path selection function pointers
+// Every transform pass has the same shape: it rewrites the buffer |a| in
+// place and returns nothing. The pointers are defined next to
+// aec_rdft_init(), which assigns them; they must not be called before that.
+typedef void (*RftSub128)(float* a);
+extern RftSub128 rftfsub_128;
+extern RftSub128 rftbsub_128;
+extern RftSub128 cft1st_128;
+extern RftSub128 cftmdl_128;
+extern RftSub128 cftfsub_128;
+extern RftSub128 cftbsub_128;
+extern RftSub128 bitrv2_128;
+
+// entry points
+// aec_rdft_init() selects the implementations behind the pointers above and
+// has to run once before any transform. aec_rdft_forward_128() and
+// aec_rdft_inverse_128() transform |a| in place; the passes they invoke
+// index elements a[0] through a[127].
+void aec_rdft_init(void);
+void aec_rdft_init_sse2(void);
+void aec_rdft_forward_128(float* a);
+void aec_rdft_inverse_128(float* a);
+
+// Platform-specific initializers, declared only for builds that call them.
+#if defined(MIPS_FPU_LE)
+void aec_rdft_init_mips(void);
+#endif
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+void aec_rdft_init_neon(void);
+#endif
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_mips.c
b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_mips.c new file mode 100644 index 00000000..7e64e657 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_mips.c @@ -0,0 +1,1187 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/aec/aec_rdft.h" +#include "webrtc/typedefs.h" + +static void bitrv2_128_mips(float* a) { + // n is 128 + float xr, xi, yr, yi; + + xr = a[8]; + xi = a[9]; + yr = a[16]; + yi = a[17]; + a[8] = yr; + a[9] = yi; + a[16] = xr; + a[17] = xi; + + xr = a[64]; + xi = a[65]; + yr = a[2]; + yi = a[3]; + a[64] = yr; + a[65] = yi; + a[2] = xr; + a[3] = xi; + + xr = a[72]; + xi = a[73]; + yr = a[18]; + yi = a[19]; + a[72] = yr; + a[73] = yi; + a[18] = xr; + a[19] = xi; + + xr = a[80]; + xi = a[81]; + yr = a[10]; + yi = a[11]; + a[80] = yr; + a[81] = yi; + a[10] = xr; + a[11] = xi; + + xr = a[88]; + xi = a[89]; + yr = a[26]; + yi = a[27]; + a[88] = yr; + a[89] = yi; + a[26] = xr; + a[27] = xi; + + xr = a[74]; + xi = a[75]; + yr = a[82]; + yi = a[83]; + a[74] = yr; + a[75] = yi; + a[82] = xr; + a[83] = xi; + + xr = a[32]; + xi = a[33]; + yr = a[4]; + yi = a[5]; + a[32] = yr; + a[33] = yi; + a[4] = xr; + a[5] = xi; + + xr = a[40]; + xi = a[41]; + yr = a[20]; + yi = a[21]; + a[40] = yr; + a[41] = yi; + a[20] = xr; + a[21] = xi; + + xr = a[48]; + xi = a[49]; + yr = a[12]; + yi = a[13]; + a[48] = yr; + a[49] = yi; + a[12] = xr; + a[13] = xi; + + xr = a[56]; + xi = a[57]; + yr = a[28]; + yi = a[29]; + a[56] = yr; + a[57] = yi; + a[28] = xr; + a[29] = xi; + + xr = a[34]; + xi = a[35]; + yr = a[68]; + 
yi = a[69]; + a[34] = yr; + a[35] = yi; + a[68] = xr; + a[69] = xi; + + xr = a[42]; + xi = a[43]; + yr = a[84]; + yi = a[85]; + a[42] = yr; + a[43] = yi; + a[84] = xr; + a[85] = xi; + + xr = a[50]; + xi = a[51]; + yr = a[76]; + yi = a[77]; + a[50] = yr; + a[51] = yi; + a[76] = xr; + a[77] = xi; + + xr = a[58]; + xi = a[59]; + yr = a[92]; + yi = a[93]; + a[58] = yr; + a[59] = yi; + a[92] = xr; + a[93] = xi; + + xr = a[44]; + xi = a[45]; + yr = a[52]; + yi = a[53]; + a[44] = yr; + a[45] = yi; + a[52] = xr; + a[53] = xi; + + xr = a[96]; + xi = a[97]; + yr = a[6]; + yi = a[7]; + a[96] = yr; + a[97] = yi; + a[6] = xr; + a[7] = xi; + + xr = a[104]; + xi = a[105]; + yr = a[22]; + yi = a[23]; + a[104] = yr; + a[105] = yi; + a[22] = xr; + a[23] = xi; + + xr = a[112]; + xi = a[113]; + yr = a[14]; + yi = a[15]; + a[112] = yr; + a[113] = yi; + a[14] = xr; + a[15] = xi; + + xr = a[120]; + xi = a[121]; + yr = a[30]; + yi = a[31]; + a[120] = yr; + a[121] = yi; + a[30] = xr; + a[31] = xi; + + xr = a[98]; + xi = a[99]; + yr = a[70]; + yi = a[71]; + a[98] = yr; + a[99] = yi; + a[70] = xr; + a[71] = xi; + + xr = a[106]; + xi = a[107]; + yr = a[86]; + yi = a[87]; + a[106] = yr; + a[107] = yi; + a[86] = xr; + a[87] = xi; + + xr = a[114]; + xi = a[115]; + yr = a[78]; + yi = a[79]; + a[114] = yr; + a[115] = yi; + a[78] = xr; + a[79] = xi; + + xr = a[122]; + xi = a[123]; + yr = a[94]; + yi = a[95]; + a[122] = yr; + a[123] = yi; + a[94] = xr; + a[95] = xi; + + xr = a[100]; + xi = a[101]; + yr = a[38]; + yi = a[39]; + a[100] = yr; + a[101] = yi; + a[38] = xr; + a[39] = xi; + + xr = a[108]; + xi = a[109]; + yr = a[54]; + yi = a[55]; + a[108] = yr; + a[109] = yi; + a[54] = xr; + a[55] = xi; + + xr = a[116]; + xi = a[117]; + yr = a[46]; + yi = a[47]; + a[116] = yr; + a[117] = yi; + a[46] = xr; + a[47] = xi; + + xr = a[124]; + xi = a[125]; + yr = a[62]; + yi = a[63]; + a[124] = yr; + a[125] = yi; + a[62] = xr; + a[63] = xi; + + xr = a[110]; + xi = a[111]; + yr = a[118]; + yi = a[119]; + a[110] 
= yr; + a[111] = yi; + a[118] = xr; + a[119] = xi; +} + +static void cft1st_128_mips(float* a) { + float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14; + int a_ptr, p1_rdft, p2_rdft, count; + const float* first = rdft_wk3ri_first; + const float* second = rdft_wk3ri_second; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + // first 8 + "lwc1 %[f0], 0(%[a]) \n\t" + "lwc1 %[f1], 4(%[a]) \n\t" + "lwc1 %[f2], 8(%[a]) \n\t" + "lwc1 %[f3], 12(%[a]) \n\t" + "lwc1 %[f4], 16(%[a]) \n\t" + "lwc1 %[f5], 20(%[a]) \n\t" + "lwc1 %[f6], 24(%[a]) \n\t" + "lwc1 %[f7], 28(%[a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f8], %[f2] \n\t" + "sub.s %[f2], %[f1], %[f4] \n\t" + "add.s %[f1], %[f1], %[f4] \n\t" + "add.s %[f4], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f6], %[f3] \n\t" + "sub.s %[f3], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f7], 0(%[a]) \n\t" + "swc1 %[f8], 16(%[a]) \n\t" + "swc1 %[f2], 28(%[a]) \n\t" + "swc1 %[f1], 12(%[a]) \n\t" + "swc1 %[f4], 4(%[a]) \n\t" + "swc1 %[f6], 20(%[a]) \n\t" + "swc1 %[f3], 8(%[a]) \n\t" + "swc1 %[f0], 24(%[a]) \n\t" + // second 8 + "lwc1 %[f0], 32(%[a]) \n\t" + "lwc1 %[f1], 36(%[a]) \n\t" + "lwc1 %[f2], 40(%[a]) \n\t" + "lwc1 %[f3], 44(%[a]) \n\t" + "lwc1 %[f4], 48(%[a]) \n\t" + "lwc1 %[f5], 52(%[a]) \n\t" + "lwc1 %[f6], 56(%[a]) \n\t" + "lwc1 %[f7], 60(%[a]) \n\t" + "add.s %[f8], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f7], %[f4], %[f1] \n\t" + "sub.s %[f4], %[f4], 
%[f1] \n\t" + "add.s %[f1], %[f3], %[f8] \n\t" + "sub.s %[f3], %[f3], %[f8] \n\t" + "sub.s %[f8], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "add.s %[f5], %[f6], %[f2] \n\t" + "sub.s %[f6], %[f2], %[f6] \n\t" + "lwc1 %[f9], 8(%[rdft_w]) \n\t" + "sub.s %[f2], %[f8], %[f7] \n\t" + "add.s %[f8], %[f8], %[f7] \n\t" + "sub.s %[f7], %[f4], %[f0] \n\t" + "add.s %[f4], %[f4], %[f0] \n\t" + // prepare for loop + "addiu %[a_ptr], %[a], 64 \n\t" + "addiu %[p1_rdft], %[rdft_w], 8 \n\t" + "addiu %[p2_rdft], %[rdft_w], 16 \n\t" + "addiu %[count], $zero, 7 \n\t" + // finish second 8 + "mul.s %[f2], %[f9], %[f2] \n\t" + "mul.s %[f8], %[f9], %[f8] \n\t" + "mul.s %[f7], %[f9], %[f7] \n\t" + "mul.s %[f4], %[f9], %[f4] \n\t" + "swc1 %[f1], 32(%[a]) \n\t" + "swc1 %[f3], 52(%[a]) \n\t" + "swc1 %[f5], 36(%[a]) \n\t" + "swc1 %[f6], 48(%[a]) \n\t" + "swc1 %[f2], 40(%[a]) \n\t" + "swc1 %[f8], 44(%[a]) \n\t" + "swc1 %[f7], 56(%[a]) \n\t" + "swc1 %[f4], 60(%[a]) \n\t" + // loop + "1: \n\t" + "lwc1 %[f0], 0(%[a_ptr]) \n\t" + "lwc1 %[f1], 4(%[a_ptr]) \n\t" + "lwc1 %[f2], 8(%[a_ptr]) \n\t" + "lwc1 %[f3], 12(%[a_ptr]) \n\t" + "lwc1 %[f4], 16(%[a_ptr]) \n\t" + "lwc1 %[f5], 20(%[a_ptr]) \n\t" + "lwc1 %[f6], 24(%[a_ptr]) \n\t" + "lwc1 %[f7], 28(%[a_ptr]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "lwc1 %[f10], 4(%[p1_rdft]) \n\t" + "lwc1 %[f11], 0(%[p2_rdft]) \n\t" + "lwc1 %[f12], 4(%[p2_rdft]) \n\t" + "lwc1 %[f13], 8(%[first]) \n\t" + "lwc1 %[f14], 12(%[first]) \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f6], %[f3] \n\t" + "add.s %[f3], %[f0], %[f5] \n\t" + "sub.s %[f0], %[f0], %[f5] \n\t" + "add.s %[f5], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + 
"swc1 %[f7], 0(%[a_ptr]) \n\t" + "swc1 %[f2], 4(%[a_ptr]) \n\t" + "mul.s %[f4], %[f9], %[f8] \n\t" +#if defined(MIPS32_R2_LE) + "mul.s %[f8], %[f10], %[f8] \n\t" + "mul.s %[f7], %[f11], %[f0] \n\t" + "mul.s %[f0], %[f12], %[f0] \n\t" + "mul.s %[f2], %[f13], %[f3] \n\t" + "mul.s %[f3], %[f14], %[f3] \n\t" + "nmsub.s %[f4], %[f4], %[f10], %[f6] \n\t" + "madd.s %[f8], %[f8], %[f9], %[f6] \n\t" + "nmsub.s %[f7], %[f7], %[f12], %[f5] \n\t" + "madd.s %[f0], %[f0], %[f11], %[f5] \n\t" + "nmsub.s %[f2], %[f2], %[f14], %[f1] \n\t" + "madd.s %[f3], %[f3], %[f13], %[f1] \n\t" +#else + "mul.s %[f7], %[f10], %[f6] \n\t" + "mul.s %[f6], %[f9], %[f6] \n\t" + "mul.s %[f8], %[f10], %[f8] \n\t" + "mul.s %[f2], %[f11], %[f0] \n\t" + "mul.s %[f11], %[f11], %[f5] \n\t" + "mul.s %[f5], %[f12], %[f5] \n\t" + "mul.s %[f0], %[f12], %[f0] \n\t" + "mul.s %[f12], %[f13], %[f3] \n\t" + "mul.s %[f13], %[f13], %[f1] \n\t" + "mul.s %[f1], %[f14], %[f1] \n\t" + "mul.s %[f3], %[f14], %[f3] \n\t" + "sub.s %[f4], %[f4], %[f7] \n\t" + "add.s %[f8], %[f6], %[f8] \n\t" + "sub.s %[f7], %[f2], %[f5] \n\t" + "add.s %[f0], %[f11], %[f0] \n\t" + "sub.s %[f2], %[f12], %[f1] \n\t" + "add.s %[f3], %[f13], %[f3] \n\t" +#endif + "swc1 %[f4], 16(%[a_ptr]) \n\t" + "swc1 %[f8], 20(%[a_ptr]) \n\t" + "swc1 %[f7], 8(%[a_ptr]) \n\t" + "swc1 %[f0], 12(%[a_ptr]) \n\t" + "swc1 %[f2], 24(%[a_ptr]) \n\t" + "swc1 %[f3], 28(%[a_ptr]) \n\t" + "lwc1 %[f0], 32(%[a_ptr]) \n\t" + "lwc1 %[f1], 36(%[a_ptr]) \n\t" + "lwc1 %[f2], 40(%[a_ptr]) \n\t" + "lwc1 %[f3], 44(%[a_ptr]) \n\t" + "lwc1 %[f4], 48(%[a_ptr]) \n\t" + "lwc1 %[f5], 52(%[a_ptr]) \n\t" + "lwc1 %[f6], 56(%[a_ptr]) \n\t" + "lwc1 %[f7], 60(%[a_ptr]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "lwc1 %[f11], 8(%[p2_rdft]) \n\t" + "lwc1 
%[f12], 12(%[p2_rdft]) \n\t" + "lwc1 %[f13], 8(%[second]) \n\t" + "lwc1 %[f14], 12(%[second]) \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f2], %[f8] \n\t" + "add.s %[f2], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f3], %[f6] \n\t" + "add.s %[f3], %[f0], %[f5] \n\t" + "sub.s %[f0], %[f0], %[f5] \n\t" + "add.s %[f5], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "swc1 %[f7], 32(%[a_ptr]) \n\t" + "swc1 %[f2], 36(%[a_ptr]) \n\t" + "mul.s %[f4], %[f10], %[f8] \n\t" +#if defined(MIPS32_R2_LE) + "mul.s %[f10], %[f10], %[f6] \n\t" + "mul.s %[f7], %[f11], %[f0] \n\t" + "mul.s %[f11], %[f11], %[f5] \n\t" + "mul.s %[f2], %[f13], %[f3] \n\t" + "mul.s %[f13], %[f13], %[f1] \n\t" + "madd.s %[f4], %[f4], %[f9], %[f6] \n\t" + "nmsub.s %[f10], %[f10], %[f9], %[f8] \n\t" + "nmsub.s %[f7], %[f7], %[f12], %[f5] \n\t" + "madd.s %[f11], %[f11], %[f12], %[f0] \n\t" + "nmsub.s %[f2], %[f2], %[f14], %[f1] \n\t" + "madd.s %[f13], %[f13], %[f14], %[f3] \n\t" +#else + "mul.s %[f2], %[f9], %[f6] \n\t" + "mul.s %[f10], %[f10], %[f6] \n\t" + "mul.s %[f9], %[f9], %[f8] \n\t" + "mul.s %[f7], %[f11], %[f0] \n\t" + "mul.s %[f8], %[f12], %[f5] \n\t" + "mul.s %[f11], %[f11], %[f5] \n\t" + "mul.s %[f12], %[f12], %[f0] \n\t" + "mul.s %[f5], %[f13], %[f3] \n\t" + "mul.s %[f0], %[f14], %[f1] \n\t" + "mul.s %[f13], %[f13], %[f1] \n\t" + "mul.s %[f14], %[f14], %[f3] \n\t" + "add.s %[f4], %[f4], %[f2] \n\t" + "sub.s %[f10], %[f10], %[f9] \n\t" + "sub.s %[f7], %[f7], %[f8] \n\t" + "add.s %[f11], %[f11], %[f12] \n\t" + "sub.s %[f2], %[f5], %[f0] \n\t" + "add.s %[f13], %[f13], %[f14] \n\t" +#endif + "swc1 %[f4], 48(%[a_ptr]) \n\t" + "swc1 %[f10], 52(%[a_ptr]) \n\t" + "swc1 %[f7], 40(%[a_ptr]) \n\t" + "swc1 %[f11], 44(%[a_ptr]) \n\t" + "swc1 %[f2], 56(%[a_ptr]) \n\t" + "swc1 %[f13], 60(%[a_ptr]) \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f9], 8(%[p1_rdft]) \n\t" + "addiu %[a_ptr], %[a_ptr], 64 \n\t" + "addiu %[p1_rdft], %[p1_rdft], 8 \n\t" + "addiu %[p2_rdft], %[p2_rdft], 16 \n\t" + 
"addiu %[first], %[first], 8 \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[second], %[second], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), + [f12] "=&f" (f12), [f13] "=&f" (f13), [f14] "=&f" (f14), + [a_ptr] "=&r" (a_ptr), [p1_rdft] "=&r" (p1_rdft), [first] "+r" (first), + [p2_rdft] "=&r" (p2_rdft), [count] "=&r" (count), [second] "+r" (second) + : [a] "r" (a), [rdft_w] "r" (rdft_w) + : "memory" + ); +} + +static void cftmdl_128_mips(float* a) { + float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14; + int tmp_a, count; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 0 \n\t" + "addiu %[count], $zero, 4 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f2], 32(%[tmp_a]) \n\t" + "lwc1 %[f4], 64(%[tmp_a]) \n\t" + "lwc1 %[f6], 96(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f3], 36(%[tmp_a]) \n\t" + "lwc1 %[f5], 68(%[tmp_a]) \n\t" + "lwc1 %[f7], 100(%[tmp_a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "add.s %[f4], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f6], %[f3] \n\t" + "sub.s %[f3], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f7], 0(%[tmp_a]) \n\t" + "swc1 %[f8], 64(%[tmp_a]) \n\t" + "swc1 %[f2], 36(%[tmp_a]) \n\t" + "swc1 %[f1], 100(%[tmp_a]) \n\t" + "swc1 %[f4], 4(%[tmp_a]) \n\t" + "swc1 %[f6], 68(%[tmp_a]) \n\t" + "swc1 %[f3], 32(%[tmp_a]) \n\t" + "swc1 
%[f0], 96(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count) + : [a] "r" (a) + : "memory" + ); + f9 = rdft_w[2]; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 128 \n\t" + "addiu %[count], $zero, 4 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f2], 32(%[tmp_a]) \n\t" + "lwc1 %[f5], 68(%[tmp_a]) \n\t" + "lwc1 %[f7], 100(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f3], 36(%[tmp_a]) \n\t" + "lwc1 %[f4], 64(%[tmp_a]) \n\t" + "lwc1 %[f6], 96(%[tmp_a]) \n\t" + "sub.s %[f8], %[f0], %[f2] \n\t" + "add.s %[f0], %[f0], %[f2] \n\t" + "sub.s %[f2], %[f5], %[f7] \n\t" + "add.s %[f5], %[f5], %[f7] \n\t" + "sub.s %[f7], %[f1], %[f3] \n\t" + "add.s %[f1], %[f1], %[f3] \n\t" + "sub.s %[f3], %[f4], %[f6] \n\t" + "add.s %[f4], %[f4], %[f6] \n\t" + "sub.s %[f6], %[f8], %[f2] \n\t" + "add.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f5], %[f1] \n\t" + "sub.s %[f5], %[f5], %[f1] \n\t" + "add.s %[f1], %[f3], %[f7] \n\t" + "sub.s %[f3], %[f3], %[f7] \n\t" + "add.s %[f7], %[f0], %[f4] \n\t" + "sub.s %[f0], %[f0], %[f4] \n\t" + "sub.s %[f4], %[f6], %[f1] \n\t" + "add.s %[f6], %[f6], %[f1] \n\t" + "sub.s %[f1], %[f3], %[f8] \n\t" + "add.s %[f3], %[f3], %[f8] \n\t" + "mul.s %[f4], %[f4], %[f9] \n\t" + "mul.s %[f6], %[f6], %[f9] \n\t" + "mul.s %[f1], %[f1], %[f9] \n\t" + "mul.s %[f3], %[f3], %[f9] \n\t" + "swc1 %[f7], 0(%[tmp_a]) \n\t" + "swc1 %[f2], 4(%[tmp_a]) \n\t" + "swc1 %[f5], 64(%[tmp_a]) \n\t" + "swc1 %[f0], 68(%[tmp_a]) \n\t" + "swc1 %[f4], 32(%[tmp_a]) \n\t" + "swc1 %[f6], 36(%[tmp_a]) \n\t" + "swc1 %[f1], 96(%[tmp_a]) \n\t" + "swc1 %[f3], 100(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + 
".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count) + : [a] "r" (a), [f9] "f" (f9) + : "memory" + ); + f10 = rdft_w[3]; + f11 = rdft_w[4]; + f12 = rdft_w[5]; + f13 = rdft_wk3ri_first[2]; + f14 = rdft_wk3ri_first[3]; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 256 \n\t" + "addiu %[count], $zero, 4 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f2], 32(%[tmp_a]) \n\t" + "lwc1 %[f4], 64(%[tmp_a]) \n\t" + "lwc1 %[f6], 96(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f3], 36(%[tmp_a]) \n\t" + "lwc1 %[f5], 68(%[tmp_a]) \n\t" + "lwc1 %[f7], 100(%[tmp_a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "sub.s %[f7], %[f8], %[f2] \n\t" + "add.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "sub.s %[f4], %[f6], %[f3] \n\t" + "add.s %[f6], %[f6], %[f3] \n\t" + "sub.s %[f3], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f8], 0(%[tmp_a]) \n\t" + "swc1 %[f6], 4(%[tmp_a]) \n\t" + "mul.s %[f5], %[f9], %[f7] \n\t" +#if defined(MIPS32_R2_LE) + "mul.s %[f7], %[f10], %[f7] \n\t" + "mul.s %[f8], %[f11], %[f3] \n\t" + "mul.s %[f3], %[f12], %[f3] \n\t" + "mul.s %[f6], %[f13], %[f0] \n\t" + "mul.s %[f0], %[f14], %[f0] \n\t" + "nmsub.s %[f5], %[f5], %[f10], %[f4] \n\t" + "madd.s %[f7], %[f7], %[f9], %[f4] \n\t" + "nmsub.s %[f8], %[f8], %[f12], %[f2] \n\t" + "madd.s %[f3], %[f3], %[f11], %[f2] \n\t" + "nmsub.s %[f6], %[f6], %[f14], %[f1] \n\t" + "madd.s %[f0], %[f0], %[f13], %[f1] \n\t" + "swc1 %[f5], 
64(%[tmp_a]) \n\t" + "swc1 %[f7], 68(%[tmp_a]) \n\t" +#else + "mul.s %[f8], %[f10], %[f4] \n\t" + "mul.s %[f4], %[f9], %[f4] \n\t" + "mul.s %[f7], %[f10], %[f7] \n\t" + "mul.s %[f6], %[f11], %[f3] \n\t" + "mul.s %[f3], %[f12], %[f3] \n\t" + "sub.s %[f5], %[f5], %[f8] \n\t" + "mul.s %[f8], %[f12], %[f2] \n\t" + "mul.s %[f2], %[f11], %[f2] \n\t" + "add.s %[f7], %[f4], %[f7] \n\t" + "mul.s %[f4], %[f13], %[f0] \n\t" + "mul.s %[f0], %[f14], %[f0] \n\t" + "sub.s %[f8], %[f6], %[f8] \n\t" + "mul.s %[f6], %[f14], %[f1] \n\t" + "mul.s %[f1], %[f13], %[f1] \n\t" + "add.s %[f3], %[f2], %[f3] \n\t" + "swc1 %[f5], 64(%[tmp_a]) \n\t" + "swc1 %[f7], 68(%[tmp_a]) \n\t" + "sub.s %[f6], %[f4], %[f6] \n\t" + "add.s %[f0], %[f1], %[f0] \n\t" +#endif + "swc1 %[f8], 32(%[tmp_a]) \n\t" + "swc1 %[f3], 36(%[tmp_a]) \n\t" + "swc1 %[f6], 96(%[tmp_a]) \n\t" + "swc1 %[f0], 100(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count) + : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11), + [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14) + : "memory" + ); + f11 = rdft_w[6]; + f12 = rdft_w[7]; + f13 = rdft_wk3ri_second[2]; + f14 = rdft_wk3ri_second[3]; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 384 \n\t" + "addiu %[count], $zero, 4 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f2], 32(%[tmp_a]) \n\t" + "lwc1 %[f3], 36(%[tmp_a]) \n\t" + "lwc1 %[f4], 64(%[tmp_a]) \n\t" + "lwc1 %[f5], 68(%[tmp_a]) \n\t" + "lwc1 %[f6], 96(%[tmp_a]) \n\t" + "lwc1 %[f7], 100(%[tmp_a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + 
"add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "sub.s %[f7], %[f2], %[f8] \n\t" + "add.s %[f2], %[f2], %[f8] \n\t" + "add.s %[f8], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "sub.s %[f4], %[f3], %[f6] \n\t" + "add.s %[f3], %[f3], %[f6] \n\t" + "sub.s %[f6], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f2], 0(%[tmp_a]) \n\t" + "swc1 %[f3], 4(%[tmp_a]) \n\t" + "mul.s %[f5], %[f10], %[f7] \n\t" +#if defined(MIPS32_R2_LE) + "mul.s %[f7], %[f9], %[f7] \n\t" + "mul.s %[f2], %[f12], %[f8] \n\t" + "mul.s %[f8], %[f11], %[f8] \n\t" + "mul.s %[f3], %[f14], %[f1] \n\t" + "mul.s %[f1], %[f13], %[f1] \n\t" + "madd.s %[f5], %[f5], %[f9], %[f4] \n\t" + "msub.s %[f7], %[f7], %[f10], %[f4] \n\t" + "msub.s %[f2], %[f2], %[f11], %[f6] \n\t" + "madd.s %[f8], %[f8], %[f12], %[f6] \n\t" + "msub.s %[f3], %[f3], %[f13], %[f0] \n\t" + "madd.s %[f1], %[f1], %[f14], %[f0] \n\t" + "swc1 %[f5], 64(%[tmp_a]) \n\t" + "swc1 %[f7], 68(%[tmp_a]) \n\t" +#else + "mul.s %[f2], %[f9], %[f4] \n\t" + "mul.s %[f4], %[f10], %[f4] \n\t" + "mul.s %[f7], %[f9], %[f7] \n\t" + "mul.s %[f3], %[f11], %[f6] \n\t" + "mul.s %[f6], %[f12], %[f6] \n\t" + "add.s %[f5], %[f5], %[f2] \n\t" + "sub.s %[f7], %[f4], %[f7] \n\t" + "mul.s %[f2], %[f12], %[f8] \n\t" + "mul.s %[f8], %[f11], %[f8] \n\t" + "mul.s %[f4], %[f14], %[f1] \n\t" + "mul.s %[f1], %[f13], %[f1] \n\t" + "sub.s %[f2], %[f3], %[f2] \n\t" + "mul.s %[f3], %[f13], %[f0] \n\t" + "mul.s %[f0], %[f14], %[f0] \n\t" + "add.s %[f8], %[f8], %[f6] \n\t" + "swc1 %[f5], 64(%[tmp_a]) \n\t" + "swc1 %[f7], 68(%[tmp_a]) \n\t" + "sub.s %[f3], %[f3], %[f4] \n\t" + "add.s %[f1], %[f1], %[f0] \n\t" +#endif + "swc1 %[f2], 32(%[tmp_a]) \n\t" + "swc1 %[f8], 36(%[tmp_a]) \n\t" + "swc1 %[f3], 96(%[tmp_a]) \n\t" + "swc1 %[f1], 100(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" 
(f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count) + : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11), + [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14) + : "memory" + ); +} + +static void cftfsub_128_mips(float* a) { + float f0, f1, f2, f3, f4, f5, f6, f7, f8; + int tmp_a, count; + + cft1st_128(a); + cftmdl_128(a); + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 0 \n\t" + "addiu %[count], $zero, 16 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f2], 128(%[tmp_a]) \n\t" + "lwc1 %[f4], 256(%[tmp_a]) \n\t" + "lwc1 %[f6], 384(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f3], 132(%[tmp_a]) \n\t" + "lwc1 %[f5], 260(%[tmp_a]) \n\t" + "lwc1 %[f7], 388(%[tmp_a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "add.s %[f4], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f6], %[f3] \n\t" + "sub.s %[f3], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f7], 0(%[tmp_a]) \n\t" + "swc1 %[f8], 256(%[tmp_a]) \n\t" + "swc1 %[f2], 132(%[tmp_a]) \n\t" + "swc1 %[f1], 388(%[tmp_a]) \n\t" + "swc1 %[f4], 4(%[tmp_a]) \n\t" + "swc1 %[f6], 260(%[tmp_a]) \n\t" + "swc1 %[f3], 128(%[tmp_a]) \n\t" + "swc1 %[f0], 384(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), 
[tmp_a] "=&r" (tmp_a),
+      [count] "=&r" (count)
+    : [a] "r" (a)
+    : "memory"
+  );
+}
+
+// MIPS FPU version of the backward complex FFT stage on the 128-float buffer
+// |a|, in place. After the cft1st_128 and cftmdl_128 passes (called through
+// the dispatch pointers) the assembly loop runs 16 times, advancing 8 bytes
+// (one complex value) per iteration, and combines the values at byte offsets
+// 0, 128, 256 and 384 from the cursor, i.e. a[j], a[j + 32], a[j + 64] and
+// a[j + 96] with their imaginary neighbours.
+// NOTE(review): |tmp_a| holds an address in an `int` and is advanced with
+// `addiu`, so this assumes 32-bit pointers - confirm before reusing on a
+// 64-bit MIPS target.
+static void cftbsub_128_mips(float* a) {
+  float f0, f1, f2, f3, f4, f5, f6, f7, f8;
+  int tmp_a, count;
+
+  cft1st_128(a);
+  cftmdl_128(a);
+
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"
+    // cursor = a, 16 iterations
+    "addiu %[tmp_a], %[a], 0 \n\t"
+    "addiu %[count], $zero, 16 \n\t"
+    "1: \n\t"
+    "addiu %[count], %[count], -1 \n\t"
+    // real parts into f0/f2/f4/f6, imaginary parts into f1/f3/f5/f7
+    "lwc1 %[f0], 0(%[tmp_a]) \n\t"
+    "lwc1 %[f2], 128(%[tmp_a]) \n\t"
+    "lwc1 %[f4], 256(%[tmp_a]) \n\t"
+    "lwc1 %[f6], 384(%[tmp_a]) \n\t"
+    "lwc1 %[f1], 4(%[tmp_a]) \n\t"
+    "lwc1 %[f3], 132(%[tmp_a]) \n\t"
+    "lwc1 %[f5], 260(%[tmp_a]) \n\t"
+    "lwc1 %[f7], 388(%[tmp_a]) \n\t"
+    // first level: pairwise sums and differences; note that the imaginary
+    // difference of the first pair is taken as (second - first)
+    "add.s %[f8], %[f0], %[f2] \n\t"
+    "sub.s %[f0], %[f0], %[f2] \n\t"
+    "add.s %[f2], %[f4], %[f6] \n\t"
+    "sub.s %[f4], %[f4], %[f6] \n\t"
+    "add.s %[f6], %[f1], %[f3] \n\t"
+    "sub.s %[f1], %[f3], %[f1] \n\t"
+    "add.s %[f3], %[f5], %[f7] \n\t"
+    "sub.s %[f5], %[f5], %[f7] \n\t"
+    // second level: combine the two halves
+    "add.s %[f7], %[f8], %[f2] \n\t"
+    "sub.s %[f8], %[f8], %[f2] \n\t"
+    "sub.s %[f2], %[f1], %[f4] \n\t"
+    "add.s %[f1], %[f1], %[f4] \n\t"
+    "add.s %[f4], %[f3], %[f6] \n\t"
+    "sub.s %[f6], %[f3], %[f6] \n\t"
+    "sub.s %[f3], %[f0], %[f5] \n\t"
+    "add.s %[f0], %[f0], %[f5] \n\t"
+    // the value stored at offset 4 is the negated sum of all four
+    // imaginary inputs
+    "neg.s %[f4], %[f4] \n\t"
+    "swc1 %[f7], 0(%[tmp_a]) \n\t"
+    "swc1 %[f8], 256(%[tmp_a]) \n\t"
+    "swc1 %[f2], 132(%[tmp_a]) \n\t"
+    "swc1 %[f1], 388(%[tmp_a]) \n\t"
+    "swc1 %[f6], 260(%[tmp_a]) \n\t"
+    "swc1 %[f3], 128(%[tmp_a]) \n\t"
+    "swc1 %[f0], 384(%[tmp_a]) \n\t"
+    "swc1 %[f4], 4(%[tmp_a]) \n\t"
+    // under .set noreorder the instruction after the branch fills its delay
+    // slot, so the cursor advances on every pass, including the last one
+    "bgtz %[count], 1b \n\t"
+    " addiu %[tmp_a], %[tmp_a], 8 \n\t"
+    ".set pop \n\t"
+    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
+      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
+      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
+    : [a] "r" (a)
+    : "memory"
+  );
+}
+
+static void rftfsub_128_mips(float* a) {
+  const float* c = rdft_w + 32;
+  const float f0 = 0.5f;
+  float* a1 = &a[2];
+ float* a2 = &a[126]; + const float* c1 = &c[1]; + const float* c2 = &c[31]; + float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15; + int count; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lwc1 %[f6], 0(%[c2]) \n\t" + "lwc1 %[f1], 0(%[a1]) \n\t" + "lwc1 %[f2], 0(%[a2]) \n\t" + "lwc1 %[f3], 4(%[a1]) \n\t" + "lwc1 %[f4], 4(%[a2]) \n\t" + "lwc1 %[f5], 0(%[c1]) \n\t" + "sub.s %[f6], %[f0], %[f6] \n\t" + "sub.s %[f7], %[f1], %[f2] \n\t" + "add.s %[f8], %[f3], %[f4] \n\t" + "addiu %[count], $zero, 15 \n\t" + "mul.s %[f9], %[f6], %[f7] \n\t" + "mul.s %[f6], %[f6], %[f8] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f8], %[f5], %[f8] \n\t" + "mul.s %[f5], %[f5], %[f7] \n\t" + "sub.s %[f9], %[f9], %[f8] \n\t" + "add.s %[f6], %[f6], %[f5] \n\t" +#else + "nmsub.s %[f9], %[f9], %[f5], %[f8] \n\t" + "madd.s %[f6], %[f6], %[f5], %[f7] \n\t" +#endif + "sub.s %[f1], %[f1], %[f9] \n\t" + "add.s %[f2], %[f2], %[f9] \n\t" + "sub.s %[f3], %[f3], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "swc1 %[f3], 4(%[a1]) \n\t" + "swc1 %[f4], 4(%[a2]) \n\t" + "addiu %[a1], %[a1], 8 \n\t" + "addiu %[a2], %[a2], -8 \n\t" + "addiu %[c1], %[c1], 4 \n\t" + "addiu %[c2], %[c2], -4 \n\t" + "1: \n\t" + "lwc1 %[f6], 0(%[c2]) \n\t" + "lwc1 %[f1], 0(%[a1]) \n\t" + "lwc1 %[f2], 0(%[a2]) \n\t" + "lwc1 %[f3], 4(%[a1]) \n\t" + "lwc1 %[f4], 4(%[a2]) \n\t" + "lwc1 %[f5], 0(%[c1]) \n\t" + "sub.s %[f6], %[f0], %[f6] \n\t" + "sub.s %[f7], %[f1], %[f2] \n\t" + "add.s %[f8], %[f3], %[f4] \n\t" + "lwc1 %[f10], -4(%[c2]) \n\t" + "lwc1 %[f11], 8(%[a1]) \n\t" + "lwc1 %[f12], -8(%[a2]) \n\t" + "mul.s %[f9], %[f6], %[f7] \n\t" + "mul.s %[f6], %[f6], %[f8] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f8], %[f5], %[f8] \n\t" + "mul.s %[f5], %[f5], %[f7] \n\t" + "lwc1 %[f13], 12(%[a1]) \n\t" + "lwc1 %[f14], -4(%[a2]) \n\t" + "lwc1 %[f15], 4(%[c1]) \n\t" + "sub.s %[f9], %[f9], %[f8] \n\t" + "add.s %[f6], %[f6], %[f5] 
\n\t" +#else + "lwc1 %[f13], 12(%[a1]) \n\t" + "lwc1 %[f14], -4(%[a2]) \n\t" + "lwc1 %[f15], 4(%[c1]) \n\t" + "nmsub.s %[f9], %[f9], %[f5], %[f8] \n\t" + "madd.s %[f6], %[f6], %[f5], %[f7] \n\t" +#endif + "sub.s %[f10], %[f0], %[f10] \n\t" + "sub.s %[f5], %[f11], %[f12] \n\t" + "add.s %[f7], %[f13], %[f14] \n\t" + "sub.s %[f1], %[f1], %[f9] \n\t" + "add.s %[f2], %[f2], %[f9] \n\t" + "sub.s %[f3], %[f3], %[f6] \n\t" + "mul.s %[f8], %[f10], %[f5] \n\t" + "mul.s %[f10], %[f10], %[f7] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f9], %[f15], %[f7] \n\t" + "mul.s %[f15], %[f15], %[f5] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "sub.s %[f8], %[f8], %[f9] \n\t" + "add.s %[f10], %[f10], %[f15] \n\t" +#else + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "nmsub.s %[f8], %[f8], %[f15], %[f7] \n\t" + "madd.s %[f10], %[f10], %[f15], %[f5] \n\t" +#endif + "swc1 %[f3], 4(%[a1]) \n\t" + "swc1 %[f4], 4(%[a2]) \n\t" + "sub.s %[f11], %[f11], %[f8] \n\t" + "add.s %[f12], %[f12], %[f8] \n\t" + "sub.s %[f13], %[f13], %[f10] \n\t" + "sub.s %[f14], %[f14], %[f10] \n\t" + "addiu %[c2], %[c2], -8 \n\t" + "addiu %[c1], %[c1], 8 \n\t" + "swc1 %[f11], 8(%[a1]) \n\t" + "swc1 %[f12], -8(%[a2]) \n\t" + "swc1 %[f13], 12(%[a1]) \n\t" + "swc1 %[f14], -4(%[a2]) \n\t" + "addiu %[a1], %[a1], 16 \n\t" + "addiu %[count], %[count], -1 \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[a2], %[a2], -16 \n\t" + ".set pop \n\t" + : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2), + [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4), + [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), + [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12), + [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15), + [count] "=&r" (count) + : [f0] "f" (f0) + : "memory" + ); +} + +static void rftbsub_128_mips(float* a) { + const float *c = rdft_w + 32; + 
const float f0 = 0.5f; /* f0 is the constant 0.5 fed to the asm; each step forms (f0 - c2[0]) and pairs it with c1[0]. a1 and c1 walk upward from a[2] and c[1]; a2 and c2 walk downward from a[126] and c[31]. */ + float* a1 = &a[2]; + float* a2 = &a[126]; + const float* c1 = &c[1]; + const float* c2 = &c[31]; + float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15; + int count; + /* The two sign flips are done before the asm; the asm only reads and writes a[2..63] (through a1) and a[66..127] (through a2), so a[1] and a[65] are not touched by it. */ + a[1] = -a[1]; + a[65] = -a[65]; + /* One peeled step (one a1/a2 pair) followed by a 15-pass loop that handles two pairs per pass: 31 pairs in total. Per pair, with wkr = 0.5 - c2[0], wki = c1[0], xr = a1[0] - a2[0], xi = a1[1] + a2[1]: yr = wkr * xr + wki * xi and yi = wkr * xi - wki * xr; then a1[0] -= yr, a2[0] += yr, a1[1] = yi - a1[1], a2[1] = yi - a2[1]. When MIPS32_R2_LE is defined the madd.s and nmsub.s forms replace the separate mul.s plus add.s/sub.s. The last addiu sits in the branch delay slot of bgtz (.set noreorder). */ + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lwc1 %[f6], 0(%[c2]) \n\t" + "lwc1 %[f1], 0(%[a1]) \n\t" + "lwc1 %[f2], 0(%[a2]) \n\t" + "lwc1 %[f3], 4(%[a1]) \n\t" + "lwc1 %[f4], 4(%[a2]) \n\t" + "lwc1 %[f5], 0(%[c1]) \n\t" + "sub.s %[f6], %[f0], %[f6] \n\t" + "sub.s %[f7], %[f1], %[f2] \n\t" + "add.s %[f8], %[f3], %[f4] \n\t" + "addiu %[count], $zero, 15 \n\t" + "mul.s %[f9], %[f6], %[f7] \n\t" + "mul.s %[f6], %[f6], %[f8] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f8], %[f5], %[f8] \n\t" + "mul.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f9], %[f9], %[f8] \n\t" + "sub.s %[f6], %[f6], %[f5] \n\t" +#else + "madd.s %[f9], %[f9], %[f5], %[f8] \n\t" + "nmsub.s %[f6], %[f6], %[f5], %[f7] \n\t" +#endif + "sub.s %[f1], %[f1], %[f9] \n\t" + "add.s %[f2], %[f2], %[f9] \n\t" + "sub.s %[f3], %[f6], %[f3] \n\t" + "sub.s %[f4], %[f6], %[f4] \n\t" + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "swc1 %[f3], 4(%[a1]) \n\t" + "swc1 %[f4], 4(%[a2]) \n\t" + "addiu %[a1], %[a1], 8 \n\t" + "addiu %[a2], %[a2], -8 \n\t" + "addiu %[c1], %[c1], 4 \n\t" + "addiu %[c2], %[c2], -4 \n\t" + "1: \n\t" + "lwc1 %[f6], 0(%[c2]) \n\t" + "lwc1 %[f1], 0(%[a1]) \n\t" + "lwc1 %[f2], 0(%[a2]) \n\t" + "lwc1 %[f3], 4(%[a1]) \n\t" + "lwc1 %[f4], 4(%[a2]) \n\t" + "lwc1 %[f5], 0(%[c1]) \n\t" + "sub.s %[f6], %[f0], %[f6] \n\t" + "sub.s %[f7], %[f1], %[f2] \n\t" + "add.s %[f8], %[f3], %[f4] \n\t" + "lwc1 %[f10], -4(%[c2]) \n\t" + "lwc1 %[f11], 8(%[a1]) \n\t" + "lwc1 %[f12], -8(%[a2]) \n\t" + "mul.s %[f9], %[f6], %[f7] \n\t" + "mul.s %[f6], %[f6], %[f8] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f8], %[f5], %[f8] \n\t" + "mul.s %[f5], %[f5], %[f7] \n\t" + "lwc1 %[f13], 12(%[a1]) \n\t" + "lwc1 %[f14], -4(%[a2]) \n\t" + "lwc1 %[f15], 
4(%[c1]) \n\t" + "add.s %[f9], %[f9], %[f8] \n\t" + "sub.s %[f6], %[f6], %[f5] \n\t" +#else + "lwc1 %[f13], 12(%[a1]) \n\t" + "lwc1 %[f14], -4(%[a2]) \n\t" + "lwc1 %[f15], 4(%[c1]) \n\t" + "madd.s %[f9], %[f9], %[f5], %[f8] \n\t" + "nmsub.s %[f6], %[f6], %[f5], %[f7] \n\t" +#endif + "sub.s %[f10], %[f0], %[f10] \n\t" + "sub.s %[f5], %[f11], %[f12] \n\t" + "add.s %[f7], %[f13], %[f14] \n\t" + "sub.s %[f1], %[f1], %[f9] \n\t" + "add.s %[f2], %[f2], %[f9] \n\t" + "sub.s %[f3], %[f6], %[f3] \n\t" + "mul.s %[f8], %[f10], %[f5] \n\t" + "mul.s %[f10], %[f10], %[f7] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f9], %[f15], %[f7] \n\t" + "mul.s %[f15], %[f15], %[f5] \n\t" + "sub.s %[f4], %[f6], %[f4] \n\t" + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "add.s %[f8], %[f8], %[f9] \n\t" + "sub.s %[f10], %[f10], %[f15] \n\t" +#else + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "sub.s %[f4], %[f6], %[f4] \n\t" + "madd.s %[f8], %[f8], %[f15], %[f7] \n\t" + "nmsub.s %[f10], %[f10], %[f15], %[f5] \n\t" +#endif + "swc1 %[f3], 4(%[a1]) \n\t" + "swc1 %[f4], 4(%[a2]) \n\t" + "sub.s %[f11], %[f11], %[f8] \n\t" + "add.s %[f12], %[f12], %[f8] \n\t" + "sub.s %[f13], %[f10], %[f13] \n\t" + "sub.s %[f14], %[f10], %[f14] \n\t" + "addiu %[c2], %[c2], -8 \n\t" + "addiu %[c1], %[c1], 8 \n\t" + "swc1 %[f11], 8(%[a1]) \n\t" + "swc1 %[f12], -8(%[a2]) \n\t" + "swc1 %[f13], 12(%[a1]) \n\t" + "swc1 %[f14], -4(%[a2]) \n\t" + "addiu %[a1], %[a1], 16 \n\t" + "addiu %[count], %[count], -1 \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[a2], %[a2], -16 \n\t" + ".set pop \n\t" + : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2), + [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4), + [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), + [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12), + [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15), + [count] "=&r" (count) + : [f0] "f" (f0) + : "memory" + ); +} + 
+void aec_rdft_init_mips(void) { + /* Points seven function-pointer hooks at the _mips implementations. The hooks themselves are declared outside this view (presumably in aec_rdft.h; confirm). Takes no arguments and returns nothing. */ + cft1st_128 = cft1st_128_mips; + cftmdl_128 = cftmdl_128_mips; + rftfsub_128 = rftfsub_128_mips; + rftbsub_128 = rftbsub_128_mips; + cftfsub_128 = cftfsub_128_mips; + cftbsub_128 = cftbsub_128_mips; + bitrv2_128 = bitrv2_128_mips; +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_neon.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_neon.c new file mode 100644 index 00000000..43b6a68c --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_neon.c @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * The rdft AEC algorithm, neon version of speed-critical functions. + * + * Based on the sse2 version. 
+ */ + + +#include "webrtc/modules/audio_processing/aec/aec_rdft.h" + +#include <arm_neon.h> + /* Per-lane multipliers (-1, +1, -1, +1). Multiplying by this table flips the sign of lanes 0 and 2 and leaves lanes 1 and 3 unchanged. */ +static const ALIGN16_BEG float ALIGN16_END + k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f}; + /* NEON implementation assigned to the cft1st_128 hook by aec_rdft_init_neon(). Works in place on a[0..127], 16 floats per iteration (8 iterations). k2 advances by 4 per iteration and indexes the rdft_wk1r, rdft_wk1i, rdft_wk2r, rdft_wk2i, rdft_wk3r and rdft_wk3i tables. Each iteration regroups the four loaded vectors by low/high halves, forms sums and differences, multiplies by the table values (using the lane-swapped copy from vrev64q_f32 for the second term), then regroups and stores to the same 16 slots. */ +static void cft1st_128_neon(float* a) { + const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign); + int j, k2; + + for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) { + float32x4_t a00v = vld1q_f32(&a[j + 0]); + float32x4_t a04v = vld1q_f32(&a[j + 4]); + float32x4_t a08v = vld1q_f32(&a[j + 8]); + float32x4_t a12v = vld1q_f32(&a[j + 12]); + float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v)); + float32x4_t a23v = vcombine_f32(vget_high_f32(a00v), vget_high_f32(a08v)); + float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v)); + float32x4_t a67v = vcombine_f32(vget_high_f32(a04v), vget_high_f32(a12v)); + const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2]); + const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2]); + const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2]); + const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2]); + const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2]); + const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2]); + float32x4_t x0v = vaddq_f32(a01v, a23v); + const float32x4_t x1v = vsubq_f32(a01v, a23v); + const float32x4_t x2v = vaddq_f32(a45v, a67v); + const float32x4_t x3v = vsubq_f32(a45v, a67v); + const float32x4_t x3w = vrev64q_f32(x3v); + float32x4_t x0w; + a01v = vaddq_f32(x0v, x2v); + x0v = vsubq_f32(x0v, x2v); + x0w = vrev64q_f32(x0v); + a45v = vmulq_f32(wk2rv, x0v); + a45v = vmlaq_f32(a45v, wk2iv, x0w); + x0v = vmlaq_f32(x1v, x3w, vec_swap_sign); + x0w = vrev64q_f32(x0v); + a23v = vmulq_f32(wk1rv, x0v); + a23v = vmlaq_f32(a23v, wk1iv, x0w); + x0v = vmlsq_f32(x1v, x3w, vec_swap_sign); + x0w = vrev64q_f32(x0v); + a67v = vmulq_f32(wk3rv, x0v); + a67v = vmlaq_f32(a67v, wk3iv, x0w); + a00v = vcombine_f32(vget_low_f32(a01v), vget_low_f32(a23v)); + a04v = vcombine_f32(vget_low_f32(a45v), vget_low_f32(a67v)); + a08v = 
vcombine_f32(vget_high_f32(a01v), vget_high_f32(a23v)); + a12v = vcombine_f32(vget_high_f32(a45v), vget_high_f32(a67v)); + vst1q_f32(&a[j + 0], a00v); + vst1q_f32(&a[j + 4], a04v); + vst1q_f32(&a[j + 8], a08v); + vst1q_f32(&a[j + 12], a12v); + } +} + /* NEON implementation assigned to the cftmdl_128 hook by aec_rdft_init_neon(). Two stages, each a loop of four iterations (j advances by 2 across l = 8) that loads eight 2-float pairs at offsets 0, 8, 16, 24, 32, 40, 48 and 56 from j and stores all eight back in place. Stage 1 runs j = 0..6 (touching a[0..63]) and multiplies the values stored at j + 40 and j + 56 by the cftmdl_wk1r table. Stage 2 runs j = 64..70 (touching a[64..127]) and uses the rdft_wk1r/i, rdft_wk2r/i and rdft_wk3r/i tables at index k2 = 4. */ +static void cftmdl_128_neon(float* a) { + int j; + const int l = 8; + const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign); + float32x4_t wk1rv = vld1q_f32(cftmdl_wk1r); + + for (j = 0; j < l; j += 2) { + const float32x2_t a_00 = vld1_f32(&a[j + 0]); + const float32x2_t a_08 = vld1_f32(&a[j + 8]); + const float32x2_t a_32 = vld1_f32(&a[j + 32]); + const float32x2_t a_40 = vld1_f32(&a[j + 40]); + const float32x4_t a_00_32 = vcombine_f32(a_00, a_32); + const float32x4_t a_08_40 = vcombine_f32(a_08, a_40); + const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40); + const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40); + const float32x2_t a_16 = vld1_f32(&a[j + 16]); + const float32x2_t a_24 = vld1_f32(&a[j + 24]); + const float32x2_t a_48 = vld1_f32(&a[j + 48]); + const float32x2_t a_56 = vld1_f32(&a[j + 56]); + const float32x4_t a_16_48 = vcombine_f32(a_16, a_48); + const float32x4_t a_24_56 = vcombine_f32(a_24, a_56); + const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56); + const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56); + const float32x4_t xx0 = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1); + const float32x4_t x1_x3_add = + vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1); + const float32x4_t x1_x3_sub = + vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1); + const float32x2_t yy0_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 0); + const float32x2_t yy0_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 0); + const float32x4_t yy0_as = vcombine_f32(yy0_a, yy0_s); + const float32x2_t 
yy1_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 1); + const float32x2_t yy1_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 1); + const float32x4_t yy1_as = vcombine_f32(yy1_a, yy1_s); + const float32x4_t yy0 = vmlaq_f32(yy0_as, vec_swap_sign, yy1_as); + const float32x4_t yy4 = vmulq_f32(wk1rv, yy0); + const float32x4_t xx1_rev = vrev64q_f32(xx1); + const float32x4_t yy4_rev = vrev64q_f32(yy4); + + vst1_f32(&a[j + 0], vget_low_f32(xx0)); + vst1_f32(&a[j + 32], vget_high_f32(xx0)); + vst1_f32(&a[j + 16], vget_low_f32(xx1)); + vst1_f32(&a[j + 48], vget_high_f32(xx1_rev)); + /* The pair at j + 48 was stored from the lane-swapped copy of xx1; only its first element is then negated, a[j + 49] is left as stored. */ + a[j + 48] = -a[j + 48]; + + vst1_f32(&a[j + 8], vget_low_f32(x1_x3_add)); + vst1_f32(&a[j + 24], vget_low_f32(x1_x3_sub)); + vst1_f32(&a[j + 40], vget_low_f32(yy4)); + vst1_f32(&a[j + 56], vget_high_f32(yy4_rev)); + } + /* Stage 2: same load/store offsets, starting at j = k = 64. Each of the three difference terms is multiplied by a table pair (r table times the value plus i table times the lane-swapped value). wk1rv is reloaded from rdft_wk1r here, replacing the cftmdl_wk1r values used in stage 1. */ + { + const int k = 64; + const int k1 = 2; + const int k2 = 2 * k1; + const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2 + 0]); + const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2 + 0]); + const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2 + 0]); + const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2 + 0]); + const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2 + 0]); + wk1rv = vld1q_f32(&rdft_wk1r[k2 + 0]); + for (j = k; j < l + k; j += 2) { + const float32x2_t a_00 = vld1_f32(&a[j + 0]); + const float32x2_t a_08 = vld1_f32(&a[j + 8]); + const float32x2_t a_32 = vld1_f32(&a[j + 32]); + const float32x2_t a_40 = vld1_f32(&a[j + 40]); + const float32x4_t a_00_32 = vcombine_f32(a_00, a_32); + const float32x4_t a_08_40 = vcombine_f32(a_08, a_40); + const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40); + const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40); + const float32x2_t a_16 = vld1_f32(&a[j + 16]); + const float32x2_t a_24 = vld1_f32(&a[j + 24]); + const float32x2_t a_48 = vld1_f32(&a[j + 48]); + const float32x2_t a_56 = vld1_f32(&a[j + 56]); + const float32x4_t a_16_48 = vcombine_f32(a_16, a_48); + const float32x4_t a_24_56 = vcombine_f32(a_24, a_56); + const 
float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56); + const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56); + const float32x4_t xx = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1); + const float32x4_t x1_x3_add = + vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1); + const float32x4_t x1_x3_sub = + vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1); + float32x4_t xx4 = vmulq_f32(wk2rv, xx1); + float32x4_t xx12 = vmulq_f32(wk1rv, x1_x3_add); + float32x4_t xx22 = vmulq_f32(wk3rv, x1_x3_sub); + xx4 = vmlaq_f32(xx4, wk2iv, vrev64q_f32(xx1)); + xx12 = vmlaq_f32(xx12, wk1iv, vrev64q_f32(x1_x3_add)); + xx22 = vmlaq_f32(xx22, wk3iv, vrev64q_f32(x1_x3_sub)); + + vst1_f32(&a[j + 0], vget_low_f32(xx)); + vst1_f32(&a[j + 32], vget_high_f32(xx)); + vst1_f32(&a[j + 16], vget_low_f32(xx4)); + vst1_f32(&a[j + 48], vget_high_f32(xx4)); + vst1_f32(&a[j + 8], vget_low_f32(xx12)); + vst1_f32(&a[j + 40], vget_high_f32(xx12)); + vst1_f32(&a[j + 24], vget_low_f32(xx22)); + vst1_f32(&a[j + 56], vget_high_f32(xx22)); + } + } +} + /* Returns the four lanes of the argument in reverse order: the two 64-bit halves are exchanged first, then the two lanes inside each half. */ +__inline static float32x4_t reverse_order_f32x4(float32x4_t in) { + // A B C D -> C D A B + const float32x4_t rev = vcombine_f32(vget_high_f32(in), vget_low_f32(in)); + // C D A B -> D C B A + return vrev64q_f32(rev); +} + /* NEON implementation assigned to the rftfsub_128 hook by aec_rdft_init_neon(). In place on a[2..63] and a[66..127], with c = rdft_w + 32. For each j2 = 2, 4, ..., 62 (j1 = j2 / 2, k2 = 128 - j2, k1 = 32 - j1): wkr = 0.5 - c[k1], wki = c[j1], xr = a[j2] - a[k2], xi = a[j2 + 1] + a[k2 + 1], yr = wkr * xr - wki * xi, yi = wkr * xi + wki * xr; then a[j2] -= yr, a[j2 + 1] -= yi, a[k2] += yr, a[k2 + 1] -= yi. The vector loop does four j2 values per pass (seven passes, j2 = 2..56); the scalar loop finishes j2 = 58, 60 and 62. */ +static void rftfsub_128_neon(float* a) { + const float* c = rdft_w + 32; + int j1, j2; + const float32x4_t mm_half = vdupq_n_f32(0.5f); + + // Vectorized code (four at once). + // Note: the commented numbers are the indexes for the first iteration of the loop. + for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { + // Load 'wk'. 
+ const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4, + const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31, + const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31, + const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28, + const float32x4_t wki_ = c_j1; // 1, 2, 3, 4, + // Load and shuffle 'a'. + // 2, 4, 6, 8, 3, 5, 7, 9 + float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]); + // 120, 122, 124, 126, 121, 123, 125, 127, + const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]); + // 126, 124, 122, 120 + const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]); + // 127, 125, 123, 121 + const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]); + // Calculate 'x'. + const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0); + // 2-126, 4-124, 6-122, 8-120, + const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1); + // 3-127, 5-125, 7-123, 9-121, + // Calculate product into 'y'. + // yr = wkr * xr - wki * xi; + // yi = wkr * xi + wki * xr; + const float32x4_t a_ = vmulq_f32(wkr_, xr_); + const float32x4_t b_ = vmulq_f32(wki_, xi_); + const float32x4_t c_ = vmulq_f32(wkr_, xi_); + const float32x4_t d_ = vmulq_f32(wki_, xr_); + const float32x4_t yr_ = vsubq_f32(a_, b_); // 2-126, 4-124, 6-122, 8-120, + const float32x4_t yi_ = vaddq_f32(c_, d_); // 3-127, 5-125, 7-123, 9-121, + // Update 'a'. + // a[j2 + 0] -= yr; + // a[j2 + 1] -= yi; + // a[k2 + 0] += yr; + // a[k2 + 1] -= yi; + // 126, 124, 122, 120, + const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_); + // 127, 125, 123, 121, + const float32x4_t a_k2_p1n = vsubq_f32(a_k2_p1, yi_); + // Shuffle in right order and store. 
+ const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n); + const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n); + // 124, 125, 126, 127, 120, 121, 122, 123 + const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr); + // 2, 4, 6, 8, + a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_); + // 3, 5, 7, 9, + a_j2_p.val[1] = vsubq_f32(a_j2_p.val[1], yi_); + // 2, 3, 4, 5, 6, 7, 8, 9, + vst2q_f32(&a[0 + j2], a_j2_p); + + vst1q_f32(&a[122 - j2], a_k2_n.val[1]); + vst1q_f32(&a[126 - j2], a_k2_n.val[0]); + } + + // Scalar code for the remaining items. + for (; j2 < 64; j1 += 1, j2 += 2) { + const int k2 = 128 - j2; + const int k1 = 32 - j1; + const float wkr = 0.5f - c[k1]; + const float wki = c[j1]; + const float xr = a[j2 + 0] - a[k2 + 0]; + const float xi = a[j2 + 1] + a[k2 + 1]; + const float yr = wkr * xr - wki * xi; + const float yi = wkr * xi + wki * xr; + a[j2 + 0] -= yr; + a[j2 + 1] -= yi; + a[k2 + 0] += yr; + a[k2 + 1] -= yi; + } +} + /* NEON implementation assigned to the rftbsub_128 hook by aec_rdft_init_neon(). Same index walk and wkr, wki, xr, xi as rftfsub_128_neon, but with yr = wkr * xr + wki * xi and yi = wkr * xi - wki * xr, and with the odd elements replaced by yi minus the old value: a[j2] = a[j2] - yr, a[j2 + 1] = yi - a[j2 + 1], a[k2] = yr + a[k2], a[k2 + 1] = yi - a[k2 + 1]. a[1] is negated before the loops and a[65] after them; neither is touched by the loops. */ +static void rftbsub_128_neon(float* a) { + const float* c = rdft_w + 32; + int j1, j2; + const float32x4_t mm_half = vdupq_n_f32(0.5f); + + a[1] = -a[1]; + // Vectorized code (four at once). + // Note: the commented numbers are the indexes for the first iteration of the loop. + for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { + // Load 'wk'. + const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4, + const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31, + const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31, + const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28, + const float32x4_t wki_ = c_j1; // 1, 2, 3, 4, + // Load and shuffle 'a'. 
+ // 2, 4, 6, 8, 3, 5, 7, 9 + float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]); + // 120, 122, 124, 126, 121, 123, 125, 127, + const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]); + // 126, 124, 122, 120 + const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]); + // 127, 125, 123, 121 + const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]); + // Calculate 'x'. + const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0); + // 2-126, 4-124, 6-122, 8-120, + const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1); + // 3-127, 5-125, 7-123, 9-121, + // Calculate product into 'y'. + // yr = wkr * xr + wki * xi; + // yi = wkr * xi - wki * xr; + const float32x4_t a_ = vmulq_f32(wkr_, xr_); + const float32x4_t b_ = vmulq_f32(wki_, xi_); + const float32x4_t c_ = vmulq_f32(wkr_, xi_); + const float32x4_t d_ = vmulq_f32(wki_, xr_); + const float32x4_t yr_ = vaddq_f32(a_, b_); // 2-126, 4-124, 6-122, 8-120, + const float32x4_t yi_ = vsubq_f32(c_, d_); // 3-127, 5-125, 7-123, 9-121, + // Update 'a'. + // a[j2 + 0] = a[j2 + 0] - yr; + // a[j2 + 1] = yi - a[j2 + 1]; + // a[k2 + 0] = yr + a[k2 + 0]; + // a[k2 + 1] = yi - a[k2 + 1]; + // 126, 124, 122, 120, + const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_); + // 127, 125, 123, 121, + const float32x4_t a_k2_p1n = vsubq_f32(yi_, a_k2_p1); + // Shuffle in right order and store. + // 2, 3, 4, 5, 6, 7, 8, 9, + const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n); + const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n); + // 124, 125, 126, 127, 120, 121, 122, 123 + const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr); + // 2, 4, 6, 8, + a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_); + // 3, 5, 7, 9, + a_j2_p.val[1] = vsubq_f32(yi_, a_j2_p.val[1]); + // 2, 3, 4, 5, 6, 7, 8, 9, + vst2q_f32(&a[0 + j2], a_j2_p); + + vst1q_f32(&a[122 - j2], a_k2_n.val[1]); + vst1q_f32(&a[126 - j2], a_k2_n.val[0]); + } + + // Scalar code for the remaining items. 
+ for (; j2 < 64; j1 += 1, j2 += 2) { + const int k2 = 128 - j2; + const int k1 = 32 - j1; + const float wkr = 0.5f - c[k1]; + const float wki = c[j1]; + const float xr = a[j2 + 0] - a[k2 + 0]; + const float xi = a[j2 + 1] + a[k2 + 1]; + const float yr = wkr * xr + wki * xi; + const float yi = wkr * xi - wki * xr; + a[j2 + 0] = a[j2 + 0] - yr; + a[j2 + 1] = yi - a[j2 + 1]; + a[k2 + 0] = yr + a[k2 + 0]; + a[k2 + 1] = yi - a[k2 + 1]; + } + a[65] = -a[65]; +} + /* Points the cft1st_128, cftmdl_128, rftfsub_128 and rftbsub_128 function-pointer hooks at the NEON implementations defined in this file. The hooks are declared outside this file (presumably in aec_rdft.h; confirm). */ +void aec_rdft_init_neon(void) { + cft1st_128 = cft1st_128_neon; + cftmdl_128 = cftmdl_128_neon; + rftfsub_128 = rftfsub_128_neon; + rftbsub_128 = rftbsub_128_neon; +} + diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c new file mode 100644 index 00000000..b4e453ff --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c @@ -0,0 +1,427 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/aec/aec_rdft.h" + +#include <emmintrin.h> + /* Per-lane multipliers (-1, +1, -1, +1). Read with _mm_load_ps below, so the ALIGN16 macros presumably give it 16-byte alignment (confirm against their definition). */ +static const ALIGN16_BEG float ALIGN16_END + k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f}; + /* SSE2 implementation assigned to the cft1st_128 hook by aec_rdft_init_sse2(). Works in place on a[0..127], 16 floats per iteration (8 iterations); a is read and written with unaligned loads and stores. k2 advances by 4 per iteration and indexes the rdft_wk1r/i, rdft_wk2r/i and rdft_wk3r/i tables, which are read with the aligned _mm_load_ps. Each iteration regroups the four loaded vectors by low/high halves, forms sums and differences, multiplies by the table values (using a pairwise lane-swapped copy for the second term), then regroups and stores to the same 16 slots. */ +static void cft1st_128_SSE2(float* a) { + const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign); + int j, k2; + + for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) { + __m128 a00v = _mm_loadu_ps(&a[j + 0]); + __m128 a04v = _mm_loadu_ps(&a[j + 4]); + __m128 a08v = _mm_loadu_ps(&a[j + 8]); + __m128 a12v = _mm_loadu_ps(&a[j + 12]); + __m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0)); + __m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2)); + __m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1, 0)); + __m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3, 2)); + + const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]); + const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]); + const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]); + const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]); + const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]); + const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]); + __m128 x0v = _mm_add_ps(a01v, a23v); + const __m128 x1v = _mm_sub_ps(a01v, a23v); + const __m128 x2v = _mm_add_ps(a45v, a67v); + const __m128 x3v = _mm_sub_ps(a45v, a67v); + __m128 x0w; + a01v = _mm_add_ps(x0v, x2v); + x0v = _mm_sub_ps(x0v, x2v); + x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); + { + const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v); + const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w); + a45v = _mm_add_ps(a45_0v, a45_1v); + } + { + __m128 a23_0v, a23_1v; + const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0, 1)); + const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w); + x0v = _mm_add_ps(x1v, x3s); + x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); + a23_0v = _mm_mul_ps(wk1rv, x0v); + a23_1v = _mm_mul_ps(wk1iv, x0w); + a23v = _mm_add_ps(a23_0v, a23_1v); + + x0v = _mm_sub_ps(x1v, x3s); + x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); + } + { + 
const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v); + const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w); + a67v = _mm_add_ps(a67_0v, a67_1v); + } + + a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1, 0)); + a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1, 0)); + a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3, 2)); + a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3, 2)); + _mm_storeu_ps(&a[j + 0], a00v); + _mm_storeu_ps(&a[j + 4], a04v); + _mm_storeu_ps(&a[j + 8], a08v); + _mm_storeu_ps(&a[j + 12], a12v); + } +} + /* SSE2 implementation assigned to the cftmdl_128 hook by aec_rdft_init_sse2(). Two stages, each a loop of four iterations (j0 advances by 2 across l = 8) that loads eight 2-float pairs with 64-bit loads at offsets 0, 8, 16, 24, 32, 40, 48 and 56 from j0 and stores all eight back in place. Stage 1 runs j0 = 0..6 (touching a[0..63]) and multiplies the values stored at j0 + 40 and j0 + 56 by the cftmdl_wk1r table. Stage 2 runs j0 = 64..70 (touching a[64..127]) and uses the rdft_wk1r/i, rdft_wk2r/i and rdft_wk3r/i tables at index k2 = 4. */ +static void cftmdl_128_SSE2(float* a) { + const int l = 8; + const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign); + int j0; + + __m128 wk1rv = _mm_load_ps(cftmdl_wk1r); + for (j0 = 0; j0 < l; j0 += 2) { + const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); + const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); + const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]); + const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]); + const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00), + _mm_castsi128_ps(a_32), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08), + _mm_castsi128_ps(a_40), + _MM_SHUFFLE(1, 0, 1, 0)); + __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); + const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); + + const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]); + const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]); + const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]); + const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]); + const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16), + _mm_castsi128_ps(a_48), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24), + _mm_castsi128_ps(a_56), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56); + const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); + + const __m128 xx0 = 
_mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + + const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32( + _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1))); + const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1); + const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + + const __m128 yy0 = + _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(2, 2, 2, 2)); + const __m128 yy1 = + _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(3, 3, 3, 3)); + const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1); + const __m128 yy3 = _mm_add_ps(yy0, yy2); + const __m128 yy4 = _mm_mul_ps(wk1rv, yy3); + + _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 32], + _mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2))); + + _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1)); + /* The pair at j0 + 48 is stored from the upper half of xx1 with its two lanes exchanged; only the first element of that pair is then negated. */ + _mm_storel_epi64( + (__m128i*)&a[j0 + 48], + _mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3))); + a[j0 + 48] = -a[j0 + 48]; + + _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add)); + _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub)); + + _mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 56], + _mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3))); + } + /* Stage 2: same load/store offsets, starting at j0 = k = 64. Each of the three difference terms is multiplied by a table pair (r table times the value plus i table times the pairwise lane-swapped value). wk1rv is reloaded from rdft_wk1r here, replacing the cftmdl_wk1r values used in stage 1. */ + { + int k = 64; + int k1 = 2; + int k2 = 2 * k1; + const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2 + 0]); + const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2 + 0]); + const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2 + 0]); + const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2 + 0]); + const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2 + 0]); + wk1rv = _mm_load_ps(&rdft_wk1r[k2 + 0]); + for (j0 = k; j0 < l + k; j0 += 2) { + const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); + const 
__m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); + const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]); + const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]); + const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00), + _mm_castsi128_ps(a_32), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08), + _mm_castsi128_ps(a_40), + _MM_SHUFFLE(1, 0, 1, 0)); + __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); + const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); + + const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]); + const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]); + const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]); + const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]); + const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16), + _mm_castsi128_ps(a_48), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24), + _mm_castsi128_ps(a_56), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56); + const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); + + const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const __m128 xx2 = _mm_mul_ps(xx1, wk2rv); + const __m128 xx3 = + _mm_mul_ps(wk2iv, + _mm_castsi128_ps(_mm_shuffle_epi32( + _mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1)))); + const __m128 xx4 = _mm_add_ps(xx2, xx3); + + const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32( + _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1))); + const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1); + const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + + const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv); + const __m128 xx11 = _mm_mul_ps( + wk1iv, + 
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add), + _MM_SHUFFLE(2, 3, 0, 1)))); + const __m128 xx12 = _mm_add_ps(xx10, xx11); + + const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv); + const __m128 xx21 = _mm_mul_ps( + wk3iv, + _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub), + _MM_SHUFFLE(2, 3, 0, 1)))); + const __m128 xx22 = _mm_add_ps(xx20, xx21); + + _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 32], + _mm_shuffle_epi32(_mm_castps_si128(xx), _MM_SHUFFLE(3, 2, 3, 2))); + + _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 48], + _mm_shuffle_epi32(_mm_castps_si128(xx4), _MM_SHUFFLE(3, 2, 3, 2))); + + _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 40], + _mm_shuffle_epi32(_mm_castps_si128(xx12), _MM_SHUFFLE(3, 2, 3, 2))); + + _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 56], + _mm_shuffle_epi32(_mm_castps_si128(xx22), _MM_SHUFFLE(3, 2, 3, 2))); + } + } +} + /* SSE2 implementation assigned to the rftfsub_128 hook by aec_rdft_init_sse2(). In place on a[2..63] and a[66..127], with c = rdft_w + 32. For each j2 = 2, 4, ..., 62 (j1 = j2 / 2, k2 = 128 - j2, k1 = 32 - j1): wkr = 0.5 - c[k1], wki = c[j1], xr = a[j2] - a[k2], xi = a[j2 + 1] + a[k2 + 1], yr = wkr * xr - wki * xi, yi = wkr * xi + wki * xr; then a[j2] -= yr, a[j2 + 1] -= yi, a[k2] += yr, a[k2 + 1] -= yi. The vector loop does four j2 values per pass (seven passes, j2 = 2..56) using unaligned loads and stores; the scalar loop finishes j2 = 58, 60 and 62. */ +static void rftfsub_128_SSE2(float* a) { + const float* c = rdft_w + 32; + int j1, j2, k1, k2; + float wkr, wki, xr, xi, yr, yi; + + static const ALIGN16_BEG float ALIGN16_END + k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f}; + const __m128 mm_half = _mm_load_ps(k_half); + + // Vectorized code (four at once). + // Note: the commented numbers are the indexes for the first iteration of the loop. + for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { + // Load 'wk'. + const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4, + const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, + const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31, + const __m128 wkr_ = + _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28, + const __m128 wki_ = c_j1; // 1, 2, 3, 4, + // Load and shuffle 'a'. 
+ const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, + const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, + const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123, + const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127, + const __m128 a_j2_p0 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8, + const __m128 a_j2_p1 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9, + const __m128 a_k2_p0 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120, + const __m128 a_k2_p1 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121, + // Calculate 'x'. + const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0); + // 2-126, 4-124, 6-122, 8-120, + const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1); + // 3-127, 5-125, 7-123, 9-121, + // Calculate product into 'y'. + // yr = wkr * xr - wki * xi; + // yi = wkr * xi + wki * xr; + const __m128 a_ = _mm_mul_ps(wkr_, xr_); + const __m128 b_ = _mm_mul_ps(wki_, xi_); + const __m128 c_ = _mm_mul_ps(wkr_, xi_); + const __m128 d_ = _mm_mul_ps(wki_, xr_); + const __m128 yr_ = _mm_sub_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120, + const __m128 yi_ = _mm_add_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121, + // Update 'a'. + // a[j2 + 0] -= yr; + // a[j2 + 1] -= yi; + // a[k2 + 0] += yr; + // a[k2 + 1] -= yi; + const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8, + const __m128 a_j2_p1n = _mm_sub_ps(a_j2_p1, yi_); // 3, 5, 7, 9, + const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_); // 126, 124, 122, 120, + const __m128 a_k2_p1n = _mm_sub_ps(a_k2_p1, yi_); // 127, 125, 123, 121, + // Shuffle in right order and store. 
+ const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n); + // 2, 3, 4, 5, + const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n); + // 6, 7, 8, 9, + const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n); + // 122, 123, 120, 121, + const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n); + // 126, 127, 124, 125, + const __m128 a_k2_0n = _mm_shuffle_ps( + a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123, + const __m128 a_k2_4n = _mm_shuffle_ps( + a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127, + _mm_storeu_ps(&a[0 + j2], a_j2_0n); + _mm_storeu_ps(&a[4 + j2], a_j2_4n); + _mm_storeu_ps(&a[122 - j2], a_k2_0n); + _mm_storeu_ps(&a[126 - j2], a_k2_4n); + } + // Scalar code for the remaining items. + for (; j2 < 64; j1 += 1, j2 += 2) { + k2 = 128 - j2; + k1 = 32 - j1; + wkr = 0.5f - c[k1]; + wki = c[j1]; + xr = a[j2 + 0] - a[k2 + 0]; + xi = a[j2 + 1] + a[k2 + 1]; + yr = wkr * xr - wki * xi; + yi = wkr * xi + wki * xr; + a[j2 + 0] -= yr; + a[j2 + 1] -= yi; + a[k2 + 0] += yr; + a[k2 + 1] -= yi; + } +} + /* SSE2 implementation assigned to the rftbsub_128 hook by aec_rdft_init_sse2(). Same index walk and wkr, wki, xr, xi as rftfsub_128_SSE2, but with yr = wkr * xr + wki * xi and yi = wkr * xi - wki * xr, and with the odd elements replaced by yi minus the old value: a[j2] = a[j2] - yr, a[j2 + 1] = yi - a[j2 + 1], a[k2] = yr + a[k2], a[k2 + 1] = yi - a[k2 + 1]. a[1] is negated before the loops and a[65] after them; neither is touched by the loops. */ +static void rftbsub_128_SSE2(float* a) { + const float* c = rdft_w + 32; + int j1, j2, k1, k2; + float wkr, wki, xr, xi, yr, yi; + + static const ALIGN16_BEG float ALIGN16_END + k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f}; + const __m128 mm_half = _mm_load_ps(k_half); + + a[1] = -a[1]; + // Vectorized code (four at once). + // Note: the commented numbers are the indexes for the first iteration of the loop. + for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { + // Load 'wk'. + const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4, + const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, + const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31, + const __m128 wkr_ = + _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28, + const __m128 wki_ = c_j1; // 1, 2, 3, 4, + // Load and shuffle 'a'. 
+ const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, + const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, + const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123, + const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127, + const __m128 a_j2_p0 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8, + const __m128 a_j2_p1 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9, + const __m128 a_k2_p0 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120, + const __m128 a_k2_p1 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121, + // Calculate 'x'. + const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0); + // 2-126, 4-124, 6-122, 8-120, + const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1); + // 3-127, 5-125, 7-123, 9-121, + // Calculate product into 'y'. + // yr = wkr * xr + wki * xi; + // yi = wkr * xi - wki * xr; + const __m128 a_ = _mm_mul_ps(wkr_, xr_); + const __m128 b_ = _mm_mul_ps(wki_, xi_); + const __m128 c_ = _mm_mul_ps(wkr_, xi_); + const __m128 d_ = _mm_mul_ps(wki_, xr_); + const __m128 yr_ = _mm_add_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120, + const __m128 yi_ = _mm_sub_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121, + // Update 'a'. + // a[j2 + 0] = a[j2 + 0] - yr; + // a[j2 + 1] = yi - a[j2 + 1]; + // a[k2 + 0] = yr + a[k2 + 0]; + // a[k2 + 1] = yi - a[k2 + 1]; + const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8, + const __m128 a_j2_p1n = _mm_sub_ps(yi_, a_j2_p1); // 3, 5, 7, 9, + const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_); // 126, 124, 122, 120, + const __m128 a_k2_p1n = _mm_sub_ps(yi_, a_k2_p1); // 127, 125, 123, 121, + // Shuffle in right order and store. 
+ const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n); + // 2, 3, 4, 5, + const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n); + // 6, 7, 8, 9, + const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n); + // 122, 123, 120, 121, + const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n); + // 126, 127, 124, 125, + const __m128 a_k2_0n = _mm_shuffle_ps( + a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123, + const __m128 a_k2_4n = _mm_shuffle_ps( + a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127, + _mm_storeu_ps(&a[0 + j2], a_j2_0n); + _mm_storeu_ps(&a[4 + j2], a_j2_4n); + _mm_storeu_ps(&a[122 - j2], a_k2_0n); + _mm_storeu_ps(&a[126 - j2], a_k2_4n); + } + // Scalar code for the remaining items. + for (; j2 < 64; j1 += 1, j2 += 2) { + k2 = 128 - j2; + k1 = 32 - j1; + wkr = 0.5f - c[k1]; + wki = c[j1]; + xr = a[j2 + 0] - a[k2 + 0]; + xi = a[j2 + 1] + a[k2 + 1]; + yr = wkr * xr + wki * xi; + yi = wkr * xi - wki * xr; + a[j2 + 0] = a[j2 + 0] - yr; + a[j2 + 1] = yi - a[j2 + 1]; + a[k2 + 0] = yr + a[k2 + 0]; + a[k2 + 1] = yi - a[k2 + 1]; + } + a[65] = -a[65]; +} + +void aec_rdft_init_sse2(void) { + cft1st_128 = cft1st_128_SSE2; + cftmdl_128 = cftmdl_128_SSE2; + rftfsub_128 = rftfsub_128_SSE2; + rftbsub_128 = rftbsub_128_SSE2; +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.c new file mode 100644 index 00000000..99c39efa --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for + * clock skew by resampling the farend signal. + */ + +#include "webrtc/modules/audio_processing/aec/aec_resampler.h" + +#include <assert.h> +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "webrtc/modules/audio_processing/aec/aec_core.h" + +enum { + kEstimateLengthFrames = 400 +}; + +typedef struct { + float buffer[kResamplerBufferSize]; + float position; + + int deviceSampleRateHz; + int skewData[kEstimateLengthFrames]; + int skewDataIndex; + float skewEstimate; +} AecResampler; + +static int EstimateSkew(const int* rawSkew, + int size, + int absLimit, + float* skewEst); + +void* WebRtcAec_CreateResampler() { + return malloc(sizeof(AecResampler)); +} + +int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz) { + AecResampler* obj = (AecResampler*)resampInst; + memset(obj->buffer, 0, sizeof(obj->buffer)); + obj->position = 0.0; + + obj->deviceSampleRateHz = deviceSampleRateHz; + memset(obj->skewData, 0, sizeof(obj->skewData)); + obj->skewDataIndex = 0; + obj->skewEstimate = 0.0; + + return 0; +} + +void WebRtcAec_FreeResampler(void* resampInst) { + AecResampler* obj = (AecResampler*)resampInst; + free(obj); +} + +void WebRtcAec_ResampleLinear(void* resampInst, + const float* inspeech, + size_t size, + float skew, + float* outspeech, + size_t* size_out) { + AecResampler* obj = (AecResampler*)resampInst; + + float* y; + float be, tnew; + size_t tn, mm; + + assert(size <= 2 * FRAME_LEN); + assert(resampInst != NULL); + assert(inspeech != NULL); + assert(outspeech != NULL); + assert(size_out != NULL); + + // Add new frame data in lookahead + memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay], + inspeech, + size * sizeof(inspeech[0])); + + // Sample rate ratio + be = 1 + skew; + + // Loop over input frame + mm = 0; + y = &obj->buffer[FRAME_LEN]; 
// Point at current frame + + tnew = be * mm + obj->position; + tn = (size_t)tnew; + + while (tn < size) { + + // Interpolation + outspeech[mm] = y[tn] + (tnew - tn) * (y[tn + 1] - y[tn]); + mm++; + + tnew = be * mm + obj->position; + tn = (int)tnew; + } + + *size_out = mm; + obj->position += (*size_out) * be - size; + + // Shift buffer + memmove(obj->buffer, + &obj->buffer[size], + (kResamplerBufferSize - size) * sizeof(obj->buffer[0])); +} + +int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst) { + AecResampler* obj = (AecResampler*)resampInst; + int err = 0; + + if (obj->skewDataIndex < kEstimateLengthFrames) { + obj->skewData[obj->skewDataIndex] = rawSkew; + obj->skewDataIndex++; + } else if (obj->skewDataIndex == kEstimateLengthFrames) { + err = EstimateSkew( + obj->skewData, kEstimateLengthFrames, obj->deviceSampleRateHz, skewEst); + obj->skewEstimate = *skewEst; + obj->skewDataIndex++; + } else { + *skewEst = obj->skewEstimate; + } + + return err; +} + +int EstimateSkew(const int* rawSkew, + int size, + int deviceSampleRateHz, + float* skewEst) { + const int absLimitOuter = (int)(0.04f * deviceSampleRateHz); + const int absLimitInner = (int)(0.0025f * deviceSampleRateHz); + int i = 0; + int n = 0; + float rawAvg = 0; + float err = 0; + float rawAbsDev = 0; + int upperLimit = 0; + int lowerLimit = 0; + float cumSum = 0; + float x = 0; + float x2 = 0; + float y = 0; + float xy = 0; + float xAvg = 0; + float denom = 0; + float skew = 0; + + *skewEst = 0; // Set in case of error below. + for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) { + n++; + rawAvg += rawSkew[i]; + } + } + + if (n == 0) { + return -1; + } + assert(n > 0); + rawAvg /= n; + + for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) { + err = rawSkew[i] - rawAvg; + rawAbsDev += err >= 0 ? 
err : -err; + } + } + assert(n > 0); + rawAbsDev /= n; + upperLimit = (int)(rawAvg + 5 * rawAbsDev + 1); // +1 for ceiling. + lowerLimit = (int)(rawAvg - 5 * rawAbsDev - 1); // -1 for floor. + + n = 0; + for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitInner && rawSkew[i] > -absLimitInner) || + (rawSkew[i] < upperLimit && rawSkew[i] > lowerLimit)) { + n++; + cumSum += rawSkew[i]; + x += n; + x2 += n * n; + y += cumSum; + xy += n * cumSum; + } + } + + if (n == 0) { + return -1; + } + assert(n > 0); + xAvg = x / n; + denom = x2 - xAvg * x; + + if (denom != 0) { + skew = (xy - xAvg * y) / denom; + } + + *skewEst = skew; + return 0; +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.h new file mode 100644 index 00000000..a5002c15 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_ + +#include "webrtc/modules/audio_processing/aec/aec_core.h" + +enum { + kResamplingDelay = 1 +}; +enum { + kResamplerBufferSize = FRAME_LEN * 4 +}; + +// Unless otherwise specified, functions return 0 on success and -1 on error. +void* WebRtcAec_CreateResampler(); // Returns NULL on error. +int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz); +void WebRtcAec_FreeResampler(void* resampInst); + +// Estimates skew from raw measurement. 
+int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst); + +// Resamples input using linear interpolation. +void WebRtcAec_ResampleLinear(void* resampInst, + const float* inspeech, + size_t size, + float skew, + float* outspeech, + size_t* size_out); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation.c new file mode 100644 index 00000000..0f5cd31d --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation.c @@ -0,0 +1,923 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * Contains the API functions for the AEC. + */ +#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h" + +#include <math.h> +#ifdef WEBRTC_AEC_DEBUG_DUMP +#include <stdio.h> +#endif +#include <stdlib.h> +#include <string.h> + +#include "webrtc/common_audio/ring_buffer.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/aec/aec_core.h" +#include "webrtc/modules/audio_processing/aec/aec_resampler.h" +#include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h" +#include "webrtc/typedefs.h" + +// Measured delays [ms] +// Device Chrome GTP +// MacBook Air 10 +// MacBook Retina 10 100 +// MacPro 30? +// +// Win7 Desktop 70 80? +// Win7 T430s 110 +// Win8 T420s 70 +// +// Daisy 50 +// Pixel (w/ preproc?) 240 +// Pixel (w/o preproc?) 
110 110 + +// The extended filter mode gives us the flexibility to ignore the system's +// reported delays. We do this for platforms which we believe provide results +// which are incompatible with the AEC's expectations. Based on measurements +// (some provided above) we set a conservative (i.e. lower than measured) +// fixed delay. +// +// WEBRTC_UNTRUSTED_DELAY will only have an impact when |extended_filter_mode| +// is enabled. See the note along with |DelayCorrection| in +// echo_cancellation_impl.h for more details on the mode. +// +// Justification: +// Chromium/Mac: Here, the true latency is so low (~10-20 ms), that it plays +// havoc with the AEC's buffering. To avoid this, we set a fixed delay of 20 ms +// and then compensate by rewinding by 10 ms (in wideband) through +// kDelayDiffOffsetSamples. This trick does not seem to work for larger rewind +// values, but fortunately this is sufficient. +// +// Chromium/Linux(ChromeOS): The values we get on this platform don't correspond +// well to reality. The variance doesn't match the AEC's buffer changes, and the +// bulk values tend to be too low. However, the range across different hardware +// appears to be too large to choose a single value. +// +// GTP/Linux(ChromeOS): TBD, but for the moment we will trust the values. +#if defined(WEBRTC_CHROMIUM_BUILD) && defined(WEBRTC_MAC) +#define WEBRTC_UNTRUSTED_DELAY +#endif + +#if defined(WEBRTC_UNTRUSTED_DELAY) && defined(WEBRTC_MAC) +static const int kDelayDiffOffsetSamples = -160; +#else +// Not enabled for now. +static const int kDelayDiffOffsetSamples = 0; +#endif + +#if defined(WEBRTC_MAC) +static const int kFixedDelayMs = 20; +#else +static const int kFixedDelayMs = 50; +#endif +#if !defined(WEBRTC_UNTRUSTED_DELAY) +static const int kMinTrustedDelayMs = 20; +#endif +static const int kMaxTrustedDelayMs = 500; + +// Maximum length of resampled signal. 
Must be an integer multiple of frames +// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN +// The factor of 2 handles wb, and the + 1 is as a safety margin +// TODO(bjornv): Replace with kResamplerBufferSize +#define MAX_RESAMP_LEN (5 * FRAME_LEN) + +static const int kMaxBufSizeStart = 62; // In partitions +static const int sampMsNb = 8; // samples per ms in nb +static const int initCheck = 42; + +#ifdef WEBRTC_AEC_DEBUG_DUMP +int webrtc_aec_instance_count = 0; +#endif + +// Estimates delay to set the position of the far-end buffer read pointer +// (controlled by knownDelay) +static void EstBufDelayNormal(Aec* aecInst); +static void EstBufDelayExtended(Aec* aecInst); +static int ProcessNormal(Aec* self, + const float* const* near, + size_t num_bands, + float* const* out, + size_t num_samples, + int16_t reported_delay_ms, + int32_t skew); +static void ProcessExtended(Aec* self, + const float* const* near, + size_t num_bands, + float* const* out, + size_t num_samples, + int16_t reported_delay_ms, + int32_t skew); + +void* WebRtcAec_Create() { + Aec* aecpc = malloc(sizeof(Aec)); + + if (!aecpc) { + return NULL; + } + + aecpc->aec = WebRtcAec_CreateAec(); + if (!aecpc->aec) { + WebRtcAec_Free(aecpc); + return NULL; + } + aecpc->resampler = WebRtcAec_CreateResampler(); + if (!aecpc->resampler) { + WebRtcAec_Free(aecpc); + return NULL; + } + // Create far-end pre-buffer. The buffer size has to be large enough for + // largest possible drift compensation (kResamplerBufferSize) + "almost" an + // FFT buffer (PART_LEN2 - 1). 
+ aecpc->far_pre_buf = + WebRtc_CreateBuffer(PART_LEN2 + kResamplerBufferSize, sizeof(float)); + if (!aecpc->far_pre_buf) { + WebRtcAec_Free(aecpc); + return NULL; + } + + aecpc->initFlag = 0; + aecpc->lastError = 0; + +#ifdef WEBRTC_AEC_DEBUG_DUMP + { + char filename[64]; + sprintf(filename, "aec_buf%d.dat", webrtc_aec_instance_count); + aecpc->bufFile = fopen(filename, "wb"); + sprintf(filename, "aec_skew%d.dat", webrtc_aec_instance_count); + aecpc->skewFile = fopen(filename, "wb"); + sprintf(filename, "aec_delay%d.dat", webrtc_aec_instance_count); + aecpc->delayFile = fopen(filename, "wb"); + webrtc_aec_instance_count++; + } +#endif + + return aecpc; +} + +void WebRtcAec_Free(void* aecInst) { + Aec* aecpc = aecInst; + + if (aecpc == NULL) { + return; + } + + WebRtc_FreeBuffer(aecpc->far_pre_buf); + +#ifdef WEBRTC_AEC_DEBUG_DUMP + fclose(aecpc->bufFile); + fclose(aecpc->skewFile); + fclose(aecpc->delayFile); +#endif + + WebRtcAec_FreeAec(aecpc->aec); + WebRtcAec_FreeResampler(aecpc->resampler); + free(aecpc); +} + +int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) { + Aec* aecpc = aecInst; + AecConfig aecConfig; + + if (sampFreq != 8000 && + sampFreq != 16000 && + sampFreq != 32000 && + sampFreq != 48000) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + aecpc->sampFreq = sampFreq; + + if (scSampFreq < 1 || scSampFreq > 96000) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + aecpc->scSampFreq = scSampFreq; + + // Initialize echo canceller core + if (WebRtcAec_InitAec(aecpc->aec, aecpc->sampFreq) == -1) { + aecpc->lastError = AEC_UNSPECIFIED_ERROR; + return -1; + } + + if (WebRtcAec_InitResampler(aecpc->resampler, aecpc->scSampFreq) == -1) { + aecpc->lastError = AEC_UNSPECIFIED_ERROR; + return -1; + } + + WebRtc_InitBuffer(aecpc->far_pre_buf); + WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN); // Start overlap. 
+ + aecpc->initFlag = initCheck; // indicates that initialization has been done + + if (aecpc->sampFreq == 32000 || aecpc->sampFreq == 48000) { + aecpc->splitSampFreq = 16000; + } else { + aecpc->splitSampFreq = sampFreq; + } + + aecpc->delayCtr = 0; + aecpc->sampFactor = (aecpc->scSampFreq * 1.0f) / aecpc->splitSampFreq; + // Sampling frequency multiplier (SWB is processed as 160 frame size). + aecpc->rate_factor = aecpc->splitSampFreq / 8000; + + aecpc->sum = 0; + aecpc->counter = 0; + aecpc->checkBuffSize = 1; + aecpc->firstVal = 0; + + // We skip the startup_phase completely (setting to 0) if DA-AEC is enabled, + // but not extended_filter mode. + aecpc->startup_phase = WebRtcAec_extended_filter_enabled(aecpc->aec) || + !WebRtcAec_delay_agnostic_enabled(aecpc->aec); + aecpc->bufSizeStart = 0; + aecpc->checkBufSizeCtr = 0; + aecpc->msInSndCardBuf = 0; + aecpc->filtDelay = -1; // -1 indicates an initialized state. + aecpc->timeForDelayChange = 0; + aecpc->knownDelay = 0; + aecpc->lastDelayDiff = 0; + + aecpc->skewFrCtr = 0; + aecpc->resample = kAecFalse; + aecpc->highSkewCtr = 0; + aecpc->skew = 0; + + aecpc->farend_started = 0; + + // Default settings. 
+ aecConfig.nlpMode = kAecNlpModerate; + aecConfig.skewMode = kAecFalse; + aecConfig.metricsMode = kAecFalse; + aecConfig.delay_logging = kAecFalse; + + if (WebRtcAec_set_config(aecpc, aecConfig) == -1) { + aecpc->lastError = AEC_UNSPECIFIED_ERROR; + return -1; + } + + return 0; +} + +// only buffer L band for farend +int32_t WebRtcAec_BufferFarend(void* aecInst, + const float* farend, + size_t nrOfSamples) { + Aec* aecpc = aecInst; + size_t newNrOfSamples = nrOfSamples; + float new_farend[MAX_RESAMP_LEN]; + const float* farend_ptr = farend; + + if (farend == NULL) { + aecpc->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + + if (aecpc->initFlag != initCheck) { + aecpc->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + + // number of samples == 160 for SWB input + if (nrOfSamples != 80 && nrOfSamples != 160) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + + if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) { + // Resample and get a new number of samples + WebRtcAec_ResampleLinear(aecpc->resampler, + farend, + nrOfSamples, + aecpc->skew, + new_farend, + &newNrOfSamples); + farend_ptr = new_farend; + } + + aecpc->farend_started = 1; + WebRtcAec_SetSystemDelay( + aecpc->aec, WebRtcAec_system_delay(aecpc->aec) + (int)newNrOfSamples); + + // Write the time-domain data to |far_pre_buf|. + WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_ptr, newNrOfSamples); + + // Transform to frequency domain if we have enough data. + while (WebRtc_available_read(aecpc->far_pre_buf) >= PART_LEN2) { + // We have enough data to pass to the FFT, hence read PART_LEN2 samples. + { + float* ptmp = NULL; + float tmp[PART_LEN2]; + WebRtc_ReadBuffer(aecpc->far_pre_buf, (void**)&ptmp, tmp, PART_LEN2); + WebRtcAec_BufferFarendPartition(aecpc->aec, ptmp); +#ifdef WEBRTC_AEC_DEBUG_DUMP + WebRtc_WriteBuffer( + WebRtcAec_far_time_buf(aecpc->aec), &ptmp[PART_LEN], 1); +#endif + } + + // Rewind |far_pre_buf| PART_LEN samples for overlap before continuing. 
+ WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN); + } + + return 0; +} + +int32_t WebRtcAec_Process(void* aecInst, + const float* const* nearend, + size_t num_bands, + float* const* out, + size_t nrOfSamples, + int16_t msInSndCardBuf, + int32_t skew) { + Aec* aecpc = aecInst; + int32_t retVal = 0; + + if (out == NULL) { + aecpc->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + + if (aecpc->initFlag != initCheck) { + aecpc->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + + // number of samples == 160 for SWB input + if (nrOfSamples != 80 && nrOfSamples != 160) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + + if (msInSndCardBuf < 0) { + msInSndCardBuf = 0; + aecpc->lastError = AEC_BAD_PARAMETER_WARNING; + retVal = -1; + } else if (msInSndCardBuf > kMaxTrustedDelayMs) { + // The clamping is now done in ProcessExtended/Normal(). + aecpc->lastError = AEC_BAD_PARAMETER_WARNING; + retVal = -1; + } + + // This returns the value of aec->extended_filter_enabled. 
+ if (WebRtcAec_extended_filter_enabled(aecpc->aec)) { + ProcessExtended(aecpc, + nearend, + num_bands, + out, + nrOfSamples, + msInSndCardBuf, + skew); + } else { + if (ProcessNormal(aecpc, + nearend, + num_bands, + out, + nrOfSamples, + msInSndCardBuf, + skew) != 0) { + retVal = -1; + } + } + +#ifdef WEBRTC_AEC_DEBUG_DUMP + { + int16_t far_buf_size_ms = (int16_t)(WebRtcAec_system_delay(aecpc->aec) / + (sampMsNb * aecpc->rate_factor)); + (void)fwrite(&far_buf_size_ms, 2, 1, aecpc->bufFile); + (void)fwrite( + &aecpc->knownDelay, sizeof(aecpc->knownDelay), 1, aecpc->delayFile); + } +#endif + + return retVal; +} + +int WebRtcAec_set_config(void* handle, AecConfig config) { + Aec* self = (Aec*)handle; + if (self->initFlag != initCheck) { + self->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + + if (config.skewMode != kAecFalse && config.skewMode != kAecTrue) { + self->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + self->skewMode = config.skewMode; + + if (config.nlpMode != kAecNlpConservative && + config.nlpMode != kAecNlpModerate && + config.nlpMode != kAecNlpAggressive) { + self->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + + if (config.metricsMode != kAecFalse && config.metricsMode != kAecTrue) { + self->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + + if (config.delay_logging != kAecFalse && config.delay_logging != kAecTrue) { + self->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + + WebRtcAec_SetConfigCore( + self->aec, config.nlpMode, config.metricsMode, config.delay_logging); + return 0; +} + +int WebRtcAec_get_echo_status(void* handle, int* status) { + Aec* self = (Aec*)handle; + if (status == NULL) { + self->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + if (self->initFlag != initCheck) { + self->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + + *status = WebRtcAec_echo_state(self->aec); + + return 0; +} + +int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) { + const float kUpWeight = 0.7f; 
+ float dtmp; + int stmp; + Aec* self = (Aec*)handle; + Stats erl; + Stats erle; + Stats a_nlp; + + if (handle == NULL) { + return -1; + } + if (metrics == NULL) { + self->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + if (self->initFlag != initCheck) { + self->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + + WebRtcAec_GetEchoStats(self->aec, &erl, &erle, &a_nlp); + + // ERL + metrics->erl.instant = (int)erl.instant; + + if ((erl.himean > kOffsetLevel) && (erl.average > kOffsetLevel)) { + // Use a mix between regular average and upper part average. + dtmp = kUpWeight * erl.himean + (1 - kUpWeight) * erl.average; + metrics->erl.average = (int)dtmp; + } else { + metrics->erl.average = kOffsetLevel; + } + + metrics->erl.max = (int)erl.max; + + if (erl.min < (kOffsetLevel * (-1))) { + metrics->erl.min = (int)erl.min; + } else { + metrics->erl.min = kOffsetLevel; + } + + // ERLE + metrics->erle.instant = (int)erle.instant; + + if ((erle.himean > kOffsetLevel) && (erle.average > kOffsetLevel)) { + // Use a mix between regular average and upper part average. + dtmp = kUpWeight * erle.himean + (1 - kUpWeight) * erle.average; + metrics->erle.average = (int)dtmp; + } else { + metrics->erle.average = kOffsetLevel; + } + + metrics->erle.max = (int)erle.max; + + if (erle.min < (kOffsetLevel * (-1))) { + metrics->erle.min = (int)erle.min; + } else { + metrics->erle.min = kOffsetLevel; + } + + // RERL + if ((metrics->erl.average > kOffsetLevel) && + (metrics->erle.average > kOffsetLevel)) { + stmp = metrics->erl.average + metrics->erle.average; + } else { + stmp = kOffsetLevel; + } + metrics->rerl.average = stmp; + + // No other statistics needed, but returned for completeness. + metrics->rerl.instant = stmp; + metrics->rerl.max = stmp; + metrics->rerl.min = stmp; + + // A_NLP + metrics->aNlp.instant = (int)a_nlp.instant; + + if ((a_nlp.himean > kOffsetLevel) && (a_nlp.average > kOffsetLevel)) { + // Use a mix between regular average and upper part average. 
+ dtmp = kUpWeight * a_nlp.himean + (1 - kUpWeight) * a_nlp.average; + metrics->aNlp.average = (int)dtmp; + } else { + metrics->aNlp.average = kOffsetLevel; + } + + metrics->aNlp.max = (int)a_nlp.max; + + if (a_nlp.min < (kOffsetLevel * (-1))) { + metrics->aNlp.min = (int)a_nlp.min; + } else { + metrics->aNlp.min = kOffsetLevel; + } + + return 0; +} + +int WebRtcAec_GetDelayMetrics(void* handle, + int* median, + int* std, + float* fraction_poor_delays) { + Aec* self = handle; + if (median == NULL) { + self->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + if (std == NULL) { + self->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + if (self->initFlag != initCheck) { + self->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + if (WebRtcAec_GetDelayMetricsCore(self->aec, median, std, + fraction_poor_delays) == + -1) { + // Logging disabled. + self->lastError = AEC_UNSUPPORTED_FUNCTION_ERROR; + return -1; + } + + return 0; +} + +int32_t WebRtcAec_get_error_code(void* aecInst) { + Aec* aecpc = aecInst; + return aecpc->lastError; +} + +AecCore* WebRtcAec_aec_core(void* handle) { + if (!handle) { + return NULL; + } + return ((Aec*)handle)->aec; +} + +static int ProcessNormal(Aec* aecpc, + const float* const* nearend, + size_t num_bands, + float* const* out, + size_t nrOfSamples, + int16_t msInSndCardBuf, + int32_t skew) { + int retVal = 0; + size_t i; + size_t nBlocks10ms; + // Limit resampling to doubling/halving of signal + const float minSkewEst = -0.5f; + const float maxSkewEst = 1.0f; + + msInSndCardBuf = + msInSndCardBuf > kMaxTrustedDelayMs ? kMaxTrustedDelayMs : msInSndCardBuf; + // TODO(andrew): we need to investigate if this +10 is really wanted. 
+ msInSndCardBuf += 10; + aecpc->msInSndCardBuf = msInSndCardBuf; + + if (aecpc->skewMode == kAecTrue) { + if (aecpc->skewFrCtr < 25) { + aecpc->skewFrCtr++; + } else { + retVal = WebRtcAec_GetSkew(aecpc->resampler, skew, &aecpc->skew); + if (retVal == -1) { + aecpc->skew = 0; + aecpc->lastError = AEC_BAD_PARAMETER_WARNING; + } + + aecpc->skew /= aecpc->sampFactor * nrOfSamples; + + if (aecpc->skew < 1.0e-3 && aecpc->skew > -1.0e-3) { + aecpc->resample = kAecFalse; + } else { + aecpc->resample = kAecTrue; + } + + if (aecpc->skew < minSkewEst) { + aecpc->skew = minSkewEst; + } else if (aecpc->skew > maxSkewEst) { + aecpc->skew = maxSkewEst; + } + +#ifdef WEBRTC_AEC_DEBUG_DUMP + (void)fwrite(&aecpc->skew, sizeof(aecpc->skew), 1, aecpc->skewFile); +#endif + } + } + + nBlocks10ms = nrOfSamples / (FRAME_LEN * aecpc->rate_factor); + + if (aecpc->startup_phase) { + for (i = 0; i < num_bands; ++i) { + // Only needed if they don't already point to the same place. + if (nearend[i] != out[i]) { + memcpy(out[i], nearend[i], sizeof(nearend[i][0]) * nrOfSamples); + } + } + + // The AEC is in the start up mode + // AEC is disabled until the system delay is OK + + // Mechanism to ensure that the system delay is reasonably stable. + if (aecpc->checkBuffSize) { + aecpc->checkBufSizeCtr++; + // Before we fill up the far-end buffer we require the system delay + // to be stable (+/-8 ms) compared to the first value. This + // comparison is made during the following 6 consecutive 10 ms + // blocks. If it seems to be stable then we start to fill up the + // far-end buffer. 
+ if (aecpc->counter == 0) { + aecpc->firstVal = aecpc->msInSndCardBuf; + aecpc->sum = 0; + } + + if (abs(aecpc->firstVal - aecpc->msInSndCardBuf) < + WEBRTC_SPL_MAX(0.2 * aecpc->msInSndCardBuf, sampMsNb)) { + aecpc->sum += aecpc->msInSndCardBuf; + aecpc->counter++; + } else { + aecpc->counter = 0; + } + + if (aecpc->counter * nBlocks10ms >= 6) { + // The far-end buffer size is determined in partitions of + // PART_LEN samples. Use 75% of the average value of the system + // delay as buffer size to start with. + aecpc->bufSizeStart = + WEBRTC_SPL_MIN((3 * aecpc->sum * aecpc->rate_factor * 8) / + (4 * aecpc->counter * PART_LEN), + kMaxBufSizeStart); + // Buffer size has now been determined. + aecpc->checkBuffSize = 0; + } + + if (aecpc->checkBufSizeCtr * nBlocks10ms > 50) { + // For really bad systems, don't disable the echo canceller for + // more than 0.5 sec. + aecpc->bufSizeStart = WEBRTC_SPL_MIN( + (aecpc->msInSndCardBuf * aecpc->rate_factor * 3) / 40, + kMaxBufSizeStart); + aecpc->checkBuffSize = 0; + } + } + + // If |checkBuffSize| changed in the if-statement above. + if (!aecpc->checkBuffSize) { + // The system delay is now reasonably stable (or has been unstable + // for too long). When the far-end buffer is filled with + // approximately the same amount of data as reported by the system + // we end the startup phase. + int overhead_elements = + WebRtcAec_system_delay(aecpc->aec) / PART_LEN - aecpc->bufSizeStart; + if (overhead_elements == 0) { + // Enable the AEC + aecpc->startup_phase = 0; + } else if (overhead_elements > 0) { + // TODO(bjornv): Do we need a check on how much we actually + // moved the read pointer? It should always be possible to move + // the pointer |overhead_elements| since we have only added data + // to the buffer and no delay compensation nor AEC processing + // has been done. + WebRtcAec_MoveFarReadPtr(aecpc->aec, overhead_elements); + + // Enable the AEC + aecpc->startup_phase = 0; + } + } + } else { + // AEC is enabled. 
+ EstBufDelayNormal(aecpc); + + // Call the AEC. + // TODO(bjornv): Re-structure such that we don't have to pass + // |aecpc->knownDelay| as input. Change name to something like + // |system_buffer_diff|. + WebRtcAec_ProcessFrames(aecpc->aec, + nearend, + num_bands, + nrOfSamples, + aecpc->knownDelay, + out); + } + + return retVal; +} + +static void ProcessExtended(Aec* self, + const float* const* near, + size_t num_bands, + float* const* out, + size_t num_samples, + int16_t reported_delay_ms, + int32_t skew) { + size_t i; + const int delay_diff_offset = kDelayDiffOffsetSamples; +#if defined(WEBRTC_UNTRUSTED_DELAY) + reported_delay_ms = kFixedDelayMs; +#else + // This is the usual mode where we trust the reported system delay values. + // Due to the longer filter, we no longer add 10 ms to the reported delay + // to reduce chance of non-causality. Instead we apply a minimum here to avoid + // issues with the read pointer jumping around needlessly. + reported_delay_ms = reported_delay_ms < kMinTrustedDelayMs + ? kMinTrustedDelayMs + : reported_delay_ms; + // If the reported delay appears to be bogus, we attempt to recover by using + // the measured fixed delay values. We use >= here because higher layers + // may already clamp to this maximum value, and we would otherwise not + // detect it here. + reported_delay_ms = reported_delay_ms >= kMaxTrustedDelayMs + ? kFixedDelayMs + : reported_delay_ms; +#endif + self->msInSndCardBuf = reported_delay_ms; + + if (!self->farend_started) { + for (i = 0; i < num_bands; ++i) { + // Only needed if they don't already point to the same place. + if (near[i] != out[i]) { + memcpy(out[i], near[i], sizeof(near[i][0]) * num_samples); + } + } + return; + } + if (self->startup_phase) { + // In the extended mode, there isn't a startup "phase", just a special + // action on the first frame. In the trusted delay case, we'll take the + // current reported delay, unless it's less then our conservative + // measurement. 
+ int startup_size_ms = + reported_delay_ms < kFixedDelayMs ? kFixedDelayMs : reported_delay_ms; +#if defined(WEBRTC_ANDROID) + int target_delay = startup_size_ms * self->rate_factor * 8; +#else + // To avoid putting the AEC in a non-causal state we're being slightly + // conservative and scale by 2. On Android we use a fixed delay and + // therefore there is no need to scale the target_delay. + int target_delay = startup_size_ms * self->rate_factor * 8 / 2; +#endif + int overhead_elements = + (WebRtcAec_system_delay(self->aec) - target_delay) / PART_LEN; + WebRtcAec_MoveFarReadPtr(self->aec, overhead_elements); + self->startup_phase = 0; + } + + EstBufDelayExtended(self); + + { + // |delay_diff_offset| gives us the option to manually rewind the delay on + // very low delay platforms which can't be expressed purely through + // |reported_delay_ms|. + const int adjusted_known_delay = + WEBRTC_SPL_MAX(0, self->knownDelay + delay_diff_offset); + + WebRtcAec_ProcessFrames(self->aec, + near, + num_bands, + num_samples, + adjusted_known_delay, + out); + } +} + +static void EstBufDelayNormal(Aec* aecpc) { + int nSampSndCard = aecpc->msInSndCardBuf * sampMsNb * aecpc->rate_factor; + int current_delay = nSampSndCard - WebRtcAec_system_delay(aecpc->aec); + int delay_difference = 0; + + // Before we proceed with the delay estimate filtering we: + // 1) Compensate for the frame that will be read. + // 2) Compensate for drift resampling. + // 3) Compensate for non-causality if needed, since the estimated delay can't + // be negative. + + // 1) Compensating for the frame(s) that will be read/processed. + current_delay += FRAME_LEN * aecpc->rate_factor; + + // 2) Account for resampling frame delay. + if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) { + current_delay -= kResamplingDelay; + } + + // 3) Compensate for non-causality, if needed, by flushing one block. 
+ if (current_delay < PART_LEN) { + current_delay += WebRtcAec_MoveFarReadPtr(aecpc->aec, 1) * PART_LEN; + } + + // We use -1 to signal an initialized state in the "extended" implementation; + // compensate for that. + aecpc->filtDelay = aecpc->filtDelay < 0 ? 0 : aecpc->filtDelay; + aecpc->filtDelay = + WEBRTC_SPL_MAX(0, (short)(0.8 * aecpc->filtDelay + 0.2 * current_delay)); + + delay_difference = aecpc->filtDelay - aecpc->knownDelay; + if (delay_difference > 224) { + if (aecpc->lastDelayDiff < 96) { + aecpc->timeForDelayChange = 0; + } else { + aecpc->timeForDelayChange++; + } + } else if (delay_difference < 96 && aecpc->knownDelay > 0) { + if (aecpc->lastDelayDiff > 224) { + aecpc->timeForDelayChange = 0; + } else { + aecpc->timeForDelayChange++; + } + } else { + aecpc->timeForDelayChange = 0; + } + aecpc->lastDelayDiff = delay_difference; + + if (aecpc->timeForDelayChange > 25) { + aecpc->knownDelay = WEBRTC_SPL_MAX((int)aecpc->filtDelay - 160, 0); + } +} + +static void EstBufDelayExtended(Aec* self) { + int reported_delay = self->msInSndCardBuf * sampMsNb * self->rate_factor; + int current_delay = reported_delay - WebRtcAec_system_delay(self->aec); + int delay_difference = 0; + + // Before we proceed with the delay estimate filtering we: + // 1) Compensate for the frame that will be read. + // 2) Compensate for drift resampling. + // 3) Compensate for non-causality if needed, since the estimated delay can't + // be negative. + + // 1) Compensating for the frame(s) that will be read/processed. + current_delay += FRAME_LEN * self->rate_factor; + + // 2) Account for resampling frame delay. + if (self->skewMode == kAecTrue && self->resample == kAecTrue) { + current_delay -= kResamplingDelay; + } + + // 3) Compensate for non-causality, if needed, by flushing two blocks. 
+ if (current_delay < PART_LEN) { + current_delay += WebRtcAec_MoveFarReadPtr(self->aec, 2) * PART_LEN; + } + + if (self->filtDelay == -1) { + self->filtDelay = WEBRTC_SPL_MAX(0, 0.5 * current_delay); + } else { + self->filtDelay = WEBRTC_SPL_MAX( + 0, (short)(0.95 * self->filtDelay + 0.05 * current_delay)); + } + + delay_difference = self->filtDelay - self->knownDelay; + if (delay_difference > 384) { + if (self->lastDelayDiff < 128) { + self->timeForDelayChange = 0; + } else { + self->timeForDelayChange++; + } + } else if (delay_difference < 128 && self->knownDelay > 0) { + if (self->lastDelayDiff > 384) { + self->timeForDelayChange = 0; + } else { + self->timeForDelayChange++; + } + } else { + self->timeForDelayChange = 0; + } + self->lastDelayDiff = delay_difference; + + if (self->timeForDelayChange > 25) { + self->knownDelay = WEBRTC_SPL_MAX((int)self->filtDelay - 256, 0); + } +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h new file mode 100644 index 00000000..95a6cf33 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_ + +#include "webrtc/common_audio/ring_buffer.h" +#include "webrtc/modules/audio_processing/aec/aec_core.h" + +typedef struct { + int delayCtr; + int sampFreq; + int splitSampFreq; + int scSampFreq; + float sampFactor; // scSampRate / sampFreq + short skewMode; + int bufSizeStart; + int knownDelay; + int rate_factor; + + short initFlag; // indicates if AEC has been initialized + + // Variables used for averaging far end buffer size + short counter; + int sum; + short firstVal; + short checkBufSizeCtr; + + // Variables used for delay shifts + short msInSndCardBuf; + short filtDelay; // Filtered delay estimate. + int timeForDelayChange; + int startup_phase; + int checkBuffSize; + short lastDelayDiff; + +#ifdef WEBRTC_AEC_DEBUG_DUMP + FILE* bufFile; + FILE* delayFile; + FILE* skewFile; +#endif + + // Structures + void* resampler; + + int skewFrCtr; + int resample; // if the skew is small enough we don't resample + int highSkewCtr; + float skew; + + RingBuffer* far_pre_buf; // Time domain far-end pre-buffer. + + int lastError; + + int farend_started; + + AecCore* aec; +} Aec; + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc new file mode 100644 index 00000000..315ac3e9 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// TODO(bjornv): Make this a comprehensive test. + +#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h" + +#include <stdlib.h> +#include <time.h> + +extern "C" { +#include "webrtc/modules/audio_processing/aec/aec_core.h" +} + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/base/checks.h" + +namespace webrtc { + +TEST(EchoCancellationTest, CreateAndFreeHasExpectedBehavior) { + void* handle = WebRtcAec_Create(); + ASSERT_TRUE(handle); + WebRtcAec_Free(nullptr); + WebRtcAec_Free(handle); +} + +TEST(EchoCancellationTest, ApplyAecCoreHandle) { + void* handle = WebRtcAec_Create(); + ASSERT_TRUE(handle); + EXPECT_TRUE(WebRtcAec_aec_core(NULL) == NULL); + AecCore* aec_core = WebRtcAec_aec_core(handle); + EXPECT_TRUE(aec_core != NULL); + // A simple test to verify that we can set and get a value from the lower + // level |aec_core| handle. + int delay = 111; + WebRtcAec_SetSystemDelay(aec_core, delay); + EXPECT_EQ(delay, WebRtcAec_system_delay(aec_core)); + WebRtcAec_Free(handle); +} + +} // namespace webrtc diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/include/echo_cancellation.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/include/echo_cancellation.h new file mode 100644 index 00000000..a340cf84 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/include/echo_cancellation.h @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_ + +#include <stddef.h> + +#include "webrtc/typedefs.h" + +// Errors +#define AEC_UNSPECIFIED_ERROR 12000 +#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001 +#define AEC_UNINITIALIZED_ERROR 12002 +#define AEC_NULL_POINTER_ERROR 12003 +#define AEC_BAD_PARAMETER_ERROR 12004 + +// Warnings +#define AEC_BAD_PARAMETER_WARNING 12050 + +enum { + kAecNlpConservative = 0, + kAecNlpModerate, + kAecNlpAggressive +}; + +enum { + kAecFalse = 0, + kAecTrue +}; + +typedef struct { + int16_t nlpMode; // default kAecNlpModerate + int16_t skewMode; // default kAecFalse + int16_t metricsMode; // default kAecFalse + int delay_logging; // default kAecFalse + // float realSkew; +} AecConfig; + +typedef struct { + int instant; + int average; + int max; + int min; +} AecLevel; + +typedef struct { + AecLevel rerl; + AecLevel erl; + AecLevel erle; + AecLevel aNlp; +} AecMetrics; + +struct AecCore; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Allocates the memory needed by the AEC. The memory needs to be initialized + * separately using the WebRtcAec_Init() function. Returns a pointer to the + * object or NULL on error. + */ +void* WebRtcAec_Create(); + +/* + * This function releases the memory allocated by WebRtcAec_Create(). + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecInst Pointer to the AEC instance + */ +void WebRtcAec_Free(void* aecInst); + +/* + * Initializes an AEC instance. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecInst Pointer to the AEC instance + * int32_t sampFreq Sampling frequency of data + * int32_t scSampFreq Soundcard sampling frequency + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * -1: error + */ +int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq); + +/* + * Inserts an 80 or 160 sample block of data into the farend buffer. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecInst Pointer to the AEC instance + * const float* farend In buffer containing one frame of + * farend signal for L band + * size_t nrOfSamples Number of samples in farend buffer + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * -1: error + */ +int32_t WebRtcAec_BufferFarend(void* aecInst, + const float* farend, + size_t nrOfSamples); + +/* + * Runs the echo canceller on an 80 or 160 sample block of data. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecInst Pointer to the AEC instance + * const float* const* nearend In buffer containing one frame of + * nearend+echo signal for each band + * size_t num_bands Number of bands in nearend buffer + * size_t nrOfSamples Number of samples in nearend buffer + * int16_t msInSndCardBuf Delay estimate for sound card and + * system buffers + * int32_t skew Difference between number of samples played + * and recorded at the soundcard (for clock skew + * compensation) + * + * Outputs Description + * ------------------------------------------------------------------- + * float* const* out Out buffer, one frame of processed nearend + * for each band + * int32_t return 0: OK + * -1: error + */ +int32_t WebRtcAec_Process(void* aecInst, + const float* const* nearend, + size_t num_bands, + float* const* out, + size_t nrOfSamples, + int16_t msInSndCardBuf, + int32_t skew); + +/* + * This function enables the user to set certain parameters on-the-fly. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* handle Pointer to the AEC instance + * AecConfig config Config instance that contains all + * properties to be set + * + * Outputs Description + * ------------------------------------------------------------------- + * int return 0: OK + * -1: error + */ +int WebRtcAec_set_config(void* handle, AecConfig config); + +/* + * Gets the current echo status of the nearend signal. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void* handle Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * int* status 0: Almost certainly nearend single-talk + * 1: Might not be nearend single-talk + * int return 0: OK + * -1: error + */ +int WebRtcAec_get_echo_status(void* handle, int* status); + +/* + * Gets the current echo metrics for the session. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* handle Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * AecMetrics* metrics Struct which will be filled out with the + * current echo metrics. + * int return 0: OK + * -1: error + */ +int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics); + +/* + * Gets the current delay metrics for the session. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* handle Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * int* median Delay median value. + * int* std Delay standard deviation. + * float* fraction_poor_delays Fraction of the delay estimates that may + * cause the AEC to perform poorly. + * + * int return 0: OK + * -1: error + */ +int WebRtcAec_GetDelayMetrics(void* handle, + int* median, + int* std, + float* fraction_poor_delays); + +/* + * Gets the last error code. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecInst Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 12000-12050: error code + */ +int32_t WebRtcAec_get_error_code(void* aecInst); + +// Returns a pointer to the low level AEC handle. 
+// +// Input: +// - handle : Pointer to the AEC instance. +// +// Return value: +// - AecCore pointer : NULL for error. +// +struct AecCore* WebRtcAec_aec_core(void* handle); + +#ifdef __cplusplus +} +#endif +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/system_delay_unittest.cc b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/system_delay_unittest.cc new file mode 100644 index 00000000..07e3cf8a --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/system_delay_unittest.cc @@ -0,0 +1,602 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "testing/gtest/include/gtest/gtest.h" +extern "C" { +#include "webrtc/modules/audio_processing/aec/aec_core.h" +} +#include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h" +#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h" +#include "webrtc/test/testsupport/gtest_disable.h" +#include "webrtc/typedefs.h" + +namespace { + +class SystemDelayTest : public ::testing::Test { + protected: + SystemDelayTest(); + virtual void SetUp(); + virtual void TearDown(); + + // Initialization of AEC handle with respect to |sample_rate_hz|. Since the + // device sample rate is unimportant we set that value to 48000 Hz. + void Init(int sample_rate_hz); + + // Makes one render call and one capture call in that specific order. + void RenderAndCapture(int device_buffer_ms); + + // Fills up the far-end buffer with respect to the default device buffer size. 
+ size_t BufferFillUp(); + + // Runs and verifies the behavior in a stable startup procedure. + void RunStableStartup(); + + // Maps buffer size in ms into samples, taking the unprocessed frame into + // account. + int MapBufferSizeToSamples(int size_in_ms, bool extended_filter); + + void* handle_; + Aec* self_; + size_t samples_per_frame_; + // Dummy input/output speech data. + static const int kSamplesPerChunk = 160; + float far_[kSamplesPerChunk]; + float near_[kSamplesPerChunk]; + float out_[kSamplesPerChunk]; + const float* near_ptr_; + float* out_ptr_; +}; + +SystemDelayTest::SystemDelayTest() + : handle_(NULL), self_(NULL), samples_per_frame_(0) { + // Dummy input data are set with more or less arbitrary non-zero values. + for (int i = 0; i < kSamplesPerChunk; i++) { + far_[i] = 257.0; + near_[i] = 514.0; + } + memset(out_, 0, sizeof(out_)); + near_ptr_ = near_; + out_ptr_ = out_; +} + +void SystemDelayTest::SetUp() { + handle_ = WebRtcAec_Create(); + ASSERT_TRUE(handle_); + self_ = reinterpret_cast<Aec*>(handle_); +} + +void SystemDelayTest::TearDown() { + // Free AEC + WebRtcAec_Free(handle_); + handle_ = NULL; +} + +// In SWB mode nothing is added to the buffer handling with respect to +// functionality compared to WB. We therefore only verify behavior in NB and WB. +static const int kSampleRateHz[] = {8000, 16000}; +static const size_t kNumSampleRates = + sizeof(kSampleRateHz) / sizeof(*kSampleRateHz); + +// Default audio device buffer size used. +static const int kDeviceBufMs = 100; + +// Requirement for a stable device convergence time in ms. Should converge in +// less than |kStableConvergenceMs|. +static const int kStableConvergenceMs = 100; + +// Maximum convergence time in ms. This means that we should leave the startup +// phase after |kMaxConvergenceMs| independent of device buffer stability +// conditions. 
+static const int kMaxConvergenceMs = 500; + +void SystemDelayTest::Init(int sample_rate_hz) { + // Initialize AEC + EXPECT_EQ(0, WebRtcAec_Init(handle_, sample_rate_hz, 48000)); + EXPECT_EQ(0, WebRtcAec_system_delay(self_->aec)); + + // One frame equals 10 ms of data. + samples_per_frame_ = static_cast<size_t>(sample_rate_hz / 100); +} + +void SystemDelayTest::RenderAndCapture(int device_buffer_ms) { + EXPECT_EQ(0, WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_)); + EXPECT_EQ(0, + WebRtcAec_Process(handle_, + &near_ptr_, + 1, + &out_ptr_, + samples_per_frame_, + device_buffer_ms, + 0)); +} + +size_t SystemDelayTest::BufferFillUp() { + // To make sure we have a full buffer when we verify stability we first fill + // up the far-end buffer with the same amount as we will report in through + // Process(). + size_t buffer_size = 0; + for (int i = 0; i < kDeviceBufMs / 10; i++) { + EXPECT_EQ(0, WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_)); + buffer_size += samples_per_frame_; + EXPECT_EQ(static_cast<int>(buffer_size), + WebRtcAec_system_delay(self_->aec)); + } + return buffer_size; +} + +void SystemDelayTest::RunStableStartup() { + // To make sure we have a full buffer when we verify stability we first fill + // up the far-end buffer with the same amount as we will report in through + // Process(). + size_t buffer_size = BufferFillUp(); + + if (WebRtcAec_delay_agnostic_enabled(self_->aec) == 1) { + // In extended_filter mode we set the buffer size after the first processed + // 10 ms chunk. Hence, we don't need to wait for the reported system delay + // values to become stable. + RenderAndCapture(kDeviceBufMs); + buffer_size += samples_per_frame_; + EXPECT_EQ(0, self_->startup_phase); + } else { + // A stable device should be accepted and put in a regular process mode + // within |kStableConvergenceMs|. 
+ int process_time_ms = 0; + for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) { + RenderAndCapture(kDeviceBufMs); + buffer_size += samples_per_frame_; + if (self_->startup_phase == 0) { + // We have left the startup phase. + break; + } + } + // Verify convergence time. + EXPECT_GT(kStableConvergenceMs, process_time_ms); + } + // Verify that the buffer has been flushed. + EXPECT_GE(static_cast<int>(buffer_size), + WebRtcAec_system_delay(self_->aec)); +} + + int SystemDelayTest::MapBufferSizeToSamples(int size_in_ms, + bool extended_filter) { + // If extended_filter is disabled we add an extra 10 ms for the unprocessed + // frame. That is simply how the algorithm is constructed. + return static_cast<int>( + (size_in_ms + (extended_filter ? 0 : 10)) * samples_per_frame_ / 10); +} + +// The tests should meet basic requirements and not be adjusted to what is +// actually implemented. If we don't get good code coverage this way we either +// lack in tests or have unnecessary code. +// General requirements: +// 1) If we add far-end data the system delay should be increased with the same +// amount we add. +// 2) If the far-end buffer is full we should flush the oldest data to make room +// for the new. In this case the system delay is unaffected. +// 3) There should exist a startup phase in which the buffer size is to be +// determined. In this phase no cancellation should be performed. +// 4) Under stable conditions (small variations in device buffer sizes) the AEC +// should determine an appropriate local buffer size within +// |kStableConvergenceMs| ms. +// 5) Under unstable conditions the AEC should make a decision within +// |kMaxConvergenceMs| ms. +// 6) If the local buffer runs out of data we should stuff the buffer with older +// frames. +// 7) The system delay should within |kMaxConvergenceMs| ms heal from +// disturbances like drift, data glitches, toggling events and outliers. +// 8) The system delay should never become negative. 
+ +TEST_F(SystemDelayTest, CorrectIncreaseWhenBufferFarend) { + // When we add data to the AEC buffer the internal system delay should be + // incremented with the same amount as the size of data. + // This process should be independent of DA-AEC and extended_filter mode. + for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) { + WebRtcAec_enable_extended_filter(self_->aec, extended_filter); + EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec)); + for (int da_aec = 0; da_aec <= 1; ++da_aec) { + WebRtcAec_enable_delay_agnostic(self_->aec, da_aec); + EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec)); + for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + // Loop through a couple of calls to make sure the system delay + // increments correctly. + for (int j = 1; j <= 5; j++) { + EXPECT_EQ(0, + WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_)); + EXPECT_EQ(static_cast<int>(j * samples_per_frame_), + WebRtcAec_system_delay(self_->aec)); + } + } + } + } +} + +// TODO(bjornv): Add a test to verify behavior if the far-end buffer is full +// when adding new data. + +TEST_F(SystemDelayTest, CorrectDelayAfterStableStartup) { + // We run the system in a stable startup. After that we verify that the system + // delay meets the requirements. + // This process should be independent of DA-AEC and extended_filter mode. 
+ for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) { + WebRtcAec_enable_extended_filter(self_->aec, extended_filter); + EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec)); + for (int da_aec = 0; da_aec <= 1; ++da_aec) { + WebRtcAec_enable_delay_agnostic(self_->aec, da_aec); + EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec)); + for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + RunStableStartup(); + + // Verify system delay with respect to requirements, i.e., the + // |system_delay| is in the interval [75%, 100%] of what's reported on + // the average. + // In extended_filter mode we target 50% and measure after one processed + // 10 ms chunk. + int average_reported_delay = + static_cast<int>(kDeviceBufMs * samples_per_frame_ / 10); + EXPECT_GE(average_reported_delay, WebRtcAec_system_delay(self_->aec)); + int lower_bound = WebRtcAec_extended_filter_enabled(self_->aec) + ? average_reported_delay / 2 - samples_per_frame_ + : average_reported_delay * 3 / 4; + EXPECT_LE(lower_bound, WebRtcAec_system_delay(self_->aec)); + } + } + } +} + +TEST_F(SystemDelayTest, CorrectDelayAfterUnstableStartup) { + // This test does not apply in extended_filter mode, since we only use the + // the first 10 ms chunk to determine a reasonable buffer size. Neither does + // it apply if DA-AEC is on because that overrides the startup procedure. + WebRtcAec_enable_extended_filter(self_->aec, 0); + EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(self_->aec)); + WebRtcAec_enable_delay_agnostic(self_->aec, 0); + EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(self_->aec)); + + // In an unstable system we would start processing after |kMaxConvergenceMs|. + // On the last frame the AEC buffer is adjusted to 60% of the last reported + // device buffer size. + // We construct an unstable system by altering the device buffer size between + // two values |kDeviceBufMs| +- 25 ms. 
+ for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + + // To make sure we have a full buffer when we verify stability we first fill + // up the far-end buffer with the same amount as we will report in on the + // average through Process(). + size_t buffer_size = BufferFillUp(); + + int buffer_offset_ms = 25; + int reported_delay_ms = 0; + int process_time_ms = 0; + for (; process_time_ms <= kMaxConvergenceMs; process_time_ms += 10) { + reported_delay_ms = kDeviceBufMs + buffer_offset_ms; + RenderAndCapture(reported_delay_ms); + buffer_size += samples_per_frame_; + buffer_offset_ms = -buffer_offset_ms; + if (self_->startup_phase == 0) { + // We have left the startup phase. + break; + } + } + // Verify convergence time. + EXPECT_GE(kMaxConvergenceMs, process_time_ms); + // Verify that the buffer has been flushed. + EXPECT_GE(static_cast<int>(buffer_size), + WebRtcAec_system_delay(self_->aec)); + + // Verify system delay with respect to requirements, i.e., the + // |system_delay| is in the interval [60%, 100%] of what's last reported. + EXPECT_GE(static_cast<int>(reported_delay_ms * samples_per_frame_ / 10), + WebRtcAec_system_delay(self_->aec)); + EXPECT_LE( + static_cast<int>(reported_delay_ms * samples_per_frame_ / 10 * 3 / 5), + WebRtcAec_system_delay(self_->aec)); + } +} + +TEST_F(SystemDelayTest, CorrectDelayAfterStableBufferBuildUp) { + // This test does not apply in extended_filter mode, since we only use the + // the first 10 ms chunk to determine a reasonable buffer size. Neither does + // it apply if DA-AEC is on because that overrides the startup procedure. + WebRtcAec_enable_extended_filter(self_->aec, 0); + EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(self_->aec)); + WebRtcAec_enable_delay_agnostic(self_->aec, 0); + EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(self_->aec)); + + // In this test we start by establishing the device buffer size during stable + // conditions, but with an empty internal far-end buffer. 
Once that is done we + // verify that the system delay is increased correctly until we have reached an + // internal buffer size of 75% of what's been reported. + for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + + // We assume that running |kStableConvergenceMs| calls will put the + // algorithm in a state where the device buffer size has been determined. We + // can make that assumption since we have a separate stability test. + int process_time_ms = 0; + for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) { + EXPECT_EQ(0, + WebRtcAec_Process(handle_, + &near_ptr_, + 1, + &out_ptr_, + samples_per_frame_, + kDeviceBufMs, + 0)); + } + // Verify that a buffer size has been established. + EXPECT_EQ(0, self_->checkBuffSize); + + // We now have established the required buffer size. Let us verify that we + // fill up before leaving the startup phase for normal processing. + size_t buffer_size = 0; + size_t target_buffer_size = kDeviceBufMs * samples_per_frame_ / 10 * 3 / 4; + process_time_ms = 0; + for (; process_time_ms <= kMaxConvergenceMs; process_time_ms += 10) { + RenderAndCapture(kDeviceBufMs); + buffer_size += samples_per_frame_; + if (self_->startup_phase == 0) { + // We have left the startup phase. + break; + } + } + // Verify convergence time. + EXPECT_GT(kMaxConvergenceMs, process_time_ms); + // Verify that the buffer has reached the desired size. + EXPECT_LE(static_cast<int>(target_buffer_size), + WebRtcAec_system_delay(self_->aec)); + + // Verify normal behavior (system delay is kept constant) after startup by + // running a couple of calls to BufferFarend() and Process(). + for (int j = 0; j < 6; j++) { + int system_delay_before_calls = WebRtcAec_system_delay(self_->aec); + RenderAndCapture(kDeviceBufMs); + EXPECT_EQ(system_delay_before_calls, WebRtcAec_system_delay(self_->aec)); + } + } +} + +TEST_F(SystemDelayTest, CorrectDelayWhenBufferUnderrun) { + // Here we test a buffer under run scenario. 
If we keep on calling + // WebRtcAec_Process() we will finally run out of data, but should + // automatically stuff the buffer. We verify this behavior by checking if the + // system delay goes negative. + // This process should be independent of DA-AEC and extended_filter mode. + for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) { + WebRtcAec_enable_extended_filter(self_->aec, extended_filter); + EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec)); + for (int da_aec = 0; da_aec <= 1; ++da_aec) { + WebRtcAec_enable_delay_agnostic(self_->aec, da_aec); + EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec)); + for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + RunStableStartup(); + + // The AEC has now left the Startup phase. We now have at most + // |kStableConvergenceMs| in the buffer. Keep on calling Process() until + // we run out of data and verify that the system delay is non-negative. + for (int j = 0; j <= kStableConvergenceMs; j += 10) { + EXPECT_EQ(0, WebRtcAec_Process(handle_, &near_ptr_, 1, &out_ptr_, + samples_per_frame_, kDeviceBufMs, 0)); + EXPECT_LE(0, WebRtcAec_system_delay(self_->aec)); + } + } + } + } +} + +TEST_F(SystemDelayTest, CorrectDelayDuringDrift) { + // This drift test should verify that the system delay is never exceeding the + // device buffer. The drift is simulated by decreasing the reported device + // buffer size by 1 ms every 100 ms. If the device buffer size goes below 30 + // ms we jump (add) 10 ms to give a repeated pattern. + + // This process should be independent of DA-AEC and extended_filter mode. 
+ for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) { + WebRtcAec_enable_extended_filter(self_->aec, extended_filter); + EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec)); + for (int da_aec = 0; da_aec <= 1; ++da_aec) { + WebRtcAec_enable_delay_agnostic(self_->aec, da_aec); + EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec)); + for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + RunStableStartup(); + + // We have left the startup phase and proceed with normal processing. + int jump = 0; + for (int j = 0; j < 1000; j++) { + // Drift = -1 ms per 100 ms of data. + int device_buf_ms = kDeviceBufMs - (j / 10) + jump; + int device_buf = MapBufferSizeToSamples(device_buf_ms, + extended_filter == 1); + + if (device_buf_ms < 30) { + // Add 10 ms data, taking affect next frame. + jump += 10; + } + RenderAndCapture(device_buf_ms); + + // Verify that the system delay does not exceed the device buffer. + EXPECT_GE(device_buf, WebRtcAec_system_delay(self_->aec)); + + // Verify that the system delay is non-negative. + EXPECT_LE(0, WebRtcAec_system_delay(self_->aec)); + } + } + } + } +} + +TEST_F(SystemDelayTest, ShouldRecoverAfterGlitch) { + // This glitch test should verify that the system delay recovers if there is + // a glitch in data. The data glitch is constructed as 200 ms of buffering + // after which the stable procedure continues. The glitch is never reported by + // the device. + // The system is said to be in a non-causal state if the difference between + // the device buffer and system delay is less than a block (64 samples). + + // This process should be independent of DA-AEC and extended_filter mode. 
+ for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) { + WebRtcAec_enable_extended_filter(self_->aec, extended_filter); + EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec)); + for (int da_aec = 0; da_aec <= 1; ++da_aec) { + WebRtcAec_enable_delay_agnostic(self_->aec, da_aec); + EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec)); + for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + RunStableStartup(); + int device_buf = MapBufferSizeToSamples(kDeviceBufMs, + extended_filter == 1); + // Glitch state. + for (int j = 0; j < 20; j++) { + EXPECT_EQ(0, + WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_)); + // No need to verify system delay, since that is done in a separate + // test. + } + // Verify that we are in a non-causal state, i.e., + // |system_delay| > |device_buf|. + EXPECT_LT(device_buf, WebRtcAec_system_delay(self_->aec)); + + // Recover state. Should recover at least 4 ms of data per 10 ms, hence + // a glitch of 200 ms will take at most 200 * 10 / 4 = 500 ms to recover + // from. + bool non_causal = true; // We are currently in a non-causal state. + for (int j = 0; j < 50; j++) { + int system_delay_before = WebRtcAec_system_delay(self_->aec); + RenderAndCapture(kDeviceBufMs); + int system_delay_after = WebRtcAec_system_delay(self_->aec); + // We have recovered if + // |device_buf| - |system_delay_after| >= PART_LEN (1 block). + // During recovery, |system_delay_after| < |system_delay_before|, + // otherwise they are equal. + if (non_causal) { + EXPECT_LT(system_delay_after, system_delay_before); + if (device_buf - system_delay_after >= PART_LEN) { + non_causal = false; + } + } else { + EXPECT_EQ(system_delay_before, system_delay_after); + } + // Verify that the system delay is non-negative. + EXPECT_LE(0, WebRtcAec_system_delay(self_->aec)); + } + // Check that we have recovered. 
+ EXPECT_FALSE(non_causal); + } + } + } +} + +TEST_F(SystemDelayTest, UnaffectedWhenSpuriousDeviceBufferValues) { + // This test does not apply in extended_filter mode, since we only use the + // the first 10 ms chunk to determine a reasonable buffer size. + const int extended_filter = 0; + WebRtcAec_enable_extended_filter(self_->aec, extended_filter); + EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec)); + + // Should be DA-AEC independent. + for (int da_aec = 0; da_aec <= 1; ++da_aec) { + WebRtcAec_enable_delay_agnostic(self_->aec, da_aec); + EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec)); + // This spurious device buffer data test aims at verifying that the system + // delay is unaffected by large outliers. + // The system is said to be in a non-causal state if the difference between + // the device buffer and system delay is less than a block (64 samples). + for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + RunStableStartup(); + int device_buf = MapBufferSizeToSamples(kDeviceBufMs, + extended_filter == 1); + + // Normal state. We are currently not in a non-causal state. + bool non_causal = false; + + // Run 1 s and replace device buffer size with 500 ms every 100 ms. + for (int j = 0; j < 100; j++) { + int system_delay_before_calls = WebRtcAec_system_delay(self_->aec); + int device_buf_ms = j % 10 == 0 ? 500 : kDeviceBufMs; + RenderAndCapture(device_buf_ms); + + // Check for non-causality. + if (device_buf - WebRtcAec_system_delay(self_->aec) < PART_LEN) { + non_causal = true; + } + EXPECT_FALSE(non_causal); + EXPECT_EQ(system_delay_before_calls, + WebRtcAec_system_delay(self_->aec)); + + // Verify that the system delay is non-negative. + EXPECT_LE(0, WebRtcAec_system_delay(self_->aec)); + } + } + } +} + +TEST_F(SystemDelayTest, CorrectImpactWhenTogglingDeviceBufferValues) { + // This test aims at verifying that the system delay is "unaffected" by + // toggling values reported by the device. 
+ // The test is constructed such that every other device buffer value is zero + // and then 2 * |kDeviceBufMs|, hence the size is constant on the average. The + // zero values will force us into a non-causal state and thereby lowering the + // system delay until we basically run out of data. Once that happens the + // buffer will be stuffed. + // TODO(bjornv): This test will have a better impact if we verified that the + // delay estimate goes up when the system delay goes down to meet the average + // device buffer size. + + // This test does not apply if DA-AEC is enabled and extended_filter mode + // disabled. + for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) { + WebRtcAec_enable_extended_filter(self_->aec, extended_filter); + EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec)); + for (int da_aec = 0; da_aec <= 1; ++da_aec) { + WebRtcAec_enable_delay_agnostic(self_->aec, da_aec); + EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec)); + if (extended_filter == 0 && da_aec == 1) { + continue; + } + for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + RunStableStartup(); + const int device_buf = MapBufferSizeToSamples(kDeviceBufMs, + extended_filter == 1); + + // Normal state. We are currently not in a non-causal state. + bool non_causal = false; + + // Loop through 100 frames (both render and capture), which equals 1 s + // of data. Every odd frame we set the device buffer size to + // 2 * |kDeviceBufMs| and even frames we set the device buffer size to + // zero. + for (int j = 0; j < 100; j++) { + int system_delay_before_calls = WebRtcAec_system_delay(self_->aec); + int device_buf_ms = 2 * (j % 2) * kDeviceBufMs; + RenderAndCapture(device_buf_ms); + + // Check for non-causality, compared with the average device buffer + // size. 
+ non_causal |= (device_buf - WebRtcAec_system_delay(self_->aec) < 64); + EXPECT_GE(system_delay_before_calls, + WebRtcAec_system_delay(self_->aec)); + + // Verify that the system delay is non-negative. + EXPECT_LE(0, WebRtcAec_system_delay(self_->aec)); + } + // Verify we are not in a non-causal state. + EXPECT_FALSE(non_causal); + } + } + } +} + +} // namespace diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.c new file mode 100644 index 00000000..b801f07a --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.c @@ -0,0 +1,1233 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/aecm/aecm_core.h" + +#include <assert.h> +#include <stddef.h> +#include <stdlib.h> + +#include "webrtc/common_audio/ring_buffer.h" +#include "webrtc/common_audio/signal_processing/include/real_fft.h" +#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h" +#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" +#include "webrtc/system_wrappers/interface/compile_assert_c.h" +#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" +#include "webrtc/typedefs.h" + +#ifdef AEC_DEBUG +FILE *dfile; +FILE *testfile; +#endif + +const int16_t WebRtcAecm_kCosTable[] = { + 8192, 8190, 8187, 8180, 8172, 8160, 8147, 8130, 8112, + 8091, 8067, 8041, 8012, 7982, 7948, 7912, 7874, 7834, + 7791, 7745, 7697, 7647, 7595, 7540, 7483, 7424, 7362, + 7299, 7233, 7164, 7094, 7021, 6947, 6870, 6791, 6710, + 6627, 6542, 6455, 6366, 6275, 6182, 6087, 5991, 5892, + 5792, 5690, 5586, 5481, 5374, 5265, 5155, 5043, 4930, + 4815, 4698, 4580, 4461, 4341, 4219, 4096, 3971, 3845, + 3719, 3591, 3462, 3331, 3200, 3068, 2935, 2801, 2667, + 2531, 2395, 2258, 2120, 1981, 1842, 1703, 1563, 1422, + 1281, 1140, 998, 856, 713, 571, 428, 285, 142, + 0, -142, -285, -428, -571, -713, -856, -998, -1140, + -1281, -1422, -1563, -1703, -1842, -1981, -2120, -2258, -2395, + -2531, -2667, -2801, -2935, -3068, -3200, -3331, -3462, -3591, + -3719, -3845, -3971, -4095, -4219, -4341, -4461, -4580, -4698, + -4815, -4930, -5043, -5155, -5265, -5374, -5481, -5586, -5690, + -5792, -5892, -5991, -6087, -6182, -6275, -6366, -6455, -6542, + -6627, -6710, -6791, -6870, -6947, -7021, -7094, -7164, -7233, + -7299, -7362, -7424, -7483, -7540, -7595, -7647, -7697, -7745, + -7791, -7834, -7874, -7912, -7948, -7982, -8012, -8041, -8067, + -8091, -8112, -8130, -8147, -8160, -8172, -8180, -8187, -8190, + -8191, -8190, -8187, -8180, -8172, -8160, -8147, -8130, -8112, + -8091, -8067, -8041, -8012, -7982, -7948, -7912, -7874, -7834, + 
-7791, -7745, -7697, -7647, -7595, -7540, -7483, -7424, -7362, + -7299, -7233, -7164, -7094, -7021, -6947, -6870, -6791, -6710, + -6627, -6542, -6455, -6366, -6275, -6182, -6087, -5991, -5892, + -5792, -5690, -5586, -5481, -5374, -5265, -5155, -5043, -4930, + -4815, -4698, -4580, -4461, -4341, -4219, -4096, -3971, -3845, + -3719, -3591, -3462, -3331, -3200, -3068, -2935, -2801, -2667, + -2531, -2395, -2258, -2120, -1981, -1842, -1703, -1563, -1422, + -1281, -1140, -998, -856, -713, -571, -428, -285, -142, + 0, 142, 285, 428, 571, 713, 856, 998, 1140, + 1281, 1422, 1563, 1703, 1842, 1981, 2120, 2258, 2395, + 2531, 2667, 2801, 2935, 3068, 3200, 3331, 3462, 3591, + 3719, 3845, 3971, 4095, 4219, 4341, 4461, 4580, 4698, + 4815, 4930, 5043, 5155, 5265, 5374, 5481, 5586, 5690, + 5792, 5892, 5991, 6087, 6182, 6275, 6366, 6455, 6542, + 6627, 6710, 6791, 6870, 6947, 7021, 7094, 7164, 7233, + 7299, 7362, 7424, 7483, 7540, 7595, 7647, 7697, 7745, + 7791, 7834, 7874, 7912, 7948, 7982, 8012, 8041, 8067, + 8091, 8112, 8130, 8147, 8160, 8172, 8180, 8187, 8190 +}; + +const int16_t WebRtcAecm_kSinTable[] = { + 0, 142, 285, 428, 571, 713, 856, 998, + 1140, 1281, 1422, 1563, 1703, 1842, 1981, 2120, + 2258, 2395, 2531, 2667, 2801, 2935, 3068, 3200, + 3331, 3462, 3591, 3719, 3845, 3971, 4095, 4219, + 4341, 4461, 4580, 4698, 4815, 4930, 5043, 5155, + 5265, 5374, 5481, 5586, 5690, 5792, 5892, 5991, + 6087, 6182, 6275, 6366, 6455, 6542, 6627, 6710, + 6791, 6870, 6947, 7021, 7094, 7164, 7233, 7299, + 7362, 7424, 7483, 7540, 7595, 7647, 7697, 7745, + 7791, 7834, 7874, 7912, 7948, 7982, 8012, 8041, + 8067, 8091, 8112, 8130, 8147, 8160, 8172, 8180, + 8187, 8190, 8191, 8190, 8187, 8180, 8172, 8160, + 8147, 8130, 8112, 8091, 8067, 8041, 8012, 7982, + 7948, 7912, 7874, 7834, 7791, 7745, 7697, 7647, + 7595, 7540, 7483, 7424, 7362, 7299, 7233, 7164, + 7094, 7021, 6947, 6870, 6791, 6710, 6627, 6542, + 6455, 6366, 6275, 6182, 6087, 5991, 5892, 5792, + 5690, 5586, 5481, 5374, 5265, 5155, 5043, 4930, + 
4815, 4698, 4580, 4461, 4341, 4219, 4096, 3971, + 3845, 3719, 3591, 3462, 3331, 3200, 3068, 2935, + 2801, 2667, 2531, 2395, 2258, 2120, 1981, 1842, + 1703, 1563, 1422, 1281, 1140, 998, 856, 713, + 571, 428, 285, 142, 0, -142, -285, -428, + -571, -713, -856, -998, -1140, -1281, -1422, -1563, + -1703, -1842, -1981, -2120, -2258, -2395, -2531, -2667, + -2801, -2935, -3068, -3200, -3331, -3462, -3591, -3719, + -3845, -3971, -4095, -4219, -4341, -4461, -4580, -4698, + -4815, -4930, -5043, -5155, -5265, -5374, -5481, -5586, + -5690, -5792, -5892, -5991, -6087, -6182, -6275, -6366, + -6455, -6542, -6627, -6710, -6791, -6870, -6947, -7021, + -7094, -7164, -7233, -7299, -7362, -7424, -7483, -7540, + -7595, -7647, -7697, -7745, -7791, -7834, -7874, -7912, + -7948, -7982, -8012, -8041, -8067, -8091, -8112, -8130, + -8147, -8160, -8172, -8180, -8187, -8190, -8191, -8190, + -8187, -8180, -8172, -8160, -8147, -8130, -8112, -8091, + -8067, -8041, -8012, -7982, -7948, -7912, -7874, -7834, + -7791, -7745, -7697, -7647, -7595, -7540, -7483, -7424, + -7362, -7299, -7233, -7164, -7094, -7021, -6947, -6870, + -6791, -6710, -6627, -6542, -6455, -6366, -6275, -6182, + -6087, -5991, -5892, -5792, -5690, -5586, -5481, -5374, + -5265, -5155, -5043, -4930, -4815, -4698, -4580, -4461, + -4341, -4219, -4096, -3971, -3845, -3719, -3591, -3462, + -3331, -3200, -3068, -2935, -2801, -2667, -2531, -2395, + -2258, -2120, -1981, -1842, -1703, -1563, -1422, -1281, + -1140, -998, -856, -713, -571, -428, -285, -142 +}; + +// Initialization table for echo channel in 8 kHz +static const int16_t kChannelStored8kHz[PART_LEN1] = { + 2040, 1815, 1590, 1498, 1405, 1395, 1385, 1418, + 1451, 1506, 1562, 1644, 1726, 1804, 1882, 1918, + 1953, 1982, 2010, 2025, 2040, 2034, 2027, 2021, + 2014, 1997, 1980, 1925, 1869, 1800, 1732, 1683, + 1635, 1604, 1572, 1545, 1517, 1481, 1444, 1405, + 1367, 1331, 1294, 1270, 1245, 1239, 1233, 1247, + 1260, 1282, 1303, 1338, 1373, 1407, 1441, 1470, + 1499, 1524, 1549, 1565, 1582, 
1601, 1621, 1649, + 1676 +}; + +// Initialization table for echo channel in 16 kHz +static const int16_t kChannelStored16kHz[PART_LEN1] = { + 2040, 1590, 1405, 1385, 1451, 1562, 1726, 1882, + 1953, 2010, 2040, 2027, 2014, 1980, 1869, 1732, + 1635, 1572, 1517, 1444, 1367, 1294, 1245, 1233, + 1260, 1303, 1373, 1441, 1499, 1549, 1582, 1621, + 1676, 1741, 1802, 1861, 1921, 1983, 2040, 2102, + 2170, 2265, 2375, 2515, 2651, 2781, 2922, 3075, + 3253, 3471, 3738, 3976, 4151, 4258, 4308, 4288, + 4270, 4253, 4237, 4179, 4086, 3947, 3757, 3484, + 3153 +}; + +// Moves the pointer to the next entry and inserts |far_spectrum| and +// corresponding Q-domain in its buffer. +// +// Inputs: +// - self : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum +// - far_q : Q-domain of far end spectrum +// +void WebRtcAecm_UpdateFarHistory(AecmCore* self, + uint16_t* far_spectrum, + int far_q) { + // Get new buffer position + self->far_history_pos++; + if (self->far_history_pos >= MAX_DELAY) { + self->far_history_pos = 0; + } + // Update Q-domain buffer + self->far_q_domains[self->far_history_pos] = far_q; + // Update far end spectrum buffer + memcpy(&(self->far_history[self->far_history_pos * PART_LEN1]), + far_spectrum, + sizeof(uint16_t) * PART_LEN1); +} + +// Returns a pointer to the far end spectrum aligned to current near end +// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been +// called before AlignedFarend(...). Otherwise, you get the pointer to the +// previous frame. The memory is only valid until the next call of +// WebRtc_DelayEstimatorProcessFix(...). +// +// Inputs: +// - self : Pointer to the AECM instance. +// - delay : Current delay estimate. 
+// +// Output: +// - far_q : The Q-domain of the aligned far end spectrum +// +// Return value: +// - far_spectrum : Pointer to the aligned far end spectrum +// NULL - Error +// +const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, + int* far_q, + int delay) { + int buffer_position = 0; + assert(self != NULL); + buffer_position = self->far_history_pos - delay; + + // Check buffer position + if (buffer_position < 0) { + buffer_position += MAX_DELAY; + } + // Get Q-domain + *far_q = self->far_q_domains[buffer_position]; + // Return far end spectrum + return &(self->far_history[buffer_position * PART_LEN1]); +} + +// Declare function pointers. +CalcLinearEnergies WebRtcAecm_CalcLinearEnergies; +StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel; +ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel; + +AecmCore* WebRtcAecm_CreateCore() { + AecmCore* aecm = malloc(sizeof(AecmCore)); + + aecm->farFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(int16_t)); + if (!aecm->farFrameBuf) + { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + + aecm->nearNoisyFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(int16_t)); + if (!aecm->nearNoisyFrameBuf) + { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + + aecm->nearCleanFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(int16_t)); + if (!aecm->nearCleanFrameBuf) + { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + + aecm->outFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(int16_t)); + if (!aecm->outFrameBuf) + { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + + aecm->delay_estimator_farend = WebRtc_CreateDelayEstimatorFarend(PART_LEN1, + MAX_DELAY); + if (aecm->delay_estimator_farend == NULL) { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + aecm->delay_estimator = + WebRtc_CreateDelayEstimator(aecm->delay_estimator_farend, 0); + if (aecm->delay_estimator == NULL) { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + // TODO(bjornv): Explicitly disable robust delay 
validation until no + // performance regression has been established. Then remove the line. + WebRtc_enable_robust_validation(aecm->delay_estimator, 0); + + aecm->real_fft = WebRtcSpl_CreateRealFFT(PART_LEN_SHIFT); + if (aecm->real_fft == NULL) { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + + // Init some aecm pointers. 16 and 32 byte alignment is only necessary + // for Neon code currently. + aecm->xBuf = (int16_t*) (((uintptr_t)aecm->xBuf_buf + 31) & ~ 31); + aecm->dBufClean = (int16_t*) (((uintptr_t)aecm->dBufClean_buf + 31) & ~ 31); + aecm->dBufNoisy = (int16_t*) (((uintptr_t)aecm->dBufNoisy_buf + 31) & ~ 31); + aecm->outBuf = (int16_t*) (((uintptr_t)aecm->outBuf_buf + 15) & ~ 15); + aecm->channelStored = (int16_t*) (((uintptr_t) + aecm->channelStored_buf + 15) & ~ 15); + aecm->channelAdapt16 = (int16_t*) (((uintptr_t) + aecm->channelAdapt16_buf + 15) & ~ 15); + aecm->channelAdapt32 = (int32_t*) (((uintptr_t) + aecm->channelAdapt32_buf + 31) & ~ 31); + + return aecm; +} + +void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path) { + int i = 0; + + // Reset the stored channel + memcpy(aecm->channelStored, echo_path, sizeof(int16_t) * PART_LEN1); + // Reset the adapted channels + memcpy(aecm->channelAdapt16, echo_path, sizeof(int16_t) * PART_LEN1); + for (i = 0; i < PART_LEN1; i++) + { + aecm->channelAdapt32[i] = (int32_t)aecm->channelAdapt16[i] << 16; + } + + // Reset channel storing variables + aecm->mseAdaptOld = 1000; + aecm->mseStoredOld = 1000; + aecm->mseThreshold = WEBRTC_SPL_WORD32_MAX; + aecm->mseChannelCount = 0; +} + +static void CalcLinearEnergiesC(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored) { + int i; + + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. 
+ for (i = 0; i < PART_LEN1; i++) + { + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); + (*far_energy) += (uint32_t)(far_spectrum[i]); + *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i]; + (*echo_energy_stored) += (uint32_t)echo_est[i]; + } +} + +static void StoreAdaptiveChannelC(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est) { + int i; + + // During startup we store the channel every block. + memcpy(aecm->channelStored, aecm->channelAdapt16, sizeof(int16_t) * PART_LEN1); + // Recalculate echo estimate + for (i = 0; i < PART_LEN; i += 4) + { + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); + echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1], + far_spectrum[i + 1]); + echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2], + far_spectrum[i + 2]); + echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3], + far_spectrum[i + 3]); + } + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); +} + +static void ResetAdaptiveChannelC(AecmCore* aecm) { + int i; + + // The stored channel has a significantly lower MSE than the adaptive one for + // two consecutive calculations. Reset the adaptive channel. + memcpy(aecm->channelAdapt16, aecm->channelStored, + sizeof(int16_t) * PART_LEN1); + // Restore the W32 channel + for (i = 0; i < PART_LEN; i += 4) + { + aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16; + aecm->channelAdapt32[i + 1] = (int32_t)aecm->channelStored[i + 1] << 16; + aecm->channelAdapt32[i + 2] = (int32_t)aecm->channelStored[i + 2] << 16; + aecm->channelAdapt32[i + 3] = (int32_t)aecm->channelStored[i + 3] << 16; + } + aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16; +} + +// Initialize function pointers for ARM Neon platform. 
+#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON) +static void WebRtcAecm_InitNeon(void) +{ + WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannelNeon; + WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannelNeon; + WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon; +} +#endif + +// Initialize function pointers for MIPS platform. +#if defined(MIPS32_LE) +static void WebRtcAecm_InitMips(void) +{ +#if defined(MIPS_DSP_R1_LE) + WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannel_mips; + WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannel_mips; +#endif + WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergies_mips; +} +#endif + +// WebRtcAecm_InitCore(...) +// +// This function initializes the AECM instant created with WebRtcAecm_CreateCore(...) +// Input: +// - aecm : Pointer to the Echo Suppression instance +// - samplingFreq : Sampling Frequency +// +// Output: +// - aecm : Initialized instance +// +// Return value : 0 - Ok +// -1 - Error +// +int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq) { + int i = 0; + int32_t tmp32 = PART_LEN1 * PART_LEN1; + int16_t tmp16 = PART_LEN1; + + if (samplingFreq != 8000 && samplingFreq != 16000) + { + samplingFreq = 8000; + return -1; + } + // sanity check of sampling frequency + aecm->mult = (int16_t)samplingFreq / 8000; + + aecm->farBufWritePos = 0; + aecm->farBufReadPos = 0; + aecm->knownDelay = 0; + aecm->lastKnownDelay = 0; + + WebRtc_InitBuffer(aecm->farFrameBuf); + WebRtc_InitBuffer(aecm->nearNoisyFrameBuf); + WebRtc_InitBuffer(aecm->nearCleanFrameBuf); + WebRtc_InitBuffer(aecm->outFrameBuf); + + memset(aecm->xBuf_buf, 0, sizeof(aecm->xBuf_buf)); + memset(aecm->dBufClean_buf, 0, sizeof(aecm->dBufClean_buf)); + memset(aecm->dBufNoisy_buf, 0, sizeof(aecm->dBufNoisy_buf)); + memset(aecm->outBuf_buf, 0, sizeof(aecm->outBuf_buf)); + + aecm->seed = 666; + aecm->totCount = 0; + + if 
(WebRtc_InitDelayEstimatorFarend(aecm->delay_estimator_farend) != 0) { + return -1; + } + if (WebRtc_InitDelayEstimator(aecm->delay_estimator) != 0) { + return -1; + } + // Set far end histories to zero + memset(aecm->far_history, 0, sizeof(uint16_t) * PART_LEN1 * MAX_DELAY); + memset(aecm->far_q_domains, 0, sizeof(int) * MAX_DELAY); + aecm->far_history_pos = MAX_DELAY; + + aecm->nlpFlag = 1; + aecm->fixedDelay = -1; + + aecm->dfaCleanQDomain = 0; + aecm->dfaCleanQDomainOld = 0; + aecm->dfaNoisyQDomain = 0; + aecm->dfaNoisyQDomainOld = 0; + + memset(aecm->nearLogEnergy, 0, sizeof(aecm->nearLogEnergy)); + aecm->farLogEnergy = 0; + memset(aecm->echoAdaptLogEnergy, 0, sizeof(aecm->echoAdaptLogEnergy)); + memset(aecm->echoStoredLogEnergy, 0, sizeof(aecm->echoStoredLogEnergy)); + + // Initialize the echo channels with a stored shape. + if (samplingFreq == 8000) + { + WebRtcAecm_InitEchoPathCore(aecm, kChannelStored8kHz); + } + else + { + WebRtcAecm_InitEchoPathCore(aecm, kChannelStored16kHz); + } + + memset(aecm->echoFilt, 0, sizeof(aecm->echoFilt)); + memset(aecm->nearFilt, 0, sizeof(aecm->nearFilt)); + aecm->noiseEstCtr = 0; + + aecm->cngMode = AecmTrue; + + memset(aecm->noiseEstTooLowCtr, 0, sizeof(aecm->noiseEstTooLowCtr)); + memset(aecm->noiseEstTooHighCtr, 0, sizeof(aecm->noiseEstTooHighCtr)); + // Shape the initial noise level to an approximate pink noise. + for (i = 0; i < (PART_LEN1 >> 1) - 1; i++) + { + aecm->noiseEst[i] = (tmp32 << 8); + tmp16--; + tmp32 -= (int32_t)((tmp16 << 1) + 1); + } + for (; i < PART_LEN1; i++) + { + aecm->noiseEst[i] = (tmp32 << 8); + } + + aecm->farEnergyMin = WEBRTC_SPL_WORD16_MAX; + aecm->farEnergyMax = WEBRTC_SPL_WORD16_MIN; + aecm->farEnergyMaxMin = 0; + aecm->farEnergyVAD = FAR_ENERGY_MIN; // This prevents false speech detection at the + // beginning. 
+ aecm->farEnergyMSE = 0; + aecm->currentVADValue = 0; + aecm->vadUpdateCount = 0; + aecm->firstVAD = 1; + + aecm->startupState = 0; + aecm->supGain = SUPGAIN_DEFAULT; + aecm->supGainOld = SUPGAIN_DEFAULT; + + aecm->supGainErrParamA = SUPGAIN_ERROR_PARAM_A; + aecm->supGainErrParamD = SUPGAIN_ERROR_PARAM_D; + aecm->supGainErrParamDiffAB = SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B; + aecm->supGainErrParamDiffBD = SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D; + + // Assert a preprocessor definition at compile-time. It's an assumption + // used in assembly code, so check the assembly files before any change. + COMPILE_ASSERT(PART_LEN % 16 == 0); + + // Initialize function pointers. + WebRtcAecm_CalcLinearEnergies = CalcLinearEnergiesC; + WebRtcAecm_StoreAdaptiveChannel = StoreAdaptiveChannelC; + WebRtcAecm_ResetAdaptiveChannel = ResetAdaptiveChannelC; + +#ifdef WEBRTC_DETECT_NEON + uint64_t features = WebRtc_GetCPUFeaturesARM(); + if ((features & kCPUFeatureNEON) != 0) + { + WebRtcAecm_InitNeon(); + } +#elif defined(WEBRTC_HAS_NEON) + WebRtcAecm_InitNeon(); +#endif + +#if defined(MIPS32_LE) + WebRtcAecm_InitMips(); +#endif + return 0; +} + +// TODO(bjornv): This function is currently not used. 
Add support for these +// parameters from a higher level +int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag) { + aecm->nlpFlag = nlpFlag; + aecm->fixedDelay = delay; + + return 0; +} + +void WebRtcAecm_FreeCore(AecmCore* aecm) { + if (aecm == NULL) { + return; + } + + WebRtc_FreeBuffer(aecm->farFrameBuf); + WebRtc_FreeBuffer(aecm->nearNoisyFrameBuf); + WebRtc_FreeBuffer(aecm->nearCleanFrameBuf); + WebRtc_FreeBuffer(aecm->outFrameBuf); + + WebRtc_FreeDelayEstimator(aecm->delay_estimator); + WebRtc_FreeDelayEstimatorFarend(aecm->delay_estimator_farend); + WebRtcSpl_FreeRealFFT(aecm->real_fft); + + free(aecm); +} + +int WebRtcAecm_ProcessFrame(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* out) { + int16_t outBlock_buf[PART_LEN + 8]; // Align buffer to 8-byte boundary. + int16_t* outBlock = (int16_t*) (((uintptr_t) outBlock_buf + 15) & ~ 15); + + int16_t farFrame[FRAME_LEN]; + const int16_t* out_ptr = NULL; + int size = 0; + + // Buffer the current frame. + // Fetch an older one corresponding to the delay. + WebRtcAecm_BufferFarFrame(aecm, farend, FRAME_LEN); + WebRtcAecm_FetchFarFrame(aecm, farFrame, FRAME_LEN, aecm->knownDelay); + + // Buffer the synchronized far and near frames, + // to pass the smaller blocks individually. + WebRtc_WriteBuffer(aecm->farFrameBuf, farFrame, FRAME_LEN); + WebRtc_WriteBuffer(aecm->nearNoisyFrameBuf, nearendNoisy, FRAME_LEN); + if (nearendClean != NULL) + { + WebRtc_WriteBuffer(aecm->nearCleanFrameBuf, nearendClean, FRAME_LEN); + } + + // Process as many blocks as possible. 
+ while (WebRtc_available_read(aecm->farFrameBuf) >= PART_LEN) + { + int16_t far_block[PART_LEN]; + const int16_t* far_block_ptr = NULL; + int16_t near_noisy_block[PART_LEN]; + const int16_t* near_noisy_block_ptr = NULL; + + WebRtc_ReadBuffer(aecm->farFrameBuf, (void**) &far_block_ptr, far_block, + PART_LEN); + WebRtc_ReadBuffer(aecm->nearNoisyFrameBuf, + (void**) &near_noisy_block_ptr, + near_noisy_block, + PART_LEN); + if (nearendClean != NULL) + { + int16_t near_clean_block[PART_LEN]; + const int16_t* near_clean_block_ptr = NULL; + + WebRtc_ReadBuffer(aecm->nearCleanFrameBuf, + (void**) &near_clean_block_ptr, + near_clean_block, + PART_LEN); + if (WebRtcAecm_ProcessBlock(aecm, + far_block_ptr, + near_noisy_block_ptr, + near_clean_block_ptr, + outBlock) == -1) + { + return -1; + } + } else + { + if (WebRtcAecm_ProcessBlock(aecm, + far_block_ptr, + near_noisy_block_ptr, + NULL, + outBlock) == -1) + { + return -1; + } + } + + WebRtc_WriteBuffer(aecm->outFrameBuf, outBlock, PART_LEN); + } + + // Stuff the out buffer if we have less than a frame to output. + // This should only happen for the first frame. + size = (int) WebRtc_available_read(aecm->outFrameBuf); + if (size < FRAME_LEN) + { + WebRtc_MoveReadPtr(aecm->outFrameBuf, size - FRAME_LEN); + } + + // Obtain an output frame. + WebRtc_ReadBuffer(aecm->outFrameBuf, (void**) &out_ptr, out, FRAME_LEN); + if (out_ptr != out) { + // ReadBuffer() hasn't copied to |out| in this case. + memcpy(out, out_ptr, FRAME_LEN * sizeof(int16_t)); + } + + return 0; +} + +// WebRtcAecm_AsymFilt(...) +// +// Performs asymmetric filtering. +// +// Inputs: +// - filtOld : Previous filtered value. +// - inVal : New input value. +// - stepSizePos : Step size when we have a positive contribution. +// - stepSizeNeg : Step size when we have a negative contribution. +// +// Output: +// +// Return: - Filtered value. 
// ExtractFractionPart(a, zeros)
//
// Shifts |a| left by |zeros| (its number of leading zeros), drops the top bit
// and returns the next eight bits, i.e. the fraction part as an int16_t in Q8.
// No check is made that |zeros| actually matches |a|.
static int16_t ExtractFractionPart(uint32_t a, int zeros) {
  const uint32_t normalized = a << zeros;
  const uint32_t below_top_bit = normalized & 0x7FFFFFFF;
  return (int16_t)(below_top_bit >> 23);
}
+// +void WebRtcAecm_CalcEnergies(AecmCore* aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint32_t nearEner, + int32_t* echoEst) { + // Local variables + uint32_t tmpAdapt = 0; + uint32_t tmpStored = 0; + uint32_t tmpFar = 0; + + int i; + + int16_t tmp16; + int16_t increase_max_shifts = 4; + int16_t decrease_max_shifts = 11; + int16_t increase_min_shifts = 11; + int16_t decrease_min_shifts = 3; + + // Get log of near end energy and store in buffer + + // Shift buffer + memmove(aecm->nearLogEnergy + 1, aecm->nearLogEnergy, + sizeof(int16_t) * (MAX_BUF_LEN - 1)); + + // Logarithm of integrated magnitude spectrum (nearEner) + aecm->nearLogEnergy[0] = LogOfEnergyInQ8(nearEner, aecm->dfaNoisyQDomain); + + WebRtcAecm_CalcLinearEnergies(aecm, far_spectrum, echoEst, &tmpFar, &tmpAdapt, &tmpStored); + + // Shift buffers + memmove(aecm->echoAdaptLogEnergy + 1, aecm->echoAdaptLogEnergy, + sizeof(int16_t) * (MAX_BUF_LEN - 1)); + memmove(aecm->echoStoredLogEnergy + 1, aecm->echoStoredLogEnergy, + sizeof(int16_t) * (MAX_BUF_LEN - 1)); + + // Logarithm of delayed far end energy + aecm->farLogEnergy = LogOfEnergyInQ8(tmpFar, far_q); + + // Logarithm of estimated echo energy through adapted channel + aecm->echoAdaptLogEnergy[0] = LogOfEnergyInQ8(tmpAdapt, + RESOLUTION_CHANNEL16 + far_q); + + // Logarithm of estimated echo energy through stored channel + aecm->echoStoredLogEnergy[0] = + LogOfEnergyInQ8(tmpStored, RESOLUTION_CHANNEL16 + far_q); + + // Update farend energy levels (min, max, vad, mse) + if (aecm->farLogEnergy > FAR_ENERGY_MIN) + { + if (aecm->startupState == 0) + { + increase_max_shifts = 2; + decrease_min_shifts = 2; + increase_min_shifts = 8; + } + + aecm->farEnergyMin = WebRtcAecm_AsymFilt(aecm->farEnergyMin, aecm->farLogEnergy, + increase_min_shifts, decrease_min_shifts); + aecm->farEnergyMax = WebRtcAecm_AsymFilt(aecm->farEnergyMax, aecm->farLogEnergy, + increase_max_shifts, decrease_max_shifts); + aecm->farEnergyMaxMin = (aecm->farEnergyMax 
- aecm->farEnergyMin); + + // Dynamic VAD region size + tmp16 = 2560 - aecm->farEnergyMin; + if (tmp16 > 0) + { + tmp16 = (int16_t)((tmp16 * FAR_ENERGY_VAD_REGION) >> 9); + } else + { + tmp16 = 0; + } + tmp16 += FAR_ENERGY_VAD_REGION; + + if ((aecm->startupState == 0) | (aecm->vadUpdateCount > 1024)) + { + // In startup phase or VAD update halted + aecm->farEnergyVAD = aecm->farEnergyMin + tmp16; + } else + { + if (aecm->farEnergyVAD > aecm->farLogEnergy) + { + aecm->farEnergyVAD += + (aecm->farLogEnergy + tmp16 - aecm->farEnergyVAD) >> 6; + aecm->vadUpdateCount = 0; + } else + { + aecm->vadUpdateCount++; + } + } + // Put MSE threshold higher than VAD + aecm->farEnergyMSE = aecm->farEnergyVAD + (1 << 8); + } + + // Update VAD variables + if (aecm->farLogEnergy > aecm->farEnergyVAD) + { + if ((aecm->startupState == 0) | (aecm->farEnergyMaxMin > FAR_ENERGY_DIFF)) + { + // We are in startup or have significant dynamics in input speech level + aecm->currentVADValue = 1; + } + } else + { + aecm->currentVADValue = 0; + } + if ((aecm->currentVADValue) && (aecm->firstVAD)) + { + aecm->firstVAD = 0; + if (aecm->echoAdaptLogEnergy[0] > aecm->nearLogEnergy[0]) + { + // The estimated echo has higher energy than the near end signal. + // This means that the initialization was too aggressive. Scale + // down by a factor 8 + for (i = 0; i < PART_LEN1; i++) + { + aecm->channelAdapt16[i] >>= 3; + } + // Compensate the adapted echo energy level accordingly. + aecm->echoAdaptLogEnergy[0] -= (3 << 8); + aecm->firstVAD = 1; + } + } +} + +// WebRtcAecm_CalcStepSize(...) +// +// This function calculates the step size used in channel estimation +// +// +// @param aecm [in] Handle of the AECM instance. +// @param mu [out] (Return value) Stepsize in log2(), i.e. number of shifts. 
//
//
// Summary of the branches below:
//   - aecm->currentVADValue == 0 : returns 0 (no channel update).
//   - aecm->startupState > 0     : value derived from where farLogEnergy lies
//                                  relative to farEnergyMin, scaled by MU_DIFF
//                                  and farEnergyMaxMin, never below MU_MAX.
//   - otherwise                  : returns MU_MAX unchanged.
//
int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm) {
    int32_t tmp32;
    int16_t tmp16;
    int16_t mu = MU_MAX;

    // Here we calculate the step size mu used in the
    // following NLMS based Channel estimation algorithm
    if (!aecm->currentVADValue)
    {
        // Far end energy level too low, no channel update
        mu = 0;
    } else if (aecm->startupState > 0)
    {
        if (aecm->farEnergyMin >= aecm->farEnergyMax)
        {
            // No usable dynamic range between min and max trackers.
            mu = MU_MIN;
        } else
        {
            tmp16 = (aecm->farLogEnergy - aecm->farEnergyMin);
            tmp32 = tmp16 * MU_DIFF;
            // NOTE(review): the divisor is farEnergyMaxMin; it is assumed to
            // be non-zero here because farEnergyMin < farEnergyMax, provided
            // it was refreshed together with them - confirm.
            tmp32 = WebRtcSpl_DivW32W16(tmp32, aecm->farEnergyMaxMin);
            mu = MU_MIN - 1 - (int16_t)(tmp32);
            // The -1 is an alternative to rounding. This way we get a larger
            // stepsize, so we in some sense compensate for truncation in NLMS
        }
        if (mu < MU_MAX)
        {
            mu = MU_MAX; // Equivalent with maximum step size of 2^-MU_MAX
        }
    }

    return mu;
}

// WebRtcAecm_UpdateChannel(...)
//
// This function performs channel estimation. NLMS and decision on channel storage.
//
// The function has two stages:
//   1. If |mu| is non-zero, every bin of aecm->channelAdapt32 (and its 16-bit
//      copy aecm->channelAdapt16) is updated.
//   2. The adaptive channel is then stored, the adaptive channel is reset, or
//      nothing is done, based on the startup state and on accumulated
//      absolute log-energy errors.
//
//
// @param  aecm         [i/o]   Handle of the AECM instance.
// @param  far_spectrum [in]    Absolute value of the farend signal in Q(far_q)
// @param  far_q        [in]    Q-domain of the farend signal
// @param  dfa          [in]    Absolute value of the nearend signal
//                              (Q[aecm->dfaNoisyQDomain])
// @param  mu           [in]    NLMS step size.
// @param  echoEst      [i/o]   Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
//
void WebRtcAecm_UpdateChannel(AecmCore* aecm,
                              const uint16_t* far_spectrum,
                              const int16_t far_q,
                              const uint16_t* const dfa,
                              const int16_t mu,
                              int32_t* echoEst) {
    uint32_t tmpU32no1, tmpU32no2;
    int32_t tmp32no1, tmp32no2;
    int32_t mseStored;
    int32_t mseAdapt;

    int i;

    int16_t zerosFar, zerosNum, zerosCh, zerosDfa;
    int16_t shiftChFar, shiftNum, shift2ResChan;
    int16_t tmp16no1;
    int16_t xfaQ, dfaQ;

    // This is the channel estimation algorithm. It is based on NLMS but has a variable step
    // length, which was calculated above.
    if (mu)
    {
        for (i = 0; i < PART_LEN1; i++)
        {
            // Determine norm of channel and farend to make sure we don't get overflow in
            // multiplication
            zerosCh = WebRtcSpl_NormU32(aecm->channelAdapt32[i]);
            zerosFar = WebRtcSpl_NormU32((uint32_t)far_spectrum[i]);
            if (zerosCh + zerosFar > 31)
            {
                // Multiplication is safe
                tmpU32no1 = WEBRTC_SPL_UMUL_32_16(aecm->channelAdapt32[i],
                        far_spectrum[i]);
                shiftChFar = 0;
            } else
            {
                // We need to shift down before multiplication
                shiftChFar = 32 - zerosCh - zerosFar;
                tmpU32no1 = (aecm->channelAdapt32[i] >> shiftChFar) *
                    far_spectrum[i];
            }
            // Determine Q-domain of numerator
            zerosNum = WebRtcSpl_NormU32(tmpU32no1);
            if (dfa[i])
            {
                zerosDfa = WebRtcSpl_NormU32((uint32_t)dfa[i]);
            } else
            {
                // A zero near end bin is treated as having 32 leading zeros.
                zerosDfa = 32;
            }
            tmp16no1 = zerosDfa - 2 + aecm->dfaNoisyQDomain -
                RESOLUTION_CHANNEL32 - far_q + shiftChFar;
            if (zerosNum > tmp16no1 + 1)
            {
                xfaQ = tmp16no1;
                dfaQ = zerosDfa - 2;
            } else
            {
                xfaQ = zerosNum - 2;
                dfaQ = RESOLUTION_CHANNEL32 + far_q - aecm->dfaNoisyQDomain -
                    shiftChFar + xfaQ;
            }
            // Add in the same Q-domain
            tmpU32no1 = WEBRTC_SPL_SHIFT_W32(tmpU32no1, xfaQ);
            tmpU32no2 = WEBRTC_SPL_SHIFT_W32((uint32_t)dfa[i], dfaQ);
            // Error term: near end magnitude minus estimated echo magnitude.
            tmp32no1 = (int32_t)tmpU32no2 - (int32_t)tmpU32no1;
            zerosNum = WebRtcSpl_NormW32(tmp32no1);
            // Skip bins with zero error or with far end magnitude at or below
            // CHANNEL_VAD (scaled to Q(far_q)).
            if ((tmp32no1) && (far_spectrum[i] > (CHANNEL_VAD << far_q)))
            {
                //
                // Update is needed
                //
                // This is what we would like to compute
                //
                // tmp32no1 = dfa[i] - (aecm->channelAdapt[i] * far_spectrum[i])
                // tmp32norm = (i + 1)
                // aecm->channelAdapt[i] += (2^mu) * tmp32no1
                //                        / (tmp32norm * far_spectrum[i])
                //

                // Make sure we don't get overflow in multiplication.
                // NOTE(review): the negative branches negate tmp32no1; that
                // would overflow if tmp32no1 were INT32_MIN - confirm this
                // value cannot occur.
                if (zerosNum + zerosFar > 31)
                {
                    if (tmp32no1 > 0)
                    {
                        tmp32no2 = (int32_t)WEBRTC_SPL_UMUL_32_16(tmp32no1,
                                                                        far_spectrum[i]);
                    } else
                    {
                        tmp32no2 = -(int32_t)WEBRTC_SPL_UMUL_32_16(-tmp32no1,
                                                                         far_spectrum[i]);
                    }
                    shiftNum = 0;
                } else
                {
                    shiftNum = 32 - (zerosNum + zerosFar);
                    if (tmp32no1 > 0)
                    {
                        tmp32no2 = (tmp32no1 >> shiftNum) * far_spectrum[i];
                    } else
                    {
                        tmp32no2 = -((-tmp32no1 >> shiftNum) * far_spectrum[i]);
                    }
                }
                // Normalize with respect to frequency bin
                tmp32no2 = WebRtcSpl_DivW32W16(tmp32no2, i + 1);
                // Make sure we are in the right Q-domain
                shift2ResChan = shiftNum + shiftChFar - xfaQ - mu - ((30 - zerosFar) << 1);
                if (WebRtcSpl_NormW32(tmp32no2) < shift2ResChan)
                {
                    // Not enough headroom for the shift: use the largest
                    // positive value instead.
                    tmp32no2 = WEBRTC_SPL_WORD32_MAX;
                } else
                {
                    tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, shift2ResChan);
                }
                aecm->channelAdapt32[i] =
                    WebRtcSpl_AddSatW32(aecm->channelAdapt32[i], tmp32no2);
                if (aecm->channelAdapt32[i] < 0)
                {
                    // We can never have negative channel gain
                    aecm->channelAdapt32[i] = 0;
                }
                // Keep the 16-bit channel as the upper half of the 32-bit one.
                aecm->channelAdapt16[i] =
                    (int16_t)(aecm->channelAdapt32[i] >> 16);
            }
        }
    }
    // END: Adaptive channel update

    // Determine if we should store or restore the channel
    // (The conditions in this section are combined with bitwise '&', so every
    // operand is evaluated.)
    if ((aecm->startupState == 0) & (aecm->currentVADValue))
    {
        // During startup we store the channel every block,
        // and we recalculate echo estimate
        WebRtcAecm_StoreAdaptiveChannel(aecm, far_spectrum, echoEst);
    } else
    {
        // Count consecutive blocks with far end energy at or above the MSE
        // threshold; any block below it restarts the count.
        if (aecm->farLogEnergy < aecm->farEnergyMSE)
        {
            aecm->mseChannelCount = 0;
        } else
        {
            aecm->mseChannelCount++;
        }
        // Enough data for validation. Store channel if we can.
        if (aecm->mseChannelCount >= (MIN_MSE_COUNT + 10))
        {
            // We have enough data.
            // Calculate MSE of "Adapt" and "Stored" versions.
            // It is actually not MSE, but average absolute error.
            mseStored = 0;
            mseAdapt = 0;
            for (i = 0; i < MIN_MSE_COUNT; i++)
            {
                tmp32no1 = ((int32_t)aecm->echoStoredLogEnergy[i]
                        - (int32_t)aecm->nearLogEnergy[i]);
                tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1);
                mseStored += tmp32no2;

                tmp32no1 = ((int32_t)aecm->echoAdaptLogEnergy[i]
                        - (int32_t)aecm->nearLogEnergy[i]);
                tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1);
                mseAdapt += tmp32no2;
            }
            if (((mseStored << MSE_RESOLUTION) < (MIN_MSE_DIFF * mseAdapt))
                    & ((aecm->mseStoredOld << MSE_RESOLUTION) < (MIN_MSE_DIFF
                            * aecm->mseAdaptOld)))
            {
                // The stored channel has a significantly lower MSE than the adaptive one for
                // two consecutive calculations. Reset the adaptive channel.
                WebRtcAecm_ResetAdaptiveChannel(aecm);
            } else if (((MIN_MSE_DIFF * mseStored) > (mseAdapt << MSE_RESOLUTION)) & (mseAdapt
                    < aecm->mseThreshold) & (aecm->mseAdaptOld < aecm->mseThreshold))
            {
                // The adaptive channel has a significantly lower MSE than the stored one.
                // The MSE for the adaptive channel has also been low for two consecutive
                // calculations. Store the adaptive channel.
                WebRtcAecm_StoreAdaptiveChannel(aecm, far_spectrum, echoEst);

                // Update threshold
                if (aecm->mseThreshold == WEBRTC_SPL_WORD32_MAX)
                {
                    // Threshold still at its maximum value: seed it from the
                    // two most recent adaptive errors.
                    aecm->mseThreshold = (mseAdapt + aecm->mseAdaptOld);
                } else
                {
                    // Integer form of:
                    // threshold += (205/256) * (mseAdapt - (5/8) * threshold)
                    int scaled_threshold = aecm->mseThreshold * 5 / 8;
                    aecm->mseThreshold +=
                        ((mseAdapt - scaled_threshold) * 205) >> 8;
                }

            }

            // Reset counter
            aecm->mseChannelCount = 0;

            // Store the MSE values.
            aecm->mseStoredOld = mseStored;
            aecm->mseAdaptOld = mseAdapt;
        }
    }
    // END: Determine if we should store or reset channel estimate.
}

// WebRtcAecm_CalcSuppressionGain(...)
//
// This function calculates the suppression gain that is used in the Wiener filter.
//
//
// @param  aecm     [i/o]   Handle of the AECM instance.
// @param  supGain  [out]   (Return value) Suppression gain with which to scale the noise
//                          level (Q14).
+// +// +int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm) { + int32_t tmp32no1; + + int16_t supGain = SUPGAIN_DEFAULT; + int16_t tmp16no1; + int16_t dE = 0; + + // Determine suppression gain used in the Wiener filter. The gain is based on a mix of far + // end energy and echo estimation error. + // Adjust for the far end signal level. A low signal level indicates no far end signal, + // hence we set the suppression gain to 0 + if (!aecm->currentVADValue) + { + supGain = 0; + } else + { + // Adjust for possible double talk. If we have large variations in estimation error we + // likely have double talk (or poor channel). + tmp16no1 = (aecm->nearLogEnergy[0] - aecm->echoStoredLogEnergy[0] - ENERGY_DEV_OFFSET); + dE = WEBRTC_SPL_ABS_W16(tmp16no1); + + if (dE < ENERGY_DEV_TOL) + { + // Likely no double talk. The better estimation, the more we can suppress signal. + // Update counters + if (dE < SUPGAIN_EPC_DT) + { + tmp32no1 = aecm->supGainErrParamDiffAB * dE; + tmp32no1 += (SUPGAIN_EPC_DT >> 1); + tmp16no1 = (int16_t)WebRtcSpl_DivW32W16(tmp32no1, SUPGAIN_EPC_DT); + supGain = aecm->supGainErrParamA - tmp16no1; + } else + { + tmp32no1 = aecm->supGainErrParamDiffBD * (ENERGY_DEV_TOL - dE); + tmp32no1 += ((ENERGY_DEV_TOL - SUPGAIN_EPC_DT) >> 1); + tmp16no1 = (int16_t)WebRtcSpl_DivW32W16(tmp32no1, (ENERGY_DEV_TOL + - SUPGAIN_EPC_DT)); + supGain = aecm->supGainErrParamD + tmp16no1; + } + } else + { + // Likely in double talk. 
Use default value + supGain = aecm->supGainErrParamD; + } + } + + if (supGain > aecm->supGainOld) + { + tmp16no1 = supGain; + } else + { + tmp16no1 = aecm->supGainOld; + } + aecm->supGainOld = supGain; + if (tmp16no1 < aecm->supGain) + { + aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4); + } else + { + aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4); + } + + // END: Update suppression gain + + return aecm->supGain; +} + +void WebRtcAecm_BufferFarFrame(AecmCore* const aecm, + const int16_t* const farend, + const int farLen) { + int writeLen = farLen, writePos = 0; + + // Check if the write position must be wrapped + while (aecm->farBufWritePos + writeLen > FAR_BUF_LEN) + { + // Write to remaining buffer space before wrapping + writeLen = FAR_BUF_LEN - aecm->farBufWritePos; + memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos, + sizeof(int16_t) * writeLen); + aecm->farBufWritePos = 0; + writePos = writeLen; + writeLen = farLen - writeLen; + } + + memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos, + sizeof(int16_t) * writeLen); + aecm->farBufWritePos += writeLen; +} + +void WebRtcAecm_FetchFarFrame(AecmCore* const aecm, + int16_t* const farend, + const int farLen, + const int knownDelay) { + int readLen = farLen; + int readPos = 0; + int delayChange = knownDelay - aecm->lastKnownDelay; + + aecm->farBufReadPos -= delayChange; + + // Check if delay forces a read position wrap + while (aecm->farBufReadPos < 0) + { + aecm->farBufReadPos += FAR_BUF_LEN; + } + while (aecm->farBufReadPos > FAR_BUF_LEN - 1) + { + aecm->farBufReadPos -= FAR_BUF_LEN; + } + + aecm->lastKnownDelay = knownDelay; + + // Check if read position must be wrapped + while (aecm->farBufReadPos + readLen > FAR_BUF_LEN) + { + + // Read from remaining buffer space before wrapping + readLen = FAR_BUF_LEN - aecm->farBufReadPos; + memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos, + sizeof(int16_t) * readLen); + aecm->farBufReadPos = 0; + readPos = 
readLen; + readLen = farLen - readLen; + } + memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos, + sizeof(int16_t) * readLen); + aecm->farBufReadPos += readLen; +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.h new file mode 100644 index 00000000..b52bb62d --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.h @@ -0,0 +1,434 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Performs echo control (suppression) with fft routines in fixed-point. + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_ + +#include "webrtc/common_audio/ring_buffer.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/aecm/aecm_defines.h" +#include "webrtc/typedefs.h" + +#ifdef _MSC_VER // visual c++ +#define ALIGN8_BEG __declspec(align(8)) +#define ALIGN8_END +#else // gcc or icc +#define ALIGN8_BEG +#define ALIGN8_END __attribute__((aligned(8))) +#endif + +typedef struct { + int16_t real; + int16_t imag; +} ComplexInt16; + +typedef struct { + int farBufWritePos; + int farBufReadPos; + int knownDelay; + int lastKnownDelay; + int firstVAD; // Parameter to control poorly initialized channels + + RingBuffer* farFrameBuf; + RingBuffer* nearNoisyFrameBuf; + RingBuffer* nearCleanFrameBuf; + RingBuffer* outFrameBuf; + + int16_t farBuf[FAR_BUF_LEN]; + + int16_t mult; + uint32_t seed; + + // Delay estimation variables + void* 
delay_estimator_farend; + void* delay_estimator; + uint16_t currentDelay; + // Far end history variables + // TODO(bjornv): Replace |far_history| with ring_buffer. + uint16_t far_history[PART_LEN1 * MAX_DELAY]; + int far_history_pos; + int far_q_domains[MAX_DELAY]; + + int16_t nlpFlag; + int16_t fixedDelay; + + uint32_t totCount; + + int16_t dfaCleanQDomain; + int16_t dfaCleanQDomainOld; + int16_t dfaNoisyQDomain; + int16_t dfaNoisyQDomainOld; + + int16_t nearLogEnergy[MAX_BUF_LEN]; + int16_t farLogEnergy; + int16_t echoAdaptLogEnergy[MAX_BUF_LEN]; + int16_t echoStoredLogEnergy[MAX_BUF_LEN]; + + // The extra 16 or 32 bytes in the following buffers are for alignment based + // Neon code. + // It's designed this way since the current GCC compiler can't align a + // buffer in 16 or 32 byte boundaries properly. + int16_t channelStored_buf[PART_LEN1 + 8]; + int16_t channelAdapt16_buf[PART_LEN1 + 8]; + int32_t channelAdapt32_buf[PART_LEN1 + 8]; + int16_t xBuf_buf[PART_LEN2 + 16]; // farend + int16_t dBufClean_buf[PART_LEN2 + 16]; // nearend + int16_t dBufNoisy_buf[PART_LEN2 + 16]; // nearend + int16_t outBuf_buf[PART_LEN + 8]; + + // Pointers to the above buffers + int16_t *channelStored; + int16_t *channelAdapt16; + int32_t *channelAdapt32; + int16_t *xBuf; + int16_t *dBufClean; + int16_t *dBufNoisy; + int16_t *outBuf; + + int32_t echoFilt[PART_LEN1]; + int16_t nearFilt[PART_LEN1]; + int32_t noiseEst[PART_LEN1]; + int noiseEstTooLowCtr[PART_LEN1]; + int noiseEstTooHighCtr[PART_LEN1]; + int16_t noiseEstCtr; + int16_t cngMode; + + int32_t mseAdaptOld; + int32_t mseStoredOld; + int32_t mseThreshold; + + int16_t farEnergyMin; + int16_t farEnergyMax; + int16_t farEnergyMaxMin; + int16_t farEnergyVAD; + int16_t farEnergyMSE; + int currentVADValue; + int16_t vadUpdateCount; + + int16_t startupState; + int16_t mseChannelCount; + int16_t supGain; + int16_t supGainOld; + + int16_t supGainErrParamA; + int16_t supGainErrParamD; + int16_t supGainErrParamDiffAB; + int16_t 
supGainErrParamDiffBD; + + struct RealFFT* real_fft; + +#ifdef AEC_DEBUG + FILE *farFile; + FILE *nearFile; + FILE *outFile; +#endif +} AecmCore; + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CreateCore() +// +// Allocates the memory needed by the AECM. The memory needs to be +// initialized separately using the WebRtcAecm_InitCore() function. +// Returns a pointer to the instance and a nullptr at failure. +AecmCore* WebRtcAecm_CreateCore(); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_InitCore(...) +// +// This function initializes the AECM instant created with +// WebRtcAecm_CreateCore() +// Input: +// - aecm : Pointer to the AECM instance +// - samplingFreq : Sampling Frequency +// +// Output: +// - aecm : Initialized instance +// +// Return value : 0 - Ok +// -1 - Error +// +int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_FreeCore(...) +// +// This function releases the memory allocated by WebRtcAecm_CreateCore() +// Input: +// - aecm : Pointer to the AECM instance +// +void WebRtcAecm_FreeCore(AecmCore* aecm); + +int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_InitEchoPathCore(...) +// +// This function resets the echo channel adaptation with the specified channel. +// Input: +// - aecm : Pointer to the AECM instance +// - echo_path : Pointer to the data that should initialize the echo +// path +// +// Output: +// - aecm : Initialized instance +// +void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_ProcessFrame(...) +// +// This function processes frames and sends blocks to +// WebRtcAecm_ProcessBlock(...) 
+// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one frame of echo signal +// - nearendNoisy : In buffer containing one frame of nearend+echo signal +// without NS +// - nearendClean : In buffer containing one frame of nearend+echo signal +// with NS +// +// Output: +// - out : Out buffer, one frame of nearend signal : +// +// +int WebRtcAecm_ProcessFrame(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* out); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_ProcessBlock(...) +// +// This function is called for every block within one frame +// This function is called by WebRtcAecm_ProcessFrame(...) +// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one block of echo signal +// - nearendNoisy : In buffer containing one frame of nearend+echo signal +// without NS +// - nearendClean : In buffer containing one frame of nearend+echo signal +// with NS +// +// Output: +// - out : Out buffer, one block of nearend signal : +// +// +int WebRtcAecm_ProcessBlock(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* noisyClean, + int16_t* out); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_BufferFarFrame() +// +// Inserts a frame of data into farend buffer. 
+// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one frame of farend signal +// - farLen : Length of frame +// +void WebRtcAecm_BufferFarFrame(AecmCore* const aecm, + const int16_t* const farend, + const int farLen); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_FetchFarFrame() +// +// Read the farend buffer to account for known delay +// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one frame of farend signal +// - farLen : Length of frame +// - knownDelay : known delay +// +void WebRtcAecm_FetchFarFrame(AecmCore* const aecm, + int16_t* const farend, + const int farLen, + const int knownDelay); + +// All the functions below are intended to be private + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_UpdateFarHistory() +// +// Moves the pointer to the next entry and inserts |far_spectrum| and +// corresponding Q-domain in its buffer. +// +// Inputs: +// - self : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum +// - far_q : Q-domain of far end spectrum +// +void WebRtcAecm_UpdateFarHistory(AecmCore* self, + uint16_t* far_spectrum, + int far_q); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_AlignedFarend() +// +// Returns a pointer to the far end spectrum aligned to current near end +// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been +// called before AlignedFarend(...). Otherwise, you get the pointer to the +// previous frame. The memory is only valid until the next call of +// WebRtc_DelayEstimatorProcessFix(...). +// +// Inputs: +// - self : Pointer to the AECM instance. +// - delay : Current delay estimate. 
+// +// Output: +// - far_q : The Q-domain of the aligned far end spectrum +// +// Return value: +// - far_spectrum : Pointer to the aligned far end spectrum +// NULL - Error +// +const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, int* far_q, int delay); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcSuppressionGain() +// +// This function calculates the suppression gain that is used in the +// Wiener filter. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// +// Return value: +// - supGain : Suppression gain with which to scale the noise +// level (Q14). +// +int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcEnergies() +// +// This function calculates the log of energies for nearend, farend and +// estimated echoes. There is also an update of energy decision levels, +// i.e. internal VAD. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// - far_spectrum : Pointer to farend spectrum. +// - far_q : Q-domain of farend spectrum. +// - nearEner : Near end energy for current block in +// Q(aecm->dfaQDomain). +// +// Output: +// - echoEst : Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16). +// +void WebRtcAecm_CalcEnergies(AecmCore* aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint32_t nearEner, + int32_t* echoEst); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcStepSize() +// +// This function calculates the step size used in channel estimation +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// +// Return value: +// - mu : Stepsize in log2(), i.e. number of shifts. +// +int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_UpdateChannel(...) 
+// +// This function performs channel estimation. +// NLMS and decision on channel storage. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// - far_spectrum : Absolute value of the farend signal in Q(far_q) +// - far_q : Q-domain of the farend signal +// - dfa : Absolute value of the nearend signal +// (Q[aecm->dfaQDomain]) +// - mu : NLMS step size. +// Input/Output: +// - echoEst : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16). +// +void WebRtcAecm_UpdateChannel(AecmCore* aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint16_t* const dfa, + const int16_t mu, + int32_t* echoEst); + +extern const int16_t WebRtcAecm_kCosTable[]; +extern const int16_t WebRtcAecm_kSinTable[]; + +/////////////////////////////////////////////////////////////////////////////// +// Some function pointers, for internal functions shared by ARM NEON and +// generic C code. +// +typedef void (*CalcLinearEnergies)(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echoEst, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored); +extern CalcLinearEnergies WebRtcAecm_CalcLinearEnergies; + +typedef void (*StoreAdaptiveChannel)(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est); +extern StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel; + +typedef void (*ResetAdaptiveChannel)(AecmCore* aecm); +extern ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel; + +// For the above function pointers, functions for generic platforms are declared +// and defined as static in file aecm_core.c, while those for ARM Neon platforms +// are declared below and defined in file aecm_core_neon.c. 
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON) +void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored); + +void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est); + +void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm); +#endif + +#if defined(MIPS32_LE) +void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored); +#if defined(MIPS_DSP_R1_LE) +void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est); + +void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm); +#endif +#endif + +#endif diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_c.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_c.c new file mode 100644 index 00000000..eb2bd918 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_c.c @@ -0,0 +1,771 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/aecm/aecm_core.h" + +#include <assert.h> +#include <stddef.h> +#include <stdlib.h> + +#include "webrtc/common_audio/ring_buffer.h" +#include "webrtc/common_audio/signal_processing/include/real_fft.h" +#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h" +#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" +#include "webrtc/system_wrappers/interface/compile_assert_c.h" +#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" +#include "webrtc/typedefs.h" + +// Square root of Hanning window in Q14. +#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON) +// Table is defined in an ARM assembly file. +extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END; +#else +static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = { + 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, + 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, + 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040, + 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, + 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, + 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079, + 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, + 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384 +}; +#endif + +#ifdef AECM_WITH_ABS_APPROX +//Q15 alpha = 0.99439986968132 const Factor for magnitude approximation +static const uint16_t kAlpha1 = 32584; +//Q15 beta = 0.12967166976970 const Factor for magnitude approximation +static const uint16_t kBeta1 = 4249; +//Q15 alpha = 0.94234827210087 const Factor for magnitude approximation +static const uint16_t kAlpha2 = 30879; +//Q15 beta = 0.33787806009150 const Factor for magnitude approximation +static const uint16_t kBeta2 = 11072; +//Q15 alpha = 0.82247698684306 const Factor for magnitude approximation +static const uint16_t kAlpha3 = 26951; +//Q15 beta = 0.57762063060713 const Factor for magnitude approximation +static const 
uint16_t kBeta3 = 18927; +#endif + +static const int16_t kNoiseEstQDomain = 15; +static const int16_t kNoiseEstIncCount = 5; + +static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda); + +static void WindowAndFFT(AecmCore* aecm, + int16_t* fft, + const int16_t* time_signal, + ComplexInt16* freq_signal, + int time_signal_scaling) { + int i = 0; + + // FFT of signal + for (i = 0; i < PART_LEN; i++) { + // Window time domain signal and insert into real part of + // transformation array |fft| + int16_t scaled_time_signal = time_signal[i] << time_signal_scaling; + fft[i] = (int16_t)((scaled_time_signal * WebRtcAecm_kSqrtHanning[i]) >> 14); + scaled_time_signal = time_signal[i + PART_LEN] << time_signal_scaling; + fft[PART_LEN + i] = (int16_t)(( + scaled_time_signal * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14); + } + + // Do forward FFT, then take only the first PART_LEN complex samples, + // and change signs of the imaginary parts. + WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal); + for (i = 0; i < PART_LEN; i++) { + freq_signal[i].imag = -freq_signal[i].imag; + } +} + +static void InverseFFTAndWindow(AecmCore* aecm, + int16_t* fft, + ComplexInt16* efw, + int16_t* output, + const int16_t* nearendClean) { + int i, j, outCFFT; + int32_t tmp32no1; + // Reuse |efw| for the inverse FFT output after transferring + // the contents to |fft|. + int16_t* ifft_out = (int16_t*)efw; + + // Synthesis + for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) { + fft[j] = efw[i].real; + fft[j + 1] = -efw[i].imag; + } + fft[0] = efw[0].real; + fft[1] = -efw[0].imag; + + fft[PART_LEN2] = efw[PART_LEN].real; + fft[PART_LEN2 + 1] = -efw[PART_LEN].imag; + + // Inverse FFT. Keep outCFFT to scale the samples in the next block. 
+ outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out); + for (i = 0; i < PART_LEN; i++) { + ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14); + tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i], + outCFFT - aecm->dfaCleanQDomain); + output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + tmp32no1 + aecm->outBuf[i], + WEBRTC_SPL_WORD16_MIN); + + tmp32no1 = (ifft_out[PART_LEN + i] * + WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14; + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, + outCFFT - aecm->dfaCleanQDomain); + aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + tmp32no1, + WEBRTC_SPL_WORD16_MIN); + } + + // Copy the current block to the old position + // (aecm->outBuf is shifted elsewhere) + memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy, + aecm->dBufNoisy + PART_LEN, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) + { + memcpy(aecm->dBufClean, + aecm->dBufClean + PART_LEN, + sizeof(int16_t) * PART_LEN); + } +} + +// Transforms a time domain signal into the frequency domain, outputting the +// complex valued signal, absolute value and sum of absolute values. 
+// +// time_signal [in] Pointer to time domain signal +// freq_signal_real [out] Pointer to real part of frequency domain array +// freq_signal_imag [out] Pointer to imaginary part of frequency domain +// array +// freq_signal_abs [out] Pointer to absolute value of frequency domain +// array +// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in +// the frequency domain array +// return value The Q-domain of current frequency values +// +static int TimeToFrequencyDomain(AecmCore* aecm, + const int16_t* time_signal, + ComplexInt16* freq_signal, + uint16_t* freq_signal_abs, + uint32_t* freq_signal_sum_abs) { + int i = 0; + int time_signal_scaling = 0; + + int32_t tmp32no1 = 0; + int32_t tmp32no2 = 0; + + // In fft_buf, +16 for 32-byte alignment. + int16_t fft_buf[PART_LEN4 + 16]; + int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31); + + int16_t tmp16no1; +#ifndef WEBRTC_ARCH_ARM_V7 + int16_t tmp16no2; +#endif +#ifdef AECM_WITH_ABS_APPROX + int16_t max_value = 0; + int16_t min_value = 0; + uint16_t alpha = 0; + uint16_t beta = 0; +#endif + +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); + time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); +#endif + + WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling); + + // Extract imaginary and real part, calculate the magnitude for + // all frequency bins + freq_signal[0].imag = 0; + freq_signal[PART_LEN].imag = 0; + freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real); + freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[PART_LEN].real); + (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) + + (uint32_t)(freq_signal_abs[PART_LEN]); + + for (i = 1; i < PART_LEN; i++) + { + if (freq_signal[i].real == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + } + else if (freq_signal[i].imag == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real); + } 
+ else + { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) + // + // The parameters alpha and beta are stored in Q15 + +#ifdef AECM_WITH_ABS_APPROX + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + + if(tmp16no1 > tmp16no2) + { + max_value = tmp16no1; + min_value = tmp16no2; + } else + { + max_value = tmp16no2; + min_value = tmp16no1; + } + + // Magnitude in Q(-6) + if ((max_value >> 2) > min_value) + { + alpha = kAlpha1; + beta = kBeta1; + } else if ((max_value >> 1) > min_value) + { + alpha = kAlpha2; + beta = kBeta2; + } else + { + alpha = kAlpha3; + beta = kBeta3; + } + tmp16no1 = (int16_t)((max_value * alpha) >> 15); + tmp16no2 = (int16_t)((min_value * beta) >> 15); + freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2; +#else +#ifdef WEBRTC_ARCH_ARM_V7 + __asm __volatile( + "smulbb %[tmp32no1], %[real], %[real]\n\t" + "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t" + :[tmp32no1]"+&r"(tmp32no1), + [tmp32no2]"=r"(tmp32no2) + :[real]"r"(freq_signal[i].real), + [imag]"r"(freq_signal[i].imag) + ); +#else + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + tmp32no1 = tmp16no1 * tmp16no1; + tmp32no2 = tmp16no2 * tmp16no2; + tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2); +#endif // WEBRTC_ARCH_ARM_V7 + tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); + + freq_signal_abs[i] = (uint16_t)tmp32no1; +#endif // AECM_WITH_ABS_APPROX + } + (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i]; + } + + return time_signal_scaling; +} + +int WebRtcAecm_ProcessBlock(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* output) { + int i; + + uint32_t xfaSum; + uint32_t dfaNoisySum; + uint32_t dfaCleanSum; + uint32_t echoEst32Gained; + uint32_t tmpU32; + + int32_t tmp32no1; + + 
uint16_t xfa[PART_LEN1]; + uint16_t dfaNoisy[PART_LEN1]; + uint16_t dfaClean[PART_LEN1]; + uint16_t* ptrDfaClean = dfaClean; + const uint16_t* far_spectrum_ptr = NULL; + + // 32 byte aligned buffers (with +8 or +16). + // TODO(kma): define fft with ComplexInt16. + int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. + int32_t echoEst32_buf[PART_LEN1 + 8]; + int32_t dfw_buf[PART_LEN2 + 8]; + int32_t efw_buf[PART_LEN2 + 8]; + + int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31); + int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31); + ComplexInt16* dfw = (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~31); + ComplexInt16* efw = (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~31); + + int16_t hnl[PART_LEN1]; + int16_t numPosCoef = 0; + int16_t nlpGain = ONE_Q14; + int delay; + int16_t tmp16no1; + int16_t tmp16no2; + int16_t mu; + int16_t supGain; + int16_t zeros32, zeros16; + int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; + int far_q; + int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff; + + const int kMinPrefBand = 4; + const int kMaxPrefBand = 24; + int32_t avgHnl32 = 0; + + // Determine startup state. There are three states: + // (0) the first CONV_LEN blocks + // (1) another CONV_LEN blocks + // (2) the rest + + if (aecm->startupState < 2) + { + aecm->startupState = (aecm->totCount >= CONV_LEN) + + (aecm->totCount >= CONV_LEN2); + } + // END: Determine startup state + + // Buffer near and far end signals + memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) + { + memcpy(aecm->dBufClean + PART_LEN, + nearendClean, + sizeof(int16_t) * PART_LEN); + } + + // Transform far end signal from time domain to frequency domain. + far_q = TimeToFrequencyDomain(aecm, + aecm->xBuf, + dfw, + xfa, + &xfaSum); + + // Transform noisy near end signal from time domain to frequency domain. 
+ zerosDBufNoisy = TimeToFrequencyDomain(aecm, + aecm->dBufNoisy, + dfw, + dfaNoisy, + &dfaNoisySum); + aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; + aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy; + + + if (nearendClean == NULL) + { + ptrDfaClean = dfaNoisy; + aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; + aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; + dfaCleanSum = dfaNoisySum; + } else + { + // Transform clean near end signal from time domain to frequency domain. + zerosDBufClean = TimeToFrequencyDomain(aecm, + aecm->dBufClean, + dfw, + dfaClean, + &dfaCleanSum); + aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; + aecm->dfaCleanQDomain = (int16_t)zerosDBufClean; + } + + // Get the delay + // Save far-end history and estimate delay + WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q); + if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, + xfa, + PART_LEN1, + far_q) == -1) { + return -1; + } + delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, + dfaNoisy, + PART_LEN1, + zerosDBufNoisy); + if (delay == -1) + { + return -1; + } + else if (delay == -2) + { + // If the delay is unknown, we assume zero. + // NOTE: this will have to be adjusted if we ever add lookahead. + delay = 0; + } + + if (aecm->fixedDelay >= 0) + { + // Use fixed delay + delay = aecm->fixedDelay; + } + + // Get aligned far end spectrum + far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay); + zerosXBuf = (int16_t) far_q; + if (far_spectrum_ptr == NULL) + { + return -1; + } + + // Calculate log(energy) and update energy threshold levels + WebRtcAecm_CalcEnergies(aecm, + far_spectrum_ptr, + zerosXBuf, + dfaNoisySum, + echoEst32); + + // Calculate stepsize + mu = WebRtcAecm_CalcStepSize(aecm); + + // Update counters + aecm->totCount++; + + // This is the channel estimation algorithm. + // It is base on NLMS but has a variable step length, + // which was calculated above. 
+ WebRtcAecm_UpdateChannel(aecm, + far_spectrum_ptr, + zerosXBuf, + dfaNoisy, + mu, + echoEst32); + supGain = WebRtcAecm_CalcSuppressionGain(aecm); + + + // Calculate Wiener filter hnl[] + for (i = 0; i < PART_LEN1; i++) + { + // Far end signal through channel estimate in Q8 + // How much can we shift right to preserve resolution + tmp32no1 = echoEst32[i] - aecm->echoFilt[i]; + aecm->echoFilt[i] += (tmp32no1 * 50) >> 8; + + zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1; + zeros16 = WebRtcSpl_NormW16(supGain) + 1; + if (zeros32 + zeros16 > 16) + { + // Multiplication is safe + // Result in + // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+ + // aecm->xfaQDomainBuf[diff]) + echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], + (uint16_t)supGain); + resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; + resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); + } else + { + tmp16no1 = 17 - zeros32 - zeros16; + resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - + RESOLUTION_SUPGAIN; + resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); + if (zeros32 > tmp16no1) + { + echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], + supGain >> tmp16no1); + } else + { + // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16) + echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain; + } + } + + zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]); + assert(zeros16 >= 0); // |zeros16| is a norm, hence non-negative. + dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld; + if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) { + tmp16no1 = aecm->nearFilt[i] << zeros16; + qDomainDiff = zeros16 - dfa_clean_q_domain_diff; + tmp16no2 = ptrDfaClean[i] >> -qDomainDiff; + } else { + tmp16no1 = dfa_clean_q_domain_diff < 0 + ? 
aecm->nearFilt[i] >> -dfa_clean_q_domain_diff + : aecm->nearFilt[i] << dfa_clean_q_domain_diff; + qDomainDiff = 0; + tmp16no2 = ptrDfaClean[i]; + } + tmp32no1 = (int32_t)(tmp16no2 - tmp16no1); + tmp16no2 = (int16_t)(tmp32no1 >> 4); + tmp16no2 += tmp16no1; + zeros16 = WebRtcSpl_NormW16(tmp16no2); + if ((tmp16no2) & (-qDomainDiff > zeros16)) { + aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX; + } else { + aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff + : tmp16no2 >> qDomainDiff; + } + + // Wiener filter coefficients, resulting hnl in Q14 + if (echoEst32Gained == 0) + { + hnl[i] = ONE_Q14; + } else if (aecm->nearFilt[i] == 0) + { + hnl[i] = 0; + } else + { + // Multiply the suppression gain + // Rounding + echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1); + tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, + (uint16_t)aecm->nearFilt[i]); + + // Current resolution is + // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32)) + // Make sure we are in Q14 + tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff); + if (tmp32no1 > ONE_Q14) + { + hnl[i] = 0; + } else if (tmp32no1 < 0) + { + hnl[i] = ONE_Q14; + } else + { + // 1-echoEst/dfa + hnl[i] = ONE_Q14 - (int16_t)tmp32no1; + if (hnl[i] < 0) + { + hnl[i] = 0; + } + } + } + if (hnl[i]) + { + numPosCoef++; + } + } + // Only in wideband. Prevent the gain in upper band from being larger than + // in lower band. + if (aecm->mult == 2) + { + // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause + // speech distortion in double-talk. 
+ for (i = 0; i < PART_LEN1; i++) + { + hnl[i] = (int16_t)((hnl[i] * hnl[i]) >> 14); + } + + for (i = kMinPrefBand; i <= kMaxPrefBand; i++) + { + avgHnl32 += (int32_t)hnl[i]; + } + assert(kMaxPrefBand - kMinPrefBand + 1 > 0); + avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); + + for (i = kMaxPrefBand; i < PART_LEN1; i++) + { + if (hnl[i] > (int16_t)avgHnl32) + { + hnl[i] = (int16_t)avgHnl32; + } + } + } + + // Calculate NLP gain, result is in Q14 + if (aecm->nlpFlag) + { + for (i = 0; i < PART_LEN1; i++) + { + // Truncate values close to zero and one. + if (hnl[i] > NLP_COMP_HIGH) + { + hnl[i] = ONE_Q14; + } else if (hnl[i] < NLP_COMP_LOW) + { + hnl[i] = 0; + } + + // Remove outliers + if (numPosCoef < 3) + { + nlpGain = 0; + } else + { + nlpGain = ONE_Q14; + } + + // NLP + if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14)) + { + hnl[i] = ONE_Q14; + } else + { + hnl[i] = (int16_t)((hnl[i] * nlpGain) >> 14); + } + + // multiply with Wiener coefficients + efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, + hnl[i], 14)); + efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, + hnl[i], 14)); + } + } + else + { + // multiply with Wiener coefficients + for (i = 0; i < PART_LEN1; i++) + { + efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, + hnl[i], 14)); + efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, + hnl[i], 14)); + } + } + + if (aecm->cngMode == AecmTrue) + { + ComfortNoise(aecm, ptrDfaClean, efw, hnl); + } + + InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); + + return 0; +} + +static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda) { + int16_t i; + int16_t tmp16; + int32_t tmp32; + + int16_t randW16[PART_LEN]; + int16_t uReal[PART_LEN1]; + int16_t uImag[PART_LEN1]; + int32_t outLShift32; + int16_t noiseRShift16[PART_LEN1]; + + int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain; + int16_t 
minTrackShift; + + assert(shiftFromNearToNoise >= 0); + assert(shiftFromNearToNoise < 16); + + if (aecm->noiseEstCtr < 100) + { + // Track the minimum more quickly initially. + aecm->noiseEstCtr++; + minTrackShift = 6; + } else + { + minTrackShift = 9; + } + + // Estimate noise power. + for (i = 0; i < PART_LEN1; i++) + { + // Shift to the noise domain. + tmp32 = (int32_t)dfa[i]; + outLShift32 = tmp32 << shiftFromNearToNoise; + + if (outLShift32 < aecm->noiseEst[i]) + { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i] = 0; + // Track the minimum. + if (aecm->noiseEst[i] < (1 << minTrackShift)) + { + // For small values, decrease noiseEst[i] every + // |kNoiseEstIncCount| block. The regular approach below can not + // go further down due to truncation. + aecm->noiseEstTooHighCtr[i]++; + if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) + { + aecm->noiseEst[i]--; + aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter + } + } + else + { + aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32) + >> minTrackShift); + } + } else + { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((aecm->noiseEst[i] >> 19) > 0) + { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. 
Scale + // down first and then multiply + aecm->noiseEst[i] >>= 11; + aecm->noiseEst[i] *= 2049; + } + else if ((aecm->noiseEst[i] >> 11) > 0) + { + // Large enough for relative increase + aecm->noiseEst[i] *= 2049; + aecm->noiseEst[i] >>= 11; + } + else + { + // Make incremental increases based on size every + // |kNoiseEstIncCount| block + aecm->noiseEstTooLowCtr[i]++; + if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) + { + aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1; + aecm->noiseEstTooLowCtr[i] = 0; // Reset counter + } + } + } + } + + for (i = 0; i < PART_LEN1; i++) + { + tmp32 = aecm->noiseEst[i] >> shiftFromNearToNoise; + if (tmp32 > 32767) + { + tmp32 = 32767; + aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise; + } + noiseRShift16[i] = (int16_t)tmp32; + + tmp16 = ONE_Q14 - lambda[i]; + noiseRShift16[i] = (int16_t)((tmp16 * noiseRShift16[i]) >> 14); + } + + // Generate a uniform random array on [0 2^15-1]. + WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); + + // Generate noise according to estimated energy. + uReal[0] = 0; // Reject LF noise. + uImag[0] = 0; + for (i = 1; i < PART_LEN1; i++) + { + // Get a random index for the cos and sin tables over [0 359]. + tmp16 = (int16_t)((359 * randW16[i - 1]) >> 15); + + // Tables are in Q13. 
+ uReal[i] = (int16_t)((noiseRShift16[i] * WebRtcAecm_kCosTable[tmp16]) >> + 13); + uImag[i] = (int16_t)((-noiseRShift16[i] * WebRtcAecm_kSinTable[tmp16]) >> + 13); + } + uImag[PART_LEN] = 0; + + for (i = 0; i < PART_LEN1; i++) + { + out[i].real = WebRtcSpl_AddSatW16(out[i].real, uReal[i]); + out[i].imag = WebRtcSpl_AddSatW16(out[i].imag, uImag[i]); + } +} + diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_mips.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_mips.c new file mode 100644 index 00000000..3c2343a8 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_mips.c @@ -0,0 +1,1566 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/aecm/aecm_core.h" + +#include <assert.h> + +#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h" +#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" + +static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = { + 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, + 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, + 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040, + 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, + 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, + 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079, + 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, + 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384 +}; + +static const int16_t kNoiseEstQDomain = 15; +static const int16_t kNoiseEstIncCount = 5; + +static int16_t coefTable[] = { + 0, 4, 256, 260, 128, 132, 384, 388, + 64, 68, 320, 324, 192, 196, 448, 452, + 32, 36, 288, 292, 160, 164, 416, 420, + 96, 100, 352, 356, 224, 228, 480, 484, + 16, 20, 272, 276, 144, 148, 400, 404, + 80, 84, 336, 340, 208, 212, 464, 468, + 48, 52, 304, 308, 176, 180, 432, 436, + 112, 116, 368, 372, 240, 244, 496, 500, + 8, 12, 264, 268, 136, 140, 392, 396, + 72, 76, 328, 332, 200, 204, 456, 460, + 40, 44, 296, 300, 168, 172, 424, 428, + 104, 108, 360, 364, 232, 236, 488, 492, + 24, 28, 280, 284, 152, 156, 408, 412, + 88, 92, 344, 348, 216, 220, 472, 476, + 56, 60, 312, 316, 184, 188, 440, 444, + 120, 124, 376, 380, 248, 252, 504, 508 +}; + +static int16_t coefTable_ifft[] = { + 0, 512, 256, 508, 128, 252, 384, 380, + 64, 124, 320, 444, 192, 188, 448, 316, + 32, 60, 288, 476, 160, 220, 416, 348, + 96, 92, 352, 412, 224, 156, 480, 284, + 16, 28, 272, 492, 144, 236, 400, 364, + 80, 108, 336, 428, 208, 172, 464, 300, + 48, 44, 304, 460, 176, 204, 432, 332, + 112, 76, 368, 396, 240, 140, 496, 268, + 8, 12, 264, 500, 136, 244, 392, 372, + 72, 116, 328, 436, 200, 180, 456, 308, + 40, 52, 296, 
468, 168, 212, 424, 340, + 104, 84, 360, 404, 232, 148, 488, 276, + 24, 20, 280, 484, 152, 228, 408, 356, + 88, 100, 344, 420, 216, 164, 472, 292, + 56, 36, 312, 452, 184, 196, 440, 324, + 120, 68, 376, 388, 248, 132, 504, 260 +}; + +static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda); + +static void WindowAndFFT(AecmCore* aecm, + int16_t* fft, + const int16_t* time_signal, + ComplexInt16* freq_signal, + int time_signal_scaling) { + int i, j; + int32_t tmp1, tmp2, tmp3, tmp4; + int16_t* pfrfi; + ComplexInt16* pfreq_signal; + int16_t f_coef, s_coef; + int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1; + int32_t hann, hann1, coefs; + + memset(fft, 0, sizeof(int16_t) * PART_LEN4); + + // FFT of signal + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[shift], %[time_signal_scaling], -14 \n\t" + "addiu %[i], $zero, 64 \n\t" + "addiu %[load_ptr], %[time_signal], 0 \n\t" + "addiu %[hann], %[hanning], 0 \n\t" + "addiu %[hann1], %[hanning], 128 \n\t" + "addiu %[coefs], %[coefTable], 0 \n\t" + "bltz %[shift], 2f \n\t" + " negu %[shift1], %[shift] \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[load_ptr]) \n\t" + "lh %[tmp2], 0(%[hann]) \n\t" + "lh %[tmp3], 128(%[load_ptr]) \n\t" + "lh %[tmp4], 0(%[hann1]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "lh %[f_coef], 0(%[coefs]) \n\t" + "lh %[s_coef], 2(%[coefs]) \n\t" + "addiu %[load_ptr], %[load_ptr], 2 \n\t" + "addiu %[hann], %[hann], 2 \n\t" + "addiu %[hann1], %[hann1], -2 \n\t" + "addu %[store_ptr1], %[fft], %[f_coef] \n\t" + "addu %[store_ptr2], %[fft], %[s_coef] \n\t" + "sllv %[tmp1], %[tmp1], %[shift] \n\t" + "sllv %[tmp3], %[tmp3], %[shift] \n\t" + "sh %[tmp1], 0(%[store_ptr1]) \n\t" + "sh %[tmp3], 0(%[store_ptr2]) \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[coefs], %[coefs], 4 \n\t" + "b 3f \n\t" + " nop \n\t" + "2: \n\t" + "lh %[tmp1], 0(%[load_ptr]) \n\t" + "lh %[tmp2], 
0(%[hann]) \n\t" + "lh %[tmp3], 128(%[load_ptr]) \n\t" + "lh %[tmp4], 0(%[hann1]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "lh %[f_coef], 0(%[coefs]) \n\t" + "lh %[s_coef], 2(%[coefs]) \n\t" + "addiu %[load_ptr], %[load_ptr], 2 \n\t" + "addiu %[hann], %[hann], 2 \n\t" + "addiu %[hann1], %[hann1], -2 \n\t" + "addu %[store_ptr1], %[fft], %[f_coef] \n\t" + "addu %[store_ptr2], %[fft], %[s_coef] \n\t" + "srav %[tmp1], %[tmp1], %[shift1] \n\t" + "srav %[tmp3], %[tmp3], %[shift1] \n\t" + "sh %[tmp1], 0(%[store_ptr1]) \n\t" + "sh %[tmp3], 0(%[store_ptr2]) \n\t" + "bgtz %[i], 2b \n\t" + " addiu %[coefs], %[coefs], 4 \n\t" + "3: \n\t" + ".set pop \n\t" + : [load_ptr] "=&r" (load_ptr), [shift] "=&r" (shift), [hann] "=&r" (hann), + [hann1] "=&r" (hann1), [shift1] "=&r" (shift1), [coefs] "=&r" (coefs), + [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), + [tmp4] "=&r" (tmp4), [i] "=&r" (i), [f_coef] "=&r" (f_coef), + [s_coef] "=&r" (s_coef), [store_ptr1] "=&r" (store_ptr1), + [store_ptr2] "=&r" (store_ptr2) + : [time_signal] "r" (time_signal), [coefTable] "r" (coefTable), + [time_signal_scaling] "r" (time_signal_scaling), + [hanning] "r" (WebRtcAecm_kSqrtHanning), [fft] "r" (fft) + : "memory", "hi", "lo" + ); + + WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); + pfrfi = fft; + pfreq_signal = freq_signal; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[j], $zero, 128 \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[pfrfi]) \n\t" + "lh %[tmp2], 2(%[pfrfi]) \n\t" + "lh %[tmp3], 4(%[pfrfi]) \n\t" + "lh %[tmp4], 6(%[pfrfi]) \n\t" + "subu %[tmp2], $zero, %[tmp2] \n\t" + "sh %[tmp1], 0(%[pfreq_signal]) \n\t" + "sh %[tmp2], 2(%[pfreq_signal]) \n\t" + "subu %[tmp4], $zero, %[tmp4] \n\t" + "sh %[tmp3], 4(%[pfreq_signal]) \n\t" + "sh %[tmp4], 6(%[pfreq_signal]) \n\t" + "lh %[tmp1], 8(%[pfrfi]) \n\t" + "lh %[tmp2], 10(%[pfrfi]) \n\t" + "lh %[tmp3], 12(%[pfrfi]) \n\t" + "lh %[tmp4], 
14(%[pfrfi]) \n\t" + "addiu %[j], %[j], -8 \n\t" + "subu %[tmp2], $zero, %[tmp2] \n\t" + "sh %[tmp1], 8(%[pfreq_signal]) \n\t" + "sh %[tmp2], 10(%[pfreq_signal]) \n\t" + "subu %[tmp4], $zero, %[tmp4] \n\t" + "sh %[tmp3], 12(%[pfreq_signal]) \n\t" + "sh %[tmp4], 14(%[pfreq_signal]) \n\t" + "addiu %[pfreq_signal], %[pfreq_signal], 16 \n\t" + "bgtz %[j], 1b \n\t" + " addiu %[pfrfi], %[pfrfi], 16 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), + [j] "=&r" (j), [pfrfi] "+r" (pfrfi), [pfreq_signal] "+r" (pfreq_signal), + [tmp4] "=&r" (tmp4) + : + : "memory" + ); +} + +static void InverseFFTAndWindow(AecmCore* aecm, + int16_t* fft, + ComplexInt16* efw, + int16_t* output, + const int16_t* nearendClean) { + int i, outCFFT; + int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im; + int16_t* pcoefTable_ifft = coefTable_ifft; + int16_t* pfft = fft; + int16_t* ppfft = fft; + ComplexInt16* pefw = efw; + int32_t out_aecm; + int16_t* paecm_buf = aecm->outBuf; + const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning; + const int16_t* pp_kSqrtHanning = &WebRtcAecm_kSqrtHanning[PART_LEN]; + int16_t* output1 = output; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], $zero, 64 \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 2(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 0(%[pefw]) \n\t" + "lh %[tmp_im], 2(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "lh %[tmp1], 4(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 6(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 4(%[pefw]) \n\t" + "lh %[tmp_im], 6(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) 
\n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "lh %[tmp1], 8(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 10(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 8(%[pefw]) \n\t" + "lh %[tmp_im], 10(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "lh %[tmp1], 12(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 14(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 12(%[pefw]) \n\t" + "lh %[tmp_im], 14(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addiu %[pcoefTable_ifft], %[pcoefTable_ifft], 16 \n\t" + "addiu %[i], %[i], -4 \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[pefw], %[pefw], 16 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft), + [i] "=&r" (i), [tmp_re] "=&r" (tmp_re), [tmp_im] "=&r" (tmp_im), + [pefw] "+r" (pefw), [pcoefTable_ifft] "+r" (pcoefTable_ifft), + [fft] "+r" (fft) + : + : "memory" + ); + + fft[2] = efw[PART_LEN].real; + fft[3] = -efw[PART_LEN].imag; + + outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1); + pfft = fft; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], $zero, 128 \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[ppfft]) \n\t" + "lh %[tmp2], 4(%[ppfft]) \n\t" + "lh %[tmp3], 8(%[ppfft]) \n\t" + "lh %[tmp4], 12(%[ppfft]) \n\t" + "addiu %[i], %[i], -4 \n\t" + "sh %[tmp1], 0(%[pfft]) \n\t" + "sh %[tmp2], 2(%[pfft]) \n\t" + "sh %[tmp3], 4(%[pfft]) \n\t" + "sh %[tmp4], 6(%[pfft]) \n\t" + "addiu %[ppfft], %[ppfft], 16 \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[pfft], %[pfft], 8 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] 
"=&r" (tmp2), [pfft] "+r" (pfft), + [i] "=&r" (i), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4), + [ppfft] "+r" (ppfft) + : + : "memory" + ); + + pfft = fft; + out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain); + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], $zero, 64 \n\t" + "11: \n\t" + "lh %[tmp1], 0(%[pfft]) \n\t" + "lh %[tmp2], 0(%[p_kSqrtHanning]) \n\t" + "addiu %[i], %[i], -2 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "lh %[tmp3], 2(%[pfft]) \n\t" + "lh %[tmp4], 2(%[p_kSqrtHanning]) \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "addiu %[tmp1], %[tmp1], 8192 \n\t" + "sra %[tmp1], %[tmp1], 14 \n\t" + "addiu %[tmp3], %[tmp3], 8192 \n\t" + "sra %[tmp3], %[tmp3], 14 \n\t" + "bgez %[out_aecm], 1f \n\t" + " negu %[tmp2], %[out_aecm] \n\t" + "srav %[tmp1], %[tmp1], %[tmp2] \n\t" + "b 2f \n\t" + " srav %[tmp3], %[tmp3], %[tmp2] \n\t" + "1: \n\t" + "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t" + "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t" + "2: \n\t" + "lh %[tmp4], 0(%[paecm_buf]) \n\t" + "lh %[tmp2], 2(%[paecm_buf]) \n\t" + "addu %[tmp3], %[tmp3], %[tmp2] \n\t" + "addu %[tmp1], %[tmp1], %[tmp4] \n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[tmp1], %[tmp1], 16 \n\t" + "sra %[tmp1], %[tmp1], 16 \n\t" + "shll_s.w %[tmp3], %[tmp3], 16 \n\t" + "sra %[tmp3], %[tmp3], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sra %[tmp4], %[tmp1], 31 \n\t" + "sra %[tmp2], %[tmp1], 15 \n\t" + "beq %[tmp4], %[tmp2], 3f \n\t" + " ori %[tmp2], $zero, 0x7fff \n\t" + "xor %[tmp1], %[tmp2], %[tmp4] \n\t" + "3: \n\t" + "sra %[tmp2], %[tmp3], 31 \n\t" + "sra %[tmp4], %[tmp3], 15 \n\t" + "beq %[tmp2], %[tmp4], 4f \n\t" + " ori %[tmp4], $zero, 0x7fff \n\t" + "xor %[tmp3], %[tmp4], %[tmp2] \n\t" + "4: \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "sh %[tmp1], 0(%[pfft]) \n\t" + "sh %[tmp1], 0(%[output1]) \n\t" + "sh %[tmp3], 2(%[pfft]) \n\t" + "sh %[tmp3], 2(%[output1]) \n\t" + "lh %[tmp1], 128(%[pfft]) \n\t" + "lh %[tmp2], 0(%[pp_kSqrtHanning]) \n\t" + "mul 
%[tmp1], %[tmp1], %[tmp2] \n\t" + "lh %[tmp3], 130(%[pfft]) \n\t" + "lh %[tmp4], -2(%[pp_kSqrtHanning]) \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "sra %[tmp1], %[tmp1], 14 \n\t" + "sra %[tmp3], %[tmp3], 14 \n\t" + "bgez %[out_aecm], 5f \n\t" + " negu %[tmp2], %[out_aecm] \n\t" + "srav %[tmp3], %[tmp3], %[tmp2] \n\t" + "b 6f \n\t" + " srav %[tmp1], %[tmp1], %[tmp2] \n\t" + "5: \n\t" + "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t" + "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t" + "6: \n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[tmp1], %[tmp1], 16 \n\t" + "sra %[tmp1], %[tmp1], 16 \n\t" + "shll_s.w %[tmp3], %[tmp3], 16 \n\t" + "sra %[tmp3], %[tmp3], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sra %[tmp4], %[tmp1], 31 \n\t" + "sra %[tmp2], %[tmp1], 15 \n\t" + "beq %[tmp4], %[tmp2], 7f \n\t" + " ori %[tmp2], $zero, 0x7fff \n\t" + "xor %[tmp1], %[tmp2], %[tmp4] \n\t" + "7: \n\t" + "sra %[tmp2], %[tmp3], 31 \n\t" + "sra %[tmp4], %[tmp3], 15 \n\t" + "beq %[tmp2], %[tmp4], 8f \n\t" + " ori %[tmp4], $zero, 0x7fff \n\t" + "xor %[tmp3], %[tmp4], %[tmp2] \n\t" + "8: \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "sh %[tmp1], 0(%[paecm_buf]) \n\t" + "sh %[tmp3], 2(%[paecm_buf]) \n\t" + "addiu %[output1], %[output1], 4 \n\t" + "addiu %[paecm_buf], %[paecm_buf], 4 \n\t" + "addiu %[pfft], %[pfft], 4 \n\t" + "addiu %[p_kSqrtHanning], %[p_kSqrtHanning], 4 \n\t" + "bgtz %[i], 11b \n\t" + " addiu %[pp_kSqrtHanning], %[pp_kSqrtHanning], -4 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft), + [output1] "+r" (output1), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4), + [paecm_buf] "+r" (paecm_buf), [i] "=&r" (i), + [pp_kSqrtHanning] "+r" (pp_kSqrtHanning), + [p_kSqrtHanning] "+r" (p_kSqrtHanning) + : [out_aecm] "r" (out_aecm), + [WebRtcAecm_kSqrtHanning] "r" (WebRtcAecm_kSqrtHanning) + : "hi", "lo","memory" + ); + + // Copy the current block to the old position + // (aecm->outBuf is shifted elsewhere) + memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, 
sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy, + aecm->dBufNoisy + PART_LEN, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean, + aecm->dBufClean + PART_LEN, + sizeof(int16_t) * PART_LEN); + } +} + +void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored) { + int i; + uint32_t par1 = (*far_energy); + uint32_t par2 = (*echo_energy_adapt); + uint32_t par3 = (*echo_energy_stored); + int16_t* ch_stored_p = &(aecm->channelStored[0]); + int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]); + uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0])); + int32_t* echo_p = &(echo_est[0]); + int32_t temp0, stored0, echo0, adept0, spectrum0; + int32_t stored1, adept1, spectrum1, echo1, temp1; + + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. + for (i = 0; i < PART_LEN; i+= 4) { + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[stored0], 0(%[ch_stored_p]) \n\t" + "lhu %[adept0], 0(%[ch_adapt_p]) \n\t" + "lhu %[spectrum0], 0(%[spectrum_p]) \n\t" + "lh %[stored1], 2(%[ch_stored_p]) \n\t" + "lhu %[adept1], 2(%[ch_adapt_p]) \n\t" + "lhu %[spectrum1], 2(%[spectrum_p]) \n\t" + "mul %[echo0], %[stored0], %[spectrum0] \n\t" + "mul %[temp0], %[adept0], %[spectrum0] \n\t" + "mul %[echo1], %[stored1], %[spectrum1] \n\t" + "mul %[temp1], %[adept1], %[spectrum1] \n\t" + "addu %[par1], %[par1], %[spectrum0] \n\t" + "addu %[par1], %[par1], %[spectrum1] \n\t" + "addiu %[echo_p], %[echo_p], 16 \n\t" + "addu %[par3], %[par3], %[echo0] \n\t" + "addu %[par2], %[par2], %[temp0] \n\t" + "addu %[par3], %[par3], %[echo1] \n\t" + "addu %[par2], %[par2], %[temp1] \n\t" + "usw %[echo0], -16(%[echo_p]) \n\t" + "usw %[echo1], -12(%[echo_p]) \n\t" + "lh %[stored0], 4(%[ch_stored_p]) \n\t" + "lhu %[adept0], 4(%[ch_adapt_p]) \n\t" + "lhu %[spectrum0], 
4(%[spectrum_p]) \n\t" + "lh %[stored1], 6(%[ch_stored_p]) \n\t" + "lhu %[adept1], 6(%[ch_adapt_p]) \n\t" + "lhu %[spectrum1], 6(%[spectrum_p]) \n\t" + "mul %[echo0], %[stored0], %[spectrum0] \n\t" + "mul %[temp0], %[adept0], %[spectrum0] \n\t" + "mul %[echo1], %[stored1], %[spectrum1] \n\t" + "mul %[temp1], %[adept1], %[spectrum1] \n\t" + "addu %[par1], %[par1], %[spectrum0] \n\t" + "addu %[par1], %[par1], %[spectrum1] \n\t" + "addiu %[ch_stored_p], %[ch_stored_p], 8 \n\t" + "addiu %[ch_adapt_p], %[ch_adapt_p], 8 \n\t" + "addiu %[spectrum_p], %[spectrum_p], 8 \n\t" + "addu %[par3], %[par3], %[echo0] \n\t" + "addu %[par2], %[par2], %[temp0] \n\t" + "addu %[par3], %[par3], %[echo1] \n\t" + "addu %[par2], %[par2], %[temp1] \n\t" + "usw %[echo0], -8(%[echo_p]) \n\t" + "usw %[echo1], -4(%[echo_p]) \n\t" + ".set pop \n\t" + : [temp0] "=&r" (temp0), [stored0] "=&r" (stored0), + [adept0] "=&r" (adept0), [spectrum0] "=&r" (spectrum0), + [echo0] "=&r" (echo0), [echo_p] "+r" (echo_p), [par3] "+r" (par3), + [par1] "+r" (par1), [par2] "+r" (par2), [stored1] "=&r" (stored1), + [adept1] "=&r" (adept1), [echo1] "=&r" (echo1), + [spectrum1] "=&r" (spectrum1), [temp1] "=&r" (temp1), + [ch_stored_p] "+r" (ch_stored_p), [ch_adapt_p] "+r" (ch_adapt_p), + [spectrum_p] "+r" (spectrum_p) + : + : "hi", "lo", "memory" + ); + } + + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); + par1 += (uint32_t)(far_spectrum[PART_LEN]); + par2 += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN]; + par3 += (uint32_t)echo_est[PART_LEN]; + + (*far_energy) = par1; + (*echo_energy_adapt) = par2; + (*echo_energy_stored) = par3; +} + +#if defined(MIPS_DSP_R1_LE) +void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est) { + int i; + int16_t* temp1; + uint16_t* temp8; + int32_t temp0, temp2, temp3, temp4, temp5, temp6; + int32_t* temp7 = &(echo_est[0]); + temp1 = &(aecm->channelStored[0]); + temp8 = 
(uint16_t*)(&far_spectrum[0]); + + // During startup we store the channel every block. + memcpy(aecm->channelStored, aecm->channelAdapt16, + sizeof(int16_t) * PART_LEN1); + // Recalculate echo estimate + for (i = 0; i < PART_LEN; i += 4) { + __asm __volatile ( + "ulw %[temp0], 0(%[temp8]) \n\t" + "ulw %[temp2], 0(%[temp1]) \n\t" + "ulw %[temp4], 4(%[temp8]) \n\t" + "ulw %[temp5], 4(%[temp1]) \n\t" + "muleq_s.w.phl %[temp3], %[temp2], %[temp0] \n\t" + "muleq_s.w.phr %[temp0], %[temp2], %[temp0] \n\t" + "muleq_s.w.phl %[temp6], %[temp5], %[temp4] \n\t" + "muleq_s.w.phr %[temp4], %[temp5], %[temp4] \n\t" + "addiu %[temp7], %[temp7], 16 \n\t" + "addiu %[temp1], %[temp1], 8 \n\t" + "addiu %[temp8], %[temp8], 8 \n\t" + "sra %[temp3], %[temp3], 1 \n\t" + "sra %[temp0], %[temp0], 1 \n\t" + "sra %[temp6], %[temp6], 1 \n\t" + "sra %[temp4], %[temp4], 1 \n\t" + "usw %[temp3], -12(%[temp7]) \n\t" + "usw %[temp0], -16(%[temp7]) \n\t" + "usw %[temp6], -4(%[temp7]) \n\t" + "usw %[temp4], -8(%[temp7]) \n\t" + : [temp0] "=&r" (temp0), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6), + [temp1] "+r" (temp1), [temp8] "+r" (temp8), [temp7] "+r" (temp7) + : + : "hi", "lo", "memory" + ); + } + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); +} + +void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm) { + int i; + int32_t* temp3; + int16_t* temp0; + int32_t temp1, temp2, temp4, temp5; + + temp0 = &(aecm->channelStored[0]); + temp3 = &(aecm->channelAdapt32[0]); + + // The stored channel has a significantly lower MSE than the adaptive one for + // two consecutive calculations. Reset the adaptive channel. 
+ memcpy(aecm->channelAdapt16, + aecm->channelStored, + sizeof(int16_t) * PART_LEN1); + + // Restore the W32 channel + for (i = 0; i < PART_LEN; i += 4) { + __asm __volatile ( + "ulw %[temp1], 0(%[temp0]) \n\t" + "ulw %[temp4], 4(%[temp0]) \n\t" + "preceq.w.phl %[temp2], %[temp1] \n\t" + "preceq.w.phr %[temp1], %[temp1] \n\t" + "preceq.w.phl %[temp5], %[temp4] \n\t" + "preceq.w.phr %[temp4], %[temp4] \n\t" + "addiu %[temp0], %[temp0], 8 \n\t" + "usw %[temp2], 4(%[temp3]) \n\t" + "usw %[temp1], 0(%[temp3]) \n\t" + "usw %[temp5], 12(%[temp3]) \n\t" + "usw %[temp4], 8(%[temp3]) \n\t" + "addiu %[temp3], %[temp3], 16 \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), + [temp3] "+r" (temp3), [temp0] "+r" (temp0) + : + : "memory" + ); + } + + aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16; +} +#endif // #if defined(MIPS_DSP_R1_LE) + +// Transforms a time domain signal into the frequency domain, outputting the +// complex valued signal, absolute value and sum of absolute values. +// +// time_signal [in] Pointer to time domain signal +// freq_signal_real [out] Pointer to real part of frequency domain array +// freq_signal_imag [out] Pointer to imaginary part of frequency domain +// array +// freq_signal_abs [out] Pointer to absolute value of frequency domain +// array +// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in +// the frequency domain array +// return value The Q-domain of current frequency values +// +static int TimeToFrequencyDomain(AecmCore* aecm, + const int16_t* time_signal, + ComplexInt16* freq_signal, + uint16_t* freq_signal_abs, + uint32_t* freq_signal_sum_abs) { + int i = 0; + int time_signal_scaling = 0; + + // In fft_buf, +16 for 32-byte alignment. 
+ int16_t fft_buf[PART_LEN4 + 16]; + int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31); + + int16_t tmp16no1; +#if !defined(MIPS_DSP_R2_LE) + int32_t tmp32no1; + int32_t tmp32no2; + int16_t tmp16no2; +#else + int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13; + int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23; + int16_t* freqp; + uint16_t* freqabsp; + uint32_t freqt0, freqt1, freqt2, freqt3; + uint32_t freqs; +#endif + +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); + time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); +#endif + + WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling); + + // Extract imaginary and real part, + // calculate the magnitude for all frequency bins + freq_signal[0].imag = 0; + freq_signal[PART_LEN].imag = 0; + freq_signal[PART_LEN].real = fft[PART_LEN2]; + freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real); + freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[PART_LEN].real); + (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) + + (uint32_t)(freq_signal_abs[PART_LEN]); + +#if !defined(MIPS_DSP_R2_LE) + for (i = 1; i < PART_LEN; i++) { + if (freq_signal[i].real == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[i].imag); + } + else if (freq_signal[i].imag == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[i].real); + } + else + { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) + // + // The parameters alpha and beta are stored in Q15 + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + tmp32no1 = tmp16no1 * tmp16no1; + tmp32no2 = tmp16no2 * tmp16no2; + tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2); + tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); + + freq_signal_abs[i] = (uint16_t)tmp32no1; + } + 
(*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i]; + } +#else // #if !defined(MIPS_DSP_R2_LE) + freqs = (uint32_t)(freq_signal_abs[0]) + + (uint32_t)(freq_signal_abs[PART_LEN]); + freqp = &(freq_signal[1].real); + + __asm __volatile ( + "lw %[freqt0], 0(%[freqp]) \n\t" + "lw %[freqt1], 4(%[freqp]) \n\t" + "lw %[freqt2], 8(%[freqp]) \n\t" + "mult $ac0, $zero, $zero \n\t" + "mult $ac1, $zero, $zero \n\t" + "mult $ac2, $zero, $zero \n\t" + "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t" + "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t" + "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t" + "addiu %[freqp], %[freqp], 12 \n\t" + "extr.w %[tmp32no20], $ac0, 1 \n\t" + "extr.w %[tmp32no21], $ac1, 1 \n\t" + "extr.w %[tmp32no22], $ac2, 1 \n\t" + : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1), + [freqt2] "=&r" (freqt2), [freqp] "+r" (freqp), + [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21), + [tmp32no22] "=r" (tmp32no22) + : + : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo" + ); + + tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20); + tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21); + tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22); + freq_signal_abs[1] = (uint16_t)tmp32no10; + freq_signal_abs[2] = (uint16_t)tmp32no11; + freq_signal_abs[3] = (uint16_t)tmp32no12; + freqs += (uint32_t)tmp32no10; + freqs += (uint32_t)tmp32no11; + freqs += (uint32_t)tmp32no12; + freqabsp = &(freq_signal_abs[4]); + for (i = 4; i < PART_LEN; i+=4) + { + __asm __volatile ( + "ulw %[freqt0], 0(%[freqp]) \n\t" + "ulw %[freqt1], 4(%[freqp]) \n\t" + "ulw %[freqt2], 8(%[freqp]) \n\t" + "ulw %[freqt3], 12(%[freqp]) \n\t" + "mult $ac0, $zero, $zero \n\t" + "mult $ac1, $zero, $zero \n\t" + "mult $ac2, $zero, $zero \n\t" + "mult $ac3, $zero, $zero \n\t" + "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t" + "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t" + "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t" + "dpaq_s.w.ph $ac3, %[freqt3], %[freqt3] \n\t" + "addiu %[freqp], %[freqp], 16 \n\t" + "addiu 
%[freqabsp], %[freqabsp], 8 \n\t" + "extr.w %[tmp32no20], $ac0, 1 \n\t" + "extr.w %[tmp32no21], $ac1, 1 \n\t" + "extr.w %[tmp32no22], $ac2, 1 \n\t" + "extr.w %[tmp32no23], $ac3, 1 \n\t" + : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1), + [freqt2] "=&r" (freqt2), [freqt3] "=&r" (freqt3), + [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21), + [tmp32no22] "=r" (tmp32no22), [tmp32no23] "=r" (tmp32no23), + [freqabsp] "+r" (freqabsp), [freqp] "+r" (freqp) + : + : "memory", "hi", "lo", "$ac1hi", "$ac1lo", + "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo" + ); + + tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20); + tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21); + tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22); + tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23); + + __asm __volatile ( + "sh %[tmp32no10], -8(%[freqabsp]) \n\t" + "sh %[tmp32no11], -6(%[freqabsp]) \n\t" + "sh %[tmp32no12], -4(%[freqabsp]) \n\t" + "sh %[tmp32no13], -2(%[freqabsp]) \n\t" + "addu %[freqs], %[freqs], %[tmp32no10] \n\t" + "addu %[freqs], %[freqs], %[tmp32no11] \n\t" + "addu %[freqs], %[freqs], %[tmp32no12] \n\t" + "addu %[freqs], %[freqs], %[tmp32no13] \n\t" + : [freqs] "+r" (freqs) + : [tmp32no10] "r" (tmp32no10), [tmp32no11] "r" (tmp32no11), + [tmp32no12] "r" (tmp32no12), [tmp32no13] "r" (tmp32no13), + [freqabsp] "r" (freqabsp) + : "memory" + ); + } + + (*freq_signal_sum_abs) = freqs; +#endif + + return time_signal_scaling; +} + +int WebRtcAecm_ProcessBlock(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* output) { + int i; + uint32_t xfaSum; + uint32_t dfaNoisySum; + uint32_t dfaCleanSum; + uint32_t echoEst32Gained; + uint32_t tmpU32; + int32_t tmp32no1; + + uint16_t xfa[PART_LEN1]; + uint16_t dfaNoisy[PART_LEN1]; + uint16_t dfaClean[PART_LEN1]; + uint16_t* ptrDfaClean = dfaClean; + const uint16_t* far_spectrum_ptr = NULL; + + // 32 byte aligned buffers (with +8 or +16). + int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. 
+ int32_t echoEst32_buf[PART_LEN1 + 8]; + int32_t dfw_buf[PART_LEN2 + 8]; + int32_t efw_buf[PART_LEN2 + 8]; + + int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~ 31); + int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~ 31); + ComplexInt16* dfw = (ComplexInt16*)(((uint32_t)dfw_buf + 31) & ~31); + ComplexInt16* efw = (ComplexInt16*)(((uint32_t)efw_buf + 31) & ~31); + + int16_t hnl[PART_LEN1]; + int16_t numPosCoef = 0; + int delay; + int16_t tmp16no1; + int16_t tmp16no2; + int16_t mu; + int16_t supGain; + int16_t zeros32, zeros16; + int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; + int far_q; + int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff; + + const int kMinPrefBand = 4; + const int kMaxPrefBand = 24; + int32_t avgHnl32 = 0; + + int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8; + int16_t* ptr; + int16_t* ptr1; + int16_t* er_ptr; + int16_t* dr_ptr; + + ptr = &hnl[0]; + ptr1 = &hnl[0]; + er_ptr = &efw[0].real; + dr_ptr = &dfw[0].real; + + // Determine startup state. There are three states: + // (0) the first CONV_LEN blocks + // (1) another CONV_LEN blocks + // (2) the rest + + if (aecm->startupState < 2) { + aecm->startupState = (aecm->totCount >= CONV_LEN) + + (aecm->totCount >= CONV_LEN2); + } + // END: Determine startup state + + // Buffer near and far end signals + memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy + PART_LEN, + nearendNoisy, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean + PART_LEN, + nearendClean, + sizeof(int16_t) * PART_LEN); + } + + // Transform far end signal from time domain to frequency domain. + far_q = TimeToFrequencyDomain(aecm, + aecm->xBuf, + dfw, + xfa, + &xfaSum); + + // Transform noisy near end signal from time domain to frequency domain. 
+ zerosDBufNoisy = TimeToFrequencyDomain(aecm, + aecm->dBufNoisy, + dfw, + dfaNoisy, + &dfaNoisySum); + aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; + aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy; + + if (nearendClean == NULL) { + ptrDfaClean = dfaNoisy; + aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; + aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; + dfaCleanSum = dfaNoisySum; + } else { + // Transform clean near end signal from time domain to frequency domain. + zerosDBufClean = TimeToFrequencyDomain(aecm, + aecm->dBufClean, + dfw, + dfaClean, + &dfaCleanSum); + aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; + aecm->dfaCleanQDomain = (int16_t)zerosDBufClean; + } + + // Get the delay + // Save far-end history and estimate delay + WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q); + + if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1, + far_q) == -1) { + return -1; + } + delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, + dfaNoisy, + PART_LEN1, + zerosDBufNoisy); + if (delay == -1) { + return -1; + } + else if (delay == -2) { + // If the delay is unknown, we assume zero. + // NOTE: this will have to be adjusted if we ever add lookahead. + delay = 0; + } + + if (aecm->fixedDelay >= 0) { + // Use fixed delay + delay = aecm->fixedDelay; + } + + // Get aligned far end spectrum + far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay); + zerosXBuf = (int16_t) far_q; + + if (far_spectrum_ptr == NULL) { + return -1; + } + + // Calculate log(energy) and update energy threshold levels + WebRtcAecm_CalcEnergies(aecm, + far_spectrum_ptr, + zerosXBuf, + dfaNoisySum, + echoEst32); + // Calculate stepsize + mu = WebRtcAecm_CalcStepSize(aecm); + + // Update counters + aecm->totCount++; + + // This is the channel estimation algorithm. + // It is base on NLMS but has a variable step length, + // which was calculated above. 
+ WebRtcAecm_UpdateChannel(aecm, + far_spectrum_ptr, + zerosXBuf, + dfaNoisy, + mu, + echoEst32); + + supGain = WebRtcAecm_CalcSuppressionGain(aecm); + + // Calculate Wiener filter hnl[] + for (i = 0; i < PART_LEN1; i++) { + // Far end signal through channel estimate in Q8 + // How much can we shift right to preserve resolution + tmp32no1 = echoEst32[i] - aecm->echoFilt[i]; + aecm->echoFilt[i] += (tmp32no1 * 50) >> 8; + + zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1; + zeros16 = WebRtcSpl_NormW16(supGain) + 1; + if (zeros32 + zeros16 > 16) { + // Multiplication is safe + // Result in + // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff]) + echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], + (uint16_t)supGain); + resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; + resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); + } else { + tmp16no1 = 17 - zeros32 - zeros16; + resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - + RESOLUTION_SUPGAIN; + resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); + if (zeros32 > tmp16no1) { + echoEst32Gained = WEBRTC_SPL_UMUL_32_16( + (uint32_t)aecm->echoFilt[i], + supGain >> tmp16no1); + } else { + // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16) + echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain; + } + } + + zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]); + assert(zeros16 >= 0); // |zeros16| is a norm, hence non-negative. + dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld; + if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) { + tmp16no1 = aecm->nearFilt[i] << zeros16; + qDomainDiff = zeros16 - dfa_clean_q_domain_diff; + tmp16no2 = ptrDfaClean[i] >> -qDomainDiff; + } else { + tmp16no1 = dfa_clean_q_domain_diff < 0 + ? 
aecm->nearFilt[i] >> -dfa_clean_q_domain_diff + : aecm->nearFilt[i] << dfa_clean_q_domain_diff; + qDomainDiff = 0; + tmp16no2 = ptrDfaClean[i]; + } + + tmp32no1 = (int32_t)(tmp16no2 - tmp16no1); + tmp16no2 = (int16_t)(tmp32no1 >> 4); + tmp16no2 += tmp16no1; + zeros16 = WebRtcSpl_NormW16(tmp16no2); + if ((tmp16no2) & (-qDomainDiff > zeros16)) { + aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX; + } else { + aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff + : tmp16no2 >> qDomainDiff; + } + + // Wiener filter coefficients, resulting hnl in Q14 + if (echoEst32Gained == 0) { + hnl[i] = ONE_Q14; + numPosCoef++; + } else if (aecm->nearFilt[i] == 0) { + hnl[i] = 0; + } else { + // Multiply the suppression gain + // Rounding + echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1); + tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, + (uint16_t)aecm->nearFilt[i]); + + // Current resolution is + // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN + // - max(0, 17 - zeros16 - zeros32)) + // Make sure we are in Q14 + tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff); + if (tmp32no1 > ONE_Q14) { + hnl[i] = 0; + } else if (tmp32no1 < 0) { + hnl[i] = ONE_Q14; + numPosCoef++; + } else { + // 1-echoEst/dfa + hnl[i] = ONE_Q14 - (int16_t)tmp32no1; + if (hnl[i] <= 0) { + hnl[i] = 0; + } else { + numPosCoef++; + } + } + } + } + + // Only in wideband. Prevent the gain in upper band from being larger than + // in lower band. + if (aecm->mult == 2) { + // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause + // speech distortion in double-talk. 
+ for (i = 0; i < (PART_LEN1 >> 3); i++) { + __asm __volatile ( + "lh %[temp1], 0(%[ptr1]) \n\t" + "lh %[temp2], 2(%[ptr1]) \n\t" + "lh %[temp3], 4(%[ptr1]) \n\t" + "lh %[temp4], 6(%[ptr1]) \n\t" + "lh %[temp5], 8(%[ptr1]) \n\t" + "lh %[temp6], 10(%[ptr1]) \n\t" + "lh %[temp7], 12(%[ptr1]) \n\t" + "lh %[temp8], 14(%[ptr1]) \n\t" + "mul %[temp1], %[temp1], %[temp1] \n\t" + "mul %[temp2], %[temp2], %[temp2] \n\t" + "mul %[temp3], %[temp3], %[temp3] \n\t" + "mul %[temp4], %[temp4], %[temp4] \n\t" + "mul %[temp5], %[temp5], %[temp5] \n\t" + "mul %[temp6], %[temp6], %[temp6] \n\t" + "mul %[temp7], %[temp7], %[temp7] \n\t" + "mul %[temp8], %[temp8], %[temp8] \n\t" + "sra %[temp1], %[temp1], 14 \n\t" + "sra %[temp2], %[temp2], 14 \n\t" + "sra %[temp3], %[temp3], 14 \n\t" + "sra %[temp4], %[temp4], 14 \n\t" + "sra %[temp5], %[temp5], 14 \n\t" + "sra %[temp6], %[temp6], 14 \n\t" + "sra %[temp7], %[temp7], 14 \n\t" + "sra %[temp8], %[temp8], 14 \n\t" + "sh %[temp1], 0(%[ptr1]) \n\t" + "sh %[temp2], 2(%[ptr1]) \n\t" + "sh %[temp3], 4(%[ptr1]) \n\t" + "sh %[temp4], 6(%[ptr1]) \n\t" + "sh %[temp5], 8(%[ptr1]) \n\t" + "sh %[temp6], 10(%[ptr1]) \n\t" + "sh %[temp7], 12(%[ptr1]) \n\t" + "sh %[temp8], 14(%[ptr1]) \n\t" + "addiu %[ptr1], %[ptr1], 16 \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6), + [temp7] "=&r" (temp7), [temp8] "=&r" (temp8), [ptr1] "+r" (ptr1) + : + : "memory", "hi", "lo" + ); + } + for(i = 0; i < (PART_LEN1 & 7); i++) { + __asm __volatile ( + "lh %[temp1], 0(%[ptr1]) \n\t" + "mul %[temp1], %[temp1], %[temp1] \n\t" + "sra %[temp1], %[temp1], 14 \n\t" + "sh %[temp1], 0(%[ptr1]) \n\t" + "addiu %[ptr1], %[ptr1], 2 \n\t" + : [temp1] "=&r" (temp1), [ptr1] "+r" (ptr1) + : + : "memory", "hi", "lo" + ); + } + + for (i = kMinPrefBand; i <= kMaxPrefBand; i++) { + avgHnl32 += (int32_t)hnl[i]; + } + + assert(kMaxPrefBand - kMinPrefBand + 1 > 0); + avgHnl32 /= (kMaxPrefBand - 
kMinPrefBand + 1); + + for (i = kMaxPrefBand; i < PART_LEN1; i++) { + if (hnl[i] > (int16_t)avgHnl32) { + hnl[i] = (int16_t)avgHnl32; + } + } + } + + // Calculate NLP gain, result is in Q14 + if (aecm->nlpFlag) { + if (numPosCoef < 3) { + for (i = 0; i < PART_LEN1; i++) { + efw[i].real = 0; + efw[i].imag = 0; + hnl[i] = 0; + } + } else { + for (i = 0; i < PART_LEN1; i++) { +#if defined(MIPS_DSP_R1_LE) + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[temp1], 0(%[ptr]) \n\t" + "lh %[temp2], 0(%[dr_ptr]) \n\t" + "slti %[temp4], %[temp1], 0x4001 \n\t" + "beqz %[temp4], 3f \n\t" + " lh %[temp3], 2(%[dr_ptr]) \n\t" + "slti %[temp5], %[temp1], 3277 \n\t" + "bnez %[temp5], 2f \n\t" + " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" + "mul %[temp2], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], %[temp1] \n\t" + "shra_r.w %[temp2], %[temp2], 14 \n\t" + "shra_r.w %[temp3], %[temp3], 14 \n\t" + "b 4f \n\t" + " nop \n\t" + "2: \n\t" + "addu %[temp1], $zero, $zero \n\t" + "addu %[temp2], $zero, $zero \n\t" + "addu %[temp3], $zero, $zero \n\t" + "b 1f \n\t" + " nop \n\t" + "3: \n\t" + "addiu %[temp1], $0, 0x4000 \n\t" + "1: \n\t" + "sh %[temp1], 0(%[ptr]) \n\t" + "4: \n\t" + "sh %[temp2], 0(%[er_ptr]) \n\t" + "sh %[temp3], 2(%[er_ptr]) \n\t" + "addiu %[ptr], %[ptr], 2 \n\t" + "addiu %[er_ptr], %[er_ptr], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr), + [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr) + : + : "memory", "hi", "lo" + ); +#else + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[temp1], 0(%[ptr]) \n\t" + "lh %[temp2], 0(%[dr_ptr]) \n\t" + "slti %[temp4], %[temp1], 0x4001 \n\t" + "beqz %[temp4], 3f \n\t" + " lh %[temp3], 2(%[dr_ptr]) \n\t" + "slti %[temp5], %[temp1], 3277 \n\t" + "bnez %[temp5], 2f \n\t" + " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" + "mul %[temp2], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], 
%[temp1] \n\t" + "addiu %[temp2], %[temp2], 0x2000 \n\t" + "addiu %[temp3], %[temp3], 0x2000 \n\t" + "sra %[temp2], %[temp2], 14 \n\t" + "sra %[temp3], %[temp3], 14 \n\t" + "b 4f \n\t" + " nop \n\t" + "2: \n\t" + "addu %[temp1], $zero, $zero \n\t" + "addu %[temp2], $zero, $zero \n\t" + "addu %[temp3], $zero, $zero \n\t" + "b 1f \n\t" + " nop \n\t" + "3: \n\t" + "addiu %[temp1], $0, 0x4000 \n\t" + "1: \n\t" + "sh %[temp1], 0(%[ptr]) \n\t" + "4: \n\t" + "sh %[temp2], 0(%[er_ptr]) \n\t" + "sh %[temp3], 2(%[er_ptr]) \n\t" + "addiu %[ptr], %[ptr], 2 \n\t" + "addiu %[er_ptr], %[er_ptr], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr), + [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr) + : + : "memory", "hi", "lo" + ); +#endif + } + } + } + else { + // multiply with Wiener coefficients + for (i = 0; i < PART_LEN1; i++) { + efw[i].real = (int16_t) + (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, + hnl[i], + 14)); + efw[i].imag = (int16_t) + (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, + hnl[i], + 14)); + } + } + + if (aecm->cngMode == AecmTrue) { + ComfortNoise(aecm, ptrDfaClean, efw, hnl); + } + + InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); + + return 0; +} + +// Generate comfort noise and add to output signal. 
+static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda) { + int16_t i; + int16_t tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2; + int32_t tmp32, tmp321, tnoise, tnoise1; + int32_t tmp322, tmp323, *tmp1; + int16_t* dfap; + int16_t* lambdap; + const int32_t c2049 = 2049; + const int32_t c359 = 359; + const int32_t c114 = ONE_Q14; + + int16_t randW16[PART_LEN]; + int16_t uReal[PART_LEN1]; + int16_t uImag[PART_LEN1]; + int32_t outLShift32; + + int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain; + int16_t minTrackShift = 9; + + assert(shiftFromNearToNoise >= 0); + assert(shiftFromNearToNoise < 16); + + if (aecm->noiseEstCtr < 100) { + // Track the minimum more quickly initially. + aecm->noiseEstCtr++; + minTrackShift = 6; + } + + // Generate a uniform random array on [0 2^15-1]. + WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); + int16_t* randW16p = (int16_t*)randW16; +#if defined (MIPS_DSP_R1_LE) + int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable; + int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable; +#endif // #if defined(MIPS_DSP_R1_LE) + tmp1 = (int32_t*)aecm->noiseEst + 1; + dfap = (int16_t*)dfa + 1; + lambdap = (int16_t*)lambda + 1; + // Estimate noise power. + for (i = 1; i < PART_LEN1; i+=2) { + // Shift to the noise domain. + __asm __volatile ( + "lh %[tmp32], 0(%[dfap]) \n\t" + "lw %[tnoise], 0(%[tmp1]) \n\t" + "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" + : [tmp32] "=&r" (tmp32), [outLShift32] "=r" (outLShift32), + [tnoise] "=&r" (tnoise) + : [tmp1] "r" (tmp1), [dfap] "r" (dfap), + [shiftFromNearToNoise] "r" (shiftFromNearToNoise) + : "memory" + ); + + if (outLShift32 < tnoise) { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i] = 0; + // Track the minimum. + if (tnoise < (1 << minTrackShift)) { + // For small values, decrease noiseEst[i] every + // |kNoiseEstIncCount| block. The regular approach below can not + // go further down due to truncation. 
+ aecm->noiseEstTooHighCtr[i]++; + if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) { + tnoise--; + aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter + } + } else { + __asm __volatile ( + "subu %[tmp32], %[tnoise], %[outLShift32] \n\t" + "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" + "subu %[tnoise], %[tnoise], %[tmp32] \n\t" + : [tmp32] "=&r" (tmp32), [tnoise] "+r" (tnoise) + : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift) + ); + } + } else { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((tnoise >> 19) <= 0) { + if ((tnoise >> 11) > 0) { + // Large enough for relative increase + __asm __volatile ( + "mul %[tnoise], %[tnoise], %[c2049] \n\t" + "sra %[tnoise], %[tnoise], 11 \n\t" + : [tnoise] "+r" (tnoise) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } else { + // Make incremental increases based on size every + // |kNoiseEstIncCount| block + aecm->noiseEstTooLowCtr[i]++; + if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) { + __asm __volatile ( + "sra %[tmp32], %[tnoise], 9 \n\t" + "addi %[tnoise], %[tnoise], 1 \n\t" + "addu %[tnoise], %[tnoise], %[tmp32] \n\t" + : [tnoise] "+r" (tnoise), [tmp32] "=&r" (tmp32) + : + ); + aecm->noiseEstTooLowCtr[i] = 0; // Reset counter + } + } + } else { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. Scale + // down first and then multiply + __asm __volatile ( + "sra %[tnoise], %[tnoise], 11 \n\t" + "mul %[tnoise], %[tnoise], %[c2049] \n\t" + : [tnoise] "+r" (tnoise) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } + } + + // Shift to the noise domain. 
+ __asm __volatile ( + "lh %[tmp32], 2(%[dfap]) \n\t" + "lw %[tnoise1], 4(%[tmp1]) \n\t" + "addiu %[dfap], %[dfap], 4 \n\t" + "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" + : [tmp32] "=&r" (tmp32), [dfap] "+r" (dfap), + [outLShift32] "=r" (outLShift32), [tnoise1] "=&r" (tnoise1) + : [tmp1] "r" (tmp1), [shiftFromNearToNoise] "r" (shiftFromNearToNoise) + : "memory" + ); + + if (outLShift32 < tnoise1) { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i + 1] = 0; + // Track the minimum. + if (tnoise1 < (1 << minTrackShift)) { + // For small values, decrease noiseEst[i] every + // |kNoiseEstIncCount| block. The regular approach below can not + // go further down due to truncation. + aecm->noiseEstTooHighCtr[i + 1]++; + if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) { + tnoise1--; + aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter + } + } else { + __asm __volatile ( + "subu %[tmp32], %[tnoise1], %[outLShift32] \n\t" + "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" + "subu %[tnoise1], %[tnoise1], %[tmp32] \n\t" + : [tmp32] "=&r" (tmp32), [tnoise1] "+r" (tnoise1) + : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift) + ); + } + } else { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i + 1] = 0; + // Ramp slowly upwards until we hit the minimum again. 
+ if ((tnoise1 >> 19) <= 0) { + if ((tnoise1 >> 11) > 0) { + // Large enough for relative increase + __asm __volatile ( + "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" + "sra %[tnoise1], %[tnoise1], 11 \n\t" + : [tnoise1] "+r" (tnoise1) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } else { + // Make incremental increases based on size every + // |kNoiseEstIncCount| block + aecm->noiseEstTooLowCtr[i + 1]++; + if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) { + __asm __volatile ( + "sra %[tmp32], %[tnoise1], 9 \n\t" + "addi %[tnoise1], %[tnoise1], 1 \n\t" + "addu %[tnoise1], %[tnoise1], %[tmp32] \n\t" + : [tnoise1] "+r" (tnoise1), [tmp32] "=&r" (tmp32) + : + ); + aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter + } + } + } else { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. Scale + // down first and then multiply + __asm __volatile ( + "sra %[tnoise1], %[tnoise1], 11 \n\t" + "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" + : [tnoise1] "+r" (tnoise1) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } + } + + __asm __volatile ( + "lh %[tmp16], 0(%[lambdap]) \n\t" + "lh %[tmp161], 2(%[lambdap]) \n\t" + "sw %[tnoise], 0(%[tmp1]) \n\t" + "sw %[tnoise1], 4(%[tmp1]) \n\t" + "subu %[tmp16], %[c114], %[tmp16] \n\t" + "subu %[tmp161], %[c114], %[tmp161] \n\t" + "srav %[tmp32], %[tnoise], %[shiftFromNearToNoise] \n\t" + "srav %[tmp321], %[tnoise1], %[shiftFromNearToNoise] \n\t" + "addiu %[lambdap], %[lambdap], 4 \n\t" + "addiu %[tmp1], %[tmp1], 8 \n\t" + : [tmp16] "=&r" (tmp16), [tmp161] "=&r" (tmp161), [tmp1] "+r" (tmp1), + [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), [lambdap] "+r" (lambdap) + : [tnoise] "r" (tnoise), [tnoise1] "r" (tnoise1), [c114] "r" (c114), + [shiftFromNearToNoise] "r" (shiftFromNearToNoise) + : "memory" + ); + + if (tmp32 > 32767) { + tmp32 = 32767; + aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise; + } + if (tmp321 > 32767) { + tmp321 = 32767; + aecm->noiseEst[i+1] = tmp321 << shiftFromNearToNoise; + } + + __asm 
__volatile ( + "mul %[tmp32], %[tmp32], %[tmp16] \n\t" + "mul %[tmp321], %[tmp321], %[tmp161] \n\t" + "sra %[nrsh1], %[tmp32], 14 \n\t" + "sra %[nrsh2], %[tmp321], 14 \n\t" + : [nrsh1] "=&r" (nrsh1), [nrsh2] "=r" (nrsh2) + : [tmp16] "r" (tmp16), [tmp161] "r" (tmp161), [tmp32] "r" (tmp32), + [tmp321] "r" (tmp321) + : "memory", "hi", "lo" + ); + + __asm __volatile ( + "lh %[tmp32], 0(%[randW16p]) \n\t" + "lh %[tmp321], 2(%[randW16p]) \n\t" + "addiu %[randW16p], %[randW16p], 4 \n\t" + "mul %[tmp32], %[tmp32], %[c359] \n\t" + "mul %[tmp321], %[tmp321], %[c359] \n\t" + "sra %[tmp16], %[tmp32], 15 \n\t" + "sra %[tmp161], %[tmp321], 15 \n\t" + : [randW16p] "+r" (randW16p), [tmp32] "=&r" (tmp32), + [tmp16] "=r" (tmp16), [tmp161] "=r" (tmp161), [tmp321] "=&r" (tmp321) + : [c359] "r" (c359) + : "memory", "hi", "lo" + ); + +#if !defined(MIPS_DSP_R1_LE) + tmp32 = WebRtcAecm_kCosTable[tmp16]; + tmp321 = WebRtcAecm_kSinTable[tmp16]; + tmp322 = WebRtcAecm_kCosTable[tmp161]; + tmp323 = WebRtcAecm_kSinTable[tmp161]; +#else + __asm __volatile ( + "sll %[tmp16], %[tmp16], 1 \n\t" + "sll %[tmp161], %[tmp161], 1 \n\t" + "lhx %[tmp32], %[tmp16](%[kCosTablep]) \n\t" + "lhx %[tmp321], %[tmp16](%[kSinTablep]) \n\t" + "lhx %[tmp322], %[tmp161](%[kCosTablep]) \n\t" + "lhx %[tmp323], %[tmp161](%[kSinTablep]) \n\t" + : [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), + [tmp322] "=&r" (tmp322), [tmp323] "=&r" (tmp323) + : [kCosTablep] "r" (kCosTablep), [tmp16] "r" (tmp16), + [tmp161] "r" (tmp161), [kSinTablep] "r" (kSinTablep) + : "memory" + ); +#endif + __asm __volatile ( + "mul %[tmp32], %[tmp32], %[nrsh1] \n\t" + "negu %[tmp162], %[nrsh1] \n\t" + "mul %[tmp322], %[tmp322], %[nrsh2] \n\t" + "negu %[tmp163], %[nrsh2] \n\t" + "sra %[tmp32], %[tmp32], 13 \n\t" + "mul %[tmp321], %[tmp321], %[tmp162] \n\t" + "sra %[tmp322], %[tmp322], 13 \n\t" + "mul %[tmp323], %[tmp323], %[tmp163] \n\t" + "sra %[tmp321], %[tmp321], 13 \n\t" + "sra %[tmp323], %[tmp323], 13 \n\t" + : [tmp32] "+r" (tmp32), [tmp321] 
"+r" (tmp321), [tmp162] "=&r" (tmp162), + [tmp322] "+r" (tmp322), [tmp323] "+r" (tmp323), [tmp163] "=&r" (tmp163) + : [nrsh1] "r" (nrsh1), [nrsh2] "r" (nrsh2) + : "hi", "lo" + ); + // Tables are in Q13. + uReal[i] = (int16_t)tmp32; + uImag[i] = (int16_t)tmp321; + uReal[i + 1] = (int16_t)tmp322; + uImag[i + 1] = (int16_t)tmp323; + } + + int32_t tt, sgn; + tt = out[0].real; + sgn = ((int)tt) >> 31; + out[0].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + tt = out[0].imag; + sgn = ((int)tt) >> 31; + out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + for (i = 1; i < PART_LEN; i++) { + tt = out[i].real + uReal[i]; + sgn = ((int)tt) >> 31; + out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + tt = out[i].imag + uImag[i]; + sgn = ((int)tt) >> 31; + out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + } + tt = out[PART_LEN].real + uReal[PART_LEN]; + sgn = ((int)tt) >> 31; + out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + tt = out[PART_LEN].imag; + sgn = ((int)tt) >> 31; + out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); +} + diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_neon.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_neon.c new file mode 100644 index 00000000..1751fcf7 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_neon.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/aecm/aecm_core.h" + +#include <arm_neon.h> +#include <assert.h> + +#include "webrtc/common_audio/signal_processing/include/real_fft.h" + +// TODO(kma): Re-write the corresponding assembly file, the offset +// generating script and makefile, to replace these C functions. + +// Square root of Hanning window in Q14. +const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = { + 0, + 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, + 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, + 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040, + 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, + 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, + 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079, + 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, + 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384 +}; + +static inline void AddLanes(uint32_t* ptr, uint32x4_t v) { +#if defined(WEBRTC_ARCH_ARM64) + *(ptr) = vaddvq_u32(v); +#else + uint32x2_t tmp_v; + tmp_v = vadd_u32(vget_low_u32(v), vget_high_u32(v)); + tmp_v = vpadd_u32(tmp_v, tmp_v); + *(ptr) = vget_lane_u32(tmp_v, 0); +#endif +} + +void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored) { + int16_t* start_stored_p = aecm->channelStored; + int16_t* start_adapt_p = aecm->channelAdapt16; + int32_t* echo_est_p = echo_est; + const int16_t* end_stored_p = aecm->channelStored + PART_LEN; + const uint16_t* far_spectrum_p = far_spectrum; + int16x8_t store_v, adapt_v; + uint16x8_t spectrum_v; + uint32x4_t echo_est_v_low, echo_est_v_high; + uint32x4_t far_energy_v, echo_stored_v, echo_adapt_v; + + far_energy_v = vdupq_n_u32(0); + echo_adapt_v = vdupq_n_u32(0); + echo_stored_v = vdupq_n_u32(0); + + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. 
+ // The C code: + // for (i = 0; i < PART_LEN1; i++) { + // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + // far_spectrum[i]); + // (*far_energy) += (uint32_t)(far_spectrum[i]); + // *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i]; + // (*echo_energy_stored) += (uint32_t)echo_est[i]; + // } + while (start_stored_p < end_stored_p) { + spectrum_v = vld1q_u16(far_spectrum_p); + adapt_v = vld1q_s16(start_adapt_p); + store_v = vld1q_s16(start_stored_p); + + far_energy_v = vaddw_u16(far_energy_v, vget_low_u16(spectrum_v)); + far_energy_v = vaddw_u16(far_energy_v, vget_high_u16(spectrum_v)); + + echo_est_v_low = vmull_u16(vreinterpret_u16_s16(vget_low_s16(store_v)), + vget_low_u16(spectrum_v)); + echo_est_v_high = vmull_u16(vreinterpret_u16_s16(vget_high_s16(store_v)), + vget_high_u16(spectrum_v)); + vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low)); + vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high)); + + echo_stored_v = vaddq_u32(echo_est_v_low, echo_stored_v); + echo_stored_v = vaddq_u32(echo_est_v_high, echo_stored_v); + + echo_adapt_v = vmlal_u16(echo_adapt_v, + vreinterpret_u16_s16(vget_low_s16(adapt_v)), + vget_low_u16(spectrum_v)); + echo_adapt_v = vmlal_u16(echo_adapt_v, + vreinterpret_u16_s16(vget_high_s16(adapt_v)), + vget_high_u16(spectrum_v)); + + start_stored_p += 8; + start_adapt_p += 8; + far_spectrum_p += 8; + echo_est_p += 8; + } + + AddLanes(far_energy, far_energy_v); + AddLanes(echo_energy_stored, echo_stored_v); + AddLanes(echo_energy_adapt, echo_adapt_v); + + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); + *echo_energy_stored += (uint32_t)echo_est[PART_LEN]; + *far_energy += (uint32_t)far_spectrum[PART_LEN]; + *echo_energy_adapt += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN]; +} + +void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est) { + assert((uintptr_t)echo_est % 32 
== 0); + assert((uintptr_t)(aecm->channelStored) % 16 == 0); + assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0); + + // This is C code of following optimized code. + // During startup we store the channel every block. + // memcpy(aecm->channelStored, + // aecm->channelAdapt16, + // sizeof(int16_t) * PART_LEN1); + // Recalculate echo estimate + // for (i = 0; i < PART_LEN; i += 4) { + // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + // far_spectrum[i]); + // echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1], + // far_spectrum[i + 1]); + // echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2], + // far_spectrum[i + 2]); + // echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3], + // far_spectrum[i + 3]); + // } + // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + // far_spectrum[i]); + const uint16_t* far_spectrum_p = far_spectrum; + int16_t* start_adapt_p = aecm->channelAdapt16; + int16_t* start_stored_p = aecm->channelStored; + const int16_t* end_stored_p = aecm->channelStored + PART_LEN; + int32_t* echo_est_p = echo_est; + + uint16x8_t far_spectrum_v; + int16x8_t adapt_v; + uint32x4_t echo_est_v_low, echo_est_v_high; + + while (start_stored_p < end_stored_p) { + far_spectrum_v = vld1q_u16(far_spectrum_p); + adapt_v = vld1q_s16(start_adapt_p); + + vst1q_s16(start_stored_p, adapt_v); + + echo_est_v_low = vmull_u16(vget_low_u16(far_spectrum_v), + vget_low_u16(vreinterpretq_u16_s16(adapt_v))); + echo_est_v_high = vmull_u16(vget_high_u16(far_spectrum_v), + vget_high_u16(vreinterpretq_u16_s16(adapt_v))); + + vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low)); + vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high)); + + far_spectrum_p += 8; + start_adapt_p += 8; + start_stored_p += 8; + echo_est_p += 8; + } + aecm->channelStored[PART_LEN] = aecm->channelAdapt16[PART_LEN]; + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); 
+} + +void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm) { + assert((uintptr_t)(aecm->channelStored) % 16 == 0); + assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0); + assert((uintptr_t)(aecm->channelAdapt32) % 32 == 0); + + // The C code of following optimized code. + // for (i = 0; i < PART_LEN1; i++) { + // aecm->channelAdapt16[i] = aecm->channelStored[i]; + // aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32( + // (int32_t)aecm->channelStored[i], 16); + // } + + int16_t* start_stored_p = aecm->channelStored; + int16_t* start_adapt16_p = aecm->channelAdapt16; + int32_t* start_adapt32_p = aecm->channelAdapt32; + const int16_t* end_stored_p = start_stored_p + PART_LEN; + + int16x8_t stored_v; + int32x4_t adapt32_v_low, adapt32_v_high; + + while (start_stored_p < end_stored_p) { + stored_v = vld1q_s16(start_stored_p); + vst1q_s16(start_adapt16_p, stored_v); + + adapt32_v_low = vshll_n_s16(vget_low_s16(stored_v), 16); + adapt32_v_high = vshll_n_s16(vget_high_s16(stored_v), 16); + + vst1q_s32(start_adapt32_p, adapt32_v_low); + vst1q_s32(start_adapt32_p + 4, adapt32_v_high); + + start_stored_p += 8; + start_adapt16_p += 8; + start_adapt32_p += 8; + } + aecm->channelAdapt16[PART_LEN] = aecm->channelStored[PART_LEN]; + aecm->channelAdapt32[PART_LEN] = (int32_t)aecm->channelStored[PART_LEN] << 16; +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_defines.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_defines.h new file mode 100644 index 00000000..6d63990b --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_defines.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_ + +#define AECM_DYNAMIC_Q /* Turn on/off dynamic Q-domain. */ + +/* Algorithm parameters */ +#define FRAME_LEN 80 /* Total frame length, 10 ms. */ + +#define PART_LEN 64 /* Length of partition. */ +#define PART_LEN_SHIFT 7 /* Length of (PART_LEN * 2) in base 2. */ + +#define PART_LEN1 (PART_LEN + 1) /* Unique fft coefficients. */ +#define PART_LEN2 (PART_LEN << 1) /* Length of partition * 2. */ +#define PART_LEN4 (PART_LEN << 2) /* Length of partition * 4. */ +#define FAR_BUF_LEN PART_LEN4 /* Length of buffers. */ +#define MAX_DELAY 100 + +/* Counter parameters */ +#define CONV_LEN 512 /* Convergence length used at startup. */ +#define CONV_LEN2 (CONV_LEN << 1) /* Used at startup. */ + +/* Energy parameters */ +#define MAX_BUF_LEN 64 /* History length of energy signals. */ +#define FAR_ENERGY_MIN 1025 /* Lowest Far energy level: At least 2 */ + /* in energy. */ +#define FAR_ENERGY_DIFF 929 /* Allowed difference between max */ + /* and min. */ +#define ENERGY_DEV_OFFSET 0 /* The energy error offset in Q8. */ +#define ENERGY_DEV_TOL 400 /* The energy estimation tolerance (Q8). */ +#define FAR_ENERGY_VAD_REGION 230 /* Far VAD tolerance region. */ + +/* Stepsize parameters */ +#define MU_MIN 10 /* Min stepsize 2^-MU_MIN (far end energy */ + /* dependent). */ +#define MU_MAX 1 /* Max stepsize 2^-MU_MAX (far end energy */ + /* dependent). */ +#define MU_DIFF 9 /* MU_MIN - MU_MAX */ + +/* Channel parameters */ +#define MIN_MSE_COUNT 20 /* Min number of consecutive blocks with enough */ + /* far end energy to compare channel estimates. */ +#define MIN_MSE_DIFF 29 /* The ratio between adapted and stored channel to */ + /* accept a new storage (0.8 in Q-MSE_RESOLUTION). */ +#define MSE_RESOLUTION 5 /* MSE parameter resolution. 
*/ +#define RESOLUTION_CHANNEL16 12 /* W16 Channel in Q-RESOLUTION_CHANNEL16. */ +#define RESOLUTION_CHANNEL32 28 /* W32 Channel in Q-RESOLUTION_CHANNEL. */ +#define CHANNEL_VAD 16 /* Minimum energy in frequency band */ + /* to update channel. */ + +/* Suppression gain parameters: SUPGAIN parameters in Q-(RESOLUTION_SUPGAIN). */ +#define RESOLUTION_SUPGAIN 8 /* Channel in Q-(RESOLUTION_SUPGAIN). */ +#define SUPGAIN_DEFAULT (1 << RESOLUTION_SUPGAIN) /* Default. */ +#define SUPGAIN_ERROR_PARAM_A 3072 /* Estimation error parameter */ + /* (Maximum gain) (8 in Q8). */ +#define SUPGAIN_ERROR_PARAM_B 1536 /* Estimation error parameter */ + /* (Gain before going down). */ +#define SUPGAIN_ERROR_PARAM_D SUPGAIN_DEFAULT /* Estimation error parameter */ + /* (Should be the same as Default) (1 in Q8). */ +#define SUPGAIN_EPC_DT 200 /* SUPGAIN_ERROR_PARAM_C * ENERGY_DEV_TOL */ + +/* Defines for "check delay estimation" */ +#define CORR_WIDTH 31 /* Number of samples to correlate over. */ +#define CORR_MAX 16 /* Maximum correlation offset. */ +#define CORR_MAX_BUF 63 +#define CORR_DEV 4 +#define CORR_MAX_LEVEL 20 +#define CORR_MAX_LOW 4 +#define CORR_BUF_LEN (CORR_MAX << 1) + 1 +/* Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN. */ + +#define ONE_Q14 (1 << 14) + +/* NLP defines */ +#define NLP_COMP_LOW 3277 /* 0.2 in Q14 */ +#define NLP_COMP_HIGH ONE_Q14 /* 1 in Q14 */ + +#endif diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/echo_control_mobile.c b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/echo_control_mobile.c new file mode 100644 index 00000000..83781e97 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/echo_control_mobile.c @@ -0,0 +1,702 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h" + +#ifdef AEC_DEBUG +#include <stdio.h> +#endif +#include <stdlib.h> + +#include "webrtc/common_audio/ring_buffer.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/aecm/aecm_core.h" + +#define BUF_SIZE_FRAMES 50 // buffer size (frames) +// Maximum length of resampled signal. Must be an integer multiple of frames +// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN +// The factor of 2 handles wb, and the + 1 is as a safety margin +#define MAX_RESAMP_LEN (5 * FRAME_LEN) + +static const size_t kBufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples) +static const int kSampMsNb = 8; // samples per ms in nb +// NOTE(review): the next two comment lines do not describe kInitCheck; they look like a leftover from a table that is no longer here. +// Target suppression levels for nlp modes +// log{0.001, 0.00001, 0.00000001} +// Marker stored in AecMobile.initFlag by WebRtcAecm_Init(); the other API calls compare initFlag against it. +static const int kInitCheck = 42; + +// Instance state behind the void* handle returned by WebRtcAecm_Create(). +typedef struct +{ + int sampFreq; + int scSampFreq; + short bufSizeStart; + int knownDelay; + + // Last far-end frame read for each 80-sample sub-frame of a call; replayed by WebRtcAecm_Process() when the farend buffer is empty + short farendOld[2][FRAME_LEN]; + short initFlag; // equals kInitCheck once WebRtcAecm_Init() has succeeded + + // Variables used for averaging far end buffer size + short counter; + short sum; + short firstVal; + short checkBufSizeCtr; + + // Variables used for delay shifts + short msInSndCardBuf; + short filtDelay; + int timeForDelayChange; + int ECstartup; + int checkBuffSize; + int delayChange; + short lastDelayDiff; + + int16_t echoMode; + +#ifdef AEC_DEBUG + FILE *bufFile; + FILE *delayFile; + FILE *preCompFile; + FILE *postCompFile; +#endif // AEC_DEBUG + // Structures + RingBuffer *farendBuf; + + int lastError; + + AecmCore* aecmCore; +} AecMobile; + +// Estimates delay to set the position of the farend buffer read pointer +// (controlled by knownDelay)
+static int WebRtcAecm_EstBufDelay(AecMobile* aecmInst, short msInSndCardBuf); + +// Stuffs the farend buffer if the estimated delay is too large +static int WebRtcAecm_DelayComp(AecMobile* aecmInst); + +void* WebRtcAecm_Create() { + AecMobile* aecm = malloc(sizeof(AecMobile)); + + WebRtcSpl_Init(); + + aecm->aecmCore = WebRtcAecm_CreateCore(); + if (!aecm->aecmCore) { + WebRtcAecm_Free(aecm); + return NULL; + } + + aecm->farendBuf = WebRtc_CreateBuffer(kBufSizeSamp, + sizeof(int16_t)); + if (!aecm->farendBuf) + { + WebRtcAecm_Free(aecm); + return NULL; + } + + aecm->initFlag = 0; + aecm->lastError = 0; + +#ifdef AEC_DEBUG + aecm->aecmCore->farFile = fopen("aecFar.pcm","wb"); + aecm->aecmCore->nearFile = fopen("aecNear.pcm","wb"); + aecm->aecmCore->outFile = fopen("aecOut.pcm","wb"); + //aecm->aecmCore->outLpFile = fopen("aecOutLp.pcm","wb"); + + aecm->bufFile = fopen("aecBuf.dat", "wb"); + aecm->delayFile = fopen("aecDelay.dat", "wb"); + aecm->preCompFile = fopen("preComp.pcm", "wb"); + aecm->postCompFile = fopen("postComp.pcm", "wb"); +#endif // AEC_DEBUG + return aecm; +} + +void WebRtcAecm_Free(void* aecmInst) { + AecMobile* aecm = aecmInst; + + if (aecm == NULL) { + return; + } + +#ifdef AEC_DEBUG + fclose(aecm->aecmCore->farFile); + fclose(aecm->aecmCore->nearFile); + fclose(aecm->aecmCore->outFile); + //fclose(aecm->aecmCore->outLpFile); + + fclose(aecm->bufFile); + fclose(aecm->delayFile); + fclose(aecm->preCompFile); + fclose(aecm->postCompFile); +#endif // AEC_DEBUG + WebRtcAecm_FreeCore(aecm->aecmCore); + WebRtc_FreeBuffer(aecm->farendBuf); + free(aecm); +} + +int32_t WebRtcAecm_Init(void *aecmInst, int32_t sampFreq) +{ + AecMobile* aecm = aecmInst; + AecmConfig aecConfig; + + if (aecm == NULL) + { + return -1; + } + + if (sampFreq != 8000 && sampFreq != 16000) + { + aecm->lastError = AECM_BAD_PARAMETER_ERROR; + return -1; + } + aecm->sampFreq = sampFreq; + + // Initialize AECM core + if (WebRtcAecm_InitCore(aecm->aecmCore, aecm->sampFreq) == -1) + { + 
aecm->lastError = AECM_UNSPECIFIED_ERROR; + return -1; + } + + // Initialize farend buffer + WebRtc_InitBuffer(aecm->farendBuf); + + aecm->initFlag = kInitCheck; // indicates that initialization has been done + + aecm->delayChange = 1; + + aecm->sum = 0; + aecm->counter = 0; + aecm->checkBuffSize = 1; + aecm->firstVal = 0; + + aecm->ECstartup = 1; + aecm->bufSizeStart = 0; + aecm->checkBufSizeCtr = 0; + aecm->filtDelay = 0; + aecm->timeForDelayChange = 0; + aecm->knownDelay = 0; + aecm->lastDelayDiff = 0; + + memset(&aecm->farendOld[0][0], 0, 160); + + // Default settings. + aecConfig.cngMode = AecmTrue; + aecConfig.echoMode = 3; + + if (WebRtcAecm_set_config(aecm, aecConfig) == -1) + { + aecm->lastError = AECM_UNSPECIFIED_ERROR; + return -1; + } + + return 0; +} + +int32_t WebRtcAecm_BufferFarend(void *aecmInst, const int16_t *farend, + size_t nrOfSamples) +{ + AecMobile* aecm = aecmInst; + int32_t retVal = 0; + + if (aecm == NULL) + { + return -1; + } + + if (farend == NULL) + { + aecm->lastError = AECM_NULL_POINTER_ERROR; + return -1; + } + + if (aecm->initFlag != kInitCheck) + { + aecm->lastError = AECM_UNINITIALIZED_ERROR; + return -1; + } + + if (nrOfSamples != 80 && nrOfSamples != 160) + { + aecm->lastError = AECM_BAD_PARAMETER_ERROR; + return -1; + } + + // TODO: Is this really a good idea? 
+ if (!aecm->ECstartup) + { + WebRtcAecm_DelayComp(aecm); + } + + WebRtc_WriteBuffer(aecm->farendBuf, farend, nrOfSamples); + + return retVal; +} + +int32_t WebRtcAecm_Process(void *aecmInst, const int16_t *nearendNoisy, + const int16_t *nearendClean, int16_t *out, + size_t nrOfSamples, int16_t msInSndCardBuf) +{ + AecMobile* aecm = aecmInst; + int32_t retVal = 0; + size_t i; + short nmbrOfFilledBuffers; + size_t nBlocks10ms; + size_t nFrames; +#ifdef AEC_DEBUG + short msInAECBuf; +#endif + + if (aecm == NULL) + { + return -1; + } + + if (nearendNoisy == NULL) + { + aecm->lastError = AECM_NULL_POINTER_ERROR; + return -1; + } + + if (out == NULL) + { + aecm->lastError = AECM_NULL_POINTER_ERROR; + return -1; + } + + if (aecm->initFlag != kInitCheck) + { + aecm->lastError = AECM_UNINITIALIZED_ERROR; + return -1; + } + + if (nrOfSamples != 80 && nrOfSamples != 160) + { + aecm->lastError = AECM_BAD_PARAMETER_ERROR; + return -1; + } + + if (msInSndCardBuf < 0) + { + msInSndCardBuf = 0; + aecm->lastError = AECM_BAD_PARAMETER_WARNING; + retVal = -1; + } else if (msInSndCardBuf > 500) + { + msInSndCardBuf = 500; + aecm->lastError = AECM_BAD_PARAMETER_WARNING; + retVal = -1; + } + msInSndCardBuf += 10; + aecm->msInSndCardBuf = msInSndCardBuf; + + nFrames = nrOfSamples / FRAME_LEN; + nBlocks10ms = nFrames / aecm->aecmCore->mult; + + if (aecm->ECstartup) + { + if (nearendClean == NULL) + { + if (out != nearendNoisy) + { + memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples); + } + } else if (out != nearendClean) + { + memcpy(out, nearendClean, sizeof(short) * nrOfSamples); + } + + nmbrOfFilledBuffers = + (short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN; + // The AECM is in the start up mode + // AECM is disabled until the soundcard buffer and farend buffers are OK + + // Mechanism to ensure that the soundcard buffer is reasonably stable. 
+ if (aecm->checkBuffSize) + { + aecm->checkBufSizeCtr++; + // Before we fill up the far end buffer we require the amount of data on the + // sound card to be stable (+/-8 ms) compared to the first value. This + // comparison is made during the following 4 consecutive frames. If it seems + // to be stable then we start to fill up the far end buffer. + + if (aecm->counter == 0) + { + aecm->firstVal = aecm->msInSndCardBuf; + aecm->sum = 0; + } + + if (abs(aecm->firstVal - aecm->msInSndCardBuf) + < WEBRTC_SPL_MAX(0.2 * aecm->msInSndCardBuf, kSampMsNb)) + { + aecm->sum += aecm->msInSndCardBuf; + aecm->counter++; + } else + { + aecm->counter = 0; + } + + if (aecm->counter * nBlocks10ms >= 6) + { + // The farend buffer size is determined in blocks of 80 samples + // Use 75% of the average value of the soundcard buffer + aecm->bufSizeStart + = WEBRTC_SPL_MIN((3 * aecm->sum + * aecm->aecmCore->mult) / (aecm->counter * 40), BUF_SIZE_FRAMES); + // buffersize has now been determined + aecm->checkBuffSize = 0; + } + + if (aecm->checkBufSizeCtr * nBlocks10ms > 50) + { + // for really bad sound cards, don't disable echocanceller for more than 0.5 sec + aecm->bufSizeStart = WEBRTC_SPL_MIN((3 * aecm->msInSndCardBuf + * aecm->aecmCore->mult) / 40, BUF_SIZE_FRAMES); + aecm->checkBuffSize = 0; + } + } + + // if checkBuffSize changed in the if-statement above + if (!aecm->checkBuffSize) + { + // soundcard buffer is now reasonably stable + // When the far end buffer is filled with approximately the same amount of + // data as the amount on the sound card we end the start up phase and start + // to cancel echoes. 
+ + if (nmbrOfFilledBuffers == aecm->bufSizeStart) + { + aecm->ECstartup = 0; // Enable the AECM + } else if (nmbrOfFilledBuffers > aecm->bufSizeStart) + { + WebRtc_MoveReadPtr(aecm->farendBuf, + (int) WebRtc_available_read(aecm->farendBuf) + - (int) aecm->bufSizeStart * FRAME_LEN); + aecm->ECstartup = 0; + } + } + + } else + { + // AECM is enabled + + // Note only 1 block supported for nb and 2 blocks for wb + for (i = 0; i < nFrames; i++) + { + int16_t farend[FRAME_LEN]; + const int16_t* farend_ptr = NULL; + + nmbrOfFilledBuffers = + (short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN; + + // Check that there is data in the far end buffer + if (nmbrOfFilledBuffers > 0) + { + // Get the next 80 samples from the farend buffer + WebRtc_ReadBuffer(aecm->farendBuf, (void**) &farend_ptr, farend, + FRAME_LEN); + + // Always store the last frame for use when we run out of data + memcpy(&(aecm->farendOld[i][0]), farend_ptr, + FRAME_LEN * sizeof(short)); + } else + { + // We have no data so we use the last played frame + memcpy(farend, &(aecm->farendOld[i][0]), FRAME_LEN * sizeof(short)); + farend_ptr = farend; + } + + // Call buffer delay estimator when all data is extracted, + // i,e. i = 0 for NB and i = 1 for WB + if ((i == 0 && aecm->sampFreq == 8000) || (i == 1 && aecm->sampFreq == 16000)) + { + WebRtcAecm_EstBufDelay(aecm, aecm->msInSndCardBuf); + } + + // Call the AECM + /*WebRtcAecm_ProcessFrame(aecm->aecmCore, farend, &nearend[FRAME_LEN * i], + &out[FRAME_LEN * i], aecm->knownDelay);*/ + if (WebRtcAecm_ProcessFrame(aecm->aecmCore, + farend_ptr, + &nearendNoisy[FRAME_LEN * i], + (nearendClean + ? 
&nearendClean[FRAME_LEN * i] + : NULL), + &out[FRAME_LEN * i]) == -1) + return -1; + } + } + +#ifdef AEC_DEBUG + msInAECBuf = (short) WebRtc_available_read(aecm->farendBuf) / + (kSampMsNb * aecm->aecmCore->mult); + fwrite(&msInAECBuf, 2, 1, aecm->bufFile); + fwrite(&(aecm->knownDelay), sizeof(aecm->knownDelay), 1, aecm->delayFile); +#endif + + return retVal; +} + +int32_t WebRtcAecm_set_config(void *aecmInst, AecmConfig config) +{ + AecMobile* aecm = aecmInst; + + if (aecm == NULL) + { + return -1; + } + + if (aecm->initFlag != kInitCheck) + { + aecm->lastError = AECM_UNINITIALIZED_ERROR; + return -1; + } + + if (config.cngMode != AecmFalse && config.cngMode != AecmTrue) + { + aecm->lastError = AECM_BAD_PARAMETER_ERROR; + return -1; + } + aecm->aecmCore->cngMode = config.cngMode; + + if (config.echoMode < 0 || config.echoMode > 4) + { + aecm->lastError = AECM_BAD_PARAMETER_ERROR; + return -1; + } + aecm->echoMode = config.echoMode; + + if (aecm->echoMode == 0) + { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 3; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 3; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 3; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 3; + aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A >> 3) + - (SUPGAIN_ERROR_PARAM_B >> 3); + aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B >> 3) + - (SUPGAIN_ERROR_PARAM_D >> 3); + } else if (aecm->echoMode == 1) + { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 2; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 2; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 2; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 2; + aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A >> 2) + - (SUPGAIN_ERROR_PARAM_B >> 2); + aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B >> 2) + - (SUPGAIN_ERROR_PARAM_D >> 2); + } else if (aecm->echoMode == 2) + { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 1; + 
aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 1; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 1; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 1; + aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A >> 1) + - (SUPGAIN_ERROR_PARAM_B >> 1); + aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B >> 1) + - (SUPGAIN_ERROR_PARAM_D >> 1); + } else if (aecm->echoMode == 3) + { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D; + aecm->aecmCore->supGainErrParamDiffAB = SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B; + aecm->aecmCore->supGainErrParamDiffBD = SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D; + } else if (aecm->echoMode == 4) + { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT << 1; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT << 1; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A << 1; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D << 1; + aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A << 1) + - (SUPGAIN_ERROR_PARAM_B << 1); + aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B << 1) + - (SUPGAIN_ERROR_PARAM_D << 1); + } + + return 0; +} + +int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config) +{ + AecMobile* aecm = aecmInst; + + if (aecm == NULL) + { + return -1; + } + + if (config == NULL) + { + aecm->lastError = AECM_NULL_POINTER_ERROR; + return -1; + } + + if (aecm->initFlag != kInitCheck) + { + aecm->lastError = AECM_UNINITIALIZED_ERROR; + return -1; + } + + config->cngMode = aecm->aecmCore->cngMode; + config->echoMode = aecm->echoMode; + + return 0; +} + +int32_t WebRtcAecm_InitEchoPath(void* aecmInst, + const void* echo_path, + size_t size_bytes) +{ + AecMobile* aecm = aecmInst; + const int16_t* echo_path_ptr = echo_path; + + if (aecmInst == NULL) { + return -1; + } + if (echo_path 
== NULL) { + aecm->lastError = AECM_NULL_POINTER_ERROR; + return -1; + } + if (size_bytes != WebRtcAecm_echo_path_size_bytes()) + { + // Input channel size does not match the size of AECM + aecm->lastError = AECM_BAD_PARAMETER_ERROR; + return -1; + } + if (aecm->initFlag != kInitCheck) + { + aecm->lastError = AECM_UNINITIALIZED_ERROR; + return -1; + } + + WebRtcAecm_InitEchoPathCore(aecm->aecmCore, echo_path_ptr); + + return 0; +} + +int32_t WebRtcAecm_GetEchoPath(void* aecmInst, + void* echo_path, + size_t size_bytes) +{ + AecMobile* aecm = aecmInst; + int16_t* echo_path_ptr = echo_path; + + if (aecmInst == NULL) { + return -1; + } + if (echo_path == NULL) { + aecm->lastError = AECM_NULL_POINTER_ERROR; + return -1; + } + if (size_bytes != WebRtcAecm_echo_path_size_bytes()) + { + // Input channel size does not match the size of AECM + aecm->lastError = AECM_BAD_PARAMETER_ERROR; + return -1; + } + if (aecm->initFlag != kInitCheck) + { + aecm->lastError = AECM_UNINITIALIZED_ERROR; + return -1; + } + + memcpy(echo_path_ptr, aecm->aecmCore->channelStored, size_bytes); + return 0; +} + +size_t WebRtcAecm_echo_path_size_bytes() +{ + return (PART_LEN1 * sizeof(int16_t)); +} + +int32_t WebRtcAecm_get_error_code(void *aecmInst) +{ + AecMobile* aecm = aecmInst; + + if (aecm == NULL) + { + return -1; + } + + return aecm->lastError; +} + +static int WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf) { + short delayNew, nSampSndCard; + short nSampFar = (short) WebRtc_available_read(aecm->farendBuf); + short diff; + + nSampSndCard = msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult; + + delayNew = nSampSndCard - nSampFar; + + if (delayNew < FRAME_LEN) + { + WebRtc_MoveReadPtr(aecm->farendBuf, FRAME_LEN); + delayNew += FRAME_LEN; + } + + aecm->filtDelay = WEBRTC_SPL_MAX(0, (8 * aecm->filtDelay + 2 * delayNew) / 10); + + diff = aecm->filtDelay - aecm->knownDelay; + if (diff > 224) + { + if (aecm->lastDelayDiff < 96) + { + aecm->timeForDelayChange = 0; + } else + { + 
aecm->timeForDelayChange++; + } + } else if (diff < 96 && aecm->knownDelay > 0) + { + if (aecm->lastDelayDiff > 224) + { + aecm->timeForDelayChange = 0; + } else + { + aecm->timeForDelayChange++; + } + } else + { + aecm->timeForDelayChange = 0; + } + aecm->lastDelayDiff = diff; + + if (aecm->timeForDelayChange > 25) + { + aecm->knownDelay = WEBRTC_SPL_MAX((int)aecm->filtDelay - 160, 0); + } + return 0; +} + +static int WebRtcAecm_DelayComp(AecMobile* aecm) { + int nSampFar = (int) WebRtc_available_read(aecm->farendBuf); + int nSampSndCard, delayNew, nSampAdd; + const int maxStuffSamp = 10 * FRAME_LEN; + + nSampSndCard = aecm->msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult; + delayNew = nSampSndCard - nSampFar; + + if (delayNew > FAR_BUF_LEN - FRAME_LEN * aecm->aecmCore->mult) + { + // The difference of the buffer sizes is larger than the maximum + // allowed known delay. Compensate by stuffing the buffer. + nSampAdd = (int)(WEBRTC_SPL_MAX(((nSampSndCard >> 1) - nSampFar), + FRAME_LEN)); + nSampAdd = WEBRTC_SPL_MIN(nSampAdd, maxStuffSamp); + + WebRtc_MoveReadPtr(aecm->farendBuf, -nSampAdd); + aecm->delayChange = 1; // the delay needs to be updated + } + + return 0; +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h new file mode 100644 index 00000000..7ae15c2a --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_ + +#include <stdlib.h> + +#include "webrtc/typedefs.h" + +enum { + AecmFalse = 0, + AecmTrue +}; + +// Errors +#define AECM_UNSPECIFIED_ERROR 12000 +#define AECM_UNSUPPORTED_FUNCTION_ERROR 12001 +#define AECM_UNINITIALIZED_ERROR 12002 +#define AECM_NULL_POINTER_ERROR 12003 +#define AECM_BAD_PARAMETER_ERROR 12004 + +// Warnings +#define AECM_BAD_PARAMETER_WARNING 12100 + +typedef struct { + int16_t cngMode; // AECM_FALSE, AECM_TRUE (default) + int16_t echoMode; // 0, 1, 2, 3 (default), 4 +} AecmConfig; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Allocates the memory needed by the AECM. The memory needs to be + * initialized separately using the WebRtcAecm_Init() function. + * Returns a pointer to the instance and a nullptr at failure. + */ +void* WebRtcAecm_Create(); + +/* + * This function releases the memory allocated by WebRtcAecm_Create() + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + */ +void WebRtcAecm_Free(void* aecmInst); + +/* + * Initializes an AECM instance. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * int32_t sampFreq Sampling frequency of data + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * -1: error + */ +int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq); + +/* + * Inserts an 80 or 160 sample block of data into the farend buffer. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * int16_t* farend In buffer containing one frame of + * farend signal + * int16_t nrOfSamples Number of samples in farend buffer + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * -1: error + */ +int32_t WebRtcAecm_BufferFarend(void* aecmInst, + const int16_t* farend, + size_t nrOfSamples); + +/* + * Runs the AECM on an 80 or 160 sample blocks of data. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * int16_t* nearendNoisy In buffer containing one frame of + * reference nearend+echo signal. If + * noise reduction is active, provide + * the noisy signal here. + * int16_t* nearendClean In buffer containing one frame of + * nearend+echo signal. If noise + * reduction is active, provide the + * clean signal here. Otherwise pass a + * NULL pointer. 
+ * size_t nrOfSamples Number of samples in nearend buffer + * int16_t msInSndCardBuf Delay estimate for sound card and + * system buffers + * + * Outputs Description + * ------------------------------------------------------------------- + * int16_t* out Out buffer, one frame of processed nearend + * int32_t return 0: OK + * -1: error + */ +int32_t WebRtcAecm_Process(void* aecmInst, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* out, + size_t nrOfSamples, + int16_t msInSndCardBuf); + +/* + * This function enables the user to set certain parameters on-the-fly + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * AecmConfig config Config instance that contains all + * properties to be set + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * -1: error + */ +int32_t WebRtcAecm_set_config(void* aecmInst, AecmConfig config); + +/* + * This function enables the user to read the current parameters on-the-fly + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * + * Outputs Description + * ------------------------------------------------------------------- + * AecmConfig* config Pointer to the config instance that + * all properties will be written to + * int32_t return 0: OK + * -1: error + */ +int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config); + +/* + * This function enables the user to set the echo path on-the-fly. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * void* echo_path Pointer to the echo path to be set + * size_t size_bytes Size in bytes of the echo path + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * -1: error + */ +int32_t WebRtcAecm_InitEchoPath(void* aecmInst, + const void* echo_path, + size_t size_bytes); + +/* + * This function enables the user to get the currently used echo path + * on-the-fly + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * void* echo_path Pointer to echo path + * size_t size_bytes Size in bytes of the echo path + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * -1: error + */ +int32_t WebRtcAecm_GetEchoPath(void* aecmInst, + void* echo_path, + size_t size_bytes); + +/* + * This function enables the user to get the echo path size in bytes + * + * Outputs Description + * ------------------------------------------------------------------- + * size_t return Size in bytes + */ +size_t WebRtcAecm_echo_path_size_bytes(); + +/* + * Gets the last error code. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 12000-12100: error code + */ +int32_t WebRtcAecm_get_error_code(void *aecmInst); + +#ifdef __cplusplus +} +#endif +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging.h b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging.h new file mode 100644 index 00000000..3cf9ff89 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_ + +#include <stdio.h> + +#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h" + +// To enable AEC logging, invoke GYP with -Daec_debug_dump=1. +#ifdef WEBRTC_AEC_DEBUG_DUMP +// Dumps a wav data to file. +#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \ + do { \ + rtc_WavWriteSamples(file, data, num_samples); \ + } while (0) + +// (Re)opens a wav file for writing using the specified sample rate. +#define RTC_AEC_DEBUG_WAV_REOPEN(name, instance_index, process_rate, \ + sample_rate, wav_file) \ + do { \ + WebRtcAec_ReopenWav(name, instance_index, process_rate, sample_rate, \ + wav_file); \ + } while (0) + +// Closes a wav file. 
+#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \ + do { \ + rtc_WavClose(wav_file); \ + } while (0) + +// Dumps a raw data to file. +#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size) \ + do { \ + (void) fwrite(data, data_size, 1, file); \ + } while (0) + +// Opens a raw data file for writing using the specified sample rate. +#define RTC_AEC_DEBUG_RAW_OPEN(name, instance_counter, file) \ + do { \ + WebRtcAec_RawFileOpen(name, instance_counter, file); \ + } while (0) + +// Closes a raw data file. +#define RTC_AEC_DEBUG_RAW_CLOSE(file) \ + do { \ + fclose(file); \ + } while (0) + +#else // RTC_AEC_DEBUG_DUMP +#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \ + do { \ + } while (0) + +#define RTC_AEC_DEBUG_WAV_REOPEN(wav_file, name, instance_index, process_rate, \ + sample_rate) \ + do { \ + } while (0) + +#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \ + do { \ + } while (0) + +#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size) \ + do { \ + } while (0) + +#define RTC_AEC_DEBUG_RAW_OPEN(file, name, instance_counter) \ + do { \ + } while (0) + +#define RTC_AEC_DEBUG_RAW_CLOSE(file) \ + do { \ + } while (0) + +#endif // WEBRTC_AEC_DEBUG_DUMP + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc new file mode 100644 index 00000000..3a434714 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
#ifdef WEBRTC_AEC_DEBUG_DUMP
// Makes sure *wav_file is a writer opened at |sample_rate|. A writer that is
// already open at that rate is left untouched; one open at another rate is
// closed and replaced. The file name is "<name><instance_index>-<process_rate>.wav".
void WebRtcAec_ReopenWav(const char* name,
                         int instance_index,
                         int process_rate,
                         int sample_rate,
                         rtc_WavWriter** wav_file) {
  rtc_WavWriter* current = *wav_file;
  if (current) {
    if (rtc_WavSampleRate(current) == sample_rate) {
      return;
    }
    rtc_WavClose(current);
  }

  char path[64];
  const int length = rtc::sprintfn(path, sizeof(path), "%s%d-%d.wav", name,
                                   instance_index, process_rate);

  // Ensure there was no buffer output error.
  RTC_DCHECK_GE(length, 0);
  // Ensure that the buffer size was sufficient.
  RTC_DCHECK_LT(static_cast<size_t>(length), sizeof(path));

  *wav_file = rtc_WavOpen(path, sample_rate, 1);
}

// Opens "<name>_<instance_index>.dat" for binary writing and stores the
// stream in *file (whatever fopen returns, including NULL on failure).
void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file) {
  char path[64];
  const int length = rtc::sprintfn(path, sizeof(path), "%s_%d.dat", name,
                                   instance_index);

  // Ensure there was no buffer output error.
  RTC_DCHECK_GE(length, 0);
  // Ensure that the buffer size was sufficient.
  RTC_DCHECK_LT(static_cast<size_t>(length), sizeof(path));

  *file = fopen(path, "wb");
}

#endif  // WEBRTC_AEC_DEBUG_DUMP
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_ + +#include <stdio.h> + +#include "webrtc/common_audio/wav_file.h" +#include "webrtc/typedefs.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef WEBRTC_AEC_DEBUG_DUMP +// Opens a new Wav file for writing. If it was already open with a different +// sample frequency, it closes it first. +void WebRtcAec_ReopenWav(const char* name, + int instance_index, + int process_rate, + int sample_rate, + rtc_WavWriter** wav_file); + +// Opens dumpfile with instance-specific filename. +void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file); + +#endif // WEBRTC_AEC_DEBUG_DUMP + +#ifdef __cplusplus +} +#endif + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/defines.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/defines.h new file mode 100644 index 00000000..8271332c --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/defines.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_ + +#define BLOCKL_MAX 160 // max processing block length: 160 +#define ANAL_BLOCKL_MAX 256 // max analysis block length: 256 +#define HALF_ANAL_BLOCKL 129 // half max analysis block length + 1 +#define NUM_HIGH_BANDS_MAX 2 // max number of high bands: 2 + +#define QUANTILE (float)0.25 + +#define SIMULT 3 +#define END_STARTUP_LONG 200 +#define END_STARTUP_SHORT 50 +#define FACTOR (float)40.0 +#define WIDTH (float)0.01 + +// Length of fft work arrays. +#define IP_LENGTH (ANAL_BLOCKL_MAX >> 1) // must be at least ceil(2 + sqrt(ANAL_BLOCKL_MAX/2)) +#define W_LENGTH (ANAL_BLOCKL_MAX >> 1) + +//PARAMETERS FOR NEW METHOD +#define DD_PR_SNR (float)0.98 // DD update of prior SNR +#define LRT_TAVG (float)0.50 // tavg parameter for LRT (previously 0.90) +#define SPECT_FL_TAVG (float)0.30 // tavg parameter for spectral flatness measure +#define SPECT_DIFF_TAVG (float)0.30 // tavg parameter for spectral difference measure +#define PRIOR_UPDATE (float)0.10 // update parameter of prior model +#define NOISE_UPDATE (float)0.90 // update parameter for noise +#define SPEECH_UPDATE (float)0.99 // update parameter when likely speech +#define WIDTH_PR_MAP (float)4.0 // width parameter in sigmoid map for prior model +#define LRT_FEATURE_THR (float)0.5 // default threshold for LRT feature +#define SF_FEATURE_THR (float)0.5 // default threshold for Spectral Flatness feature +#define SD_FEATURE_THR (float)0.5 // default threshold for Spectral Difference feature +#define PROB_RANGE (float)0.20 // probability threshold for noise state in + // speech/noise likelihood +#define HIST_PAR_EST 1000 // histogram size for estimation of parameters +#define GAMMA_PAUSE (float)0.05 // update for conservative noise estimate +// +#define B_LIM (float)0.5 // threshold in final energy gain factor calculation +#endif // 
WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression.h new file mode 100644 index 00000000..9dac56bd --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_ + +#include <stddef.h> + +#include "webrtc/typedefs.h" + +typedef struct NsHandleT NsHandle; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This function creates an instance of the floating point Noise Suppression. + */ +NsHandle* WebRtcNs_Create(); + +/* + * This function frees the dynamic memory of a specified noise suppression + * instance. + * + * Input: + * - NS_inst : Pointer to NS instance that should be freed + */ +void WebRtcNs_Free(NsHandle* NS_inst); + +/* + * This function initializes a NS instance and has to be called before any other + * processing is made. + * + * Input: + * - NS_inst : Instance that should be initialized + * - fs : sampling frequency + * + * Output: + * - NS_inst : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs); + +/* + * This changes the aggressiveness of the noise suppression method. + * + * Input: + * - NS_inst : Noise suppression instance. 
+ * - mode : 0: Mild, 1: Medium , 2: Aggressive + * + * Output: + * - NS_inst : Updated instance. + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNs_set_policy(NsHandle* NS_inst, int mode); + +/* + * This functions estimates the background noise for the inserted speech frame. + * The input and output signals should always be 10ms (80 or 160 samples). + * + * Input + * - NS_inst : Noise suppression instance. + * - spframe : Pointer to speech frame buffer for L band + * + * Output: + * - NS_inst : Updated NS instance + */ +void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe); + +/* + * This functions does Noise Suppression for the inserted speech frame. The + * input and output signals should always be 10ms (80 or 160 samples). + * + * Input + * - NS_inst : Noise suppression instance. + * - spframe : Pointer to speech frame buffer for each band + * - num_bands : Number of bands + * + * Output: + * - NS_inst : Updated NS instance + * - outframe : Pointer to output frame for each band + */ +void WebRtcNs_Process(NsHandle* NS_inst, + const float* const* spframe, + size_t num_bands, + float* const* outframe); + +/* Returns the internally used prior speech probability of the current frame. + * There is a frequency bin based one as well, with which this should not be + * confused. + * + * Input + * - handle : Noise suppression instance. + * + * Return value : Prior speech probability in interval [0.0, 1.0]. + * -1 - NULL pointer or uninitialized instance. 
+ */ +float WebRtcNs_prior_speech_probability(NsHandle* handle); + +#ifdef __cplusplus +} +#endif + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h new file mode 100644 index 00000000..88fe4cd6 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_ + +#include "webrtc/typedefs.h" + +typedef struct NsxHandleT NsxHandle; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This function creates an instance of the fixed point Noise Suppression. + */ +NsxHandle* WebRtcNsx_Create(); + +/* + * This function frees the dynamic memory of a specified Noise Suppression + * instance. + * + * Input: + * - nsxInst : Pointer to NS instance that should be freed + */ +void WebRtcNsx_Free(NsxHandle* nsxInst); + +/* + * This function initializes a NS instance + * + * Input: + * - nsxInst : Instance that should be initialized + * - fs : sampling frequency + * + * Output: + * - nsxInst : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs); + +/* + * This changes the aggressiveness of the noise suppression method. 
+ * + * Input: + * - nsxInst : Instance that should be initialized + * - mode : 0: Mild, 1: Medium , 2: Aggressive + * + * Output: + * - nsxInst : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode); + +/* + * This functions does noise suppression for the inserted speech frame. The + * input and output signals should always be 10ms (80 or 160 samples). + * + * Input + * - nsxInst : NSx instance. Needs to be initiated before call. + * - speechFrame : Pointer to speech frame buffer for each band + * - num_bands : Number of bands + * + * Output: + * - nsxInst : Updated NSx instance + * - outFrame : Pointer to output frame for each band + */ +void WebRtcNsx_Process(NsxHandle* nsxInst, + const short* const* speechFrame, + int num_bands, + short* const* outFrame); + +#ifdef __cplusplus +} +#endif + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression.c new file mode 100644 index 00000000..13f1b2d6 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h" + +#include <stdlib.h> +#include <string.h> + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/ns/defines.h" +#include "webrtc/modules/audio_processing/ns/ns_core.h" + +NsHandle* WebRtcNs_Create() { + NoiseSuppressionC* self = malloc(sizeof(NoiseSuppressionC)); + self->initFlag = 0; + return (NsHandle*)self; +} + +void WebRtcNs_Free(NsHandle* NS_inst) { + free(NS_inst); +} + +int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs) { + return WebRtcNs_InitCore((NoiseSuppressionC*)NS_inst, fs); +} + +int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) { + return WebRtcNs_set_policy_core((NoiseSuppressionC*)NS_inst, mode); +} + +void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe) { + WebRtcNs_AnalyzeCore((NoiseSuppressionC*)NS_inst, spframe); +} + +void WebRtcNs_Process(NsHandle* NS_inst, + const float* const* spframe, + size_t num_bands, + float* const* outframe) { + WebRtcNs_ProcessCore((NoiseSuppressionC*)NS_inst, spframe, num_bands, + outframe); +} + +float WebRtcNs_prior_speech_probability(NsHandle* handle) { + NoiseSuppressionC* self = (NoiseSuppressionC*)handle; + if (handle == NULL) { + return -1; + } + if (self->initFlag == 0) { + return -1; + } + return self->priorSpeechProb; +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression_x.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression_x.c new file mode 100644 index 00000000..150fe608 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/noise_suppression_x.c @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h" + +#include <stdlib.h> + +#include "webrtc/common_audio/signal_processing/include/real_fft.h" +#include "webrtc/modules/audio_processing/ns/nsx_core.h" +#include "webrtc/modules/audio_processing/ns/nsx_defines.h" + +NsxHandle* WebRtcNsx_Create() { + NoiseSuppressionFixedC* self = malloc(sizeof(NoiseSuppressionFixedC)); + WebRtcSpl_Init(); + self->real_fft = NULL; + self->initFlag = 0; + return (NsxHandle*)self; +} + +void WebRtcNsx_Free(NsxHandle* nsxInst) { + WebRtcSpl_FreeRealFFT(((NoiseSuppressionFixedC*)nsxInst)->real_fft); + free(nsxInst); +} + +int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs) { + return WebRtcNsx_InitCore((NoiseSuppressionFixedC*)nsxInst, fs); +} + +int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) { + return WebRtcNsx_set_policy_core((NoiseSuppressionFixedC*)nsxInst, mode); +} + +void WebRtcNsx_Process(NsxHandle* nsxInst, + const short* const* speechFrame, + int num_bands, + short* const* outFrame) { + WebRtcNsx_ProcessCore((NoiseSuppressionFixedC*)nsxInst, speechFrame, + num_bands, outFrame); +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.c new file mode 100644 index 00000000..1d609140 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.c @@ -0,0 +1,1416 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> +#include <math.h> +#include <string.h> +#include <stdlib.h> + +#include "webrtc/common_audio/fft4g.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h" +#include "webrtc/modules/audio_processing/ns/ns_core.h" +#include "webrtc/modules/audio_processing/ns/windows_private.h" + +// Set Feature Extraction Parameters. +static void set_feature_extraction_parameters(NoiseSuppressionC* self) { + // Bin size of histogram. + self->featureExtractionParams.binSizeLrt = 0.1f; + self->featureExtractionParams.binSizeSpecFlat = 0.05f; + self->featureExtractionParams.binSizeSpecDiff = 0.1f; + + // Range of histogram over which LRT threshold is computed. + self->featureExtractionParams.rangeAvgHistLrt = 1.f; + + // Scale parameters: multiply dominant peaks of the histograms by scale factor + // to obtain thresholds for prior model. + // For LRT and spectral difference. + self->featureExtractionParams.factor1ModelPars = 1.2f; + // For spectral_flatness: used when noise is flatter than speech. + self->featureExtractionParams.factor2ModelPars = 0.9f; + + // Peak limit for spectral flatness (varies between 0 and 1). + self->featureExtractionParams.thresPosSpecFlat = 0.6f; + + // Limit on spacing of two highest peaks in histogram: spacing determined by + // bin size. + self->featureExtractionParams.limitPeakSpacingSpecFlat = + 2 * self->featureExtractionParams.binSizeSpecFlat; + self->featureExtractionParams.limitPeakSpacingSpecDiff = + 2 * self->featureExtractionParams.binSizeSpecDiff; + + // Limit on relevance of second peak. + self->featureExtractionParams.limitPeakWeightsSpecFlat = 0.5f; + self->featureExtractionParams.limitPeakWeightsSpecDiff = 0.5f; + + // Fluctuation limit of LRT feature. 
+ self->featureExtractionParams.thresFluctLrt = 0.05f; + + // Limit on the max and min values for the feature thresholds. + self->featureExtractionParams.maxLrt = 1.f; + self->featureExtractionParams.minLrt = 0.2f; + + self->featureExtractionParams.maxSpecFlat = 0.95f; + self->featureExtractionParams.minSpecFlat = 0.1f; + + self->featureExtractionParams.maxSpecDiff = 1.f; + self->featureExtractionParams.minSpecDiff = 0.16f; + + // Criteria of weight of histogram peak to accept/reject feature. + self->featureExtractionParams.thresWeightSpecFlat = + (int)(0.3 * (self->modelUpdatePars[1])); // For spectral flatness. + self->featureExtractionParams.thresWeightSpecDiff = + (int)(0.3 * (self->modelUpdatePars[1])); // For spectral difference. +} + +// Initialize state. +int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs) { + int i; + // Check for valid pointer. + if (self == NULL) { + return -1; + } + + // Initialization of struct. + if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) { + self->fs = fs; + } else { + return -1; + } + self->windShift = 0; + // We only support 10ms frames. + if (fs == 8000) { + self->blockLen = 80; + self->anaLen = 128; + self->window = kBlocks80w128; + } else { + self->blockLen = 160; + self->anaLen = 256; + self->window = kBlocks160w256; + } + self->magnLen = self->anaLen / 2 + 1; // Number of frequency bins. + + // Initialize FFT work arrays. + self->ip[0] = 0; // Setting this triggers initialization. + memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); + WebRtc_rdft(self->anaLen, 1, self->dataBuf, self->ip, self->wfft); + + memset(self->analyzeBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); + memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); + memset(self->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); + + // For HB processing. + memset(self->dataBufHB, + 0, + sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX); + + // For quantile noise estimation. 
+ memset(self->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL); + for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) { + self->lquantile[i] = 8.f; + self->density[i] = 0.3f; + } + + for (i = 0; i < SIMULT; i++) { + self->counter[i] = + (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT); + } + + self->updates = 0; + + // Wiener filter initialization. + for (i = 0; i < HALF_ANAL_BLOCKL; i++) { + self->smooth[i] = 1.f; + } + + // Set the aggressiveness: default. + self->aggrMode = 0; + + // Initialize variables for new method. + self->priorSpeechProb = 0.5f; // Prior prob for speech/noise. + // Previous analyze mag spectrum. + memset(self->magnPrevAnalyze, 0, sizeof(float) * HALF_ANAL_BLOCKL); + // Previous process mag spectrum. + memset(self->magnPrevProcess, 0, sizeof(float) * HALF_ANAL_BLOCKL); + // Current noise-spectrum. + memset(self->noise, 0, sizeof(float) * HALF_ANAL_BLOCKL); + // Previous noise-spectrum. + memset(self->noisePrev, 0, sizeof(float) * HALF_ANAL_BLOCKL); + // Conservative noise spectrum estimate. + memset(self->magnAvgPause, 0, sizeof(float) * HALF_ANAL_BLOCKL); + // For estimation of HB in second pass. + memset(self->speechProb, 0, sizeof(float) * HALF_ANAL_BLOCKL); + // Initial average magnitude spectrum. + memset(self->initMagnEst, 0, sizeof(float) * HALF_ANAL_BLOCKL); + for (i = 0; i < HALF_ANAL_BLOCKL; i++) { + // Smooth LR (same as threshold). + self->logLrtTimeAvg[i] = LRT_FEATURE_THR; + } + + // Feature quantities. + // Spectral flatness (start on threshold). + self->featureData[0] = SF_FEATURE_THR; + self->featureData[1] = 0.f; // Spectral entropy: not used in this version. + self->featureData[2] = 0.f; // Spectral variance: not used in this version. + // Average LRT factor (start on threshold). + self->featureData[3] = LRT_FEATURE_THR; + // Spectral template diff (start on threshold). + self->featureData[4] = SF_FEATURE_THR; + self->featureData[5] = 0.f; // Normalization for spectral difference. 
+ // Window time-average of input magnitude spectrum. + self->featureData[6] = 0.f; + + // Histogram quantities: used to estimate/update thresholds for features. + memset(self->histLrt, 0, sizeof(int) * HIST_PAR_EST); + memset(self->histSpecFlat, 0, sizeof(int) * HIST_PAR_EST); + memset(self->histSpecDiff, 0, sizeof(int) * HIST_PAR_EST); + + + self->blockInd = -1; // Frame counter. + // Default threshold for LRT feature. + self->priorModelPars[0] = LRT_FEATURE_THR; + // Threshold for spectral flatness: determined on-line. + self->priorModelPars[1] = 0.5f; + // sgn_map par for spectral measure: 1 for flatness measure. + self->priorModelPars[2] = 1.f; + // Threshold for template-difference feature: determined on-line. + self->priorModelPars[3] = 0.5f; + // Default weighting parameter for LRT feature. + self->priorModelPars[4] = 1.f; + // Default weighting parameter for spectral flatness feature. + self->priorModelPars[5] = 0.f; + // Default weighting parameter for spectral difference feature. + self->priorModelPars[6] = 0.f; + + // Update flag for parameters: + // 0 no update, 1 = update once, 2 = update every window. + self->modelUpdatePars[0] = 2; + self->modelUpdatePars[1] = 500; // Window for update. + // Counter for update of conservative noise spectrum. + self->modelUpdatePars[2] = 0; + // Counter if the feature thresholds are updated during the sequence. + self->modelUpdatePars[3] = self->modelUpdatePars[1]; + + self->signalEnergy = 0.0; + self->sumMagn = 0.0; + self->whiteNoiseLevel = 0.0; + self->pinkNoiseNumerator = 0.0; + self->pinkNoiseExp = 0.0; + + set_feature_extraction_parameters(self); + + // Default mode. + WebRtcNs_set_policy_core(self, 0); + + self->initFlag = 1; + return 0; +} + +// Estimate noise. 
+static void NoiseEstimation(NoiseSuppressionC* self, + float* magn, + float* noise) { + size_t i, s, offset; + float lmagn[HALF_ANAL_BLOCKL], delta; + + if (self->updates < END_STARTUP_LONG) { + self->updates++; + } + + for (i = 0; i < self->magnLen; i++) { + lmagn[i] = (float)log(magn[i]); + } + + // Loop over simultaneous estimates. + for (s = 0; s < SIMULT; s++) { + offset = s * self->magnLen; + + // newquantest(...) + for (i = 0; i < self->magnLen; i++) { + // Compute delta. + if (self->density[offset + i] > 1.0) { + delta = FACTOR * 1.f / self->density[offset + i]; + } else { + delta = FACTOR; + } + + // Update log quantile estimate. + if (lmagn[i] > self->lquantile[offset + i]) { + self->lquantile[offset + i] += + QUANTILE * delta / (float)(self->counter[s] + 1); + } else { + self->lquantile[offset + i] -= + (1.f - QUANTILE) * delta / (float)(self->counter[s] + 1); + } + + // Update density estimate. + if (fabs(lmagn[i] - self->lquantile[offset + i]) < WIDTH) { + self->density[offset + i] = + ((float)self->counter[s] * self->density[offset + i] + + 1.f / (2.f * WIDTH)) / + (float)(self->counter[s] + 1); + } + } // End loop over magnitude spectrum. + + if (self->counter[s] >= END_STARTUP_LONG) { + self->counter[s] = 0; + if (self->updates >= END_STARTUP_LONG) { + for (i = 0; i < self->magnLen; i++) { + self->quantile[i] = (float)exp(self->lquantile[offset + i]); + } + } + } + + self->counter[s]++; + } // End loop over simultaneous estimates. + + // Sequentially update the noise during startup. + if (self->updates < END_STARTUP_LONG) { + // Use the last "s" to get noise during startup that differ from zero. + for (i = 0; i < self->magnLen; i++) { + self->quantile[i] = (float)exp(self->lquantile[offset + i]); + } + } + + for (i = 0; i < self->magnLen; i++) { + noise[i] = self->quantile[i]; + } +} + +// Extract thresholds for feature parameters. +// Histograms are computed over some window size (given by +// self->modelUpdatePars[1]). 
+// Thresholds and weights are extracted every window. +// |flag| = 0 updates histogram only, |flag| = 1 computes the threshold/weights. +// Threshold and weights are returned in: self->priorModelPars. +static void FeatureParameterExtraction(NoiseSuppressionC* self, int flag) { + int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt; + int maxPeak1, maxPeak2; + int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, + weightPeak2SpecDiff; + + float binMid, featureSum; + float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff; + float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl; + + // 3 features: LRT, flatness, difference. + // lrt_feature = self->featureData[3]; + // flat_feature = self->featureData[0]; + // diff_feature = self->featureData[4]; + + // Update histograms. + if (flag == 0) { + // LRT + if ((self->featureData[3] < + HIST_PAR_EST * self->featureExtractionParams.binSizeLrt) && + (self->featureData[3] >= 0.0)) { + i = (int)(self->featureData[3] / + self->featureExtractionParams.binSizeLrt); + self->histLrt[i]++; + } + // Spectral flatness. + if ((self->featureData[0] < + HIST_PAR_EST * self->featureExtractionParams.binSizeSpecFlat) && + (self->featureData[0] >= 0.0)) { + i = (int)(self->featureData[0] / + self->featureExtractionParams.binSizeSpecFlat); + self->histSpecFlat[i]++; + } + // Spectral difference. + if ((self->featureData[4] < + HIST_PAR_EST * self->featureExtractionParams.binSizeSpecDiff) && + (self->featureData[4] >= 0.0)) { + i = (int)(self->featureData[4] / + self->featureExtractionParams.binSizeSpecDiff); + self->histSpecDiff[i]++; + } + } + + // Extract parameters for speech/noise probability. + if (flag == 1) { + // LRT feature: compute the average over + // self->featureExtractionParams.rangeAvgHistLrt. 
+ avgHistLrt = 0.0; + avgHistLrtCompl = 0.0; + avgSquareHistLrt = 0.0; + numHistLrt = 0; + for (i = 0; i < HIST_PAR_EST; i++) { + binMid = ((float)i + 0.5f) * self->featureExtractionParams.binSizeLrt; + if (binMid <= self->featureExtractionParams.rangeAvgHistLrt) { + avgHistLrt += self->histLrt[i] * binMid; + numHistLrt += self->histLrt[i]; + } + avgSquareHistLrt += self->histLrt[i] * binMid * binMid; + avgHistLrtCompl += self->histLrt[i] * binMid; + } + if (numHistLrt > 0) { + avgHistLrt = avgHistLrt / ((float)numHistLrt); + } + avgHistLrtCompl = avgHistLrtCompl / ((float)self->modelUpdatePars[1]); + avgSquareHistLrt = avgSquareHistLrt / ((float)self->modelUpdatePars[1]); + fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl; + // Get threshold for LRT feature. + if (fluctLrt < self->featureExtractionParams.thresFluctLrt) { + // Very low fluctuation, so likely noise. + self->priorModelPars[0] = self->featureExtractionParams.maxLrt; + } else { + self->priorModelPars[0] = + self->featureExtractionParams.factor1ModelPars * avgHistLrt; + // Check if value is within min/max range. + if (self->priorModelPars[0] < self->featureExtractionParams.minLrt) { + self->priorModelPars[0] = self->featureExtractionParams.minLrt; + } + if (self->priorModelPars[0] > self->featureExtractionParams.maxLrt) { + self->priorModelPars[0] = self->featureExtractionParams.maxLrt; + } + } + // Done with LRT feature. + + // For spectral flatness and spectral difference: compute the main peaks of + // histogram. + maxPeak1 = 0; + maxPeak2 = 0; + posPeak1SpecFlat = 0.0; + posPeak2SpecFlat = 0.0; + weightPeak1SpecFlat = 0; + weightPeak2SpecFlat = 0; + + // Peaks for flatness. + for (i = 0; i < HIST_PAR_EST; i++) { + binMid = + (i + 0.5f) * self->featureExtractionParams.binSizeSpecFlat; + if (self->histSpecFlat[i] > maxPeak1) { + // Found new "first" peak. 
+ maxPeak2 = maxPeak1; + weightPeak2SpecFlat = weightPeak1SpecFlat; + posPeak2SpecFlat = posPeak1SpecFlat; + + maxPeak1 = self->histSpecFlat[i]; + weightPeak1SpecFlat = self->histSpecFlat[i]; + posPeak1SpecFlat = binMid; + } else if (self->histSpecFlat[i] > maxPeak2) { + // Found new "second" peak. + maxPeak2 = self->histSpecFlat[i]; + weightPeak2SpecFlat = self->histSpecFlat[i]; + posPeak2SpecFlat = binMid; + } + } + + // Compute two peaks for spectral difference. + maxPeak1 = 0; + maxPeak2 = 0; + posPeak1SpecDiff = 0.0; + posPeak2SpecDiff = 0.0; + weightPeak1SpecDiff = 0; + weightPeak2SpecDiff = 0; + // Peaks for spectral difference. + for (i = 0; i < HIST_PAR_EST; i++) { + binMid = + ((float)i + 0.5f) * self->featureExtractionParams.binSizeSpecDiff; + if (self->histSpecDiff[i] > maxPeak1) { + // Found new "first" peak. + maxPeak2 = maxPeak1; + weightPeak2SpecDiff = weightPeak1SpecDiff; + posPeak2SpecDiff = posPeak1SpecDiff; + + maxPeak1 = self->histSpecDiff[i]; + weightPeak1SpecDiff = self->histSpecDiff[i]; + posPeak1SpecDiff = binMid; + } else if (self->histSpecDiff[i] > maxPeak2) { + // Found new "second" peak. + maxPeak2 = self->histSpecDiff[i]; + weightPeak2SpecDiff = self->histSpecDiff[i]; + posPeak2SpecDiff = binMid; + } + } + + // For spectrum flatness feature. + useFeatureSpecFlat = 1; + // Merge the two peaks if they are close. + if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) < + self->featureExtractionParams.limitPeakSpacingSpecFlat) && + (weightPeak2SpecFlat > + self->featureExtractionParams.limitPeakWeightsSpecFlat * + weightPeak1SpecFlat)) { + weightPeak1SpecFlat += weightPeak2SpecFlat; + posPeak1SpecFlat = 0.5f * (posPeak1SpecFlat + posPeak2SpecFlat); + } + // Reject if weight of peaks is not large enough, or peak value too small. + if (weightPeak1SpecFlat < + self->featureExtractionParams.thresWeightSpecFlat || + posPeak1SpecFlat < self->featureExtractionParams.thresPosSpecFlat) { + useFeatureSpecFlat = 0; + } + // If selected, get the threshold. 
+ if (useFeatureSpecFlat == 1) { + // Compute the threshold. + self->priorModelPars[1] = + self->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat; + // Check if value is within min/max range. + if (self->priorModelPars[1] < self->featureExtractionParams.minSpecFlat) { + self->priorModelPars[1] = self->featureExtractionParams.minSpecFlat; + } + if (self->priorModelPars[1] > self->featureExtractionParams.maxSpecFlat) { + self->priorModelPars[1] = self->featureExtractionParams.maxSpecFlat; + } + } + // Done with flatness feature. + + // For template feature. + useFeatureSpecDiff = 1; + // Merge the two peaks if they are close. + if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) < + self->featureExtractionParams.limitPeakSpacingSpecDiff) && + (weightPeak2SpecDiff > + self->featureExtractionParams.limitPeakWeightsSpecDiff * + weightPeak1SpecDiff)) { + weightPeak1SpecDiff += weightPeak2SpecDiff; + posPeak1SpecDiff = 0.5f * (posPeak1SpecDiff + posPeak2SpecDiff); + } + // Get the threshold value. + self->priorModelPars[3] = + self->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff; + // Reject if weight of peaks is not large enough. + if (weightPeak1SpecDiff < + self->featureExtractionParams.thresWeightSpecDiff) { + useFeatureSpecDiff = 0; + } + // Check if value is within min/max range. + if (self->priorModelPars[3] < self->featureExtractionParams.minSpecDiff) { + self->priorModelPars[3] = self->featureExtractionParams.minSpecDiff; + } + if (self->priorModelPars[3] > self->featureExtractionParams.maxSpecDiff) { + self->priorModelPars[3] = self->featureExtractionParams.maxSpecDiff; + } + // Done with spectral difference feature. + + // Don't use template feature if fluctuation of LRT feature is very low: + // most likely just noise state. + if (fluctLrt < self->featureExtractionParams.thresFluctLrt) { + useFeatureSpecDiff = 0; + } + + // Select the weights between the features. + // self->priorModelPars[4] is weight for LRT: always selected. 
+ // self->priorModelPars[5] is weight for spectral flatness. + // self->priorModelPars[6] is weight for spectral difference. + featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff); + self->priorModelPars[4] = 1.f / featureSum; + self->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum; + self->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum; + + // Set hists to zero for next update. + if (self->modelUpdatePars[0] >= 1) { + for (i = 0; i < HIST_PAR_EST; i++) { + self->histLrt[i] = 0; + self->histSpecFlat[i] = 0; + self->histSpecDiff[i] = 0; + } + } + } // End of flag == 1. +} + +// Compute spectral flatness on input spectrum. +// |magnIn| is the magnitude spectrum. +// Spectral flatness is returned in self->featureData[0]. +static void ComputeSpectralFlatness(NoiseSuppressionC* self, + const float* magnIn) { + size_t i; + size_t shiftLP = 1; // Option to remove first bin(s) from spectral measures. + float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp; + + // Compute spectral measures. + // For flatness. + avgSpectralFlatnessNum = 0.0; + avgSpectralFlatnessDen = self->sumMagn; + for (i = 0; i < shiftLP; i++) { + avgSpectralFlatnessDen -= magnIn[i]; + } + // Compute log of ratio of the geometric to arithmetic mean: check for log(0) + // case. + for (i = shiftLP; i < self->magnLen; i++) { + if (magnIn[i] > 0.0) { + avgSpectralFlatnessNum += (float)log(magnIn[i]); + } else { + self->featureData[0] -= SPECT_FL_TAVG * self->featureData[0]; + return; + } + } + // Normalize. + avgSpectralFlatnessDen = avgSpectralFlatnessDen / self->magnLen; + avgSpectralFlatnessNum = avgSpectralFlatnessNum / self->magnLen; + + // Ratio and inverse log: check for case of log(0). + spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen; + + // Time-avg update of spectral flatness feature. + self->featureData[0] += SPECT_FL_TAVG * (spectralTmp - self->featureData[0]); + // Done with flatness feature. 
+} + +// Compute prior and post SNR based on quantile noise estimation. +// Compute DD estimate of prior SNR. +// Inputs: +// * |magn| is the signal magnitude spectrum estimate. +// * |noise| is the magnitude noise spectrum estimate. +// Outputs: +// * |snrLocPrior| is the computed prior SNR. +// * |snrLocPost| is the computed post SNR. +static void ComputeSnr(const NoiseSuppressionC* self, + const float* magn, + const float* noise, + float* snrLocPrior, + float* snrLocPost) { + size_t i; + + for (i = 0; i < self->magnLen; i++) { + // Previous post SNR. + // Previous estimate: based on previous frame with gain filter. + float previousEstimateStsa = self->magnPrevAnalyze[i] / + (self->noisePrev[i] + 0.0001f) * self->smooth[i]; + // Post SNR. + snrLocPost[i] = 0.f; + if (magn[i] > noise[i]) { + snrLocPost[i] = magn[i] / (noise[i] + 0.0001f) - 1.f; + } + // DD estimate is sum of two terms: current estimate and previous estimate. + // Directed decision update of snrPrior. + snrLocPrior[i] = + DD_PR_SNR * previousEstimateStsa + (1.f - DD_PR_SNR) * snrLocPost[i]; + } // End of loop over frequencies. +} + +// Compute the difference measure between input spectrum and a template/learned +// noise spectrum. +// |magnIn| is the input spectrum. +// The reference/template spectrum is self->magnAvgPause[i]. +// Returns (normalized) spectral difference in self->featureData[4]. +static void ComputeSpectralDifference(NoiseSuppressionC* self, + const float* magnIn) { + // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / + // var(magnAvgPause) + size_t i; + float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn; + + avgPause = 0.0; + avgMagn = self->sumMagn; + // Compute average quantities. + for (i = 0; i < self->magnLen; i++) { + // Conservative smooth noise spectrum from pause frames. 
+ avgPause += self->magnAvgPause[i]; + } + avgPause /= self->magnLen; + avgMagn /= self->magnLen; + + covMagnPause = 0.0; + varPause = 0.0; + varMagn = 0.0; + // Compute variance and covariance quantities. + for (i = 0; i < self->magnLen; i++) { + covMagnPause += (magnIn[i] - avgMagn) * (self->magnAvgPause[i] - avgPause); + varPause += + (self->magnAvgPause[i] - avgPause) * (self->magnAvgPause[i] - avgPause); + varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn); + } + covMagnPause /= self->magnLen; + varPause /= self->magnLen; + varMagn /= self->magnLen; + // Update of average magnitude spectrum. + self->featureData[6] += self->signalEnergy; + + avgDiffNormMagn = + varMagn - (covMagnPause * covMagnPause) / (varPause + 0.0001f); + // Normalize and compute time-avg update of difference feature. + avgDiffNormMagn = (float)(avgDiffNormMagn / (self->featureData[5] + 0.0001f)); + self->featureData[4] += + SPECT_DIFF_TAVG * (avgDiffNormMagn - self->featureData[4]); +} + +// Compute speech/noise probability. +// Speech/noise probability is returned in |probSpeechFinal|. +// |magn| is the input magnitude spectrum. +// |noise| is the noise spectrum. +// |snrLocPrior| is the prior SNR for each frequency. +// |snrLocPost| is the post SNR for each frequency. +static void SpeechNoiseProb(NoiseSuppressionC* self, + float* probSpeechFinal, + const float* snrLocPrior, + const float* snrLocPost) { + size_t i; + int sgnMap; + float invLrt, gainPrior, indPrior; + float logLrtTimeAvgKsum, besselTmp; + float indicator0, indicator1, indicator2; + float tmpFloat1, tmpFloat2; + float weightIndPrior0, weightIndPrior1, weightIndPrior2; + float threshPrior0, threshPrior1, threshPrior2; + float widthPrior, widthPrior0, widthPrior1, widthPrior2; + + widthPrior0 = WIDTH_PR_MAP; + // Width for pause region: lower range, so increase width in tanh map. + widthPrior1 = 2.f * WIDTH_PR_MAP; + widthPrior2 = 2.f * WIDTH_PR_MAP; // For spectral-difference measure. 
+ + // Threshold parameters for features. + threshPrior0 = self->priorModelPars[0]; + threshPrior1 = self->priorModelPars[1]; + threshPrior2 = self->priorModelPars[3]; + + // Sign for flatness feature. + sgnMap = (int)(self->priorModelPars[2]); + + // Weight parameters for features. + weightIndPrior0 = self->priorModelPars[4]; + weightIndPrior1 = self->priorModelPars[5]; + weightIndPrior2 = self->priorModelPars[6]; + + // Compute feature based on average LR factor. + // This is the average over all frequencies of the smooth log LRT. + logLrtTimeAvgKsum = 0.0; + for (i = 0; i < self->magnLen; i++) { + tmpFloat1 = 1.f + 2.f * snrLocPrior[i]; + tmpFloat2 = 2.f * snrLocPrior[i] / (tmpFloat1 + 0.0001f); + besselTmp = (snrLocPost[i] + 1.f) * tmpFloat2; + self->logLrtTimeAvg[i] += + LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - self->logLrtTimeAvg[i]); + logLrtTimeAvgKsum += self->logLrtTimeAvg[i]; + } + logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (self->magnLen); + self->featureData[3] = logLrtTimeAvgKsum; + // Done with computation of LR factor. + + // Compute the indicator functions. + // Average LRT feature. + widthPrior = widthPrior0; + // Use larger width in tanh map for pause regions. + if (logLrtTimeAvgKsum < threshPrior0) { + widthPrior = widthPrior1; + } + // Compute indicator function: sigmoid map. + indicator0 = + 0.5f * + ((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.f); + + // Spectral flatness feature. + tmpFloat1 = self->featureData[0]; + widthPrior = widthPrior0; + // Use larger width in tanh map for pause regions. + if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) { + widthPrior = widthPrior1; + } + if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) { + widthPrior = widthPrior1; + } + // Compute indicator function: sigmoid map. + indicator1 = + 0.5f * + ((float)tanh((float)sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) + + 1.f); + + // For template spectrum-difference. 
+ tmpFloat1 = self->featureData[4]; + widthPrior = widthPrior0; + // Use larger width in tanh map for pause regions. + if (tmpFloat1 < threshPrior2) { + widthPrior = widthPrior2; + } + // Compute indicator function: sigmoid map. + indicator2 = + 0.5f * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.f); + + // Combine the indicator function with the feature weights. + indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + + weightIndPrior2 * indicator2; + // Done with computing indicator function. + + // Compute the prior probability. + self->priorSpeechProb += PRIOR_UPDATE * (indPrior - self->priorSpeechProb); + // Make sure probabilities are within range: keep floor to 0.01. + if (self->priorSpeechProb > 1.f) { + self->priorSpeechProb = 1.f; + } + if (self->priorSpeechProb < 0.01f) { + self->priorSpeechProb = 0.01f; + } + + // Final speech probability: combine prior model with LR factor:. + gainPrior = (1.f - self->priorSpeechProb) / (self->priorSpeechProb + 0.0001f); + for (i = 0; i < self->magnLen; i++) { + invLrt = (float)exp(-self->logLrtTimeAvg[i]); + invLrt = (float)gainPrior * invLrt; + probSpeechFinal[i] = 1.f / (1.f + invLrt); + } +} + +// Update the noise features. +// Inputs: +// * |magn| is the signal magnitude spectrum estimate. +// * |updateParsFlag| is an update flag for parameters. +static void FeatureUpdate(NoiseSuppressionC* self, + const float* magn, + int updateParsFlag) { + // Compute spectral flatness on input spectrum. + ComputeSpectralFlatness(self, magn); + // Compute difference of input spectrum with learned/estimated noise spectrum. + ComputeSpectralDifference(self, magn); + // Compute histograms for parameter decisions (thresholds and weights for + // features). + // Parameters are extracted once every window time. + // (=self->modelUpdatePars[1]) + if (updateParsFlag >= 1) { + // Counter update. + self->modelUpdatePars[3]--; + // Update histogram. 
+ if (self->modelUpdatePars[3] > 0) { + FeatureParameterExtraction(self, 0); + } + // Compute model parameters. + if (self->modelUpdatePars[3] == 0) { + FeatureParameterExtraction(self, 1); + self->modelUpdatePars[3] = self->modelUpdatePars[1]; + // If wish to update only once, set flag to zero. + if (updateParsFlag == 1) { + self->modelUpdatePars[0] = 0; + } else { + // Update every window: + // Get normalization for spectral difference for next window estimate. + self->featureData[6] = + self->featureData[6] / ((float)self->modelUpdatePars[1]); + self->featureData[5] = + 0.5f * (self->featureData[6] + self->featureData[5]); + self->featureData[6] = 0.f; + } + } + } +} + +// Update the noise estimate. +// Inputs: +// * |magn| is the signal magnitude spectrum estimate. +// * |snrLocPrior| is the prior SNR. +// * |snrLocPost| is the post SNR. +// Output: +// * |noise| is the updated noise magnitude spectrum estimate. +static void UpdateNoiseEstimate(NoiseSuppressionC* self, + const float* magn, + const float* snrLocPrior, + const float* snrLocPost, + float* noise) { + size_t i; + float probSpeech, probNonSpeech; + // Time-avg parameter for noise update. + float gammaNoiseTmp = NOISE_UPDATE; + float gammaNoiseOld; + float noiseUpdateTmp; + + for (i = 0; i < self->magnLen; i++) { + probSpeech = self->speechProb[i]; + probNonSpeech = 1.f - probSpeech; + // Temporary noise update: + // Use it for speech frames if update value is less than previous. + noiseUpdateTmp = gammaNoiseTmp * self->noisePrev[i] + + (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] + + probSpeech * self->noisePrev[i]); + // Time-constant based on speech/noise state. + gammaNoiseOld = gammaNoiseTmp; + gammaNoiseTmp = NOISE_UPDATE; + // Increase gamma (i.e., less noise update) for frame likely to be speech. + if (probSpeech > PROB_RANGE) { + gammaNoiseTmp = SPEECH_UPDATE; + } + // Conservative noise update. 
+ if (probSpeech < PROB_RANGE) { + self->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - self->magnAvgPause[i]); + } + // Noise update. + if (gammaNoiseTmp == gammaNoiseOld) { + noise[i] = noiseUpdateTmp; + } else { + noise[i] = gammaNoiseTmp * self->noisePrev[i] + + (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] + + probSpeech * self->noisePrev[i]); + // Allow for noise update downwards: + // If noise update decreases the noise, it is safe, so allow it to + // happen. + if (noiseUpdateTmp < noise[i]) { + noise[i] = noiseUpdateTmp; + } + } + } // End of freq loop. +} + +// Updates |buffer| with a new |frame|. +// Inputs: +// * |frame| is a new speech frame or NULL for setting to zero. +// * |frame_length| is the length of the new frame. +// * |buffer_length| is the length of the buffer. +// Output: +// * |buffer| is the updated buffer. +static void UpdateBuffer(const float* frame, + size_t frame_length, + size_t buffer_length, + float* buffer) { + assert(buffer_length < 2 * frame_length); + + memcpy(buffer, + buffer + frame_length, + sizeof(*buffer) * (buffer_length - frame_length)); + if (frame) { + memcpy(buffer + buffer_length - frame_length, + frame, + sizeof(*buffer) * frame_length); + } else { + memset(buffer + buffer_length - frame_length, + 0, + sizeof(*buffer) * frame_length); + } +} + +// Transforms the signal from time to frequency domain. +// Inputs: +// * |time_data| is the signal in the time domain. +// * |time_data_length| is the length of the analysis buffer. +// * |magnitude_length| is the length of the spectrum magnitude, which equals +// the length of both |real| and |imag| (time_data_length / 2 + 1). +// Outputs: +// * |time_data| is the signal in the frequency domain. +// * |real| is the real part of the frequency domain. +// * |imag| is the imaginary part of the frequency domain. +// * |magn| is the calculated signal magnitude in the frequency domain. 
+static void FFT(NoiseSuppressionC* self, + float* time_data, + size_t time_data_length, + size_t magnitude_length, + float* real, + float* imag, + float* magn) { + size_t i; + + assert(magnitude_length == time_data_length / 2 + 1); + + WebRtc_rdft(time_data_length, 1, time_data, self->ip, self->wfft); + + imag[0] = 0; + real[0] = time_data[0]; + magn[0] = fabsf(real[0]) + 1.f; + imag[magnitude_length - 1] = 0; + real[magnitude_length - 1] = time_data[1]; + magn[magnitude_length - 1] = fabsf(real[magnitude_length - 1]) + 1.f; + for (i = 1; i < magnitude_length - 1; ++i) { + real[i] = time_data[2 * i]; + imag[i] = time_data[2 * i + 1]; + // Magnitude spectrum. + magn[i] = sqrtf(real[i] * real[i] + imag[i] * imag[i]) + 1.f; + } +} + +// Transforms the signal from frequency to time domain. +// Inputs: +// * |real| is the real part of the frequency domain. +// * |imag| is the imaginary part of the frequency domain. +// * |magnitude_length| is the length of the spectrum magnitude, which equals +// the length of both |real| and |imag|. +// * |time_data_length| is the length of the analysis buffer +// (2 * (magnitude_length - 1)). +// Output: +// * |time_data| is the signal in the time domain. +static void IFFT(NoiseSuppressionC* self, + const float* real, + const float* imag, + size_t magnitude_length, + size_t time_data_length, + float* time_data) { + size_t i; + + assert(time_data_length == 2 * (magnitude_length - 1)); + + time_data[0] = real[0]; + time_data[1] = real[magnitude_length - 1]; + for (i = 1; i < magnitude_length - 1; ++i) { + time_data[2 * i] = real[i]; + time_data[2 * i + 1] = imag[i]; + } + WebRtc_rdft(time_data_length, -1, time_data, self->ip, self->wfft); + + for (i = 0; i < time_data_length; ++i) { + time_data[i] *= 2.f / time_data_length; // FFT scaling. + } +} + +// Calculates the energy of a buffer. +// Inputs: +// * |buffer| is the buffer over which the energy is calculated. +// * |length| is the length of the buffer. 
+// Returns the calculated energy. +static float Energy(const float* buffer, size_t length) { + size_t i; + float energy = 0.f; + + for (i = 0; i < length; ++i) { + energy += buffer[i] * buffer[i]; + } + + return energy; +} + +// Windows a buffer. +// Inputs: +// * |window| is the window by which to multiply. +// * |data| is the data without windowing. +// * |length| is the length of the window and data. +// Output: +// * |data_windowed| is the windowed data. +static void Windowing(const float* window, + const float* data, + size_t length, + float* data_windowed) { + size_t i; + + for (i = 0; i < length; ++i) { + data_windowed[i] = window[i] * data[i]; + } +} + +// Estimate prior SNR decision-directed and compute DD based Wiener Filter. +// Input: +// * |magn| is the signal magnitude spectrum estimate. +// Output: +// * |theFilter| is the frequency response of the computed Wiener filter. +static void ComputeDdBasedWienerFilter(const NoiseSuppressionC* self, + const float* magn, + float* theFilter) { + size_t i; + float snrPrior, previousEstimateStsa, currentEstimateStsa; + + for (i = 0; i < self->magnLen; i++) { + // Previous estimate: based on previous frame with gain filter. + previousEstimateStsa = self->magnPrevProcess[i] / + (self->noisePrev[i] + 0.0001f) * self->smooth[i]; + // Post and prior SNR. + currentEstimateStsa = 0.f; + if (magn[i] > self->noise[i]) { + currentEstimateStsa = magn[i] / (self->noise[i] + 0.0001f) - 1.f; + } + // DD estimate is sum of two terms: current estimate and previous estimate. + // Directed decision update of |snrPrior|. + snrPrior = DD_PR_SNR * previousEstimateStsa + + (1.f - DD_PR_SNR) * currentEstimateStsa; + // Gain filter. + theFilter[i] = snrPrior / (self->overdrive + snrPrior); + } // End of loop over frequencies. +} + +// Changes the aggressiveness of the noise suppression method. +// |mode| = 0 is mild (6dB), |mode| = 1 is medium (10dB) and |mode| = 2 is +// aggressive (15dB). +// Returns 0 on success and -1 otherwise. 
+int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode) { + // Allow for modes: 0, 1, 2, 3. + if (mode < 0 || mode > 3) { + return (-1); + } + + self->aggrMode = mode; + if (mode == 0) { + self->overdrive = 1.f; + self->denoiseBound = 0.5f; + self->gainmap = 0; + } else if (mode == 1) { + // self->overdrive = 1.25f; + self->overdrive = 1.f; + self->denoiseBound = 0.25f; + self->gainmap = 1; + } else if (mode == 2) { + // self->overdrive = 1.25f; + self->overdrive = 1.1f; + self->denoiseBound = 0.125f; + self->gainmap = 1; + } else if (mode == 3) { + // self->overdrive = 1.3f; + self->overdrive = 1.25f; + self->denoiseBound = 0.09f; + self->gainmap = 1; + } + return 0; +} + +void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame) { + size_t i; + const size_t kStartBand = 5; // Skip first frequency bins during estimation. + int updateParsFlag; + float energy; + float signalEnergy = 0.f; + float sumMagn = 0.f; + float tmpFloat1, tmpFloat2, tmpFloat3; + float winData[ANAL_BLOCKL_MAX]; + float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL]; + float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL]; + float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; + // Variables during startup. + float sum_log_i = 0.0; + float sum_log_i_square = 0.0; + float sum_log_magn = 0.0; + float sum_log_i_log_magn = 0.0; + float parametric_exp = 0.0; + float parametric_num = 0.0; + + // Check that initiation has been done. + assert(self->initFlag == 1); + updateParsFlag = self->modelUpdatePars[0]; + + // Update analysis buffer for L band. + UpdateBuffer(speechFrame, self->blockLen, self->anaLen, self->analyzeBuf); + + Windowing(self->window, self->analyzeBuf, self->anaLen, winData); + energy = Energy(winData, self->anaLen); + if (energy == 0.0) { + // We want to avoid updating statistics in this case: + // Updating feature statistics when we have zeros only will cause + // thresholds to move towards zero signal situations. 
This in turn has the + // effect that once the signal is "turned on" (non-zero values) everything + // will be treated as speech and there is no noise suppression effect. + // Depending on the duration of the inactive signal it takes a + // considerable amount of time for the system to learn what is noise and + // what is speech. + return; + } + + self->blockInd++; // Update the block index only when we process a block. + + FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn); + + for (i = 0; i < self->magnLen; i++) { + signalEnergy += real[i] * real[i] + imag[i] * imag[i]; + sumMagn += magn[i]; + if (self->blockInd < END_STARTUP_SHORT) { + if (i >= kStartBand) { + tmpFloat2 = logf((float)i); + sum_log_i += tmpFloat2; + sum_log_i_square += tmpFloat2 * tmpFloat2; + tmpFloat1 = logf(magn[i]); + sum_log_magn += tmpFloat1; + sum_log_i_log_magn += tmpFloat2 * tmpFloat1; + } + } + } + signalEnergy /= self->magnLen; + self->signalEnergy = signalEnergy; + self->sumMagn = sumMagn; + + // Quantile noise estimate. + NoiseEstimation(self, magn, noise); + // Compute simplified noise model during startup. + if (self->blockInd < END_STARTUP_SHORT) { + // Estimate White noise. + self->whiteNoiseLevel += sumMagn / self->magnLen * self->overdrive; + // Estimate Pink noise parameters. + tmpFloat1 = sum_log_i_square * (self->magnLen - kStartBand); + tmpFloat1 -= (sum_log_i * sum_log_i); + tmpFloat2 = + (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn); + tmpFloat3 = tmpFloat2 / tmpFloat1; + // Constrain the estimated spectrum to be positive. + if (tmpFloat3 < 0.f) { + tmpFloat3 = 0.f; + } + self->pinkNoiseNumerator += tmpFloat3; + tmpFloat2 = (sum_log_i * sum_log_magn); + tmpFloat2 -= (self->magnLen - kStartBand) * sum_log_i_log_magn; + tmpFloat3 = tmpFloat2 / tmpFloat1; + // Constrain the pink noise power to be in the interval [0, 1]. 
+ if (tmpFloat3 < 0.f) { + tmpFloat3 = 0.f; + } + if (tmpFloat3 > 1.f) { + tmpFloat3 = 1.f; + } + self->pinkNoiseExp += tmpFloat3; + + // Calculate frequency independent parts of parametric noise estimate. + if (self->pinkNoiseExp > 0.f) { + // Use pink noise estimate. + parametric_num = + expf(self->pinkNoiseNumerator / (float)(self->blockInd + 1)); + parametric_num *= (float)(self->blockInd + 1); + parametric_exp = self->pinkNoiseExp / (float)(self->blockInd + 1); + } + for (i = 0; i < self->magnLen; i++) { + // Estimate the background noise using the white and pink noise + // parameters. + if (self->pinkNoiseExp == 0.f) { + // Use white noise estimate. + self->parametricNoise[i] = self->whiteNoiseLevel; + } else { + // Use pink noise estimate. + float use_band = (float)(i < kStartBand ? kStartBand : i); + self->parametricNoise[i] = + parametric_num / powf(use_band, parametric_exp); + } + // Weight quantile noise with modeled noise. + noise[i] *= (self->blockInd); + tmpFloat2 = + self->parametricNoise[i] * (END_STARTUP_SHORT - self->blockInd); + noise[i] += (tmpFloat2 / (float)(self->blockInd + 1)); + noise[i] /= END_STARTUP_SHORT; + } + } + // Compute average signal during END_STARTUP_LONG time: + // used to normalize spectral difference measure. + if (self->blockInd < END_STARTUP_LONG) { + self->featureData[5] *= self->blockInd; + self->featureData[5] += signalEnergy; + self->featureData[5] /= (self->blockInd + 1); + } + + // Post and prior SNR needed for SpeechNoiseProb. + ComputeSnr(self, magn, noise, snrLocPrior, snrLocPost); + + FeatureUpdate(self, magn, updateParsFlag); + SpeechNoiseProb(self, self->speechProb, snrLocPrior, snrLocPost); + UpdateNoiseEstimate(self, magn, snrLocPrior, snrLocPost, noise); + + // Keep track of noise spectrum for next frame. 
+ memcpy(self->noise, noise, sizeof(*noise) * self->magnLen); + memcpy(self->magnPrevAnalyze, magn, sizeof(*magn) * self->magnLen); +} + +void WebRtcNs_ProcessCore(NoiseSuppressionC* self, + const float* const* speechFrame, + size_t num_bands, + float* const* outFrame) { + // Main routine for noise reduction. + int flagHB = 0; + size_t i, j; + + float energy1, energy2, gain, factor, factor1, factor2; + float fout[BLOCKL_MAX]; + float winData[ANAL_BLOCKL_MAX]; + float magn[HALF_ANAL_BLOCKL]; + float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL]; + float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; + + // SWB variables. + int deltaBweHB = 1; + int deltaGainHB = 1; + float decayBweHB = 1.0; + float gainMapParHB = 1.0; + float gainTimeDomainHB = 1.0; + float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB; + float sumMagnAnalyze, sumMagnProcess; + + // Check that initiation has been done. + assert(self->initFlag == 1); + assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX); + + const float* const* speechFrameHB = NULL; + float* const* outFrameHB = NULL; + size_t num_high_bands = 0; + if (num_bands > 1) { + speechFrameHB = &speechFrame[1]; + outFrameHB = &outFrame[1]; + num_high_bands = num_bands - 1; + flagHB = 1; + // Range for averaging low band quantities for H band gain. + deltaBweHB = (int)self->magnLen / 4; + deltaGainHB = deltaBweHB; + } + + // Update analysis buffer for L band. + UpdateBuffer(speechFrame[0], self->blockLen, self->anaLen, self->dataBuf); + + if (flagHB == 1) { + // Update analysis buffer for H bands. + for (i = 0; i < num_high_bands; ++i) { + UpdateBuffer(speechFrameHB[i], + self->blockLen, + self->anaLen, + self->dataBufHB[i]); + } + } + + Windowing(self->window, self->dataBuf, self->anaLen, winData); + energy1 = Energy(winData, self->anaLen); + if (energy1 == 0.0) { + // Synthesize the special case of zero input. + // Read out fully processed segment. 
+ for (i = self->windShift; i < self->blockLen + self->windShift; i++) { + fout[i - self->windShift] = self->syntBuf[i]; + } + // Update synthesis buffer. + UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf); + + for (i = 0; i < self->blockLen; ++i) + outFrame[0][i] = + WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN); + + // For time-domain gain of HB. + if (flagHB == 1) { + for (i = 0; i < num_high_bands; ++i) { + for (j = 0; j < self->blockLen; ++j) { + outFrameHB[i][j] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + self->dataBufHB[i][j], + WEBRTC_SPL_WORD16_MIN); + } + } + } + + return; + } + + FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn); + + if (self->blockInd < END_STARTUP_SHORT) { + for (i = 0; i < self->magnLen; i++) { + self->initMagnEst[i] += magn[i]; + } + } + + ComputeDdBasedWienerFilter(self, magn, theFilter); + + for (i = 0; i < self->magnLen; i++) { + // Flooring bottom. + if (theFilter[i] < self->denoiseBound) { + theFilter[i] = self->denoiseBound; + } + // Flooring top. + if (theFilter[i] > 1.f) { + theFilter[i] = 1.f; + } + if (self->blockInd < END_STARTUP_SHORT) { + theFilterTmp[i] = + (self->initMagnEst[i] - self->overdrive * self->parametricNoise[i]); + theFilterTmp[i] /= (self->initMagnEst[i] + 0.0001f); + // Flooring bottom. + if (theFilterTmp[i] < self->denoiseBound) { + theFilterTmp[i] = self->denoiseBound; + } + // Flooring top. + if (theFilterTmp[i] > 1.f) { + theFilterTmp[i] = 1.f; + } + // Weight the two suppression filters. + theFilter[i] *= (self->blockInd); + theFilterTmp[i] *= (END_STARTUP_SHORT - self->blockInd); + theFilter[i] += theFilterTmp[i]; + theFilter[i] /= (END_STARTUP_SHORT); + } + + self->smooth[i] = theFilter[i]; + real[i] *= self->smooth[i]; + imag[i] *= self->smooth[i]; + } + // Keep track of |magn| spectrum for next frame. 
+ memcpy(self->magnPrevProcess, magn, sizeof(*magn) * self->magnLen); + memcpy(self->noisePrev, self->noise, sizeof(self->noise[0]) * self->magnLen); + // Back to time domain. + IFFT(self, real, imag, self->magnLen, self->anaLen, winData); + + // Scale factor: only do it after END_STARTUP_LONG time. + factor = 1.f; + if (self->gainmap == 1 && self->blockInd > END_STARTUP_LONG) { + factor1 = 1.f; + factor2 = 1.f; + + energy2 = Energy(winData, self->anaLen); + gain = (float)sqrt(energy2 / (energy1 + 1.f)); + + // Scaling for new version. + if (gain > B_LIM) { + factor1 = 1.f + 1.3f * (gain - B_LIM); + if (gain * factor1 > 1.f) { + factor1 = 1.f / gain; + } + } + if (gain < B_LIM) { + // Don't reduce scale too much for pause regions: + // attenuation here should be controlled by flooring. + if (gain <= self->denoiseBound) { + gain = self->denoiseBound; + } + factor2 = 1.f - 0.3f * (B_LIM - gain); + } + // Combine both scales with speech/noise prob: + // note prior (priorSpeechProb) is not frequency dependent. + factor = self->priorSpeechProb * factor1 + + (1.f - self->priorSpeechProb) * factor2; + } // Out of self->gainmap == 1. + + Windowing(self->window, winData, self->anaLen, winData); + + // Synthesis. + for (i = 0; i < self->anaLen; i++) { + self->syntBuf[i] += factor * winData[i]; + } + // Read out fully processed segment. + for (i = self->windShift; i < self->blockLen + self->windShift; i++) { + fout[i - self->windShift] = self->syntBuf[i]; + } + // Update synthesis buffer. + UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf); + + for (i = 0; i < self->blockLen; ++i) + outFrame[0][i] = + WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN); + + // For time-domain gain of HB. + if (flagHB == 1) { + // Average speech prob from low band. + // Average over second half (i.e., 4->8kHz) of frequencies spectrum. 
+ avgProbSpeechHB = 0.0; + for (i = self->magnLen - deltaBweHB - 1; i < self->magnLen - 1; i++) { + avgProbSpeechHB += self->speechProb[i]; + } + avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB); + // If the speech was suppressed by a component between Analyze and + // Process, for example the AEC, then it should not be considered speech + // for high band suppression purposes. + sumMagnAnalyze = 0; + sumMagnProcess = 0; + for (i = 0; i < self->magnLen; ++i) { + sumMagnAnalyze += self->magnPrevAnalyze[i]; + sumMagnProcess += self->magnPrevProcess[i]; + } + avgProbSpeechHB *= sumMagnProcess / sumMagnAnalyze; + // Average filter gain from low band. + // Average over second half (i.e., 4->8kHz) of frequencies spectrum. + avgFilterGainHB = 0.0; + for (i = self->magnLen - deltaGainHB - 1; i < self->magnLen - 1; i++) { + avgFilterGainHB += self->smooth[i]; + } + avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB)); + avgProbSpeechHBTmp = 2.f * avgProbSpeechHB - 1.f; + // Gain based on speech probability. + gainModHB = 0.5f * (1.f + (float)tanh(gainMapParHB * avgProbSpeechHBTmp)); + // Combine gain with low band gain. + gainTimeDomainHB = 0.5f * gainModHB + 0.5f * avgFilterGainHB; + if (avgProbSpeechHB >= 0.5f) { + gainTimeDomainHB = 0.25f * gainModHB + 0.75f * avgFilterGainHB; + } + gainTimeDomainHB = gainTimeDomainHB * decayBweHB; + // Make sure gain is within flooring range. + // Flooring bottom. + if (gainTimeDomainHB < self->denoiseBound) { + gainTimeDomainHB = self->denoiseBound; + } + // Flooring top. + if (gainTimeDomainHB > 1.f) { + gainTimeDomainHB = 1.f; + } + // Apply gain. + for (i = 0; i < num_high_bands; ++i) { + for (j = 0; j < self->blockLen; j++) { + outFrameHB[i][j] = + WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + gainTimeDomainHB * self->dataBufHB[i][j], + WEBRTC_SPL_WORD16_MIN); + } + } + } // End of H band gain computation. 
+} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.h new file mode 100644 index 00000000..aba1c468 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/ns_core.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_ + +#include "webrtc/modules/audio_processing/ns/defines.h" + +typedef struct NSParaExtract_ { + // Bin size of histogram. + float binSizeLrt; + float binSizeSpecFlat; + float binSizeSpecDiff; + // Range of histogram over which LRT threshold is computed. + float rangeAvgHistLrt; + // Scale parameters: multiply dominant peaks of the histograms by scale factor + // to obtain thresholds for prior model. + float factor1ModelPars; // For LRT and spectral difference. + float factor2ModelPars; // For spectral_flatness: used when noise is flatter + // than speech. + // Peak limit for spectral flatness (varies between 0 and 1). + float thresPosSpecFlat; + // Limit on spacing of two highest peaks in histogram: spacing determined by + // bin size. + float limitPeakSpacingSpecFlat; + float limitPeakSpacingSpecDiff; + // Limit on relevance of second peak. + float limitPeakWeightsSpecFlat; + float limitPeakWeightsSpecDiff; + // Limit on fluctuation of LRT feature. + float thresFluctLrt; + // Limit on the max and min values for the feature thresholds. 
+ float maxLrt; + float minLrt; + float maxSpecFlat; + float minSpecFlat; + float maxSpecDiff; + float minSpecDiff; + // Criteria of weight of histogram peak to accept/reject feature. + int thresWeightSpecFlat; + int thresWeightSpecDiff; + +} NSParaExtract; + +typedef struct NoiseSuppressionC_ { + uint32_t fs; + size_t blockLen; + size_t windShift; + size_t anaLen; + size_t magnLen; + int aggrMode; + const float* window; + float analyzeBuf[ANAL_BLOCKL_MAX]; + float dataBuf[ANAL_BLOCKL_MAX]; + float syntBuf[ANAL_BLOCKL_MAX]; + + int initFlag; + // Parameters for quantile noise estimation. + float density[SIMULT * HALF_ANAL_BLOCKL]; + float lquantile[SIMULT * HALF_ANAL_BLOCKL]; + float quantile[HALF_ANAL_BLOCKL]; + int counter[SIMULT]; + int updates; + // Parameters for Wiener filter. + float smooth[HALF_ANAL_BLOCKL]; + float overdrive; + float denoiseBound; + int gainmap; + // FFT work arrays. + size_t ip[IP_LENGTH]; + float wfft[W_LENGTH]; + + // Parameters for new method: some not needed, will reduce/cleanup later. + int32_t blockInd; // Frame index counter. + int modelUpdatePars[4]; // Parameters for updating or estimating. + // Thresholds/weights for prior model. + float priorModelPars[7]; // Parameters for prior model. + float noise[HALF_ANAL_BLOCKL]; // Noise spectrum from current frame. + float noisePrev[HALF_ANAL_BLOCKL]; // Noise spectrum from previous frame. + // Magnitude spectrum of previous analyze frame. + float magnPrevAnalyze[HALF_ANAL_BLOCKL]; + // Magnitude spectrum of previous process frame. + float magnPrevProcess[HALF_ANAL_BLOCKL]; + float logLrtTimeAvg[HALF_ANAL_BLOCKL]; // Log LRT factor with time-smoothing. + float priorSpeechProb; // Prior speech/noise probability. + float featureData[7]; + // Conservative noise spectrum estimate. + float magnAvgPause[HALF_ANAL_BLOCKL]; + float signalEnergy; // Energy of |magn|. + float sumMagn; + float whiteNoiseLevel; // Initial noise estimate. 
+ float initMagnEst[HALF_ANAL_BLOCKL]; // Initial magnitude spectrum estimate. + float pinkNoiseNumerator; // Pink noise parameter: numerator. + float pinkNoiseExp; // Pink noise parameter: power of frequencies. + float parametricNoise[HALF_ANAL_BLOCKL]; + // Parameters for feature extraction. + NSParaExtract featureExtractionParams; + // Histograms for parameter estimation. + int histLrt[HIST_PAR_EST]; + int histSpecFlat[HIST_PAR_EST]; + int histSpecDiff[HIST_PAR_EST]; + // Quantities for high band estimate. + float speechProb[HALF_ANAL_BLOCKL]; // Final speech/noise prob: prior + LRT. + // Buffering data for HB. + float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX]; + +} NoiseSuppressionC; + +#ifdef __cplusplus +extern "C" { +#endif + +/**************************************************************************** + * WebRtcNs_InitCore(...) + * + * This function initializes a noise suppression instance + * + * Input: + * - self : Instance that should be initialized + * - fs : Sampling frequency + * + * Output: + * - self : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs); + +/**************************************************************************** + * WebRtcNs_set_policy_core(...) + * + * This changes the aggressiveness of the noise suppression method. + * + * Input: + * - self : Instance that should be initialized + * - mode : 0: Mild (6dB), 1: Medium (10dB), 2: Aggressive (15dB) + * + * Output: + * - self : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode); + +/**************************************************************************** + * WebRtcNs_AnalyzeCore + * + * Estimate the background noise. 
+ * + * Input: + * - self : Instance that should be initialized + * - speechFrame : Input speech frame for lower band + * + * Output: + * - self : Updated instance + */ +void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame); + +/**************************************************************************** + * WebRtcNs_ProcessCore + * + * Do noise suppression. + * + * Input: + * - self : Instance that should be initialized + * - inFrame : Input speech frame for each band + * - num_bands : Number of bands + * + * Output: + * - self : Updated instance + * - outFrame : Output speech frame for each band + */ +void WebRtcNs_ProcessCore(NoiseSuppressionC* self, + const float* const* inFrame, + size_t num_bands, + float* const* outFrame); + +#ifdef __cplusplus +} +#endif +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.c new file mode 100644 index 00000000..ed6125aa --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.c @@ -0,0 +1,2112 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h" + +#include <assert.h> +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "webrtc/common_audio/signal_processing/include/real_fft.h" +#include "webrtc/modules/audio_processing/ns/nsx_core.h" +#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" + +#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON) +/* Tables are defined in ARM assembly files. */ +extern const int16_t WebRtcNsx_kLogTable[9]; +extern const int16_t WebRtcNsx_kCounterDiv[201]; +extern const int16_t WebRtcNsx_kLogTableFrac[256]; +#else +static const int16_t WebRtcNsx_kLogTable[9] = { + 0, 177, 355, 532, 710, 887, 1065, 1242, 1420 +}; + +static const int16_t WebRtcNsx_kCounterDiv[201] = { + 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731, + 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311, + 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840, + 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607, + 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475, + 468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390, + 386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331, + 328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, + 285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254, + 252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228, + 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206, + 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188, + 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, + 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163 +}; + +static const int16_t WebRtcNsx_kLogTableFrac[256] = { + 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, + 22, 24, 25, 26, 28, 29, 
30, 32, 33, 34, 36, 37, 38, 40, 41, 42, + 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, + 63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81, + 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99, + 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, + 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, + 147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174, + 175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187, + 188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200, + 201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212, + 213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224, + 225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236, + 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247, + 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255 +}; +#endif // WEBRTC_DETECT_NEON || WEBRTC_HAS_NEON + +// Skip first frequency bins during estimation. 
(0 <= value < 64) +static const size_t kStartBand = 5; + +// hybrib Hanning & flat window +static const int16_t kBlocks80w128x[128] = { + 0, 536, 1072, 1606, 2139, 2669, 3196, 3720, 4240, 4756, 5266, + 5771, 6270, 6762, 7246, 7723, 8192, 8652, 9102, 9543, 9974, 10394, + 10803, 11200, 11585, 11958, 12318, 12665, 12998, 13318, 13623, 13913, 14189, + 14449, 14694, 14924, 15137, 15334, 15515, 15679, 15826, 15956, 16069, 16165, + 16244, 16305, 16349, 16375, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16375, 16349, 16305, 16244, 16165, 16069, 15956, + 15826, 15679, 15515, 15334, 15137, 14924, 14694, 14449, 14189, 13913, 13623, + 13318, 12998, 12665, 12318, 11958, 11585, 11200, 10803, 10394, 9974, 9543, + 9102, 8652, 8192, 7723, 7246, 6762, 6270, 5771, 5266, 4756, 4240, + 3720, 3196, 2669, 2139, 1606, 1072, 536 +}; + +// hybrib Hanning & flat window +static const int16_t kBlocks160w256x[256] = { + 0, 268, 536, 804, 1072, 1339, 1606, 1872, + 2139, 2404, 2669, 2933, 3196, 3459, 3720, 3981, + 4240, 4499, 4756, 5012, 5266, 5520, 5771, 6021, + 6270, 6517, 6762, 7005, 7246, 7486, 7723, 7959, + 8192, 8423, 8652, 8878, 9102, 9324, 9543, 9760, + 9974, 10185, 10394, 10600, 10803, 11003, 11200, 11394, + 11585, 11773, 11958, 12140, 12318, 12493, 12665, 12833, + 12998, 13160, 13318, 13472, 13623, 13770, 13913, 14053, + 14189, 14321, 14449, 14574, 14694, 14811, 14924, 15032, + 15137, 15237, 15334, 15426, 15515, 15599, 15679, 15754, + 15826, 15893, 15956, 16015, 16069, 16119, 16165, 16207, + 16244, 16277, 16305, 16329, 16349, 16364, 16375, 16382, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 
16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16382, 16375, 16364, 16349, 16329, 16305, 16277, + 16244, 16207, 16165, 16119, 16069, 16015, 15956, 15893, + 15826, 15754, 15679, 15599, 15515, 15426, 15334, 15237, + 15137, 15032, 14924, 14811, 14694, 14574, 14449, 14321, + 14189, 14053, 13913, 13770, 13623, 13472, 13318, 13160, + 12998, 12833, 12665, 12493, 12318, 12140, 11958, 11773, + 11585, 11394, 11200, 11003, 10803, 10600, 10394, 10185, + 9974, 9760, 9543, 9324, 9102, 8878, 8652, 8423, + 8192, 7959, 7723, 7486, 7246, 7005, 6762, 6517, + 6270, 6021, 5771, 5520, 5266, 5012, 4756, 4499, + 4240, 3981, 3720, 3459, 3196, 2933, 2669, 2404, + 2139, 1872, 1606, 1339, 1072, 804, 536, 268 +}; + +// Gain factor1 table: Input value in Q8 and output value in Q13 +// original floating point code +// if (gain > blim) { +// factor1 = 1.0 + 1.3 * (gain - blim); +// if (gain * factor1 > 1.0) { +// factor1 = 1.0 / gain; +// } +// } else { +// factor1 = 1.0; +// } +static const int16_t kFactor1Table[257] = { + 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8233, 8274, 8315, 8355, 8396, 8436, 8475, 8515, 8554, 8592, 8631, 8669, + 8707, 8745, 8783, 8820, 8857, 8894, 8931, 8967, 9003, 9039, 9075, 9111, 9146, 9181, + 9216, 9251, 9286, 9320, 9354, 9388, 9422, 9456, 9489, 9523, 9556, 9589, 9622, 9655, + 9687, 9719, 9752, 9784, 9816, 9848, 9879, 9911, 9942, 9973, 10004, 10035, 10066, + 10097, 10128, 10158, 10188, 10218, 10249, 10279, 10308, 10338, 10368, 10397, 10426, + 
10456, 10485, 10514, 10543, 10572, 10600, 10629, 10657, 10686, 10714, 10742, 10770, + 10798, 10826, 10854, 10882, 10847, 10810, 10774, 10737, 10701, 10666, 10631, 10596, + 10562, 10527, 10494, 10460, 10427, 10394, 10362, 10329, 10297, 10266, 10235, 10203, + 10173, 10142, 10112, 10082, 10052, 10023, 9994, 9965, 9936, 9908, 9879, 9851, 9824, + 9796, 9769, 9742, 9715, 9689, 9662, 9636, 9610, 9584, 9559, 9534, 9508, 9484, 9459, + 9434, 9410, 9386, 9362, 9338, 9314, 9291, 9268, 9245, 9222, 9199, 9176, 9154, 9132, + 9110, 9088, 9066, 9044, 9023, 9002, 8980, 8959, 8939, 8918, 8897, 8877, 8857, 8836, + 8816, 8796, 8777, 8757, 8738, 8718, 8699, 8680, 8661, 8642, 8623, 8605, 8586, 8568, + 8550, 8532, 8514, 8496, 8478, 8460, 8443, 8425, 8408, 8391, 8373, 8356, 8339, 8323, + 8306, 8289, 8273, 8256, 8240, 8224, 8208, 8192 +}; + +// For Factor2 tables +// original floating point code +// if (gain > blim) { +// factor2 = 1.0; +// } else { +// factor2 = 1.0 - 0.3 * (blim - gain); +// if (gain <= inst->denoiseBound) { +// factor2 = 1.0 - 0.3 * (blim - inst->denoiseBound); +// } +// } +// +// Gain factor table: Input value in Q8 and output value in Q13 +static const int16_t kFactor2Aggressiveness1[257] = { + 7577, 7577, 7577, 7577, 7577, 7577, + 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7596, 7614, 7632, + 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845, + 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016, + 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162, + 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 
8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192 +}; + +// Gain factor table: Input value in Q8 and output value in Q13 +static const int16_t kFactor2Aggressiveness2[257] = { + 7270, 7270, 7270, 7270, 7270, 7306, + 7339, 7369, 7397, 7424, 7448, 7472, 7495, 7517, 7537, 7558, 7577, 7596, 7614, 7632, + 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845, + 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016, + 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162, + 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 
8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192 +}; + +// Gain factor table: Input value in Q8 and output value in Q13 +static const int16_t kFactor2Aggressiveness3[257] = { + 7184, 7184, 7184, 7229, 7270, 7306, + 7339, 7369, 7397, 7424, 7448, 7472, 7495, 7517, 7537, 7558, 7577, 7596, 7614, 7632, + 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845, + 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016, + 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162, + 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 
8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192 +}; + +// sum of log2(i) from table index to inst->anaLen2 in Q5 +// Note that the first table value is invalid, since log2(0) = -infinity +static const int16_t kSumLogIndex[66] = { + 0, 22917, 22917, 22885, 22834, 22770, 22696, 22613, + 22524, 22428, 22326, 22220, 22109, 21994, 21876, 21754, + 21629, 21501, 21370, 21237, 21101, 20963, 20822, 20679, + 20535, 20388, 20239, 20089, 19937, 19783, 19628, 19470, + 19312, 19152, 18991, 18828, 18664, 18498, 18331, 18164, + 17994, 17824, 17653, 17480, 17306, 17132, 16956, 16779, + 16602, 16423, 16243, 16063, 15881, 15699, 15515, 15331, + 15146, 14960, 14774, 14586, 14398, 14209, 14019, 13829, + 13637, 13445 +}; + +// sum of log2(i)^2 from table index to inst->anaLen2 in Q2 +// Note that the first table value is invalid, since log2(0) = -infinity +static const int16_t kSumSquareLogIndex[66] = { + 0, 16959, 16959, 16955, 16945, 16929, 16908, 16881, + 16850, 16814, 16773, 16729, 16681, 16630, 16575, 16517, + 16456, 16392, 16325, 16256, 16184, 16109, 16032, 15952, + 15870, 15786, 15700, 15612, 15521, 15429, 15334, 15238, + 15140, 15040, 14938, 14834, 14729, 14622, 14514, 14404, + 14292, 14179, 14064, 13947, 13830, 13710, 13590, 13468, + 13344, 13220, 13094, 12966, 12837, 12707, 12576, 12444, + 12310, 12175, 12039, 11902, 11763, 11624, 11483, 11341, + 11198, 11054 +}; + +// log2(table index) in Q12 +// Note that the first table value is invalid, since log2(0) = -infinity +static const int16_t kLogIndex[129] = { + 0, 0, 4096, 6492, 8192, 9511, 10588, 11499, + 12288, 12984, 13607, 14170, 14684, 15157, 15595, 16003, + 16384, 16742, 17080, 17400, 17703, 17991, 18266, 18529, + 18780, 19021, 19253, 19476, 19691, 19898, 20099, 20292, + 20480, 20662, 20838, 21010, 21176, 21338, 21496, 21649, + 21799, 21945, 22087, 22226, 22362, 
22495, 22625, 22752, + 22876, 22998, 23117, 23234, 23349, 23462, 23572, 23680, + 23787, 23892, 23994, 24095, 24195, 24292, 24388, 24483, + 24576, 24668, 24758, 24847, 24934, 25021, 25106, 25189, + 25272, 25354, 25434, 25513, 25592, 25669, 25745, 25820, + 25895, 25968, 26041, 26112, 26183, 26253, 26322, 26390, + 26458, 26525, 26591, 26656, 26721, 26784, 26848, 26910, + 26972, 27033, 27094, 27154, 27213, 27272, 27330, 27388, + 27445, 27502, 27558, 27613, 27668, 27722, 27776, 27830, + 27883, 27935, 27988, 28039, 28090, 28141, 28191, 28241, + 28291, 28340, 28388, 28437, 28484, 28532, 28579, 28626, + 28672 +}; + +// determinant of estimation matrix in Q0 corresponding to the log2 tables above +// Note that the first table value is invalid, since log2(0) = -infinity +static const int16_t kDeterminantEstMatrix[66] = { + 0, 29814, 25574, 22640, 20351, 18469, 16873, 15491, + 14277, 13199, 12233, 11362, 10571, 9851, 9192, 8587, + 8030, 7515, 7038, 6596, 6186, 5804, 5448, 5115, + 4805, 4514, 4242, 3988, 3749, 3524, 3314, 3116, + 2930, 2755, 2590, 2435, 2289, 2152, 2022, 1900, + 1785, 1677, 1575, 1478, 1388, 1302, 1221, 1145, + 1073, 1005, 942, 881, 825, 771, 721, 674, + 629, 587, 547, 510, 475, 442, 411, 382, + 355, 330 +}; + +// Update the noise estimation information. 
// Converts the inst->magnLen log-domain quantile values that start at
// inst->noiseEstLogQuantile[offset] into inst->noiseEstQuantile[0..magnLen-1]
// and records the Q-domain chosen for that output in inst->qNoise.
//
// inst   - noise suppression state; noiseEstQuantile and qNoise are written.
// offset - index of the first log-quantile entry to convert. The caller in
//          this file passes s * inst->magnLen for simultaneous estimate s.
static void UpdateNoiseEstimate(NoiseSuppressionFixedC* inst, int offset) {
  int32_t tmp32no1 = 0;
  int32_t tmp32no2 = 0;
  int16_t tmp16 = 0;
  // 11819 / 2^13 ~= 1.4427 ~= log2(e): multiplying by it rewrites exp(x) as
  // 2^(x * log2(e)).
  const int16_t kExp2Const = 11819; // Q13

  size_t i = 0;

  // Largest log quantile of this estimate; it decides the output Q-domain.
  tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
                                   inst->magnLen);
  // Guarantee a Q-domain as high as possible and still fit in int16
  inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
                   kExp2Const, tmp16, 21);
  for (i = 0; i < inst->magnLen; i++) {
    // inst->quantile[i]=exp(inst->lquantile[offset+i]);
    // in Q21
    tmp32no2 = kExp2Const * inst->noiseEstLogQuantile[offset + i];
    // Low 21 bits are the fractional part of the exponent; OR-ing in bit 21
    // forms the mantissa (1 + frac) in Q21.
    tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
    // Integer part of the exponent.
    tmp16 = (int16_t)(tmp32no2 >> 21);
    tmp16 -= 21;// shift 21 to get result in Q0
    tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise)
    // tmp16 is now the net shift to apply to the mantissa; its sign selects
    // the shift direction.
    if (tmp16 < 0) {
      tmp32no1 >>= -tmp16;
    } else {
      tmp32no1 <<= tmp16;
    }
    inst->noiseEstQuantile[i] = WebRtcSpl_SatW32ToW16(tmp32no1);
  }
}

// Noise Estimation
//
// Updates the SIMULT simultaneous log-quantile noise estimates kept in |inst|
// from the magnitude spectrum |magn| and reports the current noise estimate.
//
// inst    - noise suppression state. noiseEstLogQuantile, noiseEstDensity and
//           noiseEstCounter are updated in place; noiseEstQuantile and qNoise
//           are refreshed through UpdateNoiseEstimate().
// magn    - magnitude spectrum; inst->magnLen entries are read.
// noise   - output; receives inst->noiseEstQuantile[0..magnLen-1] in Q(qNoise).
// q_noise - output; receives inst->qNoise.
static void NoiseEstimationC(NoiseSuppressionFixedC* inst,
                             uint16_t* magn,
                             uint32_t* noise,
                             int16_t* q_noise) {
  int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
  int16_t countProd, delta, zeros, frac;
  int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
  const int16_t log2_const = 22713; // Q15
  const int16_t width_factor = 21845;

  size_t i, s, offset;

  // logval is the log(2^stages) term of the comment below, adjusted for the
  // current input normalization (normData). It is looked up by magnitude of
  // tabind and negated when tabind is negative.
  tabind = inst->stages - inst->normData;
  assert(tabind < 9);
  assert(tabind > -9);
  if (tabind < 0) {
    logval = -WebRtcNsx_kLogTable[-tabind];
  } else {
    logval = WebRtcNsx_kLogTable[tabind];
  }

  // lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
  // magn is in Q(-stages), and the real lmagn values are:
  // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
  // lmagn in Q8
  for (i = 0; i < inst->magnLen; i++) {
    if (magn[i]) {
      // Normalize the value, then take the 8 bits below the leading one as
      // the index into the fractional log2 table.
      zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
      frac = (int16_t)((((uint32_t)magn[i] << zeros)
                              & 0x7FFFFFFF) >> 23);
      // log2(magn(i))
      assert(frac < 256);
      log2 = (int16_t)(((31 - zeros) << 8)
                             + WebRtcNsx_kLogTableFrac[frac]);
      // log2(magn(i))*log(2)
      lmagn[i] = (int16_t)((log2 * log2_const) >> 15);
      // + log(2^stages)
      lmagn[i] += logval;
    } else {
      // Zero magnitude: use the offset term alone.
      lmagn[i] = logval;//0;
    }
  }

  // loop over simultaneous estimates
  for (s = 0; s < SIMULT; s++) {
    // Estimate s occupies magnLen consecutive entries starting at offset.
    offset = s * inst->magnLen;

    // Get counter values from state
    counter = inst->noiseEstCounter[s];
    // NOTE(review): the assert implies WebRtcNsx_kCounterDiv has at least 201
    // entries - confirm against the table definition.
    assert(counter < 201);
    countDiv = WebRtcNsx_kCounterDiv[counter];
    countProd = (int16_t)(counter * countDiv);

    // quant_est(...)
    for (i = 0; i < inst->magnLen; i++) {
      // compute delta
      if (inst->noiseEstDensity[offset + i] > 512) {
        // Get the value for delta by shifting instead of dividing.
        int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]);
        delta = (int16_t)(FACTOR_Q16 >> (14 - factor));
      } else {
        delta = FACTOR_Q7;
        if (inst->blockIndex < END_STARTUP_LONG) {
          // Smaller step size during startup. This prevents from using
          // unrealistic values causing overflow.
          delta = FACTOR_Q7_STARTUP;
        }
      }

      // update log quantile estimate
      tmp16 = (int16_t)((delta * countDiv) >> 14);
      if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
        // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
        // CounterDiv=1/(inst->counter[s]+1) in Q15
        tmp16 += 2;
        inst->noiseEstLogQuantile[offset + i] += tmp16 / 4;
      } else {
        tmp16 += 1;
        // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
        // TODO(bjornv): investigate why we need to truncate twice.
        tmp16no2 = (int16_t)((tmp16 / 2) * 3 / 2);
        inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
        if (inst->noiseEstLogQuantile[offset + i] < logval) {
          // This is the smallest fixed point representation we can
          // have, hence we limit the output.
          inst->noiseEstLogQuantile[offset + i] = logval;
        }
      }

      // update density estimate
      // Only bins whose log magnitude lies within WIDTH_Q8 of the current
      // log quantile contribute to the density.
      if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
          < WIDTH_Q8) {
        tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
                     inst->noiseEstDensity[offset + i], countProd, 15);
        tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
                     width_factor, countDiv, 15);
        inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
      }
    } // end loop over magnitude spectrum

    // When this estimate's counter has reached END_STARTUP_LONG, restart it
    // and, once past startup, publish this estimate as the noise quantile.
    if (counter >= END_STARTUP_LONG) {
      inst->noiseEstCounter[s] = 0;
      if (inst->blockIndex >= END_STARTUP_LONG) {
        UpdateNoiseEstimate(inst, offset);
      }
    }
    inst->noiseEstCounter[s]++;

  } // end loop over simultaneous estimates

  // Sequentially update the noise during startup
  // offset still holds the value from the last loop iteration
  // (s == SIMULT - 1), so the last estimate is the one published here.
  if (inst->blockIndex < END_STARTUP_LONG) {
    UpdateNoiseEstimate(inst, offset);
  }

  for (i = 0; i < inst->magnLen; i++) {
    noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise)
  }
  (*q_noise) = (int16_t)inst->qNoise;
}

// Filter the data in the frequency domain, and create spectrum.
//
// Scales inst->real[] and inst->imag[] in place by inst->noiseSupFilter[]
// (product shifted right by 14) and writes the result to |freq_buf| as
// interleaved pairs: freq_buf[2*k] = real[k], freq_buf[2*k+1] = -imag[k].
//
// inst     - noise suppression state; real[] and imag[] are overwritten for
//            indices 0..magnLen-1.
// freq_buf - output; indices 0..inst->anaLen+1 are written, so it must hold
//            at least inst->anaLen + 2 elements.
static void PrepareSpectrumC(NoiseSuppressionFixedC* inst, int16_t* freq_buf) {
  size_t i = 0, j = 0;

  for (i = 0; i < inst->magnLen; i++) {
    inst->real[i] = (int16_t)((inst->real[i] *
        (int16_t)(inst->noiseSupFilter[i])) >> 14);  // Q(normData-stages)
    inst->imag[i] = (int16_t)((inst->imag[i] *
        (int16_t)(inst->noiseSupFilter[i])) >> 14);  // Q(normData-stages)
  }

  // Pack bins 0..anaLen2 with the imaginary part negated.
  freq_buf[0] = inst->real[0];
  freq_buf[1] = -inst->imag[0];
  for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
    freq_buf[j] = inst->real[i];
    freq_buf[j + 1] = -inst->imag[i];
  }
  // Last bin (index anaLen2) lands at freq_buf[anaLen] and freq_buf[anaLen+1].
  freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
  freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
}

// Denormalize the real-valued signal |in|, the output from inverse FFT.
+static void DenormalizeC(NoiseSuppressionFixedC* inst, + int16_t* in, + int factor) { + size_t i = 0; + int32_t tmp32 = 0; + for (i = 0; i < inst->anaLen; i += 1) { + tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[i], + factor - inst->normData); + inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0 + } +} + +// For the noise supression process, synthesis, read out fully processed +// segment, and update synthesis buffer. +static void SynthesisUpdateC(NoiseSuppressionFixedC* inst, + int16_t* out_frame, + int16_t gain_factor) { + size_t i = 0; + int16_t tmp16a = 0; + int16_t tmp16b = 0; + int32_t tmp32 = 0; + + // synthesis + for (i = 0; i < inst->anaLen; i++) { + tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + inst->window[i], inst->real[i], 14); // Q0, window in Q14 + tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13); // Q0 + // Down shift with rounding + tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0 + inst->synthesisBuffer[i] = WebRtcSpl_AddSatW16(inst->synthesisBuffer[i], + tmp16b); // Q0 + } + + // read out fully processed segment + for (i = 0; i < inst->blockLen10ms; i++) { + out_frame[i] = inst->synthesisBuffer[i]; // Q0 + } + + // update synthesis buffer + memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer)); + WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms); +} + +// Update analysis buffer for lower band, and window data before FFT. +static void AnalysisUpdateC(NoiseSuppressionFixedC* inst, + int16_t* out, + int16_t* new_speech) { + size_t i = 0; + + // For lower band update analysis buffer. 
+ memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer)); + memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech, + inst->blockLen10ms * sizeof(*inst->analysisBuffer)); + + // Window data before FFT. + for (i = 0; i < inst->anaLen; i++) { + out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + inst->window[i], inst->analysisBuffer[i], 14); // Q0 + } +} + +// Normalize the real-valued signal |in|, the input to forward FFT. +static void NormalizeRealBufferC(NoiseSuppressionFixedC* inst, + const int16_t* in, + int16_t* out) { + size_t i = 0; + assert(inst->normData >= 0); + for (i = 0; i < inst->anaLen; ++i) { + out[i] = in[i] << inst->normData; // Q(normData) + } +} + +// Declare function pointers. +NoiseEstimation WebRtcNsx_NoiseEstimation; +PrepareSpectrum WebRtcNsx_PrepareSpectrum; +SynthesisUpdate WebRtcNsx_SynthesisUpdate; +AnalysisUpdate WebRtcNsx_AnalysisUpdate; +Denormalize WebRtcNsx_Denormalize; +NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer; + +#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON) +// Initialize function pointers for ARM Neon platform. +static void WebRtcNsx_InitNeon(void) { + WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon; + WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrumNeon; + WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdateNeon; + WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdateNeon; +} +#endif + +#if defined(MIPS32_LE) +// Initialize function pointers for MIPS platform. 
+static void WebRtcNsx_InitMips(void) { + WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrum_mips; + WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdate_mips; + WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdate_mips; + WebRtcNsx_NormalizeRealBuffer = WebRtcNsx_NormalizeRealBuffer_mips; +#if defined(MIPS_DSP_R1_LE) + WebRtcNsx_Denormalize = WebRtcNsx_Denormalize_mips; +#endif +} +#endif + +void WebRtcNsx_CalcParametricNoiseEstimate(NoiseSuppressionFixedC* inst, + int16_t pink_noise_exp_avg, + int32_t pink_noise_num_avg, + int freq_index, + uint32_t* noise_estimate, + uint32_t* noise_estimate_avg) { + int32_t tmp32no1 = 0; + int32_t tmp32no2 = 0; + + int16_t int_part = 0; + int16_t frac_part = 0; + + // Use pink noise estimate + // noise_estimate = 2^(pinkNoiseNumerator + pinkNoiseExp * log2(j)) + assert(freq_index >= 0); + assert(freq_index < 129); + tmp32no2 = (pink_noise_exp_avg * kLogIndex[freq_index]) >> 15; // Q11 + tmp32no1 = pink_noise_num_avg - tmp32no2; // Q11 + + // Calculate output: 2^tmp32no1 + // Output in Q(minNorm-stages) + tmp32no1 += (inst->minNorm - inst->stages) << 11; + if (tmp32no1 > 0) { + int_part = (int16_t)(tmp32no1 >> 11); + frac_part = (int16_t)(tmp32no1 & 0x000007ff); // Q11 + // Piecewise linear approximation of 'b' in + // 2^(int_part+frac_part) = 2^int_part * (1 + b) + // 'b' is given in Q11 and below stored in frac_part. 
+ if (frac_part >> 10) { + // Upper fractional part + tmp32no2 = (2048 - frac_part) * 1244; // Q21 + tmp32no2 = 2048 - (tmp32no2 >> 10); + } else { + // Lower fractional part + tmp32no2 = (frac_part * 804) >> 10; + } + // Shift fractional part to Q(minNorm-stages) + tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, int_part - 11); + *noise_estimate_avg = (1 << int_part) + (uint32_t)tmp32no2; + // Scale up to initMagnEst, which is not block averaged + *noise_estimate = (*noise_estimate_avg) * (uint32_t)(inst->blockIndex + 1); + } +} + +// Initialize state +int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs) { + int i; + + //check for valid pointer + if (inst == NULL) { + return -1; + } + // + + // Initialization of struct + if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) { + inst->fs = fs; + } else { + return -1; + } + + if (fs == 8000) { + inst->blockLen10ms = 80; + inst->anaLen = 128; + inst->stages = 7; + inst->window = kBlocks80w128x; + inst->thresholdLogLrt = 131072; //default threshold for LRT feature + inst->maxLrt = 0x0040000; + inst->minLrt = 52429; + } else { + inst->blockLen10ms = 160; + inst->anaLen = 256; + inst->stages = 8; + inst->window = kBlocks160w256x; + inst->thresholdLogLrt = 212644; //default threshold for LRT feature + inst->maxLrt = 0x0080000; + inst->minLrt = 104858; + } + inst->anaLen2 = inst->anaLen / 2; + inst->magnLen = inst->anaLen2 + 1; + + if (inst->real_fft != NULL) { + WebRtcSpl_FreeRealFFT(inst->real_fft); + } + inst->real_fft = WebRtcSpl_CreateRealFFT(inst->stages); + if (inst->real_fft == NULL) { + return -1; + } + + WebRtcSpl_ZerosArrayW16(inst->analysisBuffer, ANAL_BLOCKL_MAX); + WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer, ANAL_BLOCKL_MAX); + + // for HB processing + WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX[0], + NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX); + // for quantile noise estimation + WebRtcSpl_ZerosArrayW16(inst->noiseEstQuantile, HALF_ANAL_BLOCKL); + for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; 
i++) { + inst->noiseEstLogQuantile[i] = 2048; // Q8 + inst->noiseEstDensity[i] = 153; // Q9 + } + for (i = 0; i < SIMULT; i++) { + inst->noiseEstCounter[i] = (int16_t)(END_STARTUP_LONG * (i + 1)) / SIMULT; + } + + // Initialize suppression filter with ones + WebRtcSpl_MemSetW16((int16_t*)inst->noiseSupFilter, 16384, HALF_ANAL_BLOCKL); + + // Set the aggressiveness: default + inst->aggrMode = 0; + + //initialize variables for new method + inst->priorNonSpeechProb = 8192; // Q14(0.5) prior probability for speech/noise + for (i = 0; i < HALF_ANAL_BLOCKL; i++) { + inst->prevMagnU16[i] = 0; + inst->prevNoiseU32[i] = 0; //previous noise-spectrum + inst->logLrtTimeAvgW32[i] = 0; //smooth LR ratio + inst->avgMagnPause[i] = 0; //conservative noise spectrum estimate + inst->initMagnEst[i] = 0; //initial average magnitude spectrum + } + + //feature quantities + inst->thresholdSpecDiff = 50; //threshold for difference feature: determined on-line + inst->thresholdSpecFlat = 20480; //threshold for flatness: determined on-line + inst->featureLogLrt = inst->thresholdLogLrt; //average LRT factor (= threshold) + inst->featureSpecFlat = inst->thresholdSpecFlat; //spectral flatness (= threshold) + inst->featureSpecDiff = inst->thresholdSpecDiff; //spectral difference (= threshold) + inst->weightLogLrt = 6; //default weighting par for LRT feature + inst->weightSpecFlat = 0; //default weighting par for spectral flatness feature + inst->weightSpecDiff = 0; //default weighting par for spectral difference feature + + inst->curAvgMagnEnergy = 0; //window time-average of input magnitude spectrum + inst->timeAvgMagnEnergy = 0; //normalization for spectral difference + inst->timeAvgMagnEnergyTmp = 0; //normalization for spectral difference + + //histogram quantities: used to estimate/update thresholds for features + WebRtcSpl_ZerosArrayW16(inst->histLrt, HIST_PAR_EST); + WebRtcSpl_ZerosArrayW16(inst->histSpecDiff, HIST_PAR_EST); + WebRtcSpl_ZerosArrayW16(inst->histSpecFlat, HIST_PAR_EST); + + 
inst->blockIndex = -1; //frame counter + + //inst->modelUpdate = 500; //window for update + inst->modelUpdate = (1 << STAT_UPDATES); //window for update + inst->cntThresUpdate = 0; //counter feature thresholds updates + + inst->sumMagn = 0; + inst->magnEnergy = 0; + inst->prevQMagn = 0; + inst->qNoise = 0; + inst->prevQNoise = 0; + + inst->energyIn = 0; + inst->scaleEnergyIn = 0; + + inst->whiteNoiseLevel = 0; + inst->pinkNoiseNumerator = 0; + inst->pinkNoiseExp = 0; + inst->minNorm = 15; // Start with full scale + inst->zeroInputSignal = 0; + + //default mode + WebRtcNsx_set_policy_core(inst, 0); + +#ifdef NS_FILEDEBUG + inst->infile = fopen("indebug.pcm", "wb"); + inst->outfile = fopen("outdebug.pcm", "wb"); + inst->file1 = fopen("file1.pcm", "wb"); + inst->file2 = fopen("file2.pcm", "wb"); + inst->file3 = fopen("file3.pcm", "wb"); + inst->file4 = fopen("file4.pcm", "wb"); + inst->file5 = fopen("file5.pcm", "wb"); +#endif + + // Initialize function pointers. + WebRtcNsx_NoiseEstimation = NoiseEstimationC; + WebRtcNsx_PrepareSpectrum = PrepareSpectrumC; + WebRtcNsx_SynthesisUpdate = SynthesisUpdateC; + WebRtcNsx_AnalysisUpdate = AnalysisUpdateC; + WebRtcNsx_Denormalize = DenormalizeC; + WebRtcNsx_NormalizeRealBuffer = NormalizeRealBufferC; + +#ifdef WEBRTC_DETECT_NEON + uint64_t features = WebRtc_GetCPUFeaturesARM(); + if ((features & kCPUFeatureNEON) != 0) { + WebRtcNsx_InitNeon(); + } +#elif defined(WEBRTC_HAS_NEON) + WebRtcNsx_InitNeon(); +#endif + +#if defined(MIPS32_LE) + WebRtcNsx_InitMips(); +#endif + + inst->initFlag = 1; + + return 0; +} + +int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode) { + // allow for modes:0,1,2,3 + if (mode < 0 || mode > 3) { + return -1; + } + + inst->aggrMode = mode; + if (mode == 0) { + inst->overdrive = 256; // Q8(1.0) + inst->denoiseBound = 8192; // Q14(0.5) + inst->gainMap = 0; // No gain compensation + } else if (mode == 1) { + inst->overdrive = 256; // Q8(1.0) + inst->denoiseBound = 4096; // Q14(0.25) + 
inst->factor2Table = kFactor2Aggressiveness1; + inst->gainMap = 1; + } else if (mode == 2) { + inst->overdrive = 282; // ~= Q8(1.1) + inst->denoiseBound = 2048; // Q14(0.125) + inst->factor2Table = kFactor2Aggressiveness2; + inst->gainMap = 1; + } else if (mode == 3) { + inst->overdrive = 320; // Q8(1.25) + inst->denoiseBound = 1475; // ~= Q14(0.09) + inst->factor2Table = kFactor2Aggressiveness3; + inst->gainMap = 1; + } + return 0; +} + +// Extract thresholds for feature parameters +// histograms are computed over some window_size (given by window_pars) +// thresholds and weights are extracted every window +// flag 0 means update histogram only, flag 1 means compute the thresholds/weights +// threshold and weights are returned in: inst->priorModelPars +void WebRtcNsx_FeatureParameterExtraction(NoiseSuppressionFixedC* inst, + int flag) { + uint32_t tmpU32; + uint32_t histIndex; + uint32_t posPeak1SpecFlatFX, posPeak2SpecFlatFX; + uint32_t posPeak1SpecDiffFX, posPeak2SpecDiffFX; + + int32_t tmp32; + int32_t fluctLrtFX, thresFluctLrtFX; + int32_t avgHistLrtFX, avgSquareHistLrtFX, avgHistLrtComplFX; + + int16_t j; + int16_t numHistLrt; + + int i; + int useFeatureSpecFlat, useFeatureSpecDiff, featureSum; + int maxPeak1, maxPeak2; + int weightPeak1SpecFlat, weightPeak2SpecFlat; + int weightPeak1SpecDiff, weightPeak2SpecDiff; + + //update histograms + if (!flag) { + // LRT + // Type casting to UWord32 is safe since negative values will not be wrapped to larger + // values than HIST_PAR_EST + histIndex = (uint32_t)(inst->featureLogLrt); + if (histIndex < HIST_PAR_EST) { + inst->histLrt[histIndex]++; + } + // Spectral flatness + // (inst->featureSpecFlat*20)>>10 = (inst->featureSpecFlat*5)>>8 + histIndex = (inst->featureSpecFlat * 5) >> 8; + if (histIndex < HIST_PAR_EST) { + inst->histSpecFlat[histIndex]++; + } + // Spectral difference + histIndex = HIST_PAR_EST; + if (inst->timeAvgMagnEnergy > 0) { + // Guard against division by zero + // If timeAvgMagnEnergy == 0 we have 
no normalizing statistics and + // therefore can't update the histogram + histIndex = ((inst->featureSpecDiff * 5) >> inst->stages) / + inst->timeAvgMagnEnergy; + } + if (histIndex < HIST_PAR_EST) { + inst->histSpecDiff[histIndex]++; + } + } + + // extract parameters for speech/noise probability + if (flag) { + useFeatureSpecDiff = 1; + //for LRT feature: + // compute the average over inst->featureExtractionParams.rangeAvgHistLrt + avgHistLrtFX = 0; + avgSquareHistLrtFX = 0; + numHistLrt = 0; + for (i = 0; i < BIN_SIZE_LRT; i++) { + j = (2 * i + 1); + tmp32 = inst->histLrt[i] * j; + avgHistLrtFX += tmp32; + numHistLrt += inst->histLrt[i]; + avgSquareHistLrtFX += tmp32 * j; + } + avgHistLrtComplFX = avgHistLrtFX; + for (; i < HIST_PAR_EST; i++) { + j = (2 * i + 1); + tmp32 = inst->histLrt[i] * j; + avgHistLrtComplFX += tmp32; + avgSquareHistLrtFX += tmp32 * j; + } + fluctLrtFX = avgSquareHistLrtFX * numHistLrt - + avgHistLrtFX * avgHistLrtComplFX; + thresFluctLrtFX = THRES_FLUCT_LRT * numHistLrt; + // get threshold for LRT feature: + tmpU32 = (FACTOR_1_LRT_DIFF * (uint32_t)avgHistLrtFX); + if ((fluctLrtFX < thresFluctLrtFX) || (numHistLrt == 0) || + (tmpU32 > (uint32_t)(100 * numHistLrt))) { + //very low fluctuation, so likely noise + inst->thresholdLogLrt = inst->maxLrt; + } else { + tmp32 = (int32_t)((tmpU32 << (9 + inst->stages)) / numHistLrt / + 25); + // check if value is within min/max range + inst->thresholdLogLrt = WEBRTC_SPL_SAT(inst->maxLrt, + tmp32, + inst->minLrt); + } + if (fluctLrtFX < thresFluctLrtFX) { + // Do not use difference feature if fluctuation of LRT feature is very low: + // most likely just noise state + useFeatureSpecDiff = 0; + } + + // for spectral flatness and spectral difference: compute the main peaks of histogram + maxPeak1 = 0; + maxPeak2 = 0; + posPeak1SpecFlatFX = 0; + posPeak2SpecFlatFX = 0; + weightPeak1SpecFlat = 0; + weightPeak2SpecFlat = 0; + + // peaks for flatness + for (i = 0; i < HIST_PAR_EST; i++) { + if 
(inst->histSpecFlat[i] > maxPeak1) { + // Found new "first" peak + maxPeak2 = maxPeak1; + weightPeak2SpecFlat = weightPeak1SpecFlat; + posPeak2SpecFlatFX = posPeak1SpecFlatFX; + + maxPeak1 = inst->histSpecFlat[i]; + weightPeak1SpecFlat = inst->histSpecFlat[i]; + posPeak1SpecFlatFX = (uint32_t)(2 * i + 1); + } else if (inst->histSpecFlat[i] > maxPeak2) { + // Found new "second" peak + maxPeak2 = inst->histSpecFlat[i]; + weightPeak2SpecFlat = inst->histSpecFlat[i]; + posPeak2SpecFlatFX = (uint32_t)(2 * i + 1); + } + } + + // for spectral flatness feature + useFeatureSpecFlat = 1; + // merge the two peaks if they are close + if ((posPeak1SpecFlatFX - posPeak2SpecFlatFX < LIM_PEAK_SPACE_FLAT_DIFF) + && (weightPeak2SpecFlat * LIM_PEAK_WEIGHT_FLAT_DIFF > weightPeak1SpecFlat)) { + weightPeak1SpecFlat += weightPeak2SpecFlat; + posPeak1SpecFlatFX = (posPeak1SpecFlatFX + posPeak2SpecFlatFX) >> 1; + } + //reject if weight of peaks is not large enough, or peak value too small + if (weightPeak1SpecFlat < THRES_WEIGHT_FLAT_DIFF || posPeak1SpecFlatFX + < THRES_PEAK_FLAT) { + useFeatureSpecFlat = 0; + } else { // if selected, get the threshold + // compute the threshold and check if value is within min/max range + inst->thresholdSpecFlat = WEBRTC_SPL_SAT(MAX_FLAT_Q10, FACTOR_2_FLAT_Q10 + * posPeak1SpecFlatFX, MIN_FLAT_Q10); //Q10 + } + // done with flatness feature + + if (useFeatureSpecDiff) { + //compute two peaks for spectral difference + maxPeak1 = 0; + maxPeak2 = 0; + posPeak1SpecDiffFX = 0; + posPeak2SpecDiffFX = 0; + weightPeak1SpecDiff = 0; + weightPeak2SpecDiff = 0; + // peaks for spectral difference + for (i = 0; i < HIST_PAR_EST; i++) { + if (inst->histSpecDiff[i] > maxPeak1) { + // Found new "first" peak + maxPeak2 = maxPeak1; + weightPeak2SpecDiff = weightPeak1SpecDiff; + posPeak2SpecDiffFX = posPeak1SpecDiffFX; + + maxPeak1 = inst->histSpecDiff[i]; + weightPeak1SpecDiff = inst->histSpecDiff[i]; + posPeak1SpecDiffFX = (uint32_t)(2 * i + 1); + } else if 
(inst->histSpecDiff[i] > maxPeak2) { + // Found new "second" peak + maxPeak2 = inst->histSpecDiff[i]; + weightPeak2SpecDiff = inst->histSpecDiff[i]; + posPeak2SpecDiffFX = (uint32_t)(2 * i + 1); + } + } + + // merge the two peaks if they are close + if ((posPeak1SpecDiffFX - posPeak2SpecDiffFX < LIM_PEAK_SPACE_FLAT_DIFF) + && (weightPeak2SpecDiff * LIM_PEAK_WEIGHT_FLAT_DIFF > weightPeak1SpecDiff)) { + weightPeak1SpecDiff += weightPeak2SpecDiff; + posPeak1SpecDiffFX = (posPeak1SpecDiffFX + posPeak2SpecDiffFX) >> 1; + } + // get the threshold value and check if value is within min/max range + inst->thresholdSpecDiff = WEBRTC_SPL_SAT(MAX_DIFF, FACTOR_1_LRT_DIFF + * posPeak1SpecDiffFX, MIN_DIFF); //5x bigger + //reject if weight of peaks is not large enough + if (weightPeak1SpecDiff < THRES_WEIGHT_FLAT_DIFF) { + useFeatureSpecDiff = 0; + } + // done with spectral difference feature + } + + // select the weights between the features + // inst->priorModelPars[4] is weight for LRT: always selected + featureSum = 6 / (1 + useFeatureSpecFlat + useFeatureSpecDiff); + inst->weightLogLrt = featureSum; + inst->weightSpecFlat = useFeatureSpecFlat * featureSum; + inst->weightSpecDiff = useFeatureSpecDiff * featureSum; + + // set histograms to zero for next update + WebRtcSpl_ZerosArrayW16(inst->histLrt, HIST_PAR_EST); + WebRtcSpl_ZerosArrayW16(inst->histSpecDiff, HIST_PAR_EST); + WebRtcSpl_ZerosArrayW16(inst->histSpecFlat, HIST_PAR_EST); + } // end of flag == 1 +} + + +// Compute spectral flatness on input spectrum +// magn is the magnitude spectrum +// spectral flatness is returned in inst->featureSpecFlat +void WebRtcNsx_ComputeSpectralFlatness(NoiseSuppressionFixedC* inst, + uint16_t* magn) { + uint32_t tmpU32; + uint32_t avgSpectralFlatnessNum, avgSpectralFlatnessDen; + + int32_t tmp32; + int32_t currentSpectralFlatness, logCurSpectralFlatness; + + int16_t zeros, frac, intPart; + + size_t i; + + // for flatness + avgSpectralFlatnessNum = 0; + avgSpectralFlatnessDen = 
inst->sumMagn - (uint32_t)magn[0]; // Q(normData-stages) + + // compute log of ratio of the geometric to arithmetic mean: check for log(0) case + // flatness = exp( sum(log(magn[i]))/N - log(sum(magn[i])/N) ) + // = exp( sum(log(magn[i]))/N ) * N / sum(magn[i]) + // = 2^( sum(log2(magn[i]))/N - (log2(sum(magn[i])) - log2(N)) ) [This is used] + for (i = 1; i < inst->magnLen; i++) { + // First bin is excluded from spectrum measures. Number of bins is now a power of 2 + if (magn[i]) { + zeros = WebRtcSpl_NormU32((uint32_t)magn[i]); + frac = (int16_t)(((uint32_t)((uint32_t)(magn[i]) << zeros) + & 0x7FFFFFFF) >> 23); + // log2(magn(i)) + assert(frac < 256); + tmpU32 = (uint32_t)(((31 - zeros) << 8) + + WebRtcNsx_kLogTableFrac[frac]); // Q8 + avgSpectralFlatnessNum += tmpU32; // Q8 + } else { + //if at least one frequency component is zero, treat separately + tmpU32 = WEBRTC_SPL_UMUL_32_16(inst->featureSpecFlat, SPECT_FLAT_TAVG_Q14); // Q24 + inst->featureSpecFlat -= tmpU32 >> 14; // Q10 + return; + } + } + //ratio and inverse log: check for case of log(0) + zeros = WebRtcSpl_NormU32(avgSpectralFlatnessDen); + frac = (int16_t)(((avgSpectralFlatnessDen << zeros) & 0x7FFFFFFF) >> 23); + // log2(avgSpectralFlatnessDen) + assert(frac < 256); + tmp32 = (int32_t)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); // Q8 + logCurSpectralFlatness = (int32_t)avgSpectralFlatnessNum; + logCurSpectralFlatness += ((int32_t)(inst->stages - 1) << (inst->stages + 7)); // Q(8+stages-1) + logCurSpectralFlatness -= (tmp32 << (inst->stages - 1)); + logCurSpectralFlatness <<= (10 - inst->stages); // Q17 + tmp32 = (int32_t)(0x00020000 | (WEBRTC_SPL_ABS_W32(logCurSpectralFlatness) + & 0x0001FFFF)); //Q17 + intPart = 7 - (logCurSpectralFlatness >> 17); // Add 7 for output in Q10. 
+ if (intPart > 0) { + currentSpectralFlatness = tmp32 >> intPart; + } else { + currentSpectralFlatness = tmp32 << -intPart; + } + + //time average update of spectral flatness feature + tmp32 = currentSpectralFlatness - (int32_t)inst->featureSpecFlat; // Q10 + tmp32 *= SPECT_FLAT_TAVG_Q14; // Q24 + inst->featureSpecFlat += tmp32 >> 14; // Q10 + // done with flatness feature +} + + +// Compute the difference measure between input spectrum and a template/learned noise spectrum +// magn_tmp is the input spectrum +// the reference/template spectrum is inst->magn_avg_pause[i] +// returns (normalized) spectral difference in inst->featureSpecDiff +void WebRtcNsx_ComputeSpectralDifference(NoiseSuppressionFixedC* inst, + uint16_t* magnIn) { + // This is to be calculated: + // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause) + + uint32_t tmpU32no1, tmpU32no2; + uint32_t varMagnUFX, varPauseUFX, avgDiffNormMagnUFX; + + int32_t tmp32no1, tmp32no2; + int32_t avgPauseFX, avgMagnFX, covMagnPauseFX; + int32_t maxPause, minPause; + + int16_t tmp16no1; + + size_t i; + int norm32, nShifts; + + avgPauseFX = 0; + maxPause = 0; + minPause = inst->avgMagnPause[0]; // Q(prevQMagn) + // compute average quantities + for (i = 0; i < inst->magnLen; i++) { + // Compute mean of magn_pause + avgPauseFX += inst->avgMagnPause[i]; // in Q(prevQMagn) + maxPause = WEBRTC_SPL_MAX(maxPause, inst->avgMagnPause[i]); + minPause = WEBRTC_SPL_MIN(minPause, inst->avgMagnPause[i]); + } + // normalize by replacing div of "inst->magnLen" with "inst->stages-1" shifts + avgPauseFX >>= inst->stages - 1; + avgMagnFX = inst->sumMagn >> (inst->stages - 1); + // Largest possible deviation in magnPause for (co)var calculations + tmp32no1 = WEBRTC_SPL_MAX(maxPause - avgPauseFX, avgPauseFX - minPause); + // Get number of shifts to make sure we don't get wrap around in varPause + nShifts = WEBRTC_SPL_MAX(0, 10 + inst->stages - WebRtcSpl_NormW32(tmp32no1)); + + varMagnUFX = 0; + varPauseUFX = 
0; + covMagnPauseFX = 0; + for (i = 0; i < inst->magnLen; i++) { + // Compute var and cov of magn and magn_pause + tmp16no1 = (int16_t)((int32_t)magnIn[i] - avgMagnFX); + tmp32no2 = inst->avgMagnPause[i] - avgPauseFX; + varMagnUFX += (uint32_t)(tmp16no1 * tmp16no1); // Q(2*qMagn) + tmp32no1 = tmp32no2 * tmp16no1; // Q(prevQMagn+qMagn) + covMagnPauseFX += tmp32no1; // Q(prevQMagn+qMagn) + tmp32no1 = tmp32no2 >> nShifts; // Q(prevQMagn-minPause). + varPauseUFX += tmp32no1 * tmp32no1; // Q(2*(prevQMagn-minPause)) + } + //update of average magnitude spectrum: Q(-2*stages) and averaging replaced by shifts + inst->curAvgMagnEnergy += + inst->magnEnergy >> (2 * inst->normData + inst->stages - 1); + + avgDiffNormMagnUFX = varMagnUFX; // Q(2*qMagn) + if ((varPauseUFX) && (covMagnPauseFX)) { + tmpU32no1 = (uint32_t)WEBRTC_SPL_ABS_W32(covMagnPauseFX); // Q(prevQMagn+qMagn) + norm32 = WebRtcSpl_NormU32(tmpU32no1) - 16; + if (norm32 > 0) { + tmpU32no1 <<= norm32; // Q(prevQMagn+qMagn+norm32) + } else { + tmpU32no1 >>= -norm32; // Q(prevQMagn+qMagn+norm32) + } + tmpU32no2 = WEBRTC_SPL_UMUL(tmpU32no1, tmpU32no1); // Q(2*(prevQMagn+qMagn-norm32)) + + nShifts += norm32; + nShifts <<= 1; + if (nShifts < 0) { + varPauseUFX >>= (-nShifts); // Q(2*(qMagn+norm32+minPause)) + nShifts = 0; + } + if (varPauseUFX > 0) { + // Q(2*(qMagn+norm32-16+minPause)) + tmpU32no1 = tmpU32no2 / varPauseUFX; + tmpU32no1 >>= nShifts; + + // Q(2*qMagn) + avgDiffNormMagnUFX -= WEBRTC_SPL_MIN(avgDiffNormMagnUFX, tmpU32no1); + } else { + avgDiffNormMagnUFX = 0; + } + } + //normalize and compute time average update of difference feature + tmpU32no1 = avgDiffNormMagnUFX >> (2 * inst->normData); + if (inst->featureSpecDiff > tmpU32no1) { + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(inst->featureSpecDiff - tmpU32no1, + SPECT_DIFF_TAVG_Q8); // Q(8-2*stages) + inst->featureSpecDiff -= tmpU32no2 >> 8; // Q(-2*stages) + } else { + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no1 - inst->featureSpecDiff, + SPECT_DIFF_TAVG_Q8); // 
Q(8-2*stages) + inst->featureSpecDiff += tmpU32no2 >> 8; // Q(-2*stages) + } +} + +// Transform input (speechFrame) to frequency domain magnitude (magnU16) +void WebRtcNsx_DataAnalysis(NoiseSuppressionFixedC* inst, + short* speechFrame, + uint16_t* magnU16) { + uint32_t tmpU32no1; + + int32_t tmp_1_w32 = 0; + int32_t tmp_2_w32 = 0; + int32_t sum_log_magn = 0; + int32_t sum_log_i_log_magn = 0; + + uint16_t sum_log_magn_u16 = 0; + uint16_t tmp_u16 = 0; + + int16_t sum_log_i = 0; + int16_t sum_log_i_square = 0; + int16_t frac = 0; + int16_t log2 = 0; + int16_t matrix_determinant = 0; + int16_t maxWinData; + + size_t i, j; + int zeros; + int net_norm = 0; + int right_shifts_in_magnU16 = 0; + int right_shifts_in_initMagnEst = 0; + + int16_t winData_buff[ANAL_BLOCKL_MAX * 2 + 16]; + int16_t realImag_buff[ANAL_BLOCKL_MAX * 2 + 16]; + + // Align the structures to 32-byte boundary for the FFT function. + int16_t* winData = (int16_t*) (((uintptr_t)winData_buff + 31) & ~31); + int16_t* realImag = (int16_t*) (((uintptr_t) realImag_buff + 31) & ~31); + + // Update analysis buffer for lower band, and window data before FFT. + WebRtcNsx_AnalysisUpdate(inst, winData, speechFrame); + + // Get input energy + inst->energyIn = + WebRtcSpl_Energy(winData, inst->anaLen, &inst->scaleEnergyIn); + + // Reset zero input flag + inst->zeroInputSignal = 0; + // Acquire norm for winData + maxWinData = WebRtcSpl_MaxAbsValueW16(winData, inst->anaLen); + inst->normData = WebRtcSpl_NormW16(maxWinData); + if (maxWinData == 0) { + // Treat zero input separately. 
+ inst->zeroInputSignal = 1; + return; + } + + // Determine the net normalization in the frequency domain + net_norm = inst->stages - inst->normData; + // Track lowest normalization factor and use it to prevent wrap around in shifting + right_shifts_in_magnU16 = inst->normData - inst->minNorm; + right_shifts_in_initMagnEst = WEBRTC_SPL_MAX(-right_shifts_in_magnU16, 0); + inst->minNorm -= right_shifts_in_initMagnEst; + right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0); + + // create realImag as winData interleaved with zeros (= imag. part), normalize it + WebRtcNsx_NormalizeRealBuffer(inst, winData, realImag); + + // FFT output will be in winData[]. + WebRtcSpl_RealForwardFFT(inst->real_fft, realImag, winData); + + inst->imag[0] = 0; // Q(normData-stages) + inst->imag[inst->anaLen2] = 0; + inst->real[0] = winData[0]; // Q(normData-stages) + inst->real[inst->anaLen2] = winData[inst->anaLen]; + // Q(2*(normData-stages)) + inst->magnEnergy = (uint32_t)(inst->real[0] * inst->real[0]); + inst->magnEnergy += (uint32_t)(inst->real[inst->anaLen2] * + inst->real[inst->anaLen2]); + magnU16[0] = (uint16_t)WEBRTC_SPL_ABS_W16(inst->real[0]); // Q(normData-stages) + magnU16[inst->anaLen2] = (uint16_t)WEBRTC_SPL_ABS_W16(inst->real[inst->anaLen2]); + inst->sumMagn = (uint32_t)magnU16[0]; // Q(normData-stages) + inst->sumMagn += (uint32_t)magnU16[inst->anaLen2]; + + if (inst->blockIndex >= END_STARTUP_SHORT) { + for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) { + inst->real[i] = winData[j]; + inst->imag[i] = -winData[j + 1]; + // magnitude spectrum + // energy in Q(2*(normData-stages)) + tmpU32no1 = (uint32_t)(winData[j] * winData[j]); + tmpU32no1 += (uint32_t)(winData[j + 1] * winData[j + 1]); + inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages)) + + magnU16[i] = (uint16_t)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages) + inst->sumMagn += (uint32_t)magnU16[i]; // Q(normData-stages) + } + } else { + // + // Gather information during startup for 
noise parameter estimation + // + + // Switch initMagnEst to Q(minNorm-stages) + inst->initMagnEst[0] >>= right_shifts_in_initMagnEst; + inst->initMagnEst[inst->anaLen2] >>= right_shifts_in_initMagnEst; + + // Update initMagnEst with magnU16 in Q(minNorm-stages). + inst->initMagnEst[0] += magnU16[0] >> right_shifts_in_magnU16; + inst->initMagnEst[inst->anaLen2] += + magnU16[inst->anaLen2] >> right_shifts_in_magnU16; + + log2 = 0; + if (magnU16[inst->anaLen2]) { + // Calculate log2(magnU16[inst->anaLen2]) + zeros = WebRtcSpl_NormU32((uint32_t)magnU16[inst->anaLen2]); + frac = (int16_t)((((uint32_t)magnU16[inst->anaLen2] << zeros) & + 0x7FFFFFFF) >> 23); // Q8 + // log2(magnU16(i)) in Q8 + assert(frac < 256); + log2 = (int16_t)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); + } + + sum_log_magn = (int32_t)log2; // Q8 + // sum_log_i_log_magn in Q17 + sum_log_i_log_magn = (kLogIndex[inst->anaLen2] * log2) >> 3; + + for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) { + inst->real[i] = winData[j]; + inst->imag[i] = -winData[j + 1]; + // magnitude spectrum + // energy in Q(2*(normData-stages)) + tmpU32no1 = (uint32_t)(winData[j] * winData[j]); + tmpU32no1 += (uint32_t)(winData[j + 1] * winData[j + 1]); + inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages)) + + magnU16[i] = (uint16_t)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages) + inst->sumMagn += (uint32_t)magnU16[i]; // Q(normData-stages) + + // Switch initMagnEst to Q(minNorm-stages) + inst->initMagnEst[i] >>= right_shifts_in_initMagnEst; + + // Update initMagnEst with magnU16 in Q(minNorm-stages). + inst->initMagnEst[i] += magnU16[i] >> right_shifts_in_magnU16; + + if (i >= kStartBand) { + // For pink noise estimation. 
Collect data neglecting lower frequency band + log2 = 0; + if (magnU16[i]) { + zeros = WebRtcSpl_NormU32((uint32_t)magnU16[i]); + frac = (int16_t)((((uint32_t)magnU16[i] << zeros) & + 0x7FFFFFFF) >> 23); + // log2(magnU16(i)) in Q8 + assert(frac < 256); + log2 = (int16_t)(((31 - zeros) << 8) + + WebRtcNsx_kLogTableFrac[frac]); + } + sum_log_magn += (int32_t)log2; // Q8 + // sum_log_i_log_magn in Q17 + sum_log_i_log_magn += (kLogIndex[i] * log2) >> 3; + } + } + + // + //compute simplified noise model during startup + // + + // Estimate White noise + + // Switch whiteNoiseLevel to Q(minNorm-stages) + inst->whiteNoiseLevel >>= right_shifts_in_initMagnEst; + + // Update the average magnitude spectrum, used as noise estimate. + tmpU32no1 = WEBRTC_SPL_UMUL_32_16(inst->sumMagn, inst->overdrive); + tmpU32no1 >>= inst->stages + 8; + + // Replacing division above with 'stages' shifts + // Shift to same Q-domain as whiteNoiseLevel + tmpU32no1 >>= right_shifts_in_magnU16; + // This operation is safe from wrap around as long as END_STARTUP_SHORT < 128 + assert(END_STARTUP_SHORT < 128); + inst->whiteNoiseLevel += tmpU32no1; // Q(minNorm-stages) + + // Estimate Pink noise parameters + // Denominator used in both parameter estimates. + // The value is only dependent on the size of the frequency band (kStartBand) + // and to reduce computational complexity stored in a table (kDeterminantEstMatrix[]) + assert(kStartBand < 66); + matrix_determinant = kDeterminantEstMatrix[kStartBand]; // Q0 + sum_log_i = kSumLogIndex[kStartBand]; // Q5 + sum_log_i_square = kSumSquareLogIndex[kStartBand]; // Q2 + if (inst->fs == 8000) { + // Adjust values to shorter blocks in narrow band. 
+ tmp_1_w32 = (int32_t)matrix_determinant; + tmp_1_w32 += (kSumLogIndex[65] * sum_log_i) >> 9; + tmp_1_w32 -= (kSumLogIndex[65] * kSumLogIndex[65]) >> 10; + tmp_1_w32 -= (int32_t)sum_log_i_square << 4; + tmp_1_w32 -= ((inst->magnLen - kStartBand) * kSumSquareLogIndex[65]) >> 2; + matrix_determinant = (int16_t)tmp_1_w32; + sum_log_i -= kSumLogIndex[65]; // Q5 + sum_log_i_square -= kSumSquareLogIndex[65]; // Q2 + } + + // Necessary number of shifts to fit sum_log_magn in a word16 + zeros = 16 - WebRtcSpl_NormW32(sum_log_magn); + if (zeros < 0) { + zeros = 0; + } + tmp_1_w32 = sum_log_magn << 1; // Q9 + sum_log_magn_u16 = (uint16_t)(tmp_1_w32 >> zeros); // Q(9-zeros). + + // Calculate and update pinkNoiseNumerator. Result in Q11. + tmp_2_w32 = WEBRTC_SPL_MUL_16_U16(sum_log_i_square, sum_log_magn_u16); // Q(11-zeros) + tmpU32no1 = sum_log_i_log_magn >> 12; // Q5 + + // Shift the largest value of sum_log_i and tmp32no3 before multiplication + tmp_u16 = ((uint16_t)sum_log_i << 1); // Q6 + if ((uint32_t)sum_log_i > tmpU32no1) { + tmp_u16 >>= zeros; + } else { + tmpU32no1 >>= zeros; + } + tmp_2_w32 -= (int32_t)WEBRTC_SPL_UMUL_32_16(tmpU32no1, tmp_u16); // Q(11-zeros) + matrix_determinant >>= zeros; // Q(-zeros) + tmp_2_w32 = WebRtcSpl_DivW32W16(tmp_2_w32, matrix_determinant); // Q11 + tmp_2_w32 += (int32_t)net_norm << 11; // Q11 + if (tmp_2_w32 < 0) { + tmp_2_w32 = 0; + } + inst->pinkNoiseNumerator += tmp_2_w32; // Q11 + + // Calculate and update pinkNoiseExp. Result in Q14. + tmp_2_w32 = WEBRTC_SPL_MUL_16_U16(sum_log_i, sum_log_magn_u16); // Q(14-zeros) + tmp_1_w32 = sum_log_i_log_magn >> (3 + zeros); + tmp_1_w32 *= inst->magnLen - kStartBand; + tmp_2_w32 -= tmp_1_w32; // Q(14-zeros) + if (tmp_2_w32 > 0) { + // If the exponential parameter is negative force it to zero, which means a + // flat spectrum. 
+ tmp_1_w32 = WebRtcSpl_DivW32W16(tmp_2_w32, matrix_determinant); // Q14 + inst->pinkNoiseExp += WEBRTC_SPL_SAT(16384, tmp_1_w32, 0); // Q14 + } + } +} + +void WebRtcNsx_DataSynthesis(NoiseSuppressionFixedC* inst, short* outFrame) { + int32_t energyOut; + + int16_t realImag_buff[ANAL_BLOCKL_MAX * 2 + 16]; + int16_t rfft_out_buff[ANAL_BLOCKL_MAX * 2 + 16]; + + // Align the structures to 32-byte boundary for the FFT function. + int16_t* realImag = (int16_t*) (((uintptr_t)realImag_buff + 31) & ~31); + int16_t* rfft_out = (int16_t*) (((uintptr_t) rfft_out_buff + 31) & ~31); + + int16_t tmp16no1, tmp16no2; + int16_t energyRatio; + int16_t gainFactor, gainFactor1, gainFactor2; + + size_t i; + int outCIFFT; + int scaleEnergyOut = 0; + + if (inst->zeroInputSignal) { + // synthesize the special case of zero input + // read out fully processed segment + for (i = 0; i < inst->blockLen10ms; i++) { + outFrame[i] = inst->synthesisBuffer[i]; // Q0 + } + // update synthesis buffer + memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer)); + WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms, + inst->blockLen10ms); + return; + } + + // Filter the data in the frequency domain, and create spectrum. + WebRtcNsx_PrepareSpectrum(inst, realImag); + + // Inverse FFT output will be in rfft_out[]. 
+ outCIFFT = WebRtcSpl_RealInverseFFT(inst->real_fft, realImag, rfft_out); + + WebRtcNsx_Denormalize(inst, rfft_out, outCIFFT); + + //scale factor: only do it after END_STARTUP_LONG time + gainFactor = 8192; // 8192 = Q13(1.0) + if (inst->gainMap == 1 && + inst->blockIndex > END_STARTUP_LONG && + inst->energyIn > 0) { + // Q(-scaleEnergyOut) + energyOut = WebRtcSpl_Energy(inst->real, inst->anaLen, &scaleEnergyOut); + if (scaleEnergyOut == 0 && !(energyOut & 0x7f800000)) { + energyOut = WEBRTC_SPL_SHIFT_W32(energyOut, 8 + scaleEnergyOut + - inst->scaleEnergyIn); + } else { + // |energyIn| is currently in Q(|scaleEnergyIn|), but to later on end up + // with an |energyRatio| in Q8 we need to change the Q-domain to + // Q(-8-scaleEnergyOut). + inst->energyIn >>= 8 + scaleEnergyOut - inst->scaleEnergyIn; + } + + assert(inst->energyIn > 0); + energyRatio = (energyOut + inst->energyIn / 2) / inst->energyIn; // Q8 + // Limit the ratio to [0, 1] in Q8, i.e., [0, 256] + energyRatio = WEBRTC_SPL_SAT(256, energyRatio, 0); + + // all done in lookup tables now + assert(energyRatio < 257); + gainFactor1 = kFactor1Table[energyRatio]; // Q8 + gainFactor2 = inst->factor2Table[energyRatio]; // Q8 + + //combine both scales with speech/noise prob: note prior (priorSpeechProb) is not frequency dependent + + // factor = inst->priorSpeechProb*factor1 + (1.0-inst->priorSpeechProb)*factor2; // original code + tmp16no1 = (int16_t)(((16384 - inst->priorNonSpeechProb) * gainFactor1) >> + 14); // in Q13, where 16384 = Q14(1.0) + tmp16no2 = (int16_t)((inst->priorNonSpeechProb * gainFactor2) >> 14); + gainFactor = tmp16no1 + tmp16no2; // Q13 + } // out of flag_gain_map==1 + + // Synthesis, read out fully processed segment, and update synthesis buffer. 
+ WebRtcNsx_SynthesisUpdate(inst, outFrame, gainFactor); +} + +void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst, + const short* const* speechFrame, + int num_bands, + short* const* outFrame) { + // main routine for noise suppression + + uint32_t tmpU32no1, tmpU32no2, tmpU32no3; + uint32_t satMax, maxNoiseU32; + uint32_t tmpMagnU32, tmpNoiseU32; + uint32_t nearMagnEst; + uint32_t noiseUpdateU32; + uint32_t noiseU32[HALF_ANAL_BLOCKL]; + uint32_t postLocSnr[HALF_ANAL_BLOCKL]; + uint32_t priorLocSnr[HALF_ANAL_BLOCKL]; + uint32_t prevNearSnr[HALF_ANAL_BLOCKL]; + uint32_t curNearSnr; + uint32_t priorSnr; + uint32_t noise_estimate = 0; + uint32_t noise_estimate_avg = 0; + uint32_t numerator = 0; + + int32_t tmp32no1, tmp32no2; + int32_t pink_noise_num_avg = 0; + + uint16_t tmpU16no1; + uint16_t magnU16[HALF_ANAL_BLOCKL]; + uint16_t prevNoiseU16[HALF_ANAL_BLOCKL]; + uint16_t nonSpeechProbFinal[HALF_ANAL_BLOCKL]; + uint16_t gammaNoise, prevGammaNoise; + uint16_t noiseSupFilterTmp[HALF_ANAL_BLOCKL]; + + int16_t qMagn, qNoise; + int16_t avgProbSpeechHB, gainModHB, avgFilterGainHB, gainTimeDomainHB; + int16_t pink_noise_exp_avg = 0; + + size_t i, j; + int nShifts, postShifts; + int norm32no1, norm32no2; + int flag, sign; + int q_domain_to_use = 0; + + // Code for ARMv7-Neon platform assumes the following: + assert(inst->anaLen > 0); + assert(inst->anaLen2 > 0); + assert(inst->anaLen % 16 == 0); + assert(inst->anaLen2 % 8 == 0); + assert(inst->blockLen10ms > 0); + assert(inst->blockLen10ms % 16 == 0); + assert(inst->magnLen == inst->anaLen2 + 1); + +#ifdef NS_FILEDEBUG + if (fwrite(spframe, sizeof(short), + inst->blockLen10ms, inst->infile) != inst->blockLen10ms) { + assert(false); + } +#endif + + // Check that initialization has been done + assert(inst->initFlag == 1); + assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX); + + const short* const* speechFrameHB = NULL; + short* const* outFrameHB = NULL; + size_t num_high_bands = 0; + if (num_bands > 1) { + speechFrameHB = 
&speechFrame[1]; + outFrameHB = &outFrame[1]; + num_high_bands = (size_t)(num_bands - 1); + } + + // Store speechFrame and transform to frequency domain + WebRtcNsx_DataAnalysis(inst, (short*)speechFrame[0], magnU16); + + if (inst->zeroInputSignal) { + WebRtcNsx_DataSynthesis(inst, outFrame[0]); + + if (num_bands > 1) { + // update analysis buffer for H band + // append new data to buffer FX + for (i = 0; i < num_high_bands; ++i) { + int block_shift = inst->anaLen - inst->blockLen10ms; + memcpy(inst->dataBufHBFX[i], inst->dataBufHBFX[i] + inst->blockLen10ms, + block_shift * sizeof(*inst->dataBufHBFX[i])); + memcpy(inst->dataBufHBFX[i] + block_shift, speechFrameHB[i], + inst->blockLen10ms * sizeof(*inst->dataBufHBFX[i])); + for (j = 0; j < inst->blockLen10ms; j++) { + outFrameHB[i][j] = inst->dataBufHBFX[i][j]; // Q0 + } + } + } // end of H band gain computation + return; + } + + // Update block index when we have something to process + inst->blockIndex++; + // + + // Norm of magn + qMagn = inst->normData - inst->stages; + + // Compute spectral flatness on input spectrum + WebRtcNsx_ComputeSpectralFlatness(inst, magnU16); + + // quantile noise estimate + WebRtcNsx_NoiseEstimation(inst, magnU16, noiseU32, &qNoise); + + //noise estimate from previous frame + for (i = 0; i < inst->magnLen; i++) { + prevNoiseU16[i] = (uint16_t)(inst->prevNoiseU32[i] >> 11); // Q(prevQNoise) + } + + if (inst->blockIndex < END_STARTUP_SHORT) { + // Noise Q-domain to be used later; see description at end of section. 
+ q_domain_to_use = WEBRTC_SPL_MIN((int)qNoise, inst->minNorm - inst->stages); + + // Calculate frequency independent parts in parametric noise estimate and calculate + // the estimate for the lower frequency band (same values for all frequency bins) + if (inst->pinkNoiseExp) { + pink_noise_exp_avg = (int16_t)WebRtcSpl_DivW32W16(inst->pinkNoiseExp, + (int16_t)(inst->blockIndex + 1)); // Q14 + pink_noise_num_avg = WebRtcSpl_DivW32W16(inst->pinkNoiseNumerator, + (int16_t)(inst->blockIndex + 1)); // Q11 + WebRtcNsx_CalcParametricNoiseEstimate(inst, + pink_noise_exp_avg, + pink_noise_num_avg, + kStartBand, + &noise_estimate, + &noise_estimate_avg); + } else { + // Use white noise estimate if we have poor pink noise parameter estimates + noise_estimate = inst->whiteNoiseLevel; // Q(minNorm-stages) + noise_estimate_avg = noise_estimate / (inst->blockIndex + 1); // Q(minNorm-stages) + } + for (i = 0; i < inst->magnLen; i++) { + // Estimate the background noise using the pink noise parameters if permitted + if ((inst->pinkNoiseExp) && (i >= kStartBand)) { + // Reset noise_estimate + noise_estimate = 0; + noise_estimate_avg = 0; + // Calculate the parametric noise estimate for current frequency bin + WebRtcNsx_CalcParametricNoiseEstimate(inst, + pink_noise_exp_avg, + pink_noise_num_avg, + i, + &noise_estimate, + &noise_estimate_avg); + } + // Calculate parametric Wiener filter + noiseSupFilterTmp[i] = inst->denoiseBound; + if (inst->initMagnEst[i]) { + // numerator = (initMagnEst - noise_estimate * overdrive) + // Result in Q(8+minNorm-stages) + tmpU32no1 = WEBRTC_SPL_UMUL_32_16(noise_estimate, inst->overdrive); + numerator = inst->initMagnEst[i] << 8; + if (numerator > tmpU32no1) { + // Suppression filter coefficient larger than zero, so calculate. 
+ numerator -= tmpU32no1; + + // Determine number of left shifts in numerator for best accuracy after + // division + nShifts = WebRtcSpl_NormU32(numerator); + nShifts = WEBRTC_SPL_SAT(6, nShifts, 0); + + // Shift numerator to Q(nShifts+8+minNorm-stages) + numerator <<= nShifts; + + // Shift denominator to Q(nShifts-6+minNorm-stages) + tmpU32no1 = inst->initMagnEst[i] >> (6 - nShifts); + if (tmpU32no1 == 0) { + // This is only possible if numerator = 0, in which case + // we don't need any division. + tmpU32no1 = 1; + } + tmpU32no2 = numerator / tmpU32no1; // Q14 + noiseSupFilterTmp[i] = (uint16_t)WEBRTC_SPL_SAT(16384, tmpU32no2, + (uint32_t)(inst->denoiseBound)); // Q14 + } + } + // Weight quantile noise 'noiseU32' with modeled noise 'noise_estimate_avg' + // 'noiseU32 is in Q(qNoise) and 'noise_estimate' in Q(minNorm-stages) + // To guarantee that we do not get wrap around when shifting to the same domain + // we use the lowest one. Furthermore, we need to save 6 bits for the weighting. + // 'noise_estimate_avg' can handle this operation by construction, but 'noiseU32' + // may not. 
+ + // Shift 'noiseU32' to 'q_domain_to_use' + tmpU32no1 = noiseU32[i] >> (qNoise - q_domain_to_use); + // Shift 'noise_estimate_avg' to 'q_domain_to_use' + tmpU32no2 = noise_estimate_avg >> + (inst->minNorm - inst->stages - q_domain_to_use); + // Make a simple check to see if we have enough room for weighting 'tmpU32no1' + // without wrap around + nShifts = 0; + if (tmpU32no1 & 0xfc000000) { + tmpU32no1 >>= 6; + tmpU32no2 >>= 6; + nShifts = 6; + } + tmpU32no1 *= inst->blockIndex; + tmpU32no2 *= (END_STARTUP_SHORT - inst->blockIndex); + // Add them together and divide by startup length + noiseU32[i] = WebRtcSpl_DivU32U16(tmpU32no1 + tmpU32no2, END_STARTUP_SHORT); + // Shift back if necessary + noiseU32[i] <<= nShifts; + } + // Update new Q-domain for 'noiseU32' + qNoise = q_domain_to_use; + } + // compute average signal during END_STARTUP_LONG time: + // used to normalize spectral difference measure + if (inst->blockIndex < END_STARTUP_LONG) { + // substituting division with shift ending up in Q(-2*stages) + inst->timeAvgMagnEnergyTmp += + inst->magnEnergy >> (2 * inst->normData + inst->stages - 1); + inst->timeAvgMagnEnergy = WebRtcSpl_DivU32U16(inst->timeAvgMagnEnergyTmp, + inst->blockIndex + 1); + } + + //start processing at frames == converged+1 + // STEP 1: compute prior and post SNR based on quantile noise estimates + + // compute direct decision (DD) estimate of prior SNR: needed for new method + satMax = (uint32_t)1048575;// Largest possible value without getting overflow despite shifting 12 steps + postShifts = 6 + qMagn - qNoise; + nShifts = 5 - inst->prevQMagn + inst->prevQNoise; + for (i = 0; i < inst->magnLen; i++) { + // FLOAT: + // post SNR + // postLocSnr[i] = 0.0; + // if (magn[i] > noise[i]) + // { + // postLocSnr[i] = magn[i] / (noise[i] + 0.0001); + // } + // // previous post SNR + // // previous estimate: based on previous frame with gain filter (smooth is previous filter) + // + // prevNearSnr[i] = inst->prevMagnU16[i] / (inst->noisePrev[i] + 
0.0001) * (inst->smooth[i]); + // + // // DD estimate is sum of two terms: current estimate and previous estimate + // // directed decision update of priorSnr (or we actually store [2*priorSnr+1]) + // + // priorLocSnr[i] = DD_PR_SNR * prevNearSnr[i] + (1.0 - DD_PR_SNR) * (postLocSnr[i] - 1.0); + + // calculate post SNR: output in Q11 + postLocSnr[i] = 2048; // 1.0 in Q11 + tmpU32no1 = (uint32_t)magnU16[i] << 6; // Q(6+qMagn) + if (postShifts < 0) { + tmpU32no2 = noiseU32[i] >> -postShifts; // Q(6+qMagn) + } else { + tmpU32no2 = noiseU32[i] << postShifts; // Q(6+qMagn) + } + if (tmpU32no1 > tmpU32no2) { + // Current magnitude larger than noise + tmpU32no1 <<= 11; // Q(17+qMagn) + if (tmpU32no2 > 0) { + tmpU32no1 /= tmpU32no2; // Q11 + postLocSnr[i] = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11 + } else { + postLocSnr[i] = satMax; + } + } + + // calculate prevNearSnr[i] and save for later instead of recalculating it later + // |nearMagnEst| in Q(prevQMagn + 14) + nearMagnEst = inst->prevMagnU16[i] * inst->noiseSupFilter[i]; + tmpU32no1 = nearMagnEst << 3; // Q(prevQMagn+17) + tmpU32no2 = inst->prevNoiseU32[i] >> nShifts; // Q(prevQMagn+6) + + if (tmpU32no2 > 0) { + tmpU32no1 /= tmpU32no2; // Q11 + tmpU32no1 = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11 + } else { + tmpU32no1 = satMax; // Q11 + } + prevNearSnr[i] = tmpU32no1; // Q11 + + //directed decision update of priorSnr + tmpU32no1 = WEBRTC_SPL_UMUL_32_16(prevNearSnr[i], DD_PR_SNR_Q11); // Q22 + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(postLocSnr[i] - 2048, ONE_MINUS_DD_PR_SNR_Q11); // Q22 + priorSnr = tmpU32no1 + tmpU32no2 + 512; // Q22 (added 512 for rounding) + // priorLocSnr = 1 + 2*priorSnr + priorLocSnr[i] = 2048 + (priorSnr >> 10); // Q11 + } // end of loop over frequencies + // done with step 1: DD computation of prior and post SNR + + // STEP 2: compute speech/noise likelihood + + //compute difference of input spectrum with learned/estimated noise spectrum + WebRtcNsx_ComputeSpectralDifference(inst, magnU16); + 
//compute histograms for determination of parameters (thresholds and weights for features) + //parameters are extracted once every window time (=inst->modelUpdate) + //counter update + inst->cntThresUpdate++; + flag = (int)(inst->cntThresUpdate == inst->modelUpdate); + //update histogram + WebRtcNsx_FeatureParameterExtraction(inst, flag); + //compute model parameters + if (flag) { + inst->cntThresUpdate = 0; // Reset counter + //update every window: + // get normalization for spectral difference for next window estimate + + // Shift to Q(-2*stages) + inst->curAvgMagnEnergy >>= STAT_UPDATES; + + tmpU32no1 = (inst->curAvgMagnEnergy + inst->timeAvgMagnEnergy + 1) >> 1; //Q(-2*stages) + // Update featureSpecDiff + if ((tmpU32no1 != inst->timeAvgMagnEnergy) && (inst->featureSpecDiff) && + (inst->timeAvgMagnEnergy > 0)) { + norm32no1 = 0; + tmpU32no3 = tmpU32no1; + while (0xFFFF0000 & tmpU32no3) { + tmpU32no3 >>= 1; + norm32no1++; + } + tmpU32no2 = inst->featureSpecDiff; + while (0xFFFF0000 & tmpU32no2) { + tmpU32no2 >>= 1; + norm32no1++; + } + tmpU32no3 = WEBRTC_SPL_UMUL(tmpU32no3, tmpU32no2); + tmpU32no3 /= inst->timeAvgMagnEnergy; + if (WebRtcSpl_NormU32(tmpU32no3) < norm32no1) { + inst->featureSpecDiff = 0x007FFFFF; + } else { + inst->featureSpecDiff = WEBRTC_SPL_MIN(0x007FFFFF, + tmpU32no3 << norm32no1); + } + } + + inst->timeAvgMagnEnergy = tmpU32no1; // Q(-2*stages) + inst->curAvgMagnEnergy = 0; + } + + //compute speech/noise probability + WebRtcNsx_SpeechNoiseProb(inst, nonSpeechProbFinal, priorLocSnr, postLocSnr); + + //time-avg parameter for noise update + gammaNoise = NOISE_UPDATE_Q8; // Q8 + + maxNoiseU32 = 0; + postShifts = inst->prevQNoise - qMagn; + nShifts = inst->prevQMagn - qMagn; + for (i = 0; i < inst->magnLen; i++) { + // temporary noise update: use it for speech frames if update value is less than previous + // the formula has been rewritten into: + // noiseUpdate = noisePrev[i] + (1 - gammaNoise) * nonSpeechProb * (magn[i] - noisePrev[i]) + + if 
(postShifts < 0) { + tmpU32no2 = magnU16[i] >> -postShifts; // Q(prevQNoise) + } else { + tmpU32no2 = (uint32_t)magnU16[i] << postShifts; // Q(prevQNoise) + } + if (prevNoiseU16[i] > tmpU32no2) { + sign = -1; + tmpU32no1 = prevNoiseU16[i] - tmpU32no2; + } else { + sign = 1; + tmpU32no1 = tmpU32no2 - prevNoiseU16[i]; + } + noiseUpdateU32 = inst->prevNoiseU32[i]; // Q(prevQNoise+11) + tmpU32no3 = 0; + if ((tmpU32no1) && (nonSpeechProbFinal[i])) { + // This value will be used later, if gammaNoise changes + tmpU32no3 = WEBRTC_SPL_UMUL_32_16(tmpU32no1, nonSpeechProbFinal[i]); // Q(prevQNoise+8) + if (0x7c000000 & tmpU32no3) { + // Shifting required before multiplication + tmpU32no2 = (tmpU32no3 >> 5) * gammaNoise; // Q(prevQNoise+11) + } else { + // We can do shifting after multiplication + tmpU32no2 = (tmpU32no3 * gammaNoise) >> 5; // Q(prevQNoise+11) + } + if (sign > 0) { + noiseUpdateU32 += tmpU32no2; // Q(prevQNoise+11) + } else { + // This operation is safe. We can never get wrap around, since worst + // case scenario means magnU16 = 0 + noiseUpdateU32 -= tmpU32no2; // Q(prevQNoise+11) + } + } + + //increase gamma (i.e., less noise update) for frame likely to be speech + prevGammaNoise = gammaNoise; + gammaNoise = NOISE_UPDATE_Q8; + //time-constant based on speech/noise state + //increase gamma (i.e., less noise update) for frames likely to be speech + if (nonSpeechProbFinal[i] < ONE_MINUS_PROB_RANGE_Q8) { + gammaNoise = GAMMA_NOISE_TRANS_AND_SPEECH_Q8; + } + + if (prevGammaNoise != gammaNoise) { + // new noise update + // this line is the same as above, only that the result is stored in a different variable and the gammaNoise + // has changed + // + // noiseUpdate = noisePrev[i] + (1 - gammaNoise) * nonSpeechProb * (magn[i] - noisePrev[i]) + + if (0x7c000000 & tmpU32no3) { + // Shifting required before multiplication + tmpU32no2 = (tmpU32no3 >> 5) * gammaNoise; // Q(prevQNoise+11) + } else { + // We can do shifting after multiplication + tmpU32no2 = (tmpU32no3 * 
gammaNoise) >> 5; // Q(prevQNoise+11) + } + if (sign > 0) { + tmpU32no1 = inst->prevNoiseU32[i] + tmpU32no2; // Q(prevQNoise+11) + } else { + tmpU32no1 = inst->prevNoiseU32[i] - tmpU32no2; // Q(prevQNoise+11) + } + if (noiseUpdateU32 > tmpU32no1) { + noiseUpdateU32 = tmpU32no1; // Q(prevQNoise+11) + } + } + noiseU32[i] = noiseUpdateU32; // Q(prevQNoise+11) + if (noiseUpdateU32 > maxNoiseU32) { + maxNoiseU32 = noiseUpdateU32; + } + + // conservative noise update + // // original FLOAT code + // if (prob_speech < PROB_RANGE) { + // inst->avgMagnPause[i] = inst->avgMagnPause[i] + (1.0 - gamma_pause)*(magn[i] - inst->avgMagnPause[i]); + // } + + tmp32no2 = WEBRTC_SPL_SHIFT_W32(inst->avgMagnPause[i], -nShifts); + if (nonSpeechProbFinal[i] > ONE_MINUS_PROB_RANGE_Q8) { + if (nShifts < 0) { + tmp32no1 = (int32_t)magnU16[i] - tmp32no2; // Q(qMagn) + tmp32no1 *= ONE_MINUS_GAMMA_PAUSE_Q8; // Q(8+prevQMagn+nShifts) + tmp32no1 = (tmp32no1 + 128) >> 8; // Q(qMagn). + } else { + // In Q(qMagn+nShifts) + tmp32no1 = ((int32_t)magnU16[i] << nShifts) - inst->avgMagnPause[i]; + tmp32no1 *= ONE_MINUS_GAMMA_PAUSE_Q8; // Q(8+prevQMagn+nShifts) + tmp32no1 = (tmp32no1 + (128 << nShifts)) >> (8 + nShifts); // Q(qMagn). 
+ } + tmp32no2 += tmp32no1; // Q(qMagn) + } + inst->avgMagnPause[i] = tmp32no2; + } // end of frequency loop + + norm32no1 = WebRtcSpl_NormU32(maxNoiseU32); + qNoise = inst->prevQNoise + norm32no1 - 5; + // done with step 2: noise update + + // STEP 3: compute dd update of prior snr and post snr based on new noise estimate + nShifts = inst->prevQNoise + 11 - qMagn; + for (i = 0; i < inst->magnLen; i++) { + // FLOAT code + // // post and prior SNR + // curNearSnr = 0.0; + // if (magn[i] > noise[i]) + // { + // curNearSnr = magn[i] / (noise[i] + 0.0001) - 1.0; + // } + // // DD estimate is sum of two terms: current estimate and previous estimate + // // directed decision update of snrPrior + // snrPrior = DD_PR_SNR * prevNearSnr[i] + (1.0 - DD_PR_SNR) * curNearSnr; + // // gain filter + // tmpFloat1 = inst->overdrive + snrPrior; + // tmpFloat2 = snrPrior / tmpFloat1; + // theFilter[i] = tmpFloat2; + + // calculate curNearSnr again, this is necessary because a new noise estimate has been made since then. 
for the original + curNearSnr = 0; // Q11 + if (nShifts < 0) { + // This case is equivalent with magn < noise which implies curNearSnr = 0; + tmpMagnU32 = (uint32_t)magnU16[i]; // Q(qMagn) + tmpNoiseU32 = noiseU32[i] << -nShifts; // Q(qMagn) + } else if (nShifts > 17) { + tmpMagnU32 = (uint32_t)magnU16[i] << 17; // Q(qMagn+17) + tmpNoiseU32 = noiseU32[i] >> (nShifts - 17); // Q(qMagn+17) + } else { + tmpMagnU32 = (uint32_t)magnU16[i] << nShifts; // Q(qNoise_prev+11) + tmpNoiseU32 = noiseU32[i]; // Q(qNoise_prev+11) + } + if (tmpMagnU32 > tmpNoiseU32) { + tmpU32no1 = tmpMagnU32 - tmpNoiseU32; // Q(qCur) + norm32no2 = WEBRTC_SPL_MIN(11, WebRtcSpl_NormU32(tmpU32no1)); + tmpU32no1 <<= norm32no2; // Q(qCur+norm32no2) + tmpU32no2 = tmpNoiseU32 >> (11 - norm32no2); // Q(qCur+norm32no2-11) + if (tmpU32no2 > 0) { + tmpU32no1 /= tmpU32no2; // Q11 + } + curNearSnr = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11 + } + + //directed decision update of priorSnr + // FLOAT + // priorSnr = DD_PR_SNR * prevNearSnr + (1.0-DD_PR_SNR) * curNearSnr; + + tmpU32no1 = WEBRTC_SPL_UMUL_32_16(prevNearSnr[i], DD_PR_SNR_Q11); // Q22 + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(curNearSnr, ONE_MINUS_DD_PR_SNR_Q11); // Q22 + priorSnr = tmpU32no1 + tmpU32no2; // Q22 + + //gain filter + tmpU32no1 = inst->overdrive + ((priorSnr + 8192) >> 14); // Q8 + assert(inst->overdrive > 0); + tmpU16no1 = (priorSnr + tmpU32no1 / 2) / tmpU32no1; // Q14 + inst->noiseSupFilter[i] = WEBRTC_SPL_SAT(16384, tmpU16no1, inst->denoiseBound); // 16384 = Q14(1.0) // Q14 + + // Weight in the parametric Wiener filter during startup + if (inst->blockIndex < END_STARTUP_SHORT) { + // Weight the two suppression filters + tmpU32no1 = inst->noiseSupFilter[i] * inst->blockIndex; + tmpU32no2 = noiseSupFilterTmp[i] * + (END_STARTUP_SHORT - inst->blockIndex); + tmpU32no1 += tmpU32no2; + inst->noiseSupFilter[i] = (uint16_t)WebRtcSpl_DivU32U16(tmpU32no1, + END_STARTUP_SHORT); + } + } // end of loop over frequencies + //done with step3 + + // save 
noise and magnitude spectrum for next frame + inst->prevQNoise = qNoise; + inst->prevQMagn = qMagn; + if (norm32no1 > 5) { + for (i = 0; i < inst->magnLen; i++) { + inst->prevNoiseU32[i] = noiseU32[i] << (norm32no1 - 5); // Q(qNoise+11) + inst->prevMagnU16[i] = magnU16[i]; // Q(qMagn) + } + } else { + for (i = 0; i < inst->magnLen; i++) { + inst->prevNoiseU32[i] = noiseU32[i] >> (5 - norm32no1); // Q(qNoise+11) + inst->prevMagnU16[i] = magnU16[i]; // Q(qMagn) + } + } + + WebRtcNsx_DataSynthesis(inst, outFrame[0]); +#ifdef NS_FILEDEBUG + if (fwrite(outframe, sizeof(short), + inst->blockLen10ms, inst->outfile) != inst->blockLen10ms) { + assert(false); + } +#endif + + //for H band: + // only update data buffer, then apply time-domain gain is applied derived from L band + if (num_bands > 1) { + // update analysis buffer for H band + // append new data to buffer FX + for (i = 0; i < num_high_bands; ++i) { + memcpy(inst->dataBufHBFX[i], inst->dataBufHBFX[i] + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->dataBufHBFX[i])); + memcpy(inst->dataBufHBFX[i] + inst->anaLen - inst->blockLen10ms, + speechFrameHB[i], inst->blockLen10ms * sizeof(*inst->dataBufHBFX[i])); + } + // range for averaging low band quantities for H band gain + + gainTimeDomainHB = 16384; // 16384 = Q14(1.0) + //average speech prob from low band + //average filter gain from low band + //avg over second half (i.e., 4->8kHz) of freq. 
spectrum + tmpU32no1 = 0; // Q12 + tmpU16no1 = 0; // Q8 + for (i = inst->anaLen2 - (inst->anaLen2 >> 2); i < inst->anaLen2; i++) { + tmpU16no1 += nonSpeechProbFinal[i]; // Q8 + tmpU32no1 += (uint32_t)(inst->noiseSupFilter[i]); // Q14 + } + assert(inst->stages >= 7); + avgProbSpeechHB = (4096 - (tmpU16no1 >> (inst->stages - 7))); // Q12 + avgFilterGainHB = (int16_t)(tmpU32no1 >> (inst->stages - 3)); // Q14 + + // // original FLOAT code + // // gain based on speech probability: + // avg_prob_speech_tt=(float)2.0*avg_prob_speech-(float)1.0; + // gain_mod=(float)0.5*((float)1.0+(float)tanh(avg_prob_speech_tt)); // between 0 and 1 + + // gain based on speech probability: + // original expression: "0.5 * (1 + tanh(2x-1))" + // avgProbSpeechHB has been anyway saturated to a value between 0 and 1 so the other cases don't have to be dealt with + // avgProbSpeechHB and gainModHB are in Q12, 3607 = Q12(0.880615234375) which is a zero point of + // |0.5 * (1 + tanh(2x-1)) - x| - |0.5 * (1 + tanh(2x-1)) - 0.880615234375| meaning that from that point the error of approximating + // the expression with f(x) = x would be greater than the error of approximating the expression with f(x) = 0.880615234375 + // error: "|0.5 * (1 + tanh(2x-1)) - x| from x=0 to 0.880615234375" -> http://www.wolframalpha.com/input/?i=|0.5+*+(1+%2B+tanh(2x-1))+-+x|+from+x%3D0+to+0.880615234375 + // and: "|0.5 * (1 + tanh(2x-1)) - 0.880615234375| from x=0.880615234375 to 1" -> http://www.wolframalpha.com/input/?i=+|0.5+*+(1+%2B+tanh(2x-1))+-+0.880615234375|+from+x%3D0.880615234375+to+1 + gainModHB = WEBRTC_SPL_MIN(avgProbSpeechHB, 3607); + + // // original FLOAT code + // //combine gain with low band gain + // if (avg_prob_speech < (float)0.5) { + // gain_time_domain_HB=(float)0.5*gain_mod+(float)0.5*avg_filter_gain; + // } + // else { + // gain_time_domain_HB=(float)0.25*gain_mod+(float)0.75*avg_filter_gain; + // } + + + //combine gain with low band gain + if (avgProbSpeechHB < 2048) { + // 2048 = Q12(0.5) 
+ // the next two lines in float are "gain_time_domain = 0.5 * gain_mod + 0.5 * avg_filter_gain"; Q2(0.5) = 2 equals one left shift + gainTimeDomainHB = (gainModHB << 1) + (avgFilterGainHB >> 1); // Q14 + } else { + // "gain_time_domain = 0.25 * gain_mod + 0.75 * agv_filter_gain;" + gainTimeDomainHB = (int16_t)((3 * avgFilterGainHB) >> 2); // 3 = Q2(0.75) + gainTimeDomainHB += gainModHB; // Q14 + } + //make sure gain is within flooring range + gainTimeDomainHB + = WEBRTC_SPL_SAT(16384, gainTimeDomainHB, (int16_t)(inst->denoiseBound)); // 16384 = Q14(1.0) + + + //apply gain + for (i = 0; i < num_high_bands; ++i) { + for (j = 0; j < inst->blockLen10ms; j++) { + outFrameHB[i][j] = (int16_t)((gainTimeDomainHB * + inst->dataBufHBFX[i][j]) >> 14); // Q0 + } + } + } // end of H band gain computation +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.h new file mode 100644 index 00000000..f463dbbe --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core.h @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
+
+#ifdef NS_FILEDEBUG
+#include <stdio.h>
+#endif
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/ns/nsx_defines.h"
+#include "webrtc/typedefs.h"
+
+// State of one fixed-point noise suppression instance.
+// An instance is set up by WebRtcNsx_InitCore() and then passed to every
+// other function declared in this header.
+typedef struct NoiseSuppressionFixedC_ {
+  uint32_t fs;
+
+  const int16_t* window;
+  int16_t analysisBuffer[ANAL_BLOCKL_MAX];
+  int16_t synthesisBuffer[ANAL_BLOCKL_MAX];
+  uint16_t noiseSupFilter[HALF_ANAL_BLOCKL];
+  uint16_t overdrive; /* Q8 */
+  uint16_t denoiseBound; /* Q14 */
+  const int16_t* factor2Table;
+  int16_t noiseEstLogQuantile[SIMULT* HALF_ANAL_BLOCKL];
+  int16_t noiseEstDensity[SIMULT* HALF_ANAL_BLOCKL];
+  int16_t noiseEstCounter[SIMULT];
+  int16_t noiseEstQuantile[HALF_ANAL_BLOCKL];
+
+  size_t anaLen;
+  size_t anaLen2;
+  // Number of frequency bins; WebRtcNsx_SpeechNoiseProb() iterates over
+  // exactly this many entries of its per-bin arrays.
+  size_t magnLen;
+  int aggrMode;
+  int stages;
+  int initFlag;
+  int gainMap;
+
+  int32_t maxLrt;
+  int32_t minLrt;
+  // Log LRT factor with time-smoothing in Q8.
+  // NOTE(review): the update of this array in WebRtcNsx_SpeechNoiseProb()
+  // is annotated as Q12, not Q8 - confirm which Q format is correct.
+  int32_t logLrtTimeAvgW32[HALF_ANAL_BLOCKL];
+  int32_t featureLogLrt;
+  int32_t thresholdLogLrt;
+  int16_t weightLogLrt;
+
+  uint32_t featureSpecDiff;
+  uint32_t thresholdSpecDiff;
+  int16_t weightSpecDiff;
+
+  uint32_t featureSpecFlat;
+  uint32_t thresholdSpecFlat;
+  int16_t weightSpecFlat;
+
+  // Conservative estimate of noise spectrum.
+  int32_t avgMagnPause[HALF_ANAL_BLOCKL];
+  uint32_t magnEnergy;
+  uint32_t sumMagn;
+  uint32_t curAvgMagnEnergy;
+  uint32_t timeAvgMagnEnergy;
+  uint32_t timeAvgMagnEnergyTmp;
+
+  uint32_t whiteNoiseLevel; // Initial noise estimate.
+  // Initial magnitude spectrum estimate.
+  uint32_t initMagnEst[HALF_ANAL_BLOCKL];
+  // Pink noise parameters:
+  int32_t pinkNoiseNumerator; // Numerator.
+  int32_t pinkNoiseExp; // Power of freq.
+  int minNorm; // Smallest normalization factor.
+  int zeroInputSignal; // Zero input signal flag.
+
+  // Noise spectrum from previous frame.
+  uint32_t prevNoiseU32[HALF_ANAL_BLOCKL];
+  // Magnitude spectrum from previous frame.
+  uint16_t prevMagnU16[HALF_ANAL_BLOCKL];
+  // Prior speech/noise probability in Q14.
+  int16_t priorNonSpeechProb;
+
+  int blockIndex; // Frame index counter.
+  // Parameter for updating or estimating thresholds/weights for prior model.
+  int modelUpdate;
+  int cntThresUpdate;
+
+  // Histograms for parameter estimation.
+  int16_t histLrt[HIST_PAR_EST];
+  int16_t histSpecFlat[HIST_PAR_EST];
+  int16_t histSpecDiff[HIST_PAR_EST];
+
+  // Quantities for high band estimate.
+  int16_t dataBufHBFX[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
+
+  int qNoise;
+  int prevQNoise;
+  int prevQMagn;
+  size_t blockLen10ms;
+
+  int16_t real[ANAL_BLOCKL_MAX];
+  int16_t imag[ANAL_BLOCKL_MAX];
+  int32_t energyIn;
+  int scaleEnergyIn;
+  int normData;
+
+  struct RealFFT* real_fft;
+} NoiseSuppressionFixedC;
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/****************************************************************************
+ * WebRtcNsx_InitCore(...)
+ *
+ * This function initializes a noise suppression instance
+ *
+ * Input:
+ *      - inst          : Instance that should be initialized
+ *      - fs            : Sampling frequency
+ *
+ * Output:
+ *      - inst          : Initialized instance
+ *
+ * Return value         :  0 - Ok
+ *                        -1 - Error
+ */
+int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs);
+
+/****************************************************************************
+ * WebRtcNsx_set_policy_core(...)
+ *
+ * This changes the aggressiveness of the noise suppression method.
+ *
+ * Input:
+ *      - inst       : Instance that should be initialized
+ *      - mode       : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
+ *
+ * Output:
+ *      - inst       : Initialized instance
+ *
+ * Return value      :  0 - Ok
+ *                     -1 - Error
+ */
+int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode);
+
+/****************************************************************************
+ * WebRtcNsx_ProcessCore
+ *
+ * Do noise suppression.
+ *
+ * Input:
+ *      - inst          : Instance that should be initialized
+ *      - inFrame       : Input speech frame for each band
+ *      - num_bands     : Number of bands
+ *
+ * Output:
+ *      - inst          : Updated instance
+ *      - outFrame      : Output speech frame for each band
+ */
+void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst,
+                           const short* const* inFrame,
+                           int num_bands,
+                           short* const* outFrame);
+
+/****************************************************************************
+ * Some function pointers, for internal functions shared by ARM NEON and
+ * generic C code.
+ */
+// Noise Estimation.
+typedef void (*NoiseEstimation)(NoiseSuppressionFixedC* inst,
+                                uint16_t* magn,
+                                uint32_t* noise,
+                                int16_t* q_noise);
+extern NoiseEstimation WebRtcNsx_NoiseEstimation;
+
+// Filter the data in the frequency domain, and create spectrum.
+typedef void (*PrepareSpectrum)(NoiseSuppressionFixedC* inst,
+                                int16_t* freq_buff);
+extern PrepareSpectrum WebRtcNsx_PrepareSpectrum;
+
+// For the noise suppression process, synthesis, read out fully processed
+// segment, and update synthesis buffer.
+typedef void (*SynthesisUpdate)(NoiseSuppressionFixedC* inst,
+                                int16_t* out_frame,
+                                int16_t gain_factor);
+extern SynthesisUpdate WebRtcNsx_SynthesisUpdate;
+
+// Update analysis buffer for lower band, and window data before FFT.
+typedef void (*AnalysisUpdate)(NoiseSuppressionFixedC* inst,
+                               int16_t* out,
+                               int16_t* new_speech);
+extern AnalysisUpdate WebRtcNsx_AnalysisUpdate;
+
+// Denormalize the real-valued signal |in|, the output from inverse FFT.
+typedef void (*Denormalize)(NoiseSuppressionFixedC* inst,
+                            int16_t* in,
+                            int factor);
+extern Denormalize WebRtcNsx_Denormalize;
+
+// Normalize the real-valued signal |in|, the input to forward FFT.
+typedef void (*NormalizeRealBuffer)(NoiseSuppressionFixedC* inst,
+                                    const int16_t* in,
+                                    int16_t* out);
+extern NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
+
+// Compute speech/noise probability.
+// Intended to be private.
+//   - inst               : instance; its logLrtTimeAvgW32, featureLogLrt and
+//                          priorNonSpeechProb members are updated.
+//   - nonSpeechProbFinal : output, inst->magnLen entries, cleared and then
+//                          filled with the per-bin non-speech probability.
+//   - priorLocSnr        : prior SNR for each frequency bin.
+//   - postLocSnr         : post SNR for each frequency bin.
+void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
+                               uint16_t* nonSpeechProbFinal,
+                               uint32_t* priorLocSnr,
+                               uint32_t* postLocSnr);
+
+#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON)
+// For the above function pointers, functions for generic platforms are declared
+// and defined as static in file nsx_core.c, while those for ARM Neon platforms
+// are declared below and defined in file nsx_core_neon.c.
+void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst,
+                                   uint16_t* magn,
+                                   uint32_t* noise,
+                                   int16_t* q_noise);
+void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst,
+                                   int16_t* out_frame,
+                                   int16_t gain_factor);
+void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst,
+                                  int16_t* out,
+                                  int16_t* new_speech);
+void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst,
+                                   int16_t* freq_buff);
+#endif
+
+#if defined(MIPS32_LE)
+// For the above function pointers, functions for generic platforms are declared
+// and defined as static in file nsx_core.c, while those for MIPS platforms
+// are declared below and defined in file nsx_core_mips.c.
+void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst,
+                                    int16_t* out_frame,
+                                    int16_t gain_factor);
+void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst,
+                                   int16_t* out,
+                                   int16_t* new_speech);
+void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst,
+                                    int16_t* freq_buff);
+void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst,
+                                        const int16_t* in,
+                                        int16_t* out);
+#if defined(MIPS_DSP_R1_LE)
+void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst,
+                                int16_t* in,
+                                int factor);
+#endif
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_c.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_c.c
new file mode 100644
index 00000000..14322d38
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_c.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include <assert.h> + +#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h" +#include "webrtc/modules/audio_processing/ns/nsx_core.h" +#include "webrtc/modules/audio_processing/ns/nsx_defines.h" + +static const int16_t kIndicatorTable[17] = { + 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718, + 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187 +}; + +// Compute speech/noise probability +// speech/noise probability is returned in: probSpeechFinal +//snrLocPrior is the prior SNR for each frequency (in Q11) +//snrLocPost is the post SNR for each frequency (in Q11) +void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst, + uint16_t* nonSpeechProbFinal, + uint32_t* priorLocSnr, + uint32_t* postLocSnr) { + uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3; + int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32; + int32_t frac32, logTmp; + int32_t logLrtTimeAvgKsumFX; + int16_t indPriorFX16; + int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart; + size_t i; + int normTmp, normTmp2, nShifts; + + // compute feature based on average LR factor + // this is the average over all frequencies of the smooth log LRT + logLrtTimeAvgKsumFX = 0; + for (i = 0; i < inst->magnLen; i++) { + besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11 + normTmp = WebRtcSpl_NormU32(postLocSnr[i]); + num = postLocSnr[i] << normTmp; // Q(11+normTmp) + if (normTmp > 10) { + den = priorLocSnr[i] << (normTmp - 11); // Q(normTmp) + } else { + den = priorLocSnr[i] >> (11 - normTmp); // Q(normTmp) + } + if (den > 0) { + besselTmpFX32 -= num / den; // Q11 + } else { + besselTmpFX32 = 0; + } + + // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior) + // - inst->logLrtTimeAvg[i]); + // Here, LRT_TAVG = 0.5 + zeros = WebRtcSpl_NormU32(priorLocSnr[i]); + frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19); + tmp32 = (frac32 * frac32 * -43) >> 19; + tmp32 += ((int16_t)frac32 * 5412) >> 12; + frac32 = tmp32 + 
37; + // tmp32 = log2(priorLocSnr[i]) + tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12 + logTmp = (tmp32 * 178) >> 8; // log2(priorLocSnr[i])*log(2) + // tmp32no1 = LRT_TAVG * (log(snrLocPrior) + inst->logLrtTimeAvg[i]) in Q12. + tmp32no1 = (logTmp + inst->logLrtTimeAvgW32[i]) / 2; + inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12 + + logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12 + } + inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >> + (inst->stages + 11); + + // done with computation of LR factor + + // + //compute the indicator functions + // + + // average LRT feature + // FLOAT code + // indicator0 = 0.5 * (tanh(widthPrior * + // (logLrtTimeAvgKsum - threshPrior0)) + 1.0); + tmpIndFX = 16384; // Q14(1.0) + tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12 + nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5; + //use larger width in tanh map for pause regions + if (tmp32no1 < 0) { + tmpIndFX = 0; + tmp32no1 = -tmp32no1; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14 + // compute indicator function: sigmoid map + tableIndex = (int16_t)(tmp32no1 >> 14); + if ((tableIndex < 16) && (tableIndex >= 0)) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14); + if (tmpIndFX == 0) { + tmpIndFX = 8192 - tmp16no2; // Q14 + } else { + tmpIndFX = 8192 + tmp16no2; // Q14 + } + } + indPriorFX = inst->weightLogLrt * tmpIndFX; // 6*Q14 + + //spectral flatness feature + if (inst->weightSpecFlat) { + tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10 + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10 + nShifts = 4; + if (inst->thresholdSpecFlat < tmpU32no1) { + 
tmpIndFX = 0; + tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); // Q14 + // compute indicator function: sigmoid map + // FLOAT code + // indicator1 = 0.5 * (tanh(sgnMap * widthPrior * + // (threshPrior1 - tmpFloat1)) + 1.0); + tableIndex = (int16_t)(tmpU32no1 >> 14); + if (tableIndex < 16) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14); + if (tmpIndFX) { + tmpIndFX = 8192 + tmp16no2; // Q14 + } else { + tmpIndFX = 8192 - tmp16no2; // Q14 + } + } + indPriorFX += inst->weightSpecFlat * tmpIndFX; // 6*Q14 + } + + //for template spectral-difference + if (inst->weightSpecDiff) { + tmpU32no1 = 0; + if (inst->featureSpecDiff) { + normTmp = WEBRTC_SPL_MIN(20 - inst->stages, + WebRtcSpl_NormU32(inst->featureSpecDiff)); + assert(normTmp >= 0); + tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages) + tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp); + if (tmpU32no2 > 0) { + // Q(20 - inst->stages) + tmpU32no1 /= tmpU32no2; + } else { + tmpU32no1 = (uint32_t)(0x7fffffff); + } + } + tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25; + tmpU32no2 = tmpU32no1 - tmpU32no3; + nShifts = 1; + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + if (tmpU32no2 & 0x80000000) { + tmpIndFX = 0; + tmpU32no2 = tmpU32no3 - tmpU32no1; + //widthPrior = widthPrior * 2.0; + nShifts--; + } + tmpU32no1 = tmpU32no2 >> nShifts; + // compute indicator function: sigmoid map + /* FLOAT code + indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0); + */ + tableIndex = (int16_t)(tmpU32no1 >> 14); + if (tableIndex < 16) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + 
frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + tmp16no1, frac, 14); + if (tmpIndFX) { + tmpIndFX = 8192 + tmp16no2; + } else { + tmpIndFX = 8192 - tmp16no2; + } + } + indPriorFX += inst->weightSpecDiff * tmpIndFX; // 6*Q14 + } + + //combine the indicator function with the feature weights + // FLOAT code + // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * + // indicator1 + weightIndPrior2 * indicator2); + indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14 + // done with computing indicator function + + //compute the prior probability + // FLOAT code + // inst->priorNonSpeechProb += PRIOR_UPDATE * + // (indPriorNonSpeech - inst->priorNonSpeechProb); + tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14 + inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14); + + //final speech probability: combine prior model with LR factor: + + memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen); + + if (inst->priorNonSpeechProb > 0) { + for (i = 0; i < inst->magnLen; i++) { + // FLOAT code + // invLrt = exp(inst->logLrtTimeAvg[i]); + // invLrt = inst->priorSpeechProb * invLrt; + // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) / + // (1.0 - inst->priorSpeechProb + invLrt); + // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt; + // nonSpeechProbFinal[i] = inst->priorNonSpeechProb / + // (inst->priorNonSpeechProb + invLrt); + if (inst->logLrtTimeAvgW32[i] < 65300) { + tmp32no1 = (inst->logLrtTimeAvgW32[i] * 23637) >> 14; // Q12 + intPart = (int16_t)(tmp32no1 >> 12); + if (intPart < -8) { + intPart = -8; + } + frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12 + + // Quadratic approximation of 2^frac + tmp32no2 = (frac * frac * 44) >> 19; // Q12. 
+ tmp32no2 += (frac * 84) >> 7; // Q12 + invLrtFX = (1 << (8 + intPart)) + + WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8 + + normTmp = WebRtcSpl_NormW32(invLrtFX); + normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb)); + if (normTmp + normTmp2 >= 7) { + if (normTmp + normTmp2 < 15) { + invLrtFX >>= 15 - normTmp2 - normTmp; + // Q(normTmp+normTmp2-7) + tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb); + // Q(normTmp+normTmp2+7) + invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2); + // Q14 + } else { + tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb); + // Q22 + invLrtFX = tmp32no1 >> 8; // Q14. + } + + tmp32no1 = (int32_t)inst->priorNonSpeechProb << 8; // Q22 + + nonSpeechProbFinal[i] = tmp32no1 / + (inst->priorNonSpeechProb + invLrtFX); // Q8 + } + } + } + } +} + diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c new file mode 100644 index 00000000..d99be872 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c @@ -0,0 +1,1002 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+
+// Sample points of the sigmoid ("tanh") map used by the indicator functions
+// below. The looked-up (and linearly interpolated) value is added to or
+// subtracted from 8192 to form a Q14 indicator. Valid indices for
+// interpolation are 0..15, because entry [index + 1] is read as well.
+static const int16_t kIndicatorTable[17] = {
+  0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
+  7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
+};
+
+// Compute speech/noise probability (MIPS version; the two per-bin loops are
+// implemented in inline assembly, the indicator functions in C).
+//
+// Input:
+//   - inst               : noise suppression instance. Its logLrtTimeAvgW32,
+//                          featureLogLrt and priorNonSpeechProb members are
+//                          updated.
+//   - priorLocSnr        : prior SNR for each frequency bin (in Q11),
+//                          inst->magnLen entries.
+//   - postLocSnr         : post SNR for each frequency bin (in Q11),
+//                          inst->magnLen entries.
+// Output:
+//   - nonSpeechProbFinal : non-speech probability for each frequency bin,
+//                          inst->magnLen entries, cleared before being
+//                          filled.
+//
+// The table index of each sigmoid lookup is range-checked in the full-width
+// type before it is narrowed to int16_t. Narrowing first would turn large
+// arguments into negative or aliased indices and read outside
+// kIndicatorTable; out-of-range arguments must leave the indicator at its
+// saturated value (0 or Q14(1.0)).
+void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
+                               uint16_t* nonSpeechProbFinal,
+                               uint32_t* priorLocSnr,
+                               uint32_t* postLocSnr) {
+  uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
+  int32_t indPriorFX, tmp32no1;
+  int32_t logLrtTimeAvgKsumFX;
+  int16_t indPriorFX16;
+  int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac;
+  size_t i;
+  int normTmp, nShifts;
+
+  int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
+  int32_t const_max = 0x7fffffff;
+  int32_t const_neg43 = -43;
+  int32_t const_5412 = 5412;
+  int32_t const_11rsh12 = (11 << 12);
+  int32_t const_178 = 178;
+
+
+  // compute feature based on average LR factor
+  // this is the average over all frequencies of the smooth log LRT
+  logLrtTimeAvgKsumFX = 0;
+  for (i = 0; i < inst->magnLen; i++) {
+    r0 = postLocSnr[i]; // Q11
+    r1 = priorLocSnr[i];
+    r2 = inst->logLrtTimeAvgW32[i];
+
+    // On exit r2 holds the updated inst->logLrtTimeAvgW32[i].
+    __asm __volatile(
+      ".set push \n\t"
+      ".set noreorder \n\t"
+      "clz %[r3], %[r0] \n\t"
+      "clz %[r5], %[r1] \n\t"
+      "slti %[r4], %[r3], 32 \n\t"
+      "slti %[r6], %[r5], 32 \n\t"
+      "movz %[r3], $0, %[r4] \n\t"
+      "movz %[r5], $0, %[r6] \n\t"
+      "slti %[r4], %[r3], 11 \n\t"
+      "addiu %[r6], %[r3], -11 \n\t"
+      "neg %[r7], %[r6] \n\t"
+      "sllv %[r6], %[r1], %[r6] \n\t"
+      "srav %[r7], %[r1], %[r7] \n\t"
+      "movn %[r6], %[r7], %[r4] \n\t"
+      "sllv %[r1], %[r1], %[r5] \n\t"
+      "and %[r1], %[r1], %[const_max] \n\t"
+      "sra %[r1], %[r1], 19 \n\t"
+      "mul %[r7], %[r1], %[r1] \n\t"
+      "sllv %[r3], %[r0], %[r3] \n\t"
+      "divu %[r8], %[r3], %[r6] \n\t"
+      "slti %[r6], %[r6], 1 \n\t"
+      "mul %[r7], %[r7], %[const_neg43] \n\t"
+      "sra %[r7], %[r7], 19 \n\t"
+      "movz %[r3], %[r8], %[r6] \n\t"
+      "subu %[r0], %[r0], %[r3] \n\t"
+      "movn %[r0], $0, %[r6] \n\t"
+      "mul %[r1], %[r1], %[const_5412] \n\t"
+      "sra %[r1], %[r1], 12 \n\t"
+      "addu %[r7], %[r7], %[r1] \n\t"
+      "addiu %[r1], %[r7], 37 \n\t"
+      "addiu %[r5], %[r5], -31 \n\t"
+      "neg %[r5], %[r5] \n\t"
+      "sll %[r5], %[r5], 12 \n\t"
+      "addu %[r5], %[r5], %[r1] \n\t"
+      "subu %[r7], %[r5], %[const_11rsh12] \n\t"
+      "mul %[r7], %[r7], %[const_178] \n\t"
+      "sra %[r7], %[r7], 8 \n\t"
+      "addu %[r7], %[r7], %[r2] \n\t"
+      "sra %[r7], %[r7], 1 \n\t"
+      "subu %[r2], %[r2], %[r7] \n\t"
+      "addu %[r2], %[r2], %[r0] \n\t"
+      ".set pop \n\t"
+      : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
+        [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+        [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8)
+      : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43),
+        [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12),
+        [const_178] "r" (const_178)
+      : "hi", "lo"
+    );
+    inst->logLrtTimeAvgW32[i] = r2;
+    logLrtTimeAvgKsumFX += r2;
+  }
+
+  inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >>
+      (inst->stages + 11);
+
+  // done with computation of LR factor
+
+  //
+  // compute the indicator functions
+  //
+
+  // average LRT feature
+  // FLOAT code
+  // indicator0 = 0.5 * (tanh(widthPrior *
+  //                          (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
+  tmpIndFX = 16384; // Q14(1.0)
+  tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
+  nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
+  //use larger width in tanh map for pause regions
+  if (tmp32no1 < 0) {
+    tmpIndFX = 0;
+    tmp32no1 = -tmp32no1;
+    //widthPrior = widthPrior * 2.0;
+    nShifts++;
+  }
+  tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
+  // compute indicator function: sigmoid map
+  // Range-check before narrowing so that large arguments saturate instead
+  // of aliasing into the table.
+  if ((tmp32no1 >= 0) && ((tmp32no1 >> 14) < 16)) {
+    tableIndex = (int16_t)(tmp32no1 >> 14);
+    tmp16no2 = kIndicatorTable[tableIndex];
+    tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+    frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
+    tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
+    if (tmpIndFX == 0) {
+      tmpIndFX = 8192 - tmp16no2; // Q14
+    } else {
+      tmpIndFX = 8192 + tmp16no2; // Q14
+    }
+  }
+  indPriorFX = inst->weightLogLrt * tmpIndFX; // 6*Q14
+
+  //spectral flatness feature
+  if (inst->weightSpecFlat) {
+    tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
+    tmpIndFX = 16384; // Q14(1.0)
+    //use larger width in tanh map for pause regions
+    tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
+    nShifts = 4;
+    if (inst->thresholdSpecFlat < tmpU32no1) {
+      tmpIndFX = 0;
+      tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
+      //widthPrior = widthPrior * 2.0;
+      nShifts++;
+    }
+    tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); //Q14
+    // compute indicator function: sigmoid map
+    // FLOAT code
+    // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
+    //                          (threshPrior1 - tmpFloat1)) + 1.0);
+    // Compare the unsigned value itself: a cast to int16_t first could
+    // produce a negative index that passes a "< 16" test.
+    if ((tmpU32no1 >> 14) < 16) {
+      tableIndex = (int16_t)(tmpU32no1 >> 14);
+      tmp16no2 = kIndicatorTable[tableIndex];
+      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+      tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
+      if (tmpIndFX) {
+        tmpIndFX = 8192 + tmp16no2; // Q14
+      } else {
+        tmpIndFX = 8192 - tmp16no2; // Q14
+      }
+    }
+    indPriorFX += inst->weightSpecFlat * tmpIndFX; // 6*Q14
+  }
+
+  //for template spectral-difference
+  if (inst->weightSpecDiff) {
+    tmpU32no1 = 0;
+    if (inst->featureSpecDiff) {
+      normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
+                               WebRtcSpl_NormU32(inst->featureSpecDiff));
+      assert(normTmp >= 0);
+      tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages)
+      tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp);
+      if (tmpU32no2 > 0) {
+        // Q(20 - inst->stages)
+        tmpU32no1 /= tmpU32no2;
+      } else {
+        tmpU32no1 = (uint32_t)(0x7fffffff);
+      }
+    }
+    tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25;
+    tmpU32no2 = tmpU32no1 - tmpU32no3;
+    nShifts = 1;
+    tmpIndFX = 16384; // Q14(1.0)
+    //use larger width in tanh map for pause regions
+    if (tmpU32no2 & 0x80000000) {
+      tmpIndFX = 0;
+      tmpU32no2 = tmpU32no3 - tmpU32no1;
+      //widthPrior = widthPrior * 2.0;
+      nShifts--;
+    }
+    tmpU32no1 = tmpU32no2 >> nShifts;
+    // compute indicator function: sigmoid map
+    /* FLOAT code
+     indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
+     */
+    // Same wide range check as above; the 0x7fffffff saturation value set
+    // earlier would otherwise narrow to a negative table index.
+    if ((tmpU32no1 >> 14) < 16) {
+      tableIndex = (int16_t)(tmpU32no1 >> 14);
+      tmp16no2 = kIndicatorTable[tableIndex];
+      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+      tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                    tmp16no1, frac, 14);
+      if (tmpIndFX) {
+        tmpIndFX = 8192 + tmp16no2;
+      } else {
+        tmpIndFX = 8192 - tmp16no2;
+      }
+    }
+    indPriorFX += inst->weightSpecDiff * tmpIndFX; // 6*Q14
+  }
+
+  //combine the indicator function with the feature weights
+  // FLOAT code
+  // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
+  //                 indicator1 + weightIndPrior2 * indicator2);
+  indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
+  // done with computing indicator function
+
+  //compute the prior probability
+  // FLOAT code
+  // inst->priorNonSpeechProb += PRIOR_UPDATE *
+  //                             (indPriorNonSpeech - inst->priorNonSpeechProb);
+  tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
+  inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14);
+
+  //final speech probability: combine prior model with LR factor:
+
+  memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
+
+  if (inst->priorNonSpeechProb > 0) {
+    r0 = inst->priorNonSpeechProb;
+    r1 = 16384 - r0;
+    int32_t const_23637 = 23637;
+    int32_t const_44 = 44;
+    int32_t const_84 = 84;
+    int32_t const_1 = 1;
+    int32_t const_neg8 = -8;
+    for (i = 0; i < inst->magnLen; i++) {
+      r2 = inst->logLrtTimeAvgW32[i];
+      if (r2 < 65300) {
+        // r3 is the value stored to nonSpeechProbFinal[i] after the block.
+        __asm __volatile(
+          ".set push \n\t"
+          ".set noreorder \n\t"
+          "mul %[r2], %[r2], %[const_23637] \n\t"
+          "sll %[r6], %[r1], 16 \n\t"
+          "clz %[r7], %[r6] \n\t"
+          "clo %[r8], %[r6] \n\t"
+          "slt %[r9], %[r6], $0 \n\t"
+          "movn %[r7], %[r8], %[r9] \n\t"
+          "sra %[r2], %[r2], 14 \n\t"
+          "andi %[r3], %[r2], 0xfff \n\t"
+          "mul %[r4], %[r3], %[r3] \n\t"
+          "mul %[r3], %[r3], %[const_84] \n\t"
+          "sra %[r2], %[r2], 12 \n\t"
+          "slt %[r5], %[r2], %[const_neg8] \n\t"
+          "movn %[r2], %[const_neg8], %[r5] \n\t"
+          "mul %[r4], %[r4], %[const_44] \n\t"
+          "sra %[r3], %[r3], 7 \n\t"
+          "addiu %[r7], %[r7], -1 \n\t"
+          "slti %[r9], %[r7], 31 \n\t"
+          "movz %[r7], $0, %[r9] \n\t"
+          "sra %[r4], %[r4], 19 \n\t"
+          "addu %[r4], %[r4], %[r3] \n\t"
+          "addiu %[r3], %[r2], 8 \n\t"
+          "addiu %[r2], %[r2], -4 \n\t"
+          "neg %[r5], %[r2] \n\t"
+          "sllv %[r6], %[r4], %[r2] \n\t"
+          "srav %[r5], %[r4], %[r5] \n\t"
+          "slt %[r2], %[r2], $0 \n\t"
+          "movn %[r6], %[r5], %[r2] \n\t"
+          "sllv %[r3], %[const_1], %[r3] \n\t"
+          "addu %[r2], %[r3], %[r6] \n\t"
+          "clz %[r4], %[r2] \n\t"
+          "clo %[r5], %[r2] \n\t"
+          "slt %[r8], %[r2], $0 \n\t"
+          "movn %[r4], %[r5], %[r8] \n\t"
+          "addiu %[r4], %[r4], -1 \n\t"
+          "slt %[r5], $0, %[r2] \n\t"
+          "or %[r5], %[r5], %[r7] \n\t"
+          "movz %[r4], $0, %[r5] \n\t"
+          "addiu %[r6], %[r7], -7 \n\t"
+          "addu %[r6], %[r6], %[r4] \n\t"
+          "bltz %[r6], 1f \n\t"
+          " nop \n\t"
+          "addiu %[r4], %[r6], -8 \n\t"
+          "neg %[r3], %[r4] \n\t"
+          "srav %[r5], %[r2], %[r3] \n\t"
+          "mul %[r5], %[r5], %[r1] \n\t"
+          "mul %[r2], %[r2], %[r1] \n\t"
+          "slt %[r4], %[r4], $0 \n\t"
+          "srav %[r5], %[r5], %[r6] \n\t"
+          "sra %[r2], %[r2], 8 \n\t"
+          "movn %[r2], %[r5], %[r4] \n\t"
+          "sll %[r3], %[r0], 8 \n\t"
+          "addu %[r2], %[r0], %[r2] \n\t"
+          "divu %[r3], %[r3], %[r2] \n\t"
+          "1: \n\t"
+          ".set pop \n\t"
+          : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4),
+            [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
+            [r8] "=&r" (r8), [r9] "=&r" (r9)
+          : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637),
+            [const_neg8] "r" (const_neg8), [const_84] "r" (const_84),
+            [const_1] "r" (const_1), [const_44] "r" (const_44)
+          : "hi", "lo"
+        );
+        nonSpeechProbFinal[i] = r3;
+      }
+    }
+  }
+}
+
+// Update analysis buffer for lower band, and window data before FFT.
+void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst,
+                                   int16_t* out,
+                                   int16_t* new_speech) {
+  int iters, after;
+  int anaLen = (int)inst->anaLen;
+  int *window = (int*)inst->window;
+  int *anaBuf = (int*)inst->analysisBuffer;
+  int *outBuf = (int*)out;
+  int r0, r1, r2, r3, r4, r5, r6, r7;
+#if defined(MIPS_DSP_R1_LE)
+  int r8;
+#endif
+
+  // For lower band update analysis buffer.
+  memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
+      (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer));
+  memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech,
+      inst->blockLen10ms * sizeof(*inst->analysisBuffer));
+
+  // Window data before FFT.
+#if defined(MIPS_DSP_R1_LE) + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "sra %[iters], %[anaLen], 3 \n\t" + "1: \n\t" + "blez %[iters], 2f \n\t" + " nop \n\t" + "lw %[r0], 0(%[window]) \n\t" + "lw %[r1], 0(%[anaBuf]) \n\t" + "lw %[r2], 4(%[window]) \n\t" + "lw %[r3], 4(%[anaBuf]) \n\t" + "lw %[r4], 8(%[window]) \n\t" + "lw %[r5], 8(%[anaBuf]) \n\t" + "lw %[r6], 12(%[window]) \n\t" + "lw %[r7], 12(%[anaBuf]) \n\t" + "muleq_s.w.phl %[r8], %[r0], %[r1] \n\t" + "muleq_s.w.phr %[r0], %[r0], %[r1] \n\t" + "muleq_s.w.phl %[r1], %[r2], %[r3] \n\t" + "muleq_s.w.phr %[r2], %[r2], %[r3] \n\t" + "muleq_s.w.phl %[r3], %[r4], %[r5] \n\t" + "muleq_s.w.phr %[r4], %[r4], %[r5] \n\t" + "muleq_s.w.phl %[r5], %[r6], %[r7] \n\t" + "muleq_s.w.phr %[r6], %[r6], %[r7] \n\t" +#if defined(MIPS_DSP_R2_LE) + "precr_sra_r.ph.w %[r8], %[r0], 15 \n\t" + "precr_sra_r.ph.w %[r1], %[r2], 15 \n\t" + "precr_sra_r.ph.w %[r3], %[r4], 15 \n\t" + "precr_sra_r.ph.w %[r5], %[r6], 15 \n\t" + "sw %[r8], 0(%[outBuf]) \n\t" + "sw %[r1], 4(%[outBuf]) \n\t" + "sw %[r3], 8(%[outBuf]) \n\t" + "sw %[r5], 12(%[outBuf]) \n\t" +#else + "shra_r.w %[r8], %[r8], 15 \n\t" + "shra_r.w %[r0], %[r0], 15 \n\t" + "shra_r.w %[r1], %[r1], 15 \n\t" + "shra_r.w %[r2], %[r2], 15 \n\t" + "shra_r.w %[r3], %[r3], 15 \n\t" + "shra_r.w %[r4], %[r4], 15 \n\t" + "shra_r.w %[r5], %[r5], 15 \n\t" + "shra_r.w %[r6], %[r6], 15 \n\t" + "sll %[r0], %[r0], 16 \n\t" + "sll %[r2], %[r2], 16 \n\t" + "sll %[r4], %[r4], 16 \n\t" + "sll %[r6], %[r6], 16 \n\t" + "packrl.ph %[r0], %[r8], %[r0] \n\t" + "packrl.ph %[r2], %[r1], %[r2] \n\t" + "packrl.ph %[r4], %[r3], %[r4] \n\t" + "packrl.ph %[r6], %[r5], %[r6] \n\t" + "sw %[r0], 0(%[outBuf]) \n\t" + "sw %[r2], 4(%[outBuf]) \n\t" + "sw %[r4], 8(%[outBuf]) \n\t" + "sw %[r6], 12(%[outBuf]) \n\t" +#endif + "addiu %[window], %[window], 16 \n\t" + "addiu %[anaBuf], %[anaBuf], 16 \n\t" + "addiu %[outBuf], %[outBuf], 16 \n\t" + "b 1b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "2: \n\t" + 
"andi %[after], %[anaLen], 7 \n\t" + "3: \n\t" + "blez %[after], 4f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[anaBuf]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "addiu %[window], %[window], 2 \n\t" + "addiu %[anaBuf], %[anaBuf], 2 \n\t" + "addiu %[outBuf], %[outBuf], 2 \n\t" + "shra_r.w %[r0], %[r0], 14 \n\t" + "sh %[r0], -2(%[outBuf]) \n\t" + "b 3b \n\t" + " addiu %[after], %[after], -1 \n\t" + "4: \n\t" + ".set pop \n\t" + : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), + [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), + [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8), + [iters] "=&r" (iters), [after] "=&r" (after), + [window] "+r" (window),[anaBuf] "+r" (anaBuf), + [outBuf] "+r" (outBuf) + : [anaLen] "r" (anaLen) + : "memory", "hi", "lo" + ); +#else + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "sra %[iters], %[anaLen], 2 \n\t" + "1: \n\t" + "blez %[iters], 2f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[anaBuf]) \n\t" + "lh %[r2], 2(%[window]) \n\t" + "lh %[r3], 2(%[anaBuf]) \n\t" + "lh %[r4], 4(%[window]) \n\t" + "lh %[r5], 4(%[anaBuf]) \n\t" + "lh %[r6], 6(%[window]) \n\t" + "lh %[r7], 6(%[anaBuf]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "mul %[r2], %[r2], %[r3] \n\t" + "mul %[r4], %[r4], %[r5] \n\t" + "mul %[r6], %[r6], %[r7] \n\t" + "addiu %[window], %[window], 8 \n\t" + "addiu %[anaBuf], %[anaBuf], 8 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "addiu %[r2], %[r2], 0x2000 \n\t" + "addiu %[r4], %[r4], 0x2000 \n\t" + "addiu %[r6], %[r6], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sra %[r2], %[r2], 14 \n\t" + "sra %[r4], %[r4], 14 \n\t" + "sra %[r6], %[r6], 14 \n\t" + "sh %[r0], 0(%[outBuf]) \n\t" + "sh %[r2], 2(%[outBuf]) \n\t" + "sh %[r4], 4(%[outBuf]) \n\t" + "sh %[r6], 6(%[outBuf]) \n\t" + "addiu %[outBuf], %[outBuf], 8 \n\t" + "b 1b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "2: \n\t" + "andi %[after], %[anaLen], 3 \n\t" + "3: \n\t" + "blez %[after], 4f \n\t" + " nop 
\n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[anaBuf]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "addiu %[window], %[window], 2 \n\t" + "addiu %[anaBuf], %[anaBuf], 2 \n\t" + "addiu %[outBuf], %[outBuf], 2 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sh %[r0], -2(%[outBuf]) \n\t" + "b 3b \n\t" + " addiu %[after], %[after], -1 \n\t" + "4: \n\t" + ".set pop \n\t" + : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), + [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), + [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters), + [after] "=&r" (after), [window] "+r" (window), + [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf) + : [anaLen] "r" (anaLen) + : "memory", "hi", "lo" + ); +#endif +} + +// For the noise supression process, synthesis, read out fully processed +// segment, and update synthesis buffer. +void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst, + int16_t* out_frame, + int16_t gain_factor) { + int iters = (int)inst->blockLen10ms >> 2; + int after = inst->blockLen10ms & 3; + int r0, r1, r2, r3, r4, r5, r6, r7; + int16_t *window = (int16_t*)inst->window; + int16_t *real = inst->real; + int16_t *synthBuf = inst->synthesisBuffer; + int16_t *out = out_frame; + int sat_pos = 0x7fff; + int sat_neg = 0xffff8000; + int block10 = (int)inst->blockLen10ms; + int anaLen = (int)inst->anaLen; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "1: \n\t" + "blez %[iters], 2f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "lh %[r2], 2(%[window]) \n\t" + "lh %[r3], 2(%[real]) \n\t" + "lh %[r4], 4(%[window]) \n\t" + "lh %[r5], 4(%[real]) \n\t" + "lh %[r6], 6(%[window]) \n\t" + "lh %[r7], 6(%[real]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "mul %[r2], %[r2], %[r3] \n\t" + "mul %[r4], %[r4], %[r5] \n\t" + "mul %[r6], %[r6], %[r7] \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "addiu %[r2], %[r2], 0x2000 \n\t" + "addiu %[r4], %[r4], 0x2000 \n\t" + "addiu %[r6], %[r6], 0x2000 
\n\t" + "sra %[r0], %[r0], 14 \n\t" + "sra %[r2], %[r2], 14 \n\t" + "sra %[r4], %[r4], 14 \n\t" + "sra %[r6], %[r6], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "mul %[r2], %[r2], %[gain_factor] \n\t" + "mul %[r4], %[r4], %[gain_factor] \n\t" + "mul %[r6], %[r6], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "addiu %[r2], %[r2], 0x1000 \n\t" + "addiu %[r4], %[r4], 0x1000 \n\t" + "addiu %[r6], %[r6], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "sra %[r2], %[r2], 13 \n\t" + "sra %[r4], %[r4], 13 \n\t" + "sra %[r6], %[r6], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "lh %[r3], 2(%[synthBuf]) \n\t" + "lh %[r5], 4(%[synthBuf]) \n\t" + "lh %[r7], 6(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "addu %[r2], %[r2], %[r3] \n\t" + "addu %[r4], %[r4], %[r5] \n\t" + "addu %[r6], %[r6], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "slt %[r3], %[r2], %[sat_neg] \n\t" + "slt %[r5], %[r4], %[sat_neg] \n\t" + "slt %[r7], %[r6], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "movn %[r2], %[sat_neg], %[r3] \n\t" + "movn %[r4], %[sat_neg], %[r5] \n\t" + "movn %[r6], %[sat_neg], %[r7] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "sh %[r2], 2(%[synthBuf]) \n\t" + "sh %[r4], 4(%[synthBuf]) \n\t" + "sh %[r6], 6(%[synthBuf]) \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r2], 2(%[out]) \n\t" + "sh %[r4], 4(%[out]) \n\t" + "sh %[r6], 6(%[out]) 
\n\t" + "addiu %[window], %[window], 8 \n\t" + "addiu %[real], %[real], 8 \n\t" + "addiu %[synthBuf],%[synthBuf], 8 \n\t" + "addiu %[out], %[out], 8 \n\t" + "b 1b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "2: \n\t" + "blez %[after], 3f \n\t" + " subu %[block10], %[anaLen], %[block10] \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "addiu %[window], %[window], 2 \n\t" + "addiu %[real], %[real], 2 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "sh %[r0], 0(%[out]) \n\t" + "addiu %[synthBuf],%[synthBuf], 2 \n\t" + "addiu %[out], %[out], 2 \n\t" + "b 2b \n\t" + " addiu %[after], %[after], -1 \n\t" + "3: \n\t" + "sra %[iters], %[block10], 2 \n\t" + "4: \n\t" + "blez %[iters], 5f \n\t" + " andi %[after], %[block10], 3 \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "lh %[r2], 2(%[window]) \n\t" + "lh %[r3], 2(%[real]) \n\t" + "lh %[r4], 4(%[window]) \n\t" + "lh %[r5], 4(%[real]) \n\t" + "lh %[r6], 6(%[window]) \n\t" + "lh %[r7], 6(%[real]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "mul %[r2], %[r2], %[r3] \n\t" + "mul %[r4], %[r4], %[r5] \n\t" + "mul %[r6], %[r6], %[r7] \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "addiu %[r2], %[r2], 0x2000 \n\t" + "addiu %[r4], %[r4], 0x2000 \n\t" + "addiu %[r6], %[r6], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sra %[r2], %[r2], 14 \n\t" + "sra %[r4], %[r4], 14 \n\t" + "sra %[r6], %[r6], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "mul %[r2], %[r2], %[gain_factor] \n\t" + "mul %[r4], 
%[r4], %[gain_factor] \n\t" + "mul %[r6], %[r6], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "addiu %[r2], %[r2], 0x1000 \n\t" + "addiu %[r4], %[r4], 0x1000 \n\t" + "addiu %[r6], %[r6], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "sra %[r2], %[r2], 13 \n\t" + "sra %[r4], %[r4], 13 \n\t" + "sra %[r6], %[r6], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "lh %[r3], 2(%[synthBuf]) \n\t" + "lh %[r5], 4(%[synthBuf]) \n\t" + "lh %[r7], 6(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "addu %[r2], %[r2], %[r3] \n\t" + "addu %[r4], %[r4], %[r5] \n\t" + "addu %[r6], %[r6], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "slt %[r3], %[r2], %[sat_neg] \n\t" + "slt %[r5], %[r4], %[sat_neg] \n\t" + "slt %[r7], %[r6], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "movn %[r2], %[sat_neg], %[r3] \n\t" + "movn %[r4], %[sat_neg], %[r5] \n\t" + "movn %[r6], %[sat_neg], %[r7] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "sh %[r2], 2(%[synthBuf]) \n\t" + "sh %[r4], 4(%[synthBuf]) \n\t" + "sh %[r6], 6(%[synthBuf]) \n\t" + "addiu %[window], %[window], 8 \n\t" + "addiu %[real], %[real], 8 \n\t" + "addiu %[synthBuf],%[synthBuf], 8 \n\t" + "b 4b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "5: \n\t" + "blez %[after], 6f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "addiu 
%[window], %[window], 2 \n\t" + "addiu %[real], %[real], 2 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "addiu %[synthBuf],%[synthBuf], 2 \n\t" + "b 2b \n\t" + " addiu %[after], %[after], -1 \n\t" + "6: \n\t" + ".set pop \n\t" + : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), + [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), + [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters), + [after] "+r" (after), [block10] "+r" (block10), + [window] "+r" (window), [real] "+r" (real), + [synthBuf] "+r" (synthBuf), [out] "+r" (out) + : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos), + [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen) + : "memory", "hi", "lo" + ); + + // update synthesis buffer + memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer)); + WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms); +} + +// Filter the data in the frequency domain, and create spectrum. 
+void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst, + int16_t* freq_buf) { + uint16_t *noiseSupFilter = inst->noiseSupFilter; + int16_t *real = inst->real; + int16_t *imag = inst->imag; + int32_t loop_count = 2; + int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6; + int16_t tmp16 = (int16_t)(inst->anaLen << 1) - 4; + int16_t* freq_buf_f = freq_buf; + int16_t* freq_buf_s = &freq_buf[tmp16]; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + //first sample + "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" + "lh %[tmp_2], 0(%[real]) \n\t" + "lh %[tmp_3], 0(%[imag]) \n\t" + "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" + "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" + "sra %[tmp_2], %[tmp_2], 14 \n\t" + "sra %[tmp_3], %[tmp_3], 14 \n\t" + "sh %[tmp_2], 0(%[real]) \n\t" + "sh %[tmp_3], 0(%[imag]) \n\t" + "negu %[tmp_3], %[tmp_3] \n\t" + "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" + "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" + "addiu %[real], %[real], 2 \n\t" + "addiu %[imag], %[imag], 2 \n\t" + "addiu %[noiseSupFilter], %[noiseSupFilter], 2 \n\t" + "addiu %[freq_buf_f], %[freq_buf_f], 4 \n\t" + "1: \n\t" + "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" + "lh %[tmp_2], 0(%[real]) \n\t" + "lh %[tmp_3], 0(%[imag]) \n\t" + "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t" + "lh %[tmp_5], 2(%[real]) \n\t" + "lh %[tmp_6], 2(%[imag]) \n\t" + "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" + "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" + "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t" + "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t" + "addiu %[loop_count], %[loop_count], 2 \n\t" + "sra %[tmp_2], %[tmp_2], 14 \n\t" + "sra %[tmp_3], %[tmp_3], 14 \n\t" + "sra %[tmp_5], %[tmp_5], 14 \n\t" + "sra %[tmp_6], %[tmp_6], 14 \n\t" + "addiu %[noiseSupFilter], %[noiseSupFilter], 4 \n\t" + "sh %[tmp_2], 0(%[real]) \n\t" + "sh %[tmp_2], 4(%[freq_buf_s]) \n\t" + "sh %[tmp_3], 0(%[imag]) \n\t" + "sh %[tmp_3], 6(%[freq_buf_s]) \n\t" + "negu %[tmp_3], %[tmp_3] \n\t" + "sh %[tmp_5], 2(%[real]) \n\t" + "sh %[tmp_5], 0(%[freq_buf_s]) \n\t" + 
"sh %[tmp_6], 2(%[imag]) \n\t" + "sh %[tmp_6], 2(%[freq_buf_s]) \n\t" + "negu %[tmp_6], %[tmp_6] \n\t" + "addiu %[freq_buf_s], %[freq_buf_s], -8 \n\t" + "addiu %[real], %[real], 4 \n\t" + "addiu %[imag], %[imag], 4 \n\t" + "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" + "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" + "sh %[tmp_5], 4(%[freq_buf_f]) \n\t" + "sh %[tmp_6], 6(%[freq_buf_f]) \n\t" + "blt %[loop_count], %[loop_size], 1b \n\t" + " addiu %[freq_buf_f], %[freq_buf_f], 8 \n\t" + //last two samples: + "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" + "lh %[tmp_2], 0(%[real]) \n\t" + "lh %[tmp_3], 0(%[imag]) \n\t" + "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t" + "lh %[tmp_5], 2(%[real]) \n\t" + "lh %[tmp_6], 2(%[imag]) \n\t" + "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" + "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" + "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t" + "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t" + "sra %[tmp_2], %[tmp_2], 14 \n\t" + "sra %[tmp_3], %[tmp_3], 14 \n\t" + "sra %[tmp_5], %[tmp_5], 14 \n\t" + "sra %[tmp_6], %[tmp_6], 14 \n\t" + "sh %[tmp_2], 0(%[real]) \n\t" + "sh %[tmp_2], 4(%[freq_buf_s]) \n\t" + "sh %[tmp_3], 0(%[imag]) \n\t" + "sh %[tmp_3], 6(%[freq_buf_s]) \n\t" + "negu %[tmp_3], %[tmp_3] \n\t" + "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" + "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" + "sh %[tmp_5], 4(%[freq_buf_f]) \n\t" + "sh %[tmp_6], 6(%[freq_buf_f]) \n\t" + "sh %[tmp_5], 2(%[real]) \n\t" + "sh %[tmp_6], 2(%[imag]) \n\t" + ".set pop \n\t" + : [real] "+r" (real), [imag] "+r" (imag), + [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s), + [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter), + [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3), + [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6) + : [loop_size] "r" (inst->anaLen2) + : "memory", "hi", "lo" + ); +} + +#if defined(MIPS_DSP_R1_LE) +// Denormalize the real-valued signal |in|, the output from inverse FFT. 
+void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst, + int16_t* in, + int factor) { + int32_t r0, r1, r2, r3, t0; + int len = (int)inst->anaLen; + int16_t *out = &inst->real[0]; + int shift = factor - inst->normData; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "beqz %[len], 8f \n\t" + " nop \n\t" + "bltz %[shift], 4f \n\t" + " sra %[t0], %[len], 2 \n\t" + "beqz %[t0], 2f \n\t" + " andi %[len], %[len], 3 \n\t" + "1: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "lh %[r1], 2(%[in]) \n\t" + "lh %[r2], 4(%[in]) \n\t" + "lh %[r3], 6(%[in]) \n\t" + "shllv_s.ph %[r0], %[r0], %[shift] \n\t" + "shllv_s.ph %[r1], %[r1], %[shift] \n\t" + "shllv_s.ph %[r2], %[r2], %[shift] \n\t" + "shllv_s.ph %[r3], %[r3], %[shift] \n\t" + "addiu %[in], %[in], 8 \n\t" + "addiu %[t0], %[t0], -1 \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r1], 2(%[out]) \n\t" + "sh %[r2], 4(%[out]) \n\t" + "sh %[r3], 6(%[out]) \n\t" + "bgtz %[t0], 1b \n\t" + " addiu %[out], %[out], 8 \n\t" + "2: \n\t" + "beqz %[len], 8f \n\t" + " nop \n\t" + "3: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "addiu %[in], %[in], 2 \n\t" + "addiu %[len], %[len], -1 \n\t" + "shllv_s.ph %[r0], %[r0], %[shift] \n\t" + "addiu %[out], %[out], 2 \n\t" + "bgtz %[len], 3b \n\t" + " sh %[r0], -2(%[out]) \n\t" + "b 8f \n\t" + "4: \n\t" + "negu %[shift], %[shift] \n\t" + "beqz %[t0], 6f \n\t" + " andi %[len], %[len], 3 \n\t" + "5: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "lh %[r1], 2(%[in]) \n\t" + "lh %[r2], 4(%[in]) \n\t" + "lh %[r3], 6(%[in]) \n\t" + "srav %[r0], %[r0], %[shift] \n\t" + "srav %[r1], %[r1], %[shift] \n\t" + "srav %[r2], %[r2], %[shift] \n\t" + "srav %[r3], %[r3], %[shift] \n\t" + "addiu %[in], %[in], 8 \n\t" + "addiu %[t0], %[t0], -1 \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r1], 2(%[out]) \n\t" + "sh %[r2], 4(%[out]) \n\t" + "sh %[r3], 6(%[out]) \n\t" + "bgtz %[t0], 5b \n\t" + " addiu %[out], %[out], 8 \n\t" + "6: \n\t" + "beqz %[len], 8f \n\t" + " nop \n\t" + "7: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "addiu %[in], 
%[in], 2 \n\t" + "addiu %[len], %[len], -1 \n\t" + "srav %[r0], %[r0], %[shift] \n\t" + "addiu %[out], %[out], 2 \n\t" + "bgtz %[len], 7b \n\t" + " sh %[r0], -2(%[out]) \n\t" + "8: \n\t" + ".set pop \n\t" + : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1), + [r2] "=&r" (r2), [r3] "=&r" (r3) + : [len] "r" (len), [shift] "r" (shift), [in] "r" (in), + [out] "r" (out) + : "memory" + ); +} +#endif + +// Normalize the real-valued signal |in|, the input to forward FFT. +void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst, + const int16_t* in, + int16_t* out) { + int32_t r0, r1, r2, r3, t0; + int len = (int)inst->anaLen; + int shift = inst->normData; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "beqz %[len], 4f \n\t" + " sra %[t0], %[len], 2 \n\t" + "beqz %[t0], 2f \n\t" + " andi %[len], %[len], 3 \n\t" + "1: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "lh %[r1], 2(%[in]) \n\t" + "lh %[r2], 4(%[in]) \n\t" + "lh %[r3], 6(%[in]) \n\t" + "sllv %[r0], %[r0], %[shift] \n\t" + "sllv %[r1], %[r1], %[shift] \n\t" + "sllv %[r2], %[r2], %[shift] \n\t" + "sllv %[r3], %[r3], %[shift] \n\t" + "addiu %[in], %[in], 8 \n\t" + "addiu %[t0], %[t0], -1 \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r1], 2(%[out]) \n\t" + "sh %[r2], 4(%[out]) \n\t" + "sh %[r3], 6(%[out]) \n\t" + "bgtz %[t0], 1b \n\t" + " addiu %[out], %[out], 8 \n\t" + "2: \n\t" + "beqz %[len], 4f \n\t" + " nop \n\t" + "3: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "addiu %[in], %[in], 2 \n\t" + "addiu %[len], %[len], -1 \n\t" + "sllv %[r0], %[r0], %[shift] \n\t" + "addiu %[out], %[out], 2 \n\t" + "bgtz %[len], 3b \n\t" + " sh %[r0], -2(%[out]) \n\t" + "4: \n\t" + ".set pop \n\t" + : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1), + [r2] "=&r" (r2), [r3] "=&r" (r3) + : [len] "r" (len), [shift] "r" (shift), [in] "r" (in), + [out] "r" (out) + : "memory" + ); +} + diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_neon.c 
b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_neon.c new file mode 100644 index 00000000..65788ae2 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_neon.c @@ -0,0 +1,598 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/ns/nsx_core.h" + +#include <arm_neon.h> +#include <assert.h> + +// Constants to compensate for shifting signal log(2^shifts). +const int16_t WebRtcNsx_kLogTable[9] = { + 0, 177, 355, 532, 710, 887, 1065, 1242, 1420 +}; + +const int16_t WebRtcNsx_kCounterDiv[201] = { + 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731, + 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311, + 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840, + 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607, + 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475, + 468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390, + 386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331, + 328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, + 285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254, + 252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228, + 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206, + 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188, + 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, + 172, 172, 171, 170, 169, 168, 
167, 166, 165, 165, 164, 163 +}; + +const int16_t WebRtcNsx_kLogTableFrac[256] = { + 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, + 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, + 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, + 63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81, + 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99, + 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, + 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, + 147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174, + 175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187, + 188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200, + 201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212, + 213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224, + 225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236, + 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247, + 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255 +}; + +// Update the noise estimation information. 
+static void UpdateNoiseEstimateNeon(NoiseSuppressionFixedC* inst, int offset) { + const int16_t kExp2Const = 11819; // Q13 + int16_t* ptr_noiseEstLogQuantile = NULL; + int16_t* ptr_noiseEstQuantile = NULL; + int16x4_t kExp2Const16x4 = vdup_n_s16(kExp2Const); + int32x4_t twentyOne32x4 = vdupq_n_s32(21); + int32x4_t constA32x4 = vdupq_n_s32(0x1fffff); + int32x4_t constB32x4 = vdupq_n_s32(0x200000); + + int16_t tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset, + inst->magnLen); + + // Guarantee a Q-domain as high as possible and still fit in int16 + inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(kExp2Const, + tmp16, + 21); + + int32x4_t qNoise32x4 = vdupq_n_s32(inst->qNoise); + + for (ptr_noiseEstLogQuantile = &inst->noiseEstLogQuantile[offset], + ptr_noiseEstQuantile = &inst->noiseEstQuantile[0]; + ptr_noiseEstQuantile < &inst->noiseEstQuantile[inst->magnLen - 3]; + ptr_noiseEstQuantile += 4, ptr_noiseEstLogQuantile += 4) { + + // tmp32no2 = kExp2Const * inst->noiseEstLogQuantile[offset + i]; + int16x4_t v16x4 = vld1_s16(ptr_noiseEstLogQuantile); + int32x4_t v32x4B = vmull_s16(v16x4, kExp2Const16x4); + + // tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac + int32x4_t v32x4A = vandq_s32(v32x4B, constA32x4); + v32x4A = vorrq_s32(v32x4A, constB32x4); + + // tmp16 = (int16_t)(tmp32no2 >> 21); + v32x4B = vshrq_n_s32(v32x4B, 21); + + // tmp16 -= 21;// shift 21 to get result in Q0 + v32x4B = vsubq_s32(v32x4B, twentyOne32x4); + + // tmp16 += (int16_t) inst->qNoise; + // shift to get result in Q(qNoise) + v32x4B = vaddq_s32(v32x4B, qNoise32x4); + + // if (tmp16 < 0) { + // tmp32no1 >>= -tmp16; + // } else { + // tmp32no1 <<= tmp16; + // } + v32x4B = vshlq_s32(v32x4A, v32x4B); + + // tmp16 = WebRtcSpl_SatW32ToW16(tmp32no1); + v16x4 = vqmovn_s32(v32x4B); + + //inst->noiseEstQuantile[i] = tmp16; + vst1_s16(ptr_noiseEstQuantile, v16x4); + } + + // Last iteration: + + // inst->quantile[i]=exp(inst->lquantile[offset+i]); + // in Q21 
+ int32_t tmp32no2 = kExp2Const * *ptr_noiseEstLogQuantile; + int32_t tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac + + tmp16 = (int16_t)(tmp32no2 >> 21); + tmp16 -= 21;// shift 21 to get result in Q0 + tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise) + if (tmp16 < 0) { + tmp32no1 >>= -tmp16; + } else { + tmp32no1 <<= tmp16; + } + *ptr_noiseEstQuantile = WebRtcSpl_SatW32ToW16(tmp32no1); +} + +// Noise Estimation +void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst, + uint16_t* magn, + uint32_t* noise, + int16_t* q_noise) { + int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv; + int16_t countProd, delta, zeros, frac; + int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2; + const int16_t log2_const = 22713; + const int16_t width_factor = 21845; + + size_t i, s, offset; + + tabind = inst->stages - inst->normData; + assert(tabind < 9); + assert(tabind > -9); + if (tabind < 0) { + logval = -WebRtcNsx_kLogTable[-tabind]; + } else { + logval = WebRtcNsx_kLogTable[tabind]; + } + + int16x8_t logval_16x8 = vdupq_n_s16(logval); + + // lmagn(i)=log(magn(i))=log(2)*log2(magn(i)) + // magn is in Q(-stages), and the real lmagn values are: + // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages) + // lmagn in Q8 + for (i = 0; i < inst->magnLen; i++) { + if (magn[i]) { + zeros = WebRtcSpl_NormU32((uint32_t)magn[i]); + frac = (int16_t)((((uint32_t)magn[i] << zeros) + & 0x7FFFFFFF) >> 23); + assert(frac < 256); + // log2(magn(i)) + log2 = (int16_t)(((31 - zeros) << 8) + + WebRtcNsx_kLogTableFrac[frac]); + // log2(magn(i))*log(2) + lmagn[i] = (int16_t)((log2 * log2_const) >> 15); + // + log(2^stages) + lmagn[i] += logval; + } else { + lmagn[i] = logval; + } + } + + int16x4_t Q3_16x4 = vdup_n_s16(3); + int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8); + int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor); + + int16_t factor = FACTOR_Q7; + if (inst->blockIndex < END_STARTUP_LONG) + factor = FACTOR_Q7_STARTUP; + + // Loop 
over simultaneous estimates + for (s = 0; s < SIMULT; s++) { + offset = s * inst->magnLen; + + // Get counter values from state + counter = inst->noiseEstCounter[s]; + assert(counter < 201); + countDiv = WebRtcNsx_kCounterDiv[counter]; + countProd = (int16_t)(counter * countDiv); + + // quant_est(...) + int16_t deltaBuff[8]; + int16x4_t tmp16x4_0; + int16x4_t tmp16x4_1; + int16x4_t countDiv_16x4 = vdup_n_s16(countDiv); + int16x8_t countProd_16x8 = vdupq_n_s16(countProd); + int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv); + int16x8_t prod16x8 = vqrdmulhq_s16(WIDTHFACTOR_16x8, tmp16x8_0); + int16x8_t tmp16x8_1; + int16x8_t tmp16x8_2; + int16x8_t tmp16x8_3; + uint16x8_t tmp16x8_4; + int32x4_t tmp32x4; + + for (i = 0; i + 7 < inst->magnLen; i += 8) { + // Compute delta. + // Smaller step size during startup. This prevents from using + // unrealistic values causing overflow. + tmp16x8_0 = vdupq_n_s16(factor); + vst1q_s16(deltaBuff, tmp16x8_0); + + int j; + for (j = 0; j < 8; j++) { + if (inst->noiseEstDensity[offset + i + j] > 512) { + // Get values for deltaBuff by shifting intead of dividing. + int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i + j]); + deltaBuff[j] = (int16_t)(FACTOR_Q16 >> (14 - factor)); + } + } + + // Update log quantile estimate + + // tmp16 = (int16_t)((delta * countDiv) >> 14); + tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4); + tmp16x4_1 = vshrn_n_s32(tmp32x4, 14); + tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4); + tmp16x4_0 = vshrn_n_s32(tmp32x4, 14); + tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // Keep for several lines. 
+ + // prepare for the "if" branch + // tmp16 += 2; + // tmp16_1 = (Word16)(tmp16>>2); + tmp16x8_1 = vrshrq_n_s16(tmp16x8_0, 2); + + // inst->noiseEstLogQuantile[offset+i] + tmp16_1; + tmp16x8_2 = vld1q_s16(&inst->noiseEstLogQuantile[offset + i]); // Keep + tmp16x8_1 = vaddq_s16(tmp16x8_2, tmp16x8_1); // Keep for several lines + + // Prepare for the "else" branch + // tmp16 += 1; + // tmp16_1 = (Word16)(tmp16>>1); + tmp16x8_0 = vrshrq_n_s16(tmp16x8_0, 1); + + // tmp16_2 = (int16_t)((tmp16_1 * 3) >> 1); + tmp32x4 = vmull_s16(vget_low_s16(tmp16x8_0), Q3_16x4); + tmp16x4_1 = vshrn_n_s32(tmp32x4, 1); + + // tmp16_2 = (int16_t)((tmp16_1 * 3) >> 1); + tmp32x4 = vmull_s16(vget_high_s16(tmp16x8_0), Q3_16x4); + tmp16x4_0 = vshrn_n_s32(tmp32x4, 1); + + // inst->noiseEstLogQuantile[offset + i] - tmp16_2; + tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep + tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0); + + // logval is the smallest fixed point representation we can have. Values + // below that will correspond to values in the interval [0, 1], which + // can't possibly occur. 
+ tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8); + + // Do the if-else branches: + tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines + tmp16x8_4 = vcgtq_s16(tmp16x8_3, tmp16x8_2); + tmp16x8_2 = vbslq_s16(tmp16x8_4, tmp16x8_1, tmp16x8_0); + vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2); + + // Update density estimate + // tmp16_1 + tmp16_2 + tmp16x8_1 = vld1q_s16(&inst->noiseEstDensity[offset + i]); + tmp16x8_0 = vqrdmulhq_s16(tmp16x8_1, countProd_16x8); + tmp16x8_0 = vaddq_s16(tmp16x8_0, prod16x8); + + // lmagn[i] - inst->noiseEstLogQuantile[offset + i] + tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2); + tmp16x8_3 = vabsq_s16(tmp16x8_3); + tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3); + tmp16x8_1 = vbslq_s16(tmp16x8_4, tmp16x8_0, tmp16x8_1); + vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1); + } // End loop over magnitude spectrum + + // Last iteration over magnitude spectrum: + // compute delta + if (inst->noiseEstDensity[offset + i] > 512) { + // Get values for deltaBuff by shifting intead of dividing. + int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]); + delta = (int16_t)(FACTOR_Q16 >> (14 - factor)); + } else { + delta = FACTOR_Q7; + if (inst->blockIndex < END_STARTUP_LONG) { + // Smaller step size during startup. This prevents from using + // unrealistic values causing overflow. + delta = FACTOR_Q7_STARTUP; + } + } + // update log quantile estimate + tmp16 = (int16_t)((delta * countDiv) >> 14); + if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) { + // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2 + // CounterDiv=1/(inst->counter[s]+1) in Q15 + tmp16 += 2; + inst->noiseEstLogQuantile[offset + i] += tmp16 / 4; + } else { + tmp16 += 1; + // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2 + // TODO(bjornv): investigate why we need to truncate twice. 
+ tmp16no2 = (int16_t)((tmp16 / 2) * 3 / 2); + inst->noiseEstLogQuantile[offset + i] -= tmp16no2; + if (inst->noiseEstLogQuantile[offset + i] < logval) { + // logval is the smallest fixed point representation we can have. + // Values below that will correspond to values in the interval + // [0, 1], which can't possibly occur. + inst->noiseEstLogQuantile[offset + i] = logval; + } + } + + // update density estimate + if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i]) + < WIDTH_Q8) { + tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + inst->noiseEstDensity[offset + i], countProd, 15); + tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + width_factor, countDiv, 15); + inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2; + } + + + if (counter >= END_STARTUP_LONG) { + inst->noiseEstCounter[s] = 0; + if (inst->blockIndex >= END_STARTUP_LONG) { + UpdateNoiseEstimateNeon(inst, offset); + } + } + inst->noiseEstCounter[s]++; + + } // end loop over simultaneous estimates + + // Sequentially update the noise during startup + if (inst->blockIndex < END_STARTUP_LONG) { + UpdateNoiseEstimateNeon(inst, offset); + } + + for (i = 0; i < inst->magnLen; i++) { + noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise) + } + (*q_noise) = (int16_t)inst->qNoise; +} + +// Filter the data in the frequency domain, and create spectrum. +// Step (1) scales inst->real[] and inst->imag[] in place by the matching +// inst->noiseSupFilter[] gain (product shifted right by 14). Step (2) writes +// the interleaved pairs (real[i], -imag[i]) to freq_buf. +// The vector loops rely on the asserted layout: magnLen % 8 == 1 and +// anaLen2 % 16 == 0. +void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst, + int16_t* freq_buf) { + assert(inst->magnLen % 8 == 1); + assert(inst->anaLen2 % 16 == 0); + + // (1) Filtering.
+ + // Fixed point C code for the next block is as follows: + // for (i = 0; i < inst->magnLen; i++) { + // inst->real[i] = (int16_t)((inst->real[i] * + // (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages) + // inst->imag[i] = (int16_t)((inst->imag[i] * + // (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages) + // } + + int16_t* preal = &inst->real[0]; + int16_t* pimag = &inst->imag[0]; + int16_t* pns_filter = (int16_t*)&inst->noiseSupFilter[0]; + // magnLen is 8k + 1, so this bound makes the vector loop consume exactly + // the first 8k elements; the odd last one is filtered separately below. + int16_t* pimag_end = pimag + inst->magnLen - 4; + + while (pimag < pimag_end) { + int16x8_t real = vld1q_s16(preal); + int16x8_t imag = vld1q_s16(pimag); + int16x8_t ns_filter = vld1q_s16(pns_filter); + + int32x4_t tmp_r_0 = vmull_s16(vget_low_s16(real), vget_low_s16(ns_filter)); + int32x4_t tmp_i_0 = vmull_s16(vget_low_s16(imag), vget_low_s16(ns_filter)); + int32x4_t tmp_r_1 = vmull_s16(vget_high_s16(real), + vget_high_s16(ns_filter)); + int32x4_t tmp_i_1 = vmull_s16(vget_high_s16(imag), + vget_high_s16(ns_filter)); + + int16x4_t result_r_0 = vshrn_n_s32(tmp_r_0, 14); + int16x4_t result_i_0 = vshrn_n_s32(tmp_i_0, 14); + int16x4_t result_r_1 = vshrn_n_s32(tmp_r_1, 14); + int16x4_t result_i_1 = vshrn_n_s32(tmp_i_1, 14); + + vst1q_s16(preal, vcombine_s16(result_r_0, result_r_1)); + vst1q_s16(pimag, vcombine_s16(result_i_0, result_i_1)); + preal += 8; + pimag += 8; + pns_filter += 8; + } + + // Filter the last element + *preal = (int16_t)((*preal * *pns_filter) >> 14); + *pimag = (int16_t)((*pimag * *pns_filter) >> 14); + + // (2) Create spectrum.
+ + // Fixed point C code for the rest of the function is as follows: + // freq_buf[0] = inst->real[0]; + // freq_buf[1] = -inst->imag[0]; + // for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) { + // freq_buf[j] = inst->real[i]; + // freq_buf[j + 1] = -inst->imag[i]; + // } + // freq_buf[inst->anaLen] = inst->real[inst->anaLen2]; + // freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; + + preal = &inst->real[0]; + pimag = &inst->imag[0]; + pimag_end = pimag + inst->anaLen2; + int16_t * freq_buf_start = freq_buf; + while (pimag < pimag_end) { + // loop unroll + int16x8x2_t real_imag_0; + int16x8x2_t real_imag_1; + real_imag_0.val[1] = vld1q_s16(pimag); + real_imag_0.val[0] = vld1q_s16(preal); + preal += 8; + pimag += 8; + real_imag_1.val[1] = vld1q_s16(pimag); + real_imag_1.val[0] = vld1q_s16(preal); + preal += 8; + pimag += 8; + + real_imag_0.val[1] = vnegq_s16(real_imag_0.val[1]); + real_imag_1.val[1] = vnegq_s16(real_imag_1.val[1]); + vst2q_s16(freq_buf_start, real_imag_0); + freq_buf_start += 16; + vst2q_s16(freq_buf_start, real_imag_1); + freq_buf_start += 16; + } + freq_buf[inst->anaLen] = inst->real[inst->anaLen2]; + freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; +} + +// Noise suppression synthesis step: window inst->real[] and scale it by +// gain_factor, overlap-add the result into inst->synthesisBuffer, read out the +// fully processed segment (the first blockLen10ms samples) to out_frame, then +// shift the synthesis buffer left by blockLen10ms and zero the vacated tail. +// Requires anaLen and blockLen10ms to be multiples of 16 (asserted). +void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst, + int16_t* out_frame, + int16_t gain_factor) { + assert(inst->anaLen % 16 == 0); + assert(inst->blockLen10ms % 16 == 0); + + int16_t* preal_start = inst->real; + const int16_t* pwindow = inst->window; + int16_t* preal_end = preal_start + inst->anaLen; + int16_t* psynthesis_buffer = inst->synthesisBuffer; + + while (preal_start < preal_end) { + // Loop unroll.
+ int16x8_t window_0 = vld1q_s16(pwindow); + int16x8_t real_0 = vld1q_s16(preal_start); + int16x8_t synthesis_buffer_0 = vld1q_s16(psynthesis_buffer); + + int16x8_t window_1 = vld1q_s16(pwindow + 8); + int16x8_t real_1 = vld1q_s16(preal_start + 8); + int16x8_t synthesis_buffer_1 = vld1q_s16(psynthesis_buffer + 8); + + int32x4_t tmp32a_0_low = vmull_s16(vget_low_s16(real_0), + vget_low_s16(window_0)); + int32x4_t tmp32a_0_high = vmull_s16(vget_high_s16(real_0), + vget_high_s16(window_0)); + + int32x4_t tmp32a_1_low = vmull_s16(vget_low_s16(real_1), + vget_low_s16(window_1)); + int32x4_t tmp32a_1_high = vmull_s16(vget_high_s16(real_1), + vget_high_s16(window_1)); + + int16x4_t tmp16a_0_low = vqrshrn_n_s32(tmp32a_0_low, 14); + int16x4_t tmp16a_0_high = vqrshrn_n_s32(tmp32a_0_high, 14); + + int16x4_t tmp16a_1_low = vqrshrn_n_s32(tmp32a_1_low, 14); + int16x4_t tmp16a_1_high = vqrshrn_n_s32(tmp32a_1_high, 14); + + int32x4_t tmp32b_0_low = vmull_n_s16(tmp16a_0_low, gain_factor); + int32x4_t tmp32b_0_high = vmull_n_s16(tmp16a_0_high, gain_factor); + + int32x4_t tmp32b_1_low = vmull_n_s16(tmp16a_1_low, gain_factor); + int32x4_t tmp32b_1_high = vmull_n_s16(tmp16a_1_high, gain_factor); + + int16x4_t tmp16b_0_low = vqrshrn_n_s32(tmp32b_0_low, 13); + int16x4_t tmp16b_0_high = vqrshrn_n_s32(tmp32b_0_high, 13); + + int16x4_t tmp16b_1_low = vqrshrn_n_s32(tmp32b_1_low, 13); + int16x4_t tmp16b_1_high = vqrshrn_n_s32(tmp32b_1_high, 13); + + synthesis_buffer_0 = vqaddq_s16(vcombine_s16(tmp16b_0_low, tmp16b_0_high), + synthesis_buffer_0); + synthesis_buffer_1 = vqaddq_s16(vcombine_s16(tmp16b_1_low, tmp16b_1_high), + synthesis_buffer_1); + vst1q_s16(psynthesis_buffer, synthesis_buffer_0); + vst1q_s16(psynthesis_buffer + 8, synthesis_buffer_1); + + pwindow += 16; + preal_start += 16; + psynthesis_buffer += 16; + } + + // Read out fully processed segment.
+ int16_t * p_start = inst->synthesisBuffer; + int16_t * p_end = inst->synthesisBuffer + inst->blockLen10ms; + int16_t * p_frame = out_frame; + while (p_start < p_end) { + int16x8_t frame_0 = vld1q_s16(p_start); + vst1q_s16(p_frame, frame_0); + p_start += 8; + p_frame += 8; + } + + // Update synthesis buffer: move the remaining samples to the front, then + // clear the last blockLen10ms samples. + int16_t* p_start_src = inst->synthesisBuffer + inst->blockLen10ms; + int16_t* p_end_src = inst->synthesisBuffer + inst->anaLen; + int16_t* p_start_dst = inst->synthesisBuffer; + while (p_start_src < p_end_src) { + int16x8_t frame = vld1q_s16(p_start_src); + vst1q_s16(p_start_dst, frame); + p_start_src += 8; + p_start_dst += 8; + } + + p_start = inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms; + p_end = p_start + inst->blockLen10ms; + int16x8_t zero = vdupq_n_s16(0); + for (;p_start < p_end; p_start += 8) { + vst1q_s16(p_start, zero); + } +} + +// Update analysis buffer for lower band, and window data before FFT. +// Shifts inst->analysisBuffer left by blockLen10ms, appends new_speech +// (blockLen10ms samples) at its end, then writes the windowed buffer +// (buffer[i] * window[i], rounding shift right by 14) to out[0..anaLen-1]. +// Requires anaLen and blockLen10ms to be multiples of 16 (asserted). +void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst, + int16_t* out, + int16_t* new_speech) { + assert(inst->blockLen10ms % 16 == 0); + assert(inst->anaLen % 16 == 0); + + // For lower band update analysis buffer.
+ // memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, + // (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer)); + int16_t* p_start_src = inst->analysisBuffer + inst->blockLen10ms; + int16_t* p_end_src = inst->analysisBuffer + inst->anaLen; + int16_t* p_start_dst = inst->analysisBuffer; + while (p_start_src < p_end_src) { + int16x8_t frame = vld1q_s16(p_start_src); + vst1q_s16(p_start_dst, frame); + + p_start_src += 8; + p_start_dst += 8; + } + + // memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, + // new_speech, inst->blockLen10ms * sizeof(*inst->analysisBuffer)); + p_start_src = new_speech; + p_end_src = new_speech + inst->blockLen10ms; + p_start_dst = inst->analysisBuffer + inst->anaLen - inst->blockLen10ms; + while (p_start_src < p_end_src) { + int16x8_t frame = vld1q_s16(p_start_src); + vst1q_s16(p_start_dst, frame); + + p_start_src += 8; + p_start_dst += 8; + } + + // Window data before FFT. + const int16_t* p_start_window = inst->window; + const int16_t* p_start_buffer = inst->analysisBuffer; + int16_t* p_start_out = out; + const int16_t* p_end_out = out + inst->anaLen; + + while (p_start_out < p_end_out) { + // Load at the top of each iteration. Pre-loading the next vectors at the + // end of the previous iteration would read 8 elements past the first + // anaLen elements of inst->window and inst->analysisBuffer on the final pass.
+ int16x8_t window = vld1q_s16(p_start_window); + int16x8_t buffer = vld1q_s16(p_start_buffer); + int32x4_t tmp32_low = vmull_s16(vget_low_s16(window), vget_low_s16(buffer)); + int32x4_t tmp32_high = vmull_s16(vget_high_s16(window), + vget_high_s16(buffer)); + + int16x4_t result_low = vrshrn_n_s32(tmp32_low, 14); + int16x4_t result_high = vrshrn_n_s32(tmp32_high, 14); + vst1q_s16(p_start_out, vcombine_s16(result_low, result_high)); + + p_start_buffer += 8; + p_start_window += 8; + p_start_out += 8; + } +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_defines.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_defines.h new file mode 100644 index 00000000..862dc3ca --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_defines.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ + +#define ANAL_BLOCKL_MAX 256 /* Max analysis block length */ +#define HALF_ANAL_BLOCKL 129 /* Half max analysis block length + 1 */ +#define NUM_HIGH_BANDS_MAX 2 /* Max number of high bands */ +#define SIMULT 3 +#define END_STARTUP_LONG 200 +#define END_STARTUP_SHORT 50 +#define FACTOR_Q16 2621440 /* 40 in Q16 */ +#define FACTOR_Q7 5120 /* 40 in Q7 */ +#define FACTOR_Q7_STARTUP 1024 /* 8 in Q7 */ +#define WIDTH_Q8 3 /* 0.01 in Q8 (or 25 ) */ + +/* PARAMETERS FOR NEW METHOD */ +#define DD_PR_SNR_Q11 2007 /* ~= Q11(0.98) DD update of prior SNR */ +#define ONE_MINUS_DD_PR_SNR_Q11 41 /* DD update of prior SNR */ +#define SPECT_FLAT_TAVG_Q14 4915 /* (0.30) tavg parameter for spectral flatness measure */ +#define SPECT_DIFF_TAVG_Q8 77 /* (0.30) tavg parameter for spectral flatness measure */ +#define PRIOR_UPDATE_Q14 1638 /* Q14(0.1) Update parameter of prior model */ +#define NOISE_UPDATE_Q8 26 /* 26 ~= Q8(0.1) Update parameter for noise */ + +/* Probability threshold for noise state in speech/noise likelihood. */ +#define ONE_MINUS_PROB_RANGE_Q8 205 /* 205 ~= Q8(0.8) */ +#define HIST_PAR_EST 1000 /* Histogram size for estimation of parameters */ + +/* FEATURE EXTRACTION CONFIG */ +/* Bin size of histogram */ +#define BIN_SIZE_LRT 10 +/* Scale parameters: multiply dominant peaks of the histograms by scale factor to obtain. */ +/* Thresholds for prior model */ +#define FACTOR_1_LRT_DIFF 6 /* For LRT and spectral difference (5 times bigger) */ +/* For spectral_flatness: used when noise is flatter than speech (10 times bigger). */ +#define FACTOR_2_FLAT_Q10 922 +/* Peak limit for spectral flatness (varies between 0 and 1) */ +#define THRES_PEAK_FLAT 24 /* * 2 * BIN_SIZE_FLAT_FX */ +/* Limit on spacing of two highest peaks in histogram: spacing determined by bin size. 
*/ +#define LIM_PEAK_SPACE_FLAT_DIFF 4 /* * 2 * BIN_SIZE_DIFF_FX */ +/* Limit on relevance of second peak */ +#define LIM_PEAK_WEIGHT_FLAT_DIFF 2 +#define THRES_FLUCT_LRT 10240 /* = 20 * inst->modelUpdate; fluctuation limit of LRT feat. */ +/* Limit on the max and min values for the feature thresholds */ +#define MAX_FLAT_Q10 38912 /* * 2 * BIN_SIZE_FLAT_FX */ +#define MIN_FLAT_Q10 4096 /* * 2 * BIN_SIZE_FLAT_FX */ +#define MAX_DIFF 100 /* * 2 * BIN_SIZE_DIFF_FX */ +#define MIN_DIFF 16 /* * 2 * BIN_SIZE_DIFF_FX */ +/* Criteria of weight of histogram peak to accept/reject feature */ +#define THRES_WEIGHT_FLAT_DIFF 154 /*(int)(0.3*(inst->modelUpdate)) for flatness and difference */ + +#define STAT_UPDATES 9 /* Update every 512 = 1 << 9 block */ +#define ONE_MINUS_GAMMA_PAUSE_Q8 13 /* ~= Q8(0.05) Update for conservative noise estimate */ +#define GAMMA_NOISE_TRANS_AND_SPEECH_Q8 3 /* ~= Q8(0.01) Update for transition and noise region */ + +#endif /* WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ */ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/windows_private.h b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/windows_private.h new file mode 100644 index 00000000..44c2e846 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/windows_private.h @@ -0,0 +1,574 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_ + +// Hanning window for 4ms 16kHz +static const float kHanning64w128[128] = { + 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, + 0.07356456359967f, 0.09801714032956f, 0.12241067519922f, + 0.14673047445536f, 0.17096188876030f, 0.19509032201613f, + 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, + 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, + 0.35989503653499f, 0.38268343236509f, 0.40524131400499f, + 0.42755509343028f, 0.44961132965461f, 0.47139673682600f, + 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, + 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, + 0.61523159058063f, 0.63439328416365f, 0.65317284295378f, + 0.67155895484702f, 0.68954054473707f, 0.70710678118655f, + 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, + 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, + 0.81758481315158f, 0.83146961230255f, 0.84485356524971f, + 0.85772861000027f, 0.87008699110871f, 0.88192126434835f, + 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, + 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, + 0.94952818059304f, 0.95694033573221f, 0.96377606579544f, + 0.97003125319454f, 0.97570213003853f, 0.98078528040323f, + 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, + 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, + 0.99969881869620f, 1.00000000000000f, + 0.99969881869620f, 0.99879545620517f, 0.99729045667869f, + 0.99518472667220f, 0.99247953459871f, 0.98917650996478f, + 0.98527764238894f, 0.98078528040323f, 0.97570213003853f, + 0.97003125319454f, 0.96377606579544f, 0.95694033573221f, + 0.94952818059304f, 0.94154406518302f, 0.93299279883474f, + 0.92387953251129f, 0.91420975570353f, 0.90398929312344f, + 0.89322430119552f, 0.88192126434835f, 0.87008699110871f, + 0.85772861000027f, 0.84485356524971f, 0.83146961230255f, + 
0.81758481315158f, 0.80320753148064f, 0.78834642762661f, + 0.77301045336274f, 0.75720884650648f, 0.74095112535496f, + 0.72424708295147f, 0.70710678118655f, 0.68954054473707f, + 0.67155895484702f, 0.65317284295378f, 0.63439328416365f, + 0.61523159058063f, 0.59569930449243f, 0.57580819141785f, + 0.55557023301960f, 0.53499761988710f, 0.51410274419322f, + 0.49289819222978f, 0.47139673682600f, 0.44961132965461f, + 0.42755509343028f, 0.40524131400499f, 0.38268343236509f, + 0.35989503653499f, 0.33688985339222f, 0.31368174039889f, + 0.29028467725446f, 0.26671275747490f, 0.24298017990326f, + 0.21910124015687f, 0.19509032201613f, 0.17096188876030f, + 0.14673047445536f, 0.12241067519922f, 0.09801714032956f, + 0.07356456359967f, 0.04906767432742f, 0.02454122852291f +}; + + + +// hybrid Hanning & flat window +static const float kBlocks80w128[128] = { + (float)0.00000000, (float)0.03271908, (float)0.06540313, (float)0.09801714, (float)0.13052619, + (float)0.16289547, (float)0.19509032, (float)0.22707626, (float)0.25881905, (float)0.29028468, + (float)0.32143947, (float)0.35225005, (float)0.38268343, (float)0.41270703, (float)0.44228869, + (float)0.47139674, (float)0.50000000, (float)0.52806785, (float)0.55557023, (float)0.58247770, + (float)0.60876143, (float)0.63439328, (float)0.65934582, (float)0.68359230, (float)0.70710678, + (float)0.72986407, (float)0.75183981, (float)0.77301045, (float)0.79335334, (float)0.81284668, + (float)0.83146961, (float)0.84920218, (float)0.86602540, (float)0.88192126, (float)0.89687274, + (float)0.91086382, (float)0.92387953, (float)0.93590593, (float)0.94693013, (float)0.95694034, + (float)0.96592583, (float)0.97387698, (float)0.98078528, (float)0.98664333, (float)0.99144486, + (float)0.99518473, (float)0.99785892, (float)0.99946459, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000,
(float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)0.99946459, (float)0.99785892, (float)0.99518473, (float)0.99144486, + (float)0.98664333, (float)0.98078528, (float)0.97387698, (float)0.96592583, (float)0.95694034, + (float)0.94693013, (float)0.93590593, (float)0.92387953, (float)0.91086382, (float)0.89687274, + (float)0.88192126, (float)0.86602540, (float)0.84920218, (float)0.83146961, (float)0.81284668, + (float)0.79335334, (float)0.77301045, (float)0.75183981, (float)0.72986407, (float)0.70710678, + (float)0.68359230, (float)0.65934582, (float)0.63439328, (float)0.60876143, (float)0.58247770, + (float)0.55557023, (float)0.52806785, (float)0.50000000, (float)0.47139674, (float)0.44228869, + (float)0.41270703, (float)0.38268343, (float)0.35225005, (float)0.32143947, (float)0.29028468, + (float)0.25881905, (float)0.22707626, (float)0.19509032, (float)0.16289547, (float)0.13052619, + (float)0.09801714, (float)0.06540313, (float)0.03271908 +}; + +// hybrid Hanning & flat window +static const float kBlocks160w256[256] = { + (float)0.00000000, (float)0.01636173, (float)0.03271908, (float)0.04906767, (float)0.06540313, + (float)0.08172107, (float)0.09801714, (float)0.11428696, (float)0.13052619, (float)0.14673047, + (float)0.16289547, (float)0.17901686, (float)0.19509032, (float)0.21111155, (float)0.22707626, + (float)0.24298018, (float)0.25881905, (float)0.27458862, (float)0.29028468, (float)0.30590302, + (float)0.32143947, (float)0.33688985, (float)0.35225005, (float)0.36751594, (float)0.38268343, + (float)0.39774847, (float)0.41270703, (float)0.42755509, (float)0.44228869,
(float)0.45690388, + (float)0.47139674, (float)0.48576339, (float)0.50000000, (float)0.51410274, (float)0.52806785, + (float)0.54189158, (float)0.55557023, (float)0.56910015, (float)0.58247770, (float)0.59569930, + (float)0.60876143, (float)0.62166057, (float)0.63439328, (float)0.64695615, (float)0.65934582, + (float)0.67155895, (float)0.68359230, (float)0.69544264, (float)0.70710678, (float)0.71858162, + (float)0.72986407, (float)0.74095113, (float)0.75183981, (float)0.76252720, (float)0.77301045, + (float)0.78328675, (float)0.79335334, (float)0.80320753, (float)0.81284668, (float)0.82226822, + (float)0.83146961, (float)0.84044840, (float)0.84920218, (float)0.85772861, (float)0.86602540, + (float)0.87409034, (float)0.88192126, (float)0.88951608, (float)0.89687274, (float)0.90398929, + (float)0.91086382, (float)0.91749450, (float)0.92387953, (float)0.93001722, (float)0.93590593, + (float)0.94154407, (float)0.94693013, (float)0.95206268, (float)0.95694034, (float)0.96156180, + (float)0.96592583, (float)0.97003125, (float)0.97387698, (float)0.97746197, (float)0.98078528, + (float)0.98384601, (float)0.98664333, (float)0.98917651, (float)0.99144486, (float)0.99344778, + (float)0.99518473, (float)0.99665524, (float)0.99785892, (float)0.99879546, (float)0.99946459, + (float)0.99986614, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, 
(float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)0.99986614, (float)0.99946459, (float)0.99879546, (float)0.99785892, + (float)0.99665524, (float)0.99518473, (float)0.99344778, (float)0.99144486, (float)0.98917651, + (float)0.98664333, (float)0.98384601, (float)0.98078528, (float)0.97746197, (float)0.97387698, + (float)0.97003125, (float)0.96592583, (float)0.96156180, (float)0.95694034, (float)0.95206268, + (float)0.94693013, (float)0.94154407, (float)0.93590593, (float)0.93001722, (float)0.92387953, + (float)0.91749450, (float)0.91086382, (float)0.90398929, (float)0.89687274, (float)0.88951608, + (float)0.88192126, (float)0.87409034, (float)0.86602540, (float)0.85772861, (float)0.84920218, + (float)0.84044840, (float)0.83146961, (float)0.82226822, (float)0.81284668, (float)0.80320753, + (float)0.79335334, (float)0.78328675, (float)0.77301045, (float)0.76252720, (float)0.75183981, + (float)0.74095113, (float)0.72986407, (float)0.71858162, (float)0.70710678, (float)0.69544264, + (float)0.68359230, (float)0.67155895, (float)0.65934582, (float)0.64695615, (float)0.63439328, + (float)0.62166057, (float)0.60876143, (float)0.59569930, (float)0.58247770, (float)0.56910015, + (float)0.55557023, (float)0.54189158, (float)0.52806785, (float)0.51410274, (float)0.50000000, + (float)0.48576339, (float)0.47139674, (float)0.45690388, (float)0.44228869, (float)0.42755509, + (float)0.41270703, (float)0.39774847, (float)0.38268343, (float)0.36751594, (float)0.35225005, + 
(float)0.33688985, (float)0.32143947, (float)0.30590302, (float)0.29028468, (float)0.27458862, + (float)0.25881905, (float)0.24298018, (float)0.22707626, (float)0.21111155, (float)0.19509032, + (float)0.17901686, (float)0.16289547, (float)0.14673047, (float)0.13052619, (float)0.11428696, + (float)0.09801714, (float)0.08172107, (float)0.06540313, (float)0.04906767, (float)0.03271908, + (float)0.01636173 +}; + +// hybrid Hanning & flat window: for 20ms +static const float kBlocks320w512[512] = { + (float)0.00000000, (float)0.00818114, (float)0.01636173, (float)0.02454123, (float)0.03271908, + (float)0.04089475, (float)0.04906767, (float)0.05723732, (float)0.06540313, (float)0.07356456, + (float)0.08172107, (float)0.08987211, (float)0.09801714, (float)0.10615561, (float)0.11428696, + (float)0.12241068, (float)0.13052619, (float)0.13863297, (float)0.14673047, (float)0.15481816, + (float)0.16289547, (float)0.17096189, (float)0.17901686, (float)0.18705985, (float)0.19509032, + (float)0.20310773, (float)0.21111155, (float)0.21910124, (float)0.22707626, (float)0.23503609, + (float)0.24298018, (float)0.25090801, (float)0.25881905, (float)0.26671276, (float)0.27458862, + (float)0.28244610, (float)0.29028468, (float)0.29810383, (float)0.30590302, (float)0.31368174, + (float)0.32143947, (float)0.32917568, (float)0.33688985, (float)0.34458148, (float)0.35225005, + (float)0.35989504, (float)0.36751594, (float)0.37511224, (float)0.38268343, (float)0.39022901, + (float)0.39774847, (float)0.40524131, (float)0.41270703, (float)0.42014512, (float)0.42755509, + (float)0.43493645, (float)0.44228869, (float)0.44961133, (float)0.45690388, (float)0.46416584, + (float)0.47139674, (float)0.47859608, (float)0.48576339, (float)0.49289819, (float)0.50000000, + (float)0.50706834, (float)0.51410274, (float)0.52110274, (float)0.52806785, (float)0.53499762, + (float)0.54189158, (float)0.54874927, (float)0.55557023, (float)0.56235401, (float)0.56910015, + (float)0.57580819, (float)0.58247770,
(float)0.58910822, (float)0.59569930, (float)0.60225052, + (float)0.60876143, (float)0.61523159, (float)0.62166057, (float)0.62804795, (float)0.63439328, + (float)0.64069616, (float)0.64695615, (float)0.65317284, (float)0.65934582, (float)0.66547466, + (float)0.67155895, (float)0.67759830, (float)0.68359230, (float)0.68954054, (float)0.69544264, + (float)0.70129818, (float)0.70710678, (float)0.71286806, (float)0.71858162, (float)0.72424708, + (float)0.72986407, (float)0.73543221, (float)0.74095113, (float)0.74642045, (float)0.75183981, + (float)0.75720885, (float)0.76252720, (float)0.76779452, (float)0.77301045, (float)0.77817464, + (float)0.78328675, (float)0.78834643, (float)0.79335334, (float)0.79830715, (float)0.80320753, + (float)0.80805415, (float)0.81284668, (float)0.81758481, (float)0.82226822, (float)0.82689659, + (float)0.83146961, (float)0.83598698, (float)0.84044840, (float)0.84485357, (float)0.84920218, + (float)0.85349396, (float)0.85772861, (float)0.86190585, (float)0.86602540, (float)0.87008699, + (float)0.87409034, (float)0.87803519, (float)0.88192126, (float)0.88574831, (float)0.88951608, + (float)0.89322430, (float)0.89687274, (float)0.90046115, (float)0.90398929, (float)0.90745693, + (float)0.91086382, (float)0.91420976, (float)0.91749450, (float)0.92071783, (float)0.92387953, + (float)0.92697940, (float)0.93001722, (float)0.93299280, (float)0.93590593, (float)0.93875641, + (float)0.94154407, (float)0.94426870, (float)0.94693013, (float)0.94952818, (float)0.95206268, + (float)0.95453345, (float)0.95694034, (float)0.95928317, (float)0.96156180, (float)0.96377607, + (float)0.96592583, (float)0.96801094, (float)0.97003125, (float)0.97198664, (float)0.97387698, + (float)0.97570213, (float)0.97746197, (float)0.97915640, (float)0.98078528, (float)0.98234852, + (float)0.98384601, (float)0.98527764, (float)0.98664333, (float)0.98794298, (float)0.98917651, + (float)0.99034383, (float)0.99144486, (float)0.99247953, (float)0.99344778, (float)0.99434953, + 
(float)0.99518473, (float)0.99595331, (float)0.99665524, (float)0.99729046, (float)0.99785892, + (float)0.99836060, (float)0.99879546, (float)0.99916346, (float)0.99946459, (float)0.99969882, + (float)0.99986614, (float)0.99996653, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, 
(float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)0.99996653, (float)0.99986614, (float)0.99969882, (float)0.99946459, + (float)0.99916346, (float)0.99879546, (float)0.99836060, (float)0.99785892, (float)0.99729046, + (float)0.99665524, (float)0.99595331, (float)0.99518473, (float)0.99434953, (float)0.99344778, + (float)0.99247953, (float)0.99144486, (float)0.99034383, (float)0.98917651, (float)0.98794298, + (float)0.98664333, (float)0.98527764, (float)0.98384601, (float)0.98234852, (float)0.98078528, + (float)0.97915640, (float)0.97746197, (float)0.97570213, (float)0.97387698, (float)0.97198664, + (float)0.97003125, (float)0.96801094, (float)0.96592583, (float)0.96377607, (float)0.96156180, + (float)0.95928317, (float)0.95694034, (float)0.95453345, (float)0.95206268, (float)0.94952818, + (float)0.94693013, (float)0.94426870, (float)0.94154407, (float)0.93875641, (float)0.93590593, + (float)0.93299280, (float)0.93001722, (float)0.92697940, (float)0.92387953, (float)0.92071783, + (float)0.91749450, (float)0.91420976, (float)0.91086382, (float)0.90745693, (float)0.90398929, + (float)0.90046115, (float)0.89687274, (float)0.89322430, (float)0.88951608, (float)0.88574831, + (float)0.88192126, (float)0.87803519, (float)0.87409034, (float)0.87008699, (float)0.86602540, + (float)0.86190585, 
(float)0.85772861, (float)0.85349396, (float)0.84920218, (float)0.84485357, + (float)0.84044840, (float)0.83598698, (float)0.83146961, (float)0.82689659, (float)0.82226822, + (float)0.81758481, (float)0.81284668, (float)0.80805415, (float)0.80320753, (float)0.79830715, + (float)0.79335334, (float)0.78834643, (float)0.78328675, (float)0.77817464, (float)0.77301045, + (float)0.76779452, (float)0.76252720, (float)0.75720885, (float)0.75183981, (float)0.74642045, + (float)0.74095113, (float)0.73543221, (float)0.72986407, (float)0.72424708, (float)0.71858162, + (float)0.71286806, (float)0.70710678, (float)0.70129818, (float)0.69544264, (float)0.68954054, + (float)0.68359230, (float)0.67759830, (float)0.67155895, (float)0.66547466, (float)0.65934582, + (float)0.65317284, (float)0.64695615, (float)0.64069616, (float)0.63439328, (float)0.62804795, + (float)0.62166057, (float)0.61523159, (float)0.60876143, (float)0.60225052, (float)0.59569930, + (float)0.58910822, (float)0.58247770, (float)0.57580819, (float)0.56910015, (float)0.56235401, + (float)0.55557023, (float)0.54874927, (float)0.54189158, (float)0.53499762, (float)0.52806785, + (float)0.52110274, (float)0.51410274, (float)0.50706834, (float)0.50000000, (float)0.49289819, + (float)0.48576339, (float)0.47859608, (float)0.47139674, (float)0.46416584, (float)0.45690388, + (float)0.44961133, (float)0.44228869, (float)0.43493645, (float)0.42755509, (float)0.42014512, + (float)0.41270703, (float)0.40524131, (float)0.39774847, (float)0.39022901, (float)0.38268343, + (float)0.37511224, (float)0.36751594, (float)0.35989504, (float)0.35225005, (float)0.34458148, + (float)0.33688985, (float)0.32917568, (float)0.32143947, (float)0.31368174, (float)0.30590302, + (float)0.29810383, (float)0.29028468, (float)0.28244610, (float)0.27458862, (float)0.26671276, + (float)0.25881905, (float)0.25090801, (float)0.24298018, (float)0.23503609, (float)0.22707626, + (float)0.21910124, (float)0.21111155, (float)0.20310773, (float)0.19509032, 
(float)0.18705985, + (float)0.17901686, (float)0.17096189, (float)0.16289547, (float)0.15481816, (float)0.14673047, + (float)0.13863297, (float)0.13052619, (float)0.12241068, (float)0.11428696, (float)0.10615561, + (float)0.09801714, (float)0.08987211, (float)0.08172107, (float)0.07356456, (float)0.06540313, + (float)0.05723732, (float)0.04906767, (float)0.04089475, (float)0.03271908, (float)0.02454123, + (float)0.01636173, (float)0.00818114 +}; + + +// Hanning window: for 15ms at 16kHz with symmetric zeros +static const float kBlocks240w512[512] = { + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00654494, (float)0.01308960, (float)0.01963369, + (float)0.02617695, (float)0.03271908, (float)0.03925982, (float)0.04579887, (float)0.05233596, + (float)0.05887080, (float)0.06540313, (float)0.07193266, (float)0.07845910, (float)0.08498218, + (float)0.09150162, (float)0.09801714, (float)0.10452846, (float)0.11103531, (float)0.11753740, + (float)0.12403446, (float)0.13052620, (float)0.13701233, (float)0.14349262, (float)0.14996676, + (float)0.15643448, (float)0.16289547, (float)0.16934951, (float)0.17579629, (float)0.18223552, + (float)0.18866697, (float)0.19509032, (float)0.20150533, (float)0.20791170, (float)0.21430916, + (float)0.22069745, (float)0.22707628, (float)0.23344538, (float)0.23980446, (float)0.24615330, + (float)0.25249159, (float)0.25881904, (float)0.26513544, (float)0.27144045, (float)0.27773386, + (float)0.28401536, (float)0.29028466, (float)0.29654160, (float)0.30278578, (float)0.30901700, + (float)0.31523499, (float)0.32143945, (float)0.32763019, (float)0.33380687, (float)0.33996925, + (float)0.34611708, (float)0.35225007, (float)0.35836795, (float)0.36447051, 
(float)0.37055743, + (float)0.37662852, (float)0.38268346, (float)0.38872197, (float)0.39474389, (float)0.40074885, + (float)0.40673664, (float)0.41270703, (float)0.41865975, (float)0.42459452, (float)0.43051112, + (float)0.43640924, (float)0.44228873, (float)0.44814920, (float)0.45399052, (float)0.45981237, + (float)0.46561453, (float)0.47139674, (float)0.47715878, (float)0.48290035, (float)0.48862126, + (float)0.49432120, (float)0.50000000, (float)0.50565743, (float)0.51129311, (float)0.51690692, + (float)0.52249855, (float)0.52806789, (float)0.53361452, (float)0.53913832, (float)0.54463905, + (float)0.55011642, (float)0.55557024, (float)0.56100029, (float)0.56640625, (float)0.57178795, + (float)0.57714522, (float)0.58247769, (float)0.58778524, (float)0.59306765, (float)0.59832460, + (float)0.60355598, (float)0.60876143, (float)0.61394083, (float)0.61909395, (float)0.62422055, + (float)0.62932038, (float)0.63439333, (float)0.63943899, (float)0.64445734, (float)0.64944810, + (float)0.65441096, (float)0.65934587, (float)0.66425246, (float)0.66913062, (float)0.67398012, + (float)0.67880076, (float)0.68359232, (float)0.68835455, (float)0.69308740, (float)0.69779050, + (float)0.70246369, (float)0.70710677, (float)0.71171963, (float)0.71630198, (float)0.72085363, + (float)0.72537440, (float)0.72986406, (float)0.73432255, (float)0.73874950, (float)0.74314487, + (float)0.74750835, (float)0.75183982, (float)0.75613910, (float)0.76040596, (float)0.76464027, + (float)0.76884186, (float)0.77301043, (float)0.77714598, (float)0.78124821, (float)0.78531694, + (float)0.78935206, (float)0.79335338, (float)0.79732066, (float)0.80125386, (float)0.80515265, + (float)0.80901700, (float)0.81284672, (float)0.81664157, (float)0.82040149, (float)0.82412618, + (float)0.82781565, (float)0.83146966, (float)0.83508795, (float)0.83867061, (float)0.84221727, + (float)0.84572780, (float)0.84920216, (float)0.85264021, (float)0.85604161, (float)0.85940641, + (float)0.86273444, (float)0.86602545, 
(float)0.86927933, (float)0.87249607, (float)0.87567532, + (float)0.87881714, (float)0.88192129, (float)0.88498765, (float)0.88801610, (float)0.89100653, + (float)0.89395881, (float)0.89687276, (float)0.89974827, (float)0.90258533, (float)0.90538365, + (float)0.90814316, (float)0.91086388, (float)0.91354549, (float)0.91618794, (float)0.91879123, + (float)0.92135513, (float)0.92387950, (float)0.92636442, (float)0.92880958, (float)0.93121493, + (float)0.93358046, (float)0.93590593, (float)0.93819135, (float)0.94043654, (float)0.94264150, + (float)0.94480604, (float)0.94693011, (float)0.94901365, (float)0.95105654, (float)0.95305866, + (float)0.95501995, (float)0.95694035, (float)0.95881975, (float)0.96065807, (float)0.96245527, + (float)0.96421117, (float)0.96592581, (float)0.96759909, (float)0.96923089, (float)0.97082120, + (float)0.97236991, (float)0.97387701, (float)0.97534233, (float)0.97676587, (float)0.97814763, + (float)0.97948742, (float)0.98078531, (float)0.98204112, (float)0.98325491, (float)0.98442656, + (float)0.98555607, (float)0.98664331, (float)0.98768836, (float)0.98869103, (float)0.98965138, + (float)0.99056935, (float)0.99144489, (float)0.99227792, (float)0.99306846, (float)0.99381649, + (float)0.99452192, (float)0.99518472, (float)0.99580491, (float)0.99638247, (float)0.99691731, + (float)0.99740952, (float)0.99785894, (float)0.99826562, (float)0.99862951, (float)0.99895066, + (float)0.99922901, (float)0.99946457, (float)0.99965733, (float)0.99980724, (float)0.99991435, + (float)0.99997860, (float)1.00000000, (float)0.99997860, (float)0.99991435, (float)0.99980724, + (float)0.99965733, (float)0.99946457, (float)0.99922901, (float)0.99895066, (float)0.99862951, + (float)0.99826562, (float)0.99785894, (float)0.99740946, (float)0.99691731, (float)0.99638247, + (float)0.99580491, (float)0.99518472, (float)0.99452192, (float)0.99381644, (float)0.99306846, + (float)0.99227792, (float)0.99144489, (float)0.99056935, (float)0.98965138, (float)0.98869103, + 
(float)0.98768836, (float)0.98664331, (float)0.98555607, (float)0.98442656, (float)0.98325491, + (float)0.98204112, (float)0.98078525, (float)0.97948742, (float)0.97814757, (float)0.97676587, + (float)0.97534227, (float)0.97387695, (float)0.97236991, (float)0.97082120, (float)0.96923089, + (float)0.96759909, (float)0.96592581, (float)0.96421117, (float)0.96245521, (float)0.96065807, + (float)0.95881969, (float)0.95694029, (float)0.95501995, (float)0.95305860, (float)0.95105648, + (float)0.94901365, (float)0.94693011, (float)0.94480604, (float)0.94264150, (float)0.94043654, + (float)0.93819129, (float)0.93590593, (float)0.93358046, (float)0.93121493, (float)0.92880952, + (float)0.92636436, (float)0.92387950, (float)0.92135507, (float)0.91879123, (float)0.91618794, + (float)0.91354543, (float)0.91086382, (float)0.90814310, (float)0.90538365, (float)0.90258527, + (float)0.89974827, (float)0.89687276, (float)0.89395875, (float)0.89100647, (float)0.88801610, + (float)0.88498759, (float)0.88192123, (float)0.87881714, (float)0.87567532, (float)0.87249595, + (float)0.86927933, (float)0.86602539, (float)0.86273432, (float)0.85940641, (float)0.85604161, + (float)0.85264009, (float)0.84920216, (float)0.84572780, (float)0.84221715, (float)0.83867055, + (float)0.83508795, (float)0.83146954, (float)0.82781565, (float)0.82412612, (float)0.82040137, + (float)0.81664157, (float)0.81284660, (float)0.80901700, (float)0.80515265, (float)0.80125374, + (float)0.79732066, (float)0.79335332, (float)0.78935200, (float)0.78531694, (float)0.78124815, + (float)0.77714586, (float)0.77301049, (float)0.76884180, (float)0.76464021, (float)0.76040596, + (float)0.75613904, (float)0.75183970, (float)0.74750835, (float)0.74314481, (float)0.73874938, + (float)0.73432249, (float)0.72986400, (float)0.72537428, (float)0.72085363, (float)0.71630186, + (float)0.71171951, (float)0.70710677, (float)0.70246363, (float)0.69779032, (float)0.69308734, + (float)0.68835449, (float)0.68359220, (float)0.67880070, 
(float)0.67398006, (float)0.66913044, + (float)0.66425240, (float)0.65934575, (float)0.65441096, (float)0.64944804, (float)0.64445722, + (float)0.63943905, (float)0.63439327, (float)0.62932026, (float)0.62422055, (float)0.61909389, + (float)0.61394072, (float)0.60876143, (float)0.60355592, (float)0.59832448, (float)0.59306765, + (float)0.58778518, (float)0.58247757, (float)0.57714522, (float)0.57178789, (float)0.56640613, + (float)0.56100023, (float)0.55557019, (float)0.55011630, (float)0.54463905, (float)0.53913826, + (float)0.53361434, (float)0.52806783, (float)0.52249849, (float)0.51690674, (float)0.51129305, + (float)0.50565726, (float)0.50000006, (float)0.49432117, (float)0.48862115, (float)0.48290038, + (float)0.47715873, (float)0.47139663, (float)0.46561456, (float)0.45981231, (float)0.45399037, + (float)0.44814920, (float)0.44228864, (float)0.43640912, (float)0.43051112, (float)0.42459446, + (float)0.41865960, (float)0.41270703, (float)0.40673658, (float)0.40074870, (float)0.39474386, + (float)0.38872188, (float)0.38268328, (float)0.37662849, (float)0.37055734, (float)0.36447033, + (float)0.35836792, (float)0.35224995, (float)0.34611690, (float)0.33996922, (float)0.33380675, + (float)0.32763001, (float)0.32143945, (float)0.31523487, (float)0.30901679, (float)0.30278572, + (float)0.29654145, (float)0.29028472, (float)0.28401530, (float)0.27773371, (float)0.27144048, + (float)0.26513538, (float)0.25881892, (float)0.25249159, (float)0.24615324, (float)0.23980433, + (float)0.23344538, (float)0.22707619, (float)0.22069728, (float)0.21430916, (float)0.20791161, + (float)0.20150517, (float)0.19509031, (float)0.18866688, (float)0.18223536, (float)0.17579627, + (float)0.16934940, (float)0.16289529, (float)0.15643445, (float)0.14996666, (float)0.14349243, + (float)0.13701232, (float)0.13052608, (float)0.12403426, (float)0.11753736, (float)0.11103519, + (float)0.10452849, (float)0.09801710, (float)0.09150149, (float)0.08498220, (float)0.07845904, + (float)0.07193252, 
(float)0.06540315, (float)0.05887074, (float)0.05233581, (float)0.04579888, + (float)0.03925974, (float)0.03271893, (float)0.02617695, (float)0.01963361, (float)0.01308943, + (float)0.00654493, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000 +}; + + +// Hanning window: for 30ms with 1024 fft with symmetric zeros at 16kHz +static const float kBlocks480w1024[1024] = { + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00327249, (float)0.00654494, + (float)0.00981732, (float)0.01308960, (float)0.01636173, (float)0.01963369, (float)0.02290544, + (float)0.02617695, (float)0.02944817, (float)0.03271908, (float)0.03598964, (float)0.03925982, + (float)0.04252957, (float)0.04579887, (float)0.04906768, (float)0.05233596, (float)0.05560368, + (float)0.05887080, (float)0.06213730, (float)0.06540313, (float)0.06866825, (float)0.07193266, + (float)0.07519628, (float)0.07845910, (float)0.08172107, (float)0.08498218, (float)0.08824237, + (float)0.09150162, (float)0.09475989, (float)0.09801714, (float)0.10127335, (float)0.10452846, + (float)0.10778246, (float)0.11103531, (float)0.11428697, (float)0.11753740, (float)0.12078657, + 
(float)0.12403446, (float)0.12728101, (float)0.13052620, (float)0.13376999, (float)0.13701233, + (float)0.14025325, (float)0.14349262, (float)0.14673047, (float)0.14996676, (float)0.15320145, + (float)0.15643448, (float)0.15966582, (float)0.16289547, (float)0.16612339, (float)0.16934951, + (float)0.17257382, (float)0.17579629, (float)0.17901687, (float)0.18223552, (float)0.18545224, + (float)0.18866697, (float)0.19187967, (float)0.19509032, (float)0.19829889, (float)0.20150533, + (float)0.20470962, (float)0.20791170, (float)0.21111156, (float)0.21430916, (float)0.21750447, + (float)0.22069745, (float)0.22388805, (float)0.22707628, (float)0.23026206, (float)0.23344538, + (float)0.23662618, (float)0.23980446, (float)0.24298020, (float)0.24615330, (float)0.24932377, + (float)0.25249159, (float)0.25565669, (float)0.25881904, (float)0.26197866, (float)0.26513544, + (float)0.26828939, (float)0.27144045, (float)0.27458861, (float)0.27773386, (float)0.28087610, + (float)0.28401536, (float)0.28715158, (float)0.29028466, (float)0.29341471, (float)0.29654160, + (float)0.29966527, (float)0.30278578, (float)0.30590302, (float)0.30901700, (float)0.31212768, + (float)0.31523499, (float)0.31833893, (float)0.32143945, (float)0.32453656, (float)0.32763019, + (float)0.33072028, (float)0.33380687, (float)0.33688986, (float)0.33996925, (float)0.34304500, + (float)0.34611708, (float)0.34918544, (float)0.35225007, (float)0.35531089, (float)0.35836795, + (float)0.36142117, (float)0.36447051, (float)0.36751595, (float)0.37055743, (float)0.37359497, + (float)0.37662852, (float)0.37965801, (float)0.38268346, (float)0.38570479, (float)0.38872197, + (float)0.39173502, (float)0.39474389, (float)0.39774847, (float)0.40074885, (float)0.40374491, + (float)0.40673664, (float)0.40972406, (float)0.41270703, (float)0.41568562, (float)0.41865975, + (float)0.42162940, (float)0.42459452, (float)0.42755508, (float)0.43051112, (float)0.43346250, + (float)0.43640924, (float)0.43935132, (float)0.44228873, 
(float)0.44522133, (float)0.44814920, + (float)0.45107228, (float)0.45399052, (float)0.45690390, (float)0.45981237, (float)0.46271592, + (float)0.46561453, (float)0.46850815, (float)0.47139674, (float)0.47428030, (float)0.47715878, + (float)0.48003215, (float)0.48290035, (float)0.48576337, (float)0.48862126, (float)0.49147385, + (float)0.49432120, (float)0.49716330, (float)0.50000000, (float)0.50283140, (float)0.50565743, + (float)0.50847799, (float)0.51129311, (float)0.51410276, (float)0.51690692, (float)0.51970553, + (float)0.52249855, (float)0.52528602, (float)0.52806789, (float)0.53084403, (float)0.53361452, + (float)0.53637928, (float)0.53913832, (float)0.54189163, (float)0.54463905, (float)0.54738063, + (float)0.55011642, (float)0.55284631, (float)0.55557024, (float)0.55828828, (float)0.56100029, + (float)0.56370628, (float)0.56640625, (float)0.56910014, (float)0.57178795, (float)0.57446963, + (float)0.57714522, (float)0.57981455, (float)0.58247769, (float)0.58513463, (float)0.58778524, + (float)0.59042960, (float)0.59306765, (float)0.59569931, (float)0.59832460, (float)0.60094351, + (float)0.60355598, (float)0.60616195, (float)0.60876143, (float)0.61135441, (float)0.61394083, + (float)0.61652070, (float)0.61909395, (float)0.62166059, (float)0.62422055, (float)0.62677383, + (float)0.62932038, (float)0.63186020, (float)0.63439333, (float)0.63691956, (float)0.63943899, + (float)0.64195162, (float)0.64445734, (float)0.64695615, (float)0.64944810, (float)0.65193301, + (float)0.65441096, (float)0.65688187, (float)0.65934587, (float)0.66180271, (float)0.66425246, + (float)0.66669512, (float)0.66913062, (float)0.67155898, (float)0.67398012, (float)0.67639405, + (float)0.67880076, (float)0.68120021, (float)0.68359232, (float)0.68597710, (float)0.68835455, + (float)0.69072467, (float)0.69308740, (float)0.69544262, (float)0.69779050, (float)0.70013082, + (float)0.70246369, (float)0.70478904, (float)0.70710677, (float)0.70941699, (float)0.71171963, + (float)0.71401459, 
(float)0.71630198, (float)0.71858168, (float)0.72085363, (float)0.72311789, + (float)0.72537440, (float)0.72762316, (float)0.72986406, (float)0.73209721, (float)0.73432255, + (float)0.73653996, (float)0.73874950, (float)0.74095118, (float)0.74314487, (float)0.74533057, + (float)0.74750835, (float)0.74967808, (float)0.75183982, (float)0.75399351, (float)0.75613910, + (float)0.75827658, (float)0.76040596, (float)0.76252723, (float)0.76464027, (float)0.76674515, + (float)0.76884186, (float)0.77093029, (float)0.77301043, (float)0.77508241, (float)0.77714598, + (float)0.77920127, (float)0.78124821, (float)0.78328675, (float)0.78531694, (float)0.78733873, + (float)0.78935206, (float)0.79135692, (float)0.79335338, (float)0.79534125, (float)0.79732066, + (float)0.79929149, (float)0.80125386, (float)0.80320752, (float)0.80515265, (float)0.80708915, + (float)0.80901700, (float)0.81093621, (float)0.81284672, (float)0.81474853, (float)0.81664157, + (float)0.81852591, (float)0.82040149, (float)0.82226825, (float)0.82412618, (float)0.82597536, + (float)0.82781565, (float)0.82964706, (float)0.83146966, (float)0.83328325, (float)0.83508795, + (float)0.83688378, (float)0.83867061, (float)0.84044838, (float)0.84221727, (float)0.84397703, + (float)0.84572780, (float)0.84746957, (float)0.84920216, (float)0.85092574, (float)0.85264021, + (float)0.85434544, (float)0.85604161, (float)0.85772866, (float)0.85940641, (float)0.86107504, + (float)0.86273444, (float)0.86438453, (float)0.86602545, (float)0.86765707, (float)0.86927933, + (float)0.87089235, (float)0.87249607, (float)0.87409031, (float)0.87567532, (float)0.87725097, + (float)0.87881714, (float)0.88037390, (float)0.88192129, (float)0.88345921, (float)0.88498765, + (float)0.88650668, (float)0.88801610, (float)0.88951612, (float)0.89100653, (float)0.89248741, + (float)0.89395881, (float)0.89542055, (float)0.89687276, (float)0.89831537, (float)0.89974827, + (float)0.90117162, (float)0.90258533, (float)0.90398932, (float)0.90538365, 
(float)0.90676826, + (float)0.90814316, (float)0.90950841, (float)0.91086388, (float)0.91220951, (float)0.91354549, + (float)0.91487163, (float)0.91618794, (float)0.91749454, (float)0.91879123, (float)0.92007810, + (float)0.92135513, (float)0.92262226, (float)0.92387950, (float)0.92512691, (float)0.92636442, + (float)0.92759192, (float)0.92880958, (float)0.93001723, (float)0.93121493, (float)0.93240267, + (float)0.93358046, (float)0.93474817, (float)0.93590593, (float)0.93705362, (float)0.93819135, + (float)0.93931901, (float)0.94043654, (float)0.94154406, (float)0.94264150, (float)0.94372880, + (float)0.94480604, (float)0.94587320, (float)0.94693011, (float)0.94797695, (float)0.94901365, + (float)0.95004016, (float)0.95105654, (float)0.95206273, (float)0.95305866, (float)0.95404440, + (float)0.95501995, (float)0.95598525, (float)0.95694035, (float)0.95788521, (float)0.95881975, + (float)0.95974404, (float)0.96065807, (float)0.96156180, (float)0.96245527, (float)0.96333838, + (float)0.96421117, (float)0.96507370, (float)0.96592581, (float)0.96676767, (float)0.96759909, + (float)0.96842021, (float)0.96923089, (float)0.97003126, (float)0.97082120, (float)0.97160077, + (float)0.97236991, (float)0.97312868, (float)0.97387701, (float)0.97461486, (float)0.97534233, + (float)0.97605932, (float)0.97676587, (float)0.97746199, (float)0.97814763, (float)0.97882277, + (float)0.97948742, (float)0.98014158, (float)0.98078531, (float)0.98141843, (float)0.98204112, + (float)0.98265332, (float)0.98325491, (float)0.98384601, (float)0.98442656, (float)0.98499662, + (float)0.98555607, (float)0.98610497, (float)0.98664331, (float)0.98717111, (float)0.98768836, + (float)0.98819500, (float)0.98869103, (float)0.98917651, (float)0.98965138, (float)0.99011570, + (float)0.99056935, (float)0.99101239, (float)0.99144489, (float)0.99186671, (float)0.99227792, + (float)0.99267852, (float)0.99306846, (float)0.99344778, (float)0.99381649, (float)0.99417448, + (float)0.99452192, (float)0.99485862, 
(float)0.99518472, (float)0.99550015, (float)0.99580491, + (float)0.99609905, (float)0.99638247, (float)0.99665523, (float)0.99691731, (float)0.99716878, + (float)0.99740952, (float)0.99763954, (float)0.99785894, (float)0.99806762, (float)0.99826562, + (float)0.99845290, (float)0.99862951, (float)0.99879545, (float)0.99895066, (float)0.99909520, + (float)0.99922901, (float)0.99935216, (float)0.99946457, (float)0.99956632, (float)0.99965733, + (float)0.99973762, (float)0.99980724, (float)0.99986613, (float)0.99991435, (float)0.99995178, + (float)0.99997860, (float)0.99999464, (float)1.00000000, (float)0.99999464, (float)0.99997860, + (float)0.99995178, (float)0.99991435, (float)0.99986613, (float)0.99980724, (float)0.99973762, + (float)0.99965733, (float)0.99956632, (float)0.99946457, (float)0.99935216, (float)0.99922901, + (float)0.99909520, (float)0.99895066, (float)0.99879545, (float)0.99862951, (float)0.99845290, + (float)0.99826562, (float)0.99806762, (float)0.99785894, (float)0.99763954, (float)0.99740946, + (float)0.99716872, (float)0.99691731, (float)0.99665523, (float)0.99638247, (float)0.99609905, + (float)0.99580491, (float)0.99550015, (float)0.99518472, (float)0.99485862, (float)0.99452192, + (float)0.99417448, (float)0.99381644, (float)0.99344778, (float)0.99306846, (float)0.99267852, + (float)0.99227792, (float)0.99186671, (float)0.99144489, (float)0.99101239, (float)0.99056935, + (float)0.99011564, (float)0.98965138, (float)0.98917651, (float)0.98869103, (float)0.98819494, + (float)0.98768836, (float)0.98717111, (float)0.98664331, (float)0.98610497, (float)0.98555607, + (float)0.98499656, (float)0.98442656, (float)0.98384601, (float)0.98325491, (float)0.98265326, + (float)0.98204112, (float)0.98141843, (float)0.98078525, (float)0.98014158, (float)0.97948742, + (float)0.97882277, (float)0.97814757, (float)0.97746193, (float)0.97676587, (float)0.97605932, + (float)0.97534227, (float)0.97461486, (float)0.97387695, (float)0.97312862, (float)0.97236991, + 
(float)0.97160077, (float)0.97082120, (float)0.97003126, (float)0.96923089, (float)0.96842015, + (float)0.96759909, (float)0.96676761, (float)0.96592581, (float)0.96507365, (float)0.96421117, + (float)0.96333838, (float)0.96245521, (float)0.96156180, (float)0.96065807, (float)0.95974404, + (float)0.95881969, (float)0.95788515, (float)0.95694029, (float)0.95598525, (float)0.95501995, + (float)0.95404440, (float)0.95305860, (float)0.95206267, (float)0.95105648, (float)0.95004016, + (float)0.94901365, (float)0.94797695, (float)0.94693011, (float)0.94587314, (float)0.94480604, + (float)0.94372880, (float)0.94264150, (float)0.94154406, (float)0.94043654, (float)0.93931895, + (float)0.93819129, (float)0.93705362, (float)0.93590593, (float)0.93474817, (float)0.93358046, + (float)0.93240267, (float)0.93121493, (float)0.93001723, (float)0.92880952, (float)0.92759192, + (float)0.92636436, (float)0.92512691, (float)0.92387950, (float)0.92262226, (float)0.92135507, + (float)0.92007804, (float)0.91879123, (float)0.91749448, (float)0.91618794, (float)0.91487157, + (float)0.91354543, (float)0.91220951, (float)0.91086382, (float)0.90950835, (float)0.90814310, + (float)0.90676820, (float)0.90538365, (float)0.90398932, (float)0.90258527, (float)0.90117157, + (float)0.89974827, (float)0.89831525, (float)0.89687276, (float)0.89542055, (float)0.89395875, + (float)0.89248741, (float)0.89100647, (float)0.88951600, (float)0.88801610, (float)0.88650662, + (float)0.88498759, (float)0.88345915, (float)0.88192123, (float)0.88037384, (float)0.87881714, + (float)0.87725091, (float)0.87567532, (float)0.87409031, (float)0.87249595, (float)0.87089223, + (float)0.86927933, (float)0.86765701, (float)0.86602539, (float)0.86438447, (float)0.86273432, + (float)0.86107504, (float)0.85940641, (float)0.85772860, (float)0.85604161, (float)0.85434544, + (float)0.85264009, (float)0.85092574, (float)0.84920216, (float)0.84746951, (float)0.84572780, + (float)0.84397697, (float)0.84221715, (float)0.84044844, 
(float)0.83867055, (float)0.83688372, + (float)0.83508795, (float)0.83328319, (float)0.83146954, (float)0.82964706, (float)0.82781565, + (float)0.82597530, (float)0.82412612, (float)0.82226813, (float)0.82040137, (float)0.81852591, + (float)0.81664157, (float)0.81474847, (float)0.81284660, (float)0.81093609, (float)0.80901700, + (float)0.80708915, (float)0.80515265, (float)0.80320752, (float)0.80125374, (float)0.79929143, + (float)0.79732066, (float)0.79534125, (float)0.79335332, (float)0.79135686, (float)0.78935200, + (float)0.78733861, (float)0.78531694, (float)0.78328675, (float)0.78124815, (float)0.77920121, + (float)0.77714586, (float)0.77508223, (float)0.77301049, (float)0.77093029, (float)0.76884180, + (float)0.76674509, (float)0.76464021, (float)0.76252711, (float)0.76040596, (float)0.75827658, + (float)0.75613904, (float)0.75399339, (float)0.75183970, (float)0.74967796, (float)0.74750835, + (float)0.74533057, (float)0.74314481, (float)0.74095106, (float)0.73874938, (float)0.73653996, + (float)0.73432249, (float)0.73209721, (float)0.72986400, (float)0.72762305, (float)0.72537428, + (float)0.72311789, (float)0.72085363, (float)0.71858162, (float)0.71630186, (float)0.71401453, + (float)0.71171951, (float)0.70941705, (float)0.70710677, (float)0.70478898, (float)0.70246363, + (float)0.70013070, (float)0.69779032, (float)0.69544268, (float)0.69308734, (float)0.69072461, + (float)0.68835449, (float)0.68597704, (float)0.68359220, (float)0.68120021, (float)0.67880070, + (float)0.67639399, (float)0.67398006, (float)0.67155886, (float)0.66913044, (float)0.66669512, + (float)0.66425240, (float)0.66180259, (float)0.65934575, (float)0.65688181, (float)0.65441096, + (float)0.65193301, (float)0.64944804, (float)0.64695609, (float)0.64445722, (float)0.64195150, + (float)0.63943905, (float)0.63691956, (float)0.63439327, (float)0.63186014, (float)0.62932026, + (float)0.62677372, (float)0.62422055, (float)0.62166059, (float)0.61909389, (float)0.61652064, + (float)0.61394072, 
(float)0.61135429, (float)0.60876143, (float)0.60616189, (float)0.60355592, + (float)0.60094339, (float)0.59832448, (float)0.59569913, (float)0.59306765, (float)0.59042960, + (float)0.58778518, (float)0.58513451, (float)0.58247757, (float)0.57981461, (float)0.57714522, + (float)0.57446963, (float)0.57178789, (float)0.56910002, (float)0.56640613, (float)0.56370628, + (float)0.56100023, (float)0.55828822, (float)0.55557019, (float)0.55284619, (float)0.55011630, + (float)0.54738069, (float)0.54463905, (float)0.54189152, (float)0.53913826, (float)0.53637916, + (float)0.53361434, (float)0.53084403, (float)0.52806783, (float)0.52528596, (float)0.52249849, + (float)0.51970541, (float)0.51690674, (float)0.51410276, (float)0.51129305, (float)0.50847787, + (float)0.50565726, (float)0.50283122, (float)0.50000006, (float)0.49716327, (float)0.49432117, + (float)0.49147379, (float)0.48862115, (float)0.48576325, (float)0.48290038, (float)0.48003212, + (float)0.47715873, (float)0.47428021, (float)0.47139663, (float)0.46850798, (float)0.46561456, + (float)0.46271589, (float)0.45981231, (float)0.45690379, (float)0.45399037, (float)0.45107210, + (float)0.44814920, (float)0.44522130, (float)0.44228864, (float)0.43935123, (float)0.43640912, + (float)0.43346232, (float)0.43051112, (float)0.42755505, (float)0.42459446, (float)0.42162928, + (float)0.41865960, (float)0.41568545, (float)0.41270703, (float)0.40972400, (float)0.40673658, + (float)0.40374479, (float)0.40074870, (float)0.39774850, (float)0.39474386, (float)0.39173496, + (float)0.38872188, (float)0.38570464, (float)0.38268328, (float)0.37965804, (float)0.37662849, + (float)0.37359491, (float)0.37055734, (float)0.36751580, (float)0.36447033, (float)0.36142117, + (float)0.35836792, (float)0.35531086, (float)0.35224995, (float)0.34918529, (float)0.34611690, + (float)0.34304500, (float)0.33996922, (float)0.33688980, (float)0.33380675, (float)0.33072016, + (float)0.32763001, (float)0.32453656, (float)0.32143945, (float)0.31833887, 
(float)0.31523487, + (float)0.31212750, (float)0.30901679, (float)0.30590302, (float)0.30278572, (float)0.29966521, + (float)0.29654145, (float)0.29341453, (float)0.29028472, (float)0.28715155, (float)0.28401530, + (float)0.28087601, (float)0.27773371, (float)0.27458847, (float)0.27144048, (float)0.26828936, + (float)0.26513538, (float)0.26197854, (float)0.25881892, (float)0.25565651, (float)0.25249159, + (float)0.24932374, (float)0.24615324, (float)0.24298008, (float)0.23980433, (float)0.23662600, + (float)0.23344538, (float)0.23026201, (float)0.22707619, (float)0.22388794, (float)0.22069728, + (float)0.21750426, (float)0.21430916, (float)0.21111152, (float)0.20791161, (float)0.20470949, + (float)0.20150517, (float)0.19829892, (float)0.19509031, (float)0.19187963, (float)0.18866688, + (float)0.18545210, (float)0.18223536, (float)0.17901689, (float)0.17579627, (float)0.17257376, + (float)0.16934940, (float)0.16612324, (float)0.16289529, (float)0.15966584, (float)0.15643445, + (float)0.15320137, (float)0.14996666, (float)0.14673033, (float)0.14349243, (float)0.14025325, + (float)0.13701232, (float)0.13376991, (float)0.13052608, (float)0.12728085, (float)0.12403426, + (float)0.12078657, (float)0.11753736, (float)0.11428688, (float)0.11103519, (float)0.10778230, + (float)0.10452849, (float)0.10127334, (float)0.09801710, (float)0.09475980, (float)0.09150149, + (float)0.08824220, (float)0.08498220, (float)0.08172106, (float)0.07845904, (float)0.07519618, + (float)0.07193252, (float)0.06866808, (float)0.06540315, (float)0.06213728, (float)0.05887074, + (float)0.05560357, (float)0.05233581, (float)0.04906749, (float)0.04579888, (float)0.04252954, + (float)0.03925974, (float)0.03598953, (float)0.03271893, (float)0.02944798, (float)0.02617695, + (float)0.02290541, (float)0.01963361, (float)0.01636161, (float)0.01308943, (float)0.00981712, + (float)0.00654493, (float)0.00327244, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, 
(float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000 +}; + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.c b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.c new file mode 100644 index 00000000..f9f3dc24 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.c @@ -0,0 +1,684 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/utility/delay_estimator.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +// Number of right shifts for scaling is linearly depending on number of bits in +// the far-end binary spectrum. +static const int kShiftsAtZero = 13; // Right shifts at zero binary spectrum. +static const int kShiftsLinearSlope = 3; + +static const int32_t kProbabilityOffset = 1024; // 2 in Q9. +static const int32_t kProbabilityLowerLimit = 8704; // 17 in Q9. +static const int32_t kProbabilityMinSpread = 2816; // 5.5 in Q9. 
// Robust validation settings
static const float kHistogramMax = 3000.f;
static const float kLastHistogramMax = 250.f;
static const float kMinHistogramThreshold = 1.5f;
static const int kMinRequiredHits = 10;
static const int kMaxHitsWhenPossiblyNonCausal = 10;
static const int kMaxHitsWhenPossiblyCausal = 1000;
// Equals 2^-14; multiplying a Q14 value by this factor yields the Q0 value.
static const float kQ14Scaling = 1.f / (1 << 14);
static const float kFractionSlope = 0.05f;
static const float kMinFractionWhenPossiblyCausal = 0.5f;
static const float kMinFractionWhenPossiblyNonCausal = 0.25f;

// Counts and returns the number of set bits (population count) of a 32-bit
// word, using a branch-free parallel reduction with octal masks.
//
// Input:
//  - u32 : the word whose set bits are counted
//
// Return value:
//  - number of bits set in |u32|, in the range [0, 32]
static int BitCount(uint32_t u32) {
  // After this step every 3-bit group holds the number of bits that were set
  // in that group (x - (x >> 1) - (x >> 2) evaluated per group).
  uint32_t tmp = u32 - ((u32 >> 1) & 033333333333) -
      ((u32 >> 2) & 011111111111);
  // Add adjacent 3-bit groups pairwise and keep every second group, so each
  // retained field holds the count of a 6-bit slice of the input.
  tmp = ((tmp + (tmp >> 3)) & 030707070707);
  // Fold the partial sums on top of each other; the total (at most 32) fits
  // in the six low bits, which the final mask extracts.
  tmp = (tmp + (tmp >> 6));
  tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077;

  return ((int) tmp);
}

// Compares the |binary_vector| with all rows of the |binary_matrix| and
// counts, per row, the number of bit positions in which the two DIFFER (the
// words are XOR-ed before counting, so identical words give 0 and
// complementary words give 32).
//
// Inputs:
//  - binary_vector : binary "vector" stored in a 32-bit word
//  - binary_matrix : binary "matrix" stored as an array of 32-bit words, one
//                    word per row
//  - matrix_size   : number of rows in the binary "matrix"; nothing is
//                    written when it is <= 0
//
// Output:
//  - bit_counts    : array with at least |matrix_size| elements; element n
//                    receives the number of bit positions in which row n and
//                    the input vector differ
//
static void BitCountComparison(uint32_t binary_vector,
                               const uint32_t* binary_matrix,
                               int matrix_size,
                               int32_t* bit_counts) {
  int n = 0;

  // Compare |binary_vector| with all rows of the |binary_matrix|
  for (; n < matrix_size; n++) {
    bit_counts[n] = (int32_t) BitCount(binary_vector ^ binary_matrix[n]);
  }
}

// Collects necessary statistics for the HistogramBasedValidation(). This
// function has to be called prior to calling HistogramBasedValidation(). The
// statistics updated and used by the HistogramBasedValidation() are:
//  1.
the number of |candidate_hits|, which states for how long we have had the +// same |candidate_delay| +// 2. the |histogram| of candidate delays over time. This histogram is +// weighted with respect to a reliability measure and time-varying to cope +// with possible delay shifts. +// For further description see commented code. +// +// Inputs: +// - candidate_delay : The delay to validate. +// - valley_depth_q14 : The cost function has a valley/minimum at the +// |candidate_delay| location. |valley_depth_q14| is the +// cost function difference between the minimum and +// maximum locations. The value is in the Q14 domain. +// - valley_level_q14 : Is the cost function value at the minimum, in Q14. +static void UpdateRobustValidationStatistics(BinaryDelayEstimator* self, + int candidate_delay, + int32_t valley_depth_q14, + int32_t valley_level_q14) { + const float valley_depth = valley_depth_q14 * kQ14Scaling; + float decrease_in_last_set = valley_depth; + const int max_hits_for_slow_change = (candidate_delay < self->last_delay) ? + kMaxHitsWhenPossiblyNonCausal : kMaxHitsWhenPossiblyCausal; + int i = 0; + + assert(self->history_size == self->farend->history_size); + // Reset |candidate_hits| if we have a new candidate. + if (candidate_delay != self->last_candidate_delay) { + self->candidate_hits = 0; + self->last_candidate_delay = candidate_delay; + } + self->candidate_hits++; + + // The |histogram| is updated differently across the bins. + // 1. The |candidate_delay| histogram bin is increased with the + // |valley_depth|, which is a simple measure of how reliable the + // |candidate_delay| is. The histogram is not increased above + // |kHistogramMax|. + self->histogram[candidate_delay] += valley_depth; + if (self->histogram[candidate_delay] > kHistogramMax) { + self->histogram[candidate_delay] = kHistogramMax; + } + // 2. The histogram bins in the neighborhood of |candidate_delay| are + // unaffected. The neighborhood is defined as x + {-2, -1, 0, 1}. + // 3. 
The histogram bins in the neighborhood of |last_delay| are decreased + // with |decrease_in_last_set|. This value equals the difference between + // the cost function values at the locations |candidate_delay| and + // |last_delay| until we reach |max_hits_for_slow_change| consecutive hits + // at the |candidate_delay|. If we exceed this amount of hits the + // |candidate_delay| is a "potential" candidate and we start decreasing + // these histogram bins more rapidly with |valley_depth|. + if (self->candidate_hits < max_hits_for_slow_change) { + decrease_in_last_set = (self->mean_bit_counts[self->compare_delay] - + valley_level_q14) * kQ14Scaling; + } + // 4. All other bins are decreased with |valley_depth|. + // TODO(bjornv): Investigate how to make this loop more efficient. Split up + // the loop? Remove parts that doesn't add too much. + for (i = 0; i < self->history_size; ++i) { + int is_in_last_set = (i >= self->last_delay - 2) && + (i <= self->last_delay + 1) && (i != candidate_delay); + int is_in_candidate_set = (i >= candidate_delay - 2) && + (i <= candidate_delay + 1); + self->histogram[i] -= decrease_in_last_set * is_in_last_set + + valley_depth * (!is_in_last_set && !is_in_candidate_set); + // 5. No histogram bin can go below 0. + if (self->histogram[i] < 0) { + self->histogram[i] = 0; + } + } +} + +// Validates the |candidate_delay|, estimated in WebRtc_ProcessBinarySpectrum(), +// based on a mix of counting concurring hits with a modified histogram +// of recent delay estimates. In brief a candidate is valid (returns 1) if it +// is the most likely according to the histogram. There are a couple of +// exceptions that are worth mentioning: +// 1. If the |candidate_delay| < |last_delay| it can be that we are in a +// non-causal state, breaking a possible echo control algorithm. Hence, we +// open up for a quicker change by allowing the change even if the +// |candidate_delay| is not the most likely one according to the histogram. +// 2. 
There's a minimum number of hits (kMinRequiredHits) and the histogram +// value has to reached a minimum (kMinHistogramThreshold) to be valid. +// 3. The action is also depending on the filter length used for echo control. +// If the delay difference is larger than what the filter can capture, we +// also move quicker towards a change. +// For further description see commented code. +// +// Input: +// - candidate_delay : The delay to validate. +// +// Return value: +// - is_histogram_valid : 1 - The |candidate_delay| is valid. +// 0 - Otherwise. +static int HistogramBasedValidation(const BinaryDelayEstimator* self, + int candidate_delay) { + float fraction = 1.f; + float histogram_threshold = self->histogram[self->compare_delay]; + const int delay_difference = candidate_delay - self->last_delay; + int is_histogram_valid = 0; + + // The histogram based validation of |candidate_delay| is done by comparing + // the |histogram| at bin |candidate_delay| with a |histogram_threshold|. + // This |histogram_threshold| equals a |fraction| of the |histogram| at bin + // |last_delay|. The |fraction| is a piecewise linear function of the + // |delay_difference| between the |candidate_delay| and the |last_delay| + // allowing for a quicker move if + // i) a potential echo control filter can not handle these large differences. + // ii) keeping |last_delay| instead of updating to |candidate_delay| could + // force an echo control into a non-causal state. + // We further require the histogram to have reached a minimum value of + // |kMinHistogramThreshold|. In addition, we also require the number of + // |candidate_hits| to be more than |kMinRequiredHits| to remove spurious + // values. + + // Calculate a comparison histogram value (|histogram_threshold|) that is + // depending on the distance between the |candidate_delay| and |last_delay|. + // TODO(bjornv): How much can we gain by turning the fraction calculation + // into tables? 
+ if (delay_difference > self->allowed_offset) { + fraction = 1.f - kFractionSlope * (delay_difference - self->allowed_offset); + fraction = (fraction > kMinFractionWhenPossiblyCausal ? fraction : + kMinFractionWhenPossiblyCausal); + } else if (delay_difference < 0) { + fraction = kMinFractionWhenPossiblyNonCausal - + kFractionSlope * delay_difference; + fraction = (fraction > 1.f ? 1.f : fraction); + } + histogram_threshold *= fraction; + histogram_threshold = (histogram_threshold > kMinHistogramThreshold ? + histogram_threshold : kMinHistogramThreshold); + + is_histogram_valid = + (self->histogram[candidate_delay] >= histogram_threshold) && + (self->candidate_hits > kMinRequiredHits); + + return is_histogram_valid; +} + +// Performs a robust validation of the |candidate_delay| estimated in +// WebRtc_ProcessBinarySpectrum(). The algorithm takes the +// |is_instantaneous_valid| and the |is_histogram_valid| and combines them +// into a robust validation. The HistogramBasedValidation() has to be called +// prior to this call. +// For further description on how the combination is done, see commented code. +// +// Inputs: +// - candidate_delay : The delay to validate. +// - is_instantaneous_valid : The instantaneous validation performed in +// WebRtc_ProcessBinarySpectrum(). +// - is_histogram_valid : The histogram based validation. +// +// Return value: +// - is_robust : 1 - The candidate_delay is valid according to a +// combination of the two inputs. +// : 0 - Otherwise. +static int RobustValidation(const BinaryDelayEstimator* self, + int candidate_delay, + int is_instantaneous_valid, + int is_histogram_valid) { + int is_robust = 0; + + // The final robust validation is based on the two algorithms; 1) the + // |is_instantaneous_valid| and 2) the histogram based with result stored in + // |is_histogram_valid|. 
+ // i) Before we actually have a valid estimate (|last_delay| == -2), we say + // a candidate is valid if either algorithm states so + // (|is_instantaneous_valid| OR |is_histogram_valid|). + is_robust = (self->last_delay < 0) && + (is_instantaneous_valid || is_histogram_valid); + // ii) Otherwise, we need both algorithms to be certain + // (|is_instantaneous_valid| AND |is_histogram_valid|) + is_robust |= is_instantaneous_valid && is_histogram_valid; + // iii) With one exception, i.e., the histogram based algorithm can overrule + // the instantaneous one if |is_histogram_valid| = 1 and the histogram + // is significantly strong. + is_robust |= is_histogram_valid && + (self->histogram[candidate_delay] > self->last_delay_histogram); + + return is_robust; +} + +void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) { + + if (self == NULL) { + return; + } + + free(self->binary_far_history); + self->binary_far_history = NULL; + + free(self->far_bit_counts); + self->far_bit_counts = NULL; + + free(self); +} + +BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend( + int history_size) { + BinaryDelayEstimatorFarend* self = NULL; + + if (history_size > 1) { + // Sanity conditions fulfilled. + self = malloc(sizeof(BinaryDelayEstimatorFarend)); + } + if (self == NULL) { + return NULL; + } + + self->history_size = 0; + self->binary_far_history = NULL; + self->far_bit_counts = NULL; + if (WebRtc_AllocateFarendBufferMemory(self, history_size) == 0) { + WebRtc_FreeBinaryDelayEstimatorFarend(self); + self = NULL; + } + return self; +} + +int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self, + int history_size) { + assert(self != NULL); + // (Re-)Allocate memory for history buffers. 
+ self->binary_far_history = + realloc(self->binary_far_history, + history_size * sizeof(*self->binary_far_history)); + self->far_bit_counts = realloc(self->far_bit_counts, + history_size * sizeof(*self->far_bit_counts)); + if ((self->binary_far_history == NULL) || (self->far_bit_counts == NULL)) { + history_size = 0; + } + // Fill with zeros if we have expanded the buffers. + if (history_size > self->history_size) { + int size_diff = history_size - self->history_size; + memset(&self->binary_far_history[self->history_size], + 0, + sizeof(*self->binary_far_history) * size_diff); + memset(&self->far_bit_counts[self->history_size], + 0, + sizeof(*self->far_bit_counts) * size_diff); + } + self->history_size = history_size; + + return self->history_size; +} + +void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) { + assert(self != NULL); + memset(self->binary_far_history, 0, sizeof(uint32_t) * self->history_size); + memset(self->far_bit_counts, 0, sizeof(int) * self->history_size); +} + +void WebRtc_SoftResetBinaryDelayEstimatorFarend( + BinaryDelayEstimatorFarend* self, int delay_shift) { + int abs_shift = abs(delay_shift); + int shift_size = 0; + int dest_index = 0; + int src_index = 0; + int padding_index = 0; + + assert(self != NULL); + shift_size = self->history_size - abs_shift; + assert(shift_size > 0); + if (delay_shift == 0) { + return; + } else if (delay_shift > 0) { + dest_index = abs_shift; + } else if (delay_shift < 0) { + src_index = abs_shift; + padding_index = shift_size; + } + + // Shift and zero pad buffers. 
+ memmove(&self->binary_far_history[dest_index], + &self->binary_far_history[src_index], + sizeof(*self->binary_far_history) * shift_size); + memset(&self->binary_far_history[padding_index], 0, + sizeof(*self->binary_far_history) * abs_shift); + memmove(&self->far_bit_counts[dest_index], + &self->far_bit_counts[src_index], + sizeof(*self->far_bit_counts) * shift_size); + memset(&self->far_bit_counts[padding_index], 0, + sizeof(*self->far_bit_counts) * abs_shift); +} + +void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* handle, + uint32_t binary_far_spectrum) { + assert(handle != NULL); + // Shift binary spectrum history and insert current |binary_far_spectrum|. + memmove(&(handle->binary_far_history[1]), &(handle->binary_far_history[0]), + (handle->history_size - 1) * sizeof(uint32_t)); + handle->binary_far_history[0] = binary_far_spectrum; + + // Shift history of far-end binary spectrum bit counts and insert bit count + // of current |binary_far_spectrum|. + memmove(&(handle->far_bit_counts[1]), &(handle->far_bit_counts[0]), + (handle->history_size - 1) * sizeof(int)); + handle->far_bit_counts[0] = BitCount(binary_far_spectrum); +} + +void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self) { + + if (self == NULL) { + return; + } + + free(self->mean_bit_counts); + self->mean_bit_counts = NULL; + + free(self->bit_counts); + self->bit_counts = NULL; + + free(self->binary_near_history); + self->binary_near_history = NULL; + + free(self->histogram); + self->histogram = NULL; + + // BinaryDelayEstimator does not have ownership of |farend|, hence we do not + // free the memory here. That should be handled separately by the user. + self->farend = NULL; + + free(self); +} + +BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator( + BinaryDelayEstimatorFarend* farend, int max_lookahead) { + BinaryDelayEstimator* self = NULL; + + if ((farend != NULL) && (max_lookahead >= 0)) { + // Sanity conditions fulfilled. 
+ self = malloc(sizeof(BinaryDelayEstimator)); + } + if (self == NULL) { + return NULL; + } + + self->farend = farend; + self->near_history_size = max_lookahead + 1; + self->history_size = 0; + self->robust_validation_enabled = 0; // Disabled by default. + self->allowed_offset = 0; + + self->lookahead = max_lookahead; + + // Allocate memory for spectrum and history buffers. + self->mean_bit_counts = NULL; + self->bit_counts = NULL; + self->histogram = NULL; + self->binary_near_history = + malloc((max_lookahead + 1) * sizeof(*self->binary_near_history)); + if (self->binary_near_history == NULL || + WebRtc_AllocateHistoryBufferMemory(self, farend->history_size) == 0) { + WebRtc_FreeBinaryDelayEstimator(self); + self = NULL; + } + + return self; +} + +int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self, + int history_size) { + BinaryDelayEstimatorFarend* far = self->farend; + // (Re-)Allocate memory for spectrum and history buffers. + if (history_size != far->history_size) { + // Only update far-end buffers if we need. + history_size = WebRtc_AllocateFarendBufferMemory(far, history_size); + } + // The extra array element in |mean_bit_counts| and |histogram| is a dummy + // element only used while |last_delay| == -2, i.e., before we have a valid + // estimate. + self->mean_bit_counts = + realloc(self->mean_bit_counts, + (history_size + 1) * sizeof(*self->mean_bit_counts)); + self->bit_counts = + realloc(self->bit_counts, history_size * sizeof(*self->bit_counts)); + self->histogram = + realloc(self->histogram, (history_size + 1) * sizeof(*self->histogram)); + + if ((self->mean_bit_counts == NULL) || + (self->bit_counts == NULL) || + (self->histogram == NULL)) { + history_size = 0; + } + // Fill with zeros if we have expanded the buffers. 
+ if (history_size > self->history_size) { + int size_diff = history_size - self->history_size; + memset(&self->mean_bit_counts[self->history_size], + 0, + sizeof(*self->mean_bit_counts) * size_diff); + memset(&self->bit_counts[self->history_size], + 0, + sizeof(*self->bit_counts) * size_diff); + memset(&self->histogram[self->history_size], + 0, + sizeof(*self->histogram) * size_diff); + } + self->history_size = history_size; + + return self->history_size; +} + +void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self) { + int i = 0; + assert(self != NULL); + + memset(self->bit_counts, 0, sizeof(int32_t) * self->history_size); + memset(self->binary_near_history, + 0, + sizeof(uint32_t) * self->near_history_size); + for (i = 0; i <= self->history_size; ++i) { + self->mean_bit_counts[i] = (20 << 9); // 20 in Q9. + self->histogram[i] = 0.f; + } + self->minimum_probability = kMaxBitCountsQ9; // 32 in Q9. + self->last_delay_probability = (int) kMaxBitCountsQ9; // 32 in Q9. + + // Default return value if we're unable to estimate. -1 is used for errors. 
+ self->last_delay = -2; + + self->last_candidate_delay = -2; + self->compare_delay = self->history_size; + self->candidate_hits = 0; + self->last_delay_histogram = 0.f; +} + +int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self, + int delay_shift) { + int lookahead = 0; + assert(self != NULL); + lookahead = self->lookahead; + self->lookahead -= delay_shift; + if (self->lookahead < 0) { + self->lookahead = 0; + } + if (self->lookahead > self->near_history_size - 1) { + self->lookahead = self->near_history_size - 1; + } + return lookahead - self->lookahead; +} + +int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self, + uint32_t binary_near_spectrum) { + int i = 0; + int candidate_delay = -1; + int valid_candidate = 0; + + int32_t value_best_candidate = kMaxBitCountsQ9; + int32_t value_worst_candidate = 0; + int32_t valley_depth = 0; + + assert(self != NULL); + if (self->farend->history_size != self->history_size) { + // Non matching history sizes. + return -1; + } + if (self->near_history_size > 1) { + // If we apply lookahead, shift near-end binary spectrum history. Insert + // current |binary_near_spectrum| and pull out the delayed one. + memmove(&(self->binary_near_history[1]), &(self->binary_near_history[0]), + (self->near_history_size - 1) * sizeof(uint32_t)); + self->binary_near_history[0] = binary_near_spectrum; + binary_near_spectrum = self->binary_near_history[self->lookahead]; + } + + // Compare with delayed spectra and store the |bit_counts| for each delay. + BitCountComparison(binary_near_spectrum, self->farend->binary_far_history, + self->history_size, self->bit_counts); + + // Update |mean_bit_counts|, which is the smoothed version of |bit_counts|. + for (i = 0; i < self->history_size; i++) { + // |bit_counts| is constrained to [0, 32], meaning we can smooth with a + // factor up to 2^26. We use Q9. + int32_t bit_count = (self->bit_counts[i] << 9); // Q9. 
+ + // Update |mean_bit_counts| only when far-end signal has something to + // contribute. If |far_bit_counts| is zero the far-end signal is weak and + // we likely have a poor echo condition, hence don't update. + if (self->farend->far_bit_counts[i] > 0) { + // Make number of right shifts piecewise linear w.r.t. |far_bit_counts|. + int shifts = kShiftsAtZero; + shifts -= (kShiftsLinearSlope * self->farend->far_bit_counts[i]) >> 4; + WebRtc_MeanEstimatorFix(bit_count, shifts, &(self->mean_bit_counts[i])); + } + } + + // Find |candidate_delay|, |value_best_candidate| and |value_worst_candidate| + // of |mean_bit_counts|. + for (i = 0; i < self->history_size; i++) { + if (self->mean_bit_counts[i] < value_best_candidate) { + value_best_candidate = self->mean_bit_counts[i]; + candidate_delay = i; + } + if (self->mean_bit_counts[i] > value_worst_candidate) { + value_worst_candidate = self->mean_bit_counts[i]; + } + } + valley_depth = value_worst_candidate - value_best_candidate; + + // The |value_best_candidate| is a good indicator on the probability of + // |candidate_delay| being an accurate delay (a small |value_best_candidate| + // means a good binary match). In the following sections we make a decision + // whether to update |last_delay| or not. + // 1) If the difference bit counts between the best and the worst delay + // candidates is too small we consider the situation to be unreliable and + // don't update |last_delay|. + // 2) If the situation is reliable we update |last_delay| if the value of the + // best candidate delay has a value less than + // i) an adaptive threshold |minimum_probability|, or + // ii) this corresponding value |last_delay_probability|, but updated at + // this time instant. + + // Update |minimum_probability|. + if ((self->minimum_probability > kProbabilityLowerLimit) && + (valley_depth > kProbabilityMinSpread)) { + // The "hard" threshold can't be lower than 17 (in Q9). 
+ // The valley in the curve also has to be distinct, i.e., the + // difference between |value_worst_candidate| and |value_best_candidate| has + // to be large enough. + int32_t threshold = value_best_candidate + kProbabilityOffset; + if (threshold < kProbabilityLowerLimit) { + threshold = kProbabilityLowerLimit; + } + if (self->minimum_probability > threshold) { + self->minimum_probability = threshold; + } + } + // Update |last_delay_probability|. + // We use a Markov type model, i.e., a slowly increasing level over time. + self->last_delay_probability++; + // Validate |candidate_delay|. We have a reliable instantaneous delay + // estimate if + // 1) The valley is distinct enough (|valley_depth| > |kProbabilityOffset|) + // and + // 2) The depth of the valley is deep enough + // (|value_best_candidate| < |minimum_probability|) + // and deeper than the best estimate so far + // (|value_best_candidate| < |last_delay_probability|) + valid_candidate = ((valley_depth > kProbabilityOffset) && + ((value_best_candidate < self->minimum_probability) || + (value_best_candidate < self->last_delay_probability))); + + UpdateRobustValidationStatistics(self, candidate_delay, valley_depth, + value_best_candidate); + if (self->robust_validation_enabled) { + int is_histogram_valid = HistogramBasedValidation(self, candidate_delay); + valid_candidate = RobustValidation(self, candidate_delay, valid_candidate, + is_histogram_valid); + + } + if (valid_candidate) { + if (candidate_delay != self->last_delay) { + self->last_delay_histogram = + (self->histogram[candidate_delay] > kLastHistogramMax ? + kLastHistogramMax : self->histogram[candidate_delay]); + // Adjust the histogram if we made a change to |last_delay|, though it was + // not the most likely one according to the histogram. 
+ if (self->histogram[candidate_delay] < + self->histogram[self->compare_delay]) { + self->histogram[self->compare_delay] = self->histogram[candidate_delay]; + } + } + self->last_delay = candidate_delay; + if (value_best_candidate < self->last_delay_probability) { + self->last_delay_probability = value_best_candidate; + } + self->compare_delay = self->last_delay; + } + + return self->last_delay; +} + +int WebRtc_binary_last_delay(BinaryDelayEstimator* self) { + assert(self != NULL); + return self->last_delay; +} + +float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self) { + float quality = 0; + assert(self != NULL); + + if (self->robust_validation_enabled) { + // Simply a linear function of the histogram height at delay estimate. + quality = self->histogram[self->compare_delay] / kHistogramMax; + } else { + // Note that |last_delay_probability| states how deep the minimum of the + // cost function is, so it is rather an error probability. + quality = (float) (kMaxBitCountsQ9 - self->last_delay_probability) / + kMaxBitCountsQ9; + if (quality < 0) { + quality = 0; + } + } + return quality; +} + +void WebRtc_MeanEstimatorFix(int32_t new_value, + int factor, + int32_t* mean_value) { + int32_t diff = new_value - *mean_value; + + // mean_new = mean_value + ((new_value - mean_value) >> factor); + if (diff < 0) { + diff = -((-diff) >> factor); + } else { + diff = (diff >> factor); + } + *mean_value += diff; +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.h b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.h new file mode 100644 index 00000000..65c3f034 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator.h @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Performs delay estimation on binary converted spectra. +// The return value is 0 - OK and -1 - Error, unless otherwise stated. + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ + +#include "webrtc/typedefs.h" + +static const int32_t kMaxBitCountsQ9 = (32 << 9); // 32 matching bits in Q9. + +typedef struct { + // Pointer to bit counts. + int* far_bit_counts; + // Binary history variables. + uint32_t* binary_far_history; + int history_size; +} BinaryDelayEstimatorFarend; + +typedef struct { + // Pointer to bit counts. + int32_t* mean_bit_counts; + // Array only used locally in ProcessBinarySpectrum() but whose size is + // determined at run-time. + int32_t* bit_counts; + + // Binary history variables. + uint32_t* binary_near_history; + int near_history_size; + int history_size; + + // Delay estimation variables. + int32_t minimum_probability; + int last_delay_probability; + + // Delay memory. + int last_delay; + + // Robust validation + int robust_validation_enabled; + int allowed_offset; + int last_candidate_delay; + int compare_delay; + int candidate_hits; + float* histogram; + float last_delay_histogram; + + // For dynamically changing the lookahead when using SoftReset...(). + int lookahead; + + // Far-end binary spectrum history buffer etc. + BinaryDelayEstimatorFarend* farend; +} BinaryDelayEstimator; + +// Releases the memory allocated by +// WebRtc_CreateBinaryDelayEstimatorFarend(...). +// Input: +// - self : Pointer to the binary delay estimation far-end +// instance which is the return value of +// WebRtc_CreateBinaryDelayEstimatorFarend(). 
+// +void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self); + +// Allocates the memory needed by the far-end part of the binary delay +// estimation. The memory needs to be initialized separately through +// WebRtc_InitBinaryDelayEstimatorFarend(...). +// +// Inputs: +// - history_size : Size of the far-end binary spectrum history. +// +// Return value: +// - BinaryDelayEstimatorFarend* +// : Created |handle|. If the memory can't be allocated +// or if any of the input parameters are invalid NULL +// is returned. +// +BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend( + int history_size); + +// Re-allocates the buffers. +// +// Inputs: +// - self : Pointer to the binary estimation far-end instance +// which is the return value of +// WebRtc_CreateBinaryDelayEstimatorFarend(). +// - history_size : Size of the far-end binary spectrum history. +// +// Return value: +// - history_size : The history size allocated. +int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self, + int history_size); + +// Initializes the delay estimation far-end instance created with +// WebRtc_CreateBinaryDelayEstimatorFarend(...). +// +// Input: +// - self : Pointer to the delay estimation far-end instance. +// +// Output: +// - self : Initialized far-end instance. +// +void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self); + +// Soft resets the delay estimation far-end instance created with +// WebRtc_CreateBinaryDelayEstimatorFarend(...). +// +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +// +void WebRtc_SoftResetBinaryDelayEstimatorFarend( + BinaryDelayEstimatorFarend* self, int delay_shift); + +// Adds the binary far-end spectrum to the internal far-end history buffer. This +// spectrum is used as reference when calculating the delay using +// WebRtc_ProcessBinarySpectrum(). +// +// Inputs: +// - self : Pointer to the delay estimation far-end +// instance. 
+// - binary_far_spectrum : Far-end binary spectrum. +// +// Output: +// - self : Updated far-end instance. +// +void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* self, + uint32_t binary_far_spectrum); + +// Releases the memory allocated by WebRtc_CreateBinaryDelayEstimator(...). +// +// Note that BinaryDelayEstimator utilizes BinaryDelayEstimatorFarend, but does +// not take ownership of it, hence the BinaryDelayEstimator has to be torn down +// before the far-end. +// +// Input: +// - self : Pointer to the binary delay estimation instance +// which is the return value of +// WebRtc_CreateBinaryDelayEstimator(). +// +void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self); + +// Allocates the memory needed by the binary delay estimation. The memory needs +// to be initialized separately through WebRtc_InitBinaryDelayEstimator(...). +// +// See WebRtc_CreateDelayEstimator(..) in delay_estimator_wrapper.c for detailed +// description. +BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator( + BinaryDelayEstimatorFarend* farend, int max_lookahead); + +// Re-allocates |history_size| dependent buffers. The far-end buffers will be +// updated at the same time if needed. +// +// Input: +// - self : Pointer to the binary estimation instance which is +// the return value of +// WebRtc_CreateBinaryDelayEstimator(). +// - history_size : Size of the history buffers. +// +// Return value: +// - history_size : The history size allocated. +int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self, + int history_size); + +// Initializes the delay estimation instance created with +// WebRtc_CreateBinaryDelayEstimator(...). +// +// Input: +// - self : Pointer to the delay estimation instance. +// +// Output: +// - self : Initialized instance. +// +void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self); + +// Soft resets the delay estimation instance created with +// WebRtc_CreateBinaryDelayEstimator(...). 
+// +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +// +// Return value: +// - actual_shifts : The actual number of shifts performed. +// +int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self, + int delay_shift); + +// Estimates and returns the delay between the binary far-end and binary near- +// end spectra. It is assumed the binary far-end spectrum has been added using +// WebRtc_AddBinaryFarSpectrum() prior to this call. The value will be offset by +// the lookahead (i.e. the lookahead should be subtracted from the returned +// value). +// +// Inputs: +// - self : Pointer to the delay estimation instance. +// - binary_near_spectrum : Near-end binary spectrum of the current block. +// +// Output: +// - self : Updated instance. +// +// Return value: +// - delay : >= 0 - Calculated delay value. +// -2 - Insufficient data for estimation. +// +int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self, + uint32_t binary_near_spectrum); + +// Returns the last calculated delay updated by the function +// WebRtc_ProcessBinarySpectrum(...). +// +// Input: +// - self : Pointer to the delay estimation instance. +// +// Return value: +// - delay : >= 0 - Last calculated delay value +// -2 - Insufficient data for estimation. +// +int WebRtc_binary_last_delay(BinaryDelayEstimator* self); + +// Returns the estimation quality of the last calculated delay updated by the +// function WebRtc_ProcessBinarySpectrum(...). The estimation quality is a value +// in the interval [0, 1]. The higher the value, the better the quality. +// +// Return value: +// - delay_quality : >= 0 - Estimation quality of last calculated +// delay value. +float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self); + +// Updates the |mean_value| recursively with a step size of 2^-|factor|. This +// function is used internally in the Binary Delay Estimator as well as the +// Fixed point wrapper. 
+// +// Inputs: +// - new_value : The new value the mean should be updated with. +// - factor : The step size, in number of right shifts. +// +// Input/Output: +// - mean_value : Pointer to the mean value. +// +void WebRtc_MeanEstimatorFix(int32_t new_value, + int factor, + int32_t* mean_value); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_internal.h b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_internal.h new file mode 100644 index 00000000..fd11028f --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_internal.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Header file including the delay estimator handle used for testing. + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_ + +#include "webrtc/modules/audio_processing/utility/delay_estimator.h" +#include "webrtc/typedefs.h" + +typedef union { + float float_; + int32_t int32_; +} SpectrumType; + +typedef struct { + // Pointers to mean values of spectrum. + SpectrumType* mean_far_spectrum; + // |mean_far_spectrum| initialization indicator. + int far_spectrum_initialized; + + int spectrum_size; + + // Far-end part of binary spectrum based delay estimation. + BinaryDelayEstimatorFarend* binary_farend; +} DelayEstimatorFarend; + +typedef struct { + // Pointers to mean values of spectrum. 
+ SpectrumType* mean_near_spectrum; + // |mean_near_spectrum| initialization indicator. + int near_spectrum_initialized; + + int spectrum_size; + + // Binary spectrum based delay estimator + BinaryDelayEstimator* binary_handle; +} DelayEstimator; + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_ diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc new file mode 100644 index 00000000..4ebe0e61 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc @@ -0,0 +1,621 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "testing/gtest/include/gtest/gtest.h" + +extern "C" { +#include "webrtc/modules/audio_processing/utility/delay_estimator.h" +#include "webrtc/modules/audio_processing/utility/delay_estimator_internal.h" +#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" +} +#include "webrtc/typedefs.h" + +namespace { + +enum { kSpectrumSize = 65 }; +// Delay history sizes. +enum { kMaxDelay = 100 }; +enum { kLookahead = 10 }; +enum { kHistorySize = kMaxDelay + kLookahead }; +// Length of binary spectrum sequence. 
+enum { kSequenceLength = 400 }; + +const int kDifferentHistorySize = 3; +const int kDifferentLookahead = 1; + +const int kEnable[] = { 0, 1 }; +const size_t kSizeEnable = sizeof(kEnable) / sizeof(*kEnable); + +class DelayEstimatorTest : public ::testing::Test { + protected: + DelayEstimatorTest(); + virtual void SetUp(); + virtual void TearDown(); + + void Init(); + void InitBinary(); + void VerifyDelay(BinaryDelayEstimator* binary_handle, int offset, int delay); + void RunBinarySpectra(BinaryDelayEstimator* binary1, + BinaryDelayEstimator* binary2, + int near_offset, int lookahead_offset, int far_offset); + void RunBinarySpectraTest(int near_offset, int lookahead_offset, + int ref_robust_validation, int robust_validation); + + void* handle_; + DelayEstimator* self_; + void* farend_handle_; + DelayEstimatorFarend* farend_self_; + BinaryDelayEstimator* binary_; + BinaryDelayEstimatorFarend* binary_farend_; + int spectrum_size_; + // Dummy input spectra. + float far_f_[kSpectrumSize]; + float near_f_[kSpectrumSize]; + uint16_t far_u16_[kSpectrumSize]; + uint16_t near_u16_[kSpectrumSize]; + uint32_t binary_spectrum_[kSequenceLength + kHistorySize]; +}; + +DelayEstimatorTest::DelayEstimatorTest() + : handle_(NULL), + self_(NULL), + farend_handle_(NULL), + farend_self_(NULL), + binary_(NULL), + binary_farend_(NULL), + spectrum_size_(kSpectrumSize) { + // Dummy input data are set with more or less arbitrary non-zero values. + memset(far_f_, 1, sizeof(far_f_)); + memset(near_f_, 2, sizeof(near_f_)); + memset(far_u16_, 1, sizeof(far_u16_)); + memset(near_u16_, 2, sizeof(near_u16_)); + // Construct a sequence of binary spectra used to verify delay estimate. The + // |kSequenceLength| has to be long enough for the delay estimation to leave + // the initialized state. 
+ binary_spectrum_[0] = 1; + for (int i = 1; i < (kSequenceLength + kHistorySize); i++) { + binary_spectrum_[i] = 3 * binary_spectrum_[i - 1]; + } +} + +void DelayEstimatorTest::SetUp() { + farend_handle_ = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, + kHistorySize); + ASSERT_TRUE(farend_handle_ != NULL); + farend_self_ = reinterpret_cast<DelayEstimatorFarend*>(farend_handle_); + handle_ = WebRtc_CreateDelayEstimator(farend_handle_, kLookahead); + ASSERT_TRUE(handle_ != NULL); + self_ = reinterpret_cast<DelayEstimator*>(handle_); + binary_farend_ = WebRtc_CreateBinaryDelayEstimatorFarend(kHistorySize); + ASSERT_TRUE(binary_farend_ != NULL); + binary_ = WebRtc_CreateBinaryDelayEstimator(binary_farend_, kLookahead); + ASSERT_TRUE(binary_ != NULL); +} + +void DelayEstimatorTest::TearDown() { + WebRtc_FreeDelayEstimator(handle_); + handle_ = NULL; + self_ = NULL; + WebRtc_FreeDelayEstimatorFarend(farend_handle_); + farend_handle_ = NULL; + farend_self_ = NULL; + WebRtc_FreeBinaryDelayEstimator(binary_); + binary_ = NULL; + WebRtc_FreeBinaryDelayEstimatorFarend(binary_farend_); + binary_farend_ = NULL; +} + +void DelayEstimatorTest::Init() { + // Initialize Delay Estimator + EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_)); + // Verify initialization. + EXPECT_EQ(0, farend_self_->far_spectrum_initialized); + EXPECT_EQ(0, self_->near_spectrum_initialized); + EXPECT_EQ(-2, WebRtc_last_delay(handle_)); // Delay in initial state. + EXPECT_FLOAT_EQ(0, WebRtc_last_delay_quality(handle_)); // Zero quality. +} + +void DelayEstimatorTest::InitBinary() { + // Initialize Binary Delay Estimator (far-end part). + WebRtc_InitBinaryDelayEstimatorFarend(binary_farend_); + // Initialize Binary Delay Estimator + WebRtc_InitBinaryDelayEstimator(binary_); + // Verify initialization. This does not guarantee a complete check, since + // |last_delay| may be equal to -2 before initialization if done on the fly. 
+ EXPECT_EQ(-2, binary_->last_delay); +} + +void DelayEstimatorTest::VerifyDelay(BinaryDelayEstimator* binary_handle, + int offset, int delay) { + // Verify that we WebRtc_binary_last_delay() returns correct delay. + EXPECT_EQ(delay, WebRtc_binary_last_delay(binary_handle)); + + if (delay != -2) { + // Verify correct delay estimate. In the non-causal case the true delay + // is equivalent with the |offset|. + EXPECT_EQ(offset, delay); + } +} + +void DelayEstimatorTest::RunBinarySpectra(BinaryDelayEstimator* binary1, + BinaryDelayEstimator* binary2, + int near_offset, + int lookahead_offset, + int far_offset) { + int different_validations = binary1->robust_validation_enabled ^ + binary2->robust_validation_enabled; + WebRtc_InitBinaryDelayEstimatorFarend(binary_farend_); + WebRtc_InitBinaryDelayEstimator(binary1); + WebRtc_InitBinaryDelayEstimator(binary2); + // Verify initialization. This does not guarantee a complete check, since + // |last_delay| may be equal to -2 before initialization if done on the fly. + EXPECT_EQ(-2, binary1->last_delay); + EXPECT_EQ(-2, binary2->last_delay); + for (int i = kLookahead; i < (kSequenceLength + kLookahead); i++) { + WebRtc_AddBinaryFarSpectrum(binary_farend_, + binary_spectrum_[i + far_offset]); + int delay_1 = WebRtc_ProcessBinarySpectrum(binary1, binary_spectrum_[i]); + int delay_2 = + WebRtc_ProcessBinarySpectrum(binary2, + binary_spectrum_[i - near_offset]); + + VerifyDelay(binary1, far_offset + kLookahead, delay_1); + VerifyDelay(binary2, + far_offset + kLookahead + lookahead_offset + near_offset, + delay_2); + // Expect the two delay estimates to be offset by |lookahead_offset| + + // |near_offset| when we have left the initial state. + if ((delay_1 != -2) && (delay_2 != -2)) { + EXPECT_EQ(delay_1, delay_2 - lookahead_offset - near_offset); + } + // For the case of identical signals |delay_1| and |delay_2| should match + // all the time, unless one of them has robust validation turned on. 
In + // that case the robust validation leaves the initial state faster. + if ((near_offset == 0) && (lookahead_offset == 0)) { + if (!different_validations) { + EXPECT_EQ(delay_1, delay_2); + } else { + if (binary1->robust_validation_enabled) { + EXPECT_GE(delay_1, delay_2); + } else { + EXPECT_GE(delay_2, delay_1); + } + } + } + } + // Verify that we have left the initialized state. + EXPECT_NE(-2, WebRtc_binary_last_delay(binary1)); + EXPECT_LT(0, WebRtc_binary_last_delay_quality(binary1)); + EXPECT_NE(-2, WebRtc_binary_last_delay(binary2)); + EXPECT_LT(0, WebRtc_binary_last_delay_quality(binary2)); +} + +void DelayEstimatorTest::RunBinarySpectraTest(int near_offset, + int lookahead_offset, + int ref_robust_validation, + int robust_validation) { + BinaryDelayEstimator* binary2 = + WebRtc_CreateBinaryDelayEstimator(binary_farend_, + kLookahead + lookahead_offset); + // Verify the delay for both causal and non-causal systems. For causal systems + // the delay is equivalent with a positive |offset| of the far-end sequence. + // For non-causal systems the delay is equivalent with a negative |offset| of + // the far-end sequence. + binary_->robust_validation_enabled = ref_robust_validation; + binary2->robust_validation_enabled = robust_validation; + for (int offset = -kLookahead; + offset < kMaxDelay - lookahead_offset - near_offset; + offset++) { + RunBinarySpectra(binary_, binary2, near_offset, lookahead_offset, offset); + } + WebRtc_FreeBinaryDelayEstimator(binary2); + binary2 = NULL; + binary_->robust_validation_enabled = 0; // Reset reference. +} + +TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfWrapper) { + // In this test we verify correct error returns on invalid API calls. + + // WebRtc_CreateDelayEstimatorFarend() and WebRtc_CreateDelayEstimator() + // should return a NULL pointer on invalid input values. + // Make sure we have a non-NULL value at start, so we can detect NULL after + // create failure. 
+ void* handle = farend_handle_; + handle = WebRtc_CreateDelayEstimatorFarend(33, kHistorySize); + EXPECT_TRUE(handle == NULL); + handle = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, 1); + EXPECT_TRUE(handle == NULL); + + handle = handle_; + handle = WebRtc_CreateDelayEstimator(NULL, kLookahead); + EXPECT_TRUE(handle == NULL); + handle = WebRtc_CreateDelayEstimator(farend_handle_, -1); + EXPECT_TRUE(handle == NULL); + + // WebRtc_InitDelayEstimatorFarend() and WebRtc_InitDelayEstimator() should + // return -1 if we have a NULL pointer as |handle|. + EXPECT_EQ(-1, WebRtc_InitDelayEstimatorFarend(NULL)); + EXPECT_EQ(-1, WebRtc_InitDelayEstimator(NULL)); + + // WebRtc_AddFarSpectrumFloat() should return -1 if we have: + // 1) NULL pointer as |handle|. + // 2) NULL pointer as far-end spectrum. + // 3) Incorrect spectrum size. + EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(NULL, far_f_, spectrum_size_)); + // Use |farend_handle_| which is properly created at SetUp(). + EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(farend_handle_, NULL, + spectrum_size_)); + EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_, + spectrum_size_ + 1)); + + // WebRtc_AddFarSpectrumFix() should return -1 if we have: + // 1) NULL pointer as |handle|. + // 2) NULL pointer as far-end spectrum. + // 3) Incorrect spectrum size. + // 4) Too high precision in far-end spectrum (Q-domain > 15). + EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(NULL, far_u16_, spectrum_size_, 0)); + EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, NULL, spectrum_size_, + 0)); + EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_, + spectrum_size_ + 1, 0)); + EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_, + spectrum_size_, 16)); + + // WebRtc_set_history_size() should return -1 if: + // 1) |handle| is a NULL. + // 2) |history_size| <= 1. 
+ EXPECT_EQ(-1, WebRtc_set_history_size(NULL, 1)); + EXPECT_EQ(-1, WebRtc_set_history_size(handle_, 1)); + // WebRtc_history_size() should return -1 if: + // 1) NULL pointer input. + EXPECT_EQ(-1, WebRtc_history_size(NULL)); + // 2) there is a mismatch between history size. + void* tmp_handle = WebRtc_CreateDelayEstimator(farend_handle_, kHistorySize); + EXPECT_EQ(0, WebRtc_InitDelayEstimator(tmp_handle)); + EXPECT_EQ(kDifferentHistorySize, + WebRtc_set_history_size(tmp_handle, kDifferentHistorySize)); + EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(tmp_handle)); + EXPECT_EQ(kHistorySize, WebRtc_set_history_size(handle_, kHistorySize)); + EXPECT_EQ(-1, WebRtc_history_size(tmp_handle)); + + // WebRtc_set_lookahead() should return -1 if we try a value outside the + /// buffer. + EXPECT_EQ(-1, WebRtc_set_lookahead(handle_, kLookahead + 1)); + EXPECT_EQ(-1, WebRtc_set_lookahead(handle_, -1)); + + // WebRtc_set_allowed_offset() should return -1 if we have: + // 1) NULL pointer as |handle|. + // 2) |allowed_offset| < 0. + EXPECT_EQ(-1, WebRtc_set_allowed_offset(NULL, 0)); + EXPECT_EQ(-1, WebRtc_set_allowed_offset(handle_, -1)); + + EXPECT_EQ(-1, WebRtc_get_allowed_offset(NULL)); + + // WebRtc_enable_robust_validation() should return -1 if we have: + // 1) NULL pointer as |handle|. + // 2) Incorrect |enable| value (not 0 or 1). + EXPECT_EQ(-1, WebRtc_enable_robust_validation(NULL, kEnable[0])); + EXPECT_EQ(-1, WebRtc_enable_robust_validation(handle_, -1)); + EXPECT_EQ(-1, WebRtc_enable_robust_validation(handle_, 2)); + + // WebRtc_is_robust_validation_enabled() should return -1 if we have NULL + // pointer as |handle|. + EXPECT_EQ(-1, WebRtc_is_robust_validation_enabled(NULL)); + + // WebRtc_DelayEstimatorProcessFloat() should return -1 if we have: + // 1) NULL pointer as |handle|. + // 2) NULL pointer as near-end spectrum. + // 3) Incorrect spectrum size. + // 4) Non matching history sizes if multiple delay estimators using the same + // far-end reference. 
+ EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(NULL, near_f_, + spectrum_size_)); + // Use |handle_| which is properly created at SetUp(). + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(handle_, NULL, + spectrum_size_)); + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(handle_, near_f_, + spectrum_size_ + 1)); + // |tmp_handle| is already in a non-matching state. + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(tmp_handle, + near_f_, + spectrum_size_)); + + // WebRtc_DelayEstimatorProcessFix() should return -1 if we have: + // 1) NULL pointer as |handle|. + // 2) NULL pointer as near-end spectrum. + // 3) Incorrect spectrum size. + // 4) Too high precision in near-end spectrum (Q-domain > 15). + // 5) Non matching history sizes if multiple delay estimators using the same + // far-end reference. + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(NULL, near_u16_, spectrum_size_, + 0)); + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, NULL, spectrum_size_, + 0)); + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_, + spectrum_size_ + 1, 0)); + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_, + spectrum_size_, 16)); + // |tmp_handle| is already in a non-matching state. + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(tmp_handle, + near_u16_, + spectrum_size_, + 0)); + WebRtc_FreeDelayEstimator(tmp_handle); + + // WebRtc_last_delay() should return -1 if we have a NULL pointer as |handle|. + EXPECT_EQ(-1, WebRtc_last_delay(NULL)); + + // Free any local memory if needed. + WebRtc_FreeDelayEstimator(handle); +} + +TEST_F(DelayEstimatorTest, VerifyAllowedOffset) { + // Is set to zero by default. + EXPECT_EQ(0, WebRtc_get_allowed_offset(handle_)); + for (int i = 1; i >= 0; i--) { + EXPECT_EQ(0, WebRtc_set_allowed_offset(handle_, i)); + EXPECT_EQ(i, WebRtc_get_allowed_offset(handle_)); + Init(); + // Unaffected over a reset. 
+ EXPECT_EQ(i, WebRtc_get_allowed_offset(handle_)); + } +} + +TEST_F(DelayEstimatorTest, VerifyEnableRobustValidation) { + // Disabled by default. + EXPECT_EQ(0, WebRtc_is_robust_validation_enabled(handle_)); + for (size_t i = 0; i < kSizeEnable; ++i) { + EXPECT_EQ(0, WebRtc_enable_robust_validation(handle_, kEnable[i])); + EXPECT_EQ(kEnable[i], WebRtc_is_robust_validation_enabled(handle_)); + Init(); + // Unaffected over a reset. + EXPECT_EQ(kEnable[i], WebRtc_is_robust_validation_enabled(handle_)); + } +} + +TEST_F(DelayEstimatorTest, InitializedSpectrumAfterProcess) { + // In this test we verify that the mean spectra are initialized after first + // time we call WebRtc_AddFarSpectrum() and Process() respectively. The test + // also verifies the state is not left for zero spectra. + const float kZerosFloat[kSpectrumSize] = { 0.0 }; + const uint16_t kZerosU16[kSpectrumSize] = { 0 }; + + // For floating point operations, process one frame and verify initialization + // flag. + Init(); + EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, kZerosFloat, + spectrum_size_)); + EXPECT_EQ(0, farend_self_->far_spectrum_initialized); + EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_, + spectrum_size_)); + EXPECT_EQ(1, farend_self_->far_spectrum_initialized); + EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFloat(handle_, kZerosFloat, + spectrum_size_)); + EXPECT_EQ(0, self_->near_spectrum_initialized); + EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFloat(handle_, near_f_, + spectrum_size_)); + EXPECT_EQ(1, self_->near_spectrum_initialized); + + // For fixed point operations, process one frame and verify initialization + // flag. 
+ Init(); + EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, kZerosU16, + spectrum_size_, 0)); + EXPECT_EQ(0, farend_self_->far_spectrum_initialized); + EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_, + spectrum_size_, 0)); + EXPECT_EQ(1, farend_self_->far_spectrum_initialized); + EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFix(handle_, kZerosU16, + spectrum_size_, 0)); + EXPECT_EQ(0, self_->near_spectrum_initialized); + EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_, + spectrum_size_, 0)); + EXPECT_EQ(1, self_->near_spectrum_initialized); +} + +TEST_F(DelayEstimatorTest, CorrectLastDelay) { + // In this test we verify that we get the correct last delay upon valid call. + // We simply process the same data until we leave the initialized state + // (|last_delay| = -2). Then we compare the Process() output with the + // last_delay() call. + + // TODO(bjornv): Update quality values for robust validation. + int last_delay = 0; + // Floating point operations. + Init(); + for (int i = 0; i < 200; i++) { + EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_, + spectrum_size_)); + last_delay = WebRtc_DelayEstimatorProcessFloat(handle_, near_f_, + spectrum_size_); + if (last_delay != -2) { + EXPECT_EQ(last_delay, WebRtc_last_delay(handle_)); + if (!WebRtc_is_robust_validation_enabled(handle_)) { + EXPECT_FLOAT_EQ(7203.f / kMaxBitCountsQ9, + WebRtc_last_delay_quality(handle_)); + } + break; + } + } + // Verify that we have left the initialized state. + EXPECT_NE(-2, WebRtc_last_delay(handle_)); + EXPECT_LT(0, WebRtc_last_delay_quality(handle_)); + + // Fixed point operations. 
+ Init(); + for (int i = 0; i < 200; i++) { + EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_, + spectrum_size_, 0)); + last_delay = WebRtc_DelayEstimatorProcessFix(handle_, near_u16_, + spectrum_size_, 0); + if (last_delay != -2) { + EXPECT_EQ(last_delay, WebRtc_last_delay(handle_)); + if (!WebRtc_is_robust_validation_enabled(handle_)) { + EXPECT_FLOAT_EQ(7203.f / kMaxBitCountsQ9, + WebRtc_last_delay_quality(handle_)); + } + break; + } + } + // Verify that we have left the initialized state. + EXPECT_NE(-2, WebRtc_last_delay(handle_)); + EXPECT_LT(0, WebRtc_last_delay_quality(handle_)); +} + +TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfBinaryEstimatorFarend) { + // In this test we verify correct output on invalid API calls to the Binary + // Delay Estimator (far-end part). + + BinaryDelayEstimatorFarend* binary = binary_farend_; + // WebRtc_CreateBinaryDelayEstimatorFarend() should return -1 if the input + // history size is less than 2. This is to make sure the buffer shifting + // applies properly. + // Make sure we have a non-NULL value at start, so we can detect NULL after + // create failure. + binary = WebRtc_CreateBinaryDelayEstimatorFarend(1); + EXPECT_TRUE(binary == NULL); +} + +TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfBinaryEstimator) { + // In this test we verify correct output on invalid API calls to the Binary + // Delay Estimator. + + BinaryDelayEstimator* binary_handle = binary_; + // WebRtc_CreateBinaryDelayEstimator() should return -1 if we have a NULL + // pointer as |binary_farend| or invalid input values. Upon failure, the + // |binary_handle| should be NULL. + // Make sure we have a non-NULL value at start, so we can detect NULL after + // create failure. 
+ binary_handle = WebRtc_CreateBinaryDelayEstimator(NULL, kLookahead); + EXPECT_TRUE(binary_handle == NULL); + binary_handle = WebRtc_CreateBinaryDelayEstimator(binary_farend_, -1); + EXPECT_TRUE(binary_handle == NULL); +} + +TEST_F(DelayEstimatorTest, MeanEstimatorFix) { + // In this test we verify that we update the mean value in correct direction + // only. With "direction" we mean increase or decrease. + + int32_t mean_value = 4000; + int32_t mean_value_before = mean_value; + int32_t new_mean_value = mean_value * 2; + + // Increasing |mean_value|. + WebRtc_MeanEstimatorFix(new_mean_value, 10, &mean_value); + EXPECT_LT(mean_value_before, mean_value); + EXPECT_GT(new_mean_value, mean_value); + + // Decreasing |mean_value|. + new_mean_value = mean_value / 2; + mean_value_before = mean_value; + WebRtc_MeanEstimatorFix(new_mean_value, 10, &mean_value); + EXPECT_GT(mean_value_before, mean_value); + EXPECT_LT(new_mean_value, mean_value); +} + +TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearSameSpectrum) { + // In this test we verify that we get the correct delay estimates if we shift + // the signal accordingly. We create two Binary Delay Estimators and feed them + // with the same signals, so they should output the same results. + // We verify both causal and non-causal delays. + // For these noise free signals, the robust validation should not have an + // impact, hence we turn robust validation on/off for both reference and + // delayed near end. + + for (size_t i = 0; i < kSizeEnable; ++i) { + for (size_t j = 0; j < kSizeEnable; ++j) { + RunBinarySpectraTest(0, 0, kEnable[i], kEnable[j]); + } + } +} + +TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearDifferentSpectrum) { + // In this test we use the same setup as above, but we now feed the two Binary + // Delay Estimators with different signals, so they should output different + // results. 
+ // For these noise free signals, the robust validation should not have an + // impact, hence we turn robust validation on/off for both reference and + // delayed near end. + + const int kNearOffset = 1; + for (size_t i = 0; i < kSizeEnable; ++i) { + for (size_t j = 0; j < kSizeEnable; ++j) { + RunBinarySpectraTest(kNearOffset, 0, kEnable[i], kEnable[j]); + } + } +} + +TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearDifferentLookahead) { + // In this test we use the same setup as above, feeding the two Binary + // Delay Estimators with the same signals. The difference is that we create + // them with different lookahead. + // For these noise free signals, the robust validation should not have an + // impact, hence we turn robust validation on/off for both reference and + // delayed near end. + + const int kLookaheadOffset = 1; + for (size_t i = 0; i < kSizeEnable; ++i) { + for (size_t j = 0; j < kSizeEnable; ++j) { + RunBinarySpectraTest(0, kLookaheadOffset, kEnable[i], kEnable[j]); + } + } +} + +TEST_F(DelayEstimatorTest, AllowedOffsetNoImpactWhenRobustValidationDisabled) { + // The same setup as in ExactDelayEstimateMultipleNearSameSpectrum with the + // difference that |allowed_offset| is set for the reference binary delay + // estimator. + + binary_->allowed_offset = 10; + RunBinarySpectraTest(0, 0, 0, 0); + binary_->allowed_offset = 0; // Reset reference. 
+} + +TEST_F(DelayEstimatorTest, VerifyLookaheadAtCreate) { + void* farend_handle = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, + kMaxDelay); + ASSERT_TRUE(farend_handle != NULL); + void* handle = WebRtc_CreateDelayEstimator(farend_handle, kLookahead); + ASSERT_TRUE(handle != NULL); + EXPECT_EQ(kLookahead, WebRtc_lookahead(handle)); + WebRtc_FreeDelayEstimator(handle); + WebRtc_FreeDelayEstimatorFarend(farend_handle); +} + +TEST_F(DelayEstimatorTest, VerifyLookaheadIsSetAndKeptAfterInit) { + EXPECT_EQ(kLookahead, WebRtc_lookahead(handle_)); + EXPECT_EQ(kDifferentLookahead, + WebRtc_set_lookahead(handle_, kDifferentLookahead)); + EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_)); + EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_)); + EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_)); +} + +TEST_F(DelayEstimatorTest, VerifyHistorySizeAtCreate) { + EXPECT_EQ(kHistorySize, WebRtc_history_size(handle_)); +} + +TEST_F(DelayEstimatorTest, VerifyHistorySizeIsSetAndKeptAfterInit) { + EXPECT_EQ(kHistorySize, WebRtc_history_size(handle_)); + EXPECT_EQ(kDifferentHistorySize, + WebRtc_set_history_size(handle_, kDifferentHistorySize)); + EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_)); + EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_)); + EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_)); +} + +// TODO(bjornv): Add tests for SoftReset...(...). 
+ +} // namespace diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.c b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.c new file mode 100644 index 00000000..270588f3 --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.c @@ -0,0 +1,485 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "webrtc/modules/audio_processing/utility/delay_estimator.h" +#include "webrtc/modules/audio_processing/utility/delay_estimator_internal.h" +#include "webrtc/system_wrappers/interface/compile_assert_c.h" + +// Only bit |kBandFirst| through bit |kBandLast| are processed and +// |kBandFirst| - |kBandLast| must be < 32. +enum { kBandFirst = 12 }; +enum { kBandLast = 43 }; + +static __inline uint32_t SetBit(uint32_t in, int pos) { + uint32_t mask = (1 << pos); + uint32_t out = (in | mask); + + return out; +} + +// Calculates the mean recursively. Same version as WebRtc_MeanEstimatorFix(), +// but for float. +// +// Inputs: +// - new_value : New additional value. +// - scale : Scale for smoothing (should be less than 1.0). +// +// Input/Output: +// - mean_value : Pointer to the mean value for updating. 
+// +static void MeanEstimatorFloat(float new_value, + float scale, + float* mean_value) { + assert(scale < 1.0f); + *mean_value += (new_value - *mean_value) * scale; +} + +// Computes the binary spectrum by comparing the input |spectrum| with a +// |threshold_spectrum|. Float and fixed point versions. +// +// Inputs: +// - spectrum : Spectrum of which the binary spectrum should be +// calculated. +// - threshold_spectrum : Threshold spectrum with which the input +// spectrum is compared. +// Return: +// - out : Binary spectrum. +// +static uint32_t BinarySpectrumFix(const uint16_t* spectrum, + SpectrumType* threshold_spectrum, + int q_domain, + int* threshold_initialized) { + int i = kBandFirst; + uint32_t out = 0; + + assert(q_domain < 16); + + if (!(*threshold_initialized)) { + // Set the |threshold_spectrum| to half the input |spectrum| as starting + // value. This speeds up the convergence. + for (i = kBandFirst; i <= kBandLast; i++) { + if (spectrum[i] > 0) { + // Convert input spectrum from Q(|q_domain|) to Q15. + int32_t spectrum_q15 = ((int32_t) spectrum[i]) << (15 - q_domain); + threshold_spectrum[i].int32_ = (spectrum_q15 >> 1); + *threshold_initialized = 1; + } + } + } + for (i = kBandFirst; i <= kBandLast; i++) { + // Convert input spectrum from Q(|q_domain|) to Q15. + int32_t spectrum_q15 = ((int32_t) spectrum[i]) << (15 - q_domain); + // Update the |threshold_spectrum|. + WebRtc_MeanEstimatorFix(spectrum_q15, 6, &(threshold_spectrum[i].int32_)); + // Convert |spectrum| at current frequency bin to a binary value. + if (spectrum_q15 > threshold_spectrum[i].int32_) { + out = SetBit(out, i - kBandFirst); + } + } + + return out; +} + +static uint32_t BinarySpectrumFloat(const float* spectrum, + SpectrumType* threshold_spectrum, + int* threshold_initialized) { + int i = kBandFirst; + uint32_t out = 0; + const float kScale = 1 / 64.0; + + if (!(*threshold_initialized)) { + // Set the |threshold_spectrum| to half the input |spectrum| as starting + // value. 
This speeds up the convergence. + for (i = kBandFirst; i <= kBandLast; i++) { + if (spectrum[i] > 0.0f) { + threshold_spectrum[i].float_ = (spectrum[i] / 2); + *threshold_initialized = 1; + } + } + } + + for (i = kBandFirst; i <= kBandLast; i++) { + // Update the |threshold_spectrum|. + MeanEstimatorFloat(spectrum[i], kScale, &(threshold_spectrum[i].float_)); + // Convert |spectrum| at current frequency bin to a binary value. + if (spectrum[i] > threshold_spectrum[i].float_) { + out = SetBit(out, i - kBandFirst); + } + } + + return out; +} + +void WebRtc_FreeDelayEstimatorFarend(void* handle) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + + if (handle == NULL) { + return; + } + + free(self->mean_far_spectrum); + self->mean_far_spectrum = NULL; + + WebRtc_FreeBinaryDelayEstimatorFarend(self->binary_farend); + self->binary_farend = NULL; + + free(self); +} + +void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size) { + DelayEstimatorFarend* self = NULL; + + // Check if the sub band used in the delay estimation is small enough to fit + // the binary spectra in a uint32_t. + COMPILE_ASSERT(kBandLast - kBandFirst < 32); + + if (spectrum_size >= kBandLast) { + self = malloc(sizeof(DelayEstimatorFarend)); + } + + if (self != NULL) { + int memory_fail = 0; + + // Allocate memory for the binary far-end spectrum handling. + self->binary_farend = WebRtc_CreateBinaryDelayEstimatorFarend(history_size); + memory_fail |= (self->binary_farend == NULL); + + // Allocate memory for spectrum buffers. 
+ self->mean_far_spectrum = malloc(spectrum_size * sizeof(SpectrumType)); + memory_fail |= (self->mean_far_spectrum == NULL); + + self->spectrum_size = spectrum_size; + + if (memory_fail) { + WebRtc_FreeDelayEstimatorFarend(self); + self = NULL; + } + } + + return self; +} + +int WebRtc_InitDelayEstimatorFarend(void* handle) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + + if (self == NULL) { + return -1; + } + + // Initialize far-end part of binary delay estimator. + WebRtc_InitBinaryDelayEstimatorFarend(self->binary_farend); + + // Set averaged far and near end spectra to zero. + memset(self->mean_far_spectrum, 0, + sizeof(SpectrumType) * self->spectrum_size); + // Reset initialization indicators. + self->far_spectrum_initialized = 0; + + return 0; +} + +void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + assert(self != NULL); + WebRtc_SoftResetBinaryDelayEstimatorFarend(self->binary_farend, delay_shift); +} + +int WebRtc_AddFarSpectrumFix(void* handle, + const uint16_t* far_spectrum, + int spectrum_size, + int far_q) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (far_spectrum == NULL) { + // Empty far end spectrum. + return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + if (far_q > 15) { + // If |far_q| is larger than 15 we cannot guarantee no wrap around. + return -1; + } + + // Get binary spectrum. 
+ binary_spectrum = BinarySpectrumFix(far_spectrum, self->mean_far_spectrum, + far_q, &(self->far_spectrum_initialized)); + WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum); + + return 0; +} + +int WebRtc_AddFarSpectrumFloat(void* handle, + const float* far_spectrum, + int spectrum_size) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (far_spectrum == NULL) { + // Empty far end spectrum. + return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + + // Get binary spectrum. + binary_spectrum = BinarySpectrumFloat(far_spectrum, self->mean_far_spectrum, + &(self->far_spectrum_initialized)); + WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum); + + return 0; +} + +void WebRtc_FreeDelayEstimator(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + + if (handle == NULL) { + return; + } + + free(self->mean_near_spectrum); + self->mean_near_spectrum = NULL; + + WebRtc_FreeBinaryDelayEstimator(self->binary_handle); + self->binary_handle = NULL; + + free(self); +} + +void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead) { + DelayEstimator* self = NULL; + DelayEstimatorFarend* farend = (DelayEstimatorFarend*) farend_handle; + + if (farend_handle != NULL) { + self = malloc(sizeof(DelayEstimator)); + } + + if (self != NULL) { + int memory_fail = 0; + + // Allocate memory for the farend spectrum handling. + self->binary_handle = + WebRtc_CreateBinaryDelayEstimator(farend->binary_farend, max_lookahead); + memory_fail |= (self->binary_handle == NULL); + + // Allocate memory for spectrum buffers. 
+ self->mean_near_spectrum = malloc(farend->spectrum_size * + sizeof(SpectrumType)); + memory_fail |= (self->mean_near_spectrum == NULL); + + self->spectrum_size = farend->spectrum_size; + + if (memory_fail) { + WebRtc_FreeDelayEstimator(self); + self = NULL; + } + } + + return self; +} + +int WebRtc_InitDelayEstimator(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + + // Initialize binary delay estimator. + WebRtc_InitBinaryDelayEstimator(self->binary_handle); + + // Set averaged far and near end spectra to zero. + memset(self->mean_near_spectrum, 0, + sizeof(SpectrumType) * self->spectrum_size); + // Reset initialization indicators. + self->near_spectrum_initialized = 0; + + return 0; +} + +int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift) { + DelayEstimator* self = (DelayEstimator*) handle; + assert(self != NULL); + return WebRtc_SoftResetBinaryDelayEstimator(self->binary_handle, delay_shift); +} + +int WebRtc_set_history_size(void* handle, int history_size) { + DelayEstimator* self = handle; + + if ((self == NULL) || (history_size <= 1)) { + return -1; + } + return WebRtc_AllocateHistoryBufferMemory(self->binary_handle, history_size); +} + +int WebRtc_history_size(const void* handle) { + const DelayEstimator* self = handle; + + if (self == NULL) { + return -1; + } + if (self->binary_handle->farend->history_size != + self->binary_handle->history_size) { + // Non matching history sizes. 
+ return -1; + } + return self->binary_handle->history_size; +} + +int WebRtc_set_lookahead(void* handle, int lookahead) { + DelayEstimator* self = (DelayEstimator*) handle; + assert(self != NULL); + assert(self->binary_handle != NULL); + if ((lookahead > self->binary_handle->near_history_size - 1) || + (lookahead < 0)) { + return -1; + } + self->binary_handle->lookahead = lookahead; + return self->binary_handle->lookahead; +} + +int WebRtc_lookahead(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + assert(self != NULL); + assert(self->binary_handle != NULL); + return self->binary_handle->lookahead; +} + +int WebRtc_set_allowed_offset(void* handle, int allowed_offset) { + DelayEstimator* self = (DelayEstimator*) handle; + + if ((self == NULL) || (allowed_offset < 0)) { + return -1; + } + self->binary_handle->allowed_offset = allowed_offset; + return 0; +} + +int WebRtc_get_allowed_offset(const void* handle) { + const DelayEstimator* self = (const DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + return self->binary_handle->allowed_offset; +} + +int WebRtc_enable_robust_validation(void* handle, int enable) { + DelayEstimator* self = (DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + if ((enable < 0) || (enable > 1)) { + return -1; + } + assert(self->binary_handle != NULL); + self->binary_handle->robust_validation_enabled = enable; + return 0; +} + +int WebRtc_is_robust_validation_enabled(const void* handle) { + const DelayEstimator* self = (const DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + return self->binary_handle->robust_validation_enabled; +} + +int WebRtc_DelayEstimatorProcessFix(void* handle, + const uint16_t* near_spectrum, + int spectrum_size, + int near_q) { + DelayEstimator* self = (DelayEstimator*) handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (near_spectrum == NULL) { + // Empty near end spectrum. 
+ return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + if (near_q > 15) { + // If |near_q| is larger than 15 we cannot guarantee no wrap around. + return -1; + } + + // Get binary spectra. + binary_spectrum = BinarySpectrumFix(near_spectrum, + self->mean_near_spectrum, + near_q, + &(self->near_spectrum_initialized)); + + return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum); +} + +int WebRtc_DelayEstimatorProcessFloat(void* handle, + const float* near_spectrum, + int spectrum_size) { + DelayEstimator* self = (DelayEstimator*) handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (near_spectrum == NULL) { + // Empty near end spectrum. + return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + + // Get binary spectrum. + binary_spectrum = BinarySpectrumFloat(near_spectrum, self->mean_near_spectrum, + &(self->near_spectrum_initialized)); + + return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum); +} + +int WebRtc_last_delay(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + + return WebRtc_binary_last_delay(self->binary_handle); +} + +float WebRtc_last_delay_quality(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + assert(self != NULL); + return WebRtc_binary_last_delay_quality(self->binary_handle); +} diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h new file mode 100644 index 00000000..fdadebeb --- /dev/null +++ b/third_party/webrtc/src/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Performs delay estimation on block by block basis. +// The return value is 0 - OK and -1 - Error, unless otherwise stated. + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_ + +#include "webrtc/typedefs.h" + +// Releases the memory allocated by WebRtc_CreateDelayEstimatorFarend(...) +void WebRtc_FreeDelayEstimatorFarend(void* handle); + +// Allocates the memory needed by the far-end part of the delay estimation. The +// memory needs to be initialized separately through +// WebRtc_InitDelayEstimatorFarend(...). +// +// Inputs: +// - spectrum_size : Size of the spectrum used both in far-end and +// near-end. Used to allocate memory for spectrum +// specific buffers. +// - history_size : The far-end history buffer size. A change in buffer +// size can be forced with WebRtc_set_history_size(). +// Note that the maximum delay which can be estimated is +// determined together with WebRtc_set_lookahead(). +// +// Return value: +// - void* : Created |handle|. If the memory can't be allocated or +// if any of the input parameters are invalid NULL is +// returned. +void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size); + +// Initializes the far-end part of the delay estimation instance returned by +// WebRtc_CreateDelayEstimatorFarend(...) +int WebRtc_InitDelayEstimatorFarend(void* handle); + +// Soft resets the far-end part of the delay estimation instance returned by +// WebRtc_CreateDelayEstimatorFarend(...). +// Input: +// - delay_shift : The amount of blocks to shift history buffers. 
+void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift); + +// Adds the far-end spectrum to the far-end history buffer. This spectrum is +// used as reference when calculating the delay using +// WebRtc_DelayEstimatorProcessFix() or WebRtc_DelayEstimatorProcessFloat(). +// +// Inputs: +// - far_spectrum : Far-end spectrum. +// - spectrum_size : The size of the data arrays (same for both far- and +// near-end). +// - far_q : The Q-domain of the far-end data. +// +// Output: +// - handle : Updated far-end instance. +// +int WebRtc_AddFarSpectrumFix(void* handle, + const uint16_t* far_spectrum, + int spectrum_size, + int far_q); + +// See WebRtc_AddFarSpectrumFix() for description. +int WebRtc_AddFarSpectrumFloat(void* handle, + const float* far_spectrum, + int spectrum_size); + +// Releases the memory allocated by WebRtc_CreateDelayEstimator(...) +void WebRtc_FreeDelayEstimator(void* handle); + +// Allocates the memory needed by the delay estimation. The memory needs to be +// initialized separately through WebRtc_InitDelayEstimator(...). +// +// Inputs: +// - farend_handle : Pointer to the far-end part of the delay estimation +// instance created prior to this call using +// WebRtc_CreateDelayEstimatorFarend(). +// +// Note that WebRtc_CreateDelayEstimator does not take +// ownership of |farend_handle|, which has to be torn +// down properly after this instance. +// +// - max_lookahead : Maximum amount of non-causal lookahead allowed. The +// actual amount of lookahead used can be controlled by +// WebRtc_set_lookahead(...). The default |lookahead| is +// set to |max_lookahead| at create time. Use +// WebRtc_set_lookahead(...) before start if a different +// value is desired. +// +// Using lookahead can detect cases in which a near-end +// signal occurs before the corresponding far-end signal. +// It will delay the estimate for the current block by an +// equal amount, and the returned values will be offset +// by it. +// +// A value of zero is the typical no-lookahead case. 
+// This also represents the minimum delay which can be +// estimated. +// +// Note that the effective range of delay estimates is +// [-|lookahead|,... ,|history_size|-|lookahead|) +// where |history_size| is set through +// WebRtc_set_history_size(). +// +// Return value: +// - void* : Created |handle|. If the memory can't be allocated or +// if any of the input parameters are invalid NULL is +// returned. +void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead); + +// Initializes the delay estimation instance returned by +// WebRtc_CreateDelayEstimator(...) +int WebRtc_InitDelayEstimator(void* handle); + +// Soft resets the delay estimation instance returned by +// WebRtc_CreateDelayEstimator(...) +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +// +// Return value: +// - actual_shifts : The actual number of shifts performed. +int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift); + +// Sets the effective |history_size| used. Valid values are 2 and above. We simply need +// at least two delays to compare to perform an estimate. If |history_size| is +// changed, buffers are reallocated filling in with zeros if necessary. +// Note that changing the |history_size| affects both buffers in far-end and +// near-end. Hence it is important to change all DelayEstimators that use the +// same reference far-end, to the same |history_size| value. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - history_size : Effective history size to be used. +// Return value: +// - new_history_size : The new history size used. If the memory was not able +// to be allocated 0 is returned. If |handle| is NULL or +// |history_size| is less than 2, -1 is returned. +int WebRtc_set_history_size(void* handle, int history_size); + +// Returns the history_size currently used. +// Input: +// - handle : Pointer to the delay estimation instance. +// Return value: +// - history_size : The history size in use, or -1 if |handle| is NULL or +// the far-end and near-end history sizes do not match. +int WebRtc_history_size(const void* handle); + +// Sets the amount of |lookahead| to use. 
Valid values are [0, max_lookahead] +// where |max_lookahead| was set at create time through +// WebRtc_CreateDelayEstimator(...). +// +// Input: +// - handle : Pointer to the delay estimation instance. +// - lookahead : The amount of lookahead to be used. +// +// Return value: +// - new_lookahead : The actual amount of lookahead set, or -1 if +// |lookahead| is invalid. |handle| must not be a NULL +// pointer; the implementation asserts this rather than +// returning an error. +int WebRtc_set_lookahead(void* handle, int lookahead); + +// Returns the amount of lookahead we currently use. +// Input: +// - handle : Pointer to the delay estimation instance. +int WebRtc_lookahead(void* handle); + +// Sets the |allowed_offset| used in the robust validation scheme. If the +// delay estimator is used in an echo control component, this parameter is +// related to the filter length. In principle |allowed_offset| should be set to +// the echo control filter length minus the expected echo duration, i.e., the +// delay offset the echo control can handle without quality regression. The +// default value, used if not set manually, is zero. Note that |allowed_offset| +// has to be non-negative. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - allowed_offset : The amount of delay offset, measured in partitions, +// the echo control filter can handle. +int WebRtc_set_allowed_offset(void* handle, int allowed_offset); + +// Returns the |allowed_offset| in number of partitions. +int WebRtc_get_allowed_offset(const void* handle); + +// Enables/Disables a robust validation functionality in the delay estimation. +// This is by default set to disabled at create time. The state is preserved +// over a reset. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - enable : Enable (1) or disable (0) this feature. +int WebRtc_enable_robust_validation(void* handle, int enable); + +// Returns 1 if robust validation is enabled and 0 if disabled. 
+int WebRtc_is_robust_validation_enabled(const void* handle); + +// Estimates and returns the delay between the far-end and near-end blocks. The +// value will be offset by the lookahead (i.e. the lookahead should be +// subtracted from the returned value). +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - near_spectrum : Pointer to the near-end spectrum data of the current +// block. +// - spectrum_size : The size of the data arrays (same for both far- and +// near-end). +// - near_q : The Q-domain of the near-end data. +// +// Output: +// - handle : Updated instance. +// +// Return value: +// - delay : >= 0 - Calculated delay value. +// -1 - Error. +// -2 - Insufficient data for estimation. +int WebRtc_DelayEstimatorProcessFix(void* handle, + const uint16_t* near_spectrum, + int spectrum_size, + int near_q); + +// See WebRtc_DelayEstimatorProcessFix() for description. +int WebRtc_DelayEstimatorProcessFloat(void* handle, + const float* near_spectrum, + int spectrum_size); + +// Returns the last calculated delay updated by the function +// WebRtc_DelayEstimatorProcessFix() or WebRtc_DelayEstimatorProcessFloat(). +// +// Input: +// - handle : Pointer to the delay estimation instance. +// +// Return value: +// - delay : >= 0 - Last calculated delay value. +// -1 - Error. +// -2 - Insufficient data for estimation. +int WebRtc_last_delay(void* handle); + +// Returns the estimation quality/probability of the last calculated delay +// updated by the function WebRtc_DelayEstimatorProcessFix() or +// WebRtc_DelayEstimatorProcessFloat(). The estimation +// quality is a value in the interval [0, 1]. The higher the value, the better +// the quality. +// +// Return value: +// - delay_quality : >= 0 - Estimation quality of last calculated delay. 
+float WebRtc_last_delay_quality(void* handle); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_ diff --git a/third_party/webrtc/src/webrtc/system_wrappers/interface/compile_assert_c.h b/third_party/webrtc/src/webrtc/system_wrappers/interface/compile_assert_c.h new file mode 100644 index 00000000..dbb5292d --- /dev/null +++ b/third_party/webrtc/src/webrtc/system_wrappers/interface/compile_assert_c.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_H_ +#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_H_ + +#ifdef __cplusplus +#error "Only use this for C files. For C++, use static_assert." +#endif + +// Use this macro to verify at compile time that certain restrictions are met. +// The argument is the boolean expression to evaluate. +// Example: +// COMPILE_ASSERT(sizeof(foo) < 128); +#define COMPILE_ASSERT(expression) switch (0) {case 0: case expression:;} + +#endif // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_H_ diff --git a/third_party/webrtc/src/webrtc/system_wrappers/interface/cpu_features_wrapper.h b/third_party/webrtc/src/webrtc/system_wrappers/interface/cpu_features_wrapper.h new file mode 100644 index 00000000..5697c491 --- /dev/null +++ b/third_party/webrtc/src/webrtc/system_wrappers/interface/cpu_features_wrapper.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_CPU_FEATURES_WRAPPER_H_ +#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_CPU_FEATURES_WRAPPER_H_ + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +#include "webrtc/typedefs.h" + +// List of features in x86. +typedef enum { + kSSE2, + kSSE3 +} CPUFeature; + +// List of features in ARM. +enum { + kCPUFeatureARMv7 = (1 << 0), + kCPUFeatureVFPv3 = (1 << 1), + kCPUFeatureNEON = (1 << 2), + kCPUFeatureLDREXSTREX = (1 << 3) +}; + +typedef int (*WebRtc_CPUInfo)(CPUFeature feature); + +// Returns true if the CPU supports the feature. +extern WebRtc_CPUInfo WebRtc_GetCPUInfo; + +// No CPU feature is available => straight C path. +extern WebRtc_CPUInfo WebRtc_GetCPUInfoNoASM; + +// Return the features in an ARM device. +// It detects the features in the hardware platform, and returns supported +// values in the above enum definition as a bitmask. +extern uint64_t WebRtc_GetCPUFeaturesARM(void); + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif + +#endif // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_CPU_FEATURES_WRAPPER_H_ diff --git a/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features.cc b/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features.cc new file mode 100644 index 00000000..b924d773 --- /dev/null +++ b/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features.cc @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Parts of this file derived from Chromium's base/cpu.cc. + +#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" + +#if defined(WEBRTC_ARCH_X86_FAMILY) && defined(_MSC_VER) +#include <intrin.h> +#endif + +#include "webrtc/typedefs.h" + +// No CPU feature is available => straight C path. +int GetCPUInfoNoASM(CPUFeature feature) { + (void)feature; + return 0; +} + +#if defined(WEBRTC_ARCH_X86_FAMILY) +#ifndef _MSC_VER +// Intrinsic for "cpuid". +#if defined(__pic__) && defined(__i386__) +static inline void __cpuid(int cpu_info[4], int info_type) { + __asm__ volatile( + "mov %%ebx, %%edi\n" + "cpuid\n" + "xchg %%edi, %%ebx\n" + : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) + : "a"(info_type)); +} +#else +static inline void __cpuid(int cpu_info[4], int info_type) { + __asm__ volatile( + "cpuid\n" + : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) + : "a"(info_type)); +} +#endif +#endif // _MSC_VER +#endif // WEBRTC_ARCH_X86_FAMILY + +#if defined(WEBRTC_ARCH_X86_FAMILY) +// Actual feature detection for x86. +static int GetCPUInfo(CPUFeature feature) { + int cpu_info[4]; + __cpuid(cpu_info, 1); + if (feature == kSSE2) { + return 0 != (cpu_info[3] & 0x04000000); + } + if (feature == kSSE3) { + return 0 != (cpu_info[2] & 0x00000001); + } + return 0; +} +#else +// Default to straight C for other platforms. 
+static int GetCPUInfo(CPUFeature feature) { + (void)feature; + return 0; +} +#endif + +WebRtc_CPUInfo WebRtc_GetCPUInfo = GetCPUInfo; +WebRtc_CPUInfo WebRtc_GetCPUInfoNoASM = GetCPUInfoNoASM; diff --git a/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features_android.c b/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features_android.c new file mode 100644 index 00000000..0cb3a6c5 --- /dev/null +++ b/third_party/webrtc/src/webrtc/system_wrappers/source/cpu_features_android.c @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <cpu-features.h> + +uint64_t WebRtc_GetCPUFeaturesARM(void) { + return android_getCpuFeatures(); +} diff --git a/third_party/webrtc/src/webrtc/typedefs.h b/third_party/webrtc/src/webrtc/typedefs.h new file mode 100644 index 00000000..3034c7e7 --- /dev/null +++ b/third_party/webrtc/src/webrtc/typedefs.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This file contains platform-specific typedefs and defines. +// Much of it is derived from Chromium's build/build_config.h. + +#ifndef WEBRTC_TYPEDEFS_H_ +#define WEBRTC_TYPEDEFS_H_ + +// Processor architecture detection. 
For more info on what's defined, see: +// http://msdn.microsoft.com/en-us/library/b0084kay.aspx +// http://www.agner.org/optimize/calling_conventions.pdf +// or with gcc, run: "echo | gcc -E -dM -" +#if defined(_M_X64) || defined(__x86_64__) +#define WEBRTC_ARCH_X86_FAMILY +#define WEBRTC_ARCH_X86_64 +#define WEBRTC_ARCH_64_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__aarch64__) +#define WEBRTC_ARCH_64_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(_M_IX86) || defined(__i386__) +#define WEBRTC_ARCH_X86_FAMILY +#define WEBRTC_ARCH_X86 +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__ARMEL__) +// TODO(ajm): We'd prefer to control platform defines here, but this is +// currently provided by the Android makefiles. Commented to avoid duplicate +// definition warnings. +//#define WEBRTC_ARCH_ARM +// TODO(ajm): Chromium uses the following two defines. Should we switch? +//#define WEBRTC_ARCH_ARM_FAMILY +//#define WEBRTC_ARCH_ARMEL +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__MIPSEL__) +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__pnacl__) +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#else +#error Please add support for your architecture in typedefs.h +#endif + +#if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN)) +#error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN +#endif + +// TODO(zhongwei.yao): WEBRTC_CPU_DETECTION is only used in one place; we should +// probably just remove it. +#if (defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE2__)) || \ + defined(WEBRTC_DETECT_NEON) +#define WEBRTC_CPU_DETECTION +#endif + +#if !defined(_MSC_VER) +#include <stdint.h> +#else +// Define C99 equivalent types, since pre-2010 MSVC doesn't provide stdint.h. 
+typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; +typedef __int64 int64_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; +#endif + +// Annotate a function indicating the caller must examine the return value. +// Use like: +// int foo() WARN_UNUSED_RESULT; +// TODO(ajm): Hack to avoid multiple definitions until the base/ of webrtc and +// libjingle are merged. +#if !defined(WARN_UNUSED_RESULT) +#if defined(__GNUC__) +#define WARN_UNUSED_RESULT __attribute__((warn_unused_result)) +#else +#define WARN_UNUSED_RESULT +#endif +#endif // WARN_UNUSED_RESULT + +// Put after a variable that might not be used, to prevent compiler warnings: +// int result ATTRIBUTE_UNUSED = DoSomething(); +// assert(result == 17); +#ifndef ATTRIBUTE_UNUSED +#if defined(__GNUC__) || defined(__clang__) +#define ATTRIBUTE_UNUSED __attribute__((unused)) +#else +#define ATTRIBUTE_UNUSED +#endif +#endif + +// Macro to be used for switch-case fallthrough (required for enabling +// -Wimplicit-fallthrough warning on Clang). +#ifndef FALLTHROUGH +#if defined(__clang__) +#define FALLTHROUGH() [[clang::fallthrough]] +#else +#define FALLTHROUGH() do { } while (0) +#endif +#endif + +// Annotate a function that will not return control flow to the caller. +#if defined(_MSC_VER) +#define NO_RETURN __declspec(noreturn) +#elif defined(__GNUC__) +#define NO_RETURN __attribute__((noreturn)) +#else +#define NO_RETURN +#endif + +#endif // WEBRTC_TYPEDEFS_H_ |