From c0970767b422b18bb22e71efac3d6353bba37006 Mon Sep 17 00:00:00 2001 From: Benny Prijono Date: Sat, 9 Aug 2008 05:40:22 +0000 Subject: Ticket #588: Improvements to echo cancellation framework git-svn-id: http://svn.pjsip.org/repos/pjproject/trunk@2198 74dad513-b988-da41-8d7b-12977e46ad98 --- third_party/speex/libspeex/mdf.c | 510 +++++++++++++++------------------------ 1 file changed, 201 insertions(+), 309 deletions(-) (limited to 'third_party') diff --git a/third_party/speex/libspeex/mdf.c b/third_party/speex/libspeex/mdf.c index 456ab847..1fbb4d60 100644 --- a/third_party/speex/libspeex/mdf.c +++ b/third_party/speex/libspeex/mdf.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2008 Jean-Marc Valin +/* Copyright (C) 2003-2006 Jean-Marc Valin File: mdf.c Echo canceller based on the MDF algorithm (see below) @@ -88,12 +88,6 @@ #define WEIGHT_SHIFT 0 #endif -#ifdef FIXED_POINT -#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x))) -#else -#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x)))) -#endif - /* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */ #define TWO_PATH @@ -137,8 +131,6 @@ struct SpeexEchoState_ { int adapted; int saturated; int screwed_up; - int C; /** Number of input channels (microphones) */ - int K; /** Number of output channels (loudspeakers) */ spx_int32_t sampling_rate; spx_word16_t spec_average; spx_word16_t beta0; @@ -179,10 +171,10 @@ struct SpeexEchoState_ { spx_word16_t *window; spx_word16_t *prop; void *fft_table; - spx_word16_t *memX, *memD, *memE; + spx_word16_t memX, memD, memE; spx_word16_t preemph; spx_word16_t notch_radius; - spx_mem_t *notch_mem; + spx_mem_t notch_mem[2]; /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ spx_int16_t *play_buf; @@ -190,7 +182,7 @@ struct SpeexEchoState_ { int play_buf_started; }; -static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride) +static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) { int i; spx_word16_t den2; @@ -202,7 +194,7 @@ static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/ for (i=0;iK = nb_speakers; - st->C = nb_mic; - C=st->C; - K=st->K; #ifdef DUMP_ECHO_CANCEL_DATA if (rFile || pFile || oFile) speex_fatal("Opening dump files twice"); @@ -443,23 +413,23 @@ EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_lengt st->fft_table = spx_fft_init(N); - st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); - st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t)); - st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t)); - st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); - st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); + st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t)); + st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); - st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t)); - st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); - st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); - st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t)); + st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t)); + st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); #ifdef TWO_PATH - st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t)); + st->foreground = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t)); #endif st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); @@ -480,7 +450,7 @@ EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_lengt #endif for (i=0;i<=st->frame_size;i++) st->power_1[i] = FLOAT_ONE; - for (i=0;iW[i] = 0; { spx_word32_t sum = 0; @@ -495,13 +465,11 @@ EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_lengt } for (i=M-1;i>=0;i--) { - st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum); + st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum); } } - st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t)); - st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); - st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); + st->memX=st->memD=st->memE=0; st->preemph = QCONST16(.9,15); if (st->sampling_rate<12000) st->notch_radius = QCONST16(.9, 15); @@ -510,7 +478,7 @@ EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_lengt else st->notch_radius = QCONST16(.992, 15); - st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t)); + st->notch_mem[0] = st->notch_mem[1] = 0; st->adapted = 0; st->Pey = st->Pyy = FLOAT_ONE; @@ -519,7 +487,7 @@ EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_lengt st->Dvar1 = st->Dvar2 = FLOAT_ZERO; #endif - st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); + st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; st->play_buf_started = 0; @@ -527,15 +495,13 @@ EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_lengt } /** Resets echo canceller state */ -EXPORT void speex_echo_state_reset(SpeexEchoState *st) +void speex_echo_state_reset(SpeexEchoState *st) { - int i, M, N, C, K; + int i, M, N; st->cancel_count=0; st->screwed_up = 0; N = st->window_size; M = st->M; - C=st->C; - K=st->K; for (i=0;iW[i] = 0; #ifdef TWO_PATH @@ -555,20 +521,13 @@ EXPORT void speex_echo_state_reset(SpeexEchoState *st) { st->last_y[i] = 0; } - for (i=0;iE[i] = 0; - } - for (i=0;ix[i] = 0; } - for (i=0;i<2*C;i++) - st->notch_mem[i] = 0; - for (i=0;imemD[i]=st->memE[i]=0; - for (i=0;imemX[i]=0; + st->notch_mem[0] = st->notch_mem[1] = 0; + st->memX=st->memD=st->memE=0; st->saturated = 0; st->adapted = 0; @@ -586,7 +545,7 @@ EXPORT void speex_echo_state_reset(SpeexEchoState *st) } /** Destroys an echo canceller state */ -EXPORT void speex_echo_state_destroy(SpeexEchoState *st) +void speex_echo_state_destroy(SpeexEchoState *st) { spx_fft_destroy(st->fft_table); @@ -617,11 +576,6 @@ EXPORT void speex_echo_state_destroy(SpeexEchoState *st) #ifdef FIXED_POINT speex_free(st->wtmp2); #endif - speex_free(st->memX); - speex_free(st->memD); - speex_free(st->memE); - speex_free(st->notch_mem); - speex_free(st->play_buf); speex_free(st); @@ -633,7 +587,7 @@ EXPORT void speex_echo_state_destroy(SpeexEchoState *st) #endif } -EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) +void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) { int i; /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/ @@ -656,7 +610,7 @@ EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_i } } -EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) +void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) { /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/ if (!st->play_buf_started) @@ -683,16 +637,16 @@ EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) } /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */ -EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) +void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) { speex_echo_cancellation(st, in, far_end, out); } /** Performs echo cancellation on a frame */ -EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) +void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) { - int i,j, chan, speak; - int N,M, C, K; + int i,j; + int N,M; spx_word32_t Syy,See,Sxx,Sdd, Sff; #ifdef TWO_PATH spx_word32_t Dbf; @@ -707,9 +661,6 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c N = st->window_size; M = st->M; - C = st->C; - K = st->K; - st->cancel_count++; #ifdef FIXED_POINT ss=DIV32_16(11469,M); @@ -719,178 +670,137 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c ss_1 = 1-ss; #endif - for (chan = 0; chan < C; chan++) + /* Apply a notch filter to make sure DC doesn't end up causing problems */ + filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem); + /* Copy input data to buffer and apply pre-emphasis */ + for (i=0;iframe_size;i++) { - /* Apply a notch filter to make sure DC doesn't end up causing problems */ - filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C); - /* Copy input data to buffer and apply pre-emphasis */ - /* Copy input data to buffer */ - for (i=0;iframe_size;i++) - { - spx_word32_t tmp32; - /* FIXME: This core has changed a bit, need to merge properly */ - tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan]))); + spx_word32_t tmp32; + tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); #ifdef FIXED_POINT - if (tmp32 > 32767) - { - tmp32 = 32767; - if (st->saturated == 0) - st->saturated = 1; - } - if (tmp32 < -32767) - { - tmp32 = -32767; - if (st->saturated == 0) - st->saturated = 1; - } -#endif - st->memD[chan] = st->input[chan*st->frame_size+i]; - st->input[chan*st->frame_size+i] = EXTRACT16(tmp32); + /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */ + if (tmp32 > 32767) + { + tmp32 = 32767; + st->saturated = M+1; } - } - - for (speak = 0; speak < K; speak++) - { - for (i=0;iframe_size;i++) + if (tmp32 < -32767) { - spx_word32_t tmp32; - st->x[speak*N+i] = st->x[speak*N+i+st->frame_size]; - tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak]))); -#ifdef FIXED_POINT - /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ - if (tmp32 > 32767) - { - tmp32 = 32767; - st->saturated = M+1; - } - if (tmp32 < -32767) - { - tmp32 = -32767; - st->saturated = M+1; - } + tmp32 = -32767; + st->saturated = M+1; + } #endif - st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32); - st->memX[speak] = far_end[i*K+speak]; - } - } - - for (speak = 0; speak < K; speak++) - { - /* Shift memory: this could be optimized eventually*/ - for (j=M-1;j>=0;j--) + st->x[i+st->frame_size] = EXTRACT16(tmp32); + st->memX = far_end[i]; + + tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); +#ifdef FIXED_POINT + if (tmp32 > 32767) { - for (i=0;iX[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i]; + tmp32 = 32767; + if (st->saturated == 0) + st->saturated = 1; + } + if (tmp32 < -32767) + { + tmp32 = -32767; + if (st->saturated == 0) + st->saturated = 1; } - /* Convert x (echo input) to frequency domain */ - spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]); +#endif + st->memD = st->input[i]; + st->input[i] = tmp32; } - - Sxx = 0; - for (speak = 0; speak < K; speak++) + + /* Shift memory: this could be optimized eventually*/ + for (j=M-1;j>=0;j--) { - Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); - power_spectrum_accum(st->X+speak*N, st->Xf, N); + for (i=0;iX[(j+1)*N+i] = st->X[j*N+i]; } + + /* Convert x (far end) to frequency domain */ + spx_fft(st->fft_table, st->x, &st->X[0]); + for (i=0;ilast_y[i] = st->x[i]; + Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); + for (i=0;iframe_size;i++) + st->x[i] = st->x[i+st->frame_size]; + /* From here on, the top part of x is used as scratch space */ - Sff = 0; - for (chan = 0; chan < C; chan++) - { #ifdef TWO_PATH - /* Compute foreground filter */ - spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K); - spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N); - for (i=0;iframe_size;i++) - st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]); - Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); + /* Compute foreground filter */ + spectral_mul_accum16(st->X, st->foreground, st->Y, N, M); + spx_ifft(st->fft_table, st->Y, st->e); + for (i=0;iframe_size;i++) + st->e[i] = SUB16(st->input[i], st->e[i+st->frame_size]); + Sff = mdf_inner_prod(st->e, st->e, st->frame_size); #endif - } /* Adjust proportional adaption rate */ - /* FIXME: Adjust that for C, K*/ - if (st->adapted) - mdf_adjust_prop (st->W, N, M, C*K, st->prop); + mdf_adjust_prop (st->W, N, M, st->prop); /* Compute weight gradient */ if (st->saturated == 0) { - for (chan = 0; chan < C; chan++) + for (j=M-1;j>=0;j--) { - for (speak = 0; speak < K; speak++) - { - for (j=M-1;j>=0;j--) - { - weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N); - for (i=0;iW[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i]; - } - } + weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N); + for (i=0;iW[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]); + } } else { st->saturated--; } - /* FIXME: MC conversion required */ /* Update weight to prevent circular convolution (MDF / AUMDF) */ - for (chan = 0; chan < C; chan++) + for (j=0;jcancel_count%(M-1) == j-1) { - for (j=0;jcancel_count%(M-1) == j-1) - { #ifdef FIXED_POINT - for (i=0;iwtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16)); - spx_ifft(st->fft_table, st->wtmp2, st->wtmp); - for (i=0;iframe_size;i++) - { - st->wtmp[i]=0; - } - for (i=st->frame_size;iwtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); - } - spx_fft(st->fft_table, st->wtmp, st->wtmp2); - /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ - for (i=0;iW[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); + for (i=0;iwtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16)); + spx_ifft(st->fft_table, st->wtmp2, st->wtmp); + for (i=0;iframe_size;i++) + { + st->wtmp[i]=0; + } + for (i=st->frame_size;iwtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); + } + spx_fft(st->fft_table, st->wtmp, st->wtmp2); + /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ + for (i=0;iW[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); #else - spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp); - for (i=st->frame_size;iwtmp[i]=0; - } - spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]); -#endif - } + spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); + for (i=st->frame_size;iwtmp[i]=0; } + spx_fft(st->fft_table, st->wtmp, &st->W[j*N]); +#endif } } - - /* So we can use power_spectrum_accum */ - for (i=0;i<=st->frame_size;i++) - st->Rf[i] = st->Yf[i] = st->Xf[i] = 0; - - Dbf = 0; - See = 0; + + /* Compute filter response Y */ + spectral_mul_accum(st->X, st->W, st->Y, N, M); + spx_ifft(st->fft_table, st->Y, st->y); + #ifdef TWO_PATH /* Difference in response, this is used to estimate the variance of our residual power estimate */ - for (chan = 0; chan < C; chan++) - { - spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K); - spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N); - for (i=0;iframe_size;i++) - st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]); - Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); - for (i=0;iframe_size;i++) - st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); - See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); - } + for (i=0;iframe_size;i++) + st->e[i] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]); + Dbf = 10+mdf_inner_prod(st->e, st->e, st->frame_size); #endif + for (i=0;iframe_size;i++) + st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); + See = mdf_inner_prod(st->e, st->e, st->frame_size); #ifndef TWO_PATH Sff = See; #endif @@ -927,12 +837,11 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c st->Davg1 = st->Davg2 = 0; st->Dvar1 = st->Dvar2 = FLOAT_ZERO; /* Copy background filter to foreground filter */ - for (i=0;iforeground[i] = EXTRACT16(PSHR32(st->W[i],16)); /* Apply a smooth transition so as to not introduce blocking artifacts */ - for (chan = 0; chan < C; chan++) - for (i=0;iframe_size;i++) - st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]); + for (i=0;iframe_size;i++) + st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]); } else { int reset_background=0; /* Otherwise, check if the background filter is significantly worse */ @@ -945,16 +854,13 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c if (reset_background) { /* Copy foreground filter to background filter */ - for (i=0;iW[i] = SHL32(EXTEND32(st->foreground[i]),16); /* We also need to copy the output so as to get correct adaptation */ - for (chan = 0; chan < C; chan++) - { - for (i=0;iframe_size;i++) - st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size]; - for (i=0;iframe_size;i++) - st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); - } + for (i=0;iframe_size;i++) + st->y[i+st->frame_size] = st->e[i+st->frame_size]; + for (i=0;iframe_size;i++) + st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); See = Sff; st->Davg1 = st->Davg2 = 0; st->Dvar1 = st->Dvar2 = FLOAT_ZERO; @@ -962,57 +868,47 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c } #endif - Sey = Syy = Sdd = 0; - for (chan = 0; chan < C; chan++) - { - /* Compute error signal (for the output with de-emphasis) */ - for (i=0;iframe_size;i++) - { - spx_word32_t tmp_out; + /* Compute error signal (for the output with de-emphasis) */ + for (i=0;iframe_size;i++) + { + spx_word32_t tmp_out; #ifdef TWO_PATH - tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size])); + tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size])); #else - tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size])); + tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size])); #endif - tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan]))); + /* Saturation */ + if (tmp_out>32767) + tmp_out = 32767; + else if (tmp_out<-32768) + tmp_out = -32768; + tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE))); /* This is an arbitrary test for saturation in the microphone signal */ - if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000) - { + if (in[i] <= -32000 || in[i] >= 32000) + { + tmp_out = 0; if (st->saturated == 0) st->saturated = 1; - } - out[i*C+chan] = WORD2INT(tmp_out); - st->memE[chan] = tmp_out; } - + out[i] = (spx_int16_t)tmp_out; + st->memE = tmp_out; + } + #ifdef DUMP_ECHO_CANCEL_DATA - dump_audio(in, far_end, out, st->frame_size); + dump_audio(in, far_end, out, st->frame_size); #endif - /* Compute error signal (filter update version) */ - for (i=0;iframe_size;i++) - { - st->e[chan*N+i+st->frame_size] = st->e[chan*N+i]; - st->e[chan*N+i] = 0; - } - - /* Compute a bunch of correlations */ - /* FIXME: bad merge */ - Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); - Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); - Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size); - - /* Convert error to frequency domain */ - spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N); - for (i=0;iframe_size;i++) - st->y[i+chan*N] = 0; - spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N); - - /* Compute power spectrum of echo (X), error (E) and filter response (Y) */ - power_spectrum_accum(st->E+chan*N, st->Rf, N); - power_spectrum_accum(st->Y+chan*N, st->Yf, N); - + /* Compute error signal (filter update version) */ + for (i=0;iframe_size;i++) + { + st->e[i+st->frame_size] = st->e[i]; + st->e[i] = 0; } + + /* Compute a bunch of correlations */ + Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size); + Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size); + Sdd = mdf_inner_prod(st->input, st->input, st->frame_size); /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/ @@ -1025,7 +921,7 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c { /* Things have gone really bad */ st->screwed_up += 50; - for (i=0;iframe_size*C;i++) + for (i=0;iframe_size;i++) out[i] = 0; } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6))) { @@ -1044,17 +940,36 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c /* Add a small noise floor to make sure not to have problems when dividing */ See = MAX32(See, SHR32(MULT16_16(N, 100),6)); - - for (speak = 0; speak < K; speak++) - { - Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); - power_spectrum_accum(st->X+speak*N, st->Xf, N); - } + /* Convert error to frequency domain */ + spx_fft(st->fft_table, st->e, st->E); + for (i=0;iframe_size;i++) + st->y[i] = 0; + spx_fft(st->fft_table, st->y, st->Y); + + /* Compute power spectrum of far end (X), error (E) and filter response (Y) */ + power_spectrum(st->E, st->Rf, N); + power_spectrum(st->Y, st->Yf, N); + power_spectrum(st->X, st->Xf, N); /* Smooth far end energy estimate over time */ for (j=0;j<=st->frame_size;j++) st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); + + /* Enable this to compute the power based only on the tail (would need to compute more + efficiently to make this really useful */ + if (0) + { + float scale2 = .5f/M; + for (j=0;j<=st->frame_size;j++) + st->power[j] = 100; + for (i=0;iX[i*N], st->Xf, N); + for (j=0;j<=st->frame_size;j++) + st->power[j] += scale2*st->Xf[j]; + } + } /* Compute filtered spectra and (cross-)correlations */ for (j=st->frame_size;j>=0;j--) @@ -1176,12 +1091,12 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c st->sum_adapt = ADD32(st->sum_adapt,adapt_rate); } - /* FIXME: MC conversion required */ - for (i=0;iframe_size;i++) - st->last_y[i] = st->last_y[st->frame_size+i]; + /* Save residual echo so it can be used by the nonlinear processor */ if (st->adapted) { /* If the filter is adapted, take the filtered echo */ + for (i=0;iframe_size;i++) + st->last_y[i] = st->last_y[st->frame_size+i]; for (i=0;iframe_size;i++) st->last_y[st->frame_size+i] = in[i]-out[i]; } else { @@ -1226,7 +1141,7 @@ void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, in } -EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) +int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) { switch(request) { @@ -1254,29 +1169,6 @@ EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) case SPEEX_ECHO_GET_SAMPLING_RATE: (*(int*)ptr) = st->sampling_rate; break; - case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE: - /*FIXME: Implement this for multiple channels */ - *((spx_int32_t *)ptr) = st->M * st->frame_size; - break; - case SPEEX_ECHO_GET_IMPULSE_RESPONSE: - { - int M = st->M, N = st->window_size, n = st->frame_size, i, j; - spx_int32_t *filt = (spx_int32_t *) ptr; - for(j=0;jwtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN)); - spx_ifft(st->fft_table, st->wtmp2, st->wtmp); -#else - spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); -#endif - for(i=0;iwtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN); - } - } - break; default: speex_warning_int("Unknown speex_echo_ctl request: ", request); return -1; -- cgit v1.2.3