From 67d4e56dc8a365871c3dca4f04fcf8b9c9f47ee6 Mon Sep 17 00:00:00 2001 From: Benny Prijono Date: Thu, 23 Nov 2006 10:19:46 +0000 Subject: Updated Speex to their latest SVN (1.2-beta). AEC seems to work much better now and take less CPU, so I increased default tail length in PJSUA to 800ms. git-svn-id: http://svn.pjsip.org/repos/pjproject/trunk@823 74dad513-b988-da41-8d7b-12977e46ad98 --- pjmedia/src/pjmedia-codec/speex/mdf.c | 524 +++++++++++++++++++++------------- 1 file changed, 326 insertions(+), 198 deletions(-) (limited to 'pjmedia/src/pjmedia-codec/speex/mdf.c') diff --git a/pjmedia/src/pjmedia-codec/speex/mdf.c b/pjmedia/src/pjmedia-codec/speex/mdf.c index 94e08f7f..3cea2343 100644 --- a/pjmedia/src/pjmedia-codec/speex/mdf.c +++ b/pjmedia/src/pjmedia-codec/speex/mdf.c @@ -41,8 +41,9 @@ double-talk is achieved using a variable learning rate as described in: Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo - Cancellation With Double-Talk. Submitted to IEEE Transactions on Speech - and Audio Processing, 2006. + Cancellation With Double-Talk. To appear in IEEE Transactions on Audio, + Speech and Language Processing, 2006. + http://people.xiph.org/~jm/papers/valin_taslp2006.pdf There is no explicit double-talk detection, but a continuous variation in the learning rate based on residual echo, double-talk and background @@ -78,9 +79,6 @@ #define M_PI 3.14159265358979323846 #endif -#define min(a,b) ((a)<(b) ? (a) : (b)) -#define max(a,b) ((a)>(b) ? (a) : (b)) - #ifdef FIXED_POINT #define WEIGHT_SHIFT 11 #define NORMALIZE_SCALEDOWN 5 @@ -89,15 +87,24 @@ #define WEIGHT_SHIFT 0 #endif +/* If enabled, the transition between blocks is smooth, so there isn't any blocking +aftifact when adapting. The cost is an extra FFT and a matrix-vector multiply */ +#define SMOOTH_BLOCKS + #ifdef FIXED_POINT -static const spx_float_t MIN_LEAK = {16777, -24}; +static const spx_float_t MIN_LEAK = {16777, -19}; #define TOP16(x) ((x)>>16) #else -static const spx_float_t MIN_LEAK = .001f; +static const spx_float_t MIN_LEAK = .032f; #define TOP16(x) (x) #endif +#define PLAYBACK_DELAY 2 + +void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len); + + /** Speex echo cancellation state. */ struct SpeexEchoState_ { int frame_size; /**< Number of samples processed each time */ @@ -105,36 +112,40 @@ struct SpeexEchoState_ { int M; int cancel_count; int adapted; + int saturated; + int screwed_up; spx_int32_t sampling_rate; spx_word16_t spec_average; spx_word16_t beta0; spx_word16_t beta_max; spx_word32_t sum_adapt; - spx_word16_t *e; + spx_word16_t leak_estimate; + + spx_word16_t *e; /* scratch */ spx_word16_t *x; spx_word16_t *X; - spx_word16_t *d; - spx_word16_t *y; + spx_word16_t *input; /* scratch */ + spx_word16_t *y; /* scratch */ spx_word16_t *last_y; - spx_word32_t *Yps; - spx_word16_t *Y; + spx_word16_t *Y; /* scratch */ spx_word16_t *E; - spx_word32_t *PHI; + spx_word32_t *PHI; /* scratch */ spx_word32_t *W; spx_word32_t *power; spx_float_t *power_1; - spx_word16_t *wtmp; + spx_word16_t *wtmp; /* scratch */ #ifdef FIXED_POINT - spx_word16_t *wtmp2; + spx_word16_t *wtmp2; /* scratch */ #endif - spx_word32_t *Rf; - spx_word32_t *Yf; - spx_word32_t *Xf; + spx_word32_t *Rf; /* scratch */ + spx_word32_t *Yf; /* scratch */ + spx_word32_t *Xf; /* scratch */ spx_word32_t *Eh; spx_word32_t *Yh; spx_float_t Pey; spx_float_t Pyy; spx_word16_t *window; + spx_word16_t *prop; void *fft_table; spx_word16_t memX, memD, memE; spx_word16_t preemph; @@ -144,9 +155,10 @@ struct SpeexEchoState_ { /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ spx_int16_t *play_buf; int play_buf_pos; + int play_buf_started; }; -PJ_INLINE(void) filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) +static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) { int i; spx_word16_t den2; @@ -170,7 +182,7 @@ PJ_INLINE(void) filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, sp } } -PJ_INLINE(spx_word32_t ) mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) +static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) { spx_word32_t sum=0; len >>= 1; @@ -186,7 +198,7 @@ PJ_INLINE(spx_word32_t ) mdf_inner_prod(const spx_word16_t *x, const spx_word16_ } /** Compute power spectrum of a half-complex (packed) vector */ -PJ_INLINE(void) power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N) +static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N) { int i, j; ps[0]=MULT16_16(X[0],X[0]); @@ -227,7 +239,7 @@ static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT); } #else -PJ_INLINE(void) spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) +static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) { int i,j; for (i=0;iM = (filter_length+st->frame_size-1)/frame_size; st->cancel_count=0; st->sum_adapt = 0; - /* FIXME: Make that an init option (new API call?) */ + st->saturated = 0; + st->screwed_up = 0; + /* This is the default sampling rate */ st->sampling_rate = 8000; st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); #ifdef FIXED_POINT @@ -283,14 +301,14 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; st->beta_max = (.5f*st->frame_size)/st->sampling_rate; #endif + st->leak_estimate = 0; st->fft_table = spx_fft_init(N); st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->d = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t)); st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->Yps = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); @@ -298,14 +316,15 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); - st->X = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t)); + st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t)); st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); - st->PHI = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); + st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t)); st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t)); st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); #ifdef FIXED_POINT st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); @@ -318,10 +337,27 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) for (i=0;iwindow[i] = .5-.5*cos(2*M_PI*i/N); #endif + for (i=0;i<=st->frame_size;i++) + st->power_1[i] = FLOAT_ONE; for (i=0;iW[i] = 0; { - st->W[i] = st->PHI[i] = 0; + spx_word32_t sum = 0; + /* Ratio of ~10 between adaptation rate of first and last block */ + spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1); + st->prop[0] = QCONST16(.7, 15); + sum = EXTEND32(st->prop[0]); + for (i=1;iprop[i] = MULT16_16_Q15(st->prop[i-1], decay); + sum = ADD32(sum, EXTEND32(st->prop[i])); + } + for (i=M-1;i>=0;i--) + { + st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum); + } } + st->memX=st->memD=st->memE=0; st->preemph = QCONST16(.9,15); if (st->sampling_rate<12000) @@ -335,9 +371,10 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) st->adapted = 0; st->Pey = st->Pyy = FLOAT_ONE; - st->play_buf = (spx_int16_t*)speex_alloc(2*st->frame_size*sizeof(spx_int16_t)); - st->play_buf_pos = 0; - + st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); + st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; + st->play_buf_started = 0; + return st; } @@ -346,19 +383,40 @@ void speex_echo_state_reset(SpeexEchoState *st) { int i, M, N; st->cancel_count=0; + st->screwed_up = 0; N = st->window_size; M = st->M; for (i=0;iW[i] = 0; + for (i=0;iX[i] = 0; - } for (i=0;i<=st->frame_size;i++) + { st->power[i] = 0; - + st->power_1[i] = FLOAT_ONE; + st->Eh[i] = 0; + st->Yh[i] = 0; + } + for (i=0;iframe_size;i++) + { + st->last_y[i] = 0; + } + for (i=0;iE[i] = 0; + st->x[i] = 0; + } + st->notch_mem[0] = st->notch_mem[1] = 0; + st->memX=st->memD=st->memE=0; + + st->saturated = 0; st->adapted = 0; st->sum_adapt = 0; st->Pey = st->Pyy = FLOAT_ONE; + for (i=0;i<3*st->frame_size;i++) + st->play_buf[i] = 0; + st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; + st->play_buf_started = 0; } @@ -369,10 +427,9 @@ void speex_echo_state_destroy(SpeexEchoState *st) speex_free(st->e); speex_free(st->x); - speex_free(st->d); + speex_free(st->input); speex_free(st->y); speex_free(st->last_y); - speex_free(st->Yps); speex_free(st->Yf); speex_free(st->Rf); speex_free(st->Xf); @@ -387,6 +444,7 @@ void speex_echo_state_destroy(SpeexEchoState *st) speex_free(st->power); speex_free(st->power_1); speex_free(st->window); + speex_free(st->prop); speex_free(st->wtmp); #ifdef FIXED_POINT speex_free(st->wtmp2); @@ -395,17 +453,19 @@ void speex_echo_state_destroy(SpeexEchoState *st) speex_free(st); } -void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out, spx_int32_t *Yout) +void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) { int i; + /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/ + st->play_buf_started = 1; if (st->play_buf_pos>=st->frame_size) { - speex_echo_cancel(st, rec, st->play_buf, out, Yout); + speex_echo_cancellation(st, rec, st->play_buf, out); st->play_buf_pos -= st->frame_size; - for (i=0;iframe_size;i++) + for (i=0;iplay_buf_pos;i++) st->play_buf[i] = st->play_buf[i+st->frame_size]; } else { - speex_warning("no playback frame available"); + speex_warning("No playback frame available (your application is buggy and/or got xruns)"); if (st->play_buf_pos!=0) { speex_warning("internal playback buffer corruption?"); @@ -418,31 +478,48 @@ void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) { - if (st->play_buf_pos<=st->frame_size) + /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/ + if (!st->play_buf_started) + { + speex_warning("discarded first playback frame"); + return; + } + if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size) { int i; for (i=0;iframe_size;i++) st->play_buf[st->play_buf_pos+i] = play[i]; st->play_buf_pos += st->frame_size; + if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size) + { + speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)"); + for (i=0;iframe_size;i++) + st->play_buf[st->play_buf_pos+i] = play[i]; + st->play_buf_pos += st->frame_size; + } } else { - speex_warning("had to discard a playback frame"); + speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)"); } } /** Performs echo cancellation on a frame */ -void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_t *echo, spx_int16_t *out, spx_int32_t *Yout) +void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) +{ + speex_echo_cancellation(st, in, far_end, out); +} + +/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */ +void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) { int i,j; int N,M; - spx_word32_t Syy,See; - spx_word16_t leak_estimate; + spx_word32_t Syy,See,Sxx,Sdd; + spx_word32_t Sey; spx_word16_t ss, ss_1; spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE; spx_float_t alpha, alpha_1; spx_word16_t RER; spx_word32_t tmp32; - spx_word16_t M_1; - int saturated=0; N = st->window_size; M = st->M; @@ -450,82 +527,130 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int #ifdef FIXED_POINT ss=DIV32_16(11469,M); ss_1 = SUB16(32767,ss); - M_1 = DIV32_16(32767,M); #else ss=.35/M; ss_1 = 1-ss; - M_1 = 1.f/M; #endif - filter_dc_notch16(ref, st->notch_radius, st->d, st->frame_size, st->notch_mem); - /* Copy input data to buffer */ + filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem); + /* Copy input data to buffer and apply pre-emphasis */ for (i=0;iframe_size;i++) { - spx_word16_t tmp; spx_word32_t tmp32; st->x[i] = st->x[i+st->frame_size]; - tmp32 = SUB32(EXTEND32(echo[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); + tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); #ifdef FIXED_POINT - /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ + /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */ if (tmp32 > 32767) { tmp32 = 32767; - saturated = 1; - } + st->saturated = M+1; + } if (tmp32 < -32767) { tmp32 = -32767; - saturated = 1; + st->saturated = M+1; } #endif st->x[i+st->frame_size] = EXTRACT16(tmp32); - st->memX = echo[i]; + st->memX = far_end[i]; - tmp = st->d[i]; - st->d[i] = st->d[i+st->frame_size]; - tmp32 = SUB32(EXTEND32(tmp), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); + tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); #ifdef FIXED_POINT if (tmp32 > 32767) { tmp32 = 32767; - saturated = 1; + if (st->saturated == 0) + st->saturated = 1; } if (tmp32 < -32767) { tmp32 = -32767; - saturated = 1; + if (st->saturated == 0) + st->saturated = 1; } #endif - st->d[i+st->frame_size] = tmp32; - st->memD = tmp; + st->memD = st->input[i]; + st->input[i] = tmp32; } /* Shift memory: this could be optimized eventually*/ - for (i=0;iX[i]=st->X[i+N]; + for (j=M-1;j>=0;j--) + { + for (i=0;iX[(j+1)*N+i] = st->X[j*N+i]; + } + + /* Convert x (far end) to frequency domain */ + spx_fft(st->fft_table, st->x, &st->X[0]); + +#ifdef SMOOTH_BLOCKS + spectral_mul_accum(st->X, st->W, st->Y, N, M); + spx_ifft(st->fft_table, st->Y, st->e); +#endif - /* Convert x (echo input) to frequency domain */ - spx_fft(st->fft_table, st->x, &st->X[(M-1)*N]); + /* Compute weight gradient */ + if (st->saturated == 0) + { + for (j=M-1;j>=0;j--) + { + weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N); + for (i=0;iW[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]); + + } + } else { + st->saturated--; + } + /* Update weight to prevent circular convolution (MDF / AUMDF) */ + for (j=0;jcancel_count%(M-1) == j-1) + { +#ifdef FIXED_POINT + for (i=0;iwtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16)); + spx_ifft(st->fft_table, st->wtmp2, st->wtmp); + for (i=0;iframe_size;i++) + { + st->wtmp[i]=0; + } + for (i=st->frame_size;iwtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); + } + spx_fft(st->fft_table, st->wtmp, st->wtmp2); + /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ + for (i=0;iW[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); +#else + spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); + for (i=st->frame_size;iwtmp[i]=0; + } + spx_fft(st->fft_table, st->wtmp, &st->W[j*N]); +#endif + } + } + /* Compute filter response Y */ spectral_mul_accum(st->X, st->W, st->Y, N, M); - spx_ifft(st->fft_table, st->Y, st->y); -#if 1 - spectral_mul_accum(st->X, st->PHI, st->Y, N, M); - spx_ifft(st->fft_table, st->Y, st->e); -#endif - + /* Compute error signal (for the output with de-emphasis) */ for (i=0;iframe_size;i++) { spx_word32_t tmp_out; -#if 1 +#ifdef SMOOTH_BLOCKS spx_word16_t y = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]); - tmp_out = SUB32(EXTEND32(st->d[i+st->frame_size]), EXTEND32(y)); + tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(y)); #else - tmp_out = SUB32(EXTEND32(st->d[i+st->frame_size]), EXTEND32(st->y[i+st->frame_size])); + tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size])); #endif /* Saturation */ @@ -534,13 +659,14 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int else if (tmp_out<-32768) tmp_out = -32768; tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE))); - /* This is an arbitrary test for saturation */ - if (ref[i] <= -32000 || ref[i] >= 32000) + /* This is an arbitrary test for saturation in the microphone signal */ + if (in[i] <= -32000 || in[i] >= 32000) { tmp_out = 0; - saturated = 1; + if (st->saturated == 0) + st->saturated = 1; } - out[i] = tmp_out; + out[i] = (spx_int16_t)tmp_out; st->memE = tmp_out; } @@ -548,26 +674,57 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int for (i=0;iframe_size;i++) { st->e[i] = 0; - st->e[i+st->frame_size] = st->d[i+st->frame_size] - st->y[i+st->frame_size]; + st->e[i+st->frame_size] = st->input[i] - st->y[i+st->frame_size]; } /* Compute a bunch of correlations */ + Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size); See = mdf_inner_prod(st->e+st->frame_size, st->e+st->frame_size, st->frame_size); - See = ADD32(See, SHR32(EXTEND32(10000),6)); Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size); + Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); + Sdd = mdf_inner_prod(st->input, st->input, st->frame_size); + /* Do some sanity check */ + if (!(Syy>=0 && Sxx>=0 && See >= 0) +#ifndef FIXED_POINT + || !(See < N*1e9 && Syy < N*1e9 && Sxx < N*1e9) +#endif + ) + { + /* Things have gone really bad */ + st->screwed_up += 50; + for (i=0;iframe_size;i++) + out[i] = 0; + } else if (SHR32(See, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 100),6))) + { + /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */ + st->screwed_up++; + } else { + /* Everything's fine */ + st->screwed_up=0; + } + if (st->screwed_up>=50) + { + speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now."); + speex_echo_state_reset(st); + return; + } + + /* Add a small noise floor to make sure not to have problems when dividing */ + See = MAX32(See, SHR32(MULT16_16(N, 100),6)); + /* Convert error to frequency domain */ spx_fft(st->fft_table, st->e, st->E); for (i=0;iframe_size;i++) st->y[i] = 0; spx_fft(st->fft_table, st->y, st->Y); - /* Compute power spectrum of echo (X), error (E) and filter response (Y) */ + /* Compute power spectrum of far end (X), error (E) and filter response (Y) */ power_spectrum(st->E, st->Rf, N); power_spectrum(st->Y, st->Yf, N); - power_spectrum(&st->X[(M-1)*N], st->Xf, N); + power_spectrum(st->X, st->Xf, N); - /* Smooth echo energy estimate over time */ + /* Smooth far end energy estimate over time */ for (j=0;j<=st->frame_size;j++) st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); @@ -577,7 +734,7 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int { float scale2 = .5f/M; for (j=0;j<=st->frame_size;j++) - st->power[j] = 0; + st->power[j] = 100; for (i=0;iX[i*N], st->Xf, N); @@ -603,6 +760,9 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int #endif } + Pyy = FLOAT_SQRT(Pyy); + Pey = FLOAT_DIVU(Pey,Pyy); + /* Compute correlation updatete rate */ tmp32 = MULT16_32_Q15(st->beta0,Syy); if (tmp32 > MULT16_32_Q15(st->beta_max,See)) @@ -620,29 +780,41 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int if (FLOAT_GT(st->Pey, st->Pyy)) st->Pey = st->Pyy; /* leak_estimate is the linear regression result */ - leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14)); + st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14)); /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */ - if (leak_estimate > 16383) - leak_estimate = 32767; + if (st->leak_estimate > 16383) + st->leak_estimate = 32767; else - leak_estimate = SHL16(leak_estimate,1); - /*printf ("%f\n", leak_estimate);*/ + st->leak_estimate = SHL16(st->leak_estimate,1); + /*printf ("%f\n", st->leak_estimate);*/ /* Compute Residual to Error Ratio */ #ifdef FIXED_POINT - tmp32 = MULT16_32_Q15(leak_estimate,Syy); - tmp32 = ADD32(tmp32, SHL32(tmp32,1)); + tmp32 = MULT16_32_Q15(st->leak_estimate,Syy); + tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1))); + /* Check for y in e (lower bound on RER) */ + { + spx_float_t bound = PSEUDOFLOAT(Sey); + bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy))); + if (FLOAT_GT(bound, PSEUDOFLOAT(See))) + tmp32 = See; + else if (tmp32 < FLOAT_EXTRACT32(bound)) + tmp32 = FLOAT_EXTRACT32(bound); + } if (tmp32 > SHR32(See,1)) tmp32 = SHR32(See,1); RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15)); #else - RER = 3.*MULT16_32_Q15(leak_estimate,Syy) / See; + RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See; + /* Check for y in e (lower bound on RER) */ + if (RER < Sey*Sey/(1+See*Syy)) + RER = Sey*Sey/(1+See*Syy); if (RER > .5) RER = .5; #endif /* We consider that the filter has had minimal adaptation if the following is true*/ - if (!st->adapted && st->sum_adapt > QCONST32(1,15)) + if (!st->adapted && st->sum_adapt > QCONST32(M,15)) { st->adapted = 1; } @@ -653,7 +825,7 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int { spx_word32_t r, e; /* Compute frequency-domain adaptation mask */ - r = MULT16_32_Q15(leak_estimate,SHL32(st->Yf[i],3)); + r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3)); e = SHL32(st->Rf[i],3)+1; #ifdef FIXED_POINT if (r>SHR32(e,1)) @@ -662,27 +834,26 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int if (r>.5*e) r = .5*e; #endif - r = MULT16_32_Q15(QCONST16(.8,15),r) + MULT16_32_Q15(QCONST16(.2,15),(spx_word32_t)(MULT16_32_Q15(RER,e))); + r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e))); /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/ - st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(MULT16_32_Q15(M_1,r),FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16); + st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16); } } else { - spx_word32_t Sxx; + /* Temporary adaption rate if filter is not yet adapted enough */ spx_word16_t adapt_rate=0; - Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); - /* Temporary adaption rate if filter is not adapted correctly */ - - tmp32 = MULT16_32_Q15(QCONST16(.15f, 15), Sxx); + if (Sxx > SHR32(MULT16_16(N, 1000),6)) + { + tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx); #ifdef FIXED_POINT - if (Sxx > SHR32(See,2)) - Sxx = SHR32(See,2); + if (tmp32 > SHR32(See,2)) + tmp32 = SHR32(See,2); #else - if (Sxx > .25*See) - Sxx = .25*See; + if (tmp32 > .25*See) + tmp32 = .25*See; #endif - adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(MULT16_32_Q15(M_1,Sxx), See),15)); - + adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15)); + } for (i=0;i<=st->frame_size;i++) st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1); @@ -690,100 +861,57 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int /* How much have we adapted so far? */ st->sum_adapt = ADD32(st->sum_adapt,adapt_rate); } - /* Compute weight gradient */ - for (j=0;jpower_1, &st->X[j*N], st->E, st->PHI+N*j, N); - } - if (!saturated) + /* Save residual echo so it can be used by the nonlinear processor */ + if (st->adapted) { - /* Gradient descent */ - for (i=0;iW[i] += st->PHI[i]; - /* Old value of W in PHI */ - st->PHI[i] = st->W[i] - st->PHI[i]; - } + /* If the filter is adapted, take the filtered echo */ + for (i=0;iframe_size;i++) + st->last_y[i] = st->last_y[st->frame_size+i]; + for (i=0;iframe_size;i++) + st->last_y[st->frame_size+i] = in[i]-out[i]; + } else { + /* If filter isn't adapted yet, all we can do is take the far end signal directly */ + for (i=0;ilast_y[i] = st->x[i]; } + +} + +/* Compute spectrum of estimated echo for use in an echo post-filter */ +void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len) +{ + int i; + spx_word16_t leak2; + int N; - /* Update weight to prevent circular convolution (MDF / AUMDF) */ - for (j=0;jcancel_count%(M-1) == j) - { -#ifdef FIXED_POINT - for (i=0;iwtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16)); - spx_ifft(st->fft_table, st->wtmp2, st->wtmp); - for (i=0;iframe_size;i++) - { - st->wtmp[i]=0; - } - for (i=st->frame_size;iwtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); - } - spx_fft(st->fft_table, st->wtmp, st->wtmp2); - /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ - for (i=0;iW[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); -#else - spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); - for (i=st->frame_size;iwtmp[i]=0; - } - spx_fft(st->fft_table, st->wtmp, &st->W[j*N]); -#endif - } - } + N = st->window_size; - /* Compute spectrum of estimated echo for use in an echo post-filter (if necessary)*/ - if (Yout) - { - spx_word16_t leak2; - if (st->adapted) - { - /* If the filter is adapted, take the filtered echo */ - for (i=0;iframe_size;i++) - st->last_y[i] = st->last_y[st->frame_size+i]; - for (i=0;iframe_size;i++) - st->last_y[st->frame_size+i] = ref[i]-out[i]; - } else { - /* If filter isn't adapted yet, all we can do is take the echo signal directly */ - for (i=0;ilast_y[i] = st->x[i]; - } - - /* Apply hanning window (should pre-compute it)*/ - for (i=0;iy[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); + /* Apply hanning window (should pre-compute it)*/ + for (i=0;iy[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); - /* Compute power spectrum of the echo */ - spx_fft(st->fft_table, st->y, st->Y); - power_spectrum(st->Y, st->Yps, N); + /* Compute power spectrum of the echo */ + spx_fft(st->fft_table, st->y, st->Y); + power_spectrum(st->Y, residual_echo, N); #ifdef FIXED_POINT - if (leak_estimate > 16383) - leak2 = 32767; - else - leak2 = SHL16(leak_estimate, 1); + if (st->leak_estimate > 16383) + leak2 = 32767; + else + leak2 = SHL16(st->leak_estimate, 1); #else - if (leak_estimate>.5) - leak2 = 1; - else - leak2 = 2*leak_estimate; + if (st->leak_estimate>.5) + leak2 = 1; + else + leak2 = 2*st->leak_estimate; #endif - /* Estimate residual echo */ - for (i=0;i<=st->frame_size;i++) - Yout[i] = MULT16_32_Q15(leak2,st->Yps[i]); - } + /* Estimate residual echo */ + for (i=0;i<=st->frame_size;i++) + residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]); + } - int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) { switch(request) -- cgit v1.2.3