/* * New pass at a good echo canceller * * Mark Spencer * * This time, we'll try floating point first... * */ #ifndef _MECNEW_H #define _MECNEW_H typedef float FLOAT; /* typedef double FLOAT; */ #ifdef __KERNEL__ #define MALLOC(a) kmalloc(a, GFP_KERNEL) #defien FREE(a) kfree(a) #else #include #define MALLOC(a) malloc(a) #define FREE(a) free(a) #endif #define HANGT 600 #define DEFAULT_N 128 #define DEFAULT_M 16 #define DEFAULT_T0 5 #define SAMPLE_FREQ 8000 #define DEFAULT_TAU 1 #define MAX_BETA 16 / DEFAULT_M #define SUPPR -12.0 /* db */ #define EC_MIN_DB_VALUE -70.0 /* db */ #ifdef GEN_CONST #define _CUTOFF pow(2.0, -15.0) static FLOAT CUTOFF; #define _ALPHA_ST pow(2.0, -5.0) static FLOAT ALPHA_ST; #define _ALPHA_YT pow(2.0, -5.0) static FLOAT ALPHA_YT; #define _SIGMA_LU pow(2.0, -7.0) static FLOAT SIGMA_LU; #define _SIGMA_LY pow(2.0, -7.0) static FLOAT SIGMA_LY; #define _DEFAULT_BETA1 pow(2.0, -12.0) /* Choose -13.0 for 256 taps, they recommend -11.0 for 128 taps */ static FLOAT DEFAULT_BETA1; #define _MIN_BETA _DEFAULT_BETA1 * pow(2.0, -4.0) static FLOAT MIN_BETA; #define _EC_MIN_VALUE pow(10, EC_MIN_DB_VALUE/10.0) static FLOAT EC_MIN_VALUE; #else #include "mec-2-const.h" #endif #define SUPPR_FLOOR -64.0 #define SUPPR_CEIL -24.0 #define RES_SUPR_FACTOR -20.0 typedef struct mecnew { /* Big arrays first */ FLOAT a_d[DEFAULT_N]; /* Coefficients */ FLOAT y_d[2 * (DEFAULT_N + DEFAULT_M)]; /* Circular buf reference */ FLOAT s_d[2 * (DEFAULT_N + DEFAULT_M)]; /* Circular buf signal */ int yspos; /* Position in circular buf */ FLOAT e_d[2 * DEFAULT_M]; int eupos; FLOAT y_tilde_d[2 * DEFAULT_N]; /* Reference signal */ int ytpos; /* Now some things to keep track of */ FLOAT s_tilde_d; /* Near-end speech detector */ int HCNTR_d; int start_speech_d; FLOAT Ly_d; /* Power computations */ FLOAT Lu_d; int i_d; /* Absolute time */ FLOAT beta1_d; FLOAT beta2_d; int flag_d; } echo_can_state_t; static inline echo_can_state_t *echo_can_create(int taps, int adaption) { echo_can_state_t *ec; #ifdef GEN_CONST /* Initialize constants */ ALPHA_ST = _ALPHA_ST; ALPHA_YT = _ALPHA_YT; SIGMA_LU = _SIGMA_LU; SIGMA_LY = _SIGMA_LY; DEFAULT_BETA1 = _DEFAULT_BETA1; MIN_BETA = _MIN_BETA; EC_MIN_VALUE = _EC_MIN_VALUE; CUTOFF = _CUTOFF; #endif ec = MALLOC(sizeof(echo_can_state_t)); if (ec) { memset(ec, 0, sizeof(echo_can_state_t)); ec->Ly_d = CUTOFF; ec->flag_d = 1; ec->beta1_d = DEFAULT_BETA1; ec->beta2_d = DEFAULT_BETA1; } return ec; } /* Features */ /* #define DECAY_BETA_OVER_TIME */ /* #define ADJUST_SUPRESSION */ static inline short echo_can_update(echo_can_state_t *ec, short _ref, short _sig) { FLOAT ref = ((FLOAT) _ref) / 32767.0; FLOAT sig = ((FLOAT) _sig) / 32767.0; FLOAT r = 0.0; FLOAT u_suppr; FLOAT Py; FLOAT two_beta; FLOAT max_y_tilde; FLOAT grad; FLOAT suppr_value; int k,m; int rpos, rpos2; /* Save reference and signal */ ec->y_d[ec->yspos] = ref; ec->y_d[ec->yspos + DEFAULT_M + DEFAULT_N] = ref; ec->s_d[ec->yspos] = sig; ec->s_d[ec->yspos + DEFAULT_M + DEFAULT_N] = sig; /* Convolute the current estimated echo */ for (k = 0; k < DEFAULT_N; k++) { rpos = ec->yspos - k + (DEFAULT_N + DEFAULT_M); r += ec->a_d[k] * ec->y_d[rpos]; } u_suppr = sig - r; ec->e_d[ec->eupos] = u_suppr; ec->e_d[ec->eupos + DEFAULT_M] = u_suppr; /* Update near-end speech detector */ ec->s_tilde_d = (1.0 - ALPHA_ST) * ec->s_tilde_d + (ALPHA_ST * fabs(sig)); /* XXX Maybe just keep track of the last one in a separate variable or something XXX */ rpos = ec->ytpos + DEFAULT_N - 1; ec->y_tilde_d[ec->ytpos] = (1.0 - ALPHA_YT) * ec->y_tilde_d[rpos] + (ALPHA_YT * fabs(ref)); ec->y_tilde_d[ec->ytpos + DEFAULT_N] = ec->y_tilde_d[ec->ytpos]; Py = ec->Ly_d * ec->Ly_d; if (ec->HCNTR_d > 0) Py = 1.0; #ifdef DECAY_BETA_OVER_TIME /* XXX OMG what is all this crap about, surely we can do some IIR filter or something to simulate this nonsense. All this is supposed to do is decay beta over time XXX */ if (ec->start_speech_d != 0) { if (ec->i_d > (DEFAULT_T0 + ec->start_speech_d) * (SAMPLE_FREQ)) { ec->beta2_d = ec->beta1_d * exp((-1.0/DEFAULT_TAU) * ((ec->i_d / (FLOAT) SAMPLE_FREQ) - DEFAULT_T0 - ec->start_speech_d)); if (ec->beta2_d < MIN_BETA) ec->beta2_d = MIN_BETA; } } else { ec->beta2_d = ec->beta1_d; } #endif two_beta = ec->beta2_d / Py; if (two_beta > MAX_BETA) two_beta = MAX_BETA; ec->Lu_d = ((1.0 - SIGMA_LU) * ec->Lu_d) + (SIGMA_LU * fabs(u_suppr)); ec->Ly_d = ((1.0 - SIGMA_LY) * ec->Ly_d) + (SIGMA_LY * fabs(ref)); if (ec->Ly_d < CUTOFF) ec->Ly_d = CUTOFF; max_y_tilde = 0.0; for (k=0;ky_tilde_d[k]) max_y_tilde = ec->y_tilde_d[k]; } if (ec->s_tilde_d > (0.2 * max_y_tilde)) { ec->HCNTR_d = HANGT; } else if (ec->HCNTR_d) { ec->HCNTR_d--; } if ((ec->HCNTR_d == 0) && (ec->i_d % DEFAULT_M == 0)) { /* Update coefficients */ for (k=0; k < DEFAULT_N; k++) { grad = 0.0; for (m = 0; m < DEFAULT_M; m++) { rpos = ec->eupos - m + DEFAULT_M; rpos2 = ec->yspos-m-k + DEFAULT_M + DEFAULT_N; grad += ec->e_d[rpos] * ec->y_d[rpos2]; } ec->a_d[k] += two_beta * grad; } } if (ec->flag_d == 1) { ec->start_speech_d = ec->i_d / (FLOAT)SAMPLE_FREQ; ec->flag_d = 0; } #ifdef ADJUST_SUPPRESSION suppr_value = pow(10, SUPPR / 10.0); /* XXX Reverse logic so that it operates on actual value instead of dB XXX */ if ((ec->HCNTR_d == 0) && ((ec->Lu_d/ec->Ly_d) < suppr_value) && ((ec->Lu_d/ec->Ly_d) > EC_MIN_VALUE)) { FLOAT suppr_factor = (1/(FLOAT)(SUPPR_FLOOR-SUPPR_CEIL)) * 10 * log(ec->Lu_d/ec->Ly_d) - SUPPR_CEIL/(FLOAT)(SUPPR_FLOOR - SUPPR_CEIL); u_suppr = pow(10.0,(suppr_factor) * RES_SUPR_FACTOR/10.0) * u_suppr; } #endif /* Increment everything */ ec->i_d++; ec->yspos = (ec->yspos + 1) % (DEFAULT_N + DEFAULT_M); ec->eupos = (ec->eupos + 1) % DEFAULT_M; ec->ytpos = (ec->ytpos + 1) % DEFAULT_N; return (short)(u_suppr * 32767.0); } #endif /* _MECNEW_H */