diff options
Diffstat (limited to 'pjmedia/src/pjmedia/echo_suppress.c')
-rw-r--r-- | pjmedia/src/pjmedia/echo_suppress.c | 805 |
1 files changed, 805 insertions, 0 deletions
diff --git a/pjmedia/src/pjmedia/echo_suppress.c b/pjmedia/src/pjmedia/echo_suppress.c new file mode 100644 index 0000000..1563fb0 --- /dev/null +++ b/pjmedia/src/pjmedia/echo_suppress.c @@ -0,0 +1,805 @@ +/* $Id: echo_suppress.c 3664 2011-07-19 03:42:28Z nanang $ */ +/* + * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com) + * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <pjmedia/types.h> +#include <pjmedia/alaw_ulaw.h> +#include <pjmedia/errno.h> +#include <pjmedia/frame.h> +#include <pjmedia/silencedet.h> +#include <pj/array.h> +#include <pj/assert.h> +#include <pj/lock.h> +#include <pj/log.h> +#include <pj/os.h> +#include <pj/pool.h> + +#include "echo_internal.h" + +#define THIS_FILE "echo_suppress.c" + +/* Maximum float constant */ +#define MAX_FLOAT (float)1.701411e38 + +/* The effective learn duration (in seconds) before we declare that learning + * is complete. The actual learning duration itself may be longer depending + * on the conversation pattern (e.g. we can't detect echo if speaker is only + * playing silence). + */ +#define MAX_CALC_DURATION_SEC 3 + +/* The internal audio segment length, in milliseconds. 10ms shold be good + * and no need to change it. + */ +#define SEGMENT_PTIME 10 + +/* The length of the template signal in milliseconds. The longer the template, + * the better correlation will be found, at the expense of more processing + * and longer learning time. + */ +#define TEMPLATE_PTIME 200 + +/* How long to look back in the past to see if either mic or speaker is + * active. + */ +#define SIGNAL_LOOKUP_MSEC 200 + +/* The minimum level value to be considered as talking, in uLaw complement + * (0-255). + */ +#define MIN_SIGNAL_ULAW 35 + +/* The period (in seconds) on which the ES will analize it's effectiveness, + * and it may trigger soft-reset to force recalculation. + */ +#define CHECK_PERIOD 30 + +/* Maximum signal level of average echo residue (in uLaw complement). When + * the residue value exceeds this value, we force the ES to re-learn. + */ +#define MAX_RESIDUE 2.5 + + +#if 0 +# define TRACE_(expr) PJ_LOG(5,expr) +#else +# define TRACE_(expr) +#endif + +PJ_INLINE(float) FABS(float val) +{ + if (val < 0) + return -val; + else + return val; +} + + +#if defined(PJ_HAS_FLOATING_POINT) && PJ_HAS_FLOATING_POINT!=0 + typedef float pj_ufloat_t; +# define pj_ufloat_from_float(f) (f) +# define pj_ufloat_mul_u(val1, f) ((val1) * (f)) +# define pj_ufloat_mul_i(val1, f) ((val1) * (f)) +#else + typedef pj_uint32_t pj_ufloat_t; + + pj_ufloat_t pj_ufloat_from_float(float f) + { + return (pj_ufloat_t)(f * 65536); + } + + unsigned pj_ufloat_mul_u(unsigned val1, pj_ufloat_t val2) + { + return (val1 * val2) >> 16; + } + + int pj_ufloat_mul_i(int val1, pj_ufloat_t val2) + { + return (val1 * (pj_int32_t)val2) >> 16; + } +#endif + + +/* Conversation state */ +typedef enum talk_state +{ + ST_NULL, + ST_LOCAL_TALK, + ST_REM_SILENT, + ST_DOUBLETALK, + ST_REM_TALK +} talk_state_t; + +const char *state_names[] = +{ + "Null", + "local talking", + "remote silent", + "doubletalk", + "remote talking" +}; + + +/* Description: + + The echo suppressor tries to find the position of echoed signal by looking + at the correlation between signal played to the speaker (played signal) + and the signal captured from the microphone (recorded signal). + + To do this, it first divides the frames (from mic and speaker) into + segments, calculate the audio level of the segment, and save the level + information in the playback and record history (play_hist and rec_hist + respectively). + + In the history, the newest element (depicted as "t0" in the diagram belo) + is put in the last position of the array. + + The record history size is as large as the template size (tmpl_cnt), since + we will use the record history as the template to find the best matching + position in the playback history. + + Here is the record history buffer: + + <--templ_cnt--> + +-------------+ + | rec_hist | + +-------------+ + t-templ_cnt......t0 + + As you can see, the newest frame ("t0") is put as the last element in the + array. + + The playback history size is larger than record history, since we need to + find the matching pattern in the past. The playback history size is + "templ_cnt + tail_cnt", where "tail_cnt" is the number of segments equal + to the maximum tail length. The maximum tail length is set when the ES + is created. + + Here is the playback history buffer: + + <-----tail_cnt-----> <--templ_cnt--> + +-------------------+--------------+ + | play_hist | + +-------------------+--------------+ + t-play_hist_cnt...t-templ_cnt.......t0 + + + + Learning: + + During the processing, the ES calculates the following values: + - the correlation value, that is how similar the playback signal compared + to the mic signal. The lower the correlation value the better (i.e. more + similar) the signal is. The correlation value is done over the template + duration. + - the gain scaling factor, that is the ratio between mic signal and + speaker signal. The ES calculates both the minimum and average ratios. + + The ES calculates both the values above for every tail position in the + playback history. The values are saved in arrays below: + + <-----tail_cnt-----> + +-------------------+ + | corr_sum | + +-------------------+ + | min_factor | + +-------------------+ + | avg_factor | + +-------------------+ + + At the end of processing, the ES iterates through the correlation array and + picks the tail index with the lowest corr_sum value. This is the position + where echo is most likely to be found. + + + Processing: + + Once learning is done, the ES will change the level of the mic signal + depending on the state of the conversation and according to the ratio that + has been found in the learning phase above. + + */ + +/* + * The simple echo suppresor state + */ +typedef struct echo_supp +{ + unsigned clock_rate; /* Clock rate. */ + pj_uint16_t samples_per_frame; /* Frame length in samples */ + pj_uint16_t samples_per_segment;/* Segment length in samples */ + pj_uint16_t tail_ms; /* Tail length in milliseconds */ + pj_uint16_t tail_samples; /* Tail length in samples. */ + + pj_bool_t learning; /* Are we still learning yet? */ + talk_state_t talk_state; /* Current talking state */ + int tail_index; /* Echo location, -1 if not found */ + + unsigned max_calc; /* # of calc before learning complete. + (see MAX_CALC_DURATION_SEC) */ + unsigned calc_cnt; /* Number of calculations so far */ + + unsigned update_cnt; /* # of updates */ + unsigned templ_cnt; /* Template length, in # of segments */ + unsigned tail_cnt; /* Tail length, in # of segments */ + unsigned play_hist_cnt; /* # of segments in play_hist */ + pj_uint16_t *play_hist; /* Array of playback levels */ + pj_uint16_t *rec_hist; /* Array of rec levels */ + + float *corr_sum; /* Array of corr for each tail pos. */ + float *tmp_corr; /* Temporary corr array calculation */ + float best_corr; /* Best correlation so far. */ + + unsigned sum_rec_level; /* Running sum of level in rec_hist */ + float rec_corr; /* Running corr in rec_hist. */ + + unsigned sum_play_level0; /* Running sum of level for first pos */ + float play_corr0; /* Running corr for first pos . */ + + float *min_factor; /* Array of minimum scaling factor */ + float *avg_factor; /* Array of average scaling factor */ + float *tmp_factor; /* Array to store provisional result */ + + unsigned running_cnt; /* Running duration in # of frames */ + float residue; /* Accummulated echo residue. */ + float last_factor; /* Last factor applied to mic signal */ +} echo_supp; + + + +/* + * Create. + */ +PJ_DEF(pj_status_t) echo_supp_create( pj_pool_t *pool, + unsigned clock_rate, + unsigned channel_count, + unsigned samples_per_frame, + unsigned tail_ms, + unsigned options, + void **p_state ) +{ + echo_supp *ec; + + PJ_UNUSED_ARG(channel_count); + PJ_UNUSED_ARG(options); + + PJ_ASSERT_RETURN(samples_per_frame >= SEGMENT_PTIME * clock_rate / 1000, + PJ_ENOTSUP); + + ec = PJ_POOL_ZALLOC_T(pool, struct echo_supp); + ec->clock_rate = clock_rate; + ec->samples_per_frame = (pj_uint16_t)samples_per_frame; + ec->samples_per_segment = (pj_uint16_t)(SEGMENT_PTIME * clock_rate / 1000); + ec->tail_ms = (pj_uint16_t)tail_ms; + ec->tail_samples = (pj_uint16_t)(tail_ms * clock_rate / 1000); + + ec->templ_cnt = TEMPLATE_PTIME / SEGMENT_PTIME; + ec->tail_cnt = (pj_uint16_t)(tail_ms / SEGMENT_PTIME); + ec->play_hist_cnt = (pj_uint16_t)(ec->tail_cnt+ec->templ_cnt); + + ec->max_calc = (pj_uint16_t)(MAX_CALC_DURATION_SEC * clock_rate / + ec->samples_per_segment); + + ec->rec_hist = (pj_uint16_t*) + pj_pool_alloc(pool, ec->templ_cnt * + sizeof(ec->rec_hist[0])); + + /* Note: play history has twice number of elements */ + ec->play_hist = (pj_uint16_t*) + pj_pool_alloc(pool, ec->play_hist_cnt * + sizeof(ec->play_hist[0])); + + ec->corr_sum = (float*) + pj_pool_alloc(pool, ec->tail_cnt * + sizeof(ec->corr_sum[0])); + ec->tmp_corr = (float*) + pj_pool_alloc(pool, ec->tail_cnt * + sizeof(ec->tmp_corr[0])); + ec->min_factor = (float*) + pj_pool_alloc(pool, ec->tail_cnt * + sizeof(ec->min_factor[0])); + ec->avg_factor = (float*) + pj_pool_alloc(pool, ec->tail_cnt * + sizeof(ec->avg_factor[0])); + ec->tmp_factor = (float*) + pj_pool_alloc(pool, ec->tail_cnt * + sizeof(ec->tmp_factor[0])); + echo_supp_reset(ec); + + *p_state = ec; + return PJ_SUCCESS; +} + + +/* + * Destroy. + */ +PJ_DEF(pj_status_t) echo_supp_destroy(void *state) +{ + PJ_UNUSED_ARG(state); + return PJ_SUCCESS; +} + + +/* + * Hard reset + */ +PJ_DEF(void) echo_supp_reset(void *state) +{ + unsigned i; + echo_supp *ec = (echo_supp*) state; + + pj_bzero(ec->rec_hist, ec->templ_cnt * sizeof(ec->rec_hist[0])); + pj_bzero(ec->play_hist, ec->play_hist_cnt * sizeof(ec->play_hist[0])); + + for (i=0; i<ec->tail_cnt; ++i) { + ec->corr_sum[i] = ec->avg_factor[i] = 0; + ec->min_factor[i] = MAX_FLOAT; + } + + ec->update_cnt = 0; + ec->calc_cnt = 0; + ec->learning = PJ_TRUE; + ec->tail_index = -1; + ec->best_corr = MAX_FLOAT; + ec->talk_state = ST_NULL; + ec->last_factor = 1.0; + ec->residue = 0; + ec->running_cnt = 0; + ec->sum_rec_level = ec->sum_play_level0 = 0; + ec->rec_corr = ec->play_corr0 = 0; +} + +/* + * Soft reset to force the EC to re-learn without having to discard all + * rec and playback history. + */ +PJ_DEF(void) echo_supp_soft_reset(void *state) +{ + unsigned i; + + echo_supp *ec = (echo_supp*) state; + + for (i=0; i<ec->tail_cnt; ++i) { + ec->corr_sum[i] = 0; + } + + ec->update_cnt = 0; + ec->calc_cnt = 0; + ec->learning = PJ_TRUE; + ec->best_corr = MAX_FLOAT; + ec->residue = 0; + ec->running_cnt = 0; + ec->sum_rec_level = ec->sum_play_level0 = 0; + ec->rec_corr = ec->play_corr0 = 0; + + PJ_LOG(4,(THIS_FILE, "Echo suppressor soft reset. Re-learning..")); +} + + +/* Set state */ +static void echo_supp_set_state(echo_supp *ec, talk_state_t state, + unsigned level) +{ + PJ_UNUSED_ARG(level); + + if (state != ec->talk_state) { + TRACE_((THIS_FILE, "[%03d.%03d] %s --> %s, level=%u", + (ec->update_cnt * SEGMENT_PTIME / 1000), + ((ec->update_cnt * SEGMENT_PTIME) % 1000), + state_names[ec->talk_state], + state_names[state], level)); + ec->talk_state = state; + } +} + +/* + * Update EC state + */ +static void echo_supp_update(echo_supp *ec, pj_int16_t *rec_frm, + const pj_int16_t *play_frm) +{ + int prev_index; + unsigned i, j, frm_level, sum_play_level, ulaw; + pj_uint16_t old_rec_frm_level, old_play_frm_level; + float play_corr; + + ++ec->update_cnt; + if (ec->update_cnt > 0x7FFFFFFF) + ec->update_cnt = 0x7FFFFFFF; /* Detect overflow */ + + /* Calculate current play frame level */ + frm_level = pjmedia_calc_avg_signal(play_frm, ec->samples_per_segment); + ++frm_level; /* to avoid division by zero */ + + /* Save the oldest frame level for later */ + old_play_frm_level = ec->play_hist[0]; + + /* Push current frame level to the back of the play history */ + pj_array_erase(ec->play_hist, sizeof(pj_uint16_t), ec->play_hist_cnt, 0); + ec->play_hist[ec->play_hist_cnt-1] = (pj_uint16_t) frm_level; + + /* Calculate level of current mic frame */ + frm_level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_segment); + ++frm_level; /* to avoid division by zero */ + + /* Save the oldest frame level for later */ + old_rec_frm_level = ec->rec_hist[0]; + + /* Push to the back of the rec history */ + pj_array_erase(ec->rec_hist, sizeof(pj_uint16_t), ec->templ_cnt, 0); + ec->rec_hist[ec->templ_cnt-1] = (pj_uint16_t) frm_level; + + + /* Can't do the calc until the play history is full. */ + if (ec->update_cnt < ec->play_hist_cnt) + return; + + /* Skip if learning is done */ + if (!ec->learning) + return; + + + /* Calculate rec signal pattern */ + if (ec->sum_rec_level == 0) { + /* Buffer has just been filled up, do full calculation */ + ec->rec_corr = 0; + ec->sum_rec_level = 0; + for (i=0; i < ec->templ_cnt-1; ++i) { + float corr; + corr = (float)ec->rec_hist[i+1] / ec->rec_hist[i]; + ec->rec_corr += corr; + ec->sum_rec_level += ec->rec_hist[i]; + } + ec->sum_rec_level += ec->rec_hist[i]; + } else { + /* Update from previous calculation */ + ec->sum_rec_level = ec->sum_rec_level - old_rec_frm_level + + ec->rec_hist[ec->templ_cnt-1]; + ec->rec_corr = ec->rec_corr - ((float)ec->rec_hist[0] / + old_rec_frm_level) + + ((float)ec->rec_hist[ec->templ_cnt-1] / + ec->rec_hist[ec->templ_cnt-2]); + } + + /* Iterate through the play history and calculate the signal correlation + * for every tail position in the play_hist. Save the result in temporary + * array since we may bail out early if the conversation state is not good + * to detect echo. + */ + /* + * First phase: do full calculation for the first position + */ + if (ec->sum_play_level0 == 0) { + /* Buffer has just been filled up, do full calculation */ + sum_play_level = 0; + play_corr = 0; + for (j=0; j<ec->templ_cnt-1; ++j) { + float corr; + corr = (float)ec->play_hist[j+1] / ec->play_hist[j]; + play_corr += corr; + sum_play_level += ec->play_hist[j]; + } + sum_play_level += ec->play_hist[j]; + ec->sum_play_level0 = sum_play_level; + ec->play_corr0 = play_corr; + } else { + /* Update from previous calculation */ + ec->sum_play_level0 = ec->sum_play_level0 - old_play_frm_level + + ec->play_hist[ec->templ_cnt-1]; + ec->play_corr0 = ec->play_corr0 - ((float)ec->play_hist[0] / + old_play_frm_level) + + ((float)ec->play_hist[ec->templ_cnt-1] / + ec->play_hist[ec->templ_cnt-2]); + sum_play_level = ec->sum_play_level0; + play_corr = ec->play_corr0; + } + ec->tmp_corr[0] = FABS(play_corr - ec->rec_corr); + ec->tmp_factor[0] = (float)ec->sum_rec_level / sum_play_level; + + /* Bail out if remote isn't talking */ + ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF; + if (ulaw < MIN_SIGNAL_ULAW) { + echo_supp_set_state(ec, ST_REM_SILENT, ulaw); + return; + } + /* Bail out if local user is talking */ + if (ec->sum_rec_level >= sum_play_level) { + echo_supp_set_state(ec, ST_LOCAL_TALK, ulaw); + return; + } + + /* + * Second phase: do incremental calculation for the rest of positions + */ + for (i=1; i < ec->tail_cnt; ++i) { + unsigned end; + + end = i + ec->templ_cnt; + + sum_play_level = sum_play_level - ec->play_hist[i-1] + + ec->play_hist[end-1]; + play_corr = play_corr - ((float)ec->play_hist[i]/ec->play_hist[i-1]) + + ((float)ec->play_hist[end-1]/ec->play_hist[end-2]); + + /* Bail out if remote isn't talking */ + ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF; + if (ulaw < MIN_SIGNAL_ULAW) { + echo_supp_set_state(ec, ST_REM_SILENT, ulaw); + return; + } + + /* Bail out if local user is talking */ + if (ec->sum_rec_level >= sum_play_level) { + echo_supp_set_state(ec, ST_LOCAL_TALK, ulaw); + return; + } + +#if 0 + // disabled: not a good idea if mic throws out loud echo + /* Also bail out if we suspect there's a doubletalk */ + ulaw = pjmedia_linear2ulaw(ec->sum_rec_level/ec->templ_cnt) ^ 0xFF; + if (ulaw > MIN_SIGNAL_ULAW) { + echo_supp_set_state(ec, ST_DOUBLETALK, ulaw); + return; + } +#endif + + /* Calculate correlation and save to temporary array */ + ec->tmp_corr[i] = FABS(play_corr - ec->rec_corr); + + /* Also calculate the gain factor between mic and speaker level */ + ec->tmp_factor[i] = (float)ec->sum_rec_level / sum_play_level; + pj_assert(ec->tmp_factor[i] < 1); + } + + /* We seem to have good signal, we can update the EC state */ + echo_supp_set_state(ec, ST_REM_TALK, MIN_SIGNAL_ULAW); + + /* Accummulate the correlation value to the history and at the same + * time find the tail index of the best correlation. + */ + prev_index = ec->tail_index; + for (i=1; i<ec->tail_cnt-1; ++i) { + float *p = &ec->corr_sum[i], sum; + + /* Accummulate correlation value for this tail position */ + ec->corr_sum[i] += ec->tmp_corr[i]; + + /* Update the min and avg gain factor for this tail position */ + if (ec->tmp_factor[i] < ec->min_factor[i]) + ec->min_factor[i] = ec->tmp_factor[i]; + ec->avg_factor[i] = ((ec->avg_factor[i] * ec->tail_cnt) + + ec->tmp_factor[i]) / + (ec->tail_cnt + 1); + + /* To get the best correlation, also include the correlation + * value of the neighbouring tail locations. + */ + sum = *(p-1) + (*p)*2 + *(p+1); + //sum = *p; + + /* See if we have better correlation value */ + if (sum < ec->best_corr) { + ec->tail_index = i; + ec->best_corr = sum; + } + } + + if (ec->tail_index != prev_index) { + unsigned duration; + int imin, iavg; + + duration = ec->update_cnt * SEGMENT_PTIME; + imin = (int)(ec->min_factor[ec->tail_index] * 1000); + iavg = (int)(ec->avg_factor[ec->tail_index] * 1000); + + PJ_LOG(4,(THIS_FILE, + "Echo suppressor updated at t=%03d.%03ds, echo tail=%d msec" + ", factor min/avg=%d.%03d/%d.%03d", + (duration/1000), (duration%1000), + (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME, + imin/1000, imin%1000, + iavg/1000, iavg%1000)); + + } + + ++ec->calc_cnt; + + if (ec->calc_cnt > ec->max_calc) { + unsigned duration; + int imin, iavg; + + + ec->learning = PJ_FALSE; + ec->running_cnt = 0; + + duration = ec->update_cnt * SEGMENT_PTIME; + imin = (int)(ec->min_factor[ec->tail_index] * 1000); + iavg = (int)(ec->avg_factor[ec->tail_index] * 1000); + + PJ_LOG(4,(THIS_FILE, + "Echo suppressor learning done at t=%03d.%03ds, tail=%d ms" + ", factor min/avg=%d.%03d/%d.%03d", + (duration/1000), (duration%1000), + (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME, + imin/1000, imin%1000, + iavg/1000, iavg%1000)); + } + +} + + +/* Amplify frame */ +static void amplify_frame(pj_int16_t *frm, unsigned length, + pj_ufloat_t factor) +{ + unsigned i; + + for (i=0; i<length; ++i) { + frm[i] = (pj_int16_t)pj_ufloat_mul_i(frm[i], factor); + } +} + +/* + * Perform echo cancellation. + */ +PJ_DEF(pj_status_t) echo_supp_cancel_echo( void *state, + pj_int16_t *rec_frm, + const pj_int16_t *play_frm, + unsigned options, + void *reserved ) +{ + unsigned i, N; + echo_supp *ec = (echo_supp*) state; + + PJ_UNUSED_ARG(options); + PJ_UNUSED_ARG(reserved); + + /* Calculate number of segments. This should be okay even if + * samples_per_frame is not a multiply of samples_per_segment, since + * we only calculate level. + */ + N = ec->samples_per_frame / ec->samples_per_segment; + pj_assert(N>0); + for (i=0; i<N; ++i) { + unsigned pos = i * ec->samples_per_segment; + echo_supp_update(ec, rec_frm+pos, play_frm+pos); + } + + if (ec->tail_index < 0) { + /* Not ready */ + } else { + unsigned lookup_cnt, rec_level=0, play_level=0; + unsigned tail_cnt; + float factor; + + /* How many previous segments to lookup */ + lookup_cnt = SIGNAL_LOOKUP_MSEC / SEGMENT_PTIME; + if (lookup_cnt > ec->templ_cnt) + lookup_cnt = ec->templ_cnt; + + /* Lookup in recording history to get maximum mic level, to see + * if local user is currently talking + */ + for (i=ec->templ_cnt - lookup_cnt; i < ec->templ_cnt; ++i) { + if (ec->rec_hist[i] > rec_level) + rec_level = ec->rec_hist[i]; + } + rec_level = pjmedia_linear2ulaw(rec_level) ^ 0xFF; + + /* Calculate the detected tail length, in # of segments */ + tail_cnt = (ec->tail_cnt - ec->tail_index); + + /* Lookup in playback history to get max speaker level, to see + * if remote user is currently talking + */ + for (i=ec->play_hist_cnt -lookup_cnt -tail_cnt; + i<ec->play_hist_cnt-tail_cnt; ++i) + { + if (ec->play_hist[i] > play_level) + play_level = ec->play_hist[i]; + } + play_level = pjmedia_linear2ulaw(play_level) ^ 0xFF; + + if (rec_level >= MIN_SIGNAL_ULAW) { + if (play_level < MIN_SIGNAL_ULAW) { + /* Mic is talking, speaker is idle. Let mic signal pass as is. + */ + factor = 1.0; + echo_supp_set_state(ec, ST_LOCAL_TALK, rec_level); + } else if (rec_level > play_level) { + /* Seems that both are talking. Scale the mic signal + * down a little bit to reduce echo, while allowing both + * parties to talk at the same time. + */ + factor = (float)(ec->avg_factor[ec->tail_index] * 2); + echo_supp_set_state(ec, ST_DOUBLETALK, rec_level); + } else { + /* Speaker is active, but we've picked up large signal in + * the microphone. Assume that this is an echo, so bring + * the level down to minimum too. + */ + factor = ec->min_factor[ec->tail_index] / 2; + echo_supp_set_state(ec, ST_REM_TALK, play_level); + } + } else { + if (play_level < MIN_SIGNAL_ULAW) { + /* Both mic and speaker seems to be idle. Also scale the + * mic signal down with average factor to reduce low power + * echo. + */ + factor = ec->avg_factor[ec->tail_index] * 3 / 2; + echo_supp_set_state(ec, ST_REM_SILENT, rec_level); + } else { + /* Mic is idle, but there's something playing in speaker. + * Scale the mic down to minimum + */ + factor = ec->min_factor[ec->tail_index] / 2; + echo_supp_set_state(ec, ST_REM_TALK, play_level); + } + } + + /* Smoothen the transition */ + if (factor >= ec->last_factor) + factor = (factor + ec->last_factor) / 2; + else + factor = (factor + ec->last_factor*19) / 20; + + /* Amplify frame */ + amplify_frame(rec_frm, ec->samples_per_frame, + pj_ufloat_from_float(factor)); + ec->last_factor = factor; + + if (ec->talk_state == ST_REM_TALK) { + unsigned level, recalc_cnt; + + /* Get the adjusted frame signal level */ + level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_frame); + level = pjmedia_linear2ulaw(level) ^ 0xFF; + + /* Accumulate average echo residue to see the ES effectiveness */ + ec->residue = ((ec->residue * ec->running_cnt) + level) / + (ec->running_cnt + 1); + + ++ec->running_cnt; + + /* Check if we need to re-learn */ + recalc_cnt = CHECK_PERIOD * ec->clock_rate / ec->samples_per_frame; + if (ec->running_cnt > recalc_cnt) { + int iresidue; + + iresidue = (int)(ec->residue*1000); + + PJ_LOG(5,(THIS_FILE, "Echo suppressor residue = %d.%03d", + iresidue/1000, iresidue%1000)); + + if (ec->residue > MAX_RESIDUE && !ec->learning) { + echo_supp_soft_reset(ec); + ec->residue = 0; + } else { + ec->running_cnt = 0; + ec->residue = 0; + } + } + } + } + + return PJ_SUCCESS; +} + |