summaryrefslogtreecommitdiff
path: root/pjmedia/src/pjmedia/echo_suppress.c
diff options
context:
space:
mode:
Diffstat (limited to 'pjmedia/src/pjmedia/echo_suppress.c')
-rw-r--r--pjmedia/src/pjmedia/echo_suppress.c805
1 files changed, 805 insertions, 0 deletions
diff --git a/pjmedia/src/pjmedia/echo_suppress.c b/pjmedia/src/pjmedia/echo_suppress.c
new file mode 100644
index 0000000..1563fb0
--- /dev/null
+++ b/pjmedia/src/pjmedia/echo_suppress.c
@@ -0,0 +1,805 @@
+/* $Id: echo_suppress.c 3664 2011-07-19 03:42:28Z nanang $ */
+/*
+ * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com)
+ * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <pjmedia/types.h>
+#include <pjmedia/alaw_ulaw.h>
+#include <pjmedia/errno.h>
+#include <pjmedia/frame.h>
+#include <pjmedia/silencedet.h>
+#include <pj/array.h>
+#include <pj/assert.h>
+#include <pj/lock.h>
+#include <pj/log.h>
+#include <pj/os.h>
+#include <pj/pool.h>
+
+#include "echo_internal.h"
+
+#define THIS_FILE "echo_suppress.c"
+
+/* A large float value (not the largest representable float). It is used as
+ * the "nothing found yet" initial value of best_corr and min_factor[].
+ */
+#define MAX_FLOAT (float)1.701411e38
+
+/* The effective learn duration (in seconds) before we declare that learning
+ * is complete. The actual learning duration itself may be longer depending
+ * on the conversation pattern (e.g. we can't detect echo if speaker is only
+ * playing silence).
+ */
+#define MAX_CALC_DURATION_SEC 3
+
+/* The internal audio segment length, in milliseconds. 10ms should be good
+ * and there is no need to change it.
+ */
+#define SEGMENT_PTIME 10
+
+/* The length of the template signal in milliseconds. The longer the template,
+ * the better correlation will be found, at the expense of more processing
+ * and longer learning time.
+ */
+#define TEMPLATE_PTIME 200
+
+/* How long to look back in the past to see if either mic or speaker is
+ * active.
+ */
+#define SIGNAL_LOOKUP_MSEC 200
+
+/* The minimum level value to be considered as talking, in uLaw complement
+ * (0-255).
+ */
+#define MIN_SIGNAL_ULAW 35
+
+/* The period (in seconds) on which the ES will analyze its effectiveness,
+ * and it may trigger soft-reset to force recalculation.
+ */
+#define CHECK_PERIOD 30
+
+/* Maximum signal level of average echo residue (in uLaw complement). When
+ * the residue value exceeds this value, we force the ES to re-learn.
+ */
+#define MAX_RESIDUE 2.5
+
+
+#if 0 /* Change to 1 to log talk state transitions at log level 5 */
+# define TRACE_(expr) PJ_LOG(5,expr)
+#else
+# define TRACE_(expr)
+#endif
+
+/* Hand-rolled absolute value of a float. */
+PJ_INLINE(float) FABS(float val)
+{
+    return (val < 0) ? -val : val;
+}
+
+
+/*
+ * pj_ufloat_t is the type of the gain factor applied to the mic samples.
+ *
+ * With floating point support it is a plain float and the helpers are
+ * simple macros. Without it, the factor is stored as an unsigned 16.16
+ * fixed point number (the float value multiplied by 65536).
+ */
+#if defined(PJ_HAS_FLOATING_POINT) && PJ_HAS_FLOATING_POINT!=0
+    typedef float pj_ufloat_t;
+#   define pj_ufloat_from_float(f)      (f)
+#   define pj_ufloat_mul_u(val1, f)     ((val1) * (f))
+#   define pj_ufloat_mul_i(val1, f)     ((val1) * (f))
+#else
+    typedef pj_uint32_t pj_ufloat_t;
+
+    /* Convert a non-negative float to 16.16 fixed point.
+     * The helpers are PJ_INLINE so that they stay private to this file and
+     * do not trigger "unused function" warnings.
+     */
+    PJ_INLINE(pj_ufloat_t) pj_ufloat_from_float(float f)
+    {
+        return (pj_ufloat_t)(f * 65536);
+    }
+
+    /* Multiply an unsigned value with a fixed point factor. The product is
+     * calculated in 64 bit since val1 * val2 does not fit in 32 bit as soon
+     * as val1 >= 65536 and the factor is >= 1.0.
+     */
+    PJ_INLINE(unsigned) pj_ufloat_mul_u(unsigned val1, pj_ufloat_t val2)
+    {
+        return (unsigned)(((pj_uint64_t)val1 * val2) >> 16);
+    }
+
+    /* Multiply a signed value with a fixed point factor. The product is
+     * calculated in 64 bit: a 16 bit sample multiplied by a factor above
+     * 1.0 overflows a signed 32 bit integer, which is undefined behaviour.
+     */
+    PJ_INLINE(int) pj_ufloat_mul_i(int val1, pj_ufloat_t val2)
+    {
+        return (int)(((pj_int64_t)val1 * (pj_int64_t)val2) >> 16);
+    }
+#endif
+
+
+/* Conversation state, as last decided by the echo suppressor. The values
+ * are also used as index into state_names[], so both must be kept in the
+ * same order.
+ */
+typedef enum talk_state
+{
+ ST_NULL, /* Initial state after echo_supp_reset() */
+ ST_LOCAL_TALK, /* Mic is active/dominant, mic signal is not echo */
+ ST_REM_SILENT, /* Playback level is below MIN_SIGNAL_ULAW */
+ ST_DOUBLETALK, /* Both active and mic is louder than playback */
+ ST_REM_TALK /* Playback is active, mic signal is treated as echo */
+} talk_state_t;
+
+const char *state_names[] = /* Printable names, indexed by talk_state_t */
+{
+ "Null", /* ST_NULL */
+ "local talking", /* ST_LOCAL_TALK */
+ "remote silent", /* ST_REM_SILENT */
+ "doubletalk", /* ST_DOUBLETALK */
+ "remote talking" /* ST_REM_TALK */
+};
+
+
+/* Description:
+
+ The echo suppressor tries to find the position of echoed signal by looking
+ at the correlation between signal played to the speaker (played signal)
+ and the signal captured from the microphone (recorded signal).
+
+ To do this, it first divides the frames (from mic and speaker) into
+ segments, calculate the audio level of the segment, and save the level
+ information in the playback and record history (play_hist and rec_hist
+ respectively).
+
+ In the history, the newest element (depicted as "t0" in the diagram below)
+ is put in the last position of the array.
+
+ The record history size is as large as the template size (templ_cnt), since
+ we will use the record history as the template to find the best matching
+ position in the playback history.
+
+ Here is the record history buffer:
+
+ <--templ_cnt-->
+ +-------------+
+ | rec_hist |
+ +-------------+
+ t-templ_cnt......t0
+
+ As you can see, the newest frame ("t0") is put as the last element in the
+ array.
+
+ The playback history size is larger than record history, since we need to
+ find the matching pattern in the past. The playback history size is
+ "templ_cnt + tail_cnt", where "tail_cnt" is the number of segments equal
+ to the maximum tail length. The maximum tail length is set when the ES
+ is created.
+
+ Here is the playback history buffer:
+
+ <-----tail_cnt-----> <--templ_cnt-->
+ +-------------------+--------------+
+ | play_hist |
+ +-------------------+--------------+
+ t-play_hist_cnt...t-templ_cnt.......t0
+
+
+
+ Learning:
+
+ During the processing, the ES calculates the following values:
+ - the correlation value, that is how similar the playback signal compared
+ to the mic signal. The lower the correlation value the better (i.e. more
+ similar) the signal is. The correlation value is done over the template
+ duration.
+ - the gain scaling factor, that is the ratio between mic signal and
+ speaker signal. The ES calculates both the minimum and average ratios.
+
+ The ES calculates both the values above for every tail position in the
+ playback history. The values are saved in arrays below:
+
+ <-----tail_cnt----->
+ +-------------------+
+ | corr_sum |
+ +-------------------+
+ | min_factor |
+ +-------------------+
+ | avg_factor |
+ +-------------------+
+
+ At the end of processing, the ES iterates through the correlation array and
+ picks the tail index with the lowest corr_sum value. This is the position
+ where echo is most likely to be found.
+
+
+ Processing:
+
+ Once learning is done, the ES will change the level of the mic signal
+ depending on the state of the conversation and according to the ratio that
+ has been found in the learning phase above.
+
+ */
+
+/*
+ * The simple echo suppressor state.
+ *
+ * One instance is created by echo_supp_create(), which also allocates all
+ * the arrays below from the pool given by the caller. Levels stored in the
+ * history arrays are the average signal level of one segment plus one, so
+ * that a stored level is never zero and can be used as a divisor.
+ */
+typedef struct echo_supp
+{
+ unsigned clock_rate; /* Clock rate. */
+ pj_uint16_t samples_per_frame; /* Frame length in samples */
+ pj_uint16_t samples_per_segment;/* Segment length in samples */
+ pj_uint16_t tail_ms; /* Tail length in milliseconds */
+ pj_uint16_t tail_samples; /* Tail length in samples. */
+
+ pj_bool_t learning; /* Are we still learning? */
+ talk_state_t talk_state; /* Current talking state */
+ int tail_index; /* Echo location, -1 if not found */
+
+ unsigned max_calc; /* # of calc before learning complete.
+ (see MAX_CALC_DURATION_SEC) */
+ unsigned calc_cnt; /* Number of calculations so far */
+
+ unsigned update_cnt; /* # of updates (segments processed) */
+ unsigned templ_cnt; /* Template length, in # of segments */
+ unsigned tail_cnt; /* Tail length, in # of segments */
+ unsigned play_hist_cnt; /* # of segments in play_hist
+ (tail_cnt + templ_cnt) */
+ pj_uint16_t *play_hist; /* Array of playback levels, newest last */
+ pj_uint16_t *rec_hist; /* Array of rec levels, newest last */
+
+ float *corr_sum; /* Array of corr for each tail pos. */
+ float *tmp_corr; /* Temporary corr array calculation */
+ float best_corr; /* Best (lowest) correlation so far. */
+
+ unsigned sum_rec_level; /* Running sum of level in rec_hist */
+ float rec_corr; /* Running corr in rec_hist. */
+
+ unsigned sum_play_level0; /* Running sum of level for first pos */
+ float play_corr0; /* Running corr for first pos . */
+
+ float *min_factor; /* Array of minimum scaling factor */
+ float *avg_factor; /* Array of average scaling factor */
+ float *tmp_factor; /* Array to store provisional result */
+
+ unsigned running_cnt; /* Running duration in # of frames */
+ float residue; /* Accumulated echo residue. */
+ float last_factor; /* Last factor applied to mic signal */
+} echo_supp;
+
+
+
+/*
+ * Create the echo suppressor.
+ *
+ * pool               Pool to allocate the state and all its arrays from.
+ * clock_rate         Sampling rate. Must be high enough to give at least
+ *                    one sample per SEGMENT_PTIME segment.
+ * channel_count      Not used.
+ * samples_per_frame  Frame length in samples. Must be at least one segment
+ *                    long and must fit in 16 bit.
+ * tail_ms            Maximum echo tail length in milliseconds. The learning
+ *                    algorithm needs at least three tail positions, so
+ *                    shorter tail lengths are rounded up to
+ *                    3 * SEGMENT_PTIME milliseconds.
+ * options            Not used.
+ * p_state            Receives the created state.
+ *
+ * Returns PJ_SUCCESS, PJ_EINVAL when an argument is invalid, or PJ_ENOTSUP
+ * when the frame length is not supported.
+ */
+PJ_DEF(pj_status_t) echo_supp_create( pj_pool_t *pool,
+                                      unsigned clock_rate,
+                                      unsigned channel_count,
+                                      unsigned samples_per_frame,
+                                      unsigned tail_ms,
+                                      unsigned options,
+                                      void **p_state )
+{
+    echo_supp *ec;
+    unsigned samples_per_segment;
+    unsigned tail_cnt;
+
+    PJ_UNUSED_ARG(channel_count);
+    PJ_UNUSED_ARG(options);
+
+    PJ_ASSERT_RETURN(pool && p_state, PJ_EINVAL);
+
+    /* The segment length is used as a divisor later on and is stored in a
+     * 16 bit field, so it must be neither zero nor too large.
+     */
+    samples_per_segment = SEGMENT_PTIME * clock_rate / 1000;
+    PJ_ASSERT_RETURN(samples_per_segment > 0 &&
+                     samples_per_segment <= 0xFFFF, PJ_EINVAL);
+
+    /* A frame must contain at least one segment, and the frame length is
+     * stored in a 16 bit field as well.
+     */
+    PJ_ASSERT_RETURN(samples_per_frame >= samples_per_segment &&
+                     samples_per_frame <= 0xFFFF, PJ_ENOTSUP);
+
+    /* The learning loop only evaluates tail positions 1..tail_cnt-2 (it
+     * needs both neighbours of a position). With less than three positions
+     * the echo location can never be found, and with zero positions the
+     * unsigned loop bound wraps around and the arrays are overrun.
+     */
+    tail_cnt = tail_ms / SEGMENT_PTIME;
+    if (tail_cnt < 3) {
+        PJ_LOG(4,(THIS_FILE, "Echo suppressor tail length %u ms is too "
+                             "short, using %d ms",
+                  tail_ms, 3 * SEGMENT_PTIME));
+        tail_cnt = 3;
+    }
+
+    ec = PJ_POOL_ZALLOC_T(pool, struct echo_supp);
+    ec->clock_rate = clock_rate;
+    ec->samples_per_frame = (pj_uint16_t)samples_per_frame;
+    ec->samples_per_segment = (pj_uint16_t)samples_per_segment;
+    ec->tail_ms = (pj_uint16_t)tail_ms;
+    ec->tail_samples = (pj_uint16_t)(tail_ms * clock_rate / 1000);
+
+    ec->templ_cnt = TEMPLATE_PTIME / SEGMENT_PTIME;
+    ec->tail_cnt = tail_cnt;
+    ec->play_hist_cnt = ec->tail_cnt + ec->templ_cnt;
+
+    ec->max_calc = MAX_CALC_DURATION_SEC * clock_rate /
+                   ec->samples_per_segment;
+
+    /* The record history is exactly one template long */
+    ec->rec_hist = (pj_uint16_t*)
+                   pj_pool_alloc(pool, ec->templ_cnt *
+                                       sizeof(ec->rec_hist[0]));
+
+    /* The play history holds the tail followed by one template */
+    ec->play_hist = (pj_uint16_t*)
+                    pj_pool_alloc(pool, ec->play_hist_cnt *
+                                        sizeof(ec->play_hist[0]));
+
+    /* The rest of the arrays have one element per tail position */
+    ec->corr_sum = (float*)
+                   pj_pool_alloc(pool, ec->tail_cnt *
+                                       sizeof(ec->corr_sum[0]));
+    ec->tmp_corr = (float*)
+                   pj_pool_alloc(pool, ec->tail_cnt *
+                                       sizeof(ec->tmp_corr[0]));
+    ec->min_factor = (float*)
+                     pj_pool_alloc(pool, ec->tail_cnt *
+                                         sizeof(ec->min_factor[0]));
+    ec->avg_factor = (float*)
+                     pj_pool_alloc(pool, ec->tail_cnt *
+                                         sizeof(ec->avg_factor[0]));
+    ec->tmp_factor = (float*)
+                     pj_pool_alloc(pool, ec->tail_cnt *
+                                         sizeof(ec->tmp_factor[0]));
+
+    /* pj_pool_alloc() memory is not cleared, the reset initializes it */
+    echo_supp_reset(ec);
+
+    *p_state = ec;
+    return PJ_SUCCESS;
+}
+
+
+/*
+ * Destroy. There is nothing to release here: the function ignores its
+ * argument and always returns PJ_SUCCESS. The state and its arrays were
+ * allocated from the pool passed to echo_supp_create().
+ */
+PJ_DEF(pj_status_t) echo_supp_destroy(void *state)
+{
+ PJ_UNUSED_ARG(state);
+ return PJ_SUCCESS;
+}
+
+
+/*
+ * Hard reset: clear both level histories, forget everything that has been
+ * learned for every tail position and restart learning from scratch.
+ * state is the echo_supp instance made by echo_supp_create().
+ */
+PJ_DEF(void) echo_supp_reset(void *state)
+{
+    echo_supp *ec = (echo_supp*) state;
+    unsigned pos;
+
+    /* Back to the initial "still learning, echo not located" condition */
+    ec->learning = PJ_TRUE;
+    ec->talk_state = ST_NULL;
+    ec->tail_index = -1;
+    ec->best_corr = MAX_FLOAT;
+    ec->last_factor = 1.0;
+
+    /* Counters and running values */
+    ec->update_cnt = 0;
+    ec->calc_cnt = 0;
+    ec->running_cnt = 0;
+    ec->residue = 0;
+    ec->sum_rec_level = 0;
+    ec->sum_play_level0 = 0;
+    ec->rec_corr = 0;
+    ec->play_corr0 = 0;
+
+    /* Level histories */
+    pj_bzero(ec->rec_hist, ec->templ_cnt * sizeof(ec->rec_hist[0]));
+    pj_bzero(ec->play_hist, ec->play_hist_cnt * sizeof(ec->play_hist[0]));
+
+    /* Per tail position results */
+    for (pos = 0; pos < ec->tail_cnt; ++pos) {
+        ec->corr_sum[pos] = 0;
+        ec->avg_factor[pos] = 0;
+        ec->min_factor[pos] = MAX_FLOAT;
+    }
+}
+
+/*
+ * Soft reset: make the ES learn again while keeping the rec and playback
+ * history, the gain factors and the current tail index, so that the
+ * previous result stays in use until a better one is found.
+ */
+PJ_DEF(void) echo_supp_soft_reset(void *state)
+{
+    echo_supp *ec = (echo_supp*) state;
+    unsigned pos = 0;
+
+    /* Start over with the accumulated correlation of each tail position */
+    while (pos < ec->tail_cnt) {
+        ec->corr_sum[pos] = 0;
+        ++pos;
+    }
+    ec->best_corr = MAX_FLOAT;
+
+    /* Restart the learning and the effectiveness measurement */
+    ec->learning = PJ_TRUE;
+    ec->update_cnt = 0;
+    ec->calc_cnt = 0;
+    ec->running_cnt = 0;
+    ec->residue = 0;
+
+    /* Zero sums force a full recalculation of the running values */
+    ec->sum_rec_level = 0;
+    ec->sum_play_level0 = 0;
+    ec->rec_corr = 0;
+    ec->play_corr0 = 0;
+
+    PJ_LOG(4,(THIS_FILE, "Echo suppressor soft reset. Re-learning.."));
+}
+
+
+/* Change the talk state. The transition is traced only when the state
+ * really changes; level is used by the trace message only.
+ */
+static void echo_supp_set_state(echo_supp *ec, talk_state_t state,
+                                unsigned level)
+{
+    PJ_UNUSED_ARG(level);
+
+    /* Nothing to do when the state stays the same */
+    if (state == ec->talk_state)
+        return;
+
+    TRACE_((THIS_FILE, "[%03d.%03d] %s --> %s, level=%u",
+                       (ec->update_cnt * SEGMENT_PTIME / 1000),
+                       ((ec->update_cnt * SEGMENT_PTIME) % 1000),
+                       state_names[ec->talk_state],
+                       state_names[state], level));
+    ec->talk_state = state;
+}
+
+/*
+ * Update EC state with one segment (ec->samples_per_segment samples) of
+ * the mic signal (rec_frm) and of the played signal (play_frm).
+ *
+ * The level of both segments is always pushed to rec_hist and play_hist.
+ * The rest of the function, i.e. the correlation and gain factor
+ * calculation, only runs while ec->learning is set and once update_cnt has
+ * reached play_hist_cnt. It returns early, without touching corr_sum,
+ * min_factor or avg_factor, when the playback level is too low
+ * (ST_REM_SILENT) or when the mic level is not lower than the playback
+ * level (ST_LOCAL_TALK).
+ */
+static void echo_supp_update(echo_supp *ec, pj_int16_t *rec_frm,
+ const pj_int16_t *play_frm)
+{
+ int prev_index;
+ unsigned i, j, frm_level, sum_play_level, ulaw;
+ pj_uint16_t old_rec_frm_level, old_play_frm_level;
+ float play_corr;
+
+ ++ec->update_cnt;
+ if (ec->update_cnt > 0x7FFFFFFF)
+ ec->update_cnt = 0x7FFFFFFF; /* Saturate instead of wrapping to 0 */
+
+ /* Calculate current play frame level */
+ frm_level = pjmedia_calc_avg_signal(play_frm, ec->samples_per_segment);
+ ++frm_level; /* to avoid division by zero */
+
+ /* Save the oldest frame level for later (needed by the incremental
+ * update of the running sum and correlation below)
+ */
+ old_play_frm_level = ec->play_hist[0];
+
+ /* Push current frame level to the back of the play history */
+ pj_array_erase(ec->play_hist, sizeof(pj_uint16_t), ec->play_hist_cnt, 0);
+ ec->play_hist[ec->play_hist_cnt-1] = (pj_uint16_t) frm_level;
+
+ /* Calculate level of current mic frame */
+ frm_level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_segment);
+ ++frm_level; /* to avoid division by zero */
+
+ /* Save the oldest frame level for later */
+ old_rec_frm_level = ec->rec_hist[0];
+
+ /* Push to the back of the rec history */
+ pj_array_erase(ec->rec_hist, sizeof(pj_uint16_t), ec->templ_cnt, 0);
+ ec->rec_hist[ec->templ_cnt-1] = (pj_uint16_t) frm_level;
+
+
+ /* Can't do the calc until the play history is full. */
+ if (ec->update_cnt < ec->play_hist_cnt)
+ return;
+
+ /* Skip if learning is done */
+ if (!ec->learning)
+ return;
+
+
+ /* Calculate rec signal pattern. The "correlation" of a window is the sum
+ * of the ratios between each level and the level before it.
+ */
+ if (ec->sum_rec_level == 0) {
+ /* Buffer has just been filled up, do full calculation */
+ ec->rec_corr = 0;
+ ec->sum_rec_level = 0;
+ for (i=0; i < ec->templ_cnt-1; ++i) {
+ float corr;
+ corr = (float)ec->rec_hist[i+1] / ec->rec_hist[i];
+ ec->rec_corr += corr;
+ ec->sum_rec_level += ec->rec_hist[i];
+ }
+ ec->sum_rec_level += ec->rec_hist[i]; /* the last element */
+ } else {
+ /* Update from previous calculation: remove the contribution of the
+ * level that has just left the window, add the one of the new level
+ */
+ ec->sum_rec_level = ec->sum_rec_level - old_rec_frm_level +
+ ec->rec_hist[ec->templ_cnt-1];
+ ec->rec_corr = ec->rec_corr - ((float)ec->rec_hist[0] /
+ old_rec_frm_level) +
+ ((float)ec->rec_hist[ec->templ_cnt-1] /
+ ec->rec_hist[ec->templ_cnt-2]);
+ }
+
+ /* Iterate through the play history and calculate the signal correlation
+ * for every tail position in the play_hist. Save the result in temporary
+ * array since we may bail out early if the conversation state is not good
+ * to detect echo.
+ */
+ /*
+ * First phase: do full calculation for the first position
+ */
+ if (ec->sum_play_level0 == 0) {
+ /* Buffer has just been filled up, do full calculation */
+ sum_play_level = 0;
+ play_corr = 0;
+ for (j=0; j<ec->templ_cnt-1; ++j) {
+ float corr;
+ corr = (float)ec->play_hist[j+1] / ec->play_hist[j];
+ play_corr += corr;
+ sum_play_level += ec->play_hist[j];
+ }
+ sum_play_level += ec->play_hist[j]; /* the last element */
+ ec->sum_play_level0 = sum_play_level;
+ ec->play_corr0 = play_corr;
+ } else {
+ /* Update from previous calculation */
+ ec->sum_play_level0 = ec->sum_play_level0 - old_play_frm_level +
+ ec->play_hist[ec->templ_cnt-1];
+ ec->play_corr0 = ec->play_corr0 - ((float)ec->play_hist[0] /
+ old_play_frm_level) +
+ ((float)ec->play_hist[ec->templ_cnt-1] /
+ ec->play_hist[ec->templ_cnt-2]);
+ sum_play_level = ec->sum_play_level0;
+ play_corr = ec->play_corr0;
+ }
+ ec->tmp_corr[0] = FABS(play_corr - ec->rec_corr);
+ ec->tmp_factor[0] = (float)ec->sum_rec_level / sum_play_level;
+
+ /* Bail out if remote isn't talking */
+ ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF;
+ if (ulaw < MIN_SIGNAL_ULAW) {
+ echo_supp_set_state(ec, ST_REM_SILENT, ulaw);
+ return;
+ }
+ /* Bail out if local user is talking */
+ if (ec->sum_rec_level >= sum_play_level) {
+ echo_supp_set_state(ec, ST_LOCAL_TALK, ulaw);
+ return;
+ }
+
+ /*
+ * Second phase: do incremental calculation for the rest of positions,
+ * by sliding the template window one segment at a time
+ */
+ for (i=1; i < ec->tail_cnt; ++i) {
+ unsigned end;
+
+ end = i + ec->templ_cnt; /* one past the last element of the window */
+
+ sum_play_level = sum_play_level - ec->play_hist[i-1] +
+ ec->play_hist[end-1];
+ play_corr = play_corr - ((float)ec->play_hist[i]/ec->play_hist[i-1]) +
+ ((float)ec->play_hist[end-1]/ec->play_hist[end-2]);
+
+ /* Bail out if remote isn't talking */
+ ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF;
+ if (ulaw < MIN_SIGNAL_ULAW) {
+ echo_supp_set_state(ec, ST_REM_SILENT, ulaw);
+ return;
+ }
+
+ /* Bail out if local user is talking */
+ if (ec->sum_rec_level >= sum_play_level) {
+ echo_supp_set_state(ec, ST_LOCAL_TALK, ulaw);
+ return;
+ }
+
+#if 0
+ // disabled: not a good idea if mic throws out loud echo
+ /* Also bail out if we suspect there's a doubletalk */
+ ulaw = pjmedia_linear2ulaw(ec->sum_rec_level/ec->templ_cnt) ^ 0xFF;
+ if (ulaw > MIN_SIGNAL_ULAW) {
+ echo_supp_set_state(ec, ST_DOUBLETALK, ulaw);
+ return;
+ }
+#endif
+
+ /* Calculate correlation and save to temporary array */
+ ec->tmp_corr[i] = FABS(play_corr - ec->rec_corr);
+
+ /* Also calculate the gain factor between mic and speaker level. It is
+ * below 1 since sum_rec_level < sum_play_level was checked above.
+ */
+ ec->tmp_factor[i] = (float)ec->sum_rec_level / sum_play_level;
+ pj_assert(ec->tmp_factor[i] < 1);
+ }
+
+ /* We seem to have good signal, we can update the EC state */
+ echo_supp_set_state(ec, ST_REM_TALK, MIN_SIGNAL_ULAW);
+
+ /* Accumulate the correlation value to the history and at the same
+ * time find the tail index of the best correlation.
+ *
+ * The first and the last tail positions are skipped because the
+ * neighbour sum below reads corr_sum[i-1] and corr_sum[i+1]. This also
+ * means corr_sum, min_factor and avg_factor of those two positions are
+ * never updated here.
+ *
+ * NOTE(review): the loop bound is unsigned, so it wraps around when
+ * tail_cnt is zero; and with tail_cnt below 3 the loop never runs and
+ * tail_index stays at -1. Confirm that tail_cnt >= 3 is guaranteed.
+ */
+ prev_index = ec->tail_index;
+ for (i=1; i<ec->tail_cnt-1; ++i) {
+ float *p = &ec->corr_sum[i], sum;
+
+ /* Accumulate correlation value for this tail position */
+ ec->corr_sum[i] += ec->tmp_corr[i];
+
+ /* Update the min and avg gain factor for this tail position. The
+ * average is a weighted one: the previous average has the weight of
+ * tail_cnt and the new value has the weight of one.
+ */
+ if (ec->tmp_factor[i] < ec->min_factor[i])
+ ec->min_factor[i] = ec->tmp_factor[i];
+ ec->avg_factor[i] = ((ec->avg_factor[i] * ec->tail_cnt) +
+ ec->tmp_factor[i]) /
+ (ec->tail_cnt + 1);
+
+ /* To get the best correlation, also include the correlation
+ * value of the neighbouring tail locations. Note that at this point
+ * corr_sum[i+1] has not been accumulated for this round yet.
+ */
+ sum = *(p-1) + (*p)*2 + *(p+1);
+ //sum = *p;
+
+ /* See if we have better (lower) correlation value */
+ if (sum < ec->best_corr) {
+ ec->tail_index = i;
+ ec->best_corr = sum;
+ }
+ }
+
+ if (ec->tail_index != prev_index) { /* echo location has moved */
+ unsigned duration;
+ int imin, iavg;
+
+ duration = ec->update_cnt * SEGMENT_PTIME;
+ imin = (int)(ec->min_factor[ec->tail_index] * 1000);
+ iavg = (int)(ec->avg_factor[ec->tail_index] * 1000);
+
+ PJ_LOG(4,(THIS_FILE,
+ "Echo suppressor updated at t=%03d.%03ds, echo tail=%d msec"
+ ", factor min/avg=%d.%03d/%d.%03d",
+ (duration/1000), (duration%1000),
+ (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME,
+ imin/1000, imin%1000,
+ iavg/1000, iavg%1000));
+
+ }
+
+ ++ec->calc_cnt;
+
+ if (ec->calc_cnt > ec->max_calc) { /* enough calculations were made */
+ unsigned duration;
+ int imin, iavg;
+
+
+ ec->learning = PJ_FALSE;
+ ec->running_cnt = 0;
+
+ duration = ec->update_cnt * SEGMENT_PTIME;
+ imin = (int)(ec->min_factor[ec->tail_index] * 1000); /* NOTE(review): index is -1 if no tail was found */
+ iavg = (int)(ec->avg_factor[ec->tail_index] * 1000);
+
+ PJ_LOG(4,(THIS_FILE,
+ "Echo suppressor learning done at t=%03d.%03ds, tail=%d ms"
+ ", factor min/avg=%d.%03d/%d.%03d",
+ (duration/1000), (duration%1000),
+ (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME,
+ imin/1000, imin%1000,
+ iavg/1000, iavg%1000));
+ }
+
+}
+
+
+/* Amplify frame: multiply each of the length samples in frm with factor,
+ * in place.
+ *
+ * The result is saturated to the 16 bit sample range. Without this, a
+ * factor above 1.0 makes loud samples wrap around to the opposite sign
+ * (and converting an out of range float to integer is undefined).
+ */
+static void amplify_frame(pj_int16_t *frm, unsigned length,
+                          pj_ufloat_t factor)
+{
+    unsigned i;
+
+    for (i=0; i<length; ++i) {
+        /* At most a 16 bit sample times a small factor, int can hold it */
+        int sample = (int)pj_ufloat_mul_i(frm[i], factor);
+
+        if (sample > 32767)
+            sample = 32767;
+        else if (sample < -32768)
+            sample = -32768;
+
+        frm[i] = (pj_int16_t)sample;
+    }
+}
+
+/*
+ * Perform echo cancellation.
+ *
+ * state     The state made by echo_supp_create().
+ * rec_frm   One frame (samples_per_frame samples) captured from the mic.
+ *           It is modified in place.
+ * play_frm  One frame that is played to the speaker.
+ * options   Not used.
+ * reserved  Not used.
+ *
+ * The levels of both frames are first fed to the learning algorithm, one
+ * segment at a time. As soon as an echo location is known, the mic frame
+ * is scaled with a factor that depends on whether the mic and/or the
+ * speaker have been active recently. The factor never exceeds 1.0, i.e.
+ * the mic signal is attenuated or left alone but never amplified.
+ *
+ * Always returns PJ_SUCCESS.
+ */
+PJ_DEF(pj_status_t) echo_supp_cancel_echo( void *state,
+                                           pj_int16_t *rec_frm,
+                                           const pj_int16_t *play_frm,
+                                           unsigned options,
+                                           void *reserved )
+{
+    unsigned i, N;
+    echo_supp *ec = (echo_supp*) state;
+
+    PJ_UNUSED_ARG(options);
+    PJ_UNUSED_ARG(reserved);
+
+    /* Calculate number of segments. This should be okay even if
+     * samples_per_frame is not a multiple of samples_per_segment, since
+     * we only calculate level.
+     */
+    N = ec->samples_per_frame / ec->samples_per_segment;
+    pj_assert(N>0);
+    for (i=0; i<N; ++i) {
+        unsigned pos = i * ec->samples_per_segment;
+        echo_supp_update(ec, rec_frm+pos, play_frm+pos);
+    }
+
+    if (ec->tail_index < 0) {
+        /* Not ready, echo location is not known yet */
+    } else {
+        unsigned lookup_cnt, rec_level=0, play_level=0;
+        unsigned tail_cnt;
+        float factor;
+
+        /* How many previous segments to lookup */
+        lookup_cnt = SIGNAL_LOOKUP_MSEC / SEGMENT_PTIME;
+        if (lookup_cnt > ec->templ_cnt)
+            lookup_cnt = ec->templ_cnt;
+
+        /* Lookup in recording history to get maximum mic level, to see
+         * if local user is currently talking
+         */
+        for (i=ec->templ_cnt - lookup_cnt; i < ec->templ_cnt; ++i) {
+            if (ec->rec_hist[i] > rec_level)
+                rec_level = ec->rec_hist[i];
+        }
+        rec_level = pjmedia_linear2ulaw(rec_level) ^ 0xFF;
+
+        /* Calculate the detected tail length, in # of segments */
+        tail_cnt = (ec->tail_cnt - ec->tail_index);
+
+        /* Lookup in playback history to get max speaker level, to see
+         * if remote user is currently talking. The window is shifted to
+         * the past by the detected tail length, i.e. to the signal whose
+         * echo is arriving at the mic now.
+         */
+        for (i=ec->play_hist_cnt -lookup_cnt -tail_cnt;
+             i<ec->play_hist_cnt-tail_cnt; ++i)
+        {
+            if (ec->play_hist[i] > play_level)
+                play_level = ec->play_hist[i];
+        }
+        play_level = pjmedia_linear2ulaw(play_level) ^ 0xFF;
+
+        if (rec_level >= MIN_SIGNAL_ULAW) {
+            if (play_level < MIN_SIGNAL_ULAW) {
+                /* Mic is talking, speaker is idle. Let mic signal pass
+                 * as is.
+                 */
+                factor = 1.0;
+                echo_supp_set_state(ec, ST_LOCAL_TALK, rec_level);
+            } else if (rec_level > play_level) {
+                /* Seems that both are talking. Scale the mic signal
+                 * down a little bit to reduce echo, while allowing both
+                 * parties to talk at the same time.
+                 */
+                factor = (float)(ec->avg_factor[ec->tail_index] * 2);
+                echo_supp_set_state(ec, ST_DOUBLETALK, rec_level);
+            } else {
+                /* Speaker is active, but we've picked up large signal in
+                 * the microphone. Assume that this is an echo, so bring
+                 * the level down to minimum too.
+                 */
+                factor = ec->min_factor[ec->tail_index] / 2;
+                echo_supp_set_state(ec, ST_REM_TALK, play_level);
+            }
+        } else {
+            if (play_level < MIN_SIGNAL_ULAW) {
+                /* Both mic and speaker seems to be idle. Also scale the
+                 * mic signal down with average factor to reduce low power
+                 * echo.
+                 */
+                factor = ec->avg_factor[ec->tail_index] * 3 / 2;
+                echo_supp_set_state(ec, ST_REM_SILENT, rec_level);
+            } else {
+                /* Mic is idle, but there's something playing in speaker.
+                 * Scale the mic down to minimum
+                 */
+                factor = ec->min_factor[ec->tail_index] / 2;
+                echo_supp_set_state(ec, ST_REM_TALK, play_level);
+            }
+        }
+
+        /* The multiples of the average factor used above exceed 1.0 when
+         * the echo is loud (average factor above 0.5 or 0.67). The ES is
+         * meant to scale the mic signal down, not to amplify it and
+         * overflow the samples, so limit the factor.
+         */
+        if (factor > 1.0f)
+            factor = 1.0f;
+
+        /* Smoothen the transition: open up quickly, close down slowly */
+        if (factor >= ec->last_factor)
+            factor = (factor + ec->last_factor) / 2;
+        else
+            factor = (factor + ec->last_factor*19) / 20;
+
+        /* Amplify frame */
+        amplify_frame(rec_frm, ec->samples_per_frame,
+                      pj_ufloat_from_float(factor));
+        ec->last_factor = factor;
+
+        if (ec->talk_state == ST_REM_TALK) {
+            unsigned level, recalc_cnt;
+
+            /* Get the adjusted frame signal level */
+            level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_frame);
+            level = pjmedia_linear2ulaw(level) ^ 0xFF;
+
+            /* Accumulate average echo residue to see the ES effectiveness */
+            ec->residue = ((ec->residue * ec->running_cnt) + level) /
+                          (ec->running_cnt + 1);
+
+            ++ec->running_cnt;
+
+            /* Check if we need to re-learn, once every CHECK_PERIOD seconds
+             * worth of frames in this state
+             */
+            recalc_cnt = CHECK_PERIOD * ec->clock_rate / ec->samples_per_frame;
+            if (ec->running_cnt > recalc_cnt) {
+                int iresidue;
+
+                iresidue = (int)(ec->residue*1000);
+
+                PJ_LOG(5,(THIS_FILE, "Echo suppressor residue = %d.%03d",
+                          iresidue/1000, iresidue%1000));
+
+                if (ec->residue > MAX_RESIDUE && !ec->learning) {
+                    /* Too much echo left; the soft reset also restarts
+                     * running_cnt
+                     */
+                    echo_supp_soft_reset(ec);
+                    ec->residue = 0;
+                } else {
+                    /* Good enough, start a new measurement period */
+                    ec->running_cnt = 0;
+                    ec->residue = 0;
+                }
+            }
+        }
+    }
+
+    return PJ_SUCCESS;
+}
+