Ticket #590: new echo suppressor which should work much better than the old one

git-svn-id: http://svn.pjsip.org/repos/pjproject/trunk@2199 74dad513-b988-da41-8d7b-12977e46ad98
author: Benny Prijono <bennylp@teluu.com> 2008-08-10 16:15:14 +0000
committer: Benny Prijono <bennylp@teluu.com> 2008-08-10 16:15:14 +0000
commit: 4d391149bdafa66e0a16dc7c96f628fff745d2ac (patch)
tree: cfa7bfff8edfb9c7ab2e9558e99611e737e9bece
parent: c0970767b422b18bb22e71efac3d6353bba37006 (diff)
2 files changed, 657 insertions, 46 deletions
diff --git a/pjmedia/src/pjmedia/echo_suppress.c b/pjmedia/src/pjmedia/echo_suppress.c
index a86a058d..b0f32e0f 100644
--- a/pjmedia/src/pjmedia/echo_suppress.c
+++ b/pjmedia/src/pjmedia/echo_suppress.c
@@ -17,8 +17,10 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
  */
 #include <pjmedia/types.h>
+#include <pjmedia/alaw_ulaw.h>
 #include <pjmedia/errno.h>
 #include <pjmedia/silencedet.h>
+#include <pj/array.h>
 #include <pj/assert.h>
 #include <pj/lock.h>
 #include <pj/log.h>
@@ -29,15 +31,225 @@
 
 #define THIS_FILE			    "echo_suppress.c"
 
+/* Maximum float constant */
+#define MAX_FLOAT		(float)1.701411e38
+
+/* The effective learn duration (in seconds) before we declare that learning
+ * is complete. The actual learning duration itself may be longer depending
+ * on the conversation pattern (e.g. we can't detect echo if speaker is only
+ * playing silence).
+ */
+#define MAX_CALC_DURATION_SEC	3
+
+/* The internal audio segment length, in milliseconds. 10ms should be good
+ * and there should be no need to change it.
+ */
+#define SEGMENT_PTIME		10
+
+/* The length of the template signal in milliseconds. The longer the template,
+ * the better correlation will be found, at the expense of more processing
+ * and longer learning time.
+ */
+#define TEMPLATE_PTIME		200
+
+/* How long to look back in the past to see if either mic or speaker is
+ * active.
+ */
+#define SIGNAL_LOOKUP_MSEC	200
+
+/* The minimum level value to be considered as talking, in uLaw complement
+ * (0-255).
+ */
+#define MIN_SIGNAL_ULAW		35
+
+/* The period (in seconds) at which the ES will analyze its effectiveness,
+ * and it may trigger soft-reset to force recalculation.
+ */
+#define CHECK_PERIOD		30
+
+/* Maximum signal level of average echo residue (in uLaw complement). When
+ * the residue value exceeds this value, we force the ES to re-learn.
+ */
+#define MAX_RESIDUE		2.5
+
+
+#if 0
+#   define TRACE_(expr)	PJ_LOG(5,expr)
+#else
+#   define TRACE_(expr)
+#endif
+
+PJ_INLINE(float) fabs(float val)
+{
+    if (val < 0)
+	return -val;
+    else
+	return val;
+}
+
+
+#if defined(PJ_HAS_FLOATING_POINT) && PJ_HAS_FLOATING_POINT!=0
+    typedef float pj_ufloat_t;
+#   define pj_ufloat_from_float(f)	(f)
+#   define pj_ufloat_mul_u(val1, f)	((val1) * (f))
+#   define pj_ufloat_mul_i(val1, f)	((val1) * (f))
+#else
+    typedef pj_uint32_t pj_ufloat_t;
+
+    pj_ufloat_t pj_ufloat_from_float(float f)
+    {
+	return (pj_ufloat_t)(f * 65536);
+    }
+
+    unsigned pj_ufloat_mul_u(unsigned val1, pj_ufloat_t val2)
+    {
+	return (val1 * val2) >> 16;
+    }
+
+    int pj_ufloat_mul_i(int val1, pj_ufloat_t val2)
+    {
+	return (val1 * (pj_int32_t)val2) >> 16;
+    }
+#endif
+
+
+/* Conversation state */
+typedef enum talk_state
+{
+    ST_NULL,
+    ST_LOCAL_TALK,
+    ST_REM_SILENT,
+    ST_DOUBLETALK,
+    ST_REM_TALK
+} talk_state;
+
+const char *state_names[] = 
+{
+    "Null",
+    "local talking",
+    "remote silent",
+    "doubletalk",
+    "remote talking"
+};
+
+
+/* Description:
+
+   The echo suppressor tries to find the position of echoed signal by looking
+   at the correlation between signal played to the speaker (played signal) 
+   and the signal captured from the microphone (recorded signal).
+
+   To do this, it first divides the frames (from mic and speaker) into 
+   segments, calculate the audio level of the segment, and save the level
+   information in the playback and record history (play_hist and rec_hist
+   respectively).
+
+   In the history, the newest element (depicted as "t0" in the diagram below)
+   is put in the last position of the array.
+
+   The record history size is as large as the template size (templ_cnt), since
+   we will use the record history as the template to find the best matching 
+   position in the playback history.
+
+   Here is the record history buffer:
+
+       <--templ_cnt-->
+       +-------------+
+       |   rec_hist  |
+       +-------------+
+    t-templ_cnt......t0
+
+   As you can see, the newest frame ("t0") is put as the last element in the
+   array.
+
+   The playback history size is larger than record history, since we need to
+   find the matching pattern in the past. The playback history size is
+   "templ_cnt + tail_cnt", where "tail_cnt" is the number of segments equal
+   to the maximum tail length. The maximum tail length is set when the ES
+   is created.
+
+   Here is the playback history buffer:
+
+       <-----tail_cnt-----> <--templ_cnt-->
+       +-------------------+--------------+
+       |             play_hist            |
+       +-------------------+--------------+
+   t-play_hist_cnt...t-templ_cnt.......t0
+
+
+
+   Learning:
+
+   During the processing, the ES calculates the following values:
+    - the correlation value, that is how similar the playback signal is
+      to the mic signal. The lower the correlation value the better (i.e. more
+      similar) the signal is. The correlation value is calculated over the
+      template duration.
+    - the gain scaling factor, that is the ratio between mic signal and 
+      speaker signal. The ES calculates both the minimum and average ratios.
+
+   The ES calculates both the values above for every tail position in the
+   playback history. The values are saved in arrays below:
+
+     <-----tail_cnt----->
+     +-------------------+
+     |      corr_sum     |
+     +-------------------+
+     |     min_factor    |
+     +-------------------+
+     |     avg_factor    |
+     +-------------------+
+
+   At the end of processing, the ES iterates through the correlation array and
+   picks the tail index with the lowest corr_sum value. This is the position
+   where echo is most likely to be found.
+
+
+   Processing:
+
+   Once learning is done, the ES will change the level of the mic signal 
+   depending on the state of the conversation and according to the ratio that
+   has been found in the learning phase above.
+
+ */
 
 /*
- * Simple echo suppresor
+ * The simple echo suppressor state
  */
 typedef struct echo_supp
 {
-    pjmedia_silence_det	*sd;
-    unsigned		 samples_per_frame;
-    unsigned		 tail_ms;
+    unsigned	 clock_rate;	    /* Clock rate.			    */
+    pj_uint16_t	 samples_per_frame; /* Frame length in samples		    */
+    pj_uint16_t  samples_per_segment;/* Segment length in samples	    */
+    pj_uint16_t  tail_ms;	    /* Tail length in milliseconds	    */
+    pj_uint16_t  tail_samples;	    /* Tail length in samples.		    */
+
+    pj_bool_t	 learning;	    /* Are we still learning yet?	    */
+    talk_state	 talk_state;	    /* Current talking state		    */
+    int		 tail_index;	    /* Echo location, -1 if not found	    */
+
+    unsigned	 max_calc;	    /* # of calc before learning complete.
+                                       (see MAX_CALC_DURATION_SEC)	    */
+    unsigned	 calc_cnt;	    /* Number of calculations so far	    */
+
+    unsigned	 update_cnt;	    /* # of updates			    */
+    unsigned	 templ_cnt;	    /* Template length, in # of segments    */
+    unsigned	 tail_cnt;	    /* Tail length, in # of segments	    */
+    unsigned	 play_hist_cnt;	    /* # of segments in play_hist	    */
+    pj_uint16_t *play_hist;	    /* Array of playback levels		    */
+    pj_uint16_t *rec_hist;	    /* Array of rec levels		    */
+
+    float	*corr_sum;	    /* Array of corr for each tail pos.	    */
+    float	*tmp_corr;	    /* Temporary corr array calculation	    */
+    float	 best_corr;	    /* Best correlation so far.		    */
+
+    float	*min_factor;	    /* Array of minimum scaling factor	    */
+    float	*avg_factor;	    /* Array of average scaling factor	    */
+    float	*tmp_factor;	    /* Array to store provisional result    */
+
+    unsigned	 running_cnt;	    /* Running duration in # of frames	    */
+    float	 residue;	    /* Accummulated echo residue.	    */
+    float	 last_factor;	    /* Last factor applied to mic signal    */
 } echo_supp;
 
 
@@ -54,24 +266,52 @@ PJ_DEF(pj_status_t) echo_supp_create( pj_pool_t *pool,
 				      void **p_state )
 {
     echo_supp *ec;
-    pj_status_t status;
 
-    PJ_UNUSED_ARG(clock_rate);
     PJ_UNUSED_ARG(channel_count);
     PJ_UNUSED_ARG(options);
 
+    PJ_ASSERT_RETURN(samples_per_frame >= SEGMENT_PTIME * clock_rate / 1000,
+		     PJ_ENOTSUP);
+
     ec = PJ_POOL_ZALLOC_T(pool, struct echo_supp);
-    ec->samples_per_frame = samples_per_frame;
-    ec->tail_ms = tail_ms;
+    ec->clock_rate = clock_rate;
+    ec->samples_per_frame = (pj_uint16_t)samples_per_frame;
+    ec->samples_per_segment = (pj_uint16_t)(SEGMENT_PTIME * clock_rate / 1000);
+    ec->tail_ms = (pj_uint16_t)tail_ms;
+    ec->tail_samples = (pj_uint16_t)(tail_ms * clock_rate / 1000);
+
+    ec->templ_cnt = TEMPLATE_PTIME / SEGMENT_PTIME;
+    ec->tail_cnt = (pj_uint16_t)(tail_ms / SEGMENT_PTIME);
+    ec->play_hist_cnt = (pj_uint16_t)(ec->tail_cnt+ec->templ_cnt);
+
+    ec->max_calc = (pj_uint16_t)(MAX_CALC_DURATION_SEC * clock_rate / 
+				 ec->samples_per_segment);
 
-    status = pjmedia_silence_det_create(pool, clock_rate, samples_per_frame,
-					&ec->sd);
-    if (status != PJ_SUCCESS)
-	return status;
+    ec->rec_hist = (pj_uint16_t*) 
+		    pj_pool_alloc(pool, ec->templ_cnt *
+					sizeof(ec->rec_hist[0]));
 
-    pjmedia_silence_det_set_name(ec->sd, "ecsu%p");
-    pjmedia_silence_det_set_adaptive(ec->sd, PJMEDIA_ECHO_SUPPRESS_THRESHOLD);
-    pjmedia_silence_det_set_params(ec->sd, 100, 500, 3000);
+    /* Note: play history holds tail_cnt + templ_cnt elements */
+    ec->play_hist = (pj_uint16_t*) 
+		     pj_pool_alloc(pool, ec->play_hist_cnt *
+					 sizeof(ec->play_hist[0]));
+
+    ec->corr_sum = (float*)
+		   pj_pool_alloc(pool, ec->tail_cnt * 
+				       sizeof(ec->corr_sum[0]));
+    ec->tmp_corr = (float*)
+		   pj_pool_alloc(pool, ec->tail_cnt * 
+				       sizeof(ec->tmp_corr[0]));
+    ec->min_factor = (float*)
+		     pj_pool_alloc(pool, ec->tail_cnt * 
+				         sizeof(ec->min_factor[0]));
+    ec->avg_factor = (float*)
+		     pj_pool_alloc(pool, ec->tail_cnt * 
+				         sizeof(ec->avg_factor[0]));
+    ec->tmp_factor = (float*)
+		     pj_pool_alloc(pool, ec->tail_cnt * 
+				         sizeof(ec->tmp_factor[0]));
+    echo_supp_reset(ec);
 
     *p_state = ec;
     return PJ_SUCCESS;
@@ -89,15 +329,257 @@ PJ_DEF(pj_status_t) echo_supp_destroy(void *state)
 
 
 /*
- * Reset
+ * Hard reset
  */
 PJ_DEF(void) echo_supp_reset(void *state)
 {
-    PJ_UNUSED_ARG(state);
-    return;
+    unsigned i;
+    echo_supp *ec = (echo_supp*) state;
+
+    pj_bzero(ec->rec_hist, ec->templ_cnt * sizeof(ec->rec_hist[0]));
+    pj_bzero(ec->play_hist, ec->play_hist_cnt * sizeof(ec->play_hist[0]));
+
+    for (i=0; i<ec->tail_cnt; ++i) {
+	ec->corr_sum[i] = ec->avg_factor[i] = 0;
+	ec->min_factor[i] = MAX_FLOAT;
+    }
+
+    ec->update_cnt = 0;
+    ec->calc_cnt = 0;
+    ec->learning = PJ_TRUE;
+    ec->tail_index = -1;
+    ec->best_corr = MAX_FLOAT;
+    ec->talk_state = ST_NULL;
+    ec->last_factor = 1.0;
+    ec->residue = 0;
+    ec->running_cnt = 0;
+}
+
+/*
+ * Soft reset to force the EC to re-learn without having to discard all
+ * rec and playback history.
+ */
+PJ_DEF(void) echo_supp_soft_reset(void *state)
+{
+    unsigned i;
+
+    echo_supp *ec = (echo_supp*) state;
+
+    for (i=0; i<ec->tail_cnt; ++i) {
+	ec->corr_sum[i] = 0;
+    }
+
+    ec->update_cnt = 0;
+    ec->calc_cnt = 0;
+    ec->learning = PJ_TRUE;
+    ec->best_corr = MAX_FLOAT;
+    ec->residue = 0;
+    ec->running_cnt = 0;
+
+    PJ_LOG(4,(THIS_FILE, "Echo suppressor soft reset. Re-learning.."));
+}
+
+
+/* Set state */
+static void echo_supp_set_state(echo_supp *ec, enum talk_state state)
+{
+    if (state != ec->talk_state) {
+	TRACE_((THIS_FILE, "[%03d.%03d] %s --> %s", 
+			   (ec->update_cnt * SEGMENT_PTIME / 1000), 
+			   ((ec->update_cnt * SEGMENT_PTIME) % 1000),
+			   state_names[ec->talk_state],
+			   state_names[state]));
+	ec->talk_state = state;
+    }
 }
 
 /*
+ * Update EC state
+ */
+static void echo_supp_update(echo_supp *ec, pj_int16_t *rec_frm,
+			     const pj_int16_t *play_frm)
+{
+    int prev_index;
+    unsigned i, frm_level, sum_rec_level;
+    float rec_corr;
+
+    ++ec->update_cnt;
+    if (ec->update_cnt > 0x7FFFFFFF)
+	ec->update_cnt = 0x7FFFFFFF; /* Detect overflow */
+
+    /* Calculate current play frame level */
+    frm_level = pjmedia_calc_avg_signal(play_frm, ec->samples_per_segment);
+    ++frm_level; /* to avoid division by zero */
+
+    /* Push current frame level to the back of the play history */
+    pj_array_erase(ec->play_hist, sizeof(pj_uint16_t), ec->play_hist_cnt, 0);
+    ec->play_hist[ec->play_hist_cnt-1] = (pj_uint16_t) frm_level;
+
+    /* Calculate level of current mic frame */
+    frm_level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_segment);
+    ++frm_level; /* to avoid division by zero */
+
+    /* Push to the back of the rec history */
+    pj_array_erase(ec->rec_hist, sizeof(pj_uint16_t), ec->templ_cnt, 0);
+    ec->rec_hist[ec->templ_cnt-1] = (pj_uint16_t) frm_level;
+
+
+    /* Can't do the calc until the play history is full. */
+    if (ec->update_cnt < ec->play_hist_cnt)
+	return;
+
+    /* Skip if learning is done */
+    if (!ec->learning)
+	return;
+
+
+    /* Calculate rec signal pattern */
+    rec_corr = 0;
+    sum_rec_level = 0;
+    for (i=0; i < ec->templ_cnt-1; ++i) {
+	float corr;
+	corr = (float)ec->rec_hist[i+1] / ec->rec_hist[i];
+	rec_corr += corr;
+	sum_rec_level += ec->rec_hist[i];
+    }
+    sum_rec_level += ec->rec_hist[i];
+
+    /* Iterate through the play history and calculate the signal correlation
+     * for every tail position in the play_hist. Save the result in temporary
+     * array since we may bail out early if the conversation state is not good
+     * to detect echo.
+     */
+    for (i=0; i < ec->tail_cnt; ++i) {
+	unsigned j, end, sum_play_level, ulaw;
+	float play_corr = 0, corr_diff;
+
+	sum_play_level = 0;
+	for (j=i, end=i+ec->templ_cnt-1; j<end; ++j) {
+	    float corr;
+	    corr = (float)ec->play_hist[j+1] / ec->play_hist[j];
+	    play_corr += corr;
+	    sum_play_level += ec->play_hist[j];
+	}
+	sum_play_level += ec->play_hist[j];
+
+	/* Bail out if remote isn't talking */
+	ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF;
+	if (ulaw < MIN_SIGNAL_ULAW) {
+	    echo_supp_set_state(ec, ST_REM_SILENT);
+	    return;
+	}
+
+	/* Bail out if local user is talking */
+	if (sum_rec_level >= sum_play_level) {
+	    echo_supp_set_state(ec, ST_LOCAL_TALK);
+	    return;
+	}
+
+	/* Also bail out if we suspect there's a doubletalk */
+	ulaw = pjmedia_linear2ulaw(sum_rec_level/ec->templ_cnt) ^ 0xFF;
+	if (ulaw > MIN_SIGNAL_ULAW) {
+	    echo_supp_set_state(ec, ST_DOUBLETALK);
+	    return;
+	}
+
+	/* Calculate correlation and save to temporary array */
+	corr_diff = fabs(play_corr - rec_corr);
+	ec->tmp_corr[i] = corr_diff;
+
+	/* Also calculate the gain factor between mic and speaker level */
+	ec->tmp_factor[i] = (float)sum_rec_level / sum_play_level;
+	pj_assert(ec->tmp_factor[i] < 1);
+    }
+
+    /* We seem to have good signal, we can update the EC state */
+    echo_supp_set_state(ec, ST_REM_TALK);
+
+    /* Accumulate the correlation value to the history and at the same
+     * time find the tail index of the best correlation.
+     */
+    prev_index = ec->tail_index;
+    for (i=1; i<ec->tail_cnt-1; ++i) {
+	float *p = &ec->corr_sum[i], sum;
+
+	/* Accumulate correlation value for this tail position */
+	ec->corr_sum[i] += ec->tmp_corr[i];
+
+	/* Update the min and avg gain factor for this tail position */
+	if (ec->tmp_factor[i] < ec->min_factor[i])
+	    ec->min_factor[i] = ec->tmp_factor[i];
+	ec->avg_factor[i] = ((ec->avg_factor[i] * ec->tail_cnt) + 
+				    ec->tmp_factor[i]) /
+			    (ec->tail_cnt + 1);
+
+	/* To get the best correlation, also include the correlation
+	 * value of the neighbouring tail locations.
+	 */
+	sum = *(p-1) + (*p)*2 + *(p+1);
+	//sum = *p;
+
+	/* See if we have better correlation value */
+	if (sum < ec->best_corr) {
+	    ec->tail_index = i;
+	    ec->best_corr = sum;
+	}
+    }
+
+    if (ec->tail_index != prev_index) {
+	unsigned duration;
+	int imin, iavg;
+
+	duration = ec->update_cnt * SEGMENT_PTIME;
+	imin = (int)(ec->min_factor[ec->tail_index] * 1000);
+	iavg = (int)(ec->avg_factor[ec->tail_index] * 1000);
+
+	PJ_LOG(4,(THIS_FILE, 
+		  "Echo suppressor updated at t=%03d.%03ds, echo tail=%d msec"
+		  ", factor min/avg=%d.%03d/%d.%03d",
+		  (duration/1000), (duration%1000),
+		  (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME,
+		  imin/1000, imin%1000,
+		  iavg/1000, iavg%1000));
+
+    }
+
+    ++ec->calc_cnt;
+
+    if (ec->calc_cnt > ec->max_calc) {
+	unsigned duration;
+	int imin, iavg;
+
+
+	ec->learning = PJ_FALSE;
+	ec->running_cnt = 0;
+
+	duration = ec->update_cnt * SEGMENT_PTIME;
+	imin = (int)(ec->min_factor[ec->tail_index] * 1000);
+	iavg = (int)(ec->avg_factor[ec->tail_index] * 1000);
+
+	PJ_LOG(4,(THIS_FILE, 
+	          "Echo suppressor learning done at t=%03d.%03ds, tail=%d ms"
+		  ", factor min/avg=%d.%03d/%d.%03d",
+		  (duration/1000), (duration%1000),
+		  (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME,
+		  imin/1000, imin%1000,
+		  iavg/1000, iavg%1000));
+    }
+
+}
+
+
+/* Amplify frame */
+static void amplify_frame(pj_int16_t *frm, unsigned length, 
+			  pj_ufloat_t factor)
+{
+    unsigned i;
+
+    for (i=0; i<length; ++i) {
+	frm[i] = (pj_int16_t)pj_ufloat_mul_i(frm[i], factor);
+    }
+}
+
+/* 
  * Perform echo cancellation.
  */
 PJ_DEF(pj_status_t) echo_supp_cancel_echo( void *state,
@@ -106,25 +588,132 @@ PJ_DEF(pj_status_t) echo_supp_cancel_echo( void *state,
 					   unsigned options,
 					   void *reserved )
 {
+    unsigned i, N;
     echo_supp *ec = (echo_supp*) state;
-    pj_bool_t silence;
 
     PJ_UNUSED_ARG(options);
     PJ_UNUSED_ARG(reserved);
 
-    silence = pjmedia_silence_det_detect(ec->sd, play_frm, 
-					 ec->samples_per_frame, NULL);
+    /* Calculate number of segments. This should be okay even if
+     * samples_per_frame is not a multiple of samples_per_segment, since
+     * we only calculate level.
+     */
+    N = ec->samples_per_frame / ec->samples_per_segment;
+    pj_assert(N>0);
+    for (i=0; i<N; ++i) {
+	unsigned pos = i * ec->samples_per_segment;
+	echo_supp_update(ec, rec_frm+pos, play_frm+pos);
+    }
+
+    if (ec->tail_index < 0) {
+	/* Not ready */
+    } else {
+	unsigned lookup_cnt, rec_level=0, play_level=0;
+	unsigned tail_cnt;
+	float factor;
+
+	/* How many previous segments to lookup */
+	lookup_cnt = SIGNAL_LOOKUP_MSEC / SEGMENT_PTIME;
+	if (lookup_cnt > ec->templ_cnt)
+	    lookup_cnt = ec->templ_cnt;
 
-    if (!silence) {
-#if defined(PJMEDIA_ECHO_SUPPRESS_FACTOR) && PJMEDIA_ECHO_SUPPRESS_FACTOR!=0
-	unsigned i;
-	for (i=0; i<ec->samples_per_frame; ++i) {
-	    rec_frm[i] = (pj_int16_t)(rec_frm[i] >> 
-				      PJMEDIA_ECHO_SUPPRESS_FACTOR);
+	/* Lookup in recording history to get maximum mic level, to see
+	 * if local user is currently talking
+	 */
+	for (i=ec->templ_cnt - lookup_cnt; i < ec->templ_cnt; ++i) {
+	    if (ec->rec_hist[i] > rec_level)
+		rec_level = ec->rec_hist[i];
+	}
+	rec_level = pjmedia_linear2ulaw(rec_level) ^ 0xFF;
+
+	/* Calculate the detected tail length, in # of segments */
+	tail_cnt = (ec->tail_cnt - ec->tail_index);
+
+	/* Lookup in playback history to get max speaker level, to see
+	 * if remote user is currently talking
+	 */
+	for (i=ec->play_hist_cnt -lookup_cnt -tail_cnt; 
+	     i<ec->play_hist_cnt-tail_cnt; ++i) 
+	{
+	    if (ec->play_hist[i] > play_level)
+		play_level = ec->play_hist[i];
+	}
+	play_level = pjmedia_linear2ulaw(play_level) ^ 0xFF;
+
+	if (rec_level >= MIN_SIGNAL_ULAW) {
+	    if (play_level < MIN_SIGNAL_ULAW) {
+		/* Mic is talking, speaker is idle. Let mic signal pass as is.
+		 */
+		factor = 1.0;
+		echo_supp_set_state(ec, ST_LOCAL_TALK);
+	    } else {
+		/* Seems that both are talking. Scale the mic signal
+		 * down a little bit to reduce echo, while allowing both
+		 * parties to talk at the same time.
+		 */
+		factor = (float)(ec->avg_factor[ec->tail_index] * 2);
+		echo_supp_set_state(ec, ST_DOUBLETALK);
+	    }
+	} else {
+	    if (play_level < MIN_SIGNAL_ULAW) {
+		/* Both mic and speaker seem to be idle. Also scale the
+		 * mic signal down with average factor to reduce low power
+		 * echo.
+		 */
+		factor = ec->avg_factor[ec->tail_index] * 3 / 2;
+		echo_supp_set_state(ec, ST_REM_SILENT);
+	    } else {
+		/* Mic is idle, but there's something playing in speaker.
+		 * Scale the mic down to minimum
+		 */
+		factor = ec->min_factor[ec->tail_index] / 2;
+		echo_supp_set_state(ec, ST_REM_TALK);
+	    }
+	}
+
+	/* Smoothen the transition */
+	if (factor > ec->last_factor)
+	    factor = (factor + ec->last_factor) / 2;
+	else
+	    factor = (factor + ec->last_factor*9) / 10;
+
+	/* Amplify frame */
+	amplify_frame(rec_frm, ec->samples_per_frame, 
+		      pj_ufloat_from_float(factor));
+	ec->last_factor = factor;
+
+	if (ec->talk_state == ST_REM_TALK) {
+	    unsigned level, recalc_cnt;
+
+	    /* Get the adjusted frame signal level */
+	    level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_frame);
+	    level = pjmedia_linear2ulaw(level) ^ 0xFF;
+
+	    /* Accumulate average echo residue to see the ES effectiveness */
+	    ec->residue = ((ec->residue * ec->running_cnt) + level) / 
+			  (ec->running_cnt + 1);
+
+	    ++ec->running_cnt;
+
+	    /* Check if we need to re-learn */
+	    recalc_cnt = CHECK_PERIOD * ec->clock_rate / ec->samples_per_frame;
+	    if (ec->running_cnt > recalc_cnt) {
+		int iresidue;
+
+		iresidue = (int)(ec->residue*1000);
+
+		PJ_LOG(5,(THIS_FILE, "Echo suppressor residue = %d.%03d",
+			  iresidue/1000, iresidue%1000));
+
+		if (ec->residue > MAX_RESIDUE && !ec->learning) {
+		    echo_supp_soft_reset(ec);
+		    ec->residue = 0;
+		} else {
+		    ec->running_cnt = 0;
+		    ec->residue = 0;
+		}
+	    }
 	}
-#else
-	pjmedia_zero_samples(rec_frm, ec->samples_per_frame);
-#endif
     }
 
     return PJ_SUCCESS;
diff --git a/pjsip-apps/src/samples/aectest.c b/pjsip-apps/src/samples/aectest.c
index 14017749..773f7c94 100644
--- a/pjsip-apps/src/samples/aectest.c
+++ b/pjsip-apps/src/samples/aectest.c
@@ -56,6 +56,7 @@ static const char *desc =
 " options:\n"
 "  -d  The delay between playback and capture in ms. Default is zero.\n"
 "  -l  Set the echo tail length in ms. Default is 200 ms	    \n"
+"  -r  Set repeat count (default=1)                                 \n"
 "  -a  Algorithm: 0=default, 1=speex, 3=echo suppress		    \n";
 
 /* 
@@ -91,10 +92,10 @@ int main(int argc, char *argv[])
     unsigned latency_ms = 0;
     unsigned tail_ms = TAIL_LENGTH;
     pj_timestamp t0, t1;
-    int c;
+    int i, repeat=1, c;
 
     pj_optind = 0;
-    while ((c=pj_getopt(argc, argv, "d:l:a:")) !=-1) {
+    while ((c=pj_getopt(argc, argv, "d:l:a:r:")) !=-1) {
 	switch (c) {
 	case 'd':
 	    latency_ms = atoi(pj_optarg);
@@ -121,6 +122,14 @@ int main(int argc, char *argv[])
 		}
 	    }
 	    break;
+	case 'r':
+	    repeat = atoi(pj_optarg);
+	    if (repeat < 1) {
+		puts("Invalid algorithm");
+		puts(desc);
+		return 1;
+	    }
+	    break;
 	}
     }
 
@@ -210,25 +219,30 @@ int main(int argc, char *argv[])
     play_frame.buf = pj_pool_alloc(pool, wav_play->info.samples_per_frame<<1);
     rec_frame.buf = pj_pool_alloc(pool, wav_play->info.samples_per_frame<<1);
     pj_get_timestamp(&t0);
-    for (;;) {
-	play_frame.size = wav_play->info.samples_per_frame << 1;
-	status = pjmedia_port_get_frame(wav_play, &play_frame);
-	if (status != PJ_SUCCESS)
-	    break;
+    for (i=0; i < repeat; ++i) {
+	for (;;) {
+	    play_frame.size = wav_play->info.samples_per_frame << 1;
+	    status = pjmedia_port_get_frame(wav_play, &play_frame);
+	    if (status != PJ_SUCCESS)
+		break;
 
-	status = pjmedia_echo_playback(ec, (short*)play_frame.buf);
+	    status = pjmedia_echo_playback(ec, (short*)play_frame.buf);
 
-	rec_frame.size = wav_play->info.samples_per_frame << 1;
-	status = pjmedia_port_get_frame(wav_rec, &rec_frame);
-	if (status != PJ_SUCCESS)
-	    break;
+	    rec_frame.size = wav_play->info.samples_per_frame << 1;
+	    status = pjmedia_port_get_frame(wav_rec, &rec_frame);
+	    if (status != PJ_SUCCESS)
+		break;
 
-	status = pjmedia_echo_capture(ec, (short*)rec_frame.buf, 0);
+	    status = pjmedia_echo_capture(ec, (short*)rec_frame.buf, 0);
 
-	//status = pjmedia_echo_cancel(ec, (short*)rec_frame.buf, 
-	//			     (short*)play_frame.buf, 0, NULL);
+	    //status = pjmedia_echo_cancel(ec, (short*)rec_frame.buf, 
+	    //			     (short*)play_frame.buf, 0, NULL);
 
-	pjmedia_port_put_frame(wav_out, &rec_frame);
+	    pjmedia_port_put_frame(wav_out, &rec_frame);
+	}
+
+	pjmedia_wav_player_port_set_pos(wav_play, 0);
+	pjmedia_wav_player_port_set_pos(wav_rec, 0);
     }
     pj_get_timestamp(&t1);
 
@@ -257,6 +271,14 @@ int main(int argc, char *argv[])
     /* Shutdown PJLIB */
     pj_shutdown();
 
+#if 0
+    {
+	char s[10];
+	puts("ENTER to quit");
+	fgets(s, sizeof(s), stdin);
+    }
+#endif
+
     /* Done. */
     return 0;
 }
author	Benny Prijono <bennylp@teluu.com>	2008-08-10 16:15:14 +0000
committer	Benny Prijono <bennylp@teluu.com>	2008-08-10 16:15:14 +0000
commit	4d391149bdafa66e0a16dc7c96f628fff745d2ac (patch)
tree	cfa7bfff8edfb9c7ab2e9558e99611e737e9bece
parent	c0970767b422b18bb22e71efac3d6353bba37006 (diff)