summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nanang Izzuddin <nanang@teluu.com> 2008-08-19 11:04:32 +0000
committer: Nanang Izzuddin <nanang@teluu.com> 2008-08-19 11:04:32 +0000
commit: 5c4dfab39616ef259653a2a45b4034db13a81854 (patch)
tree: 959c0f2853262b12fb59f1fd8ea5e6e9d0c7fa89
parent: d381787bef341238ce77601d7547d9d29631853a (diff)
Ticket #490: Updated VAD with new algorithm.
git-svn-id: http://svn.pjsip.org/repos/pjproject/trunk@2222 74dad513-b988-da41-8d7b-12977e46ad98
-rw-r--r-- pjmedia/include/pjmedia/silencedet.h 29
-rw-r--r-- pjmedia/src/pjmedia/silencedet.c 282
2 files changed, 165 insertions, 146 deletions
diff --git a/pjmedia/include/pjmedia/silencedet.h b/pjmedia/include/pjmedia/silencedet.h
index af6f0e28..5f6a2491 100644
--- a/pjmedia/include/pjmedia/silencedet.h
+++ b/pjmedia/include/pjmedia/silencedet.h
@@ -1,6 +1,6 @@
/* $Id$ */
/*
- * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
+ * Copyright (C) 2003-2007 Benny Prijono <benny@prijono.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -108,26 +108,27 @@ PJ_DECL(pj_status_t) pjmedia_silence_det_set_adaptive(pjmedia_silence_det *sd,
* Set other silence detector parameters.
*
* @param sd The silence detector
- * @param min_silence Minimum duration of silence (in msec) before
+ * @param before_silence Minimum duration of silence (in msec) before
* silence is reported. If -1 is specified, then
* the default value will be used. The default is
* 400 msec.
- * @param min_signal Minimum duration of signal (in msec) before
- * signal is reported. If -1 is specified, then
- * the default value will be used. The default is
- * equal to one frame.
- * @param recalc_time The interval to recalculate signal and silence
- * proportion and to readjust the silence threshold
- * when adaptive silence detection is set. If -1
- * is specified, then the default value will be used.
- * The default value is 5000 (msec).
+ * @param recalc_time1 The interval (in msec) at which the threshold is
+ * recalculated while in voiced (non-silence) condition,
+ * when adaptive silence detection is set. If a negative
+ * value (e.g. -1) is specified, then the default value
+ * will be used. The default is 4000 (msec).
+ * @param recalc_time2 The interval (in msec) at which the threshold is
+ * recalculated while in silence condition, when adaptive
+ * silence detection is set. If a negative value (e.g. -1)
+ * is specified, the default of 2000 (msec) will be used.
*
* @return PJ_SUCCESS on success.
*/
PJ_DECL(pj_status_t) pjmedia_silence_det_set_params( pjmedia_silence_det *sd,
- int min_silence,
- int min_signal,
- int recalc_time);
+ int before_silence,
+ int recalc_time1,
+ int recalc_time2);
+
/**
* Disable the silence detector.
diff --git a/pjmedia/src/pjmedia/silencedet.c b/pjmedia/src/pjmedia/silencedet.c
index f5d29e6c..59738f1e 100644
--- a/pjmedia/src/pjmedia/silencedet.c
+++ b/pjmedia/src/pjmedia/silencedet.c
@@ -1,6 +1,6 @@
/* $Id$ */
/*
- * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
+ * Copyright (C) 2003-2007 Benny Prijono <benny@prijono.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -23,16 +23,42 @@
#include <pj/log.h>
#include <pj/pool.h>
-
#define THIS_FILE "silencedet.c"
+#if 0
+# define TRACE_(x) PJ_LOG(3,x)
+#else
+# define TRACE_(x)
+#endif
+
+/**
+ * This enumeration specifies the operation mode of the silence detector.
+ */
typedef enum pjmedia_silence_det_mode {
VAD_MODE_NONE,
VAD_MODE_FIXED,
VAD_MODE_ADAPTIVE
} pjmedia_silence_det_mode;
+/**
+ * Default settings
+ */
+#define DEF_RECALC_ON_VOICED 4000 /* Time to recalculate threshold
+ in voiced condition, in ms */
+#define DEF_RECALC_ON_SILENCE 2000 /* Time to recalculate threshold
+ in silence condition, in ms. */
+#define DEF_BEFORE_SILENCE 400 /* Silence time before really changing
+ state into SILENCE, in ms. */
+#define DEF_THRESHOLD 1000 /* Default threshold. */
+/**
+ * This enumeration specifies the states of the silence detector.
+ */
+enum pjmedia_silence_det_state {
+ STATE_SILENCE,
+ STATE_START_SILENCE,
+ STATE_VOICED
+};
/**
* This structure holds the silence detector state.
@@ -41,20 +67,23 @@ struct pjmedia_silence_det
{
char objname[PJ_MAX_OBJ_NAME]; /**< VAD name. */
- int mode; /**< VAD mode. */
- unsigned ptime; /**< Frame time, in msec. */
-
- unsigned min_signal_cnt; /**< # of signal frames.before talk burst */
- unsigned min_silence_cnt; /**< # of silence frames before silence. */
- unsigned recalc_cnt; /**< # of frames before adaptive recalc. */
+ int mode; /**< VAD mode. */
+ unsigned ptime; /**< Frame time, in msec. */
- pj_bool_t in_talk; /**< In talk burst? */
- unsigned cur_cnt; /**< # of frames in current mode. */
- unsigned signal_cnt; /**< # of signal frames received. */
- unsigned silence_cnt; /**< # of silence frames received */
- unsigned cur_threshold; /**< Current silence threshold. */
- unsigned weakest_signal; /**< Weakest signal detected. */
- unsigned loudest_silence; /**< Loudest silence detected. */
+ unsigned threshold; /**< Current threshold level. */
+ unsigned sum_level; /**< Total sum of recent levels. */
+ unsigned sum_cnt; /**< Number of levels summed. */
+ unsigned silence_timer; /**< Silence condition timer. */
+ unsigned voiced_timer; /**< Voiced condition timer. */
+
+ enum pjmedia_silence_det_state state;/**< Silence detector state. */
+ unsigned recalc_on_voiced; /**< Setting of time to recalc
+ threshold in voiced condition. */
+ unsigned recalc_on_silence; /**< Setting of time to recalc
+ threshold in silence condition.*/
+ unsigned before_silence; /**< Setting of silence time before
+ really changing state into SILENCE,
+ in ms. */
};
@@ -70,20 +99,15 @@ PJ_DEF(pj_status_t) pjmedia_silence_det_create( pj_pool_t *pool,
sd = PJ_POOL_ZALLOC_T(pool, pjmedia_silence_det);
- pj_ansi_strncpy(sd->objname, THIS_FILE, PJ_MAX_OBJ_NAME);
+ pj_ansi_snprintf(sd->objname, PJ_MAX_OBJ_NAME, THIS_FILE, sd);
sd->objname[PJ_MAX_OBJ_NAME-1] = '\0';
sd->ptime = samples_per_frame * 1000 / clock_rate;
- sd->signal_cnt = 0;
- sd->silence_cnt = 0;
- sd->weakest_signal = 0xFFFFFFFFUL;
- sd->loudest_silence = 0;
/* Default settings */
pjmedia_silence_det_set_params(sd, -1, -1, -1);
- /* Restart in fixed, silent mode */
- sd->in_talk = PJ_FALSE;
+ /* Restart in adaptive, silent mode */
pjmedia_silence_det_set_adaptive( sd, -1 );
*p_sd = sd;
@@ -101,17 +125,16 @@ PJ_DEF(pj_status_t) pjmedia_silence_det_set_name( pjmedia_silence_det *sd,
return PJ_SUCCESS;
}
-
PJ_DEF(pj_status_t) pjmedia_silence_det_set_adaptive(pjmedia_silence_det *sd,
int threshold)
{
PJ_ASSERT_RETURN(sd, PJ_EINVAL);
if (threshold < 0)
- threshold = PJMEDIA_SILENCE_DET_THRESHOLD;
+ threshold = DEF_THRESHOLD;
sd->mode = VAD_MODE_ADAPTIVE;
- sd->cur_threshold = threshold;
+ sd->threshold = threshold;
return PJ_SUCCESS;
}
@@ -122,31 +145,31 @@ PJ_DEF(pj_status_t) pjmedia_silence_det_set_fixed( pjmedia_silence_det *sd,
PJ_ASSERT_RETURN(sd, PJ_EINVAL);
if (threshold < 0)
- threshold = PJMEDIA_SILENCE_DET_THRESHOLD;
+ threshold = DEF_THRESHOLD;
sd->mode = VAD_MODE_FIXED;
- sd->cur_threshold = threshold;
+ sd->threshold = threshold;
return PJ_SUCCESS;
}
PJ_DEF(pj_status_t) pjmedia_silence_det_set_params( pjmedia_silence_det *sd,
- int min_silence,
- int min_signal,
- int recalc_time)
+ int before_silence,
+ int recalc_time1,
+ int recalc_time2)
{
PJ_ASSERT_RETURN(sd, PJ_EINVAL);
- if (min_silence == -1)
- min_silence = 500;
- if (min_signal < 0)
- min_signal = sd->ptime;
- if (recalc_time < 0)
- recalc_time = 2000;
+ if (recalc_time1 < 0)
+ recalc_time1 = DEF_RECALC_ON_VOICED;
+ if (recalc_time2 < 0)
+ recalc_time2 = DEF_RECALC_ON_SILENCE;
+ if (before_silence < 0)
+ before_silence = DEF_BEFORE_SILENCE;
- sd->min_signal_cnt = min_signal / sd->ptime;
- sd->min_silence_cnt = min_silence / sd->ptime;
- sd->recalc_cnt = recalc_time / sd->ptime;
+ sd->recalc_on_voiced = recalc_time1;
+ sd->recalc_on_silence = recalc_time2;
+ sd->before_silence = before_silence;
return PJ_SUCCESS;
}
@@ -186,109 +209,104 @@ PJ_DEF(pj_int32_t) pjmedia_calc_avg_signal( const pj_int16_t samples[],
PJ_DEF(pj_bool_t) pjmedia_silence_det_apply( pjmedia_silence_det *sd,
pj_uint32_t level)
{
- pj_bool_t have_signal;
+ int avg_recent_level;
- /* Always return false if VAD is disabled */
if (sd->mode == VAD_MODE_NONE)
return PJ_FALSE;
- /* Convert PCM level to ulaw */
- level = pjmedia_linear2ulaw(level) ^ 0xff;
-
- /* Do we have signal? */
- have_signal = level > sd->cur_threshold;
-
- /* We we're in transition between silence and signel, increment the
- * current frame counter. We will only switch mode when we have enough
- * frames.
- */
- if (sd->in_talk != have_signal) {
- unsigned limit;
-
- sd->cur_cnt++;
-
- limit = (sd->in_talk ? sd->min_silence_cnt :
- sd->min_signal_cnt);
-
- if (sd->cur_cnt > limit) {
-
- /* Swap mode */
- sd->in_talk = !sd->in_talk;
-
- /* Restart adaptive cur_threshold measurements */
- sd->weakest_signal = 0xFFFFFFFFUL;
- sd->loudest_silence = 0;
- sd->signal_cnt = 0;
- sd->silence_cnt = 0;
- sd->cur_cnt = 0;
- }
+ if (sd->mode == VAD_MODE_FIXED)
+ return (level < sd->threshold);
- } else {
- /* Reset frame count */
- sd->cur_cnt = 0;
- }
-
+ /* Calculating recent level */
+ sd->sum_level += level;
+ ++sd->sum_cnt;
+ avg_recent_level = (sd->sum_level / sd->sum_cnt);
- /* Count the number of silent and signal frames and calculate min/max */
- if (have_signal) {
- if (level < sd->weakest_signal)
- sd->weakest_signal = level;
- sd->signal_cnt++;
- }
- else {
- if (level > sd->loudest_silence)
- sd->loudest_silence = level;
- sd->silence_cnt++;
- }
+ if (level > sd->threshold) {
+ sd->silence_timer = 0;
+ sd->voiced_timer += sd->ptime;
- /* See if we have had enough frames to look at proportions of
- * silence/signal frames.
- */
- if ((sd->signal_cnt + sd->silence_cnt) > sd->recalc_cnt) {
-
- if (sd->mode == VAD_MODE_ADAPTIVE) {
- pj_bool_t updated = PJ_TRUE;
- unsigned pct_signal, new_threshold = sd->cur_threshold;
-
- /* Get percentage of signal */
- pct_signal = sd->signal_cnt * 100 /
- (sd->signal_cnt + sd->silence_cnt);
-
- /* Adjust according to signal/silence proportions. */
- if (pct_signal > 95) {
- new_threshold += (sd->weakest_signal+1 - sd->cur_threshold)/2;
- } else if (pct_signal < 5) {
- new_threshold = (sd->cur_threshold+sd->loudest_silence)/2+1;
- } else if (pct_signal > 80) {
- new_threshold++;
- } else if (pct_signal < 10) {
- new_threshold--;
- } else {
- updated = PJ_FALSE;
- }
-
- if (new_threshold > PJMEDIA_SILENCE_DET_MAX_THRESHOLD)
- new_threshold = PJMEDIA_SILENCE_DET_MAX_THRESHOLD;
-
- if (updated && sd->cur_threshold != new_threshold) {
- PJ_LOG(5,(sd->objname,
- "Vad cur_threshold updated %d-->%d. "
- "Signal lo=%d",
- sd->cur_threshold, new_threshold,
- sd->weakest_signal));
- sd->cur_threshold = new_threshold;
- }
- }
+ switch(sd->state) {
+ case STATE_VOICED:
+ if (sd->voiced_timer > sd->recalc_on_voiced) {
+ /* Voiced for a long time (> recalc_on_voiced), so the
+ * current threshold seems to be too low.
+ */
+ sd->threshold = (avg_recent_level + sd->threshold) >> 1;
+ TRACE_((THIS_FILE,"Re-adjust threshold (in talk burst)"
+ "to %d", sd->threshold));
+
+ sd->voiced_timer = 0;
+
+ /* Reset sum_level and sum_cnt */
+ sd->sum_level = avg_recent_level;
+ sd->sum_cnt = 1;
+ }
+ break;
+
+ case STATE_SILENCE:
+ TRACE_((THIS_FILE,"Starting talk burst (level=%d threshold=%d)",
+ level, sd->threshold));
- /* Reset. */
- sd->weakest_signal = 0xFFFFFFFFUL;
- sd->loudest_silence = 0;
- sd->signal_cnt = 0;
- sd->silence_cnt = 0;
+ case STATE_START_SILENCE:
+ sd->state = STATE_VOICED;
+
+ /* Reset sum_level and sum_cnt */
+ sd->sum_level = level;
+ sd->sum_cnt = 1;
+
+ break;
+
+ default:
+ pj_assert(0);
+ break;
+ }
+ } else {
+ sd->voiced_timer = 0;
+ sd->silence_timer += sd->ptime;
+
+ switch(sd->state) {
+ case STATE_SILENCE:
+ if (sd->silence_timer >= sd->recalc_on_silence) {
+ sd->threshold = avg_recent_level << 1;
+ TRACE_((THIS_FILE,"Re-adjust threshold (in silence)"
+ "to %d", sd->threshold));
+
+ sd->silence_timer = 0;
+
+ /* Reset sum_level and sum_cnt */
+ sd->sum_level = avg_recent_level;
+ sd->sum_cnt = 1;
+ }
+ break;
+
+ case STATE_VOICED:
+ sd->state = STATE_START_SILENCE;
+
+ /* Reset sum_level and sum_cnt */
+ sd->sum_level = level;
+ sd->sum_cnt = 1;
+
+ case STATE_START_SILENCE:
+ if (sd->silence_timer >= sd->before_silence) {
+ sd->state = STATE_SILENCE;
+ sd->threshold = avg_recent_level << 1;
+ TRACE_((THIS_FILE,"Starting silence (level=%d "
+ "threshold=%d)", level, sd->threshold));
+
+ /* Reset sum_level and sum_cnt */
+ sd->sum_level = avg_recent_level;
+ sd->sum_cnt = 1;
+ }
+ break;
+
+ default:
+ pj_assert(0);
+ break;
+ }
}
-
- return !sd->in_talk;
+ return (sd->state == STATE_SILENCE);
}