diff options
-rw-r--r-- | pjmedia/build/pjmedia.dsp | 4 | ||||
-rw-r--r-- | pjmedia/include/pjmedia/echo.h | 72 | ||||
-rw-r--r-- | pjmedia/src/pjmedia/echo_common.c | 252 | ||||
-rw-r--r-- | pjmedia/src/pjmedia/echo_internal.h | 31 | ||||
-rw-r--r-- | pjmedia/src/pjmedia/echo_port.c | 7 | ||||
-rw-r--r-- | pjmedia/src/pjmedia/echo_speex.c | 404 | ||||
-rw-r--r-- | pjmedia/src/pjmedia/echo_suppress.c | 68 | ||||
-rw-r--r-- | pjmedia/src/pjmedia/sound_port.c | 16 | ||||
-rw-r--r-- | pjsip-apps/src/samples/aectest.c | 201 | ||||
-rw-r--r-- | third_party/speex/libspeex/mdf.c | 510 |
10 files changed, 659 insertions, 906 deletions
diff --git a/pjmedia/build/pjmedia.dsp b/pjmedia/build/pjmedia.dsp index f0ce7c3e..5c8a96b1 100644 --- a/pjmedia/build/pjmedia.dsp +++ b/pjmedia/build/pjmedia.dsp @@ -125,6 +125,10 @@ SOURCE=..\src\pjmedia\echo_common.c # End Source File
# Begin Source File
+SOURCE=..\src\pjmedia\echo_internal.h
+# End Source File
+# Begin Source File
+
SOURCE=..\src\pjmedia\echo_port.c
# End Source File
# Begin Source File
diff --git a/pjmedia/include/pjmedia/echo.h b/pjmedia/include/pjmedia/echo.h index 53e262f3..4a6c28f7 100644 --- a/pjmedia/include/pjmedia/echo.h +++ b/pjmedia/include/pjmedia/echo.h @@ -57,18 +57,37 @@ typedef struct pjmedia_echo_state pjmedia_echo_state; typedef enum pjmedia_echo_flag { /** + * Use any available backend echo canceller algorithm. This is + * the default settings. This setting is mutually exclusive with + * PJMEDIA_ECHO_SIMPLE and PJMEDIA_ECHO_SPEEX. + */ + PJMEDIA_ECHO_DEFAULT= 0, + + /** + * Force to use Speex AEC as the backend echo canceller algorithm. + * This setting is mutually exclusive with PJMEDIA_ECHO_SIMPLE. + */ + PJMEDIA_ECHO_SPEEX = 1, + + /** * If PJMEDIA_ECHO_SIMPLE flag is specified during echo canceller * creation, then a simple echo suppressor will be used instead of - * an accoustic echo cancellation. + * an accoustic echo cancellation. This setting is mutually exclusive + * with PJMEDIA_ECHO_SPEEX. + */ + PJMEDIA_ECHO_SIMPLE = 2, + + /** + * For internal use. */ - PJMEDIA_ECHO_SIMPLE = 1, + PJMEDIA_ECHO_ALGO_MASK = 15, /** * If PJMEDIA_ECHO_NO_LOCK flag is specified, no mutex will be created * for the echo canceller, but application will guarantee that echo * canceller will not be called by different threads at the same time. */ - PJMEDIA_ECHO_NO_LOCK = 2 + PJMEDIA_ECHO_NO_LOCK = 16 } pjmedia_echo_flag; @@ -102,6 +121,34 @@ PJ_DECL(pj_status_t) pjmedia_echo_create(pj_pool_t *pool, unsigned options, pjmedia_echo_state **p_echo ); +/** + * Create multi-channel the echo canceller. + * + * @param pool Pool to allocate memory. + * @param clock_rate Media clock rate/sampling rate. + * @param channel_count Number of channels. + * @param samples_per_frame Number of samples per frame. + * @param tail_ms Tail length, miliseconds. + * @param latency_ms Total lacency introduced by playback and + * recording device. Set to zero if the latency + * is not known. + * @param options Options. If PJMEDIA_ECHO_SIMPLE is specified, + * then a simple echo suppressor implementation + * will be used instead of an accoustic echo + * cancellation. + * See #pjmedia_echo_flag for other options. + * @param p_echo Pointer to receive the Echo Canceller state. + * + * @return PJ_SUCCESS on success, or the appropriate status. + */ +PJ_DECL(pj_status_t) pjmedia_echo_create2(pj_pool_t *pool, + unsigned clock_rate, + unsigned channel_count, + unsigned samples_per_frame, + unsigned tail_ms, + unsigned latency_ms, + unsigned options, + pjmedia_echo_state **p_echo ); /** * Destroy the Echo Canceller. @@ -114,7 +161,17 @@ PJ_DECL(pj_status_t) pjmedia_echo_destroy(pjmedia_echo_state *echo ); /** - * Let the Echo Canceller knows that a frame has been played to the speaker. + * Reset the echo canceller. + * + * @param echo The Echo Canceller. + * + * @return PJ_SUCCESS on success. + */ +PJ_DECL(pj_status_t) pjmedia_echo_reset(pjmedia_echo_state *echo ); + + +/** + * Let the Echo Canceller know that a frame has been played to the speaker. * The Echo Canceller will keep the frame in its internal buffer, to be used * when cancelling the echo with #pjmedia_echo_capture(). * @@ -131,10 +188,9 @@ PJ_DECL(pj_status_t) pjmedia_echo_playback(pjmedia_echo_state *echo, /** - * Let the Echo Canceller knows that a frame has been captured from - * the microphone. - * The Echo Canceller will cancel the echo from the captured signal, - * using the internal buffer (supplied by #pjmedia_echo_playback()) + * Let the Echo Canceller know that a frame has been captured from the + * microphone. The Echo Canceller will cancel the echo from the captured + * signal, using the internal buffer (supplied by #pjmedia_echo_playback()) * as the FES (Far End Speech) reference. * * @param echo The Echo Canceller. diff --git a/pjmedia/src/pjmedia/echo_common.c b/pjmedia/src/pjmedia/echo_common.c index a7e6b1bb..633e78a8 100644 --- a/pjmedia/src/pjmedia/echo_common.c +++ b/pjmedia/src/pjmedia/echo_common.c @@ -17,36 +17,56 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <pjmedia/config.h> #include <pjmedia/echo.h> +#include <pjmedia/delaybuf.h> +#include <pjmedia/errno.h> #include <pj/assert.h> +#include <pj/list.h> +#include <pj/log.h> #include <pj/pool.h> #include "echo_internal.h" +#define THIS_FILE "echo_common.c" + typedef struct ec_operations ec_operations; +struct frame +{ + PJ_DECL_LIST_MEMBER(struct frame); + short buf[1]; +}; + struct pjmedia_echo_state { + pj_pool_t *pool; + char *obj_name; + unsigned samples_per_frame; void *state; ec_operations *op; + + pj_bool_t lat_ready; /* lat_buf has been filled in. */ + unsigned lat_target_cnt;/* Target number of frames in lat_buf */ + unsigned lat_buf_cnt; /* Actual number of frames in lat_buf */ + struct frame lat_buf; /* Frame queue for delayed playback */ + struct frame lat_free; /* Free frame list. */ + + pjmedia_delay_buf *delay_buf; }; struct ec_operations { + const char *name; + pj_status_t (*ec_create)(pj_pool_t *pool, - unsigned clock_rate, - unsigned samples_per_frame, - unsigned tail_ms, - unsigned latency_ms, - unsigned options, - void **p_state ); + unsigned clock_rate, + unsigned channel_count, + unsigned samples_per_frame, + unsigned tail_ms, + unsigned options, + void **p_state ); pj_status_t (*ec_destroy)(void *state ); - pj_status_t (*ec_playback)(void *state, - pj_int16_t *play_frm ); - pj_status_t (*ec_capture)(void *state, - pj_int16_t *rec_frm, - unsigned options ); + void (*ec_reset)(void *state ); pj_status_t (*ec_cancel)(void *state, pj_int16_t *rec_frm, const pj_int16_t *play_frm, @@ -57,10 +77,10 @@ struct ec_operations static struct ec_operations echo_supp_op = { + "Echo suppressor", &echo_supp_create, &echo_supp_destroy, - &echo_supp_playback, - &echo_supp_capture, + &echo_supp_reset, &echo_supp_cancel_echo }; @@ -70,20 +90,30 @@ static struct ec_operations echo_supp_op = * Speex AEC prototypes */ #if defined(PJMEDIA_HAS_SPEEX_AEC) && PJMEDIA_HAS_SPEEX_AEC!=0 -static struct ec_operations aec_op = +static struct ec_operations speex_aec_op = { + "AEC", &speex_aec_create, &speex_aec_destroy, - &speex_aec_playback, - &speex_aec_capture, + &speex_aec_reset, &speex_aec_cancel_echo }; - -#else -#define aec_op echo_supp_op #endif +/* + * IPP AEC prototypes + */ +#if defined(PJMEDIA_HAS_INTEL_IPP_AEC) && PJMEDIA_HAS_INTEL_IPP_AEC!=0 +static struct ec_operations ipp_aec_op = +{ + "IPP AEC", + &ipp_aec_create, + &ipp_aec_destroy, + &ipp_aec_reset, + &ipp_aec_cancel_echo +}; +#endif /* * Create the echo canceller. @@ -96,34 +126,106 @@ PJ_DEF(pj_status_t) pjmedia_echo_create( pj_pool_t *pool, unsigned options, pjmedia_echo_state **p_echo ) { + return pjmedia_echo_create2(pool, clock_rate, 1, samples_per_frame, + tail_ms, latency_ms, options, p_echo); +} + +/* + * Create the echo canceller. + */ +PJ_DEF(pj_status_t) pjmedia_echo_create2(pj_pool_t *pool, + unsigned clock_rate, + unsigned channel_count, + unsigned samples_per_frame, + unsigned tail_ms, + unsigned latency_ms, + unsigned options, + pjmedia_echo_state **p_echo ) +{ + unsigned ptime; pjmedia_echo_state *ec; pj_status_t status; - /* Force to use simple echo suppressor if AEC is not available */ -#if !defined(PJMEDIA_HAS_SPEEX_AEC) || PJMEDIA_HAS_SPEEX_AEC==0 - options |= PJMEDIA_ECHO_SIMPLE; + /* Create new pool and instantiate and init the EC */ + pool = pj_pool_create(pool->factory, "ec%p", 256, 256, NULL); + ec = PJ_POOL_ZALLOC_T(pool, struct pjmedia_echo_state); + ec->pool = pool; + ec->obj_name = pool->obj_name; + pj_list_init(&ec->lat_buf); + pj_list_init(&ec->lat_free); + + /* Select the backend algorithm */ + if (0) { + /* Dummy */ + ; +#if defined(PJMEDIA_HAS_SPEEX_AEC) && PJMEDIA_HAS_SPEEX_AEC!=0 + } else if ((options & PJMEDIA_ECHO_ALGO_MASK) == PJMEDIA_ECHO_SPEEX || + (options & PJMEDIA_ECHO_ALGO_MASK) == PJMEDIA_ECHO_DEFAULT) + { + ec->op = &speex_aec_op; #endif - ec = PJ_POOL_ZALLOC_T(pool, struct pjmedia_echo_state); +#if defined(PJMEDIA_HAS_INTEL_IPP_AEC) && PJMEDIA_HAS_INTEL_IPP_AEC!=0 + } else if ((options & PJMEDIA_ECHO_ALGO_MASK) == PJMEDIA_ECHO_IPP || + (options & PJMEDIA_ECHO_ALGO_MASK) == PJMEDIA_ECHO_DEFAULT) + { + ec->op = &ipp_aec_op; + +#endif - if (options & PJMEDIA_ECHO_SIMPLE) { + } else { ec->op = &echo_supp_op; - status = (*echo_supp_op.ec_create)(pool, clock_rate, samples_per_frame, - tail_ms, latency_ms, options, - &ec->state); + } + + PJ_LOG(5,(ec->obj_name, "Creating %s", ec->op->name)); + + /* Instantiate EC object */ + status = (*ec->op->ec_create)(pool, clock_rate, channel_count, + samples_per_frame, tail_ms, + options, &ec->state); + if (status != PJ_SUCCESS) { + pj_pool_release(pool); + return status; + } + + /* Create latency buffers */ + ptime = samples_per_frame * 1000 / clock_rate; + if (latency_ms == 0) { + /* Give at least one frame delay to simplify programming */ + latency_ms = ptime; + } + ec->lat_target_cnt = latency_ms / ptime; + if (ec->lat_target_cnt != 0) { + unsigned i; + for (i=0; i < ec->lat_target_cnt; ++i) { + struct frame *frm; + + frm = (struct frame*) pj_pool_alloc(pool, (samples_per_frame<<1) + + sizeof(struct frame)); + pj_list_push_back(&ec->lat_free, frm); + } } else { - ec->op = &aec_op; - status = (*aec_op.ec_create)(pool, clock_rate, - samples_per_frame, - tail_ms, latency_ms, options, - &ec->state); + ec->lat_ready = PJ_TRUE; } - if (status != PJ_SUCCESS) + /* Create delay buffer to compensate drifts */ + status = pjmedia_delay_buf_create(ec->pool, ec->obj_name, clock_rate, + samples_per_frame, channel_count, + (PJMEDIA_SOUND_BUFFER_COUNT+1) * ptime, + 0, &ec->delay_buf); + if (status != PJ_SUCCESS) { + pj_pool_release(pool); return status; + } - pj_assert(ec->state != NULL); + PJ_LOG(4,(ec->obj_name, + "%s created, clock_rate=%d, channel=%d, " + "samples per frame=%d, tail length=%d ms, " + "latency=%d ms", + ec->op->name, clock_rate, channel_count, samples_per_frame, + tail_ms, latency_ms)); + /* Done */ *p_echo = ec; return PJ_SUCCESS; @@ -135,18 +237,63 @@ PJ_DEF(pj_status_t) pjmedia_echo_create( pj_pool_t *pool, */ PJ_DEF(pj_status_t) pjmedia_echo_destroy(pjmedia_echo_state *echo ) { - return (*echo->op->ec_destroy)(echo->state); + (*echo->op->ec_destroy)(echo->state); + pj_pool_release(echo->pool); + return PJ_SUCCESS; } +/* + * Reset the echo canceller. + */ +PJ_DEF(pj_status_t) pjmedia_echo_reset(pjmedia_echo_state *echo ) +{ + while (!pj_list_empty(&echo->lat_buf)) { + struct frame *frm; + frm = echo->lat_buf.next; + pj_list_erase(frm); + pj_list_push_back(&echo->lat_free, frm); + } + echo->lat_ready = PJ_FALSE; + pjmedia_delay_buf_reset(echo->delay_buf); + echo->op->ec_reset(echo->state); + return PJ_SUCCESS; +} + /* - * Let the Echo Canceller knows that a frame has been played to the speaker. + * Let the Echo Canceller know that a frame has been played to the speaker. */ PJ_DEF(pj_status_t) pjmedia_echo_playback( pjmedia_echo_state *echo, pj_int16_t *play_frm ) { - return (*echo->op->ec_playback)(echo->state, play_frm); + if (!echo->lat_ready) { + /* We've not built enough latency in the buffer, so put this frame + * in the latency buffer list. + */ + struct frame *frm; + + if (pj_list_empty(&echo->lat_free)) { + echo->lat_ready = PJ_TRUE; + PJ_LOG(5,(echo->obj_name, "Latency bufferring complete")); + pjmedia_delay_buf_put(echo->delay_buf, play_frm); + return PJ_SUCCESS; + } + + frm = echo->lat_free.prev; + pj_list_erase(frm); + + pjmedia_copy_samples(frm->buf, play_frm, echo->samples_per_frame); + pj_list_push_back(&echo->lat_buf, frm); + + } else { + /* Latency buffer is ready (full), so we put this frame in the + * delay buffer. + */ + pjmedia_delay_buf_put(echo->delay_buf, play_frm); + } + + return PJ_SUCCESS; } @@ -158,7 +305,34 @@ PJ_DEF(pj_status_t) pjmedia_echo_capture( pjmedia_echo_state *echo, pj_int16_t *rec_frm, unsigned options ) { - return (*echo->op->ec_capture)(echo->state, rec_frm, options); + struct frame *oldest_frm; + pj_status_t status, rc; + + if (!echo->lat_ready) { + /* Prefetching to fill in the desired latency */ + PJ_LOG(5,(echo->obj_name, "Prefetching..")); + return PJ_SUCCESS; + } + + /* Retrieve oldest frame from the latency buffer */ + oldest_frm = echo->lat_buf.next; + pj_list_erase(oldest_frm); + + /* Cancel echo using this reference frame */ + status = pjmedia_echo_cancel(echo, rec_frm, oldest_frm->buf, + options, NULL); + + /* Move one frame from delay buffer to the latency buffer. */ + rc = pjmedia_delay_buf_get(echo->delay_buf, oldest_frm->buf); + if (rc != PJ_SUCCESS) { + /* Ooops.. no frame! */ + PJ_LOG(5,(echo->obj_name, + "No frame from delay buffer. This will upset EC later")); + pjmedia_zero_samples(oldest_frm->buf, echo->samples_per_frame); + } + pj_list_push_back(&echo->lat_buf, oldest_frm); + + return status; } diff --git a/pjmedia/src/pjmedia/echo_internal.h b/pjmedia/src/pjmedia/echo_internal.h index c382abbe..6b6a4b60 100644 --- a/pjmedia/src/pjmedia/echo_internal.h +++ b/pjmedia/src/pjmedia/echo_internal.h @@ -28,17 +28,13 @@ PJ_BEGIN_DECL */ PJ_DECL(pj_status_t) echo_supp_create(pj_pool_t *pool, unsigned clock_rate, + unsigned channel_count, unsigned samples_per_frame, unsigned tail_ms, - unsigned latency_ms, unsigned options, void **p_state ); PJ_DECL(pj_status_t) echo_supp_destroy(void *state); -PJ_DECL(pj_status_t) echo_supp_playback(void *state, - pj_int16_t *play_frm ); -PJ_DECL(pj_status_t) echo_supp_capture(void *state, - pj_int16_t *rec_frm, - unsigned options ); +PJ_DECL(void) echo_supp_reset(void *state); PJ_DECL(pj_status_t) echo_supp_cancel_echo(void *state, pj_int16_t *rec_frm, const pj_int16_t *play_frm, @@ -47,23 +43,34 @@ PJ_DECL(pj_status_t) echo_supp_cancel_echo(void *state, PJ_DECL(pj_status_t) speex_aec_create(pj_pool_t *pool, unsigned clock_rate, + unsigned channel_count, unsigned samples_per_frame, unsigned tail_ms, - unsigned latency_ms, unsigned options, void **p_state ); PJ_DECL(pj_status_t) speex_aec_destroy(void *state ); -PJ_DECL(pj_status_t) speex_aec_playback(void *state, - pj_int16_t *play_frm ); -PJ_DECL(pj_status_t) speex_aec_capture(void *state, - pj_int16_t *rec_frm, - unsigned options ); +PJ_DECL(void) speex_aec_reset(void *state ); PJ_DECL(pj_status_t) speex_aec_cancel_echo(void *state, pj_int16_t *rec_frm, const pj_int16_t *play_frm, unsigned options, void *reserved ); +PJ_DECL(pj_status_t) ipp_aec_create(pj_pool_t *pool, + unsigned clock_rate, + unsigned channel_count, + unsigned samples_per_frame, + unsigned tail_ms, + unsigned options, + void **p_echo ); +PJ_DECL(pj_status_t) ipp_aec_destroy(void *state ); +PJ_DECL(void) ipp_aec_reset(void *state ); +PJ_DECL(pj_status_t) ipp_aec_cancel_echo(void *state, + pj_int16_t *rec_frm, + const pj_int16_t *play_frm, + unsigned options, + void *reserved ); + PJ_END_DECL diff --git a/pjmedia/src/pjmedia/echo_port.c b/pjmedia/src/pjmedia/echo_port.c index 5d36e134..1b1c89c9 100644 --- a/pjmedia/src/pjmedia/echo_port.c +++ b/pjmedia/src/pjmedia/echo_port.c @@ -67,9 +67,10 @@ PJ_DEF(pj_status_t) pjmedia_echo_port_create(pj_pool_t *pool, dn_port->info.bits_per_sample, dn_port->info.samples_per_frame); - status = pjmedia_echo_create(pool, dn_port->info.clock_rate, - dn_port->info.samples_per_frame, - tail_ms, latency_ms, options, &ec->ec); + status = pjmedia_echo_create2(pool, dn_port->info.clock_rate, + dn_port->info.channel_count, + dn_port->info.samples_per_frame, + tail_ms, latency_ms, options, &ec->ec); if (status != PJ_SUCCESS) return status; diff --git a/pjmedia/src/pjmedia/echo_speex.c b/pjmedia/src/pjmedia/echo_speex.c index d7f27507..033597e8 100644 --- a/pjmedia/src/pjmedia/echo_speex.c +++ b/pjmedia/src/pjmedia/echo_speex.c @@ -19,221 +19,13 @@ #include <pjmedia/echo.h> #include <pjmedia/errno.h> -#include <pjmedia/silencedet.h> #include <pj/assert.h> -#include <pj/lock.h> -#include <pj/log.h> -#include <pj/os.h> #include <pj/pool.h> #include <speex/speex_echo.h> #include <speex/speex_preprocess.h> #include "echo_internal.h" -#define THIS_FILE "echo_speex.c" -#define BUF_COUNT PJMEDIA_SOUND_BUFFER_COUNT -#define MIN_PREFETCH 2 -#define MAX_PREFETCH (BUF_COUNT*2/3) - - - -#if 0 -# define TRACE_(expr) PJ_LOG(5,expr) -#else -# define TRACE_(expr) -#endif - - -typedef struct pjmedia_frame_queue pjmedia_frame_queue; - -struct fq_frame -{ - PJ_DECL_LIST_MEMBER(struct fq_frame); - void *buf; - unsigned size; - pj_uint32_t seq; -}; - -struct pjmedia_frame_queue -{ - char obj_name[PJ_MAX_OBJ_NAME]; - unsigned frame_size; - int samples_per_frame; - unsigned count; - unsigned max_count; - struct fq_frame frame_list; - struct fq_frame free_list; - - int seq_delay; - int prefetch_count; -}; - -PJ_DEF(pj_status_t) pjmedia_frame_queue_create( pj_pool_t *pool, - const char *name, - unsigned frame_size, - unsigned samples_per_frame, - unsigned max_count, - pjmedia_frame_queue **p_fq) -{ - pjmedia_frame_queue *fq; - unsigned i; - - fq = PJ_POOL_ZALLOC_T(pool, pjmedia_frame_queue); - - pj_ansi_snprintf(fq->obj_name, sizeof(fq->obj_name), name, fq); - fq->obj_name[sizeof(fq->obj_name)-1] = '\0'; - - fq->max_count = max_count; - fq->frame_size = frame_size; - fq->samples_per_frame = samples_per_frame; - fq->count = 0; - - pj_list_init(&fq->frame_list); - pj_list_init(&fq->free_list); - - for (i=0; i<max_count; ++i) { - struct fq_frame *f; - - f = PJ_POOL_ZALLOC_T(pool, struct fq_frame); - f->buf = pj_pool_alloc(pool, frame_size); - - pj_list_push_back(&fq->free_list, f); - - } - - *p_fq = fq; - return PJ_SUCCESS; -} - -PJ_DEF(pj_status_t) pjmedia_frame_queue_init( pjmedia_frame_queue *fq, - int seq_delay, - int prefetch_count) -{ - if (prefetch_count > MAX_PREFETCH) - prefetch_count = MAX_PREFETCH; - - fq->seq_delay = seq_delay; - fq->prefetch_count = prefetch_count; - fq->count = 0; - pj_list_merge_first(&fq->free_list, &fq->frame_list); - - PJ_LOG(5,(fq->obj_name, "AEC reset, delay=%d, prefetch=%d", - fq->seq_delay, fq->prefetch_count)); - - return PJ_SUCCESS; -} - -PJ_DEF(pj_bool_t) pjmedia_frame_queue_empty( pjmedia_frame_queue *fq ) -{ - return pj_list_empty(&fq->frame_list); -} - -PJ_DEF(int) pjmedia_frame_queue_get_prefetch( pjmedia_frame_queue *fq ) -{ - return fq->prefetch_count; -} - -PJ_DEF(pj_status_t) pjmedia_frame_queue_put( pjmedia_frame_queue *fq, - const void *framebuf, - unsigned size, - pj_uint32_t timestamp ) -{ - struct fq_frame *f; - - TRACE_((fq->obj_name, "PUT seq=%d, count=%d", - timestamp / fq->samples_per_frame, fq->count)); - - if (pj_list_empty(&fq->free_list)) { - PJ_LOG(5,(fq->obj_name, - " AEC info: queue is full, frame discarded " - "[count=%d, seq=%d]", - fq->max_count, timestamp / fq->samples_per_frame)); - //pjmedia_frame_queue_init(fq, fq->seq_delay, fq->prefetch_count); - return PJ_ETOOMANY; - } - - PJ_ASSERT_RETURN(size <= fq->frame_size, PJ_ETOOBIG); - - f = fq->free_list.next; - pj_list_erase(f); - - pj_memcpy(f->buf, framebuf, size); - f->size = size; - f->seq = timestamp / fq->samples_per_frame; - - pj_list_push_back(&fq->frame_list, f); - ++fq->count; - - return PJ_SUCCESS; -} - -PJ_DEF(pj_status_t) pjmedia_frame_queue_get( pjmedia_frame_queue *fq, - pj_uint32_t get_timestamp, - void **framebuf, - unsigned *size ) -{ - pj_uint32_t frame_seq; - struct fq_frame *f; - - frame_seq = get_timestamp/fq->samples_per_frame + fq->seq_delay - - fq->prefetch_count; - - TRACE_((fq->obj_name, "GET seq=%d for seq=%d delay=%d, prefetch=%d", - get_timestamp/fq->samples_per_frame, frame_seq, fq->seq_delay, - fq->prefetch_count)); - - *size = 0; - - /* Remove old frames */ - for (;!pj_list_empty(&fq->frame_list);) { - f = fq->frame_list.next; - if (f->seq >= frame_seq) - break; - - PJ_LOG(5,(fq->obj_name, - " AEC Info: old frame removed (seq=%d, want=%d, count=%d)", - f->seq, frame_seq, fq->count)); - pj_list_erase(f); - --fq->count; - pj_list_push_back(&fq->free_list, f); - } - - if (pj_list_empty(&fq->frame_list)) { - PJ_LOG(5,(fq->obj_name, - " AEC Info: empty queue for seq=%d!", - frame_seq)); - return PJ_ENOTFOUND; - } - - f = fq->frame_list.next; - - if (f->seq > frame_seq) { - PJ_LOG(5,(fq->obj_name, - " AEC Info: prefetching (first seq=%d)", - f->seq)); - return -1; - } - - pj_list_erase(f); - --fq->count; - - *framebuf = (void*)f->buf; - *size = f->size; - - TRACE_((fq->obj_name, " returning frame with seq=%d, count=%d", - f->seq, fq->count)); - - pj_list_push_front(&fq->free_list, f); - return PJ_SUCCESS; -} - -enum -{ - TS_FLAG_PLAY = 1, - TS_FLAG_REC = 2, - TS_FLAG_OK = 3, -}; - typedef struct speex_ec { SpeexEchoState *state; @@ -243,14 +35,6 @@ typedef struct speex_ec unsigned prefetch; unsigned options; pj_int16_t *tmp_frame; - spx_int32_t *residue; - - pj_uint32_t play_ts, - rec_ts, - ts_flag; - - pjmedia_frame_queue *frame_queue; - pj_lock_t *lock; /* To protect buffers, if required */ } speex_ec; @@ -260,43 +44,33 @@ typedef struct speex_ec */ PJ_DEF(pj_status_t) speex_aec_create(pj_pool_t *pool, unsigned clock_rate, + unsigned channel_count, unsigned samples_per_frame, unsigned tail_ms, - unsigned latency_ms, unsigned options, void **p_echo ) { speex_ec *echo; int sampling_rate; - pj_status_t status; *p_echo = NULL; echo = PJ_POOL_ZALLOC_T(pool, speex_ec); PJ_ASSERT_RETURN(echo != NULL, PJ_ENOMEM); - if (options & PJMEDIA_ECHO_NO_LOCK) { - status = pj_lock_create_null_mutex(pool, "aec%p", &echo->lock); - if (status != PJ_SUCCESS) - return status; - } else { - status = pj_lock_create_simple_mutex(pool, "aec%p", &echo->lock); - if (status != PJ_SUCCESS) - return status; - } - echo->samples_per_frame = samples_per_frame; - echo->prefetch = (latency_ms * clock_rate / 1000) / samples_per_frame; - if (echo->prefetch < MIN_PREFETCH) - echo->prefetch = MIN_PREFETCH; - if (echo->prefetch > MAX_PREFETCH) - echo->prefetch = MAX_PREFETCH; echo->options = options; - echo->state = speex_echo_state_init(samples_per_frame, - clock_rate * tail_ms / 1000); +#if 0 + echo->state = speex_echo_state_init_mc(echo->samples_per_frame, + clock_rate * tail_ms / 1000, + channel_count, channel_count); +#else + PJ_ASSERT_RETURN(channel_count==1, PJ_EINVAL); + echo->state = speex_echo_state_init(echo->samples_per_frame, + clock_rate * tail_ms / 1000); +#endif if (echo->state == NULL) { - pj_lock_destroy(echo->lock); return PJ_ENOMEM; } @@ -305,11 +79,10 @@ PJ_DEF(pj_status_t) speex_aec_create(pj_pool_t *pool, speex_echo_ctl(echo->state, SPEEX_ECHO_SET_SAMPLING_RATE, &sampling_rate); - echo->preprocess = speex_preprocess_state_init(samples_per_frame, + echo->preprocess = speex_preprocess_state_init(echo->samples_per_frame, clock_rate); if (echo->preprocess == NULL) { speex_echo_state_destroy(echo->state); - pj_lock_destroy(echo->lock); return PJ_ENOMEM; } @@ -324,7 +97,7 @@ PJ_DEF(pj_status_t) speex_aec_create(pj_pool_t *pool, speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_VAD, &disabled); speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_DEREVERB, - &disabled); + &enabled); #endif /* Control echo cancellation in the preprocessor */ @@ -333,33 +106,11 @@ PJ_DEF(pj_status_t) speex_aec_create(pj_pool_t *pool, /* Create temporary frame for echo cancellation */ - echo->tmp_frame = (pj_int16_t*) pj_pool_zalloc(pool, 2 * samples_per_frame); + echo->tmp_frame = (pj_int16_t*) pj_pool_zalloc(pool, 2*samples_per_frame); PJ_ASSERT_RETURN(echo->tmp_frame != NULL, PJ_ENOMEM); - /* Create temporary frame to receive residue */ - echo->residue = (spx_int32_t*) - pj_pool_zalloc(pool, sizeof(spx_int32_t) * - (samples_per_frame+1)); - PJ_ASSERT_RETURN(echo->residue != NULL, PJ_ENOMEM); - - /* Create frame queue */ - status = pjmedia_frame_queue_create(pool, "aec%p", samples_per_frame*2, - samples_per_frame, BUF_COUNT, - &echo->frame_queue); - if (status != PJ_SUCCESS) { - speex_preprocess_state_destroy(echo->preprocess); - speex_echo_state_destroy(echo->state); - pj_lock_destroy(echo->lock); - return status; - } - /* Done */ *p_echo = echo; - - PJ_LOG(4,(THIS_FILE, "Speex Echo canceller/AEC created, clock_rate=%d, " - "samples per frame=%d, tail length=%d ms, " - "latency=%d ms", - clock_rate, samples_per_frame, tail_ms, latency_ms)); return PJ_SUCCESS; } @@ -374,9 +125,6 @@ PJ_DEF(pj_status_t) speex_aec_destroy(void *state ) PJ_ASSERT_RETURN(echo && echo->state, PJ_EINVAL); - if (echo->lock) - pj_lock_acquire(echo->lock); - if (echo->state) { speex_echo_state_destroy(echo->state); echo->state = NULL; @@ -387,137 +135,17 @@ PJ_DEF(pj_status_t) speex_aec_destroy(void *state ) echo->preprocess = NULL; } - if (echo->lock) { - pj_lock_destroy(echo->lock); - echo->lock = NULL; - } - return PJ_SUCCESS; } /* - * Let the AEC knows that a frame has been played to the speaker. + * Reset AEC */ -PJ_DEF(pj_status_t) speex_aec_playback(void *state, - pj_int16_t *play_frm ) +PJ_DEF(void) speex_aec_reset(void *state ) { speex_ec *echo = (speex_ec*) state; - - /* Sanity checks */ - PJ_ASSERT_RETURN(echo && play_frm, PJ_EINVAL); - - /* The AEC must be configured to support internal playback buffer */ - PJ_ASSERT_RETURN(echo->frame_queue!= NULL, PJ_EINVALIDOP); - - pj_lock_acquire(echo->lock); - - /* Inc timestamp */ - echo->play_ts += echo->samples_per_frame; - - /* Initialize frame delay. */ - if ((echo->ts_flag & TS_FLAG_PLAY) == 0) { - echo->ts_flag |= TS_FLAG_PLAY; - - if (echo->ts_flag == TS_FLAG_OK) { - int seq_delay; - - seq_delay = ((int)echo->play_ts - (int)echo->rec_ts) / - (int)echo->samples_per_frame; - pjmedia_frame_queue_init(echo->frame_queue, seq_delay, - echo->prefetch); - } - } - - if (pjmedia_frame_queue_put(echo->frame_queue, play_frm, - echo->samples_per_frame*2, - echo->play_ts) != PJ_SUCCESS) - { - int seq_delay; - - /* On full reset frame queue */ - seq_delay = ((int)echo->play_ts - (int)echo->rec_ts) / - (int)echo->samples_per_frame; - pjmedia_frame_queue_init(echo->frame_queue, seq_delay, - echo->prefetch); - - /* And re-put */ - pjmedia_frame_queue_put(echo->frame_queue, play_frm, - echo->samples_per_frame*2, - echo->play_ts); - } - - pj_lock_release(echo->lock); - - return PJ_SUCCESS; -} - - -/* - * Let the AEC knows that a frame has been captured from the microphone. - */ -PJ_DEF(pj_status_t) speex_aec_capture( void *state, - pj_int16_t *rec_frm, - unsigned options ) -{ - speex_ec *echo = (speex_ec*) state; - pj_status_t status = PJ_SUCCESS; - - /* Sanity checks */ - PJ_ASSERT_RETURN(echo && rec_frm, PJ_EINVAL); - - /* The AEC must be configured to support internal playback buffer */ - PJ_ASSERT_RETURN(echo->frame_queue!= NULL, PJ_EINVALIDOP); - - /* Lock mutex */ - pj_lock_acquire(echo->lock); - - /* Inc timestamp */ - echo->rec_ts += echo->samples_per_frame; - - /* Init frame delay. */ - if ((echo->ts_flag & TS_FLAG_REC) == 0) { - echo->ts_flag |= TS_FLAG_REC; - - if (echo->ts_flag == TS_FLAG_OK) { - int seq_delay; - - seq_delay = ((int)echo->play_ts - (int)echo->rec_ts) / - (int)echo->samples_per_frame; - pjmedia_frame_queue_init(echo->frame_queue, seq_delay, - echo->prefetch); - } - } - - /* Cancel echo */ - if (echo->ts_flag == TS_FLAG_OK) { - void *play_buf; - unsigned size = 0; - - if (pjmedia_frame_queue_empty(echo->frame_queue)) { - int seq_delay; - - seq_delay = ((int)echo->play_ts - (int)echo->rec_ts) / - (int)echo->samples_per_frame; - pjmedia_frame_queue_init(echo->frame_queue, seq_delay, - echo->prefetch); - status = -1; - - } else { - status = pjmedia_frame_queue_get(echo->frame_queue, echo->rec_ts, - &play_buf, &size); - if (size != 0) { - speex_aec_cancel_echo(echo, rec_frm, (pj_int16_t*)play_buf, - options, NULL); - } - } - - if (status != PJ_SUCCESS) - speex_echo_state_reset(echo->state); - } - - pj_lock_release(echo->lock); - return PJ_SUCCESS; + speex_echo_state_reset(echo->state); } diff --git a/pjmedia/src/pjmedia/echo_suppress.c b/pjmedia/src/pjmedia/echo_suppress.c index 8ef071ba..a86a058d 100644 --- a/pjmedia/src/pjmedia/echo_suppress.c +++ b/pjmedia/src/pjmedia/echo_suppress.c @@ -35,9 +35,7 @@ */ typedef struct echo_supp { - pj_bool_t suppressing; pjmedia_silence_det *sd; - pj_time_val last_signal; unsigned samples_per_frame; unsigned tail_ms; } echo_supp; @@ -49,9 +47,9 @@ typedef struct echo_supp */ PJ_DEF(pj_status_t) echo_supp_create( pj_pool_t *pool, unsigned clock_rate, + unsigned channel_count, unsigned samples_per_frame, unsigned tail_ms, - unsigned latency_ms, unsigned options, void **p_state ) { @@ -59,8 +57,8 @@ PJ_DEF(pj_status_t) echo_supp_create( pj_pool_t *pool, pj_status_t status; PJ_UNUSED_ARG(clock_rate); + PJ_UNUSED_ARG(channel_count); PJ_UNUSED_ARG(options); - PJ_UNUSED_ARG(latency_ms); ec = PJ_POOL_ZALLOC_T(pool, struct echo_supp); ec->samples_per_frame = samples_per_frame; @@ -91,68 +89,14 @@ PJ_DEF(pj_status_t) echo_supp_destroy(void *state) /* - * Let the AEC knows that a frame has been played to the speaker. + * Reset */ -PJ_DEF(pj_status_t) echo_supp_playback( void *state, - pj_int16_t *play_frm ) +PJ_DEF(void) echo_supp_reset(void *state) { - echo_supp *ec = (echo_supp*) state; - pj_bool_t silence; - pj_bool_t last_suppressing = ec->suppressing; - - silence = pjmedia_silence_det_detect(ec->sd, play_frm, - ec->samples_per_frame, NULL); - - ec->suppressing = !silence; - - if (ec->suppressing) { - pj_gettimeofday(&ec->last_signal); - } - - if (ec->suppressing!=0 && last_suppressing==0) { - PJ_LOG(5,(THIS_FILE, "Start suppressing..")); - } else if (ec->suppressing==0 && last_suppressing!=0) { - PJ_LOG(5,(THIS_FILE, "Stop suppressing..")); - } - - return PJ_SUCCESS; -} - - -/* - * Let the AEC knows that a frame has been captured from the microphone. - */ -PJ_DEF(pj_status_t) echo_supp_capture( void *state, - pj_int16_t *rec_frm, - unsigned options ) -{ - echo_supp *ec = (echo_supp*) state; - pj_time_val now; - unsigned delay_ms; - - PJ_UNUSED_ARG(options); - - pj_gettimeofday(&now); - - PJ_TIME_VAL_SUB(now, ec->last_signal); - delay_ms = PJ_TIME_VAL_MSEC(now); - - if (delay_ms < ec->tail_ms) { -#if defined(PJMEDIA_ECHO_SUPPRESS_FACTOR) && PJMEDIA_ECHO_SUPPRESS_FACTOR!=0 - unsigned i; - for (i=0; i<ec->samples_per_frame; ++i) { - rec_frm[i] = (pj_int16_t)(rec_frm[i] >> - PJMEDIA_ECHO_SUPPRESS_FACTOR); - } -#else - pjmedia_zero_samples(rec_frm, ec->samples_per_frame); -#endif - } - - return PJ_SUCCESS; + PJ_UNUSED_ARG(state); + return; } - /* * Perform echo cancellation. */ diff --git a/pjmedia/src/pjmedia/sound_port.c b/pjmedia/src/pjmedia/sound_port.c index e180ed2b..50f35d8e 100644 --- a/pjmedia/src/pjmedia/sound_port.c +++ b/pjmedia/src/pjmedia/sound_port.c @@ -544,12 +544,16 @@ PJ_DEF(pj_status_t) pjmedia_snd_port_set_ec( pjmedia_snd_port *snd_port, if (status != PJ_SUCCESS) si.rec_latency = si.play_latency = 0; - delay_ms = (si.rec_latency + si.play_latency) * 1000 / - snd_port->clock_rate; - status = pjmedia_echo_create(pool, snd_port->clock_rate, - snd_port->samples_per_frame, - tail_ms, delay_ms, - options, &snd_port->ec_state); + //No need to add input latency in the latency calculation, + //since actual input latency should be zero. + //delay_ms = (si.rec_latency + si.play_latency) * 1000 / + // snd_port->clock_rate; + delay_ms = si.play_latency * 1000 / snd_port->clock_rate; + status = pjmedia_echo_create2(pool, snd_port->clock_rate, + snd_port->channel_count, + snd_port->samples_per_frame, + tail_ms, delay_ms, + options, &snd_port->ec_state); if (status != PJ_SUCCESS) snd_port->ec_state = NULL; else diff --git a/pjsip-apps/src/samples/aectest.c b/pjsip-apps/src/samples/aectest.c index 14c14d60..14017749 100644 --- a/pjsip-apps/src/samples/aectest.c +++ b/pjsip-apps/src/samples/aectest.c @@ -32,10 +32,9 @@ #include <pjlib-util.h> /* pj_getopt */ #include <pjlib.h> -/* For logging purpose. */ -#define THIS_FILE "playfile.c" +#define THIS_FILE "aectest.c" #define PTIME 20 -#define TAIL_LENGTH 800 +#define TAIL_LENGTH 200 static const char *desc = " FILE \n" @@ -48,12 +47,22 @@ static const char *desc = " \n" " USAGE \n" " \n" -" aectest INPUT.WAV OUTPUT.WAV \n" +" aectest [options] <PLAY.WAV> <REC.WAV> <OUTPUT.WAV> \n" " \n" -" INPUT.WAV is the file to be played to the speaker. \n" -" OUTPUT.WAV is the output file containing recorded signal from the\n" -" microphone."; +" <PLAY.WAV> is the signal played to the speaker. \n" +" <REC.WAV> is the signal captured from the microphone. \n" +" <OUTPUT.WAV> is the output file to store the test result \n" +"\n" +" options:\n" +" -d The delay between playback and capture in ms. Default is zero.\n" +" -l Set the echo tail length in ms. Default is 200 ms \n" +" -a Algorithm: 0=default, 1=speex, 3=echo suppress \n"; +/* + * Sample session: + * + * -d 100 -a 1 ../bin/orig8.wav ../bin/echo8.wav ../bin/result8.wav + */ static void app_perror(const char *sender, const char *title, pj_status_t st) { @@ -72,21 +81,55 @@ int main(int argc, char *argv[]) pj_caching_pool cp; pjmedia_endpt *med_endpt; pj_pool_t *pool; - pjmedia_port *play_port; - pjmedia_port *rec_port; - pjmedia_port *bidir_port; - pjmedia_snd_port *snd; - char tmp[10]; + pjmedia_port *wav_play; + pjmedia_port *wav_rec; + pjmedia_port *wav_out; pj_status_t status; + pjmedia_echo_state *ec; + pjmedia_frame play_frame, rec_frame; + unsigned opt = 0; + unsigned latency_ms = 0; + unsigned tail_ms = TAIL_LENGTH; + pj_timestamp t0, t1; + int c; + + pj_optind = 0; + while ((c=pj_getopt(argc, argv, "d:l:a:")) !=-1) { + switch (c) { + case 'd': + latency_ms = atoi(pj_optarg); + break; + case 'l': + tail_ms = atoi(pj_optarg); + break; + case 'a': + { + int alg = atoi(pj_optarg); + switch (alg) { + case 0: + opt = 0; + case 1: + opt = PJMEDIA_ECHO_SPEEX; + break; + case 3: + opt = PJMEDIA_ECHO_SIMPLE; + break; + default: + puts("Invalid algorithm"); + puts(desc); + return 1; + } + } + break; + } + } - - if (argc != 3) { - puts("Error: arguments required"); + if (argc - pj_optind != 3) { + puts("Error: missing argument(s)"); puts(desc); return 1; } - /* Must init PJLIB first: */ status = pj_init(); PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1); @@ -109,98 +152,98 @@ int main(int argc, char *argv[]) NULL /* callback on error */ ); - /* Create file media port from the WAV file */ - status = pjmedia_wav_player_port_create( pool, /* memory pool */ - argv[1], /* file to play */ - PTIME, /* ptime. */ - 0, /* flags */ - 0, /* default buffer */ - &play_port); + /* Open wav_play */ + status = pjmedia_wav_player_port_create(pool, argv[pj_optind], PTIME, + PJMEDIA_FILE_NO_LOOP, 0, + &wav_play); if (status != PJ_SUCCESS) { - app_perror(THIS_FILE, "Unable to open input WAV file", status); + app_perror(THIS_FILE, "Error opening playback WAV file", status); return 1; } - - if (play_port->info.channel_count != 1) { - puts("Error: input WAV must have 1 channel audio"); + + /* Open recorded wav */ + status = pjmedia_wav_player_port_create(pool, argv[pj_optind+1], PTIME, + PJMEDIA_FILE_NO_LOOP, 0, + &wav_rec); + if (status != PJ_SUCCESS) { + app_perror(THIS_FILE, "Error opening recorded WAV file", status); return 1; } - if (play_port->info.bits_per_sample != 16) { - puts("Error: input WAV must be encoded as 16bit PCM"); + + /* play and rec WAVs must have the same clock rate */ + if (wav_play->info.clock_rate != wav_rec->info.clock_rate) { + puts("Error: clock rate mismatch in the WAV files"); return 1; } -#ifdef PJ_DARWINOS - /* Need to force clock rate on MacOS */ - if (play_port->info.clock_rate != 44100) { - pjmedia_port *resample_port; - - status = pjmedia_resample_port_create(pool, play_port, 44100, 0, - &resample_port); - if (status != PJ_SUCCESS) { - app_perror(THIS_FILE, "Unable to create resampling port", status); - return 1; - } - - data.play_port = resample_port; + /* .. and channel count */ + if (wav_play->info.channel_count != wav_rec->info.channel_count) { + puts("Error: clock rate mismatch in the WAV files"); + return 1; } -#endif - - /* Create WAV output file port */ - status = pjmedia_wav_writer_port_create(pool, argv[2], - play_port->info.clock_rate, - play_port->info.channel_count, - play_port->info.samples_per_frame, - play_port->info.bits_per_sample, - 0, 0, &rec_port); + + /* Create output wav */ + status = pjmedia_wav_writer_port_create(pool, argv[pj_optind+2], + wav_play->info.clock_rate, + wav_play->info.channel_count, + wav_play->info.samples_per_frame, + wav_play->info.bits_per_sample, + 0, 0, &wav_out); if (status != PJ_SUCCESS) { - app_perror(THIS_FILE, "Unable to open output file", status); + app_perror(THIS_FILE, "Error opening output WAV file", status); return 1; } - /* Create bidirectional port from the WAV ports */ - pjmedia_bidirectional_port_create(pool, play_port, rec_port, &bidir_port); - - /* Create sound device. */ - status = pjmedia_snd_port_create(pool, -1, -1, - play_port->info.clock_rate, - play_port->info.channel_count, - play_port->info.samples_per_frame, - play_port->info.bits_per_sample, - 0, &snd); + /* Create echo canceller */ + status = pjmedia_echo_create2(pool, wav_play->info.clock_rate, + wav_play->info.channel_count, + wav_play->info.samples_per_frame, + tail_ms, latency_ms, + opt, &ec); if (status != PJ_SUCCESS) { - app_perror(THIS_FILE, "Unable to open sound device", status); + app_perror(THIS_FILE, "Error creating EC", status); return 1; } - /* Customize AEC */ - pjmedia_snd_port_set_ec(snd, pool, TAIL_LENGTH, 0); + /* Processing loop */ + play_frame.buf = pj_pool_alloc(pool, wav_play->info.samples_per_frame<<1); + rec_frame.buf = pj_pool_alloc(pool, wav_play->info.samples_per_frame<<1); + pj_get_timestamp(&t0); + for (;;) { + play_frame.size = wav_play->info.samples_per_frame << 1; + status = pjmedia_port_get_frame(wav_play, &play_frame); + if (status != PJ_SUCCESS) + break; - /* Connect sound to the port */ - pjmedia_snd_port_connect(snd, bidir_port); + status = pjmedia_echo_playback(ec, (short*)play_frame.buf); + rec_frame.size = wav_play->info.samples_per_frame << 1; + status = pjmedia_port_get_frame(wav_rec, &rec_frame); + if (status != PJ_SUCCESS) + break; - puts(""); - printf("Playing %s and recording to %s\n", argv[1], argv[2]); - puts("Press <ENTER> to quit"); + status = pjmedia_echo_capture(ec, (short*)rec_frame.buf, 0); - fgets(tmp, sizeof(tmp), stdin); + //status = pjmedia_echo_cancel(ec, (short*)rec_frame.buf, + // (short*)play_frame.buf, 0, NULL); - - /* Start deinitialization: */ - - /* Destroy sound device */ - status = pjmedia_snd_port_destroy( snd ); - PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1); + pjmedia_port_put_frame(wav_out, &rec_frame); + } + pj_get_timestamp(&t1); + PJ_LOG(3,(THIS_FILE, "Completed in %u msec\n", pj_elapsed_msec(&t0, &t1))); /* Destroy file port(s) */ - status = pjmedia_port_destroy( play_port ); + status = pjmedia_port_destroy( wav_play ); + PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1); + status = pjmedia_port_destroy( wav_rec ); PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1); - status = pjmedia_port_destroy( rec_port ); + status = pjmedia_port_destroy( wav_out ); PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1); + /* Destroy ec */ + pjmedia_echo_destroy(ec); /* Release application pool */ pj_pool_release( pool ); diff --git a/third_party/speex/libspeex/mdf.c b/third_party/speex/libspeex/mdf.c index 456ab847..1fbb4d60 100644 --- a/third_party/speex/libspeex/mdf.c +++ b/third_party/speex/libspeex/mdf.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2008 Jean-Marc Valin +/* Copyright (C) 2003-2006 Jean-Marc Valin File: mdf.c Echo canceller based on the MDF algorithm (see below) @@ -88,12 +88,6 @@ #define WEIGHT_SHIFT 0 #endif -#ifdef FIXED_POINT -#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x))) -#else -#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x)))) -#endif - /* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */ #define TWO_PATH @@ -137,8 +131,6 @@ struct SpeexEchoState_ { int adapted; int saturated; int screwed_up; - int C; /** Number of input channels (microphones) */ - int K; /** Number of output channels (loudspeakers) */ spx_int32_t sampling_rate; spx_word16_t spec_average; spx_word16_t beta0; @@ -179,10 +171,10 @@ struct SpeexEchoState_ { spx_word16_t *window; spx_word16_t *prop; void *fft_table; - spx_word16_t *memX, *memD, *memE; + spx_word16_t memX, memD, memE; spx_word16_t preemph; spx_word16_t notch_radius; - spx_mem_t *notch_mem; + spx_mem_t notch_mem[2]; /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ spx_int16_t *play_buf; @@ -190,7 +182,7 @@ struct SpeexEchoState_ { int play_buf_started; }; -static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride) +static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) { int i; spx_word16_t den2; @@ -202,7 +194,7 @@ static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/ for (i=0;i<len;i++) { - spx_word16_t vin = in[i*stride]; + spx_word16_t vin = in[i]; spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15); #ifdef FIXED_POINT mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1); @@ -242,18 +234,6 @@ static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N ps[j]=MULT16_16(X[i],X[i]); } -/** Compute power spectrum of a half-complex (packed) vector and accumulate */ -static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N) -{ - int i, j; - ps[0]+=MULT16_16(X[0],X[0]); - for (i=1,j=1;i<N-1;i+=2,j++) - { - ps[j] += MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]); - } - ps[j]+=MULT16_16(X[i],X[i]); -} - /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */ #ifdef FIXED_POINT static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) @@ -350,17 +330,16 @@ static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_fl prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i])); } -static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop) +static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, spx_word16_t *prop) { - int i, j, p; + int i, j; spx_word16_t max_sum = 1; spx_word32_t prop_sum = 1; for (i=0;i<M;i++) { spx_word32_t tmp = 1; - for (p=0;p<P;p++) - for (j=0;j<N;j++) - tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18))); + for (j=0;j<N;j++) + tmp += MULT16_16(EXTRACT16(SHR32(W[i*N+j],18)), EXTRACT16(SHR32(W[i*N+j],18))); #ifdef FIXED_POINT /* Just a security in case an overflow were to occur */ tmp = MIN32(ABS32(tmp), 536870912); @@ -399,20 +378,11 @@ static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const sp #endif /** Creates a new echo canceller state */ -EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) -{ - return speex_echo_state_init_mc(frame_size, filter_length, 1, 1); -} - -EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers) +SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) { - int i,N,M, C, K; + int i,N,M; SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState)); - st->K = nb_speakers; - st->C = nb_mic; - C=st->C; - K=st->K; #ifdef DUMP_ECHO_CANCEL_DATA if (rFile || pFile || oFile) speex_fatal("Opening dump files twice"); @@ -443,23 +413,23 @@ EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_lengt st->fft_table = spx_fft_init(N); - st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); - st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t)); - st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t)); - st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); - st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); + st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t)); + st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); - st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t)); - st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); - st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); - st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t)); + st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t)); + st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); #ifdef TWO_PATH - st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t)); + st->foreground = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t)); #endif st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); @@ -480,7 +450,7 @@ EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_lengt #endif for (i=0;i<=st->frame_size;i++) st->power_1[i] = FLOAT_ONE; - for (i=0;i<N*M*K*C;i++) + for (i=0;i<N*M;i++) st->W[i] = 0; { spx_word32_t sum = 0; @@ -495,13 +465,11 @@ EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_lengt } for (i=M-1;i>=0;i--) { - st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum); + st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum); } } - st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t)); - st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); - st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); + st->memX=st->memD=st->memE=0; st->preemph = QCONST16(.9,15); if (st->sampling_rate<12000) st->notch_radius = QCONST16(.9, 15); @@ -510,7 +478,7 @@ EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_lengt else st->notch_radius = QCONST16(.992, 15); - st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t)); + st->notch_mem[0] = st->notch_mem[1] = 0; st->adapted = 0; st->Pey = st->Pyy = FLOAT_ONE; @@ -519,7 +487,7 @@ EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_lengt st->Dvar1 = st->Dvar2 = FLOAT_ZERO; #endif - st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); + st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; st->play_buf_started = 0; @@ -527,15 +495,13 @@ EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_lengt } /** Resets echo canceller state */ -EXPORT void speex_echo_state_reset(SpeexEchoState *st) +void speex_echo_state_reset(SpeexEchoState *st) { - int i, M, N, C, K; + int i, M, N; st->cancel_count=0; st->screwed_up = 0; N = st->window_size; M = st->M; - C=st->C; - K=st->K; for (i=0;i<N*M;i++) st->W[i] = 0; #ifdef TWO_PATH @@ -555,20 +521,13 @@ EXPORT void speex_echo_state_reset(SpeexEchoState *st) { st->last_y[i] = 0; } - for (i=0;i<N*C;i++) + for (i=0;i<N;i++) { st->E[i] = 0; - } - for (i=0;i<N*K;i++) - { st->x[i] = 0; } - for (i=0;i<2*C;i++) - st->notch_mem[i] = 0; - for (i=0;i<C;i++) - st->memD[i]=st->memE[i]=0; - for (i=0;i<K;i++) - st->memX[i]=0; + st->notch_mem[0] = st->notch_mem[1] = 0; + st->memX=st->memD=st->memE=0; st->saturated = 0; st->adapted = 0; @@ -586,7 +545,7 @@ EXPORT void speex_echo_state_reset(SpeexEchoState *st) } /** Destroys an echo canceller state */ -EXPORT void speex_echo_state_destroy(SpeexEchoState *st) +void speex_echo_state_destroy(SpeexEchoState *st) { spx_fft_destroy(st->fft_table); @@ -617,11 +576,6 @@ EXPORT void speex_echo_state_destroy(SpeexEchoState *st) #ifdef FIXED_POINT speex_free(st->wtmp2); #endif - speex_free(st->memX); - speex_free(st->memD); - speex_free(st->memE); - speex_free(st->notch_mem); - speex_free(st->play_buf); speex_free(st); @@ -633,7 +587,7 @@ EXPORT void speex_echo_state_destroy(SpeexEchoState *st) #endif } -EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) +void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) { int i; /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/ @@ -656,7 +610,7 @@ EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_i } } -EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) +void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) { /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/ if (!st->play_buf_started) @@ -683,16 +637,16 @@ EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) } /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */ -EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) +void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) { speex_echo_cancellation(st, in, far_end, out); } /** Performs echo cancellation on a frame */ -EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) +void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) { - int i,j, chan, speak; - int N,M, C, K; + int i,j; + int N,M; spx_word32_t Syy,See,Sxx,Sdd, Sff; #ifdef TWO_PATH spx_word32_t Dbf; @@ -707,9 +661,6 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c N = st->window_size; M = st->M; - C = st->C; - K = st->K; - st->cancel_count++; #ifdef FIXED_POINT ss=DIV32_16(11469,M); @@ -719,178 +670,137 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c ss_1 = 1-ss; #endif - for (chan = 0; chan < C; chan++) + /* Apply a notch filter to make sure DC doesn't end up causing problems */ + filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem); + /* Copy input data to buffer and apply pre-emphasis */ + for (i=0;i<st->frame_size;i++) { - /* Apply a notch filter to make sure DC doesn't end up causing problems */ - filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C); - /* Copy input data to buffer and apply pre-emphasis */ - /* Copy input data to buffer */ - for (i=0;i<st->frame_size;i++) - { - spx_word32_t tmp32; - /* FIXME: This core has changed a bit, need to merge properly */ - tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan]))); + spx_word32_t tmp32; + tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); #ifdef FIXED_POINT - if (tmp32 > 32767) - { - tmp32 = 32767; - if (st->saturated == 0) - st->saturated = 1; - } - if (tmp32 < -32767) - { - tmp32 = -32767; - if (st->saturated == 0) - st->saturated = 1; - } -#endif - st->memD[chan] = st->input[chan*st->frame_size+i]; - st->input[chan*st->frame_size+i] = EXTRACT16(tmp32); + /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */ + if (tmp32 > 32767) + { + tmp32 = 32767; + st->saturated = M+1; } - } - - for (speak = 0; speak < K; speak++) - { - for (i=0;i<st->frame_size;i++) + if (tmp32 < -32767) { - spx_word32_t tmp32; - st->x[speak*N+i] = st->x[speak*N+i+st->frame_size]; - tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak]))); -#ifdef FIXED_POINT - /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ - if (tmp32 > 32767) - { - tmp32 = 32767; - st->saturated = M+1; - } - if (tmp32 < -32767) - { - tmp32 = -32767; - st->saturated = M+1; - } + tmp32 = -32767; + st->saturated = M+1; + } #endif - st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32); - st->memX[speak] = far_end[i*K+speak]; - } - } - - for (speak = 0; speak < K; speak++) - { - /* Shift memory: this could be optimized eventually*/ - for (j=M-1;j>=0;j--) + st->x[i+st->frame_size] = EXTRACT16(tmp32); + st->memX = far_end[i]; + + tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); +#ifdef FIXED_POINT + if (tmp32 > 32767) { - for (i=0;i<N;i++) - st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i]; + tmp32 = 32767; + if (st->saturated == 0) + st->saturated = 1; + } + if (tmp32 < -32767) + { + tmp32 = -32767; + if (st->saturated == 0) + st->saturated = 1; } - /* Convert x (echo input) to frequency domain */ - spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]); +#endif + st->memD = st->input[i]; + st->input[i] = tmp32; } - - Sxx = 0; - for (speak = 0; speak < K; speak++) + + /* Shift memory: this could be optimized eventually*/ + for (j=M-1;j>=0;j--) { - Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); - power_spectrum_accum(st->X+speak*N, st->Xf, N); + for (i=0;i<N;i++) + st->X[(j+1)*N+i] = st->X[j*N+i]; } + + /* Convert x (far end) to frequency domain */ + spx_fft(st->fft_table, st->x, &st->X[0]); + for (i=0;i<N;i++) + st->last_y[i] = st->x[i]; + Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); + for (i=0;i<st->frame_size;i++) + st->x[i] = st->x[i+st->frame_size]; + /* From here on, the top part of x is used as scratch space */ - Sff = 0; - for (chan = 0; chan < C; chan++) - { #ifdef TWO_PATH - /* Compute foreground filter */ - spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K); - spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N); - for (i=0;i<st->frame_size;i++) - st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]); - Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); + /* Compute foreground filter */ + spectral_mul_accum16(st->X, st->foreground, st->Y, N, M); + spx_ifft(st->fft_table, st->Y, st->e); + for (i=0;i<st->frame_size;i++) + st->e[i] = SUB16(st->input[i], st->e[i+st->frame_size]); + Sff = mdf_inner_prod(st->e, st->e, st->frame_size); #endif - } /* Adjust proportional adaption rate */ - /* FIXME: Adjust that for C, K*/ - if (st->adapted) - mdf_adjust_prop (st->W, N, M, C*K, st->prop); + mdf_adjust_prop (st->W, N, M, st->prop); /* Compute weight gradient */ if (st->saturated == 0) { - for (chan = 0; chan < C; chan++) + for (j=M-1;j>=0;j--) { - for (speak = 0; speak < K; speak++) - { - for (j=M-1;j>=0;j--) - { - weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N); - for (i=0;i<N;i++) - st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i]; - } - } + weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N); + for (i=0;i<N;i++) + st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]); + } } else { st->saturated--; } - /* FIXME: MC conversion required */ /* Update weight to prevent circular convolution (MDF / AUMDF) */ - for (chan = 0; chan < C; chan++) + for (j=0;j<M;j++) { - for (speak = 0; speak < K; speak++) + /* This is a variant of the Alternatively Updated MDF (AUMDF) */ + /* Remove the "if" to make this an MDF filter */ + if (j==0 || st->cancel_count%(M-1) == j-1) { - for (j=0;j<M;j++) - { - /* This is a variant of the Alternatively Updated MDF (AUMDF) */ - /* Remove the "if" to make this an MDF filter */ - if (j==0 || st->cancel_count%(M-1) == j-1) - { #ifdef FIXED_POINT - for (i=0;i<N;i++) - st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16)); - spx_ifft(st->fft_table, st->wtmp2, st->wtmp); - for (i=0;i<st->frame_size;i++) - { - st->wtmp[i]=0; - } - for (i=st->frame_size;i<N;i++) - { - st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); - } - spx_fft(st->fft_table, st->wtmp, st->wtmp2); - /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ - for (i=0;i<N;i++) - st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); + for (i=0;i<N;i++) + st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16)); + spx_ifft(st->fft_table, st->wtmp2, st->wtmp); + for (i=0;i<st->frame_size;i++) + { + st->wtmp[i]=0; + } + for (i=st->frame_size;i<N;i++) + { + st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); + } + spx_fft(st->fft_table, st->wtmp, st->wtmp2); + /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ + for (i=0;i<N;i++) + st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); #else - spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp); - for (i=st->frame_size;i<N;i++) - { - st->wtmp[i]=0; - } - spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]); -#endif - } + spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); + for (i=st->frame_size;i<N;i++) + { + st->wtmp[i]=0; } + spx_fft(st->fft_table, st->wtmp, &st->W[j*N]); +#endif } } - - /* So we can use power_spectrum_accum */ - for (i=0;i<=st->frame_size;i++) - st->Rf[i] = st->Yf[i] = st->Xf[i] = 0; - - Dbf = 0; - See = 0; + + /* Compute filter response Y */ + spectral_mul_accum(st->X, st->W, st->Y, N, M); + spx_ifft(st->fft_table, st->Y, st->y); + #ifdef TWO_PATH /* Difference in response, this is used to estimate the variance of our residual power estimate */ - for (chan = 0; chan < C; chan++) - { - spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K); - spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N); - for (i=0;i<st->frame_size;i++) - st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]); - Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); - for (i=0;i<st->frame_size;i++) - st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); - See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); - } + for (i=0;i<st->frame_size;i++) + st->e[i] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]); + Dbf = 10+mdf_inner_prod(st->e, st->e, st->frame_size); #endif + for (i=0;i<st->frame_size;i++) + st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); + See = mdf_inner_prod(st->e, st->e, st->frame_size); #ifndef TWO_PATH Sff = See; #endif @@ -927,12 +837,11 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c st->Davg1 = st->Davg2 = 0; st->Dvar1 = st->Dvar2 = FLOAT_ZERO; /* Copy background filter to foreground filter */ - for (i=0;i<N*M*C*K;i++) + for (i=0;i<N*M;i++) st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16)); /* Apply a smooth transition so as to not introduce blocking artifacts */ - for (chan = 0; chan < C; chan++) - for (i=0;i<st->frame_size;i++) - st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]); + for (i=0;i<st->frame_size;i++) + st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]); } else { int reset_background=0; /* Otherwise, check if the background filter is significantly worse */ @@ -945,16 +854,13 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c if (reset_background) { /* Copy foreground filter to background filter */ - for (i=0;i<N*M*C*K;i++) + for (i=0;i<N*M;i++) st->W[i] = SHL32(EXTEND32(st->foreground[i]),16); /* We also need to copy the output so as to get correct adaptation */ - for (chan = 0; chan < C; chan++) - { - for (i=0;i<st->frame_size;i++) - st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size]; - for (i=0;i<st->frame_size;i++) - st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); - } + for (i=0;i<st->frame_size;i++) + st->y[i+st->frame_size] = st->e[i+st->frame_size]; + for (i=0;i<st->frame_size;i++) + st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); See = Sff; st->Davg1 = st->Davg2 = 0; st->Dvar1 = st->Dvar2 = FLOAT_ZERO; @@ -962,57 +868,47 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c } #endif - Sey = Syy = Sdd = 0; - for (chan = 0; chan < C; chan++) - { - /* Compute error signal (for the output with de-emphasis) */ - for (i=0;i<st->frame_size;i++) - { - spx_word32_t tmp_out; + /* Compute error signal (for the output with de-emphasis) */ + for (i=0;i<st->frame_size;i++) + { + spx_word32_t tmp_out; #ifdef TWO_PATH - tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size])); + tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size])); #else - tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size])); + tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size])); #endif - tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan]))); + /* Saturation */ + if (tmp_out>32767) + tmp_out = 32767; + else if (tmp_out<-32768) + tmp_out = -32768; + tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE))); /* This is an arbitrary test for saturation in the microphone signal */ - if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000) - { + if (in[i] <= -32000 || in[i] >= 32000) + { + tmp_out = 0; if (st->saturated == 0) st->saturated = 1; - } - out[i*C+chan] = WORD2INT(tmp_out); - st->memE[chan] = tmp_out; } - + out[i] = (spx_int16_t)tmp_out; + st->memE = tmp_out; + } + #ifdef DUMP_ECHO_CANCEL_DATA - dump_audio(in, far_end, out, st->frame_size); + dump_audio(in, far_end, out, st->frame_size); #endif - /* Compute error signal (filter update version) */ - for (i=0;i<st->frame_size;i++) - { - st->e[chan*N+i+st->frame_size] = st->e[chan*N+i]; - st->e[chan*N+i] = 0; - } - - /* Compute a bunch of correlations */ - /* FIXME: bad merge */ - Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); - Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); - Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size); - - /* Convert error to frequency domain */ - spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N); - for (i=0;i<st->frame_size;i++) - st->y[i+chan*N] = 0; - spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N); - - /* Compute power spectrum of echo (X), error (E) and filter response (Y) */ - power_spectrum_accum(st->E+chan*N, st->Rf, N); - power_spectrum_accum(st->Y+chan*N, st->Yf, N); - + /* Compute error signal (filter update version) */ + for (i=0;i<st->frame_size;i++) + { + st->e[i+st->frame_size] = st->e[i]; + st->e[i] = 0; } + + /* Compute a bunch of correlations */ + Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size); + Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size); + Sdd = mdf_inner_prod(st->input, st->input, st->frame_size); /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/ @@ -1025,7 +921,7 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c { /* Things have gone really bad */ st->screwed_up += 50; - for (i=0;i<st->frame_size*C;i++) + for (i=0;i<st->frame_size;i++) out[i] = 0; } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6))) { @@ -1044,17 +940,36 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c /* Add a small noise floor to make sure not to have problems when dividing */ See = MAX32(See, SHR32(MULT16_16(N, 100),6)); - - for (speak = 0; speak < K; speak++) - { - Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); - power_spectrum_accum(st->X+speak*N, st->Xf, N); - } + /* Convert error to frequency domain */ + spx_fft(st->fft_table, st->e, st->E); + for (i=0;i<st->frame_size;i++) + st->y[i] = 0; + spx_fft(st->fft_table, st->y, st->Y); + + /* Compute power spectrum of far end (X), error (E) and filter response (Y) */ + power_spectrum(st->E, st->Rf, N); + power_spectrum(st->Y, st->Yf, N); + power_spectrum(st->X, st->Xf, N); /* Smooth far end energy estimate over time */ for (j=0;j<=st->frame_size;j++) st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); + + /* Enable this to compute the power based only on the tail (would need to compute more + efficiently to make this really useful */ + if (0) + { + float scale2 = .5f/M; + for (j=0;j<=st->frame_size;j++) + st->power[j] = 100; + for (i=0;i<M;i++) + { + power_spectrum(&st->X[i*N], st->Xf, N); + for (j=0;j<=st->frame_size;j++) + st->power[j] += scale2*st->Xf[j]; + } + } /* Compute filtered spectra and (cross-)correlations */ for (j=st->frame_size;j>=0;j--) @@ -1176,13 +1091,13 @@ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, c st->sum_adapt = ADD32(st->sum_adapt,adapt_rate); } - /* FIXME: MC conversion required */ - for (i=0;i<st->frame_size;i++) - st->last_y[i] = st->last_y[st->frame_size+i]; + /* Save residual echo so it can be used by the nonlinear processor */ if (st->adapted) { /* If the filter is adapted, take the filtered echo */ for (i=0;i<st->frame_size;i++) + st->last_y[i] = st->last_y[st->frame_size+i]; + for (i=0;i<st->frame_size;i++) st->last_y[st->frame_size+i] = in[i]-out[i]; } else { /* If filter isn't adapted yet, all we can do is take the far end signal directly */ @@ -1226,7 +1141,7 @@ void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, in } -EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) +int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) { switch(request) { @@ -1254,29 +1169,6 @@ EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) case SPEEX_ECHO_GET_SAMPLING_RATE: (*(int*)ptr) = st->sampling_rate; break; - case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE: - /*FIXME: Implement this for multiple channels */ - *((spx_int32_t *)ptr) = st->M * st->frame_size; - break; - case SPEEX_ECHO_GET_IMPULSE_RESPONSE: - { - int M = st->M, N = st->window_size, n = st->frame_size, i, j; - spx_int32_t *filt = (spx_int32_t *) ptr; - for(j=0;j<M;j++) - { - /*FIXME: Implement this for multiple channels */ -#ifdef FIXED_POINT - for (i=0;i<N;i++) - st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN)); - spx_ifft(st->fft_table, st->wtmp2, st->wtmp); -#else - spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); -#endif - for(i=0;i<n;i++) - filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN); - } - } - break; default: speex_warning_int("Unknown speex_echo_ctl request: ", request); return -1; |