diff options
-rw-r--r-- | pjmedia/include/pjmedia/config.h | 29 | ||||
-rw-r--r-- | pjmedia/src/pjmedia-codec/gsm.c | 22 | ||||
-rw-r--r-- | pjmedia/src/pjmedia-codec/ilbc.c | 35 | ||||
-rw-r--r-- | pjmedia/src/pjmedia/g711.c | 34 | ||||
-rw-r--r-- | pjmedia/src/pjmedia/silencedet.c | 13 | ||||
-rw-r--r-- | pjmedia/src/pjmedia/stream.c | 38 |
6 files changed, 134 insertions, 37 deletions
diff --git a/pjmedia/include/pjmedia/config.h b/pjmedia/include/pjmedia/config.h index ea84baa5..5ecc04bf 100644 --- a/pjmedia/include/pjmedia/config.h +++ b/pjmedia/include/pjmedia/config.h @@ -174,11 +174,16 @@ /** * Specify how long (in milliseconds) the stream should suspend the * silence detector/voice activity detector (VAD) during the initial - * period of the session. + * period of the session. This feature is useful to open bindings in + * all NAT routers between the local and remote endpoints, since most NATs + * do not allow incoming packets to get in before the local endpoint sends + * outgoing packets. * * Specify zero to disable this feature. * - * Default: 600 msec + * Default: 600 msec (which gives a good probability that some RTP + * packets will reach the destination, but without + * filling up the jitter buffer on the remote end). */ #ifndef PJMEDIA_STREAM_VAD_SUSPEND_MSEC # define PJMEDIA_STREAM_VAD_SUSPEND_MSEC 600 @@ -186,6 +191,26 @@ /** + * Specify the maximum duration of a silence period in the codec. + * This is useful, for example, to keep the NAT binding open in the firewall + * and to prevent the server from disconnecting the call because no + * RTP packet is received. + * + * This only applies to codecs that use PJMEDIA's VAD (pretty much + * everything including iLBC, except Speex, which has its own DTX + * mechanism). + * + * Use (-1) to disable this feature. + * + * Default: 8000 samples (one second at 8 KHz). + * + */ +#ifndef PJMEDIA_CODEC_MAX_SILENCE_PERIOD +# define PJMEDIA_CODEC_MAX_SILENCE_PERIOD 8000 +#endif + + +/** * Suggested or default threshold to be set for fixed silence detection * or as starting threshold for adaptive silence detection. The threshold * has the range from zero to 255. 
diff --git a/pjmedia/src/pjmedia-codec/gsm.c b/pjmedia/src/pjmedia-codec/gsm.c index f774a5e5..8118bcba 100644 --- a/pjmedia/src/pjmedia-codec/gsm.c +++ b/pjmedia/src/pjmedia-codec/gsm.c @@ -119,6 +119,7 @@ struct gsm_data pjmedia_plc *plc; pj_bool_t vad_enabled; pjmedia_silence_det *vad; + pj_timestamp last_tx; }; @@ -365,6 +366,9 @@ static pj_status_t gsm_dealloc_codec( pjmedia_codec_factory *factory, pjmedia_plc_save(gsm_data->plc, frame); } + /* Re-init silence_period */ + pj_set_timestamp32(&gsm_data->last_tx, 0, 0); + /* Put in the free list. */ pj_mutex_lock(gsm_codec_factory.mutex); pj_list_push_front(&gsm_codec_factory.codec_list, codec); @@ -497,23 +501,31 @@ static pj_status_t gsm_codec_encode( pjmedia_codec *codec, if (output_buf_len < 33) return PJMEDIA_CODEC_EFRMTOOSHORT; - if (input->size < 320) - return PJMEDIA_CODEC_EPCMTOOSHORT; + PJ_ASSERT_RETURN(input->size==320, PJMEDIA_CODEC_EPCMFRMINLEN); /* Detect silence */ if (gsm_data->vad_enabled) { pj_bool_t is_silence; + pj_int32_t silence_duration; + + silence_duration = pj_timestamp_diff32(&gsm_data->last_tx, + &input->timestamp); is_silence = pjmedia_silence_det_detect(gsm_data->vad, input->buf, - input->size / 2, + (input->size >> 1), NULL); - if (is_silence) { + if (is_silence && + PJMEDIA_CODEC_MAX_SILENCE_PERIOD != -1 && + silence_duration < PJMEDIA_CODEC_MAX_SILENCE_PERIOD) + { output->type = PJMEDIA_FRAME_TYPE_NONE; output->buf = NULL; output->size = 0; - output->timestamp.u64 = input->timestamp.u64; + output->timestamp = input->timestamp; return PJ_SUCCESS; + } else { + gsm_data->last_tx = input->timestamp; } } diff --git a/pjmedia/src/pjmedia-codec/ilbc.c b/pjmedia/src/pjmedia-codec/ilbc.c index a21b6889..52cf9406 100644 --- a/pjmedia/src/pjmedia-codec/ilbc.c +++ b/pjmedia/src/pjmedia-codec/ilbc.c @@ -128,6 +128,7 @@ struct ilbc_codec pjmedia_silence_det *vad; pj_bool_t vad_enabled; pj_bool_t plc_enabled; + pj_timestamp last_tx; pj_bool_t enc_ready; iLBC_Enc_Inst_t enc; @@ -426,6 +427,11 @@ 
static pj_status_t ilbc_codec_open(pjmedia_codec *codec, if (status != PJ_SUCCESS) return status; + /* Init last_tx (not necessary because of zalloc, but better + * be safe in case someone remove zalloc later. + */ + pj_set_timestamp32(&ilbc_codec->last_tx, 0, 0); + PJ_LOG(5,(ilbc_codec->obj_name, "iLBC codec opened, encoder mode=%d, decoder mode=%d", attr->setting.enc_fmtp_mode, attr->setting.dec_fmtp_mode)); @@ -512,23 +518,32 @@ static pj_status_t ilbc_codec_encode(pjmedia_codec *codec, if (output_buf_len < ilbc_codec->enc_frame_size) return PJMEDIA_CODEC_EFRMTOOSHORT; - if (input->size != ilbc_codec->enc_samples_per_frame * 2) + if (input->size != (ilbc_codec->enc_samples_per_frame << 1)) return PJMEDIA_CODEC_EPCMFRMINLEN; /* Detect silence */ if (ilbc_codec->vad_enabled) { pj_bool_t is_silence; + pj_int32_t silence_period; + + silence_period = pj_timestamp_diff32(&ilbc_codec->last_tx, + &input->timestamp); is_silence = pjmedia_silence_det_detect(ilbc_codec->vad, input->buf, - input->size / 2, + (input->size >> 1), NULL); - if (is_silence) { + if (is_silence && + PJMEDIA_CODEC_MAX_SILENCE_PERIOD != -1 && + silence_period < PJMEDIA_CODEC_MAX_SILENCE_PERIOD) + { output->type = PJMEDIA_FRAME_TYPE_NONE; output->buf = NULL; output->size = 0; - output->timestamp.u64 = input->timestamp.u64; + output->timestamp = input->timestamp; return PJ_SUCCESS; + } else { + ilbc_codec->last_tx = input->timestamp; } } @@ -544,7 +559,7 @@ static pj_status_t ilbc_codec_encode(pjmedia_codec *codec, output->type = PJMEDIA_FRAME_TYPE_AUDIO; output->size = ilbc_codec->enc.no_of_bytes; - output->timestamp.u64 = input->timestamp.u64; + output->timestamp = input->timestamp; return PJ_SUCCESS; } @@ -563,7 +578,7 @@ static pj_status_t ilbc_codec_decode(pjmedia_codec *codec, pj_assert(ilbc_codec != NULL); PJ_ASSERT_RETURN(input && output, PJ_EINVAL); - if (output_buf_len < ilbc_codec->dec_samples_per_frame*2) + if (output_buf_len < (ilbc_codec->dec_samples_per_frame << 1)) return 
PJMEDIA_CODEC_EPCMTOOSHORT; if (input->size != ilbc_codec->dec_frame_size) @@ -577,9 +592,9 @@ static pj_status_t ilbc_codec_decode(pjmedia_codec *codec, for (i=0; i<ilbc_codec->dec_samples_per_frame; ++i) { ((short*)output->buf)[i] = (short)ilbc_codec->dec_block[i]; } - output->size = ilbc_codec->dec_samples_per_frame * 2; + output->size = (ilbc_codec->dec_samples_per_frame << 1); output->type = PJMEDIA_FRAME_TYPE_AUDIO; - output->timestamp.u64 = input->timestamp.u64; + output->timestamp = input->timestamp; return PJ_SUCCESS; } @@ -598,7 +613,7 @@ static pj_status_t ilbc_codec_recover(pjmedia_codec *codec, pj_assert(ilbc_codec != NULL); PJ_ASSERT_RETURN(output, PJ_EINVAL); - if (output_buf_len < ilbc_codec->dec_samples_per_frame*2) + if (output_buf_len < (ilbc_codec->dec_samples_per_frame << 1)) return PJMEDIA_CODEC_EPCMTOOSHORT; /* Decode to temporary buffer */ @@ -608,7 +623,7 @@ static pj_status_t ilbc_codec_recover(pjmedia_codec *codec, for (i=0; i<ilbc_codec->dec_samples_per_frame; ++i) { ((short*)output->buf)[i] = (short)ilbc_codec->dec_block[i]; } - output->size = ilbc_codec->dec_samples_per_frame * 2; + output->size = (ilbc_codec->dec_samples_per_frame << 1); output->type = PJMEDIA_FRAME_TYPE_AUDIO; return PJ_SUCCESS; diff --git a/pjmedia/src/pjmedia/g711.c b/pjmedia/src/pjmedia/g711.c index 99b4ec3d..afa469c1 100644 --- a/pjmedia/src/pjmedia/g711.c +++ b/pjmedia/src/pjmedia/g711.c @@ -125,6 +125,7 @@ struct g711_private pjmedia_plc *plc; pj_bool_t vad_enabled; pjmedia_silence_det *vad; + pj_timestamp last_tx; }; @@ -465,37 +466,48 @@ static pj_status_t g711_encode(pjmedia_codec *codec, struct g711_private *priv = codec->codec_data; /* Check output buffer length */ - if (output_buf_len < input->size / 2) + if (output_buf_len < (input->size >> 1)) return PJMEDIA_CODEC_EFRMTOOSHORT; /* Detect silence if VAD is enabled */ if (priv->vad_enabled) { pj_bool_t is_silence; + pj_int32_t silence_period; + + silence_period = pj_timestamp_diff32(&priv->last_tx, + 
&input->timestamp); is_silence = pjmedia_silence_det_detect(priv->vad, input->buf, - input->size / 2, NULL); - if (is_silence) { + (input->size >> 1), NULL); + if (is_silence && + PJMEDIA_CODEC_MAX_SILENCE_PERIOD != -1 && + silence_period < PJMEDIA_CODEC_MAX_SILENCE_PERIOD) + { output->type = PJMEDIA_FRAME_TYPE_NONE; output->buf = NULL; output->size = 0; - output->timestamp.u64 = input->timestamp.u64; + output->timestamp = input->timestamp; return PJ_SUCCESS; + } else { + priv->last_tx = input->timestamp; } } /* Encode */ if (priv->pt == PJMEDIA_RTP_PT_PCMA) { - unsigned i; + unsigned i, n; pj_uint8_t *dst = output->buf; - for (i=0; i!=input->size/2; ++i, ++dst) { + n = (input->size >> 1); + for (i=0; i!=n; ++i, ++dst) { *dst = pjmedia_linear2alaw(samples[i]); } } else if (priv->pt == PJMEDIA_RTP_PT_PCMU) { - unsigned i; + unsigned i, n; pj_uint8_t *dst = output->buf; - for (i=0; i!=input->size/2; ++i, ++dst) { + n = (input->size >> 1); + for (i=0; i!=n; ++i, ++dst) { *dst = pjmedia_linear2ulaw(samples[i]); } @@ -504,7 +516,7 @@ static pj_status_t g711_encode(pjmedia_codec *codec, } output->type = PJMEDIA_FRAME_TYPE_AUDIO; - output->size = input->size / 2; + output->size = (input->size >> 1); return PJ_SUCCESS; } @@ -517,7 +529,7 @@ static pj_status_t g711_decode(pjmedia_codec *codec, struct g711_private *priv = codec->codec_data; /* Check output buffer length */ - PJ_ASSERT_RETURN(output_buf_len >= input->size * 2, + PJ_ASSERT_RETURN(output_buf_len >= (input->size << 1), PJMEDIA_CODEC_EPCMTOOSHORT); /* Input buffer MUST have exactly 80 bytes long */ @@ -547,7 +559,7 @@ static pj_status_t g711_decode(pjmedia_codec *codec, } output->type = PJMEDIA_FRAME_TYPE_AUDIO; - output->size = input->size * 2; + output->size = (input->size << 1); if (priv->plc_enabled) pjmedia_plc_save( priv->plc, output->buf); diff --git a/pjmedia/src/pjmedia/silencedet.c b/pjmedia/src/pjmedia/silencedet.c index 6d6db1bf..e72dfd08 100644 --- a/pjmedia/src/pjmedia/silencedet.c +++ 
b/pjmedia/src/pjmedia/silencedet.c @@ -142,7 +142,7 @@ PJ_DEF(pj_status_t) pjmedia_silence_det_set_params( pjmedia_silence_det *sd, if (min_signal < 0) min_signal = sd->ptime; if (recalc_time < 0) - recalc_time = 5000; + recalc_time = 2000; sd->min_signal_cnt = min_signal / sd->ptime; sd->min_silence_cnt = min_silence / sd->ptime; @@ -256,10 +256,10 @@ PJ_DEF(pj_bool_t) pjmedia_silence_det_apply( pjmedia_silence_det *sd, /* Adjust according to signal/silence proportions. */ if (pct_signal > 95) { - new_threshold += (sd->weakest_signal - sd->cur_threshold)/4; + new_threshold += (sd->weakest_signal+1 - sd->cur_threshold)/2; } else if (pct_signal < 5) { new_threshold = (sd->cur_threshold+sd->loudest_silence)/2+1; - } else if (pct_signal > 90) { + } else if (pct_signal > 80) { new_threshold++; } else if (pct_signal < 10) { new_threshold--; @@ -268,9 +268,12 @@ PJ_DEF(pj_bool_t) pjmedia_silence_det_apply( pjmedia_silence_det *sd, } if (updated && sd->cur_threshold != new_threshold) { + PJ_LOG(5,(sd->objname, + "Vad cur_threshold updated %d-->%d. " + "Signal lo=%d", + sd->cur_threshold, new_threshold, + sd->weakest_signal)); sd->cur_threshold = new_threshold; - PJ_LOG(5,(sd->objname, "Vad cur_threshold updated to %d", - sd->cur_threshold)); } } diff --git a/pjmedia/src/pjmedia/stream.c b/pjmedia/src/pjmedia/stream.c index 748c222f..de6ca8e7 100644 --- a/pjmedia/src/pjmedia/stream.c +++ b/pjmedia/src/pjmedia/stream.c @@ -561,13 +561,21 @@ static pj_status_t put_frame_imp( pjmedia_port *port, &rtphdrlen); } else if (frame->type != PJMEDIA_FRAME_TYPE_NONE) { - unsigned ts; + unsigned ts, codec_samples_per_frame; /* Repeatedly call encode if there are multiple frames to be * sent. 
*/ + codec_samples_per_frame = stream->codec_param.info.enc_ptime * + stream->codec_param.info.clock_rate / + 1000; + if (codec_samples_per_frame == 0) { + codec_samples_per_frame = stream->codec_param.info.frm_ptime * + stream->codec_param.info.clock_rate / + 1000; + } - for (ts=0; ts<ts_len; ts += samples_per_frame) { + for (ts=0; ts<ts_len; ts += codec_samples_per_frame) { pjmedia_frame tmp_out_frame, tmp_in_frame; unsigned bytes_per_sample, max_size; @@ -575,8 +583,9 @@ static pj_status_t put_frame_imp( pjmedia_port *port, bytes_per_sample = stream->codec_param.info.pcm_bits_per_sample/8; /* Split original PCM input frame into base frame size */ + tmp_in_frame.timestamp.u64 = frame->timestamp.u64 + ts; tmp_in_frame.buf = ((char*)frame->buf) + ts * bytes_per_sample; - tmp_in_frame.size = samples_per_frame * bytes_per_sample; + tmp_in_frame.size = codec_samples_per_frame * bytes_per_sample; tmp_in_frame.type = PJMEDIA_FRAME_TYPE_AUDIO; /* Set output frame position */ @@ -689,11 +698,31 @@ static pj_status_t put_frame( pjmedia_port *port, const pjmedia_frame *frame ) { pjmedia_stream *stream = port->port_data.pdata; - pjmedia_frame tmp_in_frame; + pjmedia_frame tmp_zero_frame; unsigned samples_per_frame; samples_per_frame = stream->enc_samples_per_frame; + /* http://www.pjsip.org/trac/ticket/56: + * when input is PJMEDIA_FRAME_TYPE_NONE, feed zero PCM frame + * instead so that encoder can decide whether or not to transmit + * silence frame. + */ + if (frame->type == PJMEDIA_FRAME_TYPE_NONE && + samples_per_frame <= ZERO_PCM_MAX_SIZE) + { + pj_memcpy(&tmp_zero_frame, frame, sizeof(pjmedia_frame)); + frame = &tmp_zero_frame; + + tmp_zero_frame.buf = zero_frame; + tmp_zero_frame.size = samples_per_frame * 2; + tmp_zero_frame.type = PJMEDIA_FRAME_TYPE_AUDIO; + } + +#if 0 + // This is no longer needed because each TYPE_NONE frame will + // be converted into zero frame above + /* If VAD is temporarily disabled during creation, feed zero PCM frame * to the codec. 
*/ @@ -709,6 +738,7 @@ static pj_status_t put_frame( pjmedia_port *port, tmp_in_frame.size = samples_per_frame * 2; tmp_in_frame.type = PJMEDIA_FRAME_TYPE_AUDIO; } +#endif /* If VAD is temporarily disabled during creation, enable it * after transmitting for VAD_SUSPEND_SEC seconds. |