From 5f1ff63b18fbf86eaa27798a35ae8979e3a01b11 Mon Sep 17 00:00:00 2001 From: Nanang Izzuddin Date: Thu, 5 Jun 2008 10:50:40 +0000 Subject: Ticket #473: - fixed issue on Speex multiple frames (encoding: encoded bits concatenation & decoding: frames parsing) - updated pjmedia stream & codecs on encoding multiple frames - introduced bit_info in pjmedia_frame and jitter buffer git-svn-id: http://svn.pjsip.org/repos/pjproject/trunk@1983 74dad513-b988-da41-8d7b-12977e46ad98 --- pjmedia/src/pjmedia-codec/g722.c | 12 +- pjmedia/src/pjmedia-codec/gsm.c | 26 ++-- pjmedia/src/pjmedia-codec/ilbc.c | 42 ++++--- pjmedia/src/pjmedia-codec/l16.c | 4 + pjmedia/src/pjmedia-codec/speex_codec.c | 215 +++++++++++++++++++++++++------- 5 files changed, 218 insertions(+), 81 deletions(-) (limited to 'pjmedia/src/pjmedia-codec') diff --git a/pjmedia/src/pjmedia-codec/g722.c b/pjmedia/src/pjmedia-codec/g722.c index 4a89b182..aa54c177 100644 --- a/pjmedia/src/pjmedia-codec/g722.c +++ b/pjmedia/src/pjmedia-codec/g722.c @@ -532,14 +532,10 @@ static pj_status_t g722_codec_encode(pjmedia_codec *codec, struct g722_data *g722_data = (struct g722_data*) codec->codec_data; pj_status_t status; - pj_assert(g722_data != NULL); - PJ_ASSERT_RETURN(input && output, PJ_EINVAL); - - if (output_buf_len < FRAME_LEN) - return PJMEDIA_CODEC_EFRMTOOSHORT; + pj_assert(g722_data && input && output); - PJ_ASSERT_RETURN(input->size/2 == SAMPLES_PER_FRAME, - PJMEDIA_CODEC_EPCMFRMINLEN); + PJ_ASSERT_RETURN((input->size >> 2) <= output_buf_len, + PJMEDIA_CODEC_EFRMTOOSHORT); /* Detect silence */ if (g722_data->vad_enabled) { @@ -570,7 +566,7 @@ static pj_status_t g722_codec_encode(pjmedia_codec *codec, /* Encode to temporary buffer */ output->size = output_buf_len; status = g722_enc_encode(&g722_data->encoder, (pj_int16_t*)input->buf, - SAMPLES_PER_FRAME, output->buf, &output->size); + (input->size >> 1), output->buf, &output->size); if (status != PJ_SUCCESS) { output->size = 0; output->buf = NULL; diff --git a/pjmedia/src/pjmedia-codec/gsm.c b/pjmedia/src/pjmedia-codec/gsm.c index 8bc33bb4..555901f8 100644 --- a/pjmedia/src/pjmedia-codec/gsm.c +++ b/pjmedia/src/pjmedia-codec/gsm.c @@ -515,14 +515,17 @@ static pj_status_t gsm_codec_encode( pjmedia_codec *codec, struct pjmedia_frame *output) { struct gsm_data *gsm_data = (struct gsm_data*) codec->codec_data; + pj_int16_t *pcm_in; + unsigned in_size; - pj_assert(gsm_data != NULL); - PJ_ASSERT_RETURN(input && output, PJ_EINVAL); - - if (output_buf_len < 33) - return PJMEDIA_CODEC_EFRMTOOSHORT; + pj_assert(gsm_data && input && output); + + pcm_in = (pj_int16_t*)input->buf; + in_size = input->size; - PJ_ASSERT_RETURN(input->size==320, PJMEDIA_CODEC_EPCMFRMINLEN); + PJ_ASSERT_RETURN(in_size % 320 == 0, PJMEDIA_CODEC_EPCMFRMINLEN); + PJ_ASSERT_RETURN(output_buf_len >= 33 * in_size/320, + PJMEDIA_CODEC_EFRMTOOSHORT); /* Detect silence */ if (gsm_data->vad_enabled) { @@ -551,10 +554,15 @@ static pj_status_t gsm_codec_encode( pjmedia_codec *codec, } /* Encode */ - gsm_encode(gsm_data->encoder, (short*)input->buf, - (unsigned char*)output->buf); + output->size = 0; + while (in_size >= 320) { + gsm_encode(gsm_data->encoder, pcm_in, + (unsigned char*)output->buf + output->size); + pcm_in += 160; + output->size += 33; + in_size -= 320; + } - output->size = 33; output->type = PJMEDIA_FRAME_TYPE_AUDIO; return PJ_SUCCESS; diff --git a/pjmedia/src/pjmedia-codec/ilbc.c b/pjmedia/src/pjmedia-codec/ilbc.c index 7819867f..2134e77b 100644 --- a/pjmedia/src/pjmedia-codec/ilbc.c +++ b/pjmedia/src/pjmedia-codec/ilbc.c @@ -510,16 +510,19 @@ static pj_status_t ilbc_codec_encode(pjmedia_codec *codec, struct pjmedia_frame *output) { struct ilbc_codec *ilbc_codec = (struct ilbc_codec*)codec; - unsigned i; + pj_int16_t *pcm_in; + unsigned nsamples; - pj_assert(ilbc_codec != NULL); - PJ_ASSERT_RETURN(input && output, PJ_EINVAL); + pj_assert(ilbc_codec && input && output); - if (output_buf_len < ilbc_codec->enc_frame_size) - return PJMEDIA_CODEC_EFRMTOOSHORT; + pcm_in = (pj_int16_t*)input->buf; + nsamples = input->size >> 1; - if (input->size != (ilbc_codec->enc_samples_per_frame << 1)) - return PJMEDIA_CODEC_EPCMFRMINLEN; + PJ_ASSERT_RETURN(nsamples % ilbc_codec->enc_samples_per_frame == 0, + PJMEDIA_CODEC_EPCMFRMINLEN); + PJ_ASSERT_RETURN(output_buf_len >= ilbc_codec->enc_frame_size * nsamples / + ilbc_codec->enc_samples_per_frame, + PJMEDIA_CODEC_EFRMTOOSHORT); /* Detect silence */ if (ilbc_codec->vad_enabled) { @@ -547,18 +550,25 @@ static pj_status_t ilbc_codec_encode(pjmedia_codec *codec, } } - /* Convert to float */ - for (i=0; ienc_samples_per_frame; ++i) { - ilbc_codec->enc_block[i] = (float) (((pj_int16_t*)input->buf)[i]); - } - /* Encode */ - iLBC_encode((unsigned char *)output->buf, - ilbc_codec->enc_block, - &ilbc_codec->enc); + output->size = 0; + while (nsamples >= ilbc_codec->enc_samples_per_frame) { + unsigned i; + + /* Convert to float */ + for (i=0; ienc_samples_per_frame; ++i) { + ilbc_codec->enc_block[i] = (float) (*pcm_in++); + } + + iLBC_encode((unsigned char *)output->buf + output->size, + ilbc_codec->enc_block, + &ilbc_codec->enc); + + output->size += ilbc_codec->enc.no_of_bytes; + nsamples -= ilbc_codec->enc_samples_per_frame; + } output->type = PJMEDIA_FRAME_TYPE_AUDIO; - output->size = ilbc_codec->enc.no_of_bytes; output->timestamp = input->timestamp; return PJ_SUCCESS; diff --git a/pjmedia/src/pjmedia-codec/l16.c b/pjmedia/src/pjmedia-codec/l16.c index df456045..0ccc9309 100644 --- a/pjmedia/src/pjmedia-codec/l16.c +++ b/pjmedia/src/pjmedia-codec/l16.c @@ -564,6 +564,8 @@ static pj_status_t l16_encode(pjmedia_codec *codec, #if defined(PJ_IS_LITTLE_ENDIAN) && PJ_IS_LITTLE_ENDIAN!=0 while (samp!=samp_end) *samp_out++ = pj_htons(*samp++); +#else + pjmedia_copy_samples(samp_out, samp, input->size >> 1); #endif @@ -596,6 +598,8 @@ static pj_status_t l16_decode(pjmedia_codec *codec, #if defined(PJ_IS_LITTLE_ENDIAN) && PJ_IS_LITTLE_ENDIAN!=0 while (samp!=samp_end) *samp_out++ = pj_htons(*samp++); +#else + pjmedia_copy_samples(samp_out, samp, input->size >> 1); #endif diff --git a/pjmedia/src/pjmedia-codec/speex_codec.c b/pjmedia/src/pjmedia-codec/speex_codec.c index dc2c7400..05d6b67b 100644 --- a/pjmedia/src/pjmedia-codec/speex_codec.c +++ b/pjmedia/src/pjmedia-codec/speex_codec.c @@ -160,9 +160,9 @@ static pj_status_t get_speex_info( struct speex_param *p ) if (!state) return PJMEDIA_CODEC_EFAILED; - /* Set the quality */ - if (p->quality != -1) - speex_encoder_ctl(state, SPEEX_SET_QUALITY, &p->quality); + /* We have to get maximum bitrate, so let's set maximum quality */ + tmp = 10; + speex_encoder_ctl(state, SPEEX_SET_QUALITY, &tmp); /* Sampling rate. */ speex_encoder_ctl(state, SPEEX_SET_SAMPLING_RATE, &p->clock_rate); @@ -595,9 +595,6 @@ static pj_status_t spx_codec_open( pjmedia_codec *codec, spx = (struct spx_private*) codec->codec_data; id = spx->param_id; - /* Only supports one frame per packet */ - PJ_ASSERT_RETURN(attr->setting.frm_per_pkt==1, PJ_EINVAL); - /* * Create and initialize encoder. */ @@ -687,9 +684,6 @@ static pj_status_t spx_codec_modify(pjmedia_codec *codec, spx = (struct spx_private*) codec->codec_data; - /* Only supports one frame per packet */ - PJ_ASSERT_RETURN(attr->setting.frm_per_pkt==1, PJ_EINVAL); - /* VAD */ tmp = (attr->setting.vad != 0); speex_encoder_ctl(spx->enc, SPEEX_SET_VAD, &tmp); @@ -702,6 +696,119 @@ static pj_status_t spx_codec_modify(pjmedia_codec *codec, return PJ_SUCCESS; } +#if 0 +# define TRACE__(args) PJ_LOG(5,args) +#else +# define TRACE__(args) +#endif + +#undef THIS_FUNC +#define THIS_FUNC "speex_get_next_frame" + +#define NB_SUBMODES 16 +#define NB_SUBMODE_BITS 4 + +#define SB_SUBMODES 8 +#define SB_SUBMODE_BITS 3 + +/* This function will iterate frames & submodes in the Speex bits. + * Returns 0 if a frame found, otherwise returns -1. + */ +int speex_get_next_frame(SpeexBits *bits) +{ + static const int inband_skip_table[NB_SUBMODES] = + {1, 1, 4, 4, 4, 4, 4, 4, 8, 8, 16, 16, 32, 32, 64, 64 }; + static const int wb_skip_table[SB_SUBMODES] = + {SB_SUBMODE_BITS+1, 36, 112, 192, 352, -1, -1, -1}; + + unsigned submode; + unsigned nb_count = 0; + + while (speex_bits_remaining(bits) >= 5) { + unsigned wb_count = 0; + unsigned bit_ptr = bits->bitPtr; + unsigned char_ptr = bits->charPtr; + + /* WB frame */ + while ((speex_bits_remaining(bits) >= 4) + && speex_bits_unpack_unsigned(bits, 1)) + { + int advance; + + submode = speex_bits_unpack_unsigned(bits, 3); + advance = wb_skip_table[submode]; + if (advance < 0) { + TRACE__((THIS_FUNC, "Invalid mode encountered. " + "The stream is corrupted.")); + return -1; + } + TRACE__((THIS_FUNC, "WB layer skipped: %d bits", advance)); + advance -= (SB_SUBMODE_BITS+1); + speex_bits_advance(bits, advance); + + bit_ptr = bits->bitPtr; + char_ptr = bits->charPtr; + + /* Consecutive subband frames may not exceed 2 frames */ + if (++wb_count > 2) + return -1; + } + + /* End of bits, return the frame */ + if (speex_bits_remaining(bits) < 4) { + TRACE__((THIS_FUNC, "End of stream")); + return 0; + } + + /* Stop iteration, return the frame */ + if (nb_count > 0) { + bits->bitPtr = bit_ptr; + bits->charPtr = char_ptr; + return 0; + } + + /* Get control bits */ + submode = speex_bits_unpack_unsigned(bits, 4); + TRACE__((THIS_FUNC, "Control bits: %d at %d", + submode, bits->charPtr*8+bits->bitPtr)); + + if (submode == 15) { + TRACE__((THIS_FUNC, "Found submode: terminator")); + return 0; + } else if (submode == 14) { + /* in-band signal; next 4 bits contain signal id */ + submode = speex_bits_unpack_unsigned(bits, 4); + TRACE__((THIS_FUNC, "Found submode: in-band %d bits", + inband_skip_table[submode])); + speex_bits_advance(bits, inband_skip_table[submode]); + } else if (submode == 13) { + /* user in-band; next 5 bits contain msg len */ + submode = speex_bits_unpack_unsigned(bits, 5); + TRACE__((THIS_FUNC, "Found submode: user-band %d bytes", submode)); + speex_bits_advance(bits, submode * 8); + } else if (submode > 8) { + TRACE__((THIS_FUNC, "Unknown sub-mode %d", submode)); + return 0; + } else { + /* NB frame */ + unsigned int advance = submode; + speex_mode_query(&speex_nb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance); + if (advance < 0) { + TRACE__((THIS_FUNC, "Invalid mode encountered. " + "The stream is corrupted.")); + return -1; + } + TRACE__((THIS_FUNC, "Submode %d: %d bits", submode, advance)); + advance -= (NB_SUBMODE_BITS+1); + speex_bits_advance(bits, advance); + + ++nb_count; + } + } + + return 0; +} + /* * Get frames in the packet. @@ -713,45 +820,42 @@ static pj_status_t spx_codec_parse( pjmedia_codec *codec, unsigned *frame_cnt, pjmedia_frame frames[]) { - struct spx_private *spx; - unsigned frame_size, samples_per_frame; - unsigned count; - - spx = (struct spx_private*) codec->codec_data; - - frame_size = spx_factory.speex_param[spx->param_id].framesize; - samples_per_frame = spx_factory.speex_param[spx->param_id].samples_per_frame; - - /* Don't really know how to do this... */ - count = 0; - while (pkt_size >= frame_size && count < *frame_cnt) { - frames[count].buf = pkt; - frames[count].size = frame_size; + struct spx_private *spx = (struct spx_private*) codec->codec_data; + unsigned samples_per_frame; + unsigned count = 0; + int char_ptr = 0; + int bit_ptr = 0; + + samples_per_frame=spx_factory.speex_param[spx->param_id].samples_per_frame; + + /* Copy the data into the speex bit-stream */ + speex_bits_read_from(&spx->dec_bits, (char*)pkt, pkt_size); + + while (speex_get_next_frame(&spx->dec_bits) == 0 && + spx->dec_bits.charPtr != char_ptr) + { + frames[count].buf = (char*)pkt + char_ptr; + /* Bit info contains start bit offset of the frame */ + frames[count].bit_info = bit_ptr; frames[count].type = PJMEDIA_FRAME_TYPE_AUDIO; frames[count].timestamp.u64 = ts->u64 + count * samples_per_frame; + frames[count].size = spx->dec_bits.charPtr - char_ptr; + if (spx->dec_bits.bitPtr) + ++frames[count].size; - pkt_size -= frame_size; - ++count; - pkt = ((char*)pkt) + frame_size; - } + bit_ptr = spx->dec_bits.bitPtr; + char_ptr = spx->dec_bits.charPtr; - /* Just in case speex has silence frame which size is less than normal - * frame size... - */ - if (pkt_size && count < *frame_cnt) { - frames[count].buf = pkt; - frames[count].size = pkt_size; - frames[count].type = PJMEDIA_FRAME_TYPE_AUDIO; - frames[count].timestamp.u64 = ts->u64 + count * samples_per_frame; ++count; } *frame_cnt = count; + return PJ_SUCCESS; } /* - * Encode frame. + * Encode frames. */ static pj_status_t spx_codec_encode( pjmedia_codec *codec, const struct pjmedia_frame *input, @@ -759,8 +863,10 @@ static pj_status_t spx_codec_encode( pjmedia_codec *codec, struct pjmedia_frame *output) { struct spx_private *spx; - unsigned sz; - int tx; + unsigned samples_per_frame; + int tx = 0; + spx_int16_t *pcm_in = (spx_int16_t*)input->buf; + unsigned nsamples; spx = (struct spx_private*) codec->codec_data; @@ -772,12 +878,21 @@ static pj_status_t spx_codec_encode( pjmedia_codec *codec, return PJ_SUCCESS; } + nsamples = input->size >> 1; + samples_per_frame=spx_factory.speex_param[spx->param_id].samples_per_frame; + + PJ_ASSERT_RETURN(nsamples % samples_per_frame == 0, + PJMEDIA_CODEC_EPCMFRMINLEN); + /* Flush all the bits in the struct so we can encode a new frame */ speex_bits_reset(&spx->enc_bits); - /* Encode the frame */ - tx = speex_encode_int(spx->enc, (spx_int16_t*)input->buf, - &spx->enc_bits); + /* Encode the frames */ + while (nsamples >= samples_per_frame) { + tx += speex_encode_int(spx->enc, pcm_in, &spx->enc_bits); + pcm_in += samples_per_frame; + nsamples -= samples_per_frame; + } /* Check if we need not to transmit the frame (DTX) */ if (tx == 0) { @@ -789,8 +904,7 @@ static pj_status_t spx_codec_encode( pjmedia_codec *codec, } /* Check size. */ - sz = speex_bits_nbytes(&spx->enc_bits); - pj_assert(sz <= output_buf_len); + pj_assert(speex_bits_nbytes(&spx->enc_bits) <= (int)output_buf_len); /* Copy the bits to an array of char that can be written */ output->size = speex_bits_write(&spx->enc_bits, @@ -810,14 +924,17 @@ static pj_status_t spx_codec_decode( pjmedia_codec *codec, struct pjmedia_frame *output) { struct spx_private *spx; + unsigned samples_per_frame; spx = (struct spx_private*) codec->codec_data; + samples_per_frame=spx_factory.speex_param[spx->param_id].samples_per_frame; - PJ_ASSERT_RETURN(output_buf_len >= 320, PJMEDIA_CODEC_EPCMTOOSHORT); + PJ_ASSERT_RETURN(output_buf_len >= samples_per_frame << 1, + PJMEDIA_CODEC_EPCMTOOSHORT); if (input->type != PJMEDIA_FRAME_TYPE_AUDIO) { - pjmedia_zero_samples((pj_int16_t*)output->buf, 160); - output->size = 320; + pjmedia_zero_samples((pj_int16_t*)output->buf, samples_per_frame); + output->size = samples_per_frame << 1; output->timestamp.u64 = input->timestamp.u64; output->type = PJMEDIA_FRAME_TYPE_AUDIO; return PJ_SUCCESS; @@ -825,15 +942,17 @@ static pj_status_t spx_codec_decode( pjmedia_codec *codec, /* Copy the data into the bit-stream struct */ speex_bits_read_from(&spx->dec_bits, (char*)input->buf, input->size); + + /* Set Speex dec_bits pointer to the start bit of the frame */ + speex_bits_advance(&spx->dec_bits, input->bit_info); /* Decode the data */ speex_decode_int(spx->dec, &spx->dec_bits, (spx_int16_t*)output->buf); output->type = PJMEDIA_FRAME_TYPE_AUDIO; - output->size = 320; + output->size = samples_per_frame << 1; output->timestamp.u64 = input->timestamp.u64; - return PJ_SUCCESS; } -- cgit v1.2.3