From 4978aaf239c287dc2037db12e892b1ae4981a1d4 Mon Sep 17 00:00:00 2001 From: Benny Prijono Date: Sun, 17 Sep 2006 15:09:58 +0000 Subject: Now really checked in the new PLC software! git-svn-id: http://svn.pjsip.org/repos/pjproject/trunk@724 74dad513-b988-da41-8d7b-12977e46ad98 --- pjmedia/src/pjmedia/plc_steveu.c | 338 +++++++++++++++++++++++++++++++++++++++ pjmedia/src/pjmedia/plc_steveu.h | 153 ++++++++++++++++++ 2 files changed, 491 insertions(+) create mode 100644 pjmedia/src/pjmedia/plc_steveu.c create mode 100644 pjmedia/src/pjmedia/plc_steveu.h (limited to 'pjmedia') diff --git a/pjmedia/src/pjmedia/plc_steveu.c b/pjmedia/src/pjmedia/plc_steveu.c new file mode 100644 index 00000000..3326b748 --- /dev/null +++ b/pjmedia/src/pjmedia/plc_steveu.c @@ -0,0 +1,338 @@ +/* + * SpanDSP - a series of DSP components for telephony + * + * plc.c + * + * Written by Steve Underwood + * + * Copyright (C) 2004 Steve Underwood + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * This version may be optionally licenced under the GNU LGPL licence. + * This version is disclaimed to DIGIUM for inclusion in the Asterisk project. + */ + +/*! 
\file */ + +#include +#include +#include +#include +#include +#include + +#include "plc_steveu.h" + +#if !defined(FALSE) +#define FALSE 0 +#endif +#if !defined(TRUE) +#define TRUE (!FALSE) +#endif + +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif + +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif + +//#define PJ_HAS_RINT 1 + + +/* We do a straight line fade to zero volume in 50ms when we are filling in for missing data. */ +#define ATTENUATION_INCREMENT 0.0025 /* Attenuation per sample */ + +#define ms_to_samples(t) (((t)*SAMPLE_RATE)/1000) + + +#if defined(PJ_HAS_RINT) && PJ_HAS_RINT!=0 +#define RINT(d) rint(d) +#else +double RINT(double d) +{ + double f = floor(d); + double c = ceil(d); + + if (c-d > d-f) + return f; + else if (c-d < d-f) + return c; + else if (d >= 0) { + if (f/2==f) + return f; + else + return c; + } else { + if (c/2==c) + return c; + else + return f; + } +} +#endif + + +PJ_INLINE(pj_int16_t) fsaturate(double damp) +{ + if (damp > 32767.0) + return INT16_MAX; + else if (damp < -32768.0) + return INT16_MIN; + else { + return (pj_int16_t) RINT(damp); + } +} + +static void save_history(plc_state_t *s, pj_int16_t *buf, int len) +{ + if (len >= PLC_HISTORY_LEN) + { + /* Just keep the last part of the new data, starting at the beginning of the buffer */ + memcpy(s->history, buf + len - PLC_HISTORY_LEN, sizeof(pj_int16_t)*PLC_HISTORY_LEN); + s->buf_ptr = 0; + return; + } + if (s->buf_ptr + len > PLC_HISTORY_LEN) + { + /* Wraps around - must break into two sections */ + memcpy(s->history + s->buf_ptr, buf, sizeof(pj_int16_t)*(PLC_HISTORY_LEN - s->buf_ptr)); + len -= (PLC_HISTORY_LEN - s->buf_ptr); + memcpy(s->history, buf + (PLC_HISTORY_LEN - s->buf_ptr), sizeof(pj_int16_t)*len); + s->buf_ptr = len; + return; + } + /* Can use just one section */ + memcpy(s->history + s->buf_ptr, buf, sizeof(pj_int16_t)*len); + s->buf_ptr += len; +} +/*- End of function --------------------------------------------------------*/ + +static void 
normalise_history(plc_state_t *s) +{ + pj_int16_t tmp[PLC_HISTORY_LEN]; + + if (s->buf_ptr == 0) + return; + memcpy(tmp, s->history, sizeof(pj_int16_t)*s->buf_ptr); + memcpy(s->history, s->history + s->buf_ptr, sizeof(pj_int16_t)*(PLC_HISTORY_LEN - s->buf_ptr)); + memcpy(s->history + PLC_HISTORY_LEN - s->buf_ptr, tmp, sizeof(pj_int16_t)*s->buf_ptr); + s->buf_ptr = 0; +} +/*- End of function --------------------------------------------------------*/ + +PJ_INLINE(int) amdf_pitch(int min_pitch, int max_pitch, pj_int16_t amp[], int len) +{ + int i; + int j; + int acc; + int min_acc; + int pitch; + + pitch = min_pitch; + min_acc = INT_MAX; + for (i = max_pitch; i <= min_pitch; i++) + { + acc = 0; + for (j = 0; j < len; j++) + acc += abs(amp[i + j] - amp[j]); + if (acc < min_acc) + { + min_acc = acc; + pitch = i; + } + } + return pitch; +} +/*- End of function --------------------------------------------------------*/ + +int plc_rx(plc_state_t *s, pj_int16_t amp[], int len) +{ + int i; + /*int overlap_len;*/ + int pitch_overlap; + float old_step; + float new_step; + float old_weight; + float new_weight; + float gain; + + if (s->missing_samples) + { + /* Although we have a real signal, we need to smooth it to fit well + with the synthetic signal we used for the previous block */ + + /* The start of the real data is overlapped with the next 1/4 cycle + of the synthetic data. 
*/ + pitch_overlap = s->pitch >> 2; + if (pitch_overlap > len) + pitch_overlap = len; + gain = 1.0 - s->missing_samples*ATTENUATION_INCREMENT; + if (gain < 0.0) + gain = 0.0; + new_step = 1.0/pitch_overlap; + old_step = new_step*gain; + new_weight = new_step; + old_weight = (1.0 - new_step)*gain; + for (i = 0; i < pitch_overlap; i++) + { + amp[i] = fsaturate(old_weight*s->pitchbuf[s->pitch_offset] + new_weight*amp[i]); + if (++s->pitch_offset >= s->pitch) + s->pitch_offset = 0; + new_weight += new_step; + old_weight -= old_step; + if (old_weight < 0.0) + old_weight = 0.0; + } + s->missing_samples = 0; + } + save_history(s, amp, len); + return len; +} +/*- End of function --------------------------------------------------------*/ + +int plc_fillin(plc_state_t *s, pj_int16_t amp[], int len) +{ + /*pj_int16_t tmp[PLC_PITCH_OVERLAP_MAX];*/ + int i; + int pitch_overlap; + float old_step; + float new_step; + float old_weight; + float new_weight; + float gain; + pj_int16_t *orig_amp; + int orig_len; + + orig_amp = amp; + orig_len = len; + if (s->missing_samples == 0) + { + /* As the gap in real speech starts we need to assess the last known pitch, + and prepare the synthetic data we will use for fill-in */ + normalise_history(s); + s->pitch = amdf_pitch(PLC_PITCH_MIN, PLC_PITCH_MAX, s->history + PLC_HISTORY_LEN - CORRELATION_SPAN - PLC_PITCH_MIN, CORRELATION_SPAN); + /* We overlap a 1/4 wavelength */ + pitch_overlap = s->pitch >> 2; + /* Cook up a single cycle of pitch, using a single of the real signal with 1/4 + cycle OLA'ed to make the ends join up nicely */ + /* The first 3/4 of the cycle is a simple copy */ + for (i = 0; i < s->pitch - pitch_overlap; i++) + s->pitchbuf[i] = s->history[PLC_HISTORY_LEN - s->pitch + i]; + /* The last 1/4 of the cycle is overlapped with the end of the previous cycle */ + new_step = 1.0/pitch_overlap; + new_weight = new_step; + for ( ; i < s->pitch; i++) + { + s->pitchbuf[i] = s->history[PLC_HISTORY_LEN - s->pitch + i]*(1.0 - new_weight) 
+ s->history[PLC_HISTORY_LEN - 2*s->pitch + i]*new_weight; + new_weight += new_step; + } + /* We should now be ready to fill in the gap with repeated, decaying cycles + of what is in pitchbuf */ + + /* We need to OLA the first 1/4 wavelength of the synthetic data, to smooth + it into the previous real data. To avoid the need to introduce a delay + in the stream, reverse the last 1/4 wavelength, and OLA with that. */ + gain = 1.0; + new_step = 1.0/pitch_overlap; + old_step = new_step; + new_weight = new_step; + old_weight = 1.0 - new_step; + for (i = 0; i < pitch_overlap; i++) + { + amp[i] = fsaturate(old_weight*s->history[PLC_HISTORY_LEN - 1 - i] + new_weight*s->pitchbuf[i]); + new_weight += new_step; + old_weight -= old_step; + if (old_weight < 0.0) + old_weight = 0.0; + } + s->pitch_offset = i; + } + else + { + gain = 1.0 - s->missing_samples*ATTENUATION_INCREMENT; + i = 0; + } + for ( ; gain > 0.0 && i < len; i++) + { + amp[i] = (pj_int16_t)(s->pitchbuf[s->pitch_offset]*gain); + gain = gain - ATTENUATION_INCREMENT; + if (++s->pitch_offset >= s->pitch) + s->pitch_offset = 0; + } + for ( ; i < len; i++) + amp[i] = 0; + s->missing_samples += orig_len; + save_history(s, amp, len); + return len; +} +/*- End of function --------------------------------------------------------*/ + +plc_state_t *plc_init(plc_state_t *s) +{ + memset(s, 0, sizeof(*s)); + return s; +} +/*- End of function --------------------------------------------------------*/ + + +/* + * PJMEDIA specifics + */ +#include +#include +#include + +#define THIS_FILE "plc_steveu.c" + +struct steveu_plc +{ + plc_state_t state; + unsigned samples_per_frame; +}; + +void* pjmedia_plc_steveu_create(pj_pool_t *pool, unsigned c, unsigned f) +{ + struct steveu_plc *splc; + + PJ_ASSERT_RETURN(c==8000, NULL); + PJ_UNUSED_ARG(c); + + splc = pj_pool_alloc(pool, sizeof(struct steveu_plc)); + plc_init(&splc->state); + splc->samples_per_frame = f; + + return splc; +} + +void pjmedia_plc_steveu_save(void *obj, pj_int16_t 
*samples) +{ + struct steveu_plc *splc = obj; + plc_rx(&splc->state, samples, splc->samples_per_frame); +} + +void pjmedia_plc_steveu_generate(void *obj, pj_int16_t *samples) +{ + struct steveu_plc *splc = obj; + //PJ_LOG(5,(THIS_FILE, "PLC: generating lost frame")); + plc_fillin(&splc->state, samples, splc->samples_per_frame); +} + +/*- End of file ------------------------------------------------------------*/ + diff --git a/pjmedia/src/pjmedia/plc_steveu.h b/pjmedia/src/pjmedia/plc_steveu.h new file mode 100644 index 00000000..483e774b --- /dev/null +++ b/pjmedia/src/pjmedia/plc_steveu.h @@ -0,0 +1,153 @@ +/*! \file + * \brief SpanDSP - a series of DSP components for telephony + * + * plc.h + * + * \author Steve Underwood + * + * Copyright (C) 2004 Steve Underwood + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * This version may be optionally licenced under the GNU LGPL licence. + * + * A license has been granted to Digium (via disclaimer) for the use of + * this code. + */ + + +#if !defined(_PLC_H_) +#define _PLC_H_ + + +/*! \page plc_page Packet loss concealment +\section plc_page_sec_1 What does it do? +The packet loss concealment module provides a suitable synthetic fill-in signal, +to minimise the audible effect of lost packets in VoIP applications. 
It is not +tied to any particular codec, and could be used with almost any codec which does not +specify its own procedure for packet loss concealment. + +Where a codec specific concealment procedure exists, the algorithm is usually built +around knowledge of the characteristics of the particular codec. It will, therefore, +generally give better results for that particular codec than this generic concealer will. + +\section plc_page_sec_2 How does it work? +While good packets are being received, the plc_rx() routine keeps a record of the trailing +section of the known speech signal. If a packet is missed, plc_fillin() is called to produce +a synthetic replacement for the real speech signal. The average magnitude difference function +(AMDF) is applied to the last known good signal, to determine its effective pitch. +Based on this, the last pitch period of signal is saved. Essentially, this cycle of speech +will be repeated over and over until the real speech resumes. However, several refinements +are needed to obtain smooth pleasant sounding results. + +- The two ends of the stored cycle of speech will not always fit together smoothly. This can + cause roughness, or even clicks, at the joins between cycles. To soften this, the + 1/4 pitch period of real speech preceding the cycle to be repeated is blended with the last + 1/4 pitch period of the cycle to be repeated, using an overlap-add (OLA) technique (i.e. + in total, the last 5/4 pitch periods of real speech are used). + +- The start of the synthetic speech will not always fit together smoothly with the tail of + real speech passed on before the erasure was identified. Ideally, we would like to modify + the last 1/4 pitch period of the real speech, to blend it into the synthetic speech. However, + it is too late for that. We could have delayed the real speech a little, but that would + require more buffer manipulation, and hurt the efficiency of the no-lost-packets case + (which we hope is the dominant case).
Instead we use a degenerate form of OLA to modify + the start of the synthetic data. The last 1/4 pitch period of real speech is time reversed, + and OLA is used to blend it with the first 1/4 pitch period of synthetic speech. The result + seems quite acceptable. + +- As we progress into the erasure, the chances of the synthetic signal being anything like + correct steadily fall. Therefore, the volume of the synthesized signal is made to decay + linearly, such that after 50ms of missing audio it is reduced to silence. + +- When real speech resumes, an extra 1/4 pitch period of synthetic speech is blended with the + start of the real speech. If the erasure is small, this smooths the transition. If the erasure + is long, and the synthetic signal has faded to zero, the blending softens the start up of the + real signal, avoiding a kind of "click" or "pop" effect that might occur with a sudden onset. + +\section plc_page_sec_3 How do I use it? +Before audio is processed, call plc_init() to create an instance of the packet loss +concealer. For each received audio packet that is acceptable (i.e. not including those being +dropped for being too late) call plc_rx() to record the content of the packet. Note this may +modify the packet a little after a period of packet loss, to blend real and synthetic data smoothly. +When a real packet is not available in time, call plc_fillin() to create a synthetic substitute. +That's it! +*/ + +/*! Minimum allowed pitch (66 Hz) */ +#define PLC_PITCH_MIN 120 +/*! Maximum allowed pitch (200 Hz) */ +#define PLC_PITCH_MAX 40 +/*! Maximum pitch OLA window */ +#define PLC_PITCH_OVERLAP_MAX (PLC_PITCH_MIN >> 2) +/*! The length over which the AMDF function looks for similarity (20 ms) */ +#define CORRELATION_SPAN 160 +/*! History buffer length. The buffer must also be at least 1.25 times + PLC_PITCH_MIN, but that is much smaller than the buffer needs to be for + the pitch assessment.
*/ +#define PLC_HISTORY_LEN (CORRELATION_SPAN + PLC_PITCH_MIN) + +typedef struct +{ + /*! Consecutive erased samples */ + int missing_samples; + /*! Current offset into pitch period */ + int pitch_offset; + /*! Pitch estimate */ + int pitch; + /*! Buffer for a cycle of speech */ + float pitchbuf[PLC_PITCH_MIN]; + /*! History buffer */ + pj_int16_t history[PLC_HISTORY_LEN]; + /*! Current pointer into the history buffer */ + int buf_ptr; +} plc_state_t; + + +#ifdef __cplusplus +extern "C" { +#endif + +/*! Process a block of received audio samples. + \brief Process a block of received audio samples. + \param s The packet loss concealer context. + \param amp The audio sample buffer. + \param len The number of samples in the buffer. + \return The number of samples in the buffer. */ +int plc_rx(plc_state_t *s, pj_int16_t amp[], int len); + +/*! Fill-in a block of missing audio samples. + \brief Fill-in a block of missing audio samples. + \param s The packet loss concealer context. + \param amp The audio sample buffer. + \param len The number of samples to be synthesised. + \return The number of samples synthesized. */ +int plc_fillin(plc_state_t *s, pj_int16_t amp[], int len); + +/*! Initialise a packet loss concealer context. + \brief Initialise a packet loss concealer context. + \param s The packet loss concealer context. + \return A pointer to the packet loss concealer context. */ +plc_state_t *plc_init(plc_state_t *s); + +#ifdef __cplusplus +} +#endif + +#endif +/*- End of file ------------------------------------------------------------*/ + -- cgit v1.2.3