From 2fa99a7ade8748fae314aeab2fea83e9bc88c36c Mon Sep 17 00:00:00 2001 From: Benny Prijono Date: Wed, 26 Jul 2006 17:04:54 +0000 Subject: - Bring speex codec up to date with their SVN trunk - Speex codec should work in FIXED_POINT mode when PJ_HAS_FLOATING_POINT is set to zero. - ulaw2linear will return zero if zero is given (this would make the VAD works better, and it also fixed click noise when call is established/hangup). git-svn-id: http://svn.pjsip.org/repos/pjproject/trunk@628 74dad513-b988-da41-8d7b-12977e46ad98 --- pjmedia/build/Makefile | 4 +- pjmedia/src/pjmedia-codec/speex/_kiss_fft_guts.h | 4 +- pjmedia/src/pjmedia-codec/speex/arch.h | 21 +- pjmedia/src/pjmedia-codec/speex/bits.c | 36 +- pjmedia/src/pjmedia-codec/speex/cb_search.c | 45 +- pjmedia/src/pjmedia-codec/speex/cb_search.h | 4 +- pjmedia/src/pjmedia-codec/speex/cb_search_bfin.h | 14 +- pjmedia/src/pjmedia-codec/speex/config.h | 10 +- pjmedia/src/pjmedia-codec/speex/fftwrap.c | 18 +- pjmedia/src/pjmedia-codec/speex/filters.c | 481 ++++++++++--- pjmedia/src/pjmedia-codec/speex/filters.h | 27 +- pjmedia/src/pjmedia-codec/speex/filters_arm4.h | 10 + pjmedia/src/pjmedia-codec/speex/filters_bfin.h | 342 +++++++++- pjmedia/src/pjmedia-codec/speex/fixed_bfin.h | 81 ++- pjmedia/src/pjmedia-codec/speex/fixed_debug.h | 15 +- pjmedia/src/pjmedia-codec/speex/fixed_generic.h | 12 +- pjmedia/src/pjmedia-codec/speex/gain_table.c | 258 +++---- pjmedia/src/pjmedia-codec/speex/gain_table_lbr.c | 66 +- pjmedia/src/pjmedia-codec/speex/jitter.c | 434 ++++++++---- pjmedia/src/pjmedia-codec/speex/kiss_fft.c | 10 +- pjmedia/src/pjmedia-codec/speex/kiss_fft.h | 13 +- pjmedia/src/pjmedia-codec/speex/kiss_fftr.c | 6 +- pjmedia/src/pjmedia-codec/speex/kiss_fftr.h | 2 +- pjmedia/src/pjmedia-codec/speex/lpc.c | 8 +- pjmedia/src/pjmedia-codec/speex/lsp.c | 333 +++++---- pjmedia/src/pjmedia-codec/speex/ltp.c | 624 ++++++++--------- pjmedia/src/pjmedia-codec/speex/ltp.h | 33 +- pjmedia/src/pjmedia-codec/speex/ltp_arm4.h | 4 +- pjmedia/src/pjmedia-codec/speex/ltp_bfin.h | 304 ++++++++- pjmedia/src/pjmedia-codec/speex/ltp_sse.h | 4 +- pjmedia/src/pjmedia-codec/speex/mdf.c | 147 +++- pjmedia/src/pjmedia-codec/speex/misc.c | 17 +- pjmedia/src/pjmedia-codec/speex/misc.h | 34 +- pjmedia/src/pjmedia-codec/speex/modes.c | 81 +-- pjmedia/src/pjmedia-codec/speex/modes.h | 14 +- pjmedia/src/pjmedia-codec/speex/nb_celp.c | 815 +++++++++++------------ pjmedia/src/pjmedia-codec/speex/nb_celp.h | 159 ++--- pjmedia/src/pjmedia-codec/speex/pseudofloat.h | 94 ++- pjmedia/src/pjmedia-codec/speex/quant_lsp.c | 20 +- pjmedia/src/pjmedia-codec/speex/sb_celp.c | 283 ++++---- pjmedia/src/pjmedia-codec/speex/sb_celp.h | 103 +-- pjmedia/src/pjmedia-codec/speex/speex.h | 7 + pjmedia/src/pjmedia-codec/speex/speex_echo.h | 8 +- pjmedia/src/pjmedia-codec/speex/speex_jitter.h | 61 +- pjmedia/src/pjmedia-codec/speex/speex_stereo.h | 2 +- pjmedia/src/pjmedia-codec/speex/speex_types.h | 14 +- pjmedia/src/pjmedia-codec/speex/stack_alloc.h | 2 +- pjmedia/src/pjmedia-codec/speex/window.c | 94 +++ pjmedia/src/pjmedia-codec/speex_codec.c | 46 +- pjmedia/src/pjmedia/alaw_ulaw.c | 6 + 50 files changed, 3216 insertions(+), 2014 deletions(-) create mode 100644 pjmedia/src/pjmedia-codec/speex/window.c (limited to 'pjmedia') diff --git a/pjmedia/build/Makefile b/pjmedia/build/Makefile index 4e163306..b9861951 100644 --- a/pjmedia/build/Makefile +++ b/pjmedia/build/Makefile @@ -103,8 +103,8 @@ SPEEX_OBJS := speex_codec.o \ speex/nb_celp.o speex/preprocess_spx.o \ speex/quant_lsp.o speex/sb_celp.o speex/smallft.o \ speex/speex.o speex/speex_callbacks.o speex/speex_header.o \ - speex/stereo.o speex/vbr.o speex/vq.o -SPEEX_CFLAGS := -DHAVE_CONFIG=1 -I../src/pjmedia-codec + speex/stereo.o speex/vbr.o speex/vq.o speex/window.o +SPEEX_CFLAGS := -DHAVE_CONFIG_H=1 -I../src/pjmedia-codec export PJMEDIA_CODEC_SRCDIR = ../src/pjmedia-codec export PJMEDIA_CODEC_OBJS += l16.o \ diff --git a/pjmedia/src/pjmedia-codec/speex/_kiss_fft_guts.h b/pjmedia/src/pjmedia-codec/speex/_kiss_fft_guts.h index abbd8b1f..72acee18 100644 --- a/pjmedia/src/pjmedia-codec/speex/_kiss_fft_guts.h +++ b/pjmedia/src/pjmedia-codec/speex/_kiss_fft_guts.h @@ -20,7 +20,6 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND and defines typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ #include "kiss_fft.h" -#include #define MAXFACTORS 32 /* e.g. an fft of length 128 has 4 factors @@ -45,8 +44,9 @@ struct kiss_fft_state{ C_ADDTO( res , a) : res += a * */ #ifdef FIXED_POINT +#include "misc.h" # define FRACBITS 15 -# define SAMPPROD int32_t +# define SAMPPROD spx_int32_t #define SAMP_MAX 32767 #define SAMP_MIN -SAMP_MAX diff --git a/pjmedia/src/pjmedia-codec/speex/arch.h b/pjmedia/src/pjmedia-codec/speex/arch.h index 52066196..05004373 100644 --- a/pjmedia/src/pjmedia-codec/speex/arch.h +++ b/pjmedia/src/pjmedia-codec/speex/arch.h @@ -41,18 +41,12 @@ #define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */ #define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ #define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */ +#define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ #ifdef FIXED_POINT typedef spx_int16_t spx_word16_t; typedef spx_int32_t spx_word32_t; -#ifdef _MSC_VER -typedef __int64 spx_word64_t; -#elif defined NO_LONGLONG -typedef double spx_word64_t; -#else -typedef long long spx_word64_t; -#endif typedef spx_word32_t spx_mem_t; typedef spx_word16_t spx_coef_t; typedef spx_word16_t spx_lsp_t; @@ -103,7 +97,6 @@ typedef float spx_lsp_t; typedef float spx_sig_t; typedef float spx_word16_t; typedef float spx_word32_t; -typedef float spx_word64_t; #define Q15ONE 1.0f #define LPC_SCALING 1.f @@ -146,7 +139,6 @@ typedef float spx_word64_t; #define SUB16(a,b) ((a)-(b)) #define ADD32(a,b) ((a)+(b)) #define SUB32(a,b) ((a)-(b)) -#define ADD64(a,b) ((a)+(b)) #define MULT16_16_16(a,b) ((a)*(b)) #define MULT16_16(a,b) ((spx_word32_t)(a)*(spx_word32_t)(b)) #define MAC16_16(c,a,b) ((c)+(spx_word32_t)(a)*(spx_word32_t)(b)) @@ -161,20 +153,25 @@ typedef float spx_word64_t; #define MAC16_16_Q11(c,a,b) ((c)+(a)*(b)) #define MAC16_16_Q13(c,a,b) ((c)+(a)*(b)) +#define MAC16_16_P13(c,a,b) ((c)+(a)*(b)) #define MULT16_16_Q11_32(a,b) ((a)*(b)) #define MULT16_16_Q13(a,b) ((a)*(b)) #define MULT16_16_Q14(a,b) ((a)*(b)) #define MULT16_16_Q15(a,b) ((a)*(b)) #define MULT16_16_P15(a,b) ((a)*(b)) +#define MULT16_16_P13(a,b) ((a)*(b)) +#define MULT16_16_P14(a,b) ((a)*(b)) -#define DIV32_16(a,b) ((a)/(b)) -#define DIV32(a,b) ((a)/(b)) +#define DIV32_16(a,b) (((spx_word32_t)(a))/(spx_word16_t)(b)) +#define PDIV32_16(a,b) (((spx_word32_t)(a))/(spx_word16_t)(b)) +#define DIV32(a,b) (((spx_word32_t)(a))/(spx_word32_t)(b)) +#define PDIV32(a,b) (((spx_word32_t)(a))/(spx_word32_t)(b)) #endif -#ifdef CONFIG_TI_C55X +#if defined (CONFIG_TI_C54X) || defined (CONFIG_TI_C55X) /* 2 on TI C5x DSP */ #define BYTES_PER_CHAR 2 diff --git a/pjmedia/src/pjmedia-codec/speex/bits.c b/pjmedia/src/pjmedia-codec/speex/bits.c index fae7a9e0..376e804f 100644 --- a/pjmedia/src/pjmedia-codec/speex/bits.c +++ b/pjmedia/src/pjmedia-codec/speex/bits.c @@ -93,28 +93,36 @@ void speex_bits_rewind(SpeexBits *bits) void speex_bits_read_from(SpeexBits *bits, char *chars, int len) { int i; - if (len > bits->buf_size) + int nchars = len / BYTES_PER_CHAR; + if (nchars > bits->buf_size) { speex_warning_int("Packet is larger than allocated buffer: ", len); if (bits->owner) { - char *tmp = (char*)speex_realloc(bits->chars, len); + char *tmp = (char*)speex_realloc(bits->chars, nchars); if (tmp) { - bits->buf_size=len; + bits->buf_size=nchars; bits->chars=tmp; } else { - len=bits->buf_size; + nchars=bits->buf_size; speex_warning("Could not resize input buffer: truncating input"); } } else { speex_warning("Do not own input buffer: truncating input"); - len=bits->buf_size; + nchars=bits->buf_size; } } - for (i=0;ichars[i]=chars[i]; - bits->nbBits=len<<3; +#if (BYTES_PER_CHAR==2) +/* Swap bytes to proper endian order (could be done externally) */ +#define HTOLS(A) ((((A) >> 8)&0xff)|(((A) & 0xff)<<8)) +#else +#define HTOLS(A) (A) +#endif + for (i=0;ichars[i]=HTOLS(chars[i]); + + bits->nbBits=nchars<charPtr=0; bits->bitPtr=0; bits->overflow=0; @@ -161,7 +169,7 @@ void speex_bits_read_whole_bytes(SpeexBits *bits, char *chars, int nbytes) speex_bits_flush(bits); pos=bits->nbBits>>LOG2_BITS_PER_CHAR; for (i=0;ichars[pos+i]=chars[i]; + bits->chars[pos+i]=HTOLS(chars[i]); bits->nbBits+=nchars< ((bits->nbBits+BITS_PER_CHAR-1)>>LOG2_BITS_PER_CHAR)) max_nchars = ((bits->nbBits+BITS_PER_CHAR-1)>>LOG2_BITS_PER_CHAR); -#if BYTES_PER_CHAR==1 -#define HTOLS(A) (A) -#else -#define HTOLS(A) ((((A) >> 8)&0xff)|(((A) & 0xff)<<8)) -#endif + for (i=0;ichars[i]); return max_nchars*BYTES_PER_CHAR; @@ -199,8 +203,8 @@ int speex_bits_write_whole_bytes(SpeexBits *bits, char *chars, int max_nbytes) if (max_nchars > ((bits->nbBits)>>LOG2_BITS_PER_CHAR)) max_nchars = ((bits->nbBits)>>LOG2_BITS_PER_CHAR); for (i=0;ichars[i]; - + chars[i]=HTOLS(bits->chars[i]); + if (bits->bitPtr>0) bits->chars[0]=bits->chars[max_nchars]; else diff --git a/pjmedia/src/pjmedia-codec/speex/cb_search.c b/pjmedia/src/pjmedia-codec/speex/cb_search.c index 234dc2ee..b4a223bc 100644 --- a/pjmedia/src/pjmedia-codec/speex/cb_search.c +++ b/pjmedia/src/pjmedia-codec/speex/cb_search.c @@ -70,7 +70,7 @@ static void compute_weighted_codebook(const signed char *shape_cb, const spx_wor for (k=0;k<=j;k++) resj = MAC16_16(resj,shape[k],r[j-k]); #ifdef FIXED_POINT - res16 = EXTRACT16(SHR32(resj, 11)); + res16 = EXTRACT16(SHR32(resj, 13)); #else res16 = 0.03125f*resj; #endif @@ -88,16 +88,15 @@ static void compute_weighted_codebook(const signed char *shape_cb, const spx_wor static inline void target_update(spx_word16_t *t, spx_word16_t g, spx_word16_t *r, int len) { int n; - int q=0; - for (n=0;n>>= 11;\n\t" + "R0 >>>= 13;\n\t" "A1 += R0.L*R0.L (IS);\n\t" "W[P3++] = R0;\n\t" "P0 += 1;\n\t" @@ -72,7 +72,8 @@ void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t * "[P4] = R1;\n\t" : : "m" (subvect_size), "m" (shape_cb), "m" (r), "m" (resp), "m" (E) - : "A0", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "I0", "I1", "L0", "L1", "A0", "A1", "memory" + : "A0", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "I0", "I1", "L0", + "L1", "A0", "A1", "memory", "LC0", "LC1" ); shape_cb += subvect_size; resp += subvect_size; @@ -83,23 +84,26 @@ void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t * #define OVERRIDE_TARGET_UPDATE static inline void target_update(spx_word16_t *t, spx_word16_t g, spx_word16_t *r, int len) { + if (!len) + return; __asm__ __volatile__ ( "I0 = %0;\n\t" "I1 = %1;\n\t" "L0 = 0;\n\t" "L1 = 0;\n\t" + "R2 = 4096;\n\t" "LOOP tupdate%= LC0 = %3;\n\t" "LOOP_BEGIN tupdate%=;\n\t" "R0.L = W[I0] || R1.L = W[I1++];\n\t" "R1 = (A1 = R1.L*%2.L) (IS);\n\t" - "R1 >>>= 11;\n\t" + "R1 = R1 + R2;\n\t" + "R1 >>>= 13;\n\t" "R0.L = R0.L - R1.L;\n\t" "W[I0++] = R0.L;\n\t" "LOOP_END tupdate%=;\n\t" : : "a" (t), "a" (r), "d" (g), "a" (len) - : "R0", "R1", "A1", "I0", "I1", "L0", "L1" + : "R0", "R1", "R2", "A1", "I0", "I1", "L0", "L1" ); } - diff --git a/pjmedia/src/pjmedia-codec/speex/config.h b/pjmedia/src/pjmedia-codec/speex/config.h index ef3a4a03..6ab22359 100644 --- a/pjmedia/src/pjmedia-codec/speex/config.h +++ b/pjmedia/src/pjmedia-codec/speex/config.h @@ -1,15 +1,15 @@ -#include +/* Check if we need to use the fixed point version */ +#if !defined(PJ_HAS_FLOATING_POINT) || PJ_HAS_FLOATING_POINT==0 +# define FIXED_POINT +#endif + #define inline __inline #define restrict #include "misc.h" -#if !defined(PJ_HAS_FLOATING_POINT) || PJ_HAS_FLOATING_POINT==0 -# define FIXED_POINT -#endif - #ifdef _MSC_VER # pragma warning(disable: 4100) // unreferenced formal parameter # pragma warning(disable: 4101) // unreferenced local variable diff --git a/pjmedia/src/pjmedia-codec/speex/fftwrap.c b/pjmedia/src/pjmedia-codec/speex/fftwrap.c index de093569..79a1de30 100644 --- a/pjmedia/src/pjmedia-codec/speex/fftwrap.c +++ b/pjmedia/src/pjmedia-codec/speex/fftwrap.c @@ -42,6 +42,7 @@ #include "misc.h" +#define MAX_FFT_SIZE 2048 #ifdef FIXED_POINT static int maximize_range(spx_word16_t *in, spx_word16_t *out, spx_word16_t bound, int len) @@ -225,9 +226,8 @@ void spx_ifft(void *table, spx_word16_t *in, spx_word16_t *out) #endif -int fixed_point = 1; #ifdef FIXED_POINT -#include "smallft.h" +/*#include "smallft.h"*/ void spx_fft_float(void *table, float *in, float *out) @@ -239,13 +239,19 @@ void spx_fft_float(void *table, float *in, float *out) int N = ((struct kiss_config *)table)->N; #else #endif +#ifdef VAR_ARRAYS spx_word16_t _in[N]; spx_word16_t _out[N]; +#else + spx_word16_t _in[MAX_FFT_SIZE]; + spx_word16_t _out[MAX_FFT_SIZE]; +#endif for (i=0;iN; #else #endif +#ifdef VAR_ARRAYS spx_word16_t _in[N]; spx_word16_t _out[N]; +#else + spx_word16_t _in[MAX_FFT_SIZE]; + spx_word16_t _out[MAX_FFT_SIZE]; +#endif for (i=0;i SHL32(EXTEND32(SIG_SCALING), 8)) { spx_word16_t scale_1; scale = PSHR32(scale, SIG_SHIFT); - scale_1 = EXTRACT16(DIV32_16(SHL32(EXTEND32(SIG_SCALING),7),scale)); + scale_1 = EXTRACT16(PDIV32_16(SHL32(EXTEND32(SIG_SCALING),7),scale)); for (i=0;i SHR32(EXTEND32(SIG_SCALING), 2)) { spx_word16_t scale_1; scale = PSHR32(scale, SIG_SHIFT-5); scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale); for (i=0;i max_val) + max_val = tmp; + } + if (max_val>16383) + { + spx_word32_t sum=0; + for (i=0;ilast_pitch=0; - mem->last_pitch_gain[0]=mem->last_pitch_gain[1]=mem->last_pitch_gain[2]=0; - mem->smooth_gain=1; -} - #ifdef FIXED_POINT -#define COMB_STEP 32767 +#if 0 +spx_word16_t shift_filt[3][7] = {{-33, 1043, -4551, 19959, 19959, -4551, 1043}, + {-98, 1133, -4425, 29179, 8895, -2328, 444}, + {444, -2328, 8895, 29179, -4425, 1133, -98}}; #else -#define COMB_STEP 1.0 +spx_word16_t shift_filt[3][7] = {{-390, 1540, -4993, 20123, 20123, -4993, 1540}, + {-1064, 2817, -6694, 31589, 6837, -990, -209}, + {-209, -990, 6837, 31589, -6694, 2817, -1064}}; +#endif +#else +#if 0 +float shift_filt[3][7] = {{-9.9369e-04, 3.1831e-02, -1.3889e-01, 6.0910e-01, 6.0910e-01, -1.3889e-01, 3.1831e-02}, + {-0.0029937, 0.0345613, -0.1350474, 0.8904793, 0.2714479, -0.0710304, 0.0135403}, + {0.0135403, -0.0710304, 0.2714479, 0.8904793, -0.1350474, 0.0345613, -0.0029937}}; +#else +float shift_filt[3][7] = {{-0.011915, 0.046995, -0.152373, 0.614108, 0.614108, -0.152373, 0.046995}, + {-0.0324855, 0.0859768, -0.2042986, 0.9640297, 0.2086420, -0.0302054, -0.0063646}, + {-0.0063646, -0.0302054, 0.2086420, 0.9640297, -0.2042986, 0.0859768, -0.0324855}}; +#endif #endif -void comb_filter( -spx_sig_t *exc, /*decoded excitation*/ -spx_sig_t *new_exc, /*enhanced excitation*/ +int interp_pitch( +spx_word16_t *exc, /*decoded excitation*/ +spx_word16_t *interp, /*decoded excitation*/ +int pitch, /*pitch period*/ +int len +) +{ + int i,j,k; + spx_word32_t corr[4][7]; + spx_word32_t maxcorr; + int maxi, maxj; + for (i=0;i<7;i++) + { + corr[0][i] = inner_prod(exc, exc-pitch-3+i, len); + } + for (i=0;i<3;i++) + { + for (j=0;j<7;j++) + { + int i1, i2; + spx_word32_t tmp=0; + i1 = 3-j; + if (i1<0) + i1 = 0; + i2 = 10-j; + if (i2>7) + i2 = 7; + for (k=i1;k maxcorr) + { + maxcorr = corr[i][j]; + maxi=i; + maxj=j; + } + } + } + for (i=0;i0) + { + for (k=0;k<7;k++) + { + tmp += MULT16_16(exc[i-(pitch-maxj+3)+k-3],shift_filt[maxi-1][k]); + } + } else { + tmp = SHL32(exc[i-(pitch-maxj+3)],15); + } + interp[i] = PSHR32(tmp,15); + } + return pitch-maxj+3; +} + +void multicomb( +spx_word16_t *exc, /*decoded excitation*/ +spx_word16_t *new_exc, /*enhanced excitation*/ spx_coef_t *ak, /*LPC filter coefs*/ int p, /*LPC order*/ int nsf, /*sub-frame size*/ int pitch, /*pitch period*/ -spx_word16_t *pitch_gain, /*pitch gain (3-tap)*/ +int max_pitch, spx_word16_t comb_gain, /*gain of comb filter*/ -CombFilterMem *mem +char *stack ) { - int i; - spx_word16_t exc_energy=0, new_exc_energy=0; - spx_word16_t gain; - spx_word16_t step; - spx_word16_t fact; - - /*Compute excitation amplitude prior to enhancement*/ - exc_energy = compute_rms(exc, nsf); - /*for (i=0;ilast_pitch_gain); - if (g > 166) - comb_gain = MULT16_16_Q15(DIV32_16(SHL32(EXTEND32(165),15),g), comb_gain); - if (g < 64) - comb_gain = MULT16_16_Q15(SHL16(g, 9), comb_gain); - } +#ifdef FIXED_POINT + if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],19661)) && #else - { - float g=0; - g = GAIN_SCALING_1*.5*(gain_3tap_to_1tap(pitch_gain)+gain_3tap_to_1tap(mem->last_pitch_gain)); - if (g>1.3) - comb_gain*=1.3/g; - if (g<.5) - comb_gain*=2.*g; - } + if ((nol_pitch_coef[i]>.6*nol_pitch_coef[0]) && #endif - step = DIV32(COMB_STEP, nsf); - fact=0; - - /*Apply pitch comb-filter (filter out noise between pitch harmonics)*/ - for (i=0;ilast_pitch_gain[0],7),exc[i-mem->last_pitch+1]) + - MULT16_32_Q15(SHL16(mem->last_pitch_gain[1],7),exc[i-mem->last_pitch]) + - MULT16_32_Q15(SHL16(mem->last_pitch_gain[2],7),exc[i-mem->last_pitch-1]),2); - - new_exc[i] = exc[i] + MULT16_32_Q15(comb_gain, ADD32(MULT16_32_Q15(fact,exc1), MULT16_32_Q15(SUB16(COMB_STEP,fact), exc2))); + (ABS(2*nol_pitch[i]-corr_pitch)<=2 || ABS(3*nol_pitch[i]-corr_pitch)<=3 || + ABS(4*nol_pitch[i]-corr_pitch)<=4 || ABS(5*nol_pitch[i]-corr_pitch)<=5)) + { + corr_pitch = nol_pitch[i]; + } } - - mem->last_pitch_gain[0] = pitch_gain[0]; - mem->last_pitch_gain[1] = pitch_gain[1]; - mem->last_pitch_gain[2] = pitch_gain[2]; - mem->last_pitch = pitch; - - /*Amplitude after enhancement*/ - new_exc_energy = compute_rms(new_exc, nsf); - - if (exc_energy > new_exc_energy) - exc_energy = new_exc_energy; +#else + corr_pitch = pitch; +#endif + + ALLOC(iexc, 2*nsf, spx_word16_t); - gain = DIV32_16(SHL32(EXTEND32(exc_energy),15),ADD16(1,new_exc_energy)); + interp_pitch(exc, iexc, corr_pitch, 80); + if (corr_pitch>max_pitch) + interp_pitch(exc, iexc+nsf, 2*corr_pitch, 80); + else + interp_pitch(exc, iexc+nsf, -corr_pitch, 80); + /*interp_pitch(exc, iexc+2*nsf, 2*corr_pitch, 80);*/ + + /*printf ("%d %d %f\n", pitch, corr_pitch, max_corr*ener_1);*/ + iexc0_mag = spx_sqrt(1000+inner_prod(iexc,iexc,nsf)); + iexc1_mag = spx_sqrt(1000+inner_prod(iexc+nsf,iexc+nsf,nsf)); + exc_mag = spx_sqrt(1+inner_prod(exc,exc,nsf)); + corr0 = inner_prod(iexc,exc,nsf); + if (corr0<0) + corr0=0; + corr1 = inner_prod(iexc+nsf,exc,nsf); + if (corr1<0) + corr1=0; #ifdef FIXED_POINT - if (gain < 16384) - gain = 16384; -#else - if (gain < .5) - gain=.5; + /* Doesn't cost much to limit the ratio and it makes the rest easier */ + if (SHL32(EXTEND32(iexc0_mag),6) < EXTEND32(exc_mag)) + iexc0_mag = ADD16(1,PSHR16(exc_mag,6)); + if (SHL32(EXTEND32(iexc1_mag),6) < EXTEND32(exc_mag)) + iexc1_mag = ADD16(1,PSHR16(exc_mag,6)); #endif - + if (corr0 > MULT16_16(iexc0_mag,exc_mag)) + pgain1 = QCONST16(1., 14); + else + pgain1 = PDIV32_16(SHL32(PDIV32(corr0, exc_mag),14),iexc0_mag); + if (corr1 > MULT16_16(iexc1_mag,exc_mag)) + pgain2 = QCONST16(1., 14); + else + pgain2 = PDIV32_16(SHL32(PDIV32(corr1, exc_mag),14),iexc1_mag); + gg1 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc0_mag); + gg2 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc1_mag); + if (comb_gain>0) + { #ifdef FIXED_POINT - for (i=0;ismooth_gain = ADD16(MULT16_16_Q15(31457,mem->smooth_gain), MULT16_16_Q15(1311,gain)); - new_exc[i] = MULT16_32_Q15(mem->smooth_gain, new_exc[i]); + c1=c2=0; } +#ifdef FIXED_POINT + g1 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain1),pgain1); + g2 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain2),pgain2); #else - for (i=0;imax_pitch) { - mem->smooth_gain = .96*mem->smooth_gain + .04*gain; - new_exc[i] *= mem->smooth_gain; + gain0 = MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q14(g1,gg1)); + gain1 = MULT16_16_Q15(QCONST16(.3,15),MULT16_16_Q14(g2,gg2)); + } else { + gain0 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g1,gg1)); + gain1 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g2,gg2)); } -#endif + for (i=0;i new_ener) + old_ener = new_ener; + ngain = PDIV32_16(SHL32(EXTEND32(old_ener),14),new_ener); + + for (i=0;i - #define OVERRIDE_NORMALIZE16 int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len) { spx_sig_t max_val=1; int sig_shift; - __asm__ ( "%0 = 0;\n\t" @@ -67,18 +64,17 @@ int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int le ( "I0 = %0;\n\t" "L0 = 0;\n\t" - "I1 = %1;\n\t" - "L1 = 0;\n\t" + "P1 = %1;\n\t" "R0 = [I0++];\n\t" - "LOOP norm_shift%= LC0 = %3 >> 1;\n\t" + "LOOP norm_shift%= LC0 = %3;\n\t" "LOOP_BEGIN norm_shift%=;\n\t" - "R1 = ASHIFT R0 by %2.L || R2 = [I0++];\n\t" - "R3 = ASHIFT R2 by %2.L || R0 = [I0++];\n\t" - "R3 = PACK(R3.L, R1.L);\n\t" - "[I1++] = R3;\n\t" + "R1 = ASHIFT R0 by %2.L || R0 = [I0++];\n\t" + "W[P1++] = R1;\n\t" "LOOP_END norm_shift%=;\n\t" - : : "a" (x), "a" (y), "d" (-sig_shift), "a" (len) - : "I0", "L0", "I1", "L1", "R0", "R1", "R2", "R3", "memory" + "R1 = ASHIFT R0 by %2.L;\n\t" + "W[P1++] = R1;\n\t" + : : "a" (x), "a" (y), "d" (-sig_shift), "a" (len-1) + : "I0", "L0", "P1", "R0", "R1", "memory" ); return sig_shift; } @@ -103,26 +99,26 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d "P0 = %3;\n\t" "I0 = P0;\n\t" - "B0 = P0;\n\t" + "B0 = P0;\n\t" /* numden */ "L0 = 0;\n\t" - "P2 = %0;\n\t" + "P2 = %0;\n\t" /* Fused xy */ "I2 = P2;\n\t" "L2 = 0;\n\t" - "P4 = %6;\n\t" - "P0 = %1;\n\t" - "P1 = %2;\n\t" + "P4 = %6;\n\t" /* mem */ + "P0 = %1;\n\t" /* _x */ + "P1 = %2;\n\t" /* _y */ /* First sample */ "R1 = [P4++];\n\t" - "R1 <<= 1;\n\t" - "R2 = [P0++];\n\t" + "R1 <<= 1;\n\t" /* shift mem */ + "R2 = [P0++];\n\t" /* load x[0] */ "R1 = R1 + R2;\n\t" - "[P1++] = R1;\n\t" + "[P1++] = R1;\n\t" /* store y[0] */ "R1 <<= 2;\n\t" "R2 <<= 2;\n\t" - "R2 = PACK(R1.H, R2.H);\n\t" + "R2 = PACK(R1.H, R2.H);\n\t" /* pack x16 and y16 */ "[P2] = R2;\n\t" /* Samples 1 to ord-1 (using memory) */ @@ -147,13 +143,13 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d "LOOP_END filter_start_inner%=;\n\t" "A0 += A1;\n\t" "R4 = A0;\n\t" - "R4 <<= 1;\n\t" - "R2 = [P0++];\n\t" + "R4 <<= 1;\n\t" /* shift mem */ + "R2 = [P0++];\n\t" /* load x */ "R4 = R4 + R2;\n\t" - "[P1++] = R4;\n\t" + "[P1++] = R4;\n\t" /* store y */ "R4 <<= 2;\n\t" "R2 <<= 2;\n\t" - "R2 = PACK(R4.H, R2.H);\n\t" + "R2 = PACK(R4.H, R2.H);\n\t" /* pack x16 and y16 */ "[P2] = R2;\n\t" "LOOP_END filter_start%=;\n\t" @@ -161,14 +157,14 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d /* Samples ord to N*/ "R0 = %5;\n\t" "R0 <<= 1;\n\t" - "I0 = B0;\n\t" + "I0 = B0;\n\t" /* numden */ "R0 <<= 1;\n\t" "L0 = R0;\n\t" - "R0 = %5;\n\t" - "R2 = %4;\n\t" + "R0 = %5;\n\t" /* org */ + "R2 = %4;\n\t" /* N */ "R2 = R2 - R0;\n\t" - "R4 = [I0++];\n\t" + "R4 = [I0++];\n\t" /* numden */ "LC0 = R2;\n\t" "P3 = R0;\n\t" "R0 <<= 2;\n\t" @@ -176,7 +172,7 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d "I2 = P2;\n\t" "M0 = R0;\n\t" "A1 = A0 = 0;\n\t" - "R5 = [I2--];\n\t" + "R5 = [I2--];\n\t" /* load xy */ "LOOP filter_mid%= LC0;\n\t" "LOOP_BEGIN filter_mid%=;\n\t" "LOOP filter_mid_inner%= LC1=P3;\n\t" @@ -184,9 +180,9 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" "LOOP_END filter_mid_inner%=;\n\t" "R0 = (A0 += A1) || I2 += M0;\n\t" - "R0 = R0 << 1 || R5 = [P0++];\n\t" + "R0 = R0 << 1 || R5 = [P0++];\n\t" /* load x */ "R0 = R0 + R5;\n\t" - "R0 = R0 << 2 || [P1++] = R0;\n\t" + "R0 = R0 << 2 || [P1++] = R0;\n\t" /* shift y | store y */ "R5 = R5 << 2;\n\t" "R5 = PACK(R0.H, R5.H);\n\t" "A1 = A0 = 0 || [I2--] = R5\n\t" @@ -222,6 +218,150 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d } +#define OVERRIDE_FILTER_MEM16 +void filter_mem16(const spx_word16_t *_x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *_y, int N, int ord, spx_mem_t *mem, char *stack) +{ + VARDECL(spx_word32_t *xy2); + VARDECL(spx_word32_t *numden_a); + spx_word32_t *xy; + spx_word16_t *numden; + int i; + + ALLOC(xy2, (N+1), spx_word32_t); + ALLOC(numden_a, (2*ord+2), spx_word32_t); + xy = xy2+1; + numden = (spx_word16_t*) numden_a; + + for (i=0;i>1; + __asm__ ( + "P0 = 15;\n\t" + "R0 = %1;\n\t" + "R1 = %2;\n\t" + //"R0 = R0 + R1;\n\t" + "R0 <<= 1;\n\t" + "DIVS (R0, R1);\n\t" + "LOOP divide%= LC0 = P0;\n\t" + "LOOP_BEGIN divide%=;\n\t" + "DIVQ (R0, R1);\n\t" + "LOOP_END divide%=;\n\t" + "R0 = R0.L;\n\t" + "%0 = R0;\n\t" + : "=m" (res) + : "m" (a), "m" (bb) + : "P0", "R0", "R1", "cc"); + return res; +} + #undef DIV32_16 static inline spx_word16_t DIV32_16(spx_word32_t a, spx_word16_t b) { spx_word32_t res, bb; bb = b; + /* Make the roundinf consistent with the C version + (do we need to do that?)*/ + if (a<0) + a += (b-1); __asm__ ( "P0 = 15;\n\t" "R0 = %1;\n\t" @@ -79,14 +108,12 @@ static inline spx_word32_t MULT16_32_Q15(spx_word16_t a, spx_word32_t b) spx_word32_t res; __asm__ ( - "%1 <<= 1;\n\t" - "A1 = %2.L*%1.L (M,IS);\n\t" - "A1 = A1 >>> 16;\n\t" - "R1 = (A1 += %2.L*%1.H) (IS);\n\t" - "%0 = R1;\n\t" - : "=&d" (res), "=&d" (b) + "A1 = %2.L*%1.L (M);\n\t" + "A1 = A1 >>> 15;\n\t" + "%0 = (A1 += %2.L*%1.H) ;\n\t" + : "=&W" (res), "=&d" (b) : "d" (a), "1" (b) - : "A1", "R1" + : "A1" ); return res; } @@ -97,14 +124,13 @@ static inline spx_word32_t MAC16_32_Q15(spx_word32_t c, spx_word16_t a, spx_word spx_word32_t res; __asm__ ( - "%1 <<= 1;\n\t" - "A1 = %2.L*%1.L (M,IS);\n\t" - "A1 = A1 >>> 16;\n\t" - "R1 = (A1 += %2.L*%1.H) (IS);\n\t" - "%0 = R1 + %4;\n\t" - : "=&d" (res), "=&d" (b) + "A1 = %2.L*%1.L (M);\n\t" + "A1 = A1 >>> 15;\n\t" + "%0 = (A1 += %2.L*%1.H);\n\t" + "%0 = %0 + %4;\n\t" + : "=&W" (res), "=&d" (b) : "d" (a), "1" (b), "d" (c) - : "A1", "R1" + : "A1" ); return res; } @@ -115,14 +141,13 @@ static inline spx_word32_t MULT16_32_Q14(spx_word16_t a, spx_word32_t b) spx_word32_t res; __asm__ ( - "%2 <<= 2;\n\t" - "A1 = %1.L*%2.L (M,IS);\n\t" - "A1 = A1 >>> 16;\n\t" - "R1 = (A1 += %1.L*%2.H) (IS);\n\t" - "%0 = R1;\n\t" - : "=d" (res), "=d" (a), "=d" (b) + "%2 <<= 1;\n\t" + "A1 = %1.L*%2.L (M);\n\t" + "A1 = A1 >>> 15;\n\t" + "%0 = (A1 += %1.L*%2.H);\n\t" + : "=W" (res), "=d" (a), "=d" (b) : "1" (a), "2" (b) - : "A1", "R1" + : "A1" ); return res; } @@ -133,14 +158,14 @@ static inline spx_word32_t MAC16_32_Q14(spx_word32_t c, spx_word16_t a, spx_word spx_word32_t res; __asm__ ( - "%1 <<= 2;\n\t" - "A1 = %2.L*%1.L (M,IS);\n\t" - "A1 = A1 >>> 16;\n\t" - "R1 = (A1 += %2.L*%1.H) (IS);\n\t" - "%0 = R1 + %4;\n\t" - : "=&d" (res), "=&d" (b) + "%1 <<= 1;\n\t" + "A1 = %2.L*%1.L (M);\n\t" + "A1 = A1 >>> 15;\n\t" + "%0 = (A1 += %2.L*%1.H);\n\t" + "%0 = %0 + %4;\n\t" + : "=&W" (res), "=&d" (b) : "d" (a), "1" (b), "d" (c) - : "A1", "R1" + : "A1" ); return res; } diff --git a/pjmedia/src/pjmedia-codec/speex/fixed_debug.h b/pjmedia/src/pjmedia-codec/speex/fixed_debug.h index 7403a18e..65c5712d 100644 --- a/pjmedia/src/pjmedia-codec/speex/fixed_debug.h +++ b/pjmedia/src/pjmedia-codec/speex/fixed_debug.h @@ -40,8 +40,9 @@ extern long long spx_mips; #define MIPS_INC spx_mips++, -#define QCONST16(x,bits) ((spx_word16_t)((x)*(1<<(bits))+(1<<((bits)-1)))) -#define QCONST32(x,bits) ((spx_word32_t)((x)*(1<<(bits))+(1<<((bits)-1)))) +#define QCONST16(x,bits) ((spx_word16_t)(.5+(x)*(((spx_word32_t)1)<<(bits)))) +#define QCONST32(x,bits) ((spx_word32_t)(.5+(x)*(((spx_word32_t)1)<<(bits)))) + #define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768) #define VERIFY_INT(x) ((x)<=2147483647LL&&(x)>=-2147483648LL) @@ -169,7 +170,7 @@ static inline short ADD16(int a, int b) } res = a+b; if (!VERIFY_SHORT(res)) - fprintf (stderr, "ADD16: output is not short: %d\n", res); + fprintf (stderr, "ADD16: output is not short: %d+%d=%d\n", a,b,res); spx_mips++; return res; } @@ -196,7 +197,9 @@ static inline int ADD32(long long a, long long b) } res = a+b; if (!VERIFY_INT(res)) + { fprintf (stderr, "ADD32: output is not int: %d\n", (int)res); + } spx_mips++; return res; } @@ -251,6 +254,8 @@ static inline int MULT16_16(int a, int b) #define MAC16_16(c,a,b) (spx_mips--,ADD32((c),MULT16_16((a),(b)))) #define MAC16_16_Q11(c,a,b) (ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),11)))) #define MAC16_16_Q13(c,a,b) (ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),13)))) +#define MAC16_16_P13(c,a,b) (ADD32((c),SHR(ADD32(4096,MULT16_16((a),(b))),13))) + static inline int MULT16_32_QX(int a, long long b, int Q) { @@ -437,7 +442,7 @@ static inline int DIV32(long long a, long long b) spx_mips+=36; return res; } - - +#define PDIV32(a,b) DIV32(ADD32((a),(b)>>1),b) +#define PDIV32_16(a,b) DIV32_16(ADD32((a),(b)>>1),b) #endif diff --git a/pjmedia/src/pjmedia-codec/speex/fixed_generic.h b/pjmedia/src/pjmedia-codec/speex/fixed_generic.h index d4bdc159..375050c3 100644 --- a/pjmedia/src/pjmedia-codec/speex/fixed_generic.h +++ b/pjmedia/src/pjmedia-codec/speex/fixed_generic.h @@ -35,13 +35,13 @@ #ifndef FIXED_GENERIC_H #define FIXED_GENERIC_H -#define QCONST16(x,bits) ((spx_word16_t)(.5+(x)*(1<<(bits)))) -#define QCONST32(x,bits) ((spx_word32_t)(.5+(x)*(1<<(bits)))) +#define QCONST16(x,bits) ((spx_word16_t)(.5+(x)*(((spx_word32_t)1)<<(bits)))) +#define QCONST32(x,bits) ((spx_word32_t)(.5+(x)*(((spx_word32_t)1)<<(bits)))) #define NEG16(x) (-(x)) #define NEG32(x) (-(x)) -#define EXTRACT16(x) ((spx_word16_t)x) -#define EXTEND32(x) ((spx_word32_t)x) +#define EXTRACT16(x) ((spx_word16_t)(x)) +#define EXTEND32(x) ((spx_word32_t)(x)) #define SHR16(a,shift) ((a) >> (shift)) #define SHL16(a,shift) ((a) << (shift)) #define SHR32(a,shift) ((a) >> (shift)) @@ -61,7 +61,6 @@ #define SUB16(a,b) ((spx_word16_t)(a)-(spx_word16_t)(b)) #define ADD32(a,b) ((spx_word32_t)(a)+(spx_word32_t)(b)) #define SUB32(a,b) ((spx_word32_t)(a)-(spx_word32_t)(b)) -#define ADD64(a,b) ((spx_word64_t)(a)+(spx_word64_t)(b)) /* result fits in 16 bits */ @@ -84,6 +83,7 @@ #define MAC16_16_Q11(c,a,b) (ADD32((c),SHR(MULT16_16((a),(b)),11))) #define MAC16_16_Q13(c,a,b) (ADD32((c),SHR(MULT16_16((a),(b)),13))) +#define MAC16_16_P13(c,a,b) (ADD32((c),SHR(ADD32(4096,MULT16_16((a),(b))),13))) #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) #define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13)) @@ -97,6 +97,8 @@ #define MUL_16_32_R15(a,bh,bl) ADD32(MULT16_16((a),(bh)), SHR(MULT16_16((a),(bl)),15)) #define DIV32_16(a,b) ((spx_word16_t)(((spx_word32_t)(a))/((spx_word16_t)(b)))) +#define PDIV32_16(a,b) ((spx_word16_t)(((spx_word32_t)(a)+((spx_word16_t)(b)>>1))/((spx_word16_t)(b)))) #define DIV32(a,b) (((spx_word32_t)(a))/((spx_word32_t)(b))) +#define PDIV32(a,b) (((spx_word32_t)(a)+((spx_word16_t)(b)>>1))/((spx_word32_t)(b))) #endif diff --git a/pjmedia/src/pjmedia-codec/speex/gain_table.c b/pjmedia/src/pjmedia-codec/speex/gain_table.c index 54a54070..00b82442 100644 --- a/pjmedia/src/pjmedia-codec/speex/gain_table.c +++ b/pjmedia/src/pjmedia-codec/speex/gain_table.c @@ -29,132 +29,132 @@ POSSIBILITY OF SUCH DAMAGE. */ -const signed char gain_cdbk_nb[384] = { --32,-32,-32, --28,-67,-5, --42,-6,-32, --57,-10,-54, --16,27,-41, -19,-19,-40, --45,24,-21, --8,-14,-18, -1,14,-58, --18,-88,-39, --38,21,-18, --19,20,-43, -10,17,-48, --52,-58,-13, --44,-1,-11, --12,-11,-34, -14,0,-46, --37,-35,-34, --25,44,-30, -6,-4,-63, --31,43,-41, --23,30,-43, --43,26,-14, --33,1,-13, --13,18,-37, --46,-73,-45, --36,24,-25, --36,-11,-20, --25,12,-18, --36,-69,-59, --45,6,8, --22,-14,-24, --1,13,-44, --39,-48,-26, --32,31,-37, --33,15,-46, --24,30,-36, --41,31,-23, --50,22,-4, --22,2,-21, --17,30,-34, --7,-60,-28, --38,42,-28, --44,-11,21, --16,8,-44, --39,-55,-43, --11,-35,26, --9,0,-34, --8,121,-81, -7,-16,-22, --37,33,-31, --27,-7,-36, --34,70,-57, --37,-11,-48, --40,17,-1, --33,6,-6, --9,0,-20, --21,69,-33, --29,33,-31, --55,12,-1, --33,27,-22, --50,-33,-47, --50,54,51, --1,-5,-44, --4,22,-40, --39,-66,-25, --33,1,-26, --24,-23,-25, --11,21,-45, --25,-45,-19, --43,105,-16, -5,-21,1, --16,11,-33, --13,-99,-4, --37,33,-15, --25,37,-63, --36,24,-31, --53,-56,-38, --41,-4,4, --33,13,-30, -49,52,-94, --5,-30,-15, -1,38,-40, --23,12,-36, --17,40,-47, --37,-41,-39, --49,34,0, --18,-7,-4, --16,17,-27, -30,5,-62, -4,48,-68, --43,11,-11, --18,19,-15, --23,-62,-39, --42,10,-2, --21,-13,-13, --9,13,-47, --23,-62,-24, --44,60,-21, --18,-3,-52, --22,22,-36, --75,57,16, --19,3,10, --29,23,-38, --5,-62,-51, --51,40,-18, --42,13,-24, --34,14,-20, --56,-75,-26, --26,32,15, --26,17,-29, --7,28,-52, --12,-30,5, --5,-48,-5, -2,2,-43, -21,16,16, --25,-45,-32, --43,18,-10, -9,0,-1, --1,7,-30, -19,-48,-4, --28,25,-29, --22,0,-31, --32,17,-10, --64,-41,-62, --52,15,16, --30,-22,-32, --7,9,-38}; +const signed char gain_cdbk_nb[512] = { +-32, -32, -32, 0, +-28, -67, -5, 33, +-42, -6, -32, 18, +-57, -10, -54, 35, +-16, 27, -41, 42, +19, -19, -40, 36, +-45, 24, -21, 40, +-8, -14, -18, 28, +1, 14, -58, 53, +-18, -88, -39, 39, +-38, 21, -18, 37, +-19, 20, -43, 38, +10, 17, -48, 54, +-52, -58, -13, 33, +-44, -1, -11, 32, +-12, -11, -34, 22, +14, 0, -46, 46, +-37, -35, -34, 5, +-25, 44, -30, 43, +6, -4, -63, 49, +-31, 43, -41, 43, +-23, 30, -43, 41, +-43, 26, -14, 44, +-33, 1, -13, 27, +-13, 18, -37, 37, +-46, -73, -45, 34, +-36, 24, -25, 34, +-36, -11, -20, 19, +-25, 12, -18, 33, +-36, -69, -59, 34, +-45, 6, 8, 46, +-22, -14, -24, 18, +-1, 13, -44, 44, +-39, -48, -26, 15, +-32, 31, -37, 34, +-33, 15, -46, 31, +-24, 30, -36, 37, +-41, 31, -23, 41, +-50, 22, -4, 50, +-22, 2, -21, 28, +-17, 30, -34, 40, +-7, -60, -28, 29, +-38, 42, -28, 42, +-44, -11, 21, 43, +-16, 8, -44, 34, +-39, -55, -43, 21, +-11, -35, 26, 41, +-9, 0, -34, 29, +-8, 121, -81, 113, +7, -16, -22, 33, +-37, 33, -31, 36, +-27, -7, -36, 17, +-34, 70, -57, 65, +-37, -11, -48, 21, +-40, 17, -1, 44, +-33, 6, -6, 33, +-9, 0, -20, 34, +-21, 69, -33, 57, +-29, 33, -31, 35, +-55, 12, -1, 49, +-33, 27, -22, 35, +-50, -33, -47, 17, +-50, 54, 51, 94, +-1, -5, -44, 35, +-4, 22, -40, 45, +-39, -66, -25, 24, +-33, 1, -26, 20, +-24, -23, -25, 12, +-11, 21, -45, 44, +-25, -45, -19, 17, +-43, 105, -16, 82, +5, -21, 1, 41, +-16, 11, -33, 30, +-13, -99, -4, 57, +-37, 33, -15, 44, +-25, 37, -63, 54, +-36, 24, -31, 31, +-53, -56, -38, 26, +-41, -4, 4, 37, +-33, 13, -30, 24, +49, 52, -94, 114, +-5, -30, -15, 23, +1, 38, -40, 56, +-23, 12, -36, 29, +-17, 40, -47, 51, +-37, -41, -39, 11, +-49, 34, 0, 58, +-18, -7, -4, 34, +-16, 17, -27, 35, +30, 5, -62, 65, +4, 48, -68, 76, +-43, 11, -11, 38, +-18, 19, -15, 41, +-23, -62, -39, 23, +-42, 10, -2, 41, +-21, -13, -13, 25, +-9, 13, -47, 42, +-23, -62, -24, 24, +-44, 60, -21, 58, +-18, -3, -52, 32, +-22, 22, -36, 34, +-75, 57, 16, 90, +-19, 3, 10, 45, +-29, 23, -38, 32, +-5, -62, -51, 38, +-51, 40, -18, 53, +-42, 13, -24, 32, +-34, 14, -20, 30, +-56, -75, -26, 37, +-26, 32, 15, 59, +-26, 17, -29, 29, +-7, 28, -52, 53, +-12, -30, 5, 30, +-5, -48, -5, 35, +2, 2, -43, 40, +21, 16, 16, 75, +-25, -45, -32, 10, +-43, 18, -10, 42, +9, 0, -1, 52, +-1, 7, -30, 36, +19, -48, -4, 48, +-28, 25, -29, 32, +-22, 0, -31, 22, +-32, 17, -10, 36, +-64, -41, -62, 36, +-52, 15, 16, 58, +-30, -22, -32, 6, +-7, 9, -38, 36}; diff --git a/pjmedia/src/pjmedia-codec/speex/gain_table_lbr.c b/pjmedia/src/pjmedia-codec/speex/gain_table_lbr.c index 24357f03..3c1c3dba 100644 --- a/pjmedia/src/pjmedia-codec/speex/gain_table_lbr.c +++ b/pjmedia/src/pjmedia-codec/speex/gain_table_lbr.c @@ -29,36 +29,36 @@ POSSIBILITY OF SUCH DAMAGE. */ -const signed char gain_cdbk_lbr[96] = { --32,-32,-32, --31,-58,-16, --41,-24,-43, --56,-22,-55, --13,33,-41, --4,-39,-9, --41,15,-12, --8,-15,-12, -1,2,-44, --22,-66,-42, --38,28,-23, --21,14,-37, -0,21,-50, --53,-71,-27, --37,-1,-19, --19,-5,-28, -6,65,-44, --33,-48,-33, --40,57,-14, --17,4,-45, --31,38,-33, --23,28,-40, --43,29,-12, --34,13,-23, --16,15,-27, --14,-82,-15, --31,25,-32, --21,5,-5, --47,-63,-51, --46,12,3, --28,-17,-29, --10,14,-40}; +const signed char gain_cdbk_lbr[128] = { +-32, -32, -32, 0, +-31, -58, -16, 22, +-41, -24, -43, 14, +-56, -22, -55, 29, +-13, 33, -41, 47, +-4, -39, -9, 29, +-41, 15, -12, 38, +-8, -15, -12, 31, +1, 2, -44, 40, +-22, -66, -42, 27, +-38, 28, -23, 38, +-21, 14, -37, 31, +0, 21, -50, 52, +-53, -71, -27, 33, +-37, -1, -19, 25, +-19, -5, -28, 22, +6, 65, -44, 74, +-33, -48, -33, 9, +-40, 57, -14, 58, +-17, 4, -45, 32, +-31, 38, -33, 36, +-23, 28, -40, 39, +-43, 29, -12, 46, +-34, 13, -23, 28, +-16, 15, -27, 34, +-14, -82, -15, 43, +-31, 25, -32, 29, +-21, 5, -5, 38, +-47, -63, -51, 33, +-46, 12, 3, 47, +-28, -17, -29, 11, +-10, 14, -40, 38}; diff --git a/pjmedia/src/pjmedia-codec/speex/jitter.c b/pjmedia/src/pjmedia-codec/speex/jitter.c index 2860c109..a4c07516 100644 --- a/pjmedia/src/pjmedia-codec/speex/jitter.c +++ b/pjmedia/src/pjmedia-codec/speex/jitter.c @@ -36,9 +36,6 @@ #include "config.h" #endif -#ifndef NULL -#define NULL 0 -#endif #include "misc.h" #include @@ -46,124 +43,162 @@ #include #include -#define LATE_BINS 4 +#define LATE_BINS 10 +#define MAX_MARGIN 30 /**< Number of bins in margin histogram */ -void speex_jitter_init(SpeexJitter *jitter, void *decoder, int sampling_rate) +#define SPEEX_JITTER_MAX_BUFFER_SIZE 200 /**< Maximum number of packets in jitter buffer */ + + + +#define GT32(a,b) (((spx_int32_t)((a)-(b)))>0) +#define GE32(a,b) (((spx_int32_t)((a)-(b)))>=0) +#define LT32(a,b) (((spx_int32_t)((a)-(b)))<0) +#define LE32(a,b) (((spx_int32_t)((a)-(b)))<=0) + +/** Jitter buffer structure */ +struct JitterBuffer_ { + spx_uint32_t pointer_timestamp; /**< Timestamp of what we will *get* next */ + spx_uint32_t current_timestamp; /**< Timestamp of the local clock (what we will *play* next) */ + + char *buf[SPEEX_JITTER_MAX_BUFFER_SIZE]; /**< Buffer of packets (NULL if slot is free) */ + spx_uint32_t timestamp[SPEEX_JITTER_MAX_BUFFER_SIZE]; /**< Timestamp of packet */ + int span[SPEEX_JITTER_MAX_BUFFER_SIZE]; /**< Timestamp of packet */ + int len[SPEEX_JITTER_MAX_BUFFER_SIZE]; /**< Number of bytes in packet */ + + int tick_size; /**< Output granularity */ + int reset_state; /**< True if state was just reset */ + int buffer_margin; /**< How many frames we want to keep in the buffer (lower bound) */ + + int lost_count; /**< Number of consecutive lost packets */ + float shortterm_margin[MAX_MARGIN]; /**< Short term margin histogram */ + float longterm_margin[MAX_MARGIN]; /**< Long term margin histogram */ + float loss_rate; /**< Average loss rate */ +}; + +/** Initialise jitter buffer */ +JitterBuffer *jitter_buffer_init(int tick) +{ + JitterBuffer *jitter = speex_alloc(sizeof(JitterBuffer)); + if (jitter) + { + int i; + for (i=0;ibuf[i]=NULL; + jitter->tick_size = tick; + jitter->buffer_margin = 1; + jitter_buffer_reset(jitter); + } + return jitter; +} + +/** Reset jitter buffer */ +void jitter_buffer_reset(JitterBuffer *jitter) { int i; for (i=0;ilen[i]=-1; - jitter->timestamp[i]=-1; + if (jitter->buf[i]) + { + speex_free(jitter->buf[i]); + jitter->buf[i] = NULL; + } } - - jitter->dec = decoder; - speex_decoder_ctl(decoder, SPEEX_GET_FRAME_SIZE, &jitter->frame_size); - jitter->frame_time = jitter->frame_size; - - speex_bits_init(&jitter->current_packet); - jitter->valid_bits = 0; - - jitter->buffer_size = 4; - - jitter->pointer_timestamp = -jitter->frame_time * jitter->buffer_size; + /* Timestamp is actually undefined at this point */ + jitter->pointer_timestamp = 0; + jitter->current_timestamp = 0; jitter->reset_state = 1; jitter->lost_count = 0; jitter->loss_rate = 0; + for (i=0;ishortterm_margin[i] = 0; + jitter->longterm_margin[i] = 0; + } + /*fprintf (stderr, "reset\n");*/ } -void speex_jitter_destroy(SpeexJitter *jitter) +/** Destroy jitter buffer */ +void jitter_buffer_destroy(JitterBuffer *jitter) { - speex_bits_destroy(&jitter->current_packet); + jitter_buffer_reset(jitter); + speex_free(jitter); } - -void speex_jitter_put(SpeexJitter *jitter, char *packet, int len, int timestamp) +/** Put one packet into the jitter buffer */ +void jitter_buffer_put(JitterBuffer *jitter, const JitterBufferPacket *packet) { int i,j; - int arrival_margin; - + spx_int32_t arrival_margin; + /*fprintf (stderr, "put packet %d %d\n", timestamp, span);*/ if (jitter->reset_state) { jitter->reset_state=0; - jitter->pointer_timestamp = timestamp-jitter->frame_time * jitter->buffer_size; - for (i=0;ishortterm_margin[i] = 0; - jitter->longterm_margin[i] = 0; - } - for (i=0;ilen[i]=-1; - jitter->timestamp[i]=-1; - } - fprintf(stderr, "reset to %d\n", timestamp); + jitter->pointer_timestamp = packet->timestamp; + jitter->current_timestamp = packet->timestamp; + /*fprintf(stderr, "reset to %d\n", timestamp);*/ } /* Cleanup buffer (remove old packets that weren't played) */ for (i=0;itimestamp[i]pointer_timestamp) + if (jitter->buf[i] && LE32(jitter->timestamp[i] + jitter->span[i], jitter->pointer_timestamp)) { - jitter->len[i]=-1; - /*if (jitter->timestamp[i] != -1) - fprintf (stderr, "discarding %d %d\n", jitter->timestamp[i], jitter->pointer_timestamp);*/ + /*fprintf (stderr, "cleaned (not played)\n");*/ + speex_free(jitter->buf[i]); + jitter->buf[i] = NULL; } } /*Find an empty slot in the buffer*/ for (i=0;ilen[i]==-1) + if (jitter->buf[i]==NULL) break; } /*fprintf(stderr, "%d %d %f\n", timestamp, jitter->pointer_timestamp, jitter->drift_average);*/ + /*No place left in the buffer*/ if (i==SPEEX_JITTER_MAX_BUFFER_SIZE) { int earliest=jitter->timestamp[0]; i=0; for (j=1;jtimestamp[j]buf[i] || LT32(jitter->timestamp[j],earliest)) { earliest = jitter->timestamp[j]; i=j; } } - /*fprintf (stderr, "Buffer is full, discarding earliest frame %d (currently at %d)\n", timestamp, jitter->pointer_timestamp);*/ - /*No place left in the buffer*/ - - /*skip some frame(s) */ - /*return;*/ + speex_free(jitter->buf[i]); + jitter->buf[i]=NULL; + if (jitter->lost_count>20) + { + jitter_buffer_reset(jitter); + } + /*fprintf (stderr, "Buffer is full, discarding earliest frame %d (currently at %d)\n", timestamp, jitter->pointer_timestamp);*/ } /* Copy packet in buffer */ - if (len>SPEEX_JITTER_MAX_PACKET_SIZE) - len=SPEEX_JITTER_MAX_PACKET_SIZE; - for (j=0;jbuf[i][j]=packet[j]; - jitter->timestamp[i]=timestamp; - jitter->len[i]=len; + jitter->buf[i]=speex_alloc(packet->len); + for (j=0;jlen;j++) + jitter->buf[i][j]=packet->data[j]; + jitter->timestamp[i]=packet->timestamp; + jitter->span[i]=packet->span; + jitter->len[i]=packet->len; - /* Don't count late packets when adjusting the synchro (we're taking care of them elsewhere) */ - /*if (timestamp <= jitter->pointer_timestamp) - { - fprintf (stderr, "frame for timestamp %d arrived too late (at time %d)\n", timestamp, jitter->pointer_timestamp); - }*/ - /* Adjust the buffer size depending on network conditions */ - arrival_margin = (timestamp - jitter->pointer_timestamp - jitter->frame_time); + arrival_margin = (packet->timestamp - jitter->current_timestamp) - jitter->buffer_margin*jitter->tick_size; - if (arrival_margin >= -LATE_BINS*jitter->frame_time) + if (arrival_margin >= -LATE_BINS*jitter->tick_size) { - int int_margin; + spx_int32_t int_margin; for (i=0;ishortterm_margin[i] *= .98; jitter->longterm_margin[i] *= .995; } - int_margin = (arrival_margin + LATE_BINS*jitter->frame_time)/jitter->frame_time; + int_margin = LATE_BINS + arrival_margin/jitter->tick_size; if (int_margin>MAX_MARGIN-1) int_margin = MAX_MARGIN-1; if (int_margin>=0) @@ -171,21 +206,52 @@ void speex_jitter_put(SpeexJitter *jitter, char *packet, int len, int timestamp) jitter->shortterm_margin[int_margin] += .02; jitter->longterm_margin[int_margin] += .005; } + } else { + + /*fprintf (stderr, "way too late = %d\n", arrival_margin);*/ + if (jitter->lost_count>20) + { + jitter_buffer_reset(jitter); + } } - - /*fprintf (stderr, "margin : %d %d %f %f %f %f\n", arrival_margin, jitter->buffer_size, 100*jitter->loss_rate, 100*jitter->late_ratio, 100*jitter->ontime_ratio, 100*jitter->early_ratio);*/ +#if 0 /* Enable to check how much is being buffered */ + if (rand()%1000==0) + { + int count = 0; + for (j=0;jbuf[j]) + count++; + } + fprintf (stderr, "buffer_size = %d\n", count); + } +#endif } -void speex_jitter_get(SpeexJitter *jitter, short *out, int *current_timestamp) +/** Get one packet from the jitter buffer */ +int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_uint32_t *start_offset) { - int i; - int ret; + int i, j; float late_ratio_short; float late_ratio_long; float ontime_ratio_short; float ontime_ratio_long; float early_ratio_short; float early_ratio_long; + int chunk_size; + int incomplete = 0; + + if (LT32(jitter->current_timestamp+jitter->tick_size, jitter->pointer_timestamp)) + { + jitter->current_timestamp = jitter->pointer_timestamp; + speex_warning("did you forget to call jitter_buffer_tick() by any chance?"); + } + /*fprintf (stderr, "get packet %d %d\n", jitter->pointer_timestamp, jitter->current_timestamp);*/ + + /* FIXME: This should be only what remaining of the current tick */ + chunk_size = jitter->tick_size; + + /* Compiling arrival statistics */ late_ratio_short = 0; late_ratio_long = 0; @@ -204,12 +270,15 @@ void speex_jitter_get(SpeexJitter *jitter, short *out, int *current_timestamp) } if (0&&jitter->pointer_timestamp%1000==0) { - fprintf (stderr, "%f %f %f %f %f %f\n", early_ratio_short, early_ratio_long, ontime_ratio_short, ontime_ratio_long, late_ratio_short, late_ratio_long); + /*fprintf (stderr, "%f %f %f %f %f %f\n", early_ratio_short, early_ratio_long, ontime_ratio_short, ontime_ratio_long, late_ratio_short, late_ratio_long);*/ /*fprintf (stderr, "%f %f\n", early_ratio_short + ontime_ratio_short + late_ratio_short, early_ratio_long + ontime_ratio_long + late_ratio_long);*/ } + /* Adjusting the buffering */ + if (late_ratio_short > .1 || late_ratio_long > .03) { + /* If too many packets are arriving late */ jitter->shortterm_margin[MAX_MARGIN-1] += jitter->shortterm_margin[MAX_MARGIN-2]; jitter->longterm_margin[MAX_MARGIN-1] += jitter->longterm_margin[MAX_MARGIN-2]; for (i=MAX_MARGIN-3;i>=0;i--) @@ -219,18 +288,13 @@ void speex_jitter_get(SpeexJitter *jitter, short *out, int *current_timestamp) } jitter->shortterm_margin[0] = 0; jitter->longterm_margin[0] = 0; - /*fprintf (stderr, "interpolate frame\n");*/ - speex_decode_int(jitter->dec, NULL, (spx_int16_t*)out); - if (current_timestamp) - *current_timestamp = jitter->pointer_timestamp; - return; - } - - /* Increment timestamp */ - jitter->pointer_timestamp += jitter->frame_time; - - if (late_ratio_short + ontime_ratio_short < .005 && late_ratio_long + ontime_ratio_long < .01 && early_ratio_short > .8) + jitter->pointer_timestamp -= jitter->tick_size; + jitter->current_timestamp -= jitter->tick_size; + /*fprintf (stderr, "i");*/ + /*fprintf (stderr, "interpolate (getting some slack)\n");*/ + } else if (late_ratio_short + ontime_ratio_short < .005 && late_ratio_long + ontime_ratio_long < .01 && early_ratio_short > .8) { + /* Many frames arriving early */ jitter->shortterm_margin[0] += jitter->shortterm_margin[1]; jitter->longterm_margin[0] += jitter->longterm_margin[1]; for (i=1;ishortterm_margin[MAX_MARGIN-1] = 0; jitter->longterm_margin[MAX_MARGIN-1] = 0; /*fprintf (stderr, "drop frame\n");*/ - jitter->pointer_timestamp += jitter->frame_time; - } - - if (current_timestamp) - *current_timestamp = jitter->pointer_timestamp; - - /* Send zeros while we fill in the buffer */ - if (jitter->pointer_timestamp<0) - { - for (i=0;iframe_size;i++) - out[i]=0; - return; + /*fprintf (stderr, "d");*/ + jitter->pointer_timestamp += jitter->tick_size; + jitter->current_timestamp += jitter->tick_size; + /*fprintf (stderr, "dropping packet (getting more aggressive)\n");*/ } - /* Search the buffer for a packet with the right timestamp */ + /* Searching for the packet that fits best */ + + /* Search the buffer for a packet with the right timestamp and spanning the whole current chunk */ for (i=0;ilen[i]!=-1 && jitter->timestamp[i]==jitter->pointer_timestamp) + if (jitter->buf[i] && jitter->timestamp[i]==jitter->pointer_timestamp && GE32(jitter->timestamp[i]+jitter->span[i],jitter->pointer_timestamp+chunk_size)) break; } + /* If no match, try for an "older" packet that still spans (fully) the current chunk */ if (i==SPEEX_JITTER_MAX_BUFFER_SIZE) { - /* No packet found */ - if (jitter->valid_bits) + for (i=0;idec, &jitter->current_packet, (spx_int16_t*)out); - if (ret == 0) + if (jitter->buf[i] && jitter->timestamp[i]<=jitter->pointer_timestamp && GE32(jitter->timestamp[i]+jitter->span[i],jitter->pointer_timestamp+chunk_size)) + break; + } + } + + /* If still no match, try for an "older" packet that spans part of the current chunk */ + if (i==SPEEX_JITTER_MAX_BUFFER_SIZE) + { + for (i=0;ibuf[i] && jitter->timestamp[i]<=jitter->pointer_timestamp && GT32(jitter->timestamp[i]+jitter->span[i],jitter->pointer_timestamp)) + break; + } + } + + /* If still no match, try for earliest packet possible */ + if (i==SPEEX_JITTER_MAX_BUFFER_SIZE) + { + int found = 0; + spx_uint32_t best_time=0; + int best_span=0; + int besti=0; + for (i=0;ibuf[i] && LT32(jitter->timestamp[i],jitter->pointer_timestamp+chunk_size) && GE32(jitter->timestamp[i],jitter->pointer_timestamp)) { - jitter->lost_count = 0; - return; - } else { - jitter->valid_bits = 0; + if (!found || LT32(jitter->timestamp[i],best_time) || (jitter->timestamp[i]==best_time && GT32(jitter->span[i],best_span))) + { + best_time = jitter->timestamp[i]; + best_span = jitter->span[i]; + besti = i; + found = 1; + } } } + if (found) + { + i=besti; + incomplete = 1; + /*fprintf (stderr, "incomplete: %d %d %d %d\n", jitter->timestamp[i], jitter->pointer_timestamp, chunk_size, jitter->span[i]);*/ + } + } - /*fprintf (stderr, "lost/late frame %d\n", jitter->pointer_timestamp);*/ - /*Packet is late or lost*/ - speex_decode_int(jitter->dec, NULL, (spx_int16_t*)out); - jitter->lost_count++; - if (jitter->lost_count>=25) + /* If we find something */ + if (i!=SPEEX_JITTER_MAX_BUFFER_SIZE) + { + /* We (obviously) haven't lost this packet */ + jitter->lost_count = 0; + jitter->loss_rate = .999*jitter->loss_rate; + /* Check for potential overflow */ + packet->len = jitter->len[i]; + /* Copy packet */ + for (j=0;jlen;j++) + packet->data[j] = jitter->buf[i][j]; + /* Remove packet */ + speex_free(jitter->buf[i]); + jitter->buf[i] = NULL; + /* Set timestamp and span (if requested) */ + if (start_offset) + *start_offset = jitter->timestamp[i]-jitter->pointer_timestamp; + packet->timestamp = jitter->timestamp[i]; + packet->span = jitter->span[i]; + /* Point at the end of the current packet */ + jitter->pointer_timestamp = jitter->timestamp[i]+jitter->span[i]; + if (incomplete) + return JITTER_BUFFER_INCOMPLETE; + else + return JITTER_BUFFER_OK; + } + + + /* If we haven't found anything worth returning */ + /*fprintf (stderr, "not found\n");*/ + jitter->lost_count++; + /*fprintf (stderr, "m");*/ + /*fprintf (stderr, "lost_count = %d\n", jitter->lost_count);*/ + jitter->loss_rate = .999*jitter->loss_rate + .001; + if (start_offset) + *start_offset = 0; + packet->timestamp = jitter->pointer_timestamp; + packet->span = jitter->tick_size; + jitter->pointer_timestamp += chunk_size; + packet->len = 0; + return JITTER_BUFFER_MISSING; + +} + +/** Get pointer timestamp of jitter buffer */ +int jitter_buffer_get_pointer_timestamp(JitterBuffer *jitter) +{ + return jitter->pointer_timestamp; +} + +void jitter_buffer_tick(JitterBuffer *jitter) +{ + jitter->current_timestamp += jitter->tick_size; +} + + + + + +void speex_jitter_init(SpeexJitter *jitter, void *decoder, int sampling_rate) +{ + jitter->dec = decoder; + speex_decoder_ctl(decoder, SPEEX_GET_FRAME_SIZE, &jitter->frame_size); + + jitter->packets = jitter_buffer_init(jitter->frame_size); + + speex_bits_init(&jitter->current_packet); + jitter->valid_bits = 0; + +} + +void speex_jitter_destroy(SpeexJitter *jitter) +{ + jitter_buffer_destroy(jitter->packets); + speex_bits_destroy(&jitter->current_packet); +} + +void speex_jitter_put(SpeexJitter *jitter, char *packet, int len, int timestamp) +{ + JitterBufferPacket p; + p.data = packet; + p.len = len; + p.timestamp = timestamp; + p.span = jitter->frame_size; + jitter_buffer_put(jitter->packets, &p); +} + +void speex_jitter_get(SpeexJitter *jitter, short *out, int *current_timestamp) +{ + int i; + int ret; + char data[2048]; + JitterBufferPacket packet; + packet.data = data; + + if (jitter->valid_bits) + { + /* Try decoding last received packet */ + ret = speex_decode_int(jitter->dec, &jitter->current_packet, out); + if (ret == 0) { - jitter->lost_count = 0; - jitter->reset_state = 1; - speex_decoder_ctl(jitter->dec, SPEEX_RESET_STATE, NULL); + jitter_buffer_tick(jitter->packets); + return; + } else { + jitter->valid_bits = 0; } - jitter->loss_rate = .999*jitter->loss_rate + .001; + } + + ret = jitter_buffer_get(jitter->packets, &packet, NULL); + + if (ret != JITTER_BUFFER_OK) + { + /* No packet found */ + + /*fprintf (stderr, "lost/late frame\n");*/ + /*Packet is late or lost*/ + speex_decode_int(jitter->dec, NULL, out); } else { - jitter->lost_count = 0; - /* Found the right packet */ - speex_bits_read_from(&jitter->current_packet, jitter->buf[i], jitter->len[i]); - jitter->len[i]=-1; + speex_bits_read_from(&jitter->current_packet, packet.data, packet.len); /* Decode packet */ - ret = speex_decode_int(jitter->dec, &jitter->current_packet, (spx_int16_t*)out); + ret = speex_decode_int(jitter->dec, &jitter->current_packet, out); if (ret == 0) { jitter->valid_bits = 1; @@ -304,13 +498,11 @@ void speex_jitter_get(SpeexJitter *jitter, short *out, int *current_timestamp) for (i=0;iframe_size;i++) out[i]=0; } - jitter->loss_rate = .999*jitter->loss_rate; } - - + jitter_buffer_tick(jitter->packets); } int speex_jitter_get_pointer_timestamp(SpeexJitter *jitter) { - return jitter->pointer_timestamp; + return jitter_buffer_get_pointer_timestamp(jitter->packets); } diff --git a/pjmedia/src/pjmedia-codec/speex/kiss_fft.c b/pjmedia/src/pjmedia-codec/speex/kiss_fft.c index bea55ee8..a0b3724b 100644 --- a/pjmedia/src/pjmedia-codec/speex/kiss_fft.c +++ b/pjmedia/src/pjmedia-codec/speex/kiss_fft.c @@ -32,7 +32,7 @@ static size_t ntmpbuf=0; #define CHECKBUF(buf,nbuf,n) \ do { \ if ( nbuf < (size_t)(n) ) {\ - free(buf); \ + speex_free(buf); \ buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \ nbuf = (size_t)(n); \ } \ @@ -87,7 +87,7 @@ static void kf_bfly4( if (!st->inverse) { int i; kiss_fft_cpx *x=Fout; - for (i=0;i<(int)(4*m);i++) + for (i=0;i<4*m;i++) { x[i].r = PSHR16(x[i].r,2); x[i].i = PSHR16(x[i].i,2); @@ -404,7 +404,7 @@ void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout, if (fin == fout) { CHECKBUF(tmpbuf,ntmpbuf,st->nfft); kf_work(tmpbuf,fin,1,in_stride, st->factors,st); - memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft); + speex_move(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft); }else{ kf_work( fout, fin, 1,in_stride, st->factors,st ); } @@ -421,10 +421,10 @@ void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) */ void kiss_fft_cleanup(void) { - free(scratchbuf); + speex_free(scratchbuf); scratchbuf = NULL; nscratchbuf=0; - free(tmpbuf); + speex_free(tmpbuf); tmpbuf=NULL; ntmpbuf=0; } diff --git a/pjmedia/src/pjmedia-codec/speex/kiss_fft.h b/pjmedia/src/pjmedia-codec/speex/kiss_fft.h index d07b78b8..54627e7d 100644 --- a/pjmedia/src/pjmedia-codec/speex/kiss_fft.h +++ b/pjmedia/src/pjmedia-codec/speex/kiss_fft.h @@ -2,11 +2,8 @@ #define KISS_FFT_H #include -#include #include -#include -//Not available in gcc MacOS X (bennylp) -//#include +#include "misc.h" #ifdef __cplusplus extern "C" { @@ -30,13 +27,13 @@ extern "C" { # define kiss_fft_scalar __m128 #define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes) #else -#define KISS_FFT_MALLOC malloc +#define KISS_FFT_MALLOC speex_alloc #endif #ifdef FIXED_POINT -#include -# define kiss_fft_scalar int16_t +#include "misc.h" +# define kiss_fft_scalar spx_int16_t #else # ifndef kiss_fft_scalar /* default is float */ @@ -95,7 +92,7 @@ void kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout /* If kiss_fft_alloc allocated a buffer, it is one contiguous buffer and can be simply free()d when no longer needed*/ -#define kiss_fft_free free +#define kiss_fft_free speex_free /* Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up diff --git a/pjmedia/src/pjmedia-codec/speex/kiss_fftr.c b/pjmedia/src/pjmedia-codec/speex/kiss_fftr.c index 3ac4db98..b90b7254 100644 --- a/pjmedia/src/pjmedia-codec/speex/kiss_fftr.c +++ b/pjmedia/src/pjmedia-codec/speex/kiss_fftr.c @@ -35,7 +35,7 @@ kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenme size_t subsize, memneeded; if (nfft & 1) { - fprintf(stderr,"Real FFT optimization must be even.\n"); + speex_warning("Real FFT optimization must be even.\n"); return NULL; } nfft >>= 1; @@ -75,7 +75,7 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *fr kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc; if ( st->substate->inverse) { - fprintf(stderr,"kiss fft usage error: improper alloc\n"); + speex_warning("kiss fft usage error: improper alloc\n"); exit(1); } @@ -130,7 +130,7 @@ void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *t int k, ncfft; if (st->substate->inverse == 0) { - fprintf (stderr, "kiss fft usage error: improper alloc\n"); + speex_warning ("kiss fft usage error: improper alloc\n"); exit (1); } diff --git a/pjmedia/src/pjmedia-codec/speex/kiss_fftr.h b/pjmedia/src/pjmedia-codec/speex/kiss_fftr.h index 72e5a577..2e8351a6 100644 --- a/pjmedia/src/pjmedia-codec/speex/kiss_fftr.h +++ b/pjmedia/src/pjmedia-codec/speex/kiss_fftr.h @@ -38,7 +38,7 @@ void kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar * output timedata has nfft scalar points */ -#define kiss_fftr_free free +#define kiss_fftr_free speex_free #ifdef __cplusplus } diff --git a/pjmedia/src/pjmedia-codec/speex/lpc.c b/pjmedia/src/pjmedia-codec/speex/lpc.c index c465faea..fd5d3821 100644 --- a/pjmedia/src/pjmedia-codec/speex/lpc.c +++ b/pjmedia/src/pjmedia-codec/speex/lpc.c @@ -94,7 +94,7 @@ int p for (j = 0; j < i; j++) rr = SUB32(rr,MULT16_16(lpc[j],ac[i - j])); #ifdef FIXED_POINT - r = DIV32_16(rr,ADD16(error,16)); + r = DIV32_16(rr+PSHR32(error,1),ADD16(error,8)); #else r = rr/(error+.003*ac[0]); #endif @@ -103,11 +103,11 @@ int p for (j = 0; j < i>>1; j++) { spx_word16_t tmp = lpc[j]; - lpc[j] = MAC16_16_Q13(lpc[j],r,lpc[i-1-j]); - lpc[i-1-j] = MAC16_16_Q13(lpc[i-1-j],r,tmp); + lpc[j] = MAC16_16_P13(lpc[j],r,lpc[i-1-j]); + lpc[i-1-j] = MAC16_16_P13(lpc[i-1-j],r,tmp); } if (i & 1) - lpc[j] = MAC16_16_Q13(lpc[j],lpc[j],r); + lpc[j] = MAC16_16_P13(lpc[j],lpc[j],r); error = SUB16(error,MULT16_16_Q13(r,MULT16_16_Q13(error,r))); } diff --git a/pjmedia/src/pjmedia-codec/speex/lsp.c b/pjmedia/src/pjmedia-codec/speex/lsp.c index f4350aee..6e7ea311 100644 --- a/pjmedia/src/pjmedia-codec/speex/lsp.c +++ b/pjmedia/src/pjmedia-codec/speex/lsp.c @@ -1,8 +1,6 @@ /*---------------------------------------------------------------------------*\ Original copyright - FILE........: AKSLSPD.C - TYPE........: Turbo C - COMPANY.....: Voicetronix + FILE........: lsp.c AUTHOR......: David Rowe DATE CREATED: 24/2/93 @@ -44,6 +42,43 @@ Heavily modified by Jean-Marc Valin (fixed-point, optimizations, SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/*---------------------------------------------------------------------------*\ + + Introduction to Line Spectrum Pairs (LSPs) + ------------------------------------------ + + LSPs are used to encode the LPC filter coefficients {ak} for + transmission over the channel. LSPs have several properties (like + less sensitivity to quantisation noise) that make them superior to + direct quantisation of {ak}. + + A(z) is a polynomial of order lpcrdr with {ak} as the coefficients. + + A(z) is transformed to P(z) and Q(z) (using a substitution and some + algebra), to obtain something like: + + A(z) = 0.5[P(z)(z+z^-1) + Q(z)(z-z^-1)] (1) + + As you can imagine A(z) has complex zeros all over the z-plane. P(z) + and Q(z) have the very neat property of only having zeros _on_ the + unit circle. So to find them we take a test point z=exp(jw) and + evaluate P (exp(jw)) and Q(exp(jw)) using a grid of points between 0 + and pi. + + The zeros (roots) of P(z) also happen to alternate, which is why we + swap coefficients as we find roots. So the process of finding the + LSP frequencies is basically finding the roots of 5th order + polynomials. + + The root so P(z) and Q(z) occur in symmetrical pairs at +/-w, hence + the name Line Spectrum Pairs (LSPs). + + To convert back to ak we just evaluate (1), "clocking" an impulse + thru it lpcrdr times gives us the impulse response of A(z) which is + {ak}. + +\*---------------------------------------------------------------------------*/ + #ifdef HAVE_CONFIG_H #include "config.h" #endif @@ -63,8 +98,6 @@ Heavily modified by Jean-Marc Valin (fixed-point, optimizations, #ifdef FIXED_POINT - - #define FREQ_SCALE 16384 /*#define ANGLE2X(a) (32768*cos(((a)/8192.)))*/ @@ -73,6 +106,10 @@ Heavily modified by Jean-Marc Valin (fixed-point, optimizations, /*#define X2ANGLE(x) (acos(.00006103515625*(x))*LSP_SCALING)*/ #define X2ANGLE(x) (spx_acos(x)) +#ifdef BFIN_ASM +#include "lsp_bfin.h" +#endif + #else /*#define C1 0.99940307 @@ -88,27 +125,28 @@ Heavily modified by Jean-Marc Valin (fixed-point, optimizations, /*---------------------------------------------------------------------------*\ - FUNCTION....: cheb_poly_eva() + FUNCTION....: cheb_poly_eva() - AUTHOR......: David Rowe - DATE CREATED: 24/2/93 + AUTHOR......: David Rowe + DATE CREATED: 24/2/93 - This function evaluates a series of Chebyshev polynomials + This function evaluates a series of Chebyshev polynomials \*---------------------------------------------------------------------------*/ #ifdef FIXED_POINT -static inline spx_word32_t cheb_poly_eva(spx_word32_t *coef,spx_word16_t x,int m,char *stack) -/* float coef[] coefficients of the polynomial to be evaluated */ -/* float x the point where polynomial is to be evaluated */ -/* int m order of the polynomial */ +#ifndef OVERRIDE_CHEB_POLY_EVA +static inline spx_word32_t cheb_poly_eva( + spx_word16_t *coef, /* P or Q coefs in Q13 format */ + spx_word16_t x, /* cos of freq (-1.0 to 1.0) in Q14 format */ + int m, /* LPC order/2 */ + char *stack +) { int i; - VARDECL(spx_word16_t *T); + spx_word16_t b0, b1; spx_word32_t sum; - int m2=m>>1; - VARDECL(spx_word16_t *coefn); /*Prevents overflows*/ if (x>16383) @@ -116,73 +154,55 @@ static inline spx_word32_t cheb_poly_eva(spx_word32_t *coef,spx_word16_t x,int m if (x<-16383) x = -16383; - /* Allocate memory for Chebyshev series formulation */ - ALLOC(T, m2+1, spx_word16_t); - ALLOC(coefn, m2+1, spx_word16_t); - - for (i=0;i>1; + int k; + float b0, b1, tmp; - /* Allocate memory for Chebyshev series formulation */ - ALLOC(T, m2+1, float); + /* Initial conditions */ + b0=0; /* b_(m+1) */ + b1=0; /* b_(m+2) */ - /* Initialise values */ - T[0]=1; - T[1]=x; - - /* Evaluate Chebyshev series formulation using iterative approach */ - /* Evaluate polynomial and return value also free memory space */ - sum = coef[m2] + coef[m2-1]*x; - x *= 2; - for(i=2;i<=m2;i++) - { - T[i] = x*T[i-1] - T[i-2]; - sum += coef[m2-i] * T[i]; - } - - return sum; + x*=2; + + /* Calculate the b_(k) */ + for(k=m;k>0;k--) + { + tmp=b0; /* tmp holds the previous value of b0 */ + b0=x*b0-b1+coef[m-k]; /* b0 holds its new value based on b0 and b1 */ + b1=tmp; /* b1 holds the previous value of b0 */ + } + + return(-b1+.5*x*b0+coef[m]); } #endif /*---------------------------------------------------------------------------*\ - FUNCTION....: lpc_to_lsp() + FUNCTION....: lpc_to_lsp() - AUTHOR......: David Rowe - DATE CREATED: 24/2/93 + AUTHOR......: David Rowe + DATE CREATED: 24/2/93 This function converts LPC coefficients to LSP coefficients. @@ -210,11 +230,13 @@ int lpc_to_lsp (spx_coef_t *a,int lpcrdr,spx_lsp_t *freq,int nb,spx_word16_t del int i,j,m,flag,k; VARDECL(spx_word32_t *Q); /* ptrs for memory allocation */ VARDECL(spx_word32_t *P); + VARDECL(spx_word16_t *Q16); /* ptrs for memory allocation */ + VARDECL(spx_word16_t *P16); spx_word32_t *px; /* ptrs of respective P'(z) & Q'(z) */ spx_word32_t *qx; spx_word32_t *p; spx_word32_t *q; - spx_word32_t *pt; /* ptr used for cheb_poly_eval() + spx_word16_t *pt; /* ptr used for cheb_poly_eval() whether P' or Q' */ int roots=0; /* DR 8/2/94: number of roots found */ flag = 1; /* program is searching for a root when, @@ -276,20 +298,31 @@ int lpc_to_lsp (spx_coef_t *a,int lpcrdr,spx_lsp_t *freq,int nb,spx_word16_t del px = P; /* re-initialise ptrs */ qx = Q; + /* now that we have computed P and Q convert to 16 bits to + speed up cheb_poly_eval */ + + ALLOC(P16, m+1, spx_word16_t); + ALLOC(Q16, m+1, spx_word16_t); + + for (i=0;i= -FREQ_SCALE)){ spx_word16_t dd; @@ -304,7 +337,7 @@ int lpc_to_lsp (spx_coef_t *a,int lpcrdr,spx_lsp_t *freq,int nb,spx_word16_t del dd *= .5; #endif xr = SUB16(xl, dd); /* interval spacing */ - psumr = cheb_poly_eva(pt,xr,lpcrdr,stack);/* poly(xl-delta_x) */ + psumr = cheb_poly_eva(pt,xr,m,stack);/* poly(xl-delta_x) */ temp_psumr = psumr; temp_xr = xr; @@ -328,7 +361,7 @@ int lpc_to_lsp (spx_coef_t *a,int lpcrdr,spx_lsp_t *freq,int nb,spx_word16_t del #else xm = .5*(xl+xr); /* bisect the interval */ #endif - psumm=cheb_poly_eva(pt,xm,lpcrdr,stack); + psumm=cheb_poly_eva(pt,xm,m,stack); /*if(psumm*psuml>0.)*/ if(!SIGN_CHANGE(psumm,psuml)) { @@ -354,7 +387,6 @@ int lpc_to_lsp (spx_coef_t *a,int lpcrdr,spx_lsp_t *freq,int nb,spx_word16_t del return(roots); } - /*---------------------------------------------------------------------------*\ FUNCTION....: lsp_to_lpc() @@ -362,8 +394,7 @@ int lpc_to_lsp (spx_coef_t *a,int lpcrdr,spx_lsp_t *freq,int nb,spx_word16_t del AUTHOR......: David Rowe DATE CREATED: 24/2/93 - lsp_to_lpc: This function converts LSP coefficients to LPC - coefficients. + Converts LSP coefficients to LPC coefficients. \*---------------------------------------------------------------------------*/ @@ -373,77 +404,119 @@ void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack) /* float *freq array of LSP frequencies in the x domain */ /* float *ak array of LPC coefficients */ /* int lpcrdr order of LPC coefficients */ - - { int i,j; - spx_word32_t xout1,xout2,xin1,xin2; - VARDECL(spx_word32_t *Wp); - spx_word32_t *pw,*n1,*n2,*n3,*n4=NULL; + spx_word32_t xout1,xout2,xin; + spx_word32_t mult, a; VARDECL(spx_word16_t *freqn); + VARDECL(spx_word32_t **xp); + VARDECL(spx_word32_t *xpmem); + VARDECL(spx_word32_t **xq); + VARDECL(spx_word32_t *xqmem); int m = lpcrdr>>1; + + /* + + Reconstruct P(z) and Q(z) by cascading second order polynomials + in form 1 - 2cos(w)z(-1) + z(-2), where w is the LSP frequency. + In the time domain this is: + + y(n) = x(n) - 2cos(w)x(n-1) + x(n-2) + This is what the ALLOCS below are trying to do: + + int xp[m+1][lpcrdr+1+2]; // P matrix in QIMP + int xq[m+1][lpcrdr+1+2]; // Q matrix in QIMP + + These matrices store the output of each stage on each row. The + final (m-th) row has the output of the final (m-th) cascaded + 2nd order filter. The first row is the impulse input to the + system (not written as it is known). + + The version below takes advantage of the fact that a lot of the + outputs are zero or known, for example if we put an inpulse + into the first section the "clock" it 10 times only the first 3 + outputs samples are non-zero (it's an FIR filter). + */ + + ALLOC(xp, (m+1), spx_word32_t*); + ALLOC(xpmem, (m+1)*(lpcrdr+1+2), spx_word32_t); + + ALLOC(xq, (m+1), spx_word32_t*); + ALLOC(xqmem, (m+1)*(lpcrdr+1+2), spx_word32_t); + + for(i=0; i<=m; i++) { + xp[i] = xpmem + i*(lpcrdr+1+2); + xq[i] = xqmem + i*(lpcrdr+1+2); + } + + /* work out 2cos terms in Q14 */ + ALLOC(freqn, lpcrdr, spx_word16_t); - for (i=0;i0) - { - if (xout1 + xout2>SHL32(EXTEND32(32766),8)) - ak[j-1] = 32767; - else if (xout1 + xout2 < -SHL32(EXTEND32(32766),8)) - ak[j-1] = -32767; - else - ak[j-1] = EXTRACT16(PSHR32(ADD32(xout1,xout2),8)); - } else {/*speex_warning_int("ak[0] = ", EXTRACT16(PSHR32(ADD32(xout1,xout2),8)));*/} - *(n4+1) = xin1; - *(n4+2) = xin2; + for(j=1;j<2*(i+1)-1;j++) { + mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]); + xp[i+1][j+2] = ADD32(SUB32(xp[i][j+2], mult), xp[i][j]); + mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]); + xq[i+1][j+2] = ADD32(SUB32(xq[i][j+2], mult), xq[i][j]); + } + + /* for last col xp[i][j+2] = xq[i][j+2] = 0 */ + + mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]); + xp[i+1][j+2] = SUB32(xp[i][j], mult); + mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]); + xq[i+1][j+2] = SUB32(xq[i][j], mult); + } + + /* process last row to extra a{k} */ + + for(j=1;j<=lpcrdr;j++) { + int shift = QIMP-13; - xin1 = 0; - xin2 = 0; + /* final filter sections */ + a = PSHR32(xp[m][j+2] + xout1 + xq[m][j+2] - xout2, shift); + xout1 = xp[m][j+2]; + xout2 = xq[m][j+2]; + + /* hard limit ak's to +/- 32767 */ + + if (a < -32767) a = 32767; + if (a > 32767) a = 32767; + ak[j-1] = (short)a; + } + } + #else void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack) diff --git a/pjmedia/src/pjmedia-codec/speex/ltp.c b/pjmedia/src/pjmedia-codec/speex/ltp.c index 94189c34..9a5a295e 100644 --- a/pjmedia/src/pjmedia-codec/speex/ltp.c +++ b/pjmedia/src/pjmedia-codec/speex/ltp.c @@ -55,7 +55,7 @@ #endif #ifndef OVERRIDE_INNER_PROD -static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) +spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) { spx_word32_t sum=0; len >>= 2; @@ -75,7 +75,7 @@ static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int #ifndef OVERRIDE_PITCH_XCORR #if 0 /* HINT: Enable this for machines with enough registers (i.e. not x86) */ -static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack) +void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack) { int i,j; for (i=0;i0) - { - if (SHR16(corr16[i-start],4)>ener16[i-start]) - tmp = SHL32(EXTEND32(ener16[i-start]),14); - else if (-SHR16(corr16[i-start],4)>ener16[i-start]) - tmp = -SHL32(EXTEND32(ener16[i-start]),14); - else - tmp = SHL32(tmp,10); - g = DIV32_16(tmp, 8+ener16[i-start]); - score[i-start] = MULT16_16(corr16[i-start],g); - } else + spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]); + /* Instead of dividing the tmp by the energy, we multiply on the other side */ + if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start]))) { - score[i-start] = 1; + /* We can safely put it last and then check */ + best_score[N-1]=tmp; + best_ener[N-1]=ener16[i-start]+1; + pitch[N-1]=i; + /* Check if it comes in front of others */ + for (j=0;jMULT16_16(best_score[j],ADD16(1,ener16[i-start]))) + { + for (k=N-1;k>j;k--) + { + best_score[k]=best_score[k-1]; + best_ener[k]=best_ener[k-1]; + pitch[k]=pitch[k-1]; + } + best_score[j]=tmp; + best_ener[j]=ener16[i-start]+1; + pitch[j]=i; + break; + } + } } } } #else for (i=start;i<=end;i++) { - float g = corr[i-start]/(1+energy[i-start]); - if (g>16) - g = 16; - else if (g<-16) - g = -16; - score[i-start] = g*corr[i-start]; - } -#endif - - /* Extract best scores */ - for (i=start;i<=end;i++) - { - if (score[i-start]>best_score[N-1]) + float tmp = corr[i-start]*corr[i-start]; + if (tmp*best_ener[N-1]>best_score[N-1]*(1+energy[i-start])) { for (j=0;j best_score[j]) + if (tmp*best_ener[j]>best_score[j]*(1+energy[i-start])) { for (k=N-1;k>j;k--) { best_score[k]=best_score[k-1]; + best_ener[k]=best_ener[k-1]; pitch[k]=pitch[k-1]; } - best_score[j]=score[i-start]; + best_score[j]=tmp; + best_ener[j]=energy[i-start]+1; pitch[j]=i; break; } } } } +#endif /* Compute open-loop gain */ if (gain) @@ -290,164 +284,131 @@ void open_loop_nbest_pitch(spx_sig_t *sw, int start, int end, int len, int *pitc } } } +#endif + +#ifndef OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ +static int pitch_gain_search_3tap_vq( + const signed char *gain_cdbk, + int gain_cdbk_size, + spx_word16_t *C16, + spx_word16_t max_gain +) +{ + const signed char *ptr=gain_cdbk; + int best_cdbk=0; + spx_word32_t best_sum=-VERY_LARGE32; + spx_word32_t sum=0; + spx_word16_t g[3]; + spx_word16_t pitch_control=64; + spx_word16_t gain_sum; + int i; + + for (i=0;ibest_sum && gain_sum<=max_gain) { + best_sum=sum; + best_cdbk=i; + } + } + return best_cdbk; +} +#endif /** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */ -static spx_word64_t pitch_gain_search_3tap( -const spx_sig_t target[], /* Target vector */ +static spx_word32_t pitch_gain_search_3tap( +const spx_word16_t target[], /* Target vector */ const spx_coef_t ak[], /* LPCs for this subframe */ const spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */ const spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */ spx_sig_t exc[], /* Excitation */ -const void *par, +const signed char *gain_cdbk, +int gain_cdbk_size, int pitch, /* Pitch value */ int p, /* Number of LPC coeffs */ int nsf, /* Number of samples in subframe */ SpeexBits *bits, char *stack, -const spx_sig_t *exc2, +const spx_word16_t *exc2, const spx_word16_t *r, -spx_sig_t *new_target, +spx_word16_t *new_target, int *cdbk_index, -int cdbk_offset, -int plc_tuning +int plc_tuning, +spx_word32_t cumul_gain ) { int i,j; - VARDECL(spx_sig_t *tmp1); - VARDECL(spx_sig_t *tmp2); - spx_sig_t *x[3]; - spx_sig_t *e[3]; + VARDECL(spx_word16_t *tmp1); + VARDECL(spx_word16_t *e); + spx_word16_t *x[3]; spx_word32_t corr[3]; spx_word32_t A[3][3]; - int gain_cdbk_size; - const signed char *gain_cdbk; spx_word16_t gain[3]; - spx_word64_t err; + spx_word32_t err; + spx_word16_t max_gain=128; + int best_cdbk=0; - const ltp_params *params; - params = (const ltp_params*) par; - gain_cdbk_size = 1<gain_bits; - gain_cdbk = params->gain_cdbk + 3*gain_cdbk_size*cdbk_offset; - ALLOC(tmp1, 3*nsf, spx_sig_t); - ALLOC(tmp2, 3*nsf, spx_sig_t); + ALLOC(tmp1, 3*nsf, spx_word16_t); + ALLOC(e, nsf, spx_word16_t); + if (cumul_gain > 262144) + max_gain = 31; + x[0]=tmp1; x[1]=tmp1+nsf; x[2]=tmp1+2*nsf; - e[0]=tmp2; - e[1]=tmp2+nsf; - e[2]=tmp2+2*nsf; - for (i=2;i>=0;i--) { - int pp=pitch+1-i; + VARDECL(spx_mem_t *mm); + int pp=pitch-1; + ALLOC(mm, p, spx_mem_t); for (j=0;j max_val) - max_val = tmp; - } - } - for (i=0;i max_val) - max_val = tmp; - } - - sig_shift=0; - while (max_val>16384) - { - sig_shift++; - max_val >>= 1; - } - - for (j=0;j<3;j++) - { - for (i=0;i=0;i--) { - for (i=0;i<3;i++) - corr[i]=inner_prod(x[i],target,nsf); - - for (i=0;i<3;i++) - for (j=0;j<=i;j++) - A[i][j]=A[j][i]=inner_prod(x[i],x[j],nsf); + spx_word16_t e0=exc2[-pitch-1+i]; + x[i][0]=MULT16_16_Q14(r[0], e0); + for (j=0;j30) + plc_tuning=30; #ifdef FIXED_POINT - C[0] = MAC16_32_Q15(C[0],MULT16_16_16(plc_tuning,-327),C[0]); - C[1] = MAC16_32_Q15(C[1],MULT16_16_16(plc_tuning,-327),C[1]); - C[2] = MAC16_32_Q15(C[2],MULT16_16_16(plc_tuning,-327),C[2]); C[0] = SHL32(C[0],1); C[1] = SHL32(C[1],1); C[2] = SHL32(C[2],1); C[3] = SHL32(C[3],1); C[4] = SHL32(C[4],1); C[5] = SHL32(C[5],1); + C[6] = MAC16_32_Q15(C[6],MULT16_16_16(plc_tuning,655),C[6]); + C[7] = MAC16_32_Q15(C[7],MULT16_16_16(plc_tuning,655),C[7]); + C[8] = MAC16_32_Q15(C[8],MULT16_16_16(plc_tuning,655),C[8]); + normalize16(C, C16, 32767, 9); #else - C[0]*=1-.01*plc_tuning; - C[1]*=1-.01*plc_tuning; - C[2]*=1-.01*plc_tuning; - C[6]*=.5*(1+.01*plc_tuning); - C[7]*=.5*(1+.01*plc_tuning); - C[8]*=.5*(1+.01*plc_tuning); + C[6]*=.5*(1+.02*plc_tuning); + C[7]*=.5*(1+.02*plc_tuning); + C[8]*=.5*(1+.02*plc_tuning); #endif - for (i=0;i 64) - { - gain_sum = SUB16(gain_sum, 64); - if (gain_sum > 127) - gain_sum = 127; -#ifdef FIXED_POINT - pitch_control = SUB16(64,EXTRACT16(PSHR32(MULT16_16(64,MULT16_16_16(plc_tuning, gain_sum)),10))); -#else - pitch_control = 64*(1.-.001*plc_tuning*gain_sum); -#endif - if (pitch_control < 0) - pitch_control = 0; - } - - sum = compute_pitch_error(C, g, pitch_control); - - if (sum>best_sum || i==0) - { - best_sum=sum; - best_cdbk=i; - } - } + + best_cdbk = pitch_gain_search_3tap_vq(gain_cdbk, gain_cdbk_size, C16, max_gain); + #ifdef FIXED_POINT - gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3]); - gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3+1]); - gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3+2]); + gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4]); + gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4+1]); + gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4+2]); /*printf ("%d %d %d %d\n",gain[0],gain[1],gain[2], best_cdbk);*/ #else - gain[0] = 0.015625*gain_cdbk[best_cdbk*3] + .5; - gain[1] = 0.015625*gain_cdbk[best_cdbk*3+1]+ .5; - gain[2] = 0.015625*gain_cdbk[best_cdbk*3+2]+ .5; + gain[0] = 0.015625*gain_cdbk[best_cdbk*4] + .5; + gain[1] = 0.015625*gain_cdbk[best_cdbk*4+1]+ .5; + gain[2] = 0.015625*gain_cdbk[best_cdbk*4+2]+ .5; #endif *cdbk_index=best_cdbk; } -#ifdef FIXED_POINT - for (i=0;ipp) + tmp1=pp; + for (j=0;jpp+pitch) + tmp3=pp+pitch; + for (j=tmp1;jgain_bits; + gain_cdbk = params->gain_cdbk + 4*gain_cdbk_size*cdbk_offset; + N=complexity; if (N>10) N=10; @@ -614,23 +545,24 @@ int plc_tuning return start; } - ALLOC(best_exc, nsf, spx_sig_t); - ALLOC(new_target, nsf, spx_sig_t); - ALLOC(best_target, nsf, spx_sig_t); - if (N>end-start+1) N=end-start+1; if (end != start) open_loop_nbest_pitch(sw, start, end, nsf, nbest, NULL, N, stack); else nbest[0] = start; + + ALLOC(best_exc, nsf, spx_sig_t); + ALLOC(new_target, nsf, spx_word16_t); + ALLOC(best_target, nsf, spx_word16_t); + for (i=0;ipitch_bits); speex_bits_pack(bits, best_gain_index, params->gain_bits); +#ifdef FIXED_POINT + *cumul_gain = MULT16_32_Q13(SHL16(params->gain_cdbk[4*best_gain_index+3],8), MAX32(1024,*cumul_gain)); +#else + *cumul_gain = 0.03125*MAX32(1024,*cumul_gain)*params->gain_cdbk[4*best_gain_index+3]; +#endif + /*printf ("%f\n", cumul_gain);*/ /*printf ("encode pitch: %d %d\n", best_pitch, best_gain_index);*/ for (i=0;igain_bits; - gain_cdbk = params->gain_cdbk + 3*gain_cdbk_size*cdbk_offset; + gain_cdbk = params->gain_cdbk + 4*gain_cdbk_size*cdbk_offset; pitch = speex_bits_unpack_unsigned(bits, params->pitch_bits); pitch += start; gain_index = speex_bits_unpack_unsigned(bits, params->gain_bits); /*printf ("decode pitch: %d %d\n", pitch, gain_index);*/ #ifdef FIXED_POINT - gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*3]); - gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*3+1]); - gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*3+2]); + gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4]); + gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4+1]); + gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4+2]); #else - gain[0] = 0.015625*gain_cdbk[gain_index*3]+.5; - gain[1] = 0.015625*gain_cdbk[gain_index*3+1]+.5; - gain[2] = 0.015625*gain_cdbk[gain_index*3+2]+.5; + gain[0] = 0.015625*gain_cdbk[gain_index*4]+.5; + gain[1] = 0.015625*gain_cdbk[gain_index*4+1]+.5; + gain[2] = 0.015625*gain_cdbk[gain_index*4+2]+.5; #endif if (count_lost && pitch > subframe_offset) @@ -728,66 +666,36 @@ int cdbk_offset gain_val[0]=gain[0]; gain_val[1]=gain[1]; gain_val[2]=gain[2]; - + gain[0] = SHL16(gain[0],7); + gain[1] = SHL16(gain[1],7); + gain[2] = SHL16(gain[2],7); + for (i=0;ipp) - tmp1=pp; - for (j=0;jpp+pitch) - tmp3=pp+pitch; - for (j=tmp1;jpp) + tmp1=pp; + for (j=0;jpp+pitch) + tmp3=pp+pitch; + for (j=tmp1;j.99) - coef=.99; - for (i=0;i63) + pitch_coef=63; +#else + if (pitch_coef>.99) + pitch_coef=.99; +#endif + for (i=0;i.99) - coef=.99; +#ifdef FIXED_POINT + if (pitch_coef>63) + pitch_coef=63; +#else + if (pitch_coef>.99) + pitch_coef=.99; +#endif for (i=0;i0 ? g[0] : -.5*g[0]) + (g[2]>0 ? g[2] : -.5*g[2])) #endif -void open_loop_nbest_pitch(spx_sig_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack); +spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len); +void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack); + +void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack); /** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */ int pitch_search_3tap( -spx_sig_t target[], /* Target vector */ -spx_sig_t *sw, +spx_word16_t target[], /* Target vector */ +spx_word16_t *sw, spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */ spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */ @@ -67,19 +70,21 @@ int p, /* Number of LPC coeffs */ int nsf, /* Number of samples in subframe */ SpeexBits *bits, char *stack, -spx_sig_t *exc2, +spx_word16_t *exc2, spx_word16_t *r, int complexity, int cdbk_offset, -int plc_tuning +int plc_tuning, +spx_word32_t *cumul_gain ); /*Unquantize adaptive codebook and update pitch contribution*/ void pitch_unquant_3tap( -spx_sig_t exc[], /* Excitation */ +spx_word16_t exc[], /* Input excitation */ +spx_word32_t exc_out[], /* Output excitation */ int start, /* Smallest pitch value allowed */ int end, /* Largest pitch value allowed */ -spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */ +spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */ const void *par, int nsf, /* Number of samples in subframe */ int *pitch_val, @@ -94,8 +99,8 @@ int cdbk_offset /** Forced pitch delay and gain */ int forced_pitch_quant( -spx_sig_t target[], /* Target vector */ -spx_sig_t *sw, +spx_word16_t target[], /* Target vector */ +spx_word16_t *sw, spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */ spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */ @@ -108,19 +113,21 @@ int p, /* Number of LPC coeffs */ int nsf, /* Number of samples in subframe */ SpeexBits *bits, char *stack, -spx_sig_t *exc2, +spx_word16_t *exc2, spx_word16_t *r, int complexity, int cdbk_offset, -int plc_tuning +int plc_tuning, +spx_word32_t *cumul_gain ); /** Unquantize forced pitch delay and gain */ void forced_pitch_unquant( -spx_sig_t exc[], /* Excitation */ +spx_word16_t exc[], /* Input excitation */ +spx_word32_t exc_out[], /* Output excitation */ int start, /* Smallest pitch value allowed */ int end, /* Largest pitch value allowed */ -spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */ +spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */ const void *par, int nsf, /* Number of samples in subframe */ int *pitch_val, diff --git a/pjmedia/src/pjmedia-codec/speex/ltp_arm4.h b/pjmedia/src/pjmedia-codec/speex/ltp_arm4.h index a5a0beeb..7479e8bf 100644 --- a/pjmedia/src/pjmedia-codec/speex/ltp_arm4.h +++ b/pjmedia/src/pjmedia-codec/speex/ltp_arm4.h @@ -33,7 +33,7 @@ */ #define OVERRIDE_INNER_PROD -static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) +spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) { spx_word32_t sum1=0,sum2=0; spx_word16_t *deadx, *deady; @@ -84,7 +84,7 @@ static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int } #define OVERRIDE_PITCH_XCORR -static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack) +void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack) { int i,j; for (i=0;i>> 16;\n\t" - "A0 += A1;\n\t" "%0 = A0;\n\t" : "=&D" (sum), "=a" (C) : "d" (g[0]), "d" (g[1]), "d" (g[2]), "d" (pitch_control), "1" (C) @@ -163,3 +152,268 @@ static inline spx_word32_t compute_pitch_error(spx_word32_t *C, spx_word16_t *g, return sum; } +#define OVERRIDE_OPEN_LOOP_NBEST_PITCH +#ifdef OVERRIDE_OPEN_LOOP_NBEST_PITCH +void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack) +{ + int i,j,k; + VARDECL(spx_word32_t *best_score); + VARDECL(spx_word32_t *best_ener); + spx_word32_t e0; + VARDECL(spx_word32_t *corr); + VARDECL(spx_word32_t *energy); + + ALLOC(best_score, N, spx_word32_t); + ALLOC(best_ener, N, spx_word32_t); + ALLOC(corr, end-start+1, spx_word32_t); + ALLOC(energy, end-start+2, spx_word32_t); + + for (i=0;i>>= 6;\n\t" +" R1 = R1 + R2;\n\t" +" R0 >>>= 6;\n\t" +" R1 = R1 - R0;\n\t" +" R2 = MAX(R1,R3);\n\t" +"eu2: [P0++] = R2;\n\t" + : : "d" (energy), "d" (&sw[-start-1]), "d" (&sw[-start+len-1]), + "a" (end-start) + : "P0", "I1", "I2", "R0", "R1", "R2", "R3" +#if (__GNUC__ == 4) + , "LC1" +#endif + ); + + pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack); + + /* FIXME: Fixed-point and floating-point code should be merged */ + { + VARDECL(spx_word16_t *corr16); + VARDECL(spx_word16_t *ener16); + ALLOC(corr16, end-start+1, spx_word16_t); + ALLOC(ener16, end-start+1, spx_word16_t); + /* Normalize to 180 so we can square it and it still fits in 16 bits */ + normalize16(corr, corr16, 180, end-start+1); + normalize16(energy, ener16, 180, end-start+1); + + if (N == 1) { + /* optimised asm to handle N==1 case */ + __asm__ __volatile__ + ( +" I0 = %1;\n\t" /* I0: corr16[] */ +" L0 = 0;\n\t" +" I1 = %2;\n\t" /* I1: energy */ +" L1 = 0;\n\t" +" R2 = -1;\n\t" /* R2: best score */ +" R3 = 0;\n\t" /* R3: best energy */ +" P0 = %4;\n\t" /* P0: best pitch */ +" P1 = %4;\n\t" /* P1: counter */ +" LSETUP (sl1, sl2) LC1 = %3;\n\t" +"sl1: R0.L = W [I0++] || R1.L = W [I1++];\n\t" +" R0 = R0.L * R0.L (IS);\n\t" +" R1 += 1;\n\t" +" R4 = R0.L * R3.L;\n\t" +" R5 = R2.L * R1.L;\n\t" +" cc = R5 < R4;\n\t" +" if cc R2 = R0;\n\t" +" if cc R3 = R1;\n\t" +" if cc P0 = P1;\n\t" +"sl2: P1 += 1;\n\t" +" %0 = P0;\n\t" + : "=&d" (pitch[0]) + : "a" (corr16), "a" (ener16), "a" (end+1-start), "d" (start) + : "P0", "P1", "I0", "I1", "R0", "R1", "R2", "R3", "R4", "R5" +#if (__GNUC__ == 4) + , "LC1" +#endif + ); + + } + else { + for (i=start;i<=end;i++) + { + spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]); + /* Instead of dividing the tmp by the energy, we multiply on the other side */ + if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start]))) + { + /* We can safely put it last and then check */ + best_score[N-1]=tmp; + best_ener[N-1]=ener16[i-start]+1; + pitch[N-1]=i; + /* Check if it comes in front of others */ + for (j=0;jMULT16_16(best_score[j],ADD16(1,ener16[i-start]))) + { + for (k=N-1;k>j;k--) + { + best_score[k]=best_score[k-1]; + best_ener[k]=best_ener[k-1]; + pitch[k]=pitch[k-1]; + } + best_score[j]=tmp; + best_ener[j]=ener16[i-start]+1; + pitch[j]=i; + break; + } + } + } + } + } + } + + /* Compute open-loop gain */ + if (gain) + { + for (j=0;jbest_sum && gain_sum<=max_gain) ------ (1) + + if (sum>best_sum && !(gain_sum>max_gain)) ------ (2) + + if (max_gain<=gain_sum) { ------ (3) + sum = -VERY_LARGE32; + } + if (best_sum<=sum) + + The blackin cc instructions are all of the form: + + cc = x < y (or cc = x <= y) +*/ +" R1 = B0\n\t" +" R2 = %5\n\t" +" R3 = %6\n\t" +" cc = R2 <= R1;\n\t" +" if cc R0 = R3;\n\t" +" cc = %0 <= R0;\n\t" +" if cc %0 = R0;\n\t" +" if cc %1 = P1;\n\t" + +"pgs2: P1 += 1;\n\t" + + : "=&d" (best_sum), "=&d" (best_cdbk) + : "a" (gain_cdbk), "a" (C16), "a" (gain_cdbk_size), "a" (max_gain), + "b" (-VERY_LARGE32) + : "R0", "R1", "R2", "R3", "R4", "R5", "P0", + "P1", "I1", "L1", "A0", "B0" +#if (__GNUC__ == 4) + , "LC1" +#endif + ); + + return best_cdbk; +} +#endif + diff --git a/pjmedia/src/pjmedia-codec/speex/ltp_sse.h b/pjmedia/src/pjmedia-codec/speex/ltp_sse.h index 94c0012a..bed6eaac 100644 --- a/pjmedia/src/pjmedia-codec/speex/ltp_sse.h +++ b/pjmedia/src/pjmedia-codec/speex/ltp_sse.h @@ -35,7 +35,7 @@ #include #define OVERRIDE_INNER_PROD -static float inner_prod(const float *a, const float *b, int len) +float inner_prod(const float *a, const float *b, int len) { int i; float ret; @@ -54,7 +54,7 @@ static float inner_prod(const float *a, const float *b, int len) } #define OVERRIDE_PITCH_XCORR -static void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack) +void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack) { int i, offset; VARDECL(__m128 *x); diff --git a/pjmedia/src/pjmedia-codec/speex/mdf.c b/pjmedia/src/pjmedia-codec/speex/mdf.c index 0e7219ca..eabf4339 100644 --- a/pjmedia/src/pjmedia-codec/speex/mdf.c +++ b/pjmedia/src/pjmedia-codec/speex/mdf.c @@ -90,7 +90,7 @@ #endif #ifdef FIXED_POINT -static const spx_float_t MIN_LEAK = ((spx_float_t){16777, -24}); +static const spx_float_t MIN_LEAK = {16777, -24}; #define TOP16(x) ((x)>>16) #else static const spx_float_t MIN_LEAK = .001f; @@ -140,9 +140,13 @@ struct SpeexEchoState_ { spx_word16_t preemph; spx_word16_t notch_radius; spx_mem_t notch_mem[2]; + + /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ + spx_int16_t *play_buf; + int play_buf_pos; }; -static inline void filter_dc_notch16(spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) +static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) { int i; spx_word16_t den2; @@ -166,17 +170,15 @@ static inline void filter_dc_notch16(spx_int16_t *in, spx_word16_t radius, spx_w } } -static inline spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) +static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) { spx_word32_t sum=0; - len >>= 2; + len >>= 1; while(len--) { spx_word32_t part=0; part = MAC16_16(part,*x++,*y++); part = MAC16_16(part,*x++,*y++); - part = MAC16_16(part,*x++,*y++); - part = MAC16_16(part,*x++,*y++); /* HINT: If you had a 40-bit accumulator, you could shift only at the end */ sum = ADD32(sum,SHR32(part,6)); } @@ -184,7 +186,7 @@ static inline spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t } /** Compute power spectrum of a half-complex (packed) vector */ -static inline void power_spectrum(spx_word16_t *X, spx_word32_t *ps, int N) +static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N) { int i, j; ps[0]=MULT16_16(X[0],X[0]); @@ -197,7 +199,7 @@ static inline void power_spectrum(spx_word16_t *X, spx_word32_t *ps, int N) /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */ #ifdef FIXED_POINT -static inline void spectral_mul_accum(spx_word16_t *X, spx_word32_t *Y, spx_word16_t *acc, int N, int M) +static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) { int i,j; spx_word32_t tmp1=0,tmp2=0; @@ -225,7 +227,7 @@ static inline void spectral_mul_accum(spx_word16_t *X, spx_word32_t *Y, spx_word acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT); } #else -static inline void spectral_mul_accum(spx_word16_t *X, spx_word32_t *Y, spx_word16_t *acc, int N, int M) +static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) { int i,j; for (i=0;isum_adapt = 0; /* FIXME: Make that an init option (new API call?) */ st->sampling_rate = 8000; - st->spec_average = DIV32_16(SHL32(st->frame_size, 15), st->sampling_rate); + st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); #ifdef FIXED_POINT - st->beta0 = DIV32_16(SHL32(st->frame_size, 16), st->sampling_rate); - st->beta_max = DIV32_16(SHL32(st->frame_size, 14), st->sampling_rate); + st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate); + st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate); #else st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; st->beta_max = (.5f*st->frame_size)/st->sampling_rate; @@ -332,6 +334,10 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) st->notch_mem[0] = st->notch_mem[1] = 0; st->adapted = 0; st->Pey = st->Pyy = FLOAT_ONE; + + st->play_buf = (spx_int16_t*)speex_alloc(2*st->frame_size*sizeof(spx_int16_t)); + st->play_buf_pos = 0; + return st; } @@ -385,12 +391,46 @@ void speex_echo_state_destroy(SpeexEchoState *st) #ifdef FIXED_POINT speex_free(st->wtmp2); #endif + speex_free(st->play_buf); speex_free(st); } -extern int fixed_point; +void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out, spx_int32_t *Yout) +{ + int i; + if (st->play_buf_pos>=st->frame_size) + { + speex_echo_cancel(st, rec, st->play_buf, out, Yout); + st->play_buf_pos -= st->frame_size; + for (i=0;iframe_size;i++) + st->play_buf[i] = st->play_buf[i+st->frame_size]; + } else { + speex_warning("no playback frame available"); + if (st->play_buf_pos!=0) + { + speex_warning("internal playback buffer corruption?"); + st->play_buf_pos = 0; + } + for (i=0;iframe_size;i++) + out[i] = rec[i]; + } +} + +void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) +{ + if (st->play_buf_pos<=st->frame_size) + { + int i; + for (i=0;iframe_size;i++) + st->play_buf[st->play_buf_pos+i] = play[i]; + st->play_buf_pos += st->frame_size; + } else { + speex_warning("had to discard a playback frame"); + } +} + /** Performs echo cancellation on a frame */ -void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, spx_int32_t *Yout) +void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_t *echo, spx_int16_t *out, spx_int32_t *Yout) { int i,j; int N,M; @@ -402,6 +442,7 @@ void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, spx_word16_t RER; spx_word32_t tmp32; spx_word16_t M_1; + int saturated=0; N = st->window_size; M = st->M; @@ -416,18 +457,46 @@ void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, M_1 = 1.f/M; #endif - filter_dc_notch16((spx_int16_t*)ref, st->notch_radius, st->d, st->frame_size, st->notch_mem); + filter_dc_notch16(ref, st->notch_radius, st->d, st->frame_size, st->notch_mem); /* Copy input data to buffer */ for (i=0;iframe_size;i++) { spx_word16_t tmp; + spx_word32_t tmp32; st->x[i] = st->x[i+st->frame_size]; - st->x[i+st->frame_size] = SUB16(echo[i], MULT16_16_P15(st->preemph, st->memX)); + tmp32 = SUB32(EXTEND32(echo[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); +#ifdef FIXED_POINT + /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ + if (tmp32 > 32767) + { + tmp32 = 32767; + saturated = 1; + } + if (tmp32 < -32767) + { + tmp32 = -32767; + saturated = 1; + } +#endif + st->x[i+st->frame_size] = EXTRACT16(tmp32); st->memX = echo[i]; tmp = st->d[i]; st->d[i] = st->d[i+st->frame_size]; - st->d[i+st->frame_size] = SUB16(tmp, MULT16_16_P15(st->preemph, st->memD)); + tmp32 = SUB32(EXTEND32(tmp), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); +#ifdef FIXED_POINT + if (tmp32 > 32767) + { + tmp32 = 32767; + saturated = 1; + } + if (tmp32 < -32767) + { + tmp32 = -32767; + saturated = 1; + } +#endif + st->d[i+st->frame_size] = tmp32; st->memD = tmp; } @@ -465,6 +534,12 @@ void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, else if (tmp_out<-32768) tmp_out = -32768; tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE))); + /* This is an arbitrary test for saturation */ + if (ref[i] <= -32000 || ref[i] >= 32000) + { + tmp_out = 0; + saturated = 1; + } out[i] = tmp_out; st->memE = tmp_out; } @@ -477,9 +552,9 @@ void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, } /* Compute a bunch of correlations */ - See = inner_prod(st->e+st->frame_size, st->e+st->frame_size, st->frame_size); - See = ADD32(See, SHR32(10000,6)); - Syy = inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size); + See = mdf_inner_prod(st->e+st->frame_size, st->e+st->frame_size, st->frame_size); + See = ADD32(See, SHR32(EXTEND32(10000),6)); + Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size); /* Convert error to frequency domain */ spx_fft(st->fft_table, st->e, st->E); @@ -544,8 +619,9 @@ void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy); if (FLOAT_GT(st->Pey, st->Pyy)) st->Pey = st->Pyy; - /* leak_estimate is the limear regression result */ + /* leak_estimate is the linear regression result */ leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14)); + /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */ if (leak_estimate > 16383) leak_estimate = 32767; else @@ -594,7 +670,7 @@ void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, spx_word32_t Sxx; spx_word16_t adapt_rate=0; - Sxx = inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); + Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); /* Temporary adaption rate if filter is not adapted correctly */ tmp32 = MULT16_32_Q15(QCONST16(.15f, 15), Sxx); @@ -620,12 +696,15 @@ void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, weighted_spectral_mul_conj(st->power_1, &st->X[j*N], st->E, st->PHI+N*j, N); } - /* Gradient descent */ - for (i=0;iW[i] += st->PHI[i]; - /* Old value of W in PHI */ - st->PHI[i] = st->W[i] - st->PHI[i]; + /* Gradient descent */ + for (i=0;iW[i] += st->PHI[i]; + /* Old value of W in PHI */ + st->PHI[i] = st->W[i] - st->PHI[i]; + } } /* Update weight to prevent circular convolution (MDF / AUMDF) */ @@ -637,7 +716,7 @@ void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, { #ifdef FIXED_POINT for (i=0;iwtmp2[i] = PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16); + st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16)); spx_ifft(st->fft_table, st->wtmp2, st->wtmp); for (i=0;iframe_size;i++) { @@ -645,12 +724,12 @@ void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, } for (i=st->frame_size;iwtmp[i]=SHL(st->wtmp[i],NORMALIZE_SCALEUP); + st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); } spx_fft(st->fft_table, st->wtmp, st->wtmp2); /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ for (i=0;iW[j*N+i] -= SHL32(st->wtmp2[i],16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); + st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); #else spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); for (i=st->frame_size;isampling_rate = (*(int*)ptr); - st->spec_average = DIV32_16(SHL32(st->frame_size, 15), st->sampling_rate); + st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); #ifdef FIXED_POINT - st->beta0 = DIV32_16(SHL32(st->frame_size, 16), st->sampling_rate); - st->beta_max = DIV32_16(SHL32(st->frame_size, 14), st->sampling_rate); + st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate); + st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate); #else st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; st->beta_max = (.5f*st->frame_size)/st->sampling_rate; diff --git a/pjmedia/src/pjmedia-codec/speex/misc.c b/pjmedia/src/pjmedia-codec/speex/misc.c index fc2cb7cf..53bdd0b6 100644 --- a/pjmedia/src/pjmedia-codec/speex/misc.c +++ b/pjmedia/src/pjmedia-codec/speex/misc.c @@ -196,12 +196,12 @@ void speex_warning_int(const char *str, int val) #endif #ifdef FIXED_POINT -spx_word32_t speex_rand(spx_word16_t std, spx_int32_t *seed) +spx_word16_t speex_rand(spx_word16_t std, spx_int32_t *seed) { spx_word32_t res; *seed = 1664525 * *seed + 1013904223; res = MULT16_16(EXTRACT16(SHR32(*seed,16)),std); - return SUB32(res, SHR(res, 3)); + return PSHR32(SUB32(res, SHR(res, 3)),14); } #else spx_word16_t speex_rand(spx_word16_t std, spx_int32_t *seed) @@ -216,19 +216,6 @@ spx_word16_t speex_rand(spx_word16_t std, spx_int32_t *seed) } #endif -void speex_rand_vec(float std, spx_sig_t *data, int len) -{ - int i; - for (i=0;i #include "vbr.h" #include "misc.h" +#include "math_approx.h" #include #ifdef VORBIS_PSYCHO @@ -106,6 +107,8 @@ const float exc_gain_quant_scal1[2]={0.70469, 1.05127}; #define sqr(x) ((x)*(x)) +extern const spx_word16_t lpc_window[]; + void *nb_encoder_init(const SpeexMode *m) { EncState *st; @@ -125,9 +128,9 @@ void *nb_encoder_init(const SpeexMode *m) st->mode=m; st->frameSize = mode->frameSize; - st->windowSize = st->frameSize*3/2; st->nbSubframes=mode->frameSize/mode->subframeSize; st->subframeSize=mode->subframeSize; + st->windowSize = st->frameSize+st->subframeSize; st->lpcSize = mode->lpcSize; st->gamma1=mode->gamma1; st->gamma2=mode->gamma2; @@ -149,69 +152,50 @@ void *nb_encoder_init(const SpeexMode *m) st->psy = vorbis_psy_init(8000, 256); st->curve = speex_alloc(128*sizeof(float)); st->old_curve = speex_alloc(128*sizeof(float)); + st->psy_window = speex_alloc(256*sizeof(float)); #endif + st->cumul_gain = 1024; + /* Allocating input buffer */ - st->inBuf = speex_alloc((st->windowSize+EXTRA_BUFFER)*sizeof(spx_sig_t)); - st->frame = st->inBuf+EXTRA_BUFFER; + st->winBuf = speex_alloc((st->windowSize-st->frameSize)*sizeof(spx_word16_t)); /* Allocating excitation buffer */ - st->excBuf = speex_alloc((mode->frameSize+mode->pitchEnd+1)*sizeof(spx_sig_t)); - st->exc = st->excBuf + mode->pitchEnd + 1; - st->swBuf = speex_alloc((mode->frameSize+mode->pitchEnd+1)*sizeof(spx_sig_t)); - st->sw = st->swBuf + mode->pitchEnd + 1; - - st->innov = speex_alloc((st->frameSize)*sizeof(spx_sig_t)); + st->excBuf = speex_alloc((mode->frameSize+mode->pitchEnd+2)*sizeof(spx_word16_t)); + st->exc = st->excBuf + mode->pitchEnd + 2; + st->swBuf = speex_alloc((mode->frameSize+mode->pitchEnd+2)*sizeof(spx_word16_t)); + st->sw = st->swBuf + mode->pitchEnd + 2; - /* Asymmetric "pseudo-Hamming" window */ - { - int part1, part2; - part1=st->frameSize - (st->subframeSize>>1); - part2=(st->frameSize>>1) + (st->subframeSize>>1); - st->window = speex_alloc((st->windowSize)*sizeof(spx_word16_t)); - for (i=0;iwindow[i]=(spx_word16_t)(SIG_SCALING*(.54-.46*cos(M_PI*i/part1))); - for (i=0;iwindow[part1+i]=(spx_word16_t)(SIG_SCALING*(.54+.46*cos(M_PI*i/part2))); - } + st->window= lpc_window; + /* Create the window for autocorrelation (lag-windowing) */ st->lagWindow = speex_alloc((st->lpcSize+1)*sizeof(spx_word16_t)); for (i=0;ilpcSize+1;i++) st->lagWindow[i]=16384*exp(-.5*sqr(2*M_PI*st->lag_factor*i)); - st->autocorr = speex_alloc((st->lpcSize+1)*sizeof(spx_word16_t)); - - st->lpc = speex_alloc((st->lpcSize)*sizeof(spx_coef_t)); - st->interp_lpc = speex_alloc((st->lpcSize)*sizeof(spx_coef_t)); - st->interp_qlpc = speex_alloc((st->lpcSize)*sizeof(spx_coef_t)); - st->bw_lpc1 = speex_alloc((st->lpcSize)*sizeof(spx_coef_t)); - st->bw_lpc2 = speex_alloc((st->lpcSize)*sizeof(spx_coef_t)); - - st->lsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t)); - st->qlsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t)); st->old_lsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t)); st->old_qlsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t)); - st->interp_lsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t)); - st->interp_qlsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t)); - st->first = 1; for (i=0;ilpcSize;i++) { - st->lsp[i]=LSP_SCALING*(M_PI*((float)(i+1)))/(st->lpcSize+1); + st->old_lsp[i]=LSP_SCALING*(M_PI*((float)(i+1)))/(st->lpcSize+1); } st->mem_sp = speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_sw = speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_sw_whole = speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_exc = speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); + st->mem_exc2 = speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->pi_gain = speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); - + st->innov_save = NULL; + st->pitch = speex_alloc((st->nbSubframes)*sizeof(int)); st->vbr = speex_alloc(sizeof(VBRState)); vbr_init(st->vbr); st->vbr_quality = 8; st->vbr_enabled = 0; + st->vbr_max = 0; st->vad_enabled = 0; st->dtx_enabled = 0; st->abr_enabled = 0; @@ -236,30 +220,19 @@ void nb_encoder_destroy(void *state) speex_free_scratch(st->stack); #endif - speex_free (st->inBuf); + speex_free (st->winBuf); speex_free (st->excBuf); - speex_free (st->innov); - speex_free (st->interp_qlpc); - speex_free (st->qlsp); speex_free (st->old_qlsp); - speex_free (st->interp_qlsp); speex_free (st->swBuf); - speex_free (st->window); speex_free (st->lagWindow); - speex_free (st->autocorr); - speex_free (st->lpc); - speex_free (st->lsp); - speex_free (st->interp_lpc); - speex_free (st->bw_lpc1); - speex_free (st->bw_lpc2); speex_free (st->old_lsp); - speex_free (st->interp_lsp); speex_free (st->mem_sp); speex_free (st->mem_sw); speex_free (st->mem_sw_whole); speex_free (st->mem_exc); + speex_free (st->mem_exc2); speex_free (st->pi_gain); speex_free (st->pitch); @@ -270,6 +243,7 @@ void nb_encoder_destroy(void *state) vorbis_psy_destroy(st->psy); speex_free (st->curve); speex_free (st->old_curve); + speex_free (st->psy_window); #endif /*Free state memory... should be last*/ @@ -283,12 +257,23 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) int ol_pitch; spx_word16_t ol_pitch_coef; spx_word32_t ol_gain; - VARDECL(spx_sig_t *res); - VARDECL(spx_sig_t *target); + VARDECL(spx_word16_t *ringing); + VARDECL(spx_word16_t *target); + VARDECL(spx_sig_t *innov); + VARDECL(spx_word32_t *exc32); VARDECL(spx_mem_t *mem); + VARDECL(spx_coef_t *bw_lpc1); + VARDECL(spx_coef_t *bw_lpc2); + VARDECL(spx_coef_t *lpc); + VARDECL(spx_lsp_t *lsp); + VARDECL(spx_lsp_t *qlsp); + VARDECL(spx_lsp_t *interp_lsp); + VARDECL(spx_lsp_t *interp_qlsp); + VARDECL(spx_coef_t *interp_lpc); + VARDECL(spx_coef_t *interp_qlpc); char *stack; VARDECL(spx_word16_t *syn_resp); - VARDECL(spx_sig_t *real_exc); + VARDECL(spx_word16_t *real_exc); #ifdef EPIC_48K int pitch_half[2]; int ol_pitch_id=0; @@ -298,79 +283,85 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) st=(EncState *)state; stack=st->stack; - /* Copy new data in input buffer */ - speex_move(st->inBuf, st->inBuf+st->frameSize, (EXTRA_BUFFER+st->windowSize-st->frameSize)*sizeof(spx_sig_t)); - for (i=0;iframeSize;i++) - st->inBuf[st->windowSize-st->frameSize+i+EXTRA_BUFFER] = SHL32(EXTEND32(in[i]), SIG_SHIFT); + ALLOC(lpc, st->lpcSize, spx_coef_t); + ALLOC(bw_lpc1, st->lpcSize, spx_coef_t); + ALLOC(bw_lpc2, st->lpcSize, spx_coef_t); + ALLOC(lsp, st->lpcSize, spx_lsp_t); + ALLOC(qlsp, st->lpcSize, spx_lsp_t); + ALLOC(interp_lsp, st->lpcSize, spx_lsp_t); + ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t); + ALLOC(interp_lpc, st->lpcSize, spx_coef_t); + ALLOC(interp_qlpc, st->lpcSize, spx_coef_t); /* Move signals 1 frame towards the past */ - speex_move(st->excBuf, st->excBuf+st->frameSize, (st->max_pitch+1)*sizeof(spx_sig_t)); - speex_move(st->swBuf, st->swBuf+st->frameSize, (st->max_pitch+1)*sizeof(spx_sig_t)); + speex_move(st->excBuf, st->excBuf+st->frameSize, (st->max_pitch+2)*sizeof(spx_word16_t)); + speex_move(st->swBuf, st->swBuf+st->frameSize, (st->max_pitch+2)*sizeof(spx_word16_t)); { VARDECL(spx_word16_t *w_sig); + VARDECL(spx_word16_t *autocorr); ALLOC(w_sig, st->windowSize, spx_word16_t); + ALLOC(autocorr, st->lpcSize+1, spx_word16_t); /* Window for analysis */ - for (i=0;iwindowSize;i++) - w_sig[i] = EXTRACT16(SHR32(MULT16_16(EXTRACT16(SHR32(st->frame[i],SIG_SHIFT)),st->window[i]),SIG_SHIFT)); - + for (i=0;iwindowSize-st->frameSize;i++) + w_sig[i] = EXTRACT16(SHR32(MULT16_16(st->winBuf[i],st->window[i]),SIG_SHIFT)); + for (;iwindowSize;i++) + w_sig[i] = EXTRACT16(SHR32(MULT16_16(in[i-st->windowSize+st->frameSize],st->window[i]),SIG_SHIFT)); /* Compute auto-correlation */ - _spx_autocorr(w_sig, st->autocorr, st->lpcSize+1, st->windowSize); - } - st->autocorr[0] = ADD16(st->autocorr[0],MULT16_16_Q15(st->autocorr[0],st->lpc_floor)); /* Noise floor in auto-correlation domain */ - - /* Lag windowing: equivalent to filtering in the power-spectrum domain */ - for (i=0;ilpcSize+1;i++) - st->autocorr[i] = MULT16_16_Q14(st->autocorr[i],st->lagWindow[i]); - - /* Levinson-Durbin */ - _spx_lpc(st->lpc, st->autocorr, st->lpcSize); - - /* LPC to LSPs (x-domain) transform */ - roots=lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 15, LSP_DELTA1, stack); - /* Check if we found all the roots */ - if (roots!=st->lpcSize) - { - /* Search again if we can afford it */ - if (st->complexity>1) - roots = lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 11, LSP_DELTA2, stack); - if (roots!=st->lpcSize) + _spx_autocorr(w_sig, autocorr, st->lpcSize+1, st->windowSize); + autocorr[0] = ADD16(autocorr[0],MULT16_16_Q15(autocorr[0],st->lpc_floor)); /* Noise floor in auto-correlation domain */ + + /* Lag windowing: equivalent to filtering in the power-spectrum domain */ + for (i=0;ilpcSize+1;i++) + autocorr[i] = MULT16_16_Q14(autocorr[i],st->lagWindow[i]); + + /* Levinson-Durbin */ + _spx_lpc(lpc, autocorr, st->lpcSize); + /* LPC to LSPs (x-domain) transform */ + roots=lpc_to_lsp (lpc, st->lpcSize, lsp, 10, LSP_DELTA1, stack); + /* Check if we found all the roots */ + if (roots!=st->lpcSize) { /*If we can't find all LSP's, do some damage control and use previous filter*/ for (i=0;ilpcSize;i++) { - st->lsp[i]=st->old_lsp[i]; + lsp[i]=st->old_lsp[i]; } } } + /* Whole frame analysis (open-loop estimation of pitch and excitation gain) */ { if (st->first) for (i=0;ilpcSize;i++) - st->interp_lsp[i] = st->lsp[i]; + interp_lsp[i] = lsp[i]; else - lsp_interpolate(st->old_lsp, st->lsp, st->interp_lsp, st->lpcSize, st->nbSubframes, st->nbSubframes<<1); + lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, st->nbSubframes, st->nbSubframes<<1); - lsp_enforce_margin(st->interp_lsp, st->lpcSize, LSP_MARGIN); + lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN); /* Compute interpolated LPCs (unquantized) for whole frame*/ - lsp_to_lpc(st->interp_lsp, st->interp_lpc, st->lpcSize,stack); + lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack); /*Open-loop pitch*/ - if (!st->submodes[st->submodeID] || st->vbr_enabled || st->vad_enabled || SUBMODE(forced_pitch_gain) || + if (st->complexity>2 || !st->submodes[st->submodeID] || st->vbr_enabled || st->vad_enabled || SUBMODE(forced_pitch_gain) || SUBMODE(lbr_pitch) != -1) { int nol_pitch[6]; spx_word16_t nol_pitch_coef[6]; - bw_lpc(st->gamma1, st->interp_lpc, st->bw_lpc1, st->lpcSize); - bw_lpc(st->gamma2, st->interp_lpc, st->bw_lpc2, st->lpcSize); + bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize); + bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize); - filter_mem2(st->frame, st->bw_lpc1, st->bw_lpc2, st->sw, st->frameSize, st->lpcSize, st->mem_sw_whole); + for (i=0;iwindowSize-st->frameSize;i++) + st->sw[i] = st->winBuf[i]; + for (;iframeSize;i++) + st->sw[i] = in[i-st->windowSize+st->frameSize]; + filter_mem16(st->sw, bw_lpc1, bw_lpc2, st->sw, st->frameSize, st->lpcSize, st->mem_sw_whole, stack); open_loop_nbest_pitch(st->sw, st->min_pitch, st->max_pitch, st->frameSize, nol_pitch, nol_pitch_coef, 6, stack); @@ -412,8 +403,13 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) ol_pitch=0; ol_pitch_coef=0; } + /*Compute "real" excitation*/ - fir_mem2(st->frame, st->interp_lpc, st->exc, st->frameSize, st->lpcSize, st->mem_exc); + for (i=0;iwindowSize-st->frameSize;i++) + st->exc[i] = st->winBuf[i]; + for (;iframeSize;i++) + st->exc[i] = in[i-st->windowSize+st->frameSize]; + fir_mem16(st->exc, interp_lpc, st->exc, st->frameSize, st->lpcSize, st->mem_exc, stack); /* Compute open-loop excitation gain */ #ifdef EPIC_48K @@ -421,8 +417,8 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) { float ol1=0,ol2=0; float ol_gain2; - ol1 = compute_rms(st->exc, st->frameSize>>1); - ol2 = compute_rms(st->exc+(st->frameSize>>1), st->frameSize>>1); + ol1 = compute_rms16(st->exc, st->frameSize>>1); + ol2 = compute_rms16(st->exc+(st->frameSize>>1), st->frameSize>>1); ol1 *= ol1*(st->frameSize>>1); ol2 *= ol2*(st->frameSize>>1); @@ -433,16 +429,24 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) ol_gain=SHR(sqrt(1+ol_gain2/st->frameSize),SIG_SHIFT); - } else { + } else #endif - ol_gain = SHL32(EXTEND32(compute_rms(st->exc, st->frameSize)),SIG_SHIFT); -#ifdef EPIC_48K + { + spx_word16_t g = compute_rms16(st->exc, st->frameSize); + if (ol_pitch>0) + ol_gain = MULT16_16(g, MULT16_16_Q14(QCONST16(1.1,14), + spx_sqrt(QCONST32(1.,28)-MULT16_32_Q15(QCONST16(.8,15),SHL32(MULT16_16(ol_pitch_coef,ol_pitch_coef),16))))); + else + ol_gain = SHL32(EXTEND32(g),SIG_SHIFT); } -#endif } #ifdef VORBIS_PSYCHO - compute_curve(st->psy, st->frame-16, st->curve); + for(i=0;i<256-st->frameSize;i++) + st->psy_window[i] = st->psy_window[i+st->frameSize]; + for(i=0;iframeSize;i++) + st->psy_window[256-st->frameSize+i] = in[i]; + compute_curve(st->psy, st->psy_window, st->curve); /*print_vec(st->curve, 128, "curve");*/ if (st->first) for (i=0;i<128;i++) @@ -454,7 +458,7 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) { float lsp_dist=0; for (i=0;ilpcSize;i++) - lsp_dist += (st->old_lsp[i] - st->lsp[i])*(st->old_lsp[i] - st->lsp[i]); + lsp_dist += (st->old_lsp[i] - lsp[i])*(st->old_lsp[i] - lsp[i]); lsp_dist /= LSP_SCALING*LSP_SCALING; if (st->abr_enabled) @@ -518,7 +522,17 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) } speex_encoder_ctl(state, SPEEX_SET_MODE, &mode); - + if (st->vbr_max>0) + { + spx_int32_t rate; + speex_encoder_ctl(state, SPEEX_GET_BITRATE, &rate); + if (rate > st->vbr_max) + { + rate = st->vbr_max; + speex_encoder_ctl(state, SPEEX_SET_BITRATE, &rate); + } + } + if (st->abr_enabled) { int bitrate; @@ -580,13 +594,11 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) st->first=1; st->bounded_pitch = 1; - /* Final signal synthesis from excitation */ - iir_mem2(st->exc, st->interp_qlpc, st->frame, st->frameSize, st->lpcSize, st->mem_sp); + speex_move(st->winBuf, in+2*st->frameSize-st->windowSize, (st->windowSize-st->frameSize)*sizeof(spx_word16_t)); -#ifdef RESYNTH - for (i=0;iframeSize;i++) - in[i]=st->frame[i]; -#endif + /* Clear memory (no need to really compute it) */ + for (i=0;ilpcSize;i++) + st->mem_sp[i] = 0; return 0; } @@ -595,16 +607,16 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) if (st->first) { for (i=0;ilpcSize;i++) - st->old_lsp[i] = st->lsp[i]; + st->old_lsp[i] = lsp[i]; } /*Quantize LSPs*/ #if 1 /*0 for unquantized*/ - SUBMODE(lsp_quant)(st->lsp, st->qlsp, st->lpcSize, bits); + SUBMODE(lsp_quant)(lsp, qlsp, st->lpcSize, bits); #else for (i=0;ilpcSize;i++) - st->qlsp[i]=st->lsp[i]; + qlsp[i]=lsp[i]; #endif #ifdef EPIC_48K @@ -685,22 +697,25 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) if (st->first) { for (i=0;ilpcSize;i++) - st->old_qlsp[i] = st->qlsp[i]; + st->old_qlsp[i] = qlsp[i]; } - /* Filter response */ - ALLOC(res, st->subframeSize, spx_sig_t); /* Target signal */ - ALLOC(target, st->subframeSize, spx_sig_t); + ALLOC(target, st->subframeSize, spx_word16_t); + ALLOC(innov, st->subframeSize, spx_sig_t); + ALLOC(exc32, st->subframeSize, spx_word32_t); + ALLOC(ringing, st->subframeSize, spx_word16_t); ALLOC(syn_resp, st->subframeSize, spx_word16_t); - ALLOC(real_exc, st->subframeSize, spx_sig_t); + ALLOC(real_exc, st->subframeSize, spx_word16_t); ALLOC(mem, st->lpcSize, spx_mem_t); /* Loop on sub-frames */ for (sub=0;subnbSubframes;sub++) { int offset; - spx_sig_t *sp, *sw, *exc; + spx_word16_t *sw; + spx_word16_t *exc; + spx_sig_t *innov_save = NULL; int pitch; int response_bound = st->subframeSize; #ifdef EPIC_48K @@ -715,25 +730,26 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) /* Offset relative to start of frame */ offset = st->subframeSize*sub; - /* Original signal */ - sp=st->frame+offset; /* Excitation */ exc=st->exc+offset; /* Weighted signal */ sw=st->sw+offset; - + /* Pointer for saving innovation */ + if (st->innov_save) + innov_save = st->innov_save+offset; + /* LSP interpolation (quantized and unquantized) */ - lsp_interpolate(st->old_lsp, st->lsp, st->interp_lsp, st->lpcSize, sub, st->nbSubframes); - lsp_interpolate(st->old_qlsp, st->qlsp, st->interp_qlsp, st->lpcSize, sub, st->nbSubframes); + lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, sub, st->nbSubframes); + lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes); /* Make sure the filters are stable */ - lsp_enforce_margin(st->interp_lsp, st->lpcSize, LSP_MARGIN); - lsp_enforce_margin(st->interp_qlsp, st->lpcSize, LSP_MARGIN); + lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN); + lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN); /* Compute interpolated LPCs (quantized and unquantized) */ - lsp_to_lpc(st->interp_lsp, st->interp_lpc, st->lpcSize,stack); + lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack); - lsp_to_lpc(st->interp_qlsp, st->interp_qlpc, st->lpcSize, stack); + lsp_to_lpc(interp_qlsp, interp_qlpc, st->lpcSize, stack); /* Compute analysis filter gain at w=pi (for use in SB-CELP) */ { @@ -741,7 +757,7 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) for (i=0;ilpcSize;i+=2) { /*pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];*/ - pi_g = ADD32(pi_g, SUB32(st->interp_qlpc[i+1],st->interp_qlpc[i])); + pi_g = ADD32(pi_g, SUB32(EXTEND32(interp_qlpc[i+1]),EXTEND32(interp_qlpc[i]))); } st->pi_gain[sub] = pi_g; } @@ -752,56 +768,66 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) float fact = ((float)sub+1.0f)/st->nbSubframes; for (i=0;i<128;i++) curr_curve[i] = (1.0f-fact)*st->old_curve[i] + fact*st->curve[i]; - curve_to_lpc(st->psy, curr_curve, st->bw_lpc1, st->bw_lpc2, 10); + curve_to_lpc(st->psy, curr_curve, bw_lpc1, bw_lpc2, 10); } #else /* Compute bandwidth-expanded (unquantized) LPCs for perceptual weighting */ - bw_lpc(st->gamma1, st->interp_lpc, st->bw_lpc1, st->lpcSize); + bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize); if (st->gamma2>=0) - bw_lpc(st->gamma2, st->interp_lpc, st->bw_lpc2, st->lpcSize); + bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize); else { - st->bw_lpc2[0]=1; + bw_lpc2[0]=1; for (i=1;i<=st->lpcSize;i++) - st->bw_lpc2[i]=0; + bw_lpc2[i]=0; } /*print_vec(st->bw_lpc1, 10, "bw_lpc");*/ #endif - for (i=0;isubframeSize;i++) - real_exc[i] = exc[i]; + { + /*FIXME: This will break if we change the window size */ + if (st->windowSize-st->frameSize != st->subframeSize) + speex_error("windowSize-frameSize != subframeSize"); + if (sub==0) + { + for (i=0;isubframeSize;i++) + real_exc[i] = sw[i] = st->winBuf[i]; + } else { + for (i=0;isubframeSize;i++) + real_exc[i] = sw[i] = in[i+((sub-1)*st->subframeSize)]; + } + } + fir_mem16(real_exc, interp_qlpc, real_exc, st->subframeSize, st->lpcSize, st->mem_exc2, stack); if (st->complexity==0) response_bound >>= 1; - compute_impulse_response(st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, syn_resp, response_bound, st->lpcSize, stack); + compute_impulse_response(interp_qlpc, bw_lpc1, bw_lpc2, syn_resp, response_bound, st->lpcSize, stack); for (i=response_bound;isubframeSize;i++) syn_resp[i]=VERY_SMALL; - /* Reset excitation */ - for (i=0;isubframeSize;i++) - exc[i]=VERY_SMALL; - /* Compute zero response of A(z/g1) / ( A(z/g2) * A(z) ) */ for (i=0;ilpcSize;i++) - mem[i]=st->mem_sp[i]; + mem[i]=SHL32(st->mem_sp[i],1); + for (i=0;isubframeSize;i++) + ringing[i] = VERY_SMALL; #ifdef SHORTCUTS2 - iir_mem2(exc, st->interp_qlpc, exc, response_bound, st->lpcSize, mem); + iir_mem16(ringing, interp_qlpc, ringing, response_bound, st->lpcSize, mem, stack); for (i=0;ilpcSize;i++) - mem[i]=st->mem_sw[i]; - filter_mem2(exc, st->bw_lpc1, st->bw_lpc2, res, response_bound, st->lpcSize, mem); + mem[i]=SHL32(st->mem_sw[i],1); + filter_mem16(ringing, st->bw_lpc1, st->bw_lpc2, ringing, response_bound, st->lpcSize, mem, stack); for (i=response_bound;isubframeSize;i++) - res[i]=0; + ringing[i]=0; #else - iir_mem2(exc, st->interp_qlpc, exc, st->subframeSize, st->lpcSize, mem); + iir_mem16(ringing, interp_qlpc, ringing, st->subframeSize, st->lpcSize, mem, stack); for (i=0;ilpcSize;i++) - mem[i]=st->mem_sw[i]; - filter_mem2(exc, st->bw_lpc1, st->bw_lpc2, res, st->subframeSize, st->lpcSize, mem); + mem[i]=SHL32(st->mem_sw[i],1); + filter_mem16(ringing, bw_lpc1, bw_lpc2, ringing, st->subframeSize, st->lpcSize, mem, stack); #endif /* Compute weighted signal */ for (i=0;ilpcSize;i++) mem[i]=st->mem_sw[i]; - filter_mem2(sp, st->bw_lpc1, st->bw_lpc2, sw, st->subframeSize, st->lpcSize, mem); + filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, mem, stack); if (st->complexity==0) for (i=0;ilpcSize;i++) @@ -809,8 +835,9 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) /* Compute target signal */ for (i=0;isubframeSize;i++) - target[i]=sw[i]-res[i]; + target[i]=SUB16(sw[i],PSHR32(ringing[i],1)); + /* Reset excitation */ for (i=0;isubframeSize;i++) exc[i]=0; @@ -847,18 +874,18 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) #ifdef EPIC_48K if (st->lbr_48k) { - pitch = SUBMODE(ltp_quant)(target, sw, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, - exc, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef, + pitch = SUBMODE(ltp_quant)(target, sw, interp_qlpc, bw_lpc1, bw_lpc2, + exc32, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef, st->lpcSize, st->subframeSize, bits, stack, - exc, syn_resp, st->complexity, ol_pitch_id, st->plc_tuning); + exc, syn_resp, st->complexity, ol_pitch_id, st->plc_tuning, &st->cumul_gain); } else { #endif /* Perform pitch search */ - pitch = SUBMODE(ltp_quant)(target, sw, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, - exc, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef, + pitch = SUBMODE(ltp_quant)(target, sw, interp_qlpc, bw_lpc1, bw_lpc2, + exc32, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef, st->lpcSize, st->subframeSize, bits, stack, - exc, syn_resp, st->complexity, 0, st->plc_tuning); + exc, syn_resp, st->complexity, 0, st->plc_tuning, &st->cumul_gain); #ifdef EPIC_48K } #endif @@ -870,30 +897,28 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) /* Quantization of innovation */ { - spx_sig_t *innov; spx_word32_t ener=0; spx_word16_t fine_gain; - innov = st->innov+sub*st->subframeSize; for (i=0;isubframeSize;i++) innov[i]=0; for (i=0;isubframeSize;i++) - real_exc[i] = SUB32(real_exc[i], exc[i]); + real_exc[i] = SUB16(real_exc[i], PSHR32(exc32[i],SIG_SHIFT-1)); - ener = SHL32(EXTEND32(compute_rms(real_exc, st->subframeSize)),SIG_SHIFT); + ener = SHL32(EXTEND32(compute_rms16(real_exc, st->subframeSize)),SIG_SHIFT); /*FIXME: Should use DIV32_16 and make sure result fits in 16 bits */ #ifdef FIXED_POINT { - spx_word32_t f = DIV32(ener,PSHR32(ol_gain,SIG_SHIFT)); + spx_word32_t f = PDIV32(ener,PSHR32(ol_gain,SIG_SHIFT)); if (f<=32767) fine_gain = f; else fine_gain = 32767; } #else - fine_gain = DIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT)); + fine_gain = PDIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT)); #endif /* Calculate gain correction for the sub-frame (if any) */ if (SUBMODE(have_subframe_gain)) @@ -922,7 +947,7 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) if (SUBMODE(innovation_quant)) { /* Codebook search */ - SUBMODE(innovation_quant)(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, + SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2, SUBMODE(innovation_params), st->lpcSize, st->subframeSize, innov, syn_resp, bits, stack, st->complexity, SUBMODE(double_codebook)); @@ -930,11 +955,16 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) signal_mul(innov, innov, ener, st->subframeSize); for (i=0;isubframeSize;i++) - exc[i] = ADD32(exc[i],innov[i]); + exc[i] = EXTRACT16(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT)); } else { speex_error("No fixed codebook"); } + if (innov_save) + { + for (i=0;isubframeSize;i++) + innov_save[i] = innov[i]; + } /* In some (rare) modes, we do a second search (more bits) to reduce noise even more */ if (SUBMODE(double_codebook)) { char *tmp_stack=stack; @@ -943,24 +973,31 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) for (i=0;isubframeSize;i++) innov2[i]=0; for (i=0;isubframeSize;i++) - target[i]*=2.2; - SUBMODE(innovation_quant)(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, + target[i]=MULT16_16_P13(QCONST16(2.2,13), target[i]); + SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2, SUBMODE(innovation_params), st->lpcSize, st->subframeSize, innov2, syn_resp, bits, stack, st->complexity, 0); - signal_mul(innov2, innov2, (spx_word32_t) (ener*(1.f/2.2f)), st->subframeSize); + signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545,15),ener), st->subframeSize); for (i=0;isubframeSize;i++) - exc[i] = ADD32(exc[i],innov2[i]); + exc[i] = ADD32(exc[i],PSHR32(innov2[i],SIG_SHIFT)); + if (innov_save) + { + for (i=0;isubframeSize;i++) + innov_save[i] = ADD32(innov_save[i],innov2[i]); + } stack = tmp_stack; } } + for (i=0;isubframeSize;i++) + sw[i] = exc[i]; /* Final signal synthesis from excitation */ - iir_mem2(exc, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, st->mem_sp); + iir_mem16(sw, interp_qlpc, sw, st->subframeSize, st->lpcSize, st->mem_sp, stack); /* Compute weighted signal again, from synthesized speech (not sure it's the right thing) */ if (st->complexity!=0) - filter_mem2(sp, st->bw_lpc1, st->bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw); + filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw, stack); } @@ -968,9 +1005,9 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) if (st->submodeID>=1) { for (i=0;ilpcSize;i++) - st->old_lsp[i] = st->lsp[i]; + st->old_lsp[i] = lsp[i]; for (i=0;ilpcSize;i++) - st->old_qlsp[i] = st->qlsp[i]; + st->old_qlsp[i] = qlsp[i]; } #ifdef VORBIS_PSYCHO @@ -991,19 +1028,7 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) /* The next frame will not be the first (Duh!) */ st->first = 0; - -#ifdef RESYNTH - /* Replace input by synthesized speech */ - for (i=0;iframeSize;i++) - { - spx_word32_t sig = PSHR32(st->frame[i],SIG_SHIFT); - if (sig>32767) - sig = 32767; - if (sig<-32767) - sig = -32767; - in[i]=sig; - } -#endif + speex_move(st->winBuf, in+2*st->frameSize-st->windowSize, (st->windowSize-st->frameSize)*sizeof(spx_word16_t)); if (SUBMODE(innovation_quant) == noise_codebook_quant || st->submodeID==0) st->bounded_pitch = 1; @@ -1013,7 +1038,6 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) return 1; } - void *nb_decoder_init(const SpeexMode *m) { DecState *st; @@ -1050,27 +1074,16 @@ void *nb_decoder_init(const SpeexMode *m) st->submodes=mode->submodes; st->submodeID=mode->defaultSubmode; - st->lpc_enh_enabled=0; - + st->lpc_enh_enabled=1; - st->inBuf = speex_alloc((st->frameSize)*sizeof(spx_sig_t)); - st->frame = st->inBuf; - st->excBuf = speex_alloc((st->frameSize + st->max_pitch + 1)*sizeof(spx_sig_t)); - st->exc = st->excBuf + st->max_pitch + 1; - for (i=0;iframeSize;i++) - st->inBuf[i]=0; + st->excBuf = speex_alloc((st->frameSize + 2*st->max_pitch + st->subframeSize + 12)*sizeof(spx_word16_t)); + st->exc = st->excBuf + 2*st->max_pitch + st->subframeSize + 6; for (i=0;iframeSize + st->max_pitch + 1;i++) st->excBuf[i]=0; - st->innov = speex_alloc((st->frameSize)*sizeof(spx_sig_t)); st->interp_qlpc = speex_alloc(st->lpcSize*sizeof(spx_coef_t)); - st->qlsp = speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); st->old_qlsp = speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); - st->interp_qlsp = speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); - st->mem_sp = speex_alloc((5*st->lpcSize)*sizeof(spx_mem_t)); - st->comb_mem = speex_alloc(sizeof(CombFilterMem)); - comb_filter_mem_init (st->comb_mem); - + st->mem_sp = speex_alloc(st->lpcSize*sizeof(spx_mem_t)); st->pi_gain = speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); st->last_pitch = 40; st->count_lost=0; @@ -1104,15 +1117,10 @@ void nb_decoder_destroy(void *state) speex_free_scratch(st->stack); #endif - speex_free (st->inBuf); speex_free (st->excBuf); - speex_free (st->innov); speex_free (st->interp_qlpc); - speex_free (st->qlsp); speex_free (st->old_qlsp); - speex_free (st->interp_qlsp); speex_free (st->mem_sp); - speex_free (st->comb_mem); speex_free (st->pi_gain); speex_free(state); @@ -1131,9 +1139,6 @@ static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack) { int i, sub; int pitch_val; - VARDECL(spx_coef_t *awk1); - VARDECL(spx_coef_t *awk2); - VARDECL(spx_coef_t *awk3); spx_word16_t pitch_gain; spx_word16_t fact; spx_word16_t gain_med; @@ -1162,48 +1167,27 @@ static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack) pitch_gain = MULT16_16_Q15(fact,pitch_gain) + VERY_SMALL; /* Shift all buffers by one frame */ - /*speex_move(st->inBuf, st->inBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(spx_sig_t));*/ - speex_move(st->excBuf, st->excBuf+st->frameSize, (st->max_pitch + 1)*sizeof(spx_sig_t)); - - ALLOC(awk1, (st->lpcSize+1), spx_coef_t); - ALLOC(awk2, (st->lpcSize+1), spx_coef_t); - ALLOC(awk3, (st->lpcSize+1), spx_coef_t); - + speex_move(st->excBuf, st->excBuf+st->frameSize, (2*st->max_pitch + st->subframeSize + 12)*sizeof(spx_word16_t)); for (sub=0;subnbSubframes;sub++) { int offset; - spx_sig_t *sp, *exc; + spx_word16_t *sp; + spx_word16_t *exc; /* Offset relative to start of frame */ offset = st->subframeSize*sub; /* Original signal */ - sp=st->frame+offset; + sp=out+offset; /* Excitation */ exc=st->exc+offset; /* Excitation after post-filter*/ - - /* Calculate perceptually enhanced LPC filter */ - if (st->lpc_enh_enabled) - { - spx_word16_t k1,k2,k3; - if (st->submodes[st->submodeID] != NULL) - { - k1=SUBMODE(lpc_enh_k1); - k2=SUBMODE(lpc_enh_k2); - k3=SUBMODE(lpc_enh_k3); - } else { - k1=k2=.7*GAMMA_SCALING; - k3=.0; - } - bw_lpc(k1, st->interp_qlpc, awk1, st->lpcSize); - bw_lpc(k2, st->interp_qlpc, awk2, st->lpcSize); - bw_lpc(k3, st->interp_qlpc, awk3, st->lpcSize); - } /* Make up a plausible excitation */ /* FIXME: THIS CAN BE IMPROVED */ /*if (pitch_gain>.95) pitch_gain=.95;*/ - innov_gain = compute_rms(st->innov, st->frameSize); + + /* FIXME: This was rms of innovation (not exc) */ + innov_gain = compute_rms16(st->exc, st->frameSize); pitch_val = st->last_pitch + SHR32((spx_int32_t)speex_rand(1+st->count_lost, &st->seed),SIG_SHIFT); if (pitch_val > st->max_pitch) pitch_val = st->max_pitch; @@ -1211,36 +1195,16 @@ static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack) pitch_val = st->min_pitch; for (i=0;isubframeSize;i++) { - exc[i]= MULT16_32_Q15(pitch_gain, (exc[i-pitch_val]+VERY_SMALL)) + - MULT16_32_Q15(fact, MULT16_32_Q15(SHL(Q15ONE,15)-SHL(MULT16_16(pitch_gain,pitch_gain),1),speex_rand(innov_gain, &st->seed))); + /* FIXME: Second term need to be 16-bit */ + exc[i]= MULT16_16_Q15(pitch_gain, (exc[i-pitch_val]+VERY_SMALL)) + + MULT16_16_Q15(fact, MULT16_16_Q15(SHL(Q15ONE,15)-SHL(MULT16_16(pitch_gain,pitch_gain),1),speex_rand(innov_gain, &st->seed))); } - for (i=0;isubframeSize;i++) - sp[i]=exc[i]; - - /* Signal synthesis */ - if (st->lpc_enh_enabled) - { - filter_mem2(sp, awk2, awk1, sp, st->subframeSize, st->lpcSize, - st->mem_sp+st->lpcSize); - filter_mem2(sp, awk3, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, - st->mem_sp); - } else { - for (i=0;ilpcSize;i++) - st->mem_sp[st->lpcSize+i] = 0; - iir_mem2(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, - st->mem_sp); - } - } + sp[i]=exc[i-st->subframeSize]; + iir_mem16(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, + st->mem_sp, stack); - for (i=0;iframeSize;i++) - { - spx_word32_t sig = PSHR32(st->frame[i],SIG_SHIFT); - if (sig>32767) - sig = 32767; - if (sig<-32767) - sig = -32767; - out[i]=sig; + bw_lpc(QCONST16(.98,15), st->interp_qlpc, st->interp_qlpc, st->lpcSize); } st->first = 0; @@ -1250,6 +1214,7 @@ static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack) st->pitch_gain_buf_idx = 0; } + int nb_decode(void *state, SpeexBits *bits, void *vout) { DecState *st; @@ -1264,15 +1229,17 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) int wideband; int m; char *stack; - VARDECL(spx_coef_t *awk1); - VARDECL(spx_coef_t *awk2); - VARDECL(spx_coef_t *awk3); + VARDECL(spx_sig_t *innov); + VARDECL(spx_word32_t *exc32); + VARDECL(spx_coef_t *ak); + VARDECL(spx_lsp_t *qlsp); spx_word16_t pitch_average=0; #ifdef EPIC_48K int pitch_half[2]; int ol_pitch_id=0; #endif spx_word16_t *out = vout; + VARDECL(spx_lsp_t *interp_qlsp); st=(DecState*)state; stack=st->stack; @@ -1373,7 +1340,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) } /* Shift all buffers by one frame */ - speex_move(st->excBuf, st->excBuf+st->frameSize, (st->max_pitch + 1)*sizeof(spx_sig_t)); + speex_move(st->excBuf, st->excBuf+st->frameSize, (2*st->max_pitch + st->subframeSize + 12)*sizeof(spx_word16_t)); /* If null mode (no transmission), just set a couple things to zero*/ if (st->submodes[st->submodeID] == NULL) @@ -1386,34 +1353,28 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) float pgain=GAIN_SCALING_1*st->last_pitch_gain; if (pgain>.6) pgain=.6; - innov_gain = compute_rms(st->innov, st->frameSize); + /* FIXME: This was innov, not exc */ + innov_gain = compute_rms16(st->exc, st->frameSize); for (i=0;iframeSize;i++) - st->exc[i]=VERY_SMALL; - speex_rand_vec(innov_gain, st->exc, st->frameSize); + st->exc[i]=speex_rand(innov_gain, &st->seed); } st->first=1; - /* Final signal synthesis from excitation */ - iir_mem2(st->exc, lpc, st->frame, st->frameSize, st->lpcSize, st->mem_sp); - for (i=0;iframeSize;i++) - { - spx_word32_t sig = PSHR32(st->frame[i],SIG_SHIFT); - if (sig>32767) - sig = 32767; - if (sig<-32767) - sig = -32767; - out[i]=sig; - } + out[i] = st->exc[i]; + /* Final signal synthesis from excitation */ + iir_mem16(out, lpc, out, st->frameSize, st->lpcSize, st->mem_sp, stack); st->count_lost=0; return 0; } + ALLOC(qlsp, st->lpcSize, spx_lsp_t); + /* Unquantize LSPs */ - SUBMODE(lsp_unquant)(st->qlsp, st->lpcSize, bits); + SUBMODE(lsp_unquant)(qlsp, st->lpcSize, bits); /*Damp memory if a frame was lost and the LSP changed too much*/ if (st->count_lost) @@ -1421,13 +1382,13 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) spx_word16_t fact; spx_word32_t lsp_dist=0; for (i=0;ilpcSize;i++) - lsp_dist = ADD32(lsp_dist, EXTEND32(ABS(st->old_qlsp[i] - st->qlsp[i]))); + lsp_dist = ADD32(lsp_dist, EXTEND32(ABS(st->old_qlsp[i] - qlsp[i]))); #ifdef FIXED_POINT fact = SHR16(19661,SHR32(lsp_dist,LSP_SHIFT+2)); #else fact = .6*exp(-.2*lsp_dist); #endif - for (i=0;i<2*st->lpcSize;i++) + for (i=0;ilpcSize;i++) st->mem_sp[i] = MULT16_32_Q15(fact,st->mem_sp[i]); } @@ -1436,7 +1397,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) if (st->first || st->count_lost) { for (i=0;ilpcSize;i++) - st->old_qlsp[i] = st->qlsp[i]; + st->old_qlsp[i] = qlsp[i]; } #ifdef EPIC_48K @@ -1483,9 +1444,9 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) } #endif - ALLOC(awk1, st->lpcSize+1, spx_coef_t); - ALLOC(awk2, st->lpcSize+1, spx_coef_t); - ALLOC(awk3, st->lpcSize+1, spx_coef_t); + ALLOC(ak, st->lpcSize, spx_coef_t); + ALLOC(innov, st->subframeSize, spx_sig_t); + ALLOC(exc32, st->subframeSize, spx_word32_t); if (st->submodeID==1) { @@ -1504,7 +1465,9 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) for (sub=0;subnbSubframes;sub++) { int offset; - spx_sig_t *sp, *exc; + spx_word16_t *exc; + spx_word16_t *sp; + spx_sig_t *innov_save = NULL; spx_word16_t tmp; #ifdef EPIC_48K @@ -1519,40 +1482,13 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) /* Offset relative to start of frame */ offset = st->subframeSize*sub; - /* Original signal */ - sp=st->frame+offset; /* Excitation */ exc=st->exc+offset; - /* Excitation after post-filter*/ - - /* LSP interpolation (quantized and unquantized) */ - lsp_interpolate(st->old_qlsp, st->qlsp, st->interp_qlsp, st->lpcSize, sub, st->nbSubframes); - - /* Make sure the LSP's are stable */ - lsp_enforce_margin(st->interp_qlsp, st->lpcSize, LSP_MARGIN); - - - /* Compute interpolated LPCs (unquantized) */ - lsp_to_lpc(st->interp_qlsp, st->interp_qlpc, st->lpcSize, stack); - - /* Compute enhanced synthesis filter */ - if (st->lpc_enh_enabled) - { - bw_lpc(SUBMODE(lpc_enh_k1), st->interp_qlpc, awk1, st->lpcSize); - bw_lpc(SUBMODE(lpc_enh_k2), st->interp_qlpc, awk2, st->lpcSize); - bw_lpc(SUBMODE(lpc_enh_k3), st->interp_qlpc, awk3, st->lpcSize); - } + /* Original signal */ + sp=out+offset; + if (st->innov_save) + innov_save = st->innov_save+offset; - /* Compute analysis filter at w=pi */ - { - spx_word32_t pi_g=LPC_SCALING; - for (i=0;ilpcSize;i+=2) - { - /*pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];*/ - pi_g = ADD32(pi_g, SUB32(st->interp_qlpc[i+1],st->interp_qlpc[i])); - } - st->pi_gain[sub] = pi_g; - } /* Reset excitation */ for (i=0;isubframeSize;i++) @@ -1595,13 +1531,13 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) #ifdef EPIC_48K if (st->lbr_48k) { - SUBMODE(ltp_unquant)(exc, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params), + SUBMODE(ltp_unquant)(exc, exc32, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params), st->subframeSize, &pitch, &pitch_gain[0], bits, stack, st->count_lost, offset, st->last_pitch_gain, ol_pitch_id); } else { #endif - SUBMODE(ltp_unquant)(exc, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params), + SUBMODE(ltp_unquant)(exc, exc32, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params), st->subframeSize, &pitch, &pitch_gain[0], bits, stack, st->count_lost, offset, st->last_pitch_gain, 0); @@ -1609,25 +1545,16 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) } #endif - - /* If we had lost frames, check energy of last received frame */ - if (st->count_lost && ol_gain < st->last_ol_gain) - { - /*float fact = (float)ol_gain/(st->last_ol_gain+1); - for (i=0;isubframeSize;i++) - exc[i]*=fact;*/ - spx_word16_t fact = DIV32_16(SHL32(EXTEND32(ol_gain),15),st->last_ol_gain+1); - for (i=0;isubframeSize;i++) - exc[i] = MULT16_32_Q15(fact, exc[i]); - } - tmp = gain_3tap_to_1tap(pitch_gain); pitch_average += tmp; - if (tmp>best_pitch_gain) + if ((tmp>best_pitch_gain&&ABS(2*best_pitch-pitch)>=3&&ABS(3*best_pitch-pitch)>=4&&ABS(4*best_pitch-pitch)>=5) + || (tmp>MULT16_16_Q15(QCONST16(.6,15),best_pitch_gain)&&(ABS(best_pitch-2*pitch)<3||ABS(best_pitch-3*pitch)<4||ABS(best_pitch-4*pitch)<5)) + || (MULT16_16_Q15(QCONST16(.67,15),tmp)>best_pitch_gain&&(ABS(2*best_pitch-pitch)<3||ABS(3*best_pitch-pitch)<4||ABS(4*best_pitch-pitch)<5)) ) { best_pitch = pitch; - best_pitch_gain = tmp; + if (tmp > best_pitch_gain) + best_pitch_gain = tmp; } } else { speex_error("No pitch prediction, what's wrong"); @@ -1637,9 +1564,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) { int q_energy; spx_word32_t ener; - spx_sig_t *innov; - innov = st->innov+sub*st->subframeSize; for (i=0;isubframeSize;i++) innov[i]=0; @@ -1681,7 +1606,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) while (st->voc_offsetsubframeSize) { if (st->voc_offset>=0) - exc[st->voc_offset]=SIG_SCALING*sqrt(1.0*ol_pitch); + exc[st->voc_offset]=sqrt(1.0*ol_pitch); st->voc_offset+=ol_pitch; } st->voc_offset -= st->subframeSize; @@ -1693,8 +1618,9 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) g=1; for (i=0;isubframeSize;i++) { - float exci=exc[i]; - exc[i]=.8*g*exc[i]*ol_gain/SIG_SCALING + .6*g*st->voc_m1*ol_gain/SIG_SCALING + .5*g*innov[i] - .5*g*st->voc_m2 + (1-g)*innov[i]; + spx_word16_t exci=exc[i]; + /* FIXME: cleanup the innov[i]/SIG_SCALING */ + exc[i]=.8*g*exc[i]*PSHR32(ol_gain,SIG_SHIFT) + .6*g*st->voc_m1*PSHR32(ol_gain,SIG_SHIFT) + (1-.5*g)*PSHR32(innov[i],SIG_SHIFT) - .5*g*PSHR32(st->voc_m2,SIG_SHIFT); st->voc_m1 = exci; st->voc_m2=innov[i]; st->voc_mean = .95*st->voc_mean + .05*exc[i]; @@ -1702,9 +1628,14 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) } } else { for (i=0;isubframeSize;i++) - exc[i]=ADD32(exc[i],innov[i]); + exc[i]=PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT); /*print_vec(exc, 40, "innov");*/ } + if (innov_save) + { + for (i=0;isubframeSize;i++) + innov_save[i] = innov[i]; + } /* Decode second codebook (only for some modes) */ if (SUBMODE(double_codebook)) { @@ -1714,68 +1645,93 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) for (i=0;isubframeSize;i++) innov2[i]=0; SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, stack); - signal_mul(innov2, innov2, (spx_word32_t) (ener*(1/2.2)), st->subframeSize); + signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545,15),ener), st->subframeSize); for (i=0;isubframeSize;i++) - exc[i] = ADD32(exc[i],innov2[i]); + exc[i] = ADD16(exc[i],PSHR32(innov2[i],SIG_SHIFT)); + if (innov_save) + { + for (i=0;isubframeSize;i++) + innov_save[i] = ADD32(innov_save[i],innov2[i]); + } stack = tmp_stack; } - } + } + + ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t); - /* If the last packet was lost, re-scale the excitation to obtain the same energy as encoded in ol_gain */ - if (st->count_lost) - { - spx_word16_t exc_ener; - spx_word32_t gain32; - spx_word16_t gain; - exc_ener = compute_rms (exc, st->subframeSize); - gain32 = DIV32(ol_gain, ADD16(exc_ener,1)); + if (st->lpc_enh_enabled && SUBMODE(comb_gain)>0 && !st->count_lost) + { + multicomb(st->exc-st->subframeSize, out, st->interp_qlpc, st->lpcSize, 2*st->subframeSize, best_pitch, 40, SUBMODE(comb_gain), stack); + multicomb(st->exc+st->subframeSize, out+2*st->subframeSize, st->interp_qlpc, st->lpcSize, 2*st->subframeSize, best_pitch, 40, SUBMODE(comb_gain), stack); + } else { + for (i=0;iframeSize;i++) + out[i]=st->exc[i-st->subframeSize]; + } + + /* If the last packet was lost, re-scale the excitation to obtain the same energy as encoded in ol_gain */ + if (st->count_lost) + { + spx_word16_t exc_ener; + spx_word32_t gain32; + spx_word16_t gain; + exc_ener = compute_rms16 (st->exc, st->frameSize); + gain32 = PDIV32(ol_gain, ADD16(exc_ener,1)); #ifdef FIXED_POINT - if (gain32 > 32768) - gain32 = 32768; - gain = EXTRACT16(gain32); + if (gain32 > 32768) + gain32 = 32768; + gain = EXTRACT16(gain32); #else - if (gain32 > 2) - gain32=2; - gain = gain32; + if (gain32 > 2) + gain32=2; + gain = gain32; #endif - for (i=0;isubframeSize;i++) - exc[i] = MULT16_32_Q14(gain, exc[i]); + for (i=0;iframeSize;i++) + { + st->exc[i] = MULT16_16_Q14(gain, st->exc[i]); + out[i]=st->exc[i-st->subframeSize]; } + } - for (i=0;isubframeSize;i++) - sp[i]=exc[i]; + /*Loop on subframes */ + for (sub=0;subnbSubframes;sub++) + { + int offset; + spx_word16_t *sp; + spx_word16_t *exc; + /* Offset relative to start of frame */ + offset = st->subframeSize*sub; + /* Original signal */ + sp=out+offset; + /* Excitation */ + exc=st->exc+offset; + + /* LSP interpolation (quantized and unquantized) */ + lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes); + + /* Make sure the LSP's are stable */ + lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN); - /* Signal synthesis */ - if (st->lpc_enh_enabled && SUBMODE(comb_gain)>0) - comb_filter(exc, sp, st->interp_qlpc, st->lpcSize, st->subframeSize, - pitch, pitch_gain, SUBMODE(comb_gain), st->comb_mem); + /* Compute interpolated LPCs (unquantized) */ + lsp_to_lpc(interp_qlsp, ak, st->lpcSize, stack); - if (st->lpc_enh_enabled) + /* Compute analysis filter at w=pi */ { - /* Use enhanced LPC filter */ - filter_mem2(sp, awk2, awk1, sp, st->subframeSize, st->lpcSize, - st->mem_sp+st->lpcSize); - filter_mem2(sp, awk3, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, - st->mem_sp); - } else { - /* Use regular filter */ - for (i=0;ilpcSize;i++) - st->mem_sp[st->lpcSize+i] = 0; - iir_mem2(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, - st->mem_sp); + spx_word32_t pi_g=LPC_SCALING; + for (i=0;ilpcSize;i+=2) + { + /*pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];*/ + pi_g = ADD32(pi_g, SUB32(EXTEND32(st->interp_qlpc[i+1]),EXTEND32(st->interp_qlpc[i]))); + } + st->pi_gain[sub] = pi_g; } - } - - /*Copy output signal*/ - for (i=0;iframeSize;i++) - { - spx_word32_t sig = PSHR32(st->frame[i],SIG_SHIFT); - if (sig>32767) - sig = 32767; - if (sig<-32767) - sig = -32767; - out[i]=sig; + + iir_mem16(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, + st->mem_sp, stack); + + for (i=0;ilpcSize;i++) + st->interp_qlpc[i] = ak[i]; + } /*for (i=0;iframeSize;i++) @@ -1783,7 +1739,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) /* Store the LSPs for interpolation in the next frame */ for (i=0;ilpcSize;i++) - st->old_qlsp[i] = st->qlsp[i]; + st->old_qlsp[i] = qlsp[i]; /* The next frame will not be the first (Duh!) */ st->first = 0; @@ -1839,12 +1795,14 @@ int nb_encoder_ctl(void *state, int request, void *ptr) (*(int*)ptr) = st->dtx_enabled; break; case SPEEX_SET_ABR: - st->abr_enabled = (*(int*)ptr); - st->vbr_enabled = 1; + st->abr_enabled = (*(spx_int32_t*)ptr); + st->vbr_enabled = st->abr_enabled!=0; + if (st->vbr_enabled) { - int i=10, rate, target; + int i=10; + spx_int32_t rate, target; float vbr_qual; - target = (*(int*)ptr); + target = (*(spx_int32_t*)ptr); while (i>=0) { speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i); @@ -1864,7 +1822,7 @@ int nb_encoder_ctl(void *state, int request, void *ptr) break; case SPEEX_GET_ABR: - (*(int*)ptr) = st->abr_enabled; + (*(spx_int32_t*)ptr) = st->abr_enabled; break; case SPEEX_SET_VBR_QUALITY: st->vbr_quality = (*(float*)ptr); @@ -1888,12 +1846,13 @@ int nb_encoder_ctl(void *state, int request, void *ptr) st->complexity=0; break; case SPEEX_GET_COMPLEXITY: - (*(int*)ptr) = st->complexity; + (*(spx_int32_t*)ptr) = st->complexity; break; case SPEEX_SET_BITRATE: { - int i=10, rate, target; - target = (*(int*)ptr); + int i=10; + spx_int32_t rate, target; + target = (*(spx_int32_t*)ptr); while (i>=0) { speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i); @@ -1906,15 +1865,15 @@ int nb_encoder_ctl(void *state, int request, void *ptr) break; case SPEEX_GET_BITRATE: if (st->submodes[st->submodeID]) - (*(int*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize; + (*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize; else - (*(int*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize; + (*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize; break; case SPEEX_SET_SAMPLING_RATE: - st->sampling_rate = (*(int*)ptr); + st->sampling_rate = (*(spx_int32_t*)ptr); break; case SPEEX_GET_SAMPLING_RATE: - (*(int*)ptr)=st->sampling_rate; + (*(spx_int32_t*)ptr)=st->sampling_rate; break; case SPEEX_RESET_STATE: { @@ -1922,13 +1881,13 @@ int nb_encoder_ctl(void *state, int request, void *ptr) st->bounded_pitch = 1; st->first = 1; for (i=0;ilpcSize;i++) - st->lsp[i]=(M_PI*((float)(i+1)))/(st->lpcSize+1); + st->old_lsp[i]=(M_PI*((float)(i+1)))/(st->lpcSize+1); for (i=0;ilpcSize;i++) st->mem_sw[i]=st->mem_sw_whole[i]=st->mem_sp[i]=st->mem_exc[i]=0; for (i=0;iframeSize+st->max_pitch+1;i++) st->excBuf[i]=st->swBuf[i]=0; - for (i=0;iwindowSize;i++) - st->inBuf[i]=0; + for (i=0;iwindowSize-st->frameSize;i++) + st->winBuf[i]=0; } break; case SPEEX_SET_SUBMODE_ENCODING: @@ -1948,6 +1907,15 @@ int nb_encoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_PLC_TUNING: (*(int*)ptr)=(st->plc_tuning); break; + case SPEEX_SET_VBR_MAX_BITRATE: + st->vbr_max = (*(spx_int32_t*)ptr); + break; + case SPEEX_GET_VBR_MAX_BITRATE: + (*(spx_int32_t*)ptr) = st->vbr_max; + break; + + + /* This is all internal stuff past this point */ case SPEEX_GET_PI_GAIN: { int i; @@ -1959,22 +1927,17 @@ int nb_encoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_EXC: { int i; - spx_sig_t *e = (spx_sig_t*)ptr; + spx_word16_t *e = (spx_word16_t*)ptr; for (i=0;iframeSize;i++) e[i]=st->exc[i]; } break; - case SPEEX_GET_INNOV: - { - int i; - spx_sig_t *e = (spx_sig_t*)ptr; - for (i=0;iframeSize;i++) - e[i]=st->innov[i]; - } - break; case SPEEX_GET_RELATIVE_QUALITY: (*(float*)ptr)=st->relative_quality; break; + case SPEEX_SET_INNOVATION_SAVE: + st->innov_save = ptr; + break; default: speex_warning_int("Unknown nb_ctl request: ", request); return -1; @@ -2007,15 +1970,15 @@ int nb_decoder_ctl(void *state, int request, void *ptr) break; case SPEEX_GET_BITRATE: if (st->submodes[st->submodeID]) - (*(int*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize; + (*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize; else - (*(int*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize; + (*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize; break; case SPEEX_SET_SAMPLING_RATE: - st->sampling_rate = (*(int*)ptr); + st->sampling_rate = (*(spx_int32_t*)ptr); break; case SPEEX_GET_SAMPLING_RATE: - (*(int*)ptr)=st->sampling_rate; + (*(spx_int32_t*)ptr)=st->sampling_rate; break; case SPEEX_SET_HANDLER: { @@ -2036,12 +1999,10 @@ int nb_decoder_ctl(void *state, int request, void *ptr) case SPEEX_RESET_STATE: { int i; - for (i=0;i<2*st->lpcSize;i++) + for (i=0;ilpcSize;i++) st->mem_sp[i]=0; for (i=0;iframeSize + st->max_pitch + 1;i++) st->excBuf[i]=0; - for (i=0;iframeSize;i++) - st->inBuf[i] = 0; } break; case SPEEX_SET_SUBMODE_ENCODING: @@ -2050,6 +2011,9 @@ int nb_decoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_SUBMODE_ENCODING: (*(int*)ptr) = st->encode_submode; break; + case SPEEX_GET_LOOKAHEAD: + (*(int*)ptr)=st->subframeSize; + break; case SPEEX_GET_PI_GAIN: { int i; @@ -2061,22 +2025,17 @@ int nb_decoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_EXC: { int i; - spx_sig_t *e = (spx_sig_t*)ptr; + spx_word16_t *e = (spx_word16_t*)ptr; for (i=0;iframeSize;i++) e[i]=st->exc[i]; } break; - case SPEEX_GET_INNOV: - { - int i; - spx_sig_t *e = (spx_sig_t*)ptr; - for (i=0;iframeSize;i++) - e[i]=st->innov[i]; - } - break; case SPEEX_GET_DTX_STATUS: *((int*)ptr) = st->dtx_enabled; break; + case SPEEX_SET_INNOVATION_SAVE: + st->innov_save = ptr; + break; default: speex_warning_int("Unknown nb_ctl request: ", request); return -1; diff --git a/pjmedia/src/pjmedia-codec/speex/nb_celp.h b/pjmedia/src/pjmedia-codec/speex/nb_celp.h index c9fb2b3d..92028cb5 100644 --- a/pjmedia/src/pjmedia-codec/speex/nb_celp.h +++ b/pjmedia/src/pjmedia-codec/speex/nb_celp.h @@ -48,20 +48,20 @@ /**Structure representing the full state of the narrowband encoder*/ typedef struct EncState { - const SpeexMode *mode; /**< Mode corresponding to the state */ - int first; /**< Is this the first frame? */ - int frameSize; /**< Size of frames */ - int subframeSize; /**< Size of sub-frames */ - int nbSubframes; /**< Number of sub-frames */ - int windowSize; /**< Analysis (LPC) window length */ - int lpcSize; /**< LPC order */ - int min_pitch; /**< Minimum pitch value allowed */ - int max_pitch; /**< Maximum pitch value allowed */ - - int safe_pitch; /**< Don't use too large values for pitch (in case we lose a packet) */ - int bounded_pitch; /**< Next frame should not rely on previous frames for pitch */ - int ol_pitch; /**< Open-loop pitch */ - int ol_voiced; /**< Open-loop voiced/non-voiced decision */ + const SpeexMode *mode; /**< Mode corresponding to the state */ + int first; /**< Is this the first frame? */ + int frameSize; /**< Size of frames */ + int subframeSize; /**< Size of sub-frames */ + int nbSubframes; /**< Number of sub-frames */ + int windowSize; /**< Analysis (LPC) window length */ + int lpcSize; /**< LPC order */ + int min_pitch; /**< Minimum pitch value allowed */ + int max_pitch; /**< Maximum pitch value allowed */ + + spx_word32_t cumul_gain; /**< Product of previously used pitch gains (Q10) */ + int bounded_pitch; /**< Next frame should not rely on previous frames for pitch */ + int ol_pitch; /**< Open-loop pitch */ + int ol_voiced; /**< Open-loop voiced/non-voiced decision */ int *pitch; #ifdef EPIC_48K @@ -70,111 +70,100 @@ typedef struct EncState { #ifdef VORBIS_PSYCHO VorbisPsy *psy; + float *psy_window; float *curve; float *old_curve; #endif spx_word16_t gamma1; /**< Perceptual filter: A(z/gamma1) */ spx_word16_t gamma2; /**< Perceptual filter: A(z/gamma2) */ - float lag_factor; /**< Lag windowing Gaussian width */ + float lag_factor; /**< Lag windowing Gaussian width */ spx_word16_t lpc_floor; /**< Noise floor multiplier for A[0] in LPC analysis*/ - char *stack; /**< Pseudo-stack allocation for temporary memory */ - spx_sig_t *inBuf; /**< Input buffer (original signal) */ - spx_sig_t *frame; /**< Start of original frame */ - spx_sig_t *excBuf; /**< Excitation buffer */ - spx_sig_t *exc; /**< Start of excitation frame */ - spx_sig_t *swBuf; /**< Weighted signal buffer */ - spx_sig_t *sw; /**< Start of weighted signal frame */ - spx_sig_t *innov; /**< Innovation for the frame */ - spx_word16_t *window; /**< Temporary (Hanning) window */ - spx_word16_t *autocorr; /**< auto-correlation */ + char *stack; /**< Pseudo-stack allocation for temporary memory */ + spx_word16_t *winBuf; /**< Input buffer (original signal) */ + spx_word16_t *excBuf; /**< Excitation buffer */ + spx_word16_t *exc; /**< Start of excitation frame */ + spx_word16_t *swBuf; /**< Weighted signal buffer */ + spx_word16_t *sw; /**< Start of weighted signal frame */ + const spx_word16_t *window; /**< Temporary (Hanning) window */ spx_word16_t *lagWindow; /**< Window applied to auto-correlation */ - spx_coef_t *lpc; /**< LPCs for current frame */ - spx_lsp_t *lsp; /**< LSPs for current frame */ - spx_lsp_t *qlsp; /**< Quantized LSPs for current frame */ - spx_lsp_t *old_lsp; /**< LSPs for previous frame */ - spx_lsp_t *old_qlsp; /**< Quantized LSPs for previous frame */ - spx_lsp_t *interp_lsp; /**< Interpolated LSPs */ - spx_lsp_t *interp_qlsp; /**< Interpolated quantized LSPs */ - spx_coef_t *interp_lpc; /**< Interpolated LPCs */ - spx_coef_t *interp_qlpc; /**< Interpolated quantized LPCs */ - spx_coef_t *bw_lpc1; /**< LPCs after bandwidth expansion by gamma1 for perceptual weighting*/ - spx_coef_t *bw_lpc2; /**< LPCs after bandwidth expansion by gamma2 for perceptual weighting*/ - spx_mem_t *mem_sp; /**< Filter memory for signal synthesis */ - spx_mem_t *mem_sw; /**< Filter memory for perceptually-weighted signal */ - spx_mem_t *mem_sw_whole; /**< Filter memory for perceptually-weighted signal (whole frame)*/ - spx_mem_t *mem_exc; /**< Filter memory for excitation (whole frame) */ + spx_lsp_t *old_lsp; /**< LSPs for previous frame */ + spx_lsp_t *old_qlsp; /**< Quantized LSPs for previous frame */ + spx_mem_t *mem_sp; /**< Filter memory for signal synthesis */ + spx_mem_t *mem_sw; /**< Filter memory for perceptually-weighted signal */ + spx_mem_t *mem_sw_whole; /**< Filter memory for perceptually-weighted signal (whole frame)*/ + spx_mem_t *mem_exc; /**< Filter memory for excitation (whole frame) */ + spx_mem_t *mem_exc2; /**< Filter memory for excitation (whole frame) */ spx_word32_t *pi_gain; /**< Gain of LPC filter at theta=pi (fe/2) */ - - VBRState *vbr; /**< State of the VBR data */ - float vbr_quality; /**< Quality setting for VBR encoding */ - float relative_quality; /**< Relative quality that will be needed by VBR */ - int vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */ - int vad_enabled; /**< 1 for enabling VAD, 0 otherwise */ - int dtx_enabled; /**< 1 for enabling DTX, 0 otherwise */ - int dtx_count; /**< Number of consecutive DTX frames */ - int abr_enabled; /**< ABR setting (in bps), 0 if off */ + spx_sig_t *innov_save; /**< If non-NULL, innovation is copied here */ + + VBRState *vbr; /**< State of the VBR data */ + float vbr_quality; /**< Quality setting for VBR encoding */ + float relative_quality; /**< Relative quality that will be needed by VBR */ + int vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */ + spx_int32_t vbr_max; /**< Max bit-rate allowed in VBR mode */ + int vad_enabled; /**< 1 for enabling VAD, 0 otherwise */ + int dtx_enabled; /**< 1 for enabling DTX, 0 otherwise */ + int dtx_count; /**< Number of consecutive DTX frames */ + spx_int32_t abr_enabled; /**< ABR setting (in bps), 0 if off */ float abr_drift; float abr_drift2; float abr_count; - int complexity; /**< Complexity setting (0-10 from least complex to most complex) */ - int sampling_rate; + int complexity; /**< Complexity setting (0-10 from least complex to most complex) */ + spx_int32_t sampling_rate; int plc_tuning; int encode_submode; const SpeexSubmode * const *submodes; /**< Sub-mode data */ - int submodeID; /**< Activated sub-mode */ - int submodeSelect; /**< Mode chosen by the user (may differ from submodeID if VAD is on) */ + int submodeID; /**< Activated sub-mode */ + int submodeSelect; /**< Mode chosen by the user (may differ from submodeID if VAD is on) */ } EncState; /**Structure representing the full state of the narrowband decoder*/ typedef struct DecState { const SpeexMode *mode; /**< Mode corresponding to the state */ - int first; /**< Is this the first frame? */ - int count_lost; /**< Was the last frame lost? */ - int frameSize; /**< Size of frames */ - int subframeSize; /**< Size of sub-frames */ - int nbSubframes; /**< Number of sub-frames */ - int lpcSize; /**< LPC order */ - int min_pitch; /**< Minimum pitch value allowed */ - int max_pitch; /**< Maximum pitch value allowed */ - int sampling_rate; + int first; /**< Is this the first frame? */ + int count_lost; /**< Was the last frame lost? */ + int frameSize; /**< Size of frames */ + int subframeSize; /**< Size of sub-frames */ + int nbSubframes; /**< Number of sub-frames */ + int lpcSize; /**< LPC order */ + int min_pitch; /**< Minimum pitch value allowed */ + int max_pitch; /**< Maximum pitch value allowed */ + spx_int32_t sampling_rate; #ifdef EPIC_48K int lbr_48k; #endif - spx_word16_t last_ol_gain; /**< Open-loop gain for previous frame */ - - char *stack; /**< Pseudo-stack allocation for temporary memory */ - spx_sig_t *inBuf; /**< Input buffer (original signal) */ - spx_sig_t *frame; /**< Start of original frame */ - spx_sig_t *excBuf; /**< Excitation buffer */ - spx_sig_t *exc; /**< Start of excitation frame */ - spx_sig_t *innov; /**< Innovation for the frame */ - spx_lsp_t *qlsp; /**< Quantized LSPs for current frame */ - spx_lsp_t *old_qlsp; /**< Quantized LSPs for previous frame */ - spx_lsp_t *interp_qlsp; /**< Interpolated quantized LSPs */ - spx_coef_t *interp_qlpc; /**< Interpolated quantized LPCs */ - spx_mem_t *mem_sp; /**< Filter memory for synthesis signal */ - spx_word32_t *pi_gain; /**< Gain of LPC filter at theta=pi (fe/2) */ - int last_pitch; /**< Pitch of last correctly decoded frame */ + spx_word16_t last_ol_gain; /**< Open-loop gain for previous frame */ + + char *stack; /**< Pseudo-stack allocation for temporary memory */ + spx_word16_t *excBuf; /**< Excitation buffer */ + spx_word16_t *exc; /**< Start of excitation frame */ + spx_lsp_t *old_qlsp; /**< Quantized LSPs for previous frame */ + spx_coef_t *interp_qlpc; /**< Interpolated quantized LPCs */ + spx_mem_t *mem_sp; /**< Filter memory for synthesis signal */ + spx_word32_t *pi_gain; /**< Gain of LPC filter at theta=pi (fe/2) */ + spx_sig_t *innov_save; /** If non-NULL, innovation is copied here */ + + /* This is used in packet loss concealment */ + int last_pitch; /**< Pitch of last correctly decoded frame */ spx_word16_t last_pitch_gain; /**< Pitch gain of last correctly decoded frame */ - spx_word16_t pitch_gain_buf[3]; /**< Pitch gain of last decoded frames */ - int pitch_gain_buf_idx; /**< Tail of the buffer */ - spx_int32_t seed; /** Seed used for random number generation */ + spx_word16_t pitch_gain_buf[3]; /**< Pitch gain of last decoded frames */ + int pitch_gain_buf_idx; /**< Tail of the buffer */ + spx_int32_t seed; /** Seed used for random number generation */ int encode_submode; const SpeexSubmode * const *submodes; /**< Sub-mode data */ - int submodeID; /**< Activated sub-mode */ - int lpc_enh_enabled; /**< 1 when LPC enhancer is on, 0 otherwise */ - CombFilterMem *comb_mem; + int submodeID; /**< Activated sub-mode */ + int lpc_enh_enabled; /**< 1 when LPC enhancer is on, 0 otherwise */ SpeexCallback speex_callbacks[SPEEX_MAX_CALLBACKS]; SpeexCallback user_callback; /*Vocoder data*/ - float voc_m1; - float voc_m2; + spx_word16_t voc_m1; + spx_word32_t voc_m2; float voc_mean; int voc_offset; diff --git a/pjmedia/src/pjmedia-codec/speex/pseudofloat.h b/pjmedia/src/pjmedia-codec/speex/pseudofloat.h index 8642bf0c..e85f60e4 100644 --- a/pjmedia/src/pjmedia-codec/speex/pseudofloat.h +++ b/pjmedia/src/pjmedia-codec/speex/pseudofloat.h @@ -45,9 +45,9 @@ typedef struct { spx_int16_t e; } spx_float_t; -#define FLOAT_ZERO ((spx_float_t){0,0}) -#define FLOAT_ONE ((spx_float_t){16384,-14}) -#define FLOAT_HALF ((spx_float_t){16384,-15}) +static const spx_float_t FLOAT_ZERO = {0,0}; +static const spx_float_t FLOAT_ONE = {16384,-14}; +static const spx_float_t FLOAT_HALF = {16384,-15}; #define MIN(a,b) ((a)<(b)?(a):(b)) static inline spx_float_t PSEUDOFLOAT(spx_int32_t x) @@ -60,7 +60,10 @@ static inline spx_float_t PSEUDOFLOAT(spx_int32_t x) x = -x; } if (x==0) - return (spx_float_t) {0,0}; + { + spx_float_t r = {0,0}; + return r; + } while (x>32767) { x >>= 1; @@ -74,9 +77,19 @@ static inline spx_float_t PSEUDOFLOAT(spx_int32_t x) e--; } if (sign) - return (spx_float_t) {-x,e}; + { + spx_float_t r; + r.m = -x; + r.e = e; + return r; + } else - return (spx_float_t) {x,e}; + { + spx_float_t r; + r.m = x; + r.e = e; + return r; + } } @@ -87,7 +100,16 @@ static inline spx_float_t FLOAT_ADD(spx_float_t a, spx_float_t b) return b; else if (b.m==0) return a; - r = (a).e > (b).e ? (spx_float_t) {((a).m>>1) + ((b).m>>MIN(15,(a).e-(b).e+1)),(a).e+1} : (spx_float_t) {((b).m>>1) + ((a).m>>MIN(15,(b).e-(a).e+1)),(b).e+1}; + if ((a).e > (b).e) + { + r.m = ((a).m>>1) + ((b).m>>MIN(15,(a).e-(b).e+1)); + r.e = (a).e+1; + } + else + { + r.m = ((b).m>>1) + ((a).m>>MIN(15,(b).e-(a).e+1)); + r.e = (b).e+1; + } if (r.m>0) { if (r.m<16384) @@ -113,7 +135,16 @@ static inline spx_float_t FLOAT_SUB(spx_float_t a, spx_float_t b) return b; else if (b.m==0) return a; - r = (a).e > (b).e ? (spx_float_t) {((a).m>>1) - ((b).m>>MIN(15,(a).e-(b).e+1)),(a).e+1} : (spx_float_t) {((a).m>>MIN(15,(b).e-(a).e+1)) - ((b).m>>1) ,(b).e+1}; + if ((a).e > (b).e) + { + r.m = ((a).m>>1) - ((b).m>>MIN(15,(a).e-(b).e+1)); + r.e = (a).e+1; + } + else + { + r.m = ((a).m>>MIN(15,(b).e-(a).e+1)) - ((b).m>>1); + r.e = (b).e+1; + } if (r.m>0) { if (r.m<16384) @@ -152,7 +183,9 @@ static inline int FLOAT_GT(spx_float_t a, spx_float_t b) static inline spx_float_t FLOAT_MULT(spx_float_t a, spx_float_t b) { - spx_float_t r = (spx_float_t) {(spx_int16_t)((spx_int32_t)(a).m*(b).m>>15), (a).e+(b).e+15}; + spx_float_t r; + r.m = (spx_int16_t)((spx_int32_t)(a).m*(b).m>>15); + r.e = (a).e+(b).e+15; if (r.m>0) { if (r.m<16384) @@ -174,13 +207,16 @@ static inline spx_float_t FLOAT_MULT(spx_float_t a, spx_float_t b) static inline spx_float_t FLOAT_SHL(spx_float_t a, int b) { - return (spx_float_t) {a.m,a.e+b}; + spx_float_t r; + r.m = a.m; + r.e = a.e+b; + return r; } static inline spx_int16_t FLOAT_EXTRACT16(spx_float_t a) { if (a.e<0) - return (a.m+(1<<(-a.e-1)))>>-a.e; + return EXTRACT16((EXTEND32(a.m)+(1<<(-a.e-1)))>>-a.e); else return a.m<32767) { a >>= 1; @@ -219,35 +258,45 @@ static inline spx_float_t FLOAT_MUL32U(spx_word32_t a, spx_word32_t b) b <<= 1; e--; } - return (spx_float_t) {MULT16_16_Q15(a,b),e+15}; + r.m = MULT16_16_Q15(a,b); + r.e = e+15; + return r; } static inline spx_float_t FLOAT_DIV32_FLOAT(spx_word32_t a, spx_float_t b) { int e=0; + spx_float_t r; /* FIXME: Handle the sign */ if (a==0) - return (spx_float_t) {0,0}; - while (a=SHL32(b.m-1,15)) + while (a>=SHL32(EXTEND32(b.m-1),15)) { a >>= 1; e++; } - return (spx_float_t) {DIV32_16(a,b.m),e-b.e}; + r.m = DIV32_16(a,b.m); + r.e = e-b.e; + return r; } static inline spx_float_t FLOAT_DIV32(spx_word32_t a, spx_word32_t b) { int e=0; + spx_float_t r; /* FIXME: Handle the sign */ if (a==0) - return (spx_float_t) {0,0}; + { + return FLOAT_ZERO; + } while (b>32767) { b >>= 1; @@ -263,13 +312,16 @@ static inline spx_float_t FLOAT_DIV32(spx_word32_t a, spx_word32_t b) a >>= 1; e++; } - return (spx_float_t) {DIV32_16(a,b),e}; + r.m = DIV32_16(a,b); + r.e = e; + return r; } static inline spx_float_t FLOAT_DIVU(spx_float_t a, spx_float_t b) { int e=0; spx_int32_t num; + spx_float_t r; num = a.m; while (a.m >= b.m) { @@ -277,7 +329,9 @@ static inline spx_float_t FLOAT_DIVU(spx_float_t a, spx_float_t b) a.m >>= 1; } num = num << (15-e); - return (spx_float_t) {DIV32_16(num,b.m),a.e-b.e-15+e}; + r.m = DIV32_16(num,b.m); + r.e = a.e-b.e-15+e; + return r; } #else diff --git a/pjmedia/src/pjmedia-codec/speex/quant_lsp.c b/pjmedia/src/pjmedia-codec/speex/quant_lsp.c index 7bd0b91f..bfca5870 100644 --- a/pjmedia/src/pjmedia-codec/speex/quant_lsp.c +++ b/pjmedia/src/pjmedia-codec/speex/quant_lsp.c @@ -40,9 +40,12 @@ #define M_PI 3.14159265358979323846 #endif - #include "misc.h" +#ifdef BFIN_ASM +#include "quant_lsp_bfin.h" +#endif + #ifdef FIXED_POINT #define LSP_LINEAR(i) (SHL16(i+1,11)) @@ -90,12 +93,13 @@ static void compute_quant_weights(spx_lsp_t *qlsp, spx_word16_t *quant_weight, i } /* Note: x is modified*/ +#ifndef OVERRIDE_LSP_QUANT static int lsp_quant(spx_word16_t *x, const signed char *cdbk, int nbVec, int nbDim) { int i,j; spx_word32_t dist; spx_word16_t tmp; - spx_word32_t best_dist=0; + spx_word32_t best_dist=VERY_LARGE32; int best_id=0; const signed char *ptr=cdbk; for (i=0;iframe_size = mode->frameSize; st->subframeSize = mode->subframeSize; st->nbSubframes = mode->frameSize/mode->subframeSize; - st->windowSize = st->frame_size*3/2; + st->windowSize = st->frame_size+st->subframeSize; st->lpcSize=mode->lpcSize; st->bufSize=mode->bufSize; @@ -277,18 +279,7 @@ void *sb_encoder_init(const SpeexMode *m) st->res=speex_alloc((st->frame_size)*sizeof(spx_sig_t)); st->sw=speex_alloc((st->frame_size)*sizeof(spx_sig_t)); - st->target=speex_alloc((st->frame_size)*sizeof(spx_sig_t)); - /*Asymmetric "pseudo-Hamming" window*/ - { - int part1, part2; - part1 = st->subframeSize*7/2; - part2 = st->subframeSize*5/2; - st->window = speex_alloc((st->windowSize)*sizeof(spx_word16_t)); - for (i=0;iwindow[i]=(spx_word16_t)(SIG_SCALING*(.54-.46*cos(M_PI*i/part1))); - for (i=0;iwindow[part1+i]=(spx_word16_t)(SIG_SCALING*(.54+.46*cos(M_PI*i/part2))); - } + st->window= lpc_window; st->lagWindow = speex_alloc((st->lpcSize+1)*sizeof(spx_word16_t)); for (i=0;ilpcSize+1;i++) @@ -307,13 +298,18 @@ void *sb_encoder_init(const SpeexMode *m) st->interp_lpc = speex_alloc(st->lpcSize*sizeof(spx_coef_t)); st->interp_qlpc = speex_alloc(st->lpcSize*sizeof(spx_coef_t)); st->pi_gain = speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); - + st->low_innov = speex_alloc((st->frame_size)*sizeof(spx_word32_t)); + speex_encoder_ctl(st->st_low, SPEEX_SET_INNOVATION_SAVE, st->low_innov); + st->innov_save = NULL; + st->mem_sp = speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_sp2 = speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_sw = speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->vbr_quality = 8; st->vbr_enabled = 0; + st->vbr_max = 0; + st->vbr_max_high = 20000; /* We just need a big value here */ st->vad_enabled = 0; st->abr_enabled = 0; st->relative_quality=0; @@ -350,8 +346,6 @@ void sb_encoder_destroy(void *state) speex_free(st->excBuf); speex_free(st->res); speex_free(st->sw); - speex_free(st->target); - speex_free(st->window); speex_free(st->lagWindow); speex_free(st->autocorr); @@ -384,10 +378,10 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) char *stack; VARDECL(spx_mem_t *mem); VARDECL(spx_sig_t *innov); + VARDECL(spx_word16_t *target); VARDECL(spx_word16_t *syn_resp); VARDECL(spx_word32_t *low_pi_gain); - VARDECL(spx_sig_t *low_exc); - VARDECL(spx_sig_t *low_innov); + VARDECL(spx_word16_t *low_exc); const SpeexSBMode *mode; int dtx; spx_word16_t *in = vin; @@ -422,11 +416,9 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) ALLOC(low_pi_gain, st->nbSubframes, spx_word32_t); - ALLOC(low_exc, st->frame_size, spx_sig_t); - ALLOC(low_innov, st->frame_size, spx_sig_t); + ALLOC(low_exc, st->frame_size, spx_word16_t); speex_encoder_ctl(st->st_low, SPEEX_GET_PI_GAIN, low_pi_gain); speex_encoder_ctl(st->st_low, SPEEX_GET_EXC, low_exc); - speex_encoder_ctl(st->st_low, SPEEX_GET_INNOV, low_innov); speex_encoder_ctl(st->st_low, SPEEX_GET_LOW_MODE, &dtx); @@ -455,15 +447,15 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) _spx_lpc(st->lpc, st->autocorr, st->lpcSize); /* LPC to LSPs (x-domain) transform */ - roots=lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 15, LSP_DELTA1, stack); + roots=lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 10, LSP_DELTA1, stack); if (roots!=st->lpcSize) { - roots = lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 11, LSP_DELTA2, stack); + roots = lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 10, LSP_DELTA2, stack); if (roots!=st->lpcSize) { /*If we can't find all LSP's, do some damage control and use a flat filter*/ for (i=0;ilpcSize;i++) { - st->lsp[i]=M_PI*((float)(i+1))/(st->lpcSize+1); + st->lsp[i]=LSP_SCALING*M_PI*((float)(i+1))/(st->lpcSize+1); } } } @@ -521,7 +513,7 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) else thresh = (st->vbr_quality-v1) * mode->vbr_thresh[modeid][v1+1] + (1+v1-st->vbr_quality) * mode->vbr_thresh[modeid][v1]; - if (st->relative_quality >= thresh) + if (st->relative_quality >= thresh && st->sampling_rate*st->submodes[modeid]->bits_per_frame/st->full_frame_size <= st->vbr_max_high) break; modeid--; } @@ -601,10 +593,11 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) ALLOC(mem, st->lpcSize, spx_mem_t); ALLOC(syn_resp, st->subframeSize, spx_word16_t); ALLOC(innov, st->subframeSize, spx_sig_t); + ALLOC(target, st->subframeSize, spx_word16_t); for (sub=0;subnbSubframes;sub++) { - spx_sig_t *exc, *sp, *res, *target, *sw; + spx_sig_t *exc, *sp, *res, *sw, *innov_save=NULL; spx_word16_t filter_ratio; int offset; spx_word32_t rl, rh; @@ -614,8 +607,14 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) sp=st->high+offset; exc=st->exc+offset; res=st->res+offset; - target=st->target+offset; sw=st->sw+offset; + /* Pointer for saving innovation */ + if (st->innov_save) + { + innov_save = st->innov_save+2*offset; + for (i=0;i<2*st->subframeSize;i++) + innov_save[i]=0; + } /* LSP interpolation (quantized and unquantized) */ lsp_interpolate(st->old_lsp, st->lsp, st->interp_lsp, st->lpcSize, sub, st->nbSubframes); @@ -642,7 +641,7 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) rl = low_pi_gain[sub]; #ifdef FIXED_POINT - filter_ratio=DIV32_16(SHL(rl+82,2),SHR(82+rh,5)); + filter_ratio=PDIV32_16(SHL(rl+82,2),SHR(82+rh,5)); #else filter_ratio=(rl+.01)/(rh+.01); #endif @@ -656,10 +655,10 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) if (!SUBMODE(innovation_quant)) {/* 1 for spectral folding excitation, 0 for stochastic */ float g; spx_word16_t el; - el = compute_rms(low_innov+offset, st->subframeSize); + el = compute_rms(st->low_innov+offset, st->subframeSize); /* Gain to use if we want to use the low-band excitation for high-band */ - g=eh/(.01+el); + g=eh/(1.+el); #if 0 { @@ -669,7 +668,7 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) ALLOC(tmp_sig, st->subframeSize, spx_sig_t); for (i=0;ilpcSize;i++) mem[i]=st->mem_sp[i]; - iir_mem2(low_innov+offset, st->interp_qlpc, tmp_sig, st->subframeSize, st->lpcSize, mem); + iir_mem2(st->low_innov+offset, st->interp_qlpc, tmp_sig, st->subframeSize, st->lpcSize, mem); g2 = compute_rms(sp, st->subframeSize)/(.01+compute_rms(tmp_sig, st->subframeSize)); /*fprintf (stderr, "gains: %f %f\n", g, g2);*/ g = g2; @@ -698,9 +697,9 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) spx_word16_t gc; spx_word32_t scale; spx_word16_t el; - el = compute_rms(low_exc+offset, st->subframeSize); + el = compute_rms16(low_exc+offset, st->subframeSize); - gc = DIV32_16(MULT16_16(filter_ratio,1+eh),1+el); + gc = PDIV32_16(MULT16_16(filter_ratio,1+eh),1+el); /* This is a kludge that cleans up a historical bug */ if (st->subframeSize==80) @@ -726,7 +725,7 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) if (st->subframeSize==80) gc *= 1.4142; - scale = SHL(MULT16_16(DIV32_16(SHL(gc,SIG_SHIFT-4),filter_ratio),(1+el)),4); + scale = SHL32(MULT16_16(PDIV32_16(SHL32(EXTEND32(gc),SIG_SHIFT-6),filter_ratio),(1+el)),6); compute_impulse_response(st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, syn_resp, st->subframeSize, st->lpcSize, stack); @@ -751,7 +750,7 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) /* Compute target signal */ for (i=0;isubframeSize;i++) - target[i]=sw[i]-res[i]; + target[i]=PSHR32(sw[i]-res[i],SIG_SHIFT); for (i=0;isubframeSize;i++) exc[i]=0; @@ -773,6 +772,12 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) for (i=0;isubframeSize;i++) exc[i] = ADD32(exc[i], innov[i]); + if (st->innov_save) + { + for (i=0;isubframeSize;i++) + innov_save[2*i]=innov[i]; + } + if (SUBMODE(double_codebook)) { char *tmp_stack=stack; VARDECL(spx_sig_t *innov2); @@ -871,6 +876,7 @@ void *sb_decoder_init(const SpeexMode *m) st->g1_mem=speex_alloc((QMF_ORDER)*sizeof(spx_word32_t)); st->exc=speex_alloc((st->frame_size)*sizeof(spx_sig_t)); + st->excBuf=speex_alloc((st->subframeSize)*sizeof(spx_sig_t)); st->qlsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t)); st->old_qlsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t)); @@ -880,6 +886,11 @@ void *sb_decoder_init(const SpeexMode *m) st->pi_gain = speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); st->mem_sp = speex_alloc((2*st->lpcSize)*sizeof(spx_mem_t)); + st->low_innov = speex_alloc((st->frame_size)*sizeof(spx_word32_t)); + speex_decoder_ctl(st->st_low, SPEEX_SET_INNOVATION_SAVE, st->low_innov); + st->innov_save = NULL; + + st->lpc_enh_enabled=0; st->seed = 1000; @@ -906,6 +917,7 @@ void sb_decoder_destroy(void *state) speex_free(st->g0_mem); speex_free(st->g1_mem); speex_free(st->exc); + speex_free(st->excBuf); speex_free(st->qlsp); speex_free(st->old_qlsp); speex_free(st->interp_qlsp); @@ -919,9 +931,6 @@ void sb_decoder_destroy(void *state) static void sb_decode_lost(SBDecState *st, spx_word16_t *out, int dtx, char *stack) { int i; - VARDECL(spx_coef_t *awk1); - VARDECL(spx_coef_t *awk2); - VARDECL(spx_coef_t *awk3); int saved_modeid=0; if (dtx) @@ -934,28 +943,6 @@ static void sb_decode_lost(SBDecState *st, spx_word16_t *out, int dtx, char *sta st->first=1; - ALLOC(awk1, st->lpcSize+1, spx_coef_t); - ALLOC(awk2, st->lpcSize+1, spx_coef_t); - ALLOC(awk3, st->lpcSize+1, spx_coef_t); - - if (st->lpc_enh_enabled) - { - spx_word16_t k1,k2,k3; - if (st->submodes[st->submodeID] != NULL) - { - k1=SUBMODE(lpc_enh_k1); - k2=SUBMODE(lpc_enh_k2); - k3=SUBMODE(lpc_enh_k3); - } else { - k1=k2=.7*GAMMA_SCALING; - k3 = 0; - } - bw_lpc(k1, st->interp_qlpc, awk1, st->lpcSize); - bw_lpc(k2, st->interp_qlpc, awk2, st->lpcSize); - bw_lpc(k3, st->interp_qlpc, awk3, st->lpcSize); - /*fprintf (stderr, "%f %f %f\n", k1, k2, k3);*/ - } - /* Final signal synthesis from excitation */ if (!dtx) @@ -969,22 +956,9 @@ static void sb_decode_lost(SBDecState *st, spx_word16_t *out, int dtx, char *sta for (i=0;iframe_size;i++) st->high[i]=st->exc[i]; - if (st->lpc_enh_enabled) - { - /* Use enhanced LPC filter */ - filter_mem2(st->high, awk2, awk1, st->high, st->frame_size, st->lpcSize, - st->mem_sp+st->lpcSize); - filter_mem2(st->high, awk3, st->interp_qlpc, st->high, st->frame_size, st->lpcSize, - st->mem_sp); - } else { - /* Use regular filter */ - for (i=0;ilpcSize;i++) - st->mem_sp[st->lpcSize+i] = 0; - iir_mem2(st->high, st->interp_qlpc, st->high, st->frame_size, st->lpcSize, - st->mem_sp); - } + iir_mem2(st->high, st->interp_qlpc, st->high, st->frame_size, st->lpcSize, + st->mem_sp); - /*iir_mem2(st->exc, st->interp_qlpc, st->high, st->frame_size, st->lpcSize, st->mem_sp);*/ /* Reconstruct the original */ fir_mem_up(st->x0d, h0, st->y0, st->full_frame_size, QMF_ORDER, st->g0_mem, stack); @@ -1008,11 +982,8 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) int ret; char *stack; VARDECL(spx_word32_t *low_pi_gain); - VARDECL(spx_sig_t *low_exc); - VARDECL(spx_sig_t *low_innov); - VARDECL(spx_coef_t *awk1); - VARDECL(spx_coef_t *awk2); - VARDECL(spx_coef_t *awk3); + VARDECL(spx_word16_t *low_exc); + VARDECL(spx_coef_t *ak); int dtx; const SpeexSBMode *mode; spx_word16_t *out = vout; @@ -1101,11 +1072,9 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) st->exc[i]=0; ALLOC(low_pi_gain, st->nbSubframes, spx_word32_t); - ALLOC(low_exc, st->frame_size, spx_sig_t); - ALLOC(low_innov, st->frame_size, spx_sig_t); + ALLOC(low_exc, st->frame_size, spx_word16_t); speex_decoder_ctl(st->st_low, SPEEX_GET_PI_GAIN, low_pi_gain); speex_decoder_ctl(st->st_low, SPEEX_GET_EXC, low_exc); - speex_decoder_ctl(st->st_low, SPEEX_GET_INNOV, low_innov); SUBMODE(lsp_unquant)(st->qlsp, st->lpcSize, bits); @@ -1115,13 +1084,11 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) st->old_qlsp[i] = st->qlsp[i]; } - ALLOC(awk1, st->lpcSize+1, spx_coef_t); - ALLOC(awk2, st->lpcSize+1, spx_coef_t); - ALLOC(awk3, st->lpcSize+1, spx_coef_t); + ALLOC(ak, st->lpcSize, spx_coef_t); for (sub=0;subnbSubframes;sub++) { - spx_sig_t *exc, *sp; + spx_sig_t *exc, *sp, *innov_save=NULL; spx_word16_t filter_ratio; spx_word16_t el=0; int offset; @@ -1130,6 +1097,13 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) offset = st->subframeSize*sub; sp=st->high+offset; exc=st->exc+offset; + /* Pointer for saving innovation */ + if (st->innov_save) + { + innov_save = st->innov_save+2*offset; + for (i=0;i<2*st->subframeSize;i++) + innov_save[i]=0; + } /* LSP interpolation */ lsp_interpolate(st->old_qlsp, st->qlsp, st->interp_qlsp, st->lpcSize, sub, st->nbSubframes); @@ -1137,21 +1111,7 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) lsp_enforce_margin(st->interp_qlsp, st->lpcSize, LSP_MARGIN); /* LSP to LPC */ - lsp_to_lpc(st->interp_qlsp, st->interp_qlpc, st->lpcSize, stack); - - - if (st->lpc_enh_enabled) - { - spx_word16_t k1,k2,k3; - k1=SUBMODE(lpc_enh_k1); - k2=SUBMODE(lpc_enh_k2); - k3=SUBMODE(lpc_enh_k3); - bw_lpc(k1, st->interp_qlpc, awk1, st->lpcSize); - bw_lpc(k2, st->interp_qlpc, awk2, st->lpcSize); - bw_lpc(k3, st->interp_qlpc, awk3, st->lpcSize); - /*fprintf (stderr, "%f %f %f\n", k1, k2, k3);*/ - } - + lsp_to_lpc(st->interp_qlsp, ak, st->lpcSize, stack); /* Calculate reponse ratio between the low and high filter in the middle of the band (4000 Hz) */ @@ -1166,7 +1126,7 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) rl = low_pi_gain[sub]; #ifdef FIXED_POINT - filter_ratio=DIV32_16(SHL(rl+82,2),SHR(82+rh,5)); + filter_ratio=PDIV32_16(SHL(rl+82,2),SHR(82+rh,5)); #else filter_ratio=(rl+.01)/(rh+.01); #endif @@ -1190,7 +1150,7 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) #if 0 for (i=0;isubframeSize;i++) - exc[i]=mode->folding_gain*g*low_innov[offset+i]; + exc[i]=mode->folding_gain*g*st->low_innov[offset+i]; #else { float tmp=1; @@ -1199,7 +1159,7 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) el = compute_rms(low_innov+offset, st->subframeSize);*/ for (i=0;isubframeSize;i++) { - float e=tmp*g*mode->folding_gain*low_innov[offset+i]; + float e=tmp*g*mode->folding_gain*st->low_innov[offset+i]; tmp *= -1; exc[i] = e; /*float r = speex_rand(g*el,&seed); @@ -1210,14 +1170,13 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) } - /*speex_rand_vec(mode->folding_gain*g*el, exc, st->subframeSize);*/ #endif } else { spx_word16_t gc; spx_word32_t scale; int qgc = speex_bits_unpack_unsigned(bits, 4); - el = compute_rms(low_exc+offset, st->subframeSize); + el = compute_rms16(low_exc+offset, st->subframeSize); #ifdef FIXED_POINT gc = MULT16_32_Q15(28626,gc_quant_bound[qgc]); @@ -1228,7 +1187,7 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) if (st->subframeSize==80) gc *= 1.4142; - scale = SHL(MULT16_16(DIV32_16(SHL(gc,SIG_SHIFT-4),filter_ratio),(1+el)),4); + scale = SHL(MULT16_16(PDIV32_16(SHL(gc,SIG_SHIFT-6),filter_ratio),(1+el)),6); SUBMODE(innovation_unquant)(exc, SUBMODE(innovation_params), st->subframeSize, bits, stack); @@ -1251,24 +1210,21 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) } } - - for (i=0;isubframeSize;i++) - sp[i]=exc[i]; - if (st->lpc_enh_enabled) + + if (st->innov_save) { - /* Use enhanced LPC filter */ - filter_mem2(sp, awk2, awk1, sp, st->subframeSize, st->lpcSize, - st->mem_sp+st->lpcSize); - filter_mem2(sp, awk3, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, - st->mem_sp); - } else { - /* Use regular filter */ - for (i=0;ilpcSize;i++) - st->mem_sp[st->lpcSize+i] = 0; - iir_mem2(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, - st->mem_sp); + for (i=0;isubframeSize;i++) + innov_save[2*i]=exc[i]; } - /*iir_mem2(exc, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, st->mem_sp);*/ + + for (i=0;isubframeSize;i++) + sp[i]=st->excBuf[i]; + iir_mem2(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, + st->mem_sp); + for (i=0;isubframeSize;i++) + st->excBuf[i]=exc[i]; + for (i=0;ilpcSize;i++) + st->interp_qlpc[i] = ak[i]; } @@ -1345,13 +1301,14 @@ int sb_encoder_ctl(void *state, int request, void *ptr) (*(float*)ptr) = st->vbr_quality; break; case SPEEX_SET_ABR: - st->abr_enabled = (*(int*)ptr); - st->vbr_enabled = 1; + st->abr_enabled = (*(spx_int32_t*)ptr); + st->vbr_enabled = st->abr_enabled!=0; speex_encoder_ctl(st->st_low, SPEEX_SET_VBR, &st->vbr_enabled); + if (st->vbr_enabled) { int i=10, rate, target; float vbr_qual; - target = (*(int*)ptr); + target = (*(spx_int32_t*)ptr); while (i>=0) { speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i); @@ -1371,7 +1328,7 @@ int sb_encoder_ctl(void *state, int request, void *ptr) break; case SPEEX_GET_ABR: - (*(int*)ptr) = st->abr_enabled; + (*(spx_int32_t*)ptr) = st->abr_enabled; break; case SPEEX_SET_QUALITY: { @@ -1397,8 +1354,9 @@ int sb_encoder_ctl(void *state, int request, void *ptr) break; case SPEEX_SET_BITRATE: { - int i=10, rate, target; - target = (*(int*)ptr); + int i=10; + spx_int32_t rate, target; + target = (*(spx_int32_t*)ptr); while (i>=0) { speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i); @@ -1413,21 +1371,21 @@ int sb_encoder_ctl(void *state, int request, void *ptr) speex_encoder_ctl(st->st_low, request, ptr); /*fprintf (stderr, "before: %d\n", (*(int*)ptr));*/ if (st->submodes[st->submodeID]) - (*(int*)ptr) += st->sampling_rate*SUBMODE(bits_per_frame)/st->full_frame_size; + (*(spx_int32_t*)ptr) += st->sampling_rate*SUBMODE(bits_per_frame)/st->full_frame_size; else - (*(int*)ptr) += st->sampling_rate*(SB_SUBMODE_BITS+1)/st->full_frame_size; + (*(spx_int32_t*)ptr) += st->sampling_rate*(SB_SUBMODE_BITS+1)/st->full_frame_size; /*fprintf (stderr, "after: %d\n", (*(int*)ptr));*/ break; case SPEEX_SET_SAMPLING_RATE: { - int tmp=(*(int*)ptr); + spx_int32_t tmp=(*(spx_int32_t*)ptr); st->sampling_rate = tmp; tmp>>=1; speex_encoder_ctl(st->st_low, SPEEX_SET_SAMPLING_RATE, &tmp); } break; case SPEEX_GET_SAMPLING_RATE: - (*(int*)ptr)=st->sampling_rate; + (*(spx_int32_t*)ptr)=st->sampling_rate; break; case SPEEX_RESET_STATE: { @@ -1454,6 +1412,45 @@ int sb_encoder_ctl(void *state, int request, void *ptr) speex_encoder_ctl(st->st_low, SPEEX_GET_LOOKAHEAD, ptr); (*(int*)ptr) = 2*(*(int*)ptr) + QMF_ORDER - 1; break; + case SPEEX_SET_PLC_TUNING: + speex_encoder_ctl(st->st_low, SPEEX_SET_PLC_TUNING, ptr); + break; + case SPEEX_GET_PLC_TUNING: + speex_encoder_ctl(st->st_low, SPEEX_GET_PLC_TUNING, ptr); + break; + case SPEEX_SET_VBR_MAX_BITRATE: + { + st->vbr_max = (*(spx_int32_t*)ptr); + if (SPEEX_SET_VBR_MAX_BITRATE<1) + { + speex_encoder_ctl(st->st_low, SPEEX_SET_VBR_MAX_BITRATE, &st->vbr_max); + st->vbr_max_high = 17600; + } else { + spx_int32_t low_rate; + /* FIXME: Need to adapt that to ultra-wideband */ + if (st->vbr_max >= 42200) + { + st->vbr_max_high = 17600; + } else if (st->vbr_max >= 27800) + { + st->vbr_max_high = 9600; + } else if (st->vbr_max > 20600) + { + st->vbr_max_high = 5600; + } else { + st->vbr_max_high = 1800; + } + low_rate = st->vbr_max - st->vbr_max_high; + speex_encoder_ctl(st->st_low, SPEEX_SET_VBR_MAX_BITRATE, &low_rate); + } + } + break; + case SPEEX_GET_VBR_MAX_BITRATE: + (*(spx_int32_t*)ptr) = st->vbr_max; + break; + + + /* This is all internal stuff past this point */ case SPEEX_GET_PI_GAIN: { int i; @@ -1485,6 +1482,9 @@ int sb_encoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_RELATIVE_QUALITY: (*(float*)ptr)=st->relative_quality; break; + case SPEEX_SET_INNOVATION_SAVE: + st->innov_save = ptr; + break; default: speex_warning_int("Unknown nb_ctl request: ", request); return -1; @@ -1534,20 +1534,20 @@ int sb_decoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_BITRATE: speex_decoder_ctl(st->st_low, request, ptr); if (st->submodes[st->submodeID]) - (*(int*)ptr) += st->sampling_rate*SUBMODE(bits_per_frame)/st->full_frame_size; + (*(spx_int32_t*)ptr) += st->sampling_rate*SUBMODE(bits_per_frame)/st->full_frame_size; else - (*(int*)ptr) += st->sampling_rate*(SB_SUBMODE_BITS+1)/st->full_frame_size; + (*(spx_int32_t*)ptr) += st->sampling_rate*(SB_SUBMODE_BITS+1)/st->full_frame_size; break; case SPEEX_SET_SAMPLING_RATE: { - int tmp=(*(int*)ptr); + spx_int32_t tmp=(*(spx_int32_t*)ptr); st->sampling_rate = tmp; tmp>>=1; speex_decoder_ctl(st->st_low, SPEEX_SET_SAMPLING_RATE, &tmp); } break; case SPEEX_GET_SAMPLING_RATE: - (*(int*)ptr)=st->sampling_rate; + (*(spx_int32_t*)ptr)=st->sampling_rate; break; case SPEEX_SET_HANDLER: speex_decoder_ctl(st->st_low, SPEEX_SET_HANDLER, ptr); @@ -1571,6 +1571,10 @@ int sb_decoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_SUBMODE_ENCODING: (*(int*)ptr) = st->encode_submode; break; + case SPEEX_GET_LOOKAHEAD: + speex_decoder_ctl(st->st_low, SPEEX_GET_LOOKAHEAD, ptr); + (*(int*)ptr) = 2*(*(int*)ptr); + break; case SPEEX_GET_PI_GAIN: { int i; @@ -1602,6 +1606,9 @@ int sb_decoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_DTX_STATUS: speex_decoder_ctl(st->st_low, SPEEX_GET_DTX_STATUS, ptr); break; + case SPEEX_SET_INNOVATION_SAVE: + st->innov_save = ptr; + break; default: speex_warning_int("Unknown nb_ctl request: ", request); return -1; diff --git a/pjmedia/src/pjmedia-codec/speex/sb_celp.h b/pjmedia/src/pjmedia-codec/speex/sb_celp.h index c2831093..194cdd93 100644 --- a/pjmedia/src/pjmedia-codec/speex/sb_celp.h +++ b/pjmedia/src/pjmedia-codec/speex/sb_celp.h @@ -42,60 +42,63 @@ /**Structure representing the full state of the sub-band encoder*/ typedef struct SBEncState { - const SpeexMode *mode; /**< Pointer to the mode (containing for vtable info) */ - void *st_low; /**< State of the low-band (narrowband) encoder */ - int full_frame_size; /**< Length of full-band frames*/ - int frame_size; /**< Length of high-band frames*/ - int subframeSize; /**< Length of high-band sub-frames*/ - int nbSubframes; /**< Number of high-band sub-frames*/ - int windowSize; /**< Length of high-band LPC window*/ - int lpcSize; /**< Order of high-band LPC analysis */ - int bufSize; /**< Buffer size */ - int first; /**< First frame? */ - float lag_factor; /**< Lag-windowing control parameter */ - spx_word16_t lpc_floor; /**< Controls LPC analysis noise floor */ - spx_word16_t gamma1; /**< Perceptual weighting coef 1 */ - spx_word16_t gamma2; /**< Perceptual weighting coef 2 */ - - char *stack; /**< Temporary allocation stack */ - spx_sig_t *x0d, *x1d; /**< QMF filter signals*/ - spx_sig_t *high; /**< High-band signal (buffer) */ - spx_sig_t *y0, *y1; /**< QMF synthesis signals */ + const SpeexMode *mode; /**< Pointer to the mode (containing for vtable info) */ + void *st_low; /**< State of the low-band (narrowband) encoder */ + int full_frame_size; /**< Length of full-band frames*/ + int frame_size; /**< Length of high-band frames*/ + int subframeSize; /**< Length of high-band sub-frames*/ + int nbSubframes; /**< Number of high-band sub-frames*/ + int windowSize; /**< Length of high-band LPC window*/ + int lpcSize; /**< Order of high-band LPC analysis */ + int bufSize; /**< Buffer size */ + int first; /**< First frame? */ + float lag_factor; /**< Lag-windowing control parameter */ + spx_word16_t lpc_floor; /**< Controls LPC analysis noise floor */ + spx_word16_t gamma1; /**< Perceptual weighting coef 1 */ + spx_word16_t gamma2; /**< Perceptual weighting coef 2 */ + + char *stack; /**< Temporary allocation stack */ + spx_sig_t *x0d, *x1d; /**< QMF filter signals*/ + spx_sig_t *high; /**< High-band signal (buffer) */ + spx_sig_t *y0, *y1; /**< QMF synthesis signals */ spx_word16_t *h0_mem, *h1_mem; spx_word32_t *g0_mem, *g1_mem; /**< QMF memories */ - spx_sig_t *excBuf; /**< High-band excitation */ - spx_sig_t *exc; /**< High-band excitation (for QMF only)*/ - spx_sig_t *res; /**< Zero-input response (ringing) */ - spx_sig_t *sw; /**< Perceptually weighted signal */ - spx_sig_t *target; /**< Weighted target signal (analysis by synthesis) */ - spx_word16_t *window; /**< LPC analysis window */ - spx_word16_t *lagWindow; /**< Auto-correlation window */ - spx_word16_t *autocorr; /**< Auto-correlation (for LPC analysis) */ - spx_coef_t *lpc; /**< LPC coefficients */ - spx_lsp_t *lsp; /**< LSP coefficients */ - spx_lsp_t *qlsp; /**< Quantized LSPs */ - spx_lsp_t *old_lsp; /**< LSPs of previous frame */ - spx_lsp_t *old_qlsp; /**< Quantized LSPs of previous frame */ - spx_lsp_t *interp_lsp; /**< Interpolated LSPs for current sub-frame */ - spx_lsp_t *interp_qlsp; /**< Interpolated quantized LSPs for current sub-frame */ - spx_coef_t *interp_lpc; /**< Interpolated LPCs for current sub-frame */ - spx_coef_t *interp_qlpc; /**< Interpolated quantized LPCs for current sub-frame */ - spx_coef_t *bw_lpc1; /**< Bandwidth-expanded version of LPCs (#1) */ - spx_coef_t *bw_lpc2; /**< Bandwidth-expanded version of LPCs (#2) */ - - spx_mem_t *mem_sp; /**< Synthesis signal memory */ + spx_sig_t *excBuf; /**< High-band excitation */ + spx_sig_t *exc; /**< High-band excitation (for QMF only)*/ + spx_sig_t *res; /**< Zero-input response (ringing) */ + spx_sig_t *sw; /**< Perceptually weighted signal */ + const spx_word16_t *window; /**< LPC analysis window */ + spx_word16_t *lagWindow; /**< Auto-correlation window */ + spx_word16_t *autocorr; /**< Auto-correlation (for LPC analysis) */ + spx_coef_t *lpc; /**< LPC coefficients */ + spx_lsp_t *lsp; /**< LSP coefficients */ + spx_lsp_t *qlsp; /**< Quantized LSPs */ + spx_lsp_t *old_lsp; /**< LSPs of previous frame */ + spx_lsp_t *old_qlsp; /**< Quantized LSPs of previous frame */ + spx_lsp_t *interp_lsp; /**< Interpolated LSPs for current sub-frame */ + spx_lsp_t *interp_qlsp; /**< Interpolated quantized LSPs for current sub-frame */ + spx_coef_t *interp_lpc; /**< Interpolated LPCs for current sub-frame */ + spx_coef_t *interp_qlpc; /**< Interpolated quantized LPCs for current sub-frame */ + spx_coef_t *bw_lpc1; /**< Bandwidth-expanded version of LPCs (#1) */ + spx_coef_t *bw_lpc2; /**< Bandwidth-expanded version of LPCs (#2) */ + + spx_mem_t *mem_sp; /**< Synthesis signal memory */ spx_mem_t *mem_sp2; - spx_mem_t *mem_sw; /**< Perceptual signal memory */ + spx_mem_t *mem_sw; /**< Perceptual signal memory */ spx_word32_t *pi_gain; - - float vbr_quality; /**< Quality setting for VBR encoding */ - int vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */ - int abr_enabled; /**< ABR setting (in bps), 0 if off */ + spx_sig_t *innov_save; /**< If non-NULL, innovation is copied here */ + spx_sig_t *low_innov; /**< Lower-band innovation is copied here magically */ + + float vbr_quality; /**< Quality setting for VBR encoding */ + int vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */ + spx_int32_t vbr_max; /**< Max bit-rate allowed in VBR mode (total) */ + spx_int32_t vbr_max_high; /**< Max bit-rate allowed in VBR mode for the high-band */ + spx_int32_t abr_enabled; /**< ABR setting (in bps), 0 if off */ float abr_drift; float abr_drift2; float abr_count; - int vad_enabled; /**< 1 for enabling VAD, 0 otherwise */ + int vad_enabled; /**< 1 for enabling VAD, 0 otherwise */ float relative_quality; int encode_submode; @@ -103,7 +106,7 @@ typedef struct SBEncState { int submodeID; int submodeSelect; int complexity; - int sampling_rate; + spx_int32_t sampling_rate; } SBEncState; @@ -118,7 +121,7 @@ typedef struct SBDecState { int nbSubframes; int lpcSize; int first; - int sampling_rate; + spx_int32_t sampling_rate; int lpc_enh_enabled; char *stack; @@ -128,6 +131,7 @@ typedef struct SBDecState { spx_word32_t *g0_mem, *g1_mem; spx_sig_t *exc; + spx_sig_t *excBuf; spx_lsp_t *qlsp; spx_lsp_t *old_qlsp; spx_lsp_t *interp_qlsp; @@ -135,6 +139,9 @@ typedef struct SBDecState { spx_mem_t *mem_sp; spx_word32_t *pi_gain; + spx_sig_t *innov_save; /** If non-NULL, innovation is copied here */ + spx_sig_t *low_innov; /** Lower-band innovation is copied here magically */ + spx_int32_t seed; int encode_submode; diff --git a/pjmedia/src/pjmedia-codec/speex/speex.h b/pjmedia/src/pjmedia-codec/speex/speex.h index 0eb2b8a8..c7f7547e 100644 --- a/pjmedia/src/pjmedia-codec/speex/speex.h +++ b/pjmedia/src/pjmedia-codec/speex/speex.h @@ -141,6 +141,11 @@ extern "C" { /** Gets tuning for PLC */ #define SPEEX_GET_PLC_TUNING 41 +/** Sets the max bit-rate allowed in VBR mode */ +#define SPEEX_SET_VBR_MAX_BITRATE 42 +/** Gets the max bit-rate allowed in VBR mode */ +#define SPEEX_GET_VBR_MAX_BITRATE 43 + /* Used internally, not to be used in applications */ /** Used internally*/ #define SPEEX_GET_PI_GAIN 100 @@ -150,6 +155,8 @@ extern "C" { #define SPEEX_GET_INNOV 102 /** Used internally*/ #define SPEEX_GET_DTX_STATUS 103 +/** Used internally*/ +#define SPEEX_SET_INNOVATION_SAVE 104 /* Preserving compatibility:*/ diff --git a/pjmedia/src/pjmedia-codec/speex/speex_echo.h b/pjmedia/src/pjmedia-codec/speex/speex_echo.h index 1962a560..4813b5a0 100644 --- a/pjmedia/src/pjmedia-codec/speex/speex_echo.h +++ b/pjmedia/src/pjmedia-codec/speex/speex_echo.h @@ -61,7 +61,13 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length); void speex_echo_state_destroy(SpeexEchoState *st); /** Performs echo cancellation a frame */ -void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, spx_int32_t *Y); +void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *rec, const spx_int16_t *play, spx_int16_t *out, spx_int32_t *Yout); + +/** Perform echo cancellation using internal playback buffer */ +void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out, spx_int32_t *Yout); + +/** Let the echo canceller know that a frame was just played */ +void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play); /** Reset the echo canceller state */ void speex_echo_state_reset(SpeexEchoState *st); diff --git a/pjmedia/src/pjmedia-codec/speex/speex_jitter.h b/pjmedia/src/pjmedia-codec/speex/speex_jitter.h index 31b5c53c..34043b37 100644 --- a/pjmedia/src/pjmedia-codec/speex/speex_jitter.h +++ b/pjmedia/src/pjmedia-codec/speex/speex_jitter.h @@ -43,32 +43,55 @@ extern "C" { #endif -#define SPEEX_JITTER_MAX_PACKET_SIZE 1500 /**< Maximum number of bytes per packet */ -#define SPEEX_JITTER_MAX_BUFFER_SIZE 20 /**< Maximum number of packets in jitter buffer */ +struct JitterBuffer_; + +typedef struct JitterBuffer_ JitterBuffer; + +typedef struct _JitterBufferPacket JitterBufferPacket; + +struct _JitterBufferPacket { + char *data; + spx_uint32_t len; + spx_uint32_t timestamp; + spx_uint32_t span; +}; + + +#define JITTER_BUFFER_OK 0 +#define JITTER_BUFFER_MISSING 1 +#define JITTER_BUFFER_INCOMPLETE 2 +#define JITTER_BUFFER_INTERNAL_ERROR -1 +#define JITTER_BUFFER_BAD_ARGUMENT -2 + +/** Initialise jitter buffer */ +JitterBuffer *jitter_buffer_init(int tick); + +/** Reset jitter buffer */ +void jitter_buffer_reset(JitterBuffer *jitter); + +/** Destroy jitter buffer */ +void jitter_buffer_destroy(JitterBuffer *jitter); + +/** Put one packet into the jitter buffer */ +void jitter_buffer_put(JitterBuffer *jitter, const JitterBufferPacket *packet); + +/** Get one packet from the jitter buffer */ +int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_uint32_t *current_timestamp); + +/** Get pointer timestamp of jitter buffer */ +int jitter_buffer_get_pointer_timestamp(JitterBuffer *jitter); + +/** Advance by one tick */ +void jitter_buffer_tick(JitterBuffer *jitter); -#define MAX_MARGIN 12 /**< Number of bins in margin histogram */ /** Speex jitter-buffer state. */ typedef struct SpeexJitter { - int buffer_size; /**< Buffer size */ - int pointer_timestamp; /**< Pointer timestamp */ - SpeexBits current_packet; /**< Current Speex packet */ int valid_bits; /**< True if Speex bits are valid */ - - char buf[SPEEX_JITTER_MAX_BUFFER_SIZE][SPEEX_JITTER_MAX_PACKET_SIZE]; /**< Buffer of packets */ - int timestamp[SPEEX_JITTER_MAX_BUFFER_SIZE]; /**< Timestamp of packet */ - int len[SPEEX_JITTER_MAX_BUFFER_SIZE]; /**< Number of bytes in packet */ - + JitterBuffer *packets; void *dec; /**< Pointer to Speex decoder */ int frame_size; /**< Frame size of Speex decoder */ - int frame_time; /**< Frame time in [ms] of Speex decoder */ - int reset_state; /**< True if Speex state was reset */ - - int lost_count; /**< Number of lost packets */ - float shortterm_margin[MAX_MARGIN]; /**< Short term margins */ - float longterm_margin[MAX_MARGIN]; /**< Long term margins */ - float loss_rate; /**< Loss rate */ } SpeexJitter; /** Initialise jitter buffer */ @@ -81,7 +104,7 @@ void speex_jitter_destroy(SpeexJitter *jitter); void speex_jitter_put(SpeexJitter *jitter, char *packet, int len, int timestamp); /** Get one packet from the jitter buffer */ -void speex_jitter_get(SpeexJitter *jitter, short *out, int *current_timestamp); +void speex_jitter_get(SpeexJitter *jitter, spx_int16_t *out, int *start_offset); /** Get pointer timestamp of jitter buffer */ int speex_jitter_get_pointer_timestamp(SpeexJitter *jitter); diff --git a/pjmedia/src/pjmedia-codec/speex/speex_stereo.h b/pjmedia/src/pjmedia-codec/speex/speex_stereo.h index 0b70021a..6ccaa318 100644 --- a/pjmedia/src/pjmedia-codec/speex/speex_stereo.h +++ b/pjmedia/src/pjmedia-codec/speex/speex_stereo.h @@ -53,7 +53,7 @@ typedef struct SpeexStereoState { } SpeexStereoState; /** Initialization value for a stereo state */ -#define SPEEX_STEREO_STATE_INIT {1,.5,1,1} +#define SPEEX_STEREO_STATE_INIT {1,.5,1,1,0,0} /** Transforms a stereo frame into a mono frame and stores intensity stereo info in 'bits' */ void speex_encode_stereo(float *data, int frame_size, SpeexBits *bits); diff --git a/pjmedia/src/pjmedia-codec/speex/speex_types.h b/pjmedia/src/pjmedia-codec/speex/speex_types.h index b67c74fa..a6ebf0cb 100644 --- a/pjmedia/src/pjmedia-codec/speex/speex_types.h +++ b/pjmedia/src/pjmedia-codec/speex/speex_types.h @@ -26,7 +26,6 @@ # if defined(__CYGWIN__) # include <_G_config.h> - typedef _G_int64_t spx_int64_t; typedef _G_int32_t spx_int32_t; typedef _G_uint32_t spx_uint32_t; typedef _G_int16_t spx_int16_t; @@ -36,17 +35,13 @@ typedef unsigned short spx_uint16_t; typedef int spx_int32_t; typedef unsigned int spx_uint32_t; - typedef long long spx_int64_t; - typedef unsigned long long spx_uint64_t; # elif defined(__MWERKS__) - typedef long long spx_int64_t; typedef int spx_int32_t; typedef unsigned int spx_uint32_t; typedef short spx_int16_t; typedef unsigned short spx_uint16_t; # else /* MSVC/Borland */ - typedef __int64 spx_int64_t; typedef __int32 spx_int32_t; typedef unsigned __int32 spx_uint32_t; typedef __int16 spx_int16_t; @@ -60,7 +55,6 @@ typedef UInt16 spx_uint16_t; typedef SInt32 spx_int32_t; typedef UInt32 spx_uint32_t; - typedef SInt64 spx_int64_t; #elif defined(__MACOSX__) /* MacOS X Framework build */ @@ -69,7 +63,6 @@ typedef u_int16_t spx_uint16_t; typedef int32_t spx_int32_t; typedef u_int32_t spx_uint32_t; - typedef int64_t spx_int64_t; #elif defined(__BEOS__) @@ -79,7 +72,6 @@ typedef u_int16_t spx_uint16_t; typedef int32_t spx_int32_t; typedef u_int32_t spx_uint32_t; - typedef int64_t spx_int64_t; #elif defined (__EMX__) @@ -88,7 +80,6 @@ typedef unsigned short spx_uint16_t; typedef int spx_int32_t; typedef unsigned int spx_uint32_t; - typedef long long spx_int64_t; #elif defined (DJGPP) @@ -96,12 +87,10 @@ typedef short spx_int16_t; typedef int spx_int32_t; typedef unsigned int spx_uint32_t; - typedef long long spx_int64_t; #elif defined(R5900) /* PS2 EE */ - typedef long spx_int64_t; typedef int spx_int32_t; typedef unsigned spx_uint32_t; typedef short spx_int16_t; @@ -113,7 +102,6 @@ typedef unsigned short spx_uint16_t; typedef signed int spx_int32_t; typedef unsigned int spx_uint32_t; - typedef long long int spx_int64_t; #elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X) @@ -122,7 +110,7 @@ typedef long spx_int32_t; typedef unsigned long spx_uint32_t; -#elif defined(CONFIG_TI_C5X) +#elif defined(CONFIG_TI_C6X) typedef short spx_int16_t; typedef unsigned short spx_uint16_t; diff --git a/pjmedia/src/pjmedia-codec/speex/stack_alloc.h b/pjmedia/src/pjmedia-codec/speex/stack_alloc.h index 6270d12c..cb048fa5 100644 --- a/pjmedia/src/pjmedia-codec/speex/stack_alloc.h +++ b/pjmedia/src/pjmedia-codec/speex/stack_alloc.h @@ -114,7 +114,7 @@ #define ALLOC(var, size, type) type var[size] #elif defined(USE_ALLOCA) #define VARDECL(var) var -#define ALLOC(var, size, type) var = alloca(sizeof(type)*size) +#define ALLOC(var, size, type) var = alloca(sizeof(type)*(size)) #else #define VARDECL(var) var #define ALLOC(var, size, type) var = PUSH(stack, size, type) diff --git a/pjmedia/src/pjmedia-codec/speex/window.c b/pjmedia/src/pjmedia-codec/speex/window.c new file mode 100644 index 00000000..3748f656 --- /dev/null +++ b/pjmedia/src/pjmedia-codec/speex/window.c @@ -0,0 +1,94 @@ +/* Copyright (C) 2006 Jean-Marc Valin + File: window.c + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + - Neither the name of the Xiph.org Foundation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "misc.h" + +#ifdef FIXED_POINT +const spx_word16_t lpc_window[200] = { +1310, 1313, 1321, 1333, 1352, 1375, 1403, 1436, +1475, 1518, 1567, 1621, 1679, 1743, 1811, 1884, +1962, 2044, 2132, 2224, 2320, 2421, 2526, 2636, +2750, 2868, 2990, 3116, 3246, 3380, 3518, 3659, +3804, 3952, 4104, 4259, 4417, 4578, 4742, 4909, +5079, 5251, 5425, 5602, 5781, 5963, 6146, 6331, +6518, 6706, 6896, 7087, 7280, 7473, 7668, 7863, +8059, 8256, 8452, 8650, 8847, 9044, 9241, 9438, +9635, 9831, 10026, 10220, 10414, 10606, 10797, 10987, +11176, 11363, 11548, 11731, 11912, 12091, 12268, 12443, +12615, 12785, 12952, 13116, 13277, 13435, 13590, 13742, +13890, 14035, 14176, 14314, 14448, 14578, 14704, 14826, +14944, 15058, 15168, 15273, 15374, 15470, 15562, 15649, +15732, 15810, 15883, 15951, 16015, 16073, 16127, 16175, +16219, 16257, 16291, 16319, 16342, 16360, 16373, 16381, +16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, +16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, +16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, +16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, +16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, +16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, +16384, 16384, 16384, 16361, 16294, 16183, 16028, 15830, +15588, 15304, 14979, 14613, 14207, 13763, 13282, 12766, +12215, 11631, 11016, 10373, 9702, 9007, 8289, 7551, +6797, 6028, 5251, 4470, 3695, 2943, 2248, 1696 +}; +#else +const spx_word16_t lpc_window[200] = { + 0.080000, 0.080158, 0.080630, 0.081418, 0.082520, 0.083935, 0.085663, 0.087703, + 0.090052, 0.092710, 0.095674, 0.098943, 0.102514, 0.106385, 0.110553, 0.115015, + 0.119769, 0.124811, 0.130137, 0.135744, 0.141628, 0.147786, 0.154212, 0.160902, + 0.167852, 0.175057, 0.182513, 0.190213, 0.198153, 0.206328, 0.214731, 0.223357, + 0.232200, 0.241254, 0.250513, 0.259970, 0.269619, 0.279453, 0.289466, 0.299651, + 0.310000, 0.320507, 0.331164, 0.341965, 0.352901, 0.363966, 0.375151, 0.386449, + 0.397852, 0.409353, 0.420943, 0.432615, 0.444361, 0.456172, 0.468040, 0.479958, + 0.491917, 0.503909, 0.515925, 0.527959, 0.540000, 0.552041, 0.564075, 0.576091, + 0.588083, 0.600042, 0.611960, 0.623828, 0.635639, 0.647385, 0.659057, 0.670647, + 0.682148, 0.693551, 0.704849, 0.716034, 0.727099, 0.738035, 0.748836, 0.759493, + 0.770000, 0.780349, 0.790534, 0.800547, 0.810381, 0.820030, 0.829487, 0.838746, + 0.847800, 0.856643, 0.865269, 0.873672, 0.881847, 0.889787, 0.897487, 0.904943, + 0.912148, 0.919098, 0.925788, 0.932214, 0.938372, 0.944256, 0.949863, 0.955189, + 0.960231, 0.964985, 0.969447, 0.973615, 0.977486, 0.981057, 0.984326, 0.987290, + 0.989948, 0.992297, 0.994337, 0.996065, 0.997480, 0.998582, 0.999370, 0.999842, + 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, + 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, + 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, + 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, + 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, + 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, + 1.000000, 1.000000, 1.000000, 0.998640, 0.994566, 0.987787, 0.978324, 0.966203, + 0.951458, 0.934131, 0.914270, 0.891931, 0.867179, 0.840084, 0.810723, 0.779182, + 0.745551, 0.709930, 0.672424, 0.633148, 0.592223, 0.549781, 0.505964, 0.460932, + 0.414863, 0.367968, 0.320511, 0.272858, 0.225569, 0.179655, 0.137254, 0.103524 +}; +#endif diff --git a/pjmedia/src/pjmedia-codec/speex_codec.c b/pjmedia/src/pjmedia-codec/speex_codec.c index 6a64c77d..b434ee85 100644 --- a/pjmedia/src/pjmedia-codec/speex_codec.c +++ b/pjmedia/src/pjmedia-codec/speex_codec.c @@ -683,9 +683,7 @@ static pj_status_t spx_codec_encode( pjmedia_codec *codec, struct pjmedia_frame *output) { struct spx_private *spx; - float tmp[642]; /* 20ms at 32KHz + 2 */ - pj_int16_t *samp_in; - unsigned i, samp_count, sz; + unsigned sz; int tx; spx = (struct spx_private*) codec->codec_data; @@ -698,19 +696,11 @@ static pj_status_t spx_codec_encode( pjmedia_codec *codec, return PJ_SUCCESS; } - /* Copy frame to float buffer. */ - samp_count = input->size / 2; - pj_assert(samp_count <= PJ_ARRAY_SIZE(tmp)); - samp_in = input->buf; - for (i=0; ienc_bits); /* Encode the frame */ - tx = speex_encode(spx->enc, tmp, &spx->enc_bits); + tx = speex_encode_int(spx->enc, input->buf, &spx->enc_bits); /* Check if we need not to transmit the frame (DTX) */ if (tx == 0) { @@ -743,9 +733,6 @@ static pj_status_t spx_codec_decode( pjmedia_codec *codec, struct pjmedia_frame *output) { struct spx_private *spx; - float tmp[642]; /* 20ms at 32KHz + 2 */ - pj_int16_t *dst_buf; - unsigned i, count, sz; spx = (struct spx_private*) codec->codec_data; @@ -764,21 +751,11 @@ static pj_status_t spx_codec_decode( pjmedia_codec *codec, speex_bits_read_from(&spx->dec_bits, input->buf, input->size); /* Decode the data */ - speex_decode(spx->dec, &spx->dec_bits, tmp); - - /* Check size. */ - sz = speex_bits_nbytes(&spx->enc_bits); - pj_assert(sz <= output_buf_len); + speex_decode_int(spx->dec, &spx->dec_bits, output->buf); - /* Copy from float to short samples. */ - count = spx_factory.speex_param[spx->param_id].clock_rate * 20 / 1000; - pj_assert((count <= output_buf_len/2) && count <= PJ_ARRAY_SIZE(tmp)); - dst_buf = output->buf; - for (i=0; itype = PJMEDIA_FRAME_TYPE_AUDIO; - output->size = count * 2; + output->size = speex_bits_nbytes(&spx->dec_bits); + pj_assert(output->size <= (int)output_buf_len); output->timestamp.u64 = input->timestamp.u64; @@ -793,9 +770,7 @@ static pj_status_t spx_codec_recover(pjmedia_codec *codec, struct pjmedia_frame *output) { struct spx_private *spx; - float tmp[642]; /* 20ms at 32KHz + 2 */ - pj_int16_t *dst_buf; - unsigned i, count; + unsigned count; /* output_buf_len is unreferenced when building in Release mode */ PJ_UNUSED_ARG(output_buf_len); @@ -803,16 +778,11 @@ static pj_status_t spx_codec_recover(pjmedia_codec *codec, spx = (struct spx_private*) codec->codec_data; count = spx_factory.speex_param[spx->param_id].clock_rate * 20 / 1000; - pj_assert((count <= output_buf_len/2) && count <= PJ_ARRAY_SIZE(tmp)); + pj_assert(count <= output_buf_len/2); /* Recover packet loss */ - speex_decode(spx->dec, NULL, tmp); + speex_decode_int(spx->dec, NULL, output->buf); - /* Copy from float to short samples. */ - dst_buf = output->buf; - for (i=0; isize = count * 2; return PJ_SUCCESS; diff --git a/pjmedia/src/pjmedia/alaw_ulaw.c b/pjmedia/src/pjmedia/alaw_ulaw.c index 5fe119bb..1e2fe002 100644 --- a/pjmedia/src/pjmedia/alaw_ulaw.c +++ b/pjmedia/src/pjmedia/alaw_ulaw.c @@ -255,6 +255,12 @@ ulaw2linear( { int t; + /* Shortcut: when input is zero, output is zero + * This will also make the VAD works harder. + * -bennylp + */ + if (u_val == 0) return 0; + /* Complement to obtain normal u-law value. */ u_val = ~u_val; -- cgit v1.2.3