1 files changed, 203 insertions, 130 deletions
diff --git a/pjmedia/src/pjmedia-codec/speex/lsp.c b/pjmedia/src/pjmedia-codec/speex/lsp.c
index f4350aee..6e7ea311 100644
--- a/pjmedia/src/pjmedia-codec/speex/lsp.c
+++ b/pjmedia/src/pjmedia-codec/speex/lsp.c
@@ -1,8 +1,6 @@
 /*---------------------------------------------------------------------------*\
 Original copyright
-	FILE........: AKSLSPD.C
-	TYPE........: Turbo C
-	COMPANY.....: Voicetronix
+	FILE........: lsp.c
 	AUTHOR......: David Rowe
 	DATE CREATED: 24/2/93
 
@@ -44,6 +42,43 @@ Heavily modified by Jean-Marc Valin (fixed-point, optimizations,
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+/*---------------------------------------------------------------------------*\
+
+  Introduction to Line Spectrum Pairs (LSPs)
+  ------------------------------------------
+
+  LSPs are used to encode the LPC filter coefficients {ak} for
+  transmission over the channel.  LSPs have several properties (like
+  less sensitivity to quantisation noise) that make them superior to
+  direct quantisation of {ak}.
+
+  A(z) is a polynomial of order lpcrdr with {ak} as the coefficients.
+
+  A(z) is transformed to P(z) and Q(z) (using a substitution and some
+  algebra), to obtain something like:
+
+    A(z) = 0.5[P(z)(z+z^-1) + Q(z)(z-z^-1)]  (1)
+
+  As you can imagine A(z) has complex zeros all over the z-plane. P(z)
+  and Q(z) have the very neat property of only having zeros _on_ the
+  unit circle.  So to find them we take a test point z=exp(jw) and
+  evaluate P (exp(jw)) and Q(exp(jw)) using a grid of points between 0
+  and pi.
+
+  The zeros (roots) of P(z) also happen to alternate, which is why we
+  swap coefficients as we find roots.  So the process of finding the
+  LSP frequencies is basically finding the roots of 5th order
+  polynomials.
+
+  The root so P(z) and Q(z) occur in symmetrical pairs at +/-w, hence
+  the name Line Spectrum Pairs (LSPs).
+
+  To convert back to ak we just evaluate (1), "clocking" an impulse
+  thru it lpcrdr times gives us the impulse response of A(z) which is
+  {ak}.
+
+\*---------------------------------------------------------------------------*/
+
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
@@ -63,8 +98,6 @@ Heavily modified by Jean-Marc Valin (fixed-point, optimizations,
 
 #ifdef FIXED_POINT
 
-
-
 #define FREQ_SCALE 16384
 
 /*#define ANGLE2X(a) (32768*cos(((a)/8192.)))*/
@@ -73,6 +106,10 @@ Heavily modified by Jean-Marc Valin (fixed-point, optimizations,
 /*#define X2ANGLE(x) (acos(.00006103515625*(x))*LSP_SCALING)*/
 #define X2ANGLE(x) (spx_acos(x))
 
+#ifdef BFIN_ASM
+#include "lsp_bfin.h"
+#endif
+
 #else
 
 /*#define C1 0.99940307
@@ -88,27 +125,28 @@ Heavily modified by Jean-Marc Valin (fixed-point, optimizations,
 
 /*---------------------------------------------------------------------------*\
 
-	FUNCTION....: cheb_poly_eva()
+   FUNCTION....: cheb_poly_eva()
 
-	AUTHOR......: David Rowe
-	DATE CREATED: 24/2/93
+   AUTHOR......: David Rowe
+   DATE CREATED: 24/2/93
 
-    This function evaluates a series of Chebyshev polynomials
+   This function evaluates a series of Chebyshev polynomials
 
 \*---------------------------------------------------------------------------*/
 
 #ifdef FIXED_POINT
 
-static inline spx_word32_t cheb_poly_eva(spx_word32_t *coef,spx_word16_t x,int m,char *stack)
-/*  float coef[]  	coefficients of the polynomial to be evaluated 	*/
-/*  float x   		the point where polynomial is to be evaluated 	*/
-/*  int m 		order of the polynomial 			*/
+#ifndef OVERRIDE_CHEB_POLY_EVA
+static inline spx_word32_t cheb_poly_eva(
+  spx_word16_t *coef, /* P or Q coefs in Q13 format               */
+  spx_word16_t     x, /* cos of freq (-1.0 to 1.0) in Q14 format  */
+  int              m, /* LPC order/2                              */
+  char         *stack
+)
 {
     int i;
-    VARDECL(spx_word16_t *T);
+    spx_word16_t b0, b1;
     spx_word32_t sum;
-    int m2=m>>1;
-    VARDECL(spx_word16_t *coefn);
 
     /*Prevents overflows*/
     if (x>16383)
@@ -116,73 +154,55 @@ static inline spx_word32_t cheb_poly_eva(spx_word32_t *coef,spx_word16_t x,int m
     if (x<-16383)
        x = -16383;
 
-    /* Allocate memory for Chebyshev series formulation */
-    ALLOC(T, m2+1, spx_word16_t);
-    ALLOC(coefn, m2+1, spx_word16_t);
-
-    for (i=0;i<m2+1;i++)
-    {
-       coefn[i] = coef[i];
-       /*printf ("%f ", coef[i]);*/
-    }
-    /*printf ("\n");*/
-
     /* Initialise values */
-    T[0]=16384;
-    T[1]=x;
-
-    /* Evaluate Chebyshev series formulation using iterative approach  */
-    /* Evaluate polynomial and return value also free memory space */
-    sum = ADD32(EXTEND32(coefn[m2]), EXTEND32(MULT16_16_P14(coefn[m2-1],x)));
-    /*x *= 2;*/
-    for(i=2;i<=m2;i++)
+    b1=16384;
+    b0=x;
+
+    /* Evaluate Chebyshev series formulation usin g iterative approach  */
+    sum = ADD32(EXTEND32(coef[m]), EXTEND32(MULT16_16_P14(coef[m-1],x)));
+    for(i=2;i<=m;i++)
     {
-       T[i] = SUB16(MULT16_16_Q13(x,T[i-1]), T[i-2]);
-       sum = ADD32(sum, EXTEND32(MULT16_16_P14(coefn[m2-i],T[i])));
-       /*printf ("%f ", sum);*/
+       spx_word16_t tmp=b0;
+       b0 = SUB16(MULT16_16_Q13(x,b0), b1);
+       b1 = tmp;
+       sum = ADD32(sum, EXTEND32(MULT16_16_P14(coef[m-i],b0)));
     }
     
-    /*printf ("\n");*/
     return sum;
 }
+#endif
+
 #else
-static float cheb_poly_eva(spx_word32_t *coef,float x,int m,char *stack)
-/*  float coef[]  	coefficients of the polynomial to be evaluated 	*/
-/*  float x   		the point where polynomial is to be evaluated 	*/
-/*  int m 		order of the polynomial 			*/
+
+static float cheb_poly_eva(spx_word32_t *coef, spx_word16_t x, int m, char *stack)
 {
-    int i;
-    VARDECL(float *T);
-    float sum;
-    int m2=m>>1;
+   int k;
+   float b0, b1, tmp;
 
-    /* Allocate memory for Chebyshev series formulation */
-    ALLOC(T, m2+1, float);
+   /* Initial conditions */
+   b0=0; /* b_(m+1) */
+   b1=0; /* b_(m+2) */
 
-    /* Initialise values */
-    T[0]=1;
-    T[1]=x;
-
-    /* Evaluate Chebyshev series formulation using iterative approach  */
-    /* Evaluate polynomial and return value also free memory space */
-    sum = coef[m2] + coef[m2-1]*x;
-    x *= 2;
-    for(i=2;i<=m2;i++)
-    {
-       T[i] = x*T[i-1] - T[i-2];
-       sum += coef[m2-i] * T[i];
-    }
-    
-    return sum;
+   x*=2;
+
+   /* Calculate the b_(k) */
+   for(k=m;k>0;k--)
+   {
+      tmp=b0;                           /* tmp holds the previous value of b0 */
+      b0=x*b0-b1+coef[m-k];    /* b0 holds its new value based on b0 and b1 */
+      b1=tmp;                           /* b1 holds the previous value of b0 */
+   }
+
+   return(-b1+.5*x*b0+coef[m]);
 }
 #endif
 
 /*---------------------------------------------------------------------------*\
 
-	FUNCTION....: lpc_to_lsp()
+    FUNCTION....: lpc_to_lsp()
 
-	AUTHOR......: David Rowe
-	DATE CREATED: 24/2/93
+    AUTHOR......: David Rowe
+    DATE CREATED: 24/2/93
 
     This function converts LPC coefficients to LSP
     coefficients.
@@ -210,11 +230,13 @@ int lpc_to_lsp (spx_coef_t *a,int lpcrdr,spx_lsp_t *freq,int nb,spx_word16_t del
     int i,j,m,flag,k;
     VARDECL(spx_word32_t *Q);                 	/* ptrs for memory allocation 		*/
     VARDECL(spx_word32_t *P);
+    VARDECL(spx_word16_t *Q16);         /* ptrs for memory allocation 		*/
+    VARDECL(spx_word16_t *P16);
     spx_word32_t *px;                	/* ptrs of respective P'(z) & Q'(z)	*/
     spx_word32_t *qx;
     spx_word32_t *p;
     spx_word32_t *q;
-    spx_word32_t *pt;                	/* ptr used for cheb_poly_eval()
+    spx_word16_t *pt;                	/* ptr used for cheb_poly_eval()
 				whether P' or Q' 			*/
     int roots=0;              	/* DR 8/2/94: number of roots found 	*/
     flag = 1;                	/*  program is searching for a root when,
@@ -276,20 +298,31 @@ int lpc_to_lsp (spx_coef_t *a,int lpcrdr,spx_lsp_t *freq,int nb,spx_word16_t del
     px = P;             	/* re-initialise ptrs 			*/
     qx = Q;
 
+    /* now that we have computed P and Q convert to 16 bits to
+       speed up cheb_poly_eval */
+
+    ALLOC(P16, m+1, spx_word16_t);
+    ALLOC(Q16, m+1, spx_word16_t);
+
+    for (i=0;i<m+1;i++)
+    {
+       P16[i] = P[i];
+       Q16[i] = Q[i];
+    }
+
     /* Search for a zero in P'(z) polynomial first and then alternate to Q'(z).
     Keep alternating between the two polynomials as each zero is found 	*/
 
     xr = 0;             	/* initialise xr to zero 		*/
     xl = FREQ_SCALE;               	/* start at point xl = 1 		*/
 
-
     for(j=0;j<lpcrdr;j++){
 	if(j&1)            	/* determines whether P' or Q' is eval. */
-	    pt = qx;
+	    pt = Q16;
 	else
-	    pt = px;
+	    pt = P16;
 
-	psuml = cheb_poly_eva(pt,xl,lpcrdr,stack);	/* evals poly. at xl 	*/
+	psuml = cheb_poly_eva(pt,xl,m,stack);	/* evals poly. at xl 	*/
 	flag = 1;
 	while(flag && (xr >= -FREQ_SCALE)){
            spx_word16_t dd;
@@ -304,7 +337,7 @@ int lpc_to_lsp (spx_coef_t *a,int lpcrdr,spx_lsp_t *freq,int nb,spx_word16_t del
               dd *= .5;
 #endif
            xr = SUB16(xl, dd);                        	/* interval spacing 	*/
-	    psumr = cheb_poly_eva(pt,xr,lpcrdr,stack);/* poly(xl-delta_x) 	*/
+	    psumr = cheb_poly_eva(pt,xr,m,stack);/* poly(xl-delta_x) 	*/
 	    temp_psumr = psumr;
 	    temp_xr = xr;
 
@@ -328,7 +361,7 @@ int lpc_to_lsp (spx_coef_t *a,int lpcrdr,spx_lsp_t *freq,int nb,spx_word16_t del
 #else
                     xm = .5*(xl+xr);        	/* bisect the interval 	*/
 #endif
-		    psumm=cheb_poly_eva(pt,xm,lpcrdr,stack);
+		    psumm=cheb_poly_eva(pt,xm,m,stack);
 		    /*if(psumm*psuml>0.)*/
 		    if(!SIGN_CHANGE(psumm,psuml))
                     {
@@ -354,7 +387,6 @@ int lpc_to_lsp (spx_coef_t *a,int lpcrdr,spx_lsp_t *freq,int nb,spx_word16_t del
     return(roots);
 }
 
-
 /*---------------------------------------------------------------------------*\
 
 	FUNCTION....: lsp_to_lpc()
@@ -362,8 +394,7 @@ int lpc_to_lsp (spx_coef_t *a,int lpcrdr,spx_lsp_t *freq,int nb,spx_word16_t del
 	AUTHOR......: David Rowe
 	DATE CREATED: 24/2/93
 
-    lsp_to_lpc: This function converts LSP coefficients to LPC
-    coefficients.
+        Converts LSP coefficients to LPC coefficients.
 
 \*---------------------------------------------------------------------------*/
 
@@ -373,77 +404,119 @@ void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack)
 /*  float *freq 	array of LSP frequencies in the x domain	*/
 /*  float *ak 		array of LPC coefficients 			*/
 /*  int lpcrdr  	order of LPC coefficients 			*/
-
-
 {
     int i,j;
-    spx_word32_t xout1,xout2,xin1,xin2;
-    VARDECL(spx_word32_t *Wp);
-    spx_word32_t *pw,*n1,*n2,*n3,*n4=NULL;
+    spx_word32_t xout1,xout2,xin;
+    spx_word32_t mult, a;
     VARDECL(spx_word16_t *freqn);
+    VARDECL(spx_word32_t **xp);
+    VARDECL(spx_word32_t *xpmem);
+    VARDECL(spx_word32_t **xq);
+    VARDECL(spx_word32_t *xqmem);
     int m = lpcrdr>>1;
+
+    /* 
+    
+       Reconstruct P(z) and Q(z) by cascading second order polynomials
+       in form 1 - 2cos(w)z(-1) + z(-2), where w is the LSP frequency.
+       In the time domain this is:
+
+       y(n) = x(n) - 2cos(w)x(n-1) + x(n-2)
     
+       This is what the ALLOCS below are trying to do:
+
+         int xp[m+1][lpcrdr+1+2]; // P matrix in QIMP
+         int xq[m+1][lpcrdr+1+2]; // Q matrix in QIMP
+
+       These matrices store the output of each stage on each row.  The
+       final (m-th) row has the output of the final (m-th) cascaded
+       2nd order filter.  The first row is the impulse input to the
+       system (not written as it is known).
+
+       The version below takes advantage of the fact that a lot of the
+       outputs are zero or known, for example if we put an inpulse
+       into the first section the "clock" it 10 times only the first 3
+       outputs samples are non-zero (it's an FIR filter).
+    */
+
+    ALLOC(xp, (m+1), spx_word32_t*);
+    ALLOC(xpmem, (m+1)*(lpcrdr+1+2), spx_word32_t);
+
+    ALLOC(xq, (m+1), spx_word32_t*);
+    ALLOC(xqmem, (m+1)*(lpcrdr+1+2), spx_word32_t);
+    
+    for(i=0; i<=m; i++) {
+      xp[i] = xpmem + i*(lpcrdr+1+2);
+      xq[i] = xqmem + i*(lpcrdr+1+2);
+    }
+
+    /* work out 2cos terms in Q14 */
+
     ALLOC(freqn, lpcrdr, spx_word16_t);
-    for (i=0;i<lpcrdr;i++)
+    for (i=0;i<lpcrdr;i++) 
        freqn[i] = ANGLE2X(freq[i]);
 
-    ALLOC(Wp, 4*m+2, spx_word32_t);
-    pw = Wp;
+    #define QIMP  21   /* scaling for impulse */
 
+    xin = SHL32(EXTEND32(1), (QIMP-1)); /* 0.5 in QIMP format */
+   
+    /* first col and last non-zero values of each row are trivial */
+    
+    for(i=0;i<=m;i++) {
+     xp[i][1] = 0;
+     xp[i][2] = xin;
+     xp[i][2+2*i] = xin;
+     xq[i][1] = 0;
+     xq[i][2] = xin;
+     xq[i][2+2*i] = xin;
+    }
 
-    /* initialise contents of array */
+    /* 2nd row (first output row) is trivial */
 
-    for(i=0;i<=4*m+1;i++){       	/* set contents of buffer to 0 */
-	*pw++ = 0;
-    }
+    xp[1][3] = -MULT16_32_Q14(freqn[0],xp[0][2]);
+    xq[1][3] = -MULT16_32_Q14(freqn[1],xq[0][2]);
 
-    /* Set pointers up */
+    xout1 = xout2 = 0;
 
-    pw = Wp;
-    xin1 = 1048576;
-    xin2 = 1048576;
+    /* now generate remaining rows */
 
-    /* reconstruct P(z) and Q(z) by  cascading second order
-      polynomials in form 1 - 2xz(-1) +z(-2), where x is the
-      LSP coefficient */
+    for(i=1;i<m;i++) {
 
-    for(j=0;j<=lpcrdr;j++){
-       spx_word16_t *fr=freqn;
-	for(i=0;i<m;i++){
-	    n1 = pw+(i<<2);
-	    n2 = n1 + 1;
-	    n3 = n2 + 1;
-	    n4 = n3 + 1;
-	    xout1 = ADD32(SUB32(xin1, MULT16_32_Q14(*fr,*n1)), *n2);
-            fr++;
-            xout2 = ADD32(SUB32(xin2, MULT16_32_Q14(*fr,*n3)), *n4);
-            fr++;
-	    *n2 = *n1;
-	    *n4 = *n3;
-	    *n1 = xin1;
-	    *n3 = xin2;
-	    xin1 = xout1;
-	    xin2 = xout2;
-	}
-	xout1 = xin1 + *(n4+1);
-	xout2 = xin2 - *(n4+2);
-        /* FIXME: perhaps apply bandwidth expansion in case of overflow? */
-	if (j>0)
-	{
-        if (xout1 + xout2>SHL32(EXTEND32(32766),8))
-           ak[j-1] = 32767;
-        else if (xout1 + xout2 < -SHL32(EXTEND32(32766),8))
-           ak[j-1] = -32767;
-        else
-           ak[j-1] = EXTRACT16(PSHR32(ADD32(xout1,xout2),8));
-	} else {/*speex_warning_int("ak[0] = ", EXTRACT16(PSHR32(ADD32(xout1,xout2),8)));*/}
-	*(n4+1) = xin1;
-	*(n4+2) = xin2;
+      for(j=1;j<2*(i+1)-1;j++) {
+	mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]);
+	xp[i+1][j+2] = ADD32(SUB32(xp[i][j+2], mult), xp[i][j]);
+	mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]);
+	xq[i+1][j+2] = ADD32(SUB32(xq[i][j+2], mult), xq[i][j]);
+      }
+
+      /* for last col xp[i][j+2] = xq[i][j+2] = 0 */
+
+      mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]);
+      xp[i+1][j+2] = SUB32(xp[i][j], mult);
+      mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]);
+      xq[i+1][j+2] = SUB32(xq[i][j], mult);
+    }
+
+    /* process last row to extra a{k} */
+
+    for(j=1;j<=lpcrdr;j++) {
+      int shift = QIMP-13;
 
-	xin1 = 0;
-	xin2 = 0;
+      /* final filter sections */
+      a = PSHR32(xp[m][j+2] + xout1 + xq[m][j+2] - xout2, shift); 
+      xout1 = xp[m][j+2];
+      xout2 = xq[m][j+2];
+      
+      /* hard limit ak's to +/- 32767 */
+
+      if (a < -32767) a = 32767;
+      if (a > 32767) a = 32767;
+      ak[j-1] = (short)a;
+     
     }
+
 }
+
 #else
 
 void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack)