1 files changed, 376 insertions, 105 deletions
diff --git a/pjmedia/src/pjmedia-codec/speex/filters.c b/pjmedia/src/pjmedia-codec/speex/filters.c
index abc8d9cf..73cb3912 100644
--- a/pjmedia/src/pjmedia-codec/speex/filters.c
+++ b/pjmedia/src/pjmedia-codec/speex/filters.c
@@ -75,25 +75,35 @@ void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
    }
 }
 
-void signal_div(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
+void signal_div(const spx_word16_t *x, spx_word16_t *y, spx_word32_t scale, int len) /* y[i] = x[i] multiplied by a precomputed reciprocal of scale; three ranges of scale use different fixed-point precision */
 {
    int i;
    if (scale > SHL32(EXTEND32(SIG_SCALING), 8))
    {
       spx_word16_t scale_1;
       scale = PSHR32(scale, SIG_SHIFT);
-      scale_1 = EXTRACT16(DIV32_16(SHL32(EXTEND32(SIG_SCALING),7),scale));
+      scale_1 = EXTRACT16(PDIV32_16(SHL32(EXTEND32(SIG_SCALING),7),scale)); /* (SIG_SCALING<<7) divided by the already down-shifted scale */
       for (i=0;i<len;i++)
       {
-         y[i] = SHR32(MULT16_16(scale_1, EXTRACT16(SHR32(x[i],SIG_SHIFT))),7);
+         y[i] = MULT16_16_P15(scale_1, x[i]); /* x is already 16-bit here, so no SIG_SHIFT pre-shift of the sample */
       }
-   } else {
+   } else if (scale > SHR32(EXTEND32(SIG_SCALING), 2)) { /* mid range: scale in (SIG_SCALING/4, SIG_SCALING*256] */
       spx_word16_t scale_1;
       scale = PSHR32(scale, SIG_SHIFT-5);
       scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
       for (i=0;i<len;i++)
       {
-         y[i] = MULT16_16(scale_1, EXTRACT16(SHR32(x[i],SIG_SHIFT-2)));
+         y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),8); /* sample gets 2 extra bits, product is rounded back down by 8 */
+      }
+   } else { /* small scale: scale keeps 2 more bits than the branch above (SIG_SHIFT-7 vs SIG_SHIFT-5) */
+      spx_word16_t scale_1;
+      scale = PSHR32(scale, SIG_SHIFT-7);
+      if (scale < 5) /* floor on the divisor; presumably keeps scale_1 within 16 bits -- confirm against SIG_SCALING */
+         scale = 5;
+      scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
+      for (i=0;i<len;i++)
+      {
+         y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),6); /* final shift is 6 instead of 8 to offset the 2 extra bits kept in scale */
       }
    }
 }
@@ -160,9 +170,56 @@ spx_word16_t compute_rms(const spx_sig_t *x, int len)
       sum = ADD32(sum,SHR32(sum2,6));
    }
    
-   return EXTRACT16(SHR32(SHL32(EXTEND32(spx_sqrt(1+DIV32(sum,len))),(sig_shift+3)),SIG_SHIFT));
+   return EXTRACT16(PSHR32(SHL32(EXTEND32(spx_sqrt(DIV32(sum,len))),(sig_shift+3)),SIG_SHIFT));
 }
 
+spx_word16_t compute_rms16(const spx_word16_t *x, int len)
+{
+   /* RMS of a 16-bit signal. The samples are pre-scaled according to the
+      peak magnitude before squaring and the sqrt is shifted back afterwards. */
+   int i, j;
+   spx_word32_t sum=0;
+   /* 32-bit peak: |-32768| = 32768 must not wrap when stored */
+   spx_word32_t max_val=10;
+
+   for (i=0;i<len;i++)
+   {
+      spx_sig_t tmp = x[i];
+      if (tmp<0)
+         tmp = -tmp;
+      if (tmp > max_val)
+         max_val = tmp;
+   }
+   if (max_val>16383)
+   {
+      /* Large peak: halve every sample before squaring */
+      for (i=0;i<len;i+=4)
+      {
+         spx_word32_t sum2=0;
+         /* j<len keeps a trailing partial group of 4 inside the buffer */
+         for (j=i;j<i+4 && j<len;j++)
+            sum2 = MAC16_16(sum2,PSHR16(x[j],1),PSHR16(x[j],1));
+         sum = ADD32(sum,SHR32(sum2,6));
+      }
+      return SHL16(spx_sqrt(DIV32(sum,len)),4);
+   } else {
+      int sig_shift=0;
+      if (max_val < 8192)
+         sig_shift=1;
+      if (max_val < 4096)
+         sig_shift=2;
+      if (max_val < 2048)
+         sig_shift=3;
+      for (i=0;i<len;i+=4)
+      {
+         spx_word32_t sum2=0;
+         for (j=i;j<i+4 && j<len;j++)
+            sum2 = MAC16_16(sum2,SHL16(x[j],sig_shift),SHL16(x[j],sig_shift));
+         sum = ADD32(sum,SHR32(sum2,6));
+      }
+      return SHL16(spx_sqrt(DIV32(sum,len)),3-sig_shift);
+   }
+}
 
 #ifndef OVERRIDE_NORMALIZE16
 int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
@@ -206,6 +263,10 @@ spx_word16_t compute_rms(const spx_sig_t *x, int len)
    }
    return sqrt(.1+sum/len);
 }
+spx_word16_t compute_rms16(const spx_word16_t *x, int len)
+{
+   return compute_rms(x, len); /* in this branch the 16-bit entry point just forwards to compute_rms() defined above */
+}
 #endif
 
 
@@ -236,6 +297,8 @@ void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *de
    int i,j;
    spx_sig_t xi,yi,nyi;
 
+   for (i=0;i<ord;i++)
+      mem[i] = SHR32(mem[i],1);   
    for (i=0;i<N;i++)
    {
       xi=SATURATE(x[i],805306368);
@@ -248,10 +311,40 @@ void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *de
       mem[ord-1] = SUB32(MULT16_32_Q15(num[ord-1],xi), MULT16_32_Q15(den[ord-1],yi));
       y[i] = yi;
    }
+   for (i=0;i<ord;i++)
+      mem[i] = SHL32(mem[i],1);   
+}
+#endif
+#endif
+
+#ifdef FIXED_POINT
+#ifndef OVERRIDE_FILTER_MEM16
+void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
+{
+   /* Filters N samples of x into y with feed-forward (num) and feedback (den)
+      coefficients; state lives in mem[0..ord-1]. stack is not used here. */
+   int n, k;
+   const int last = ord-1;
+   for (n=0;n<N;n++)
+   {
+      const spx_word16_t in = x[n];
+      const spx_word16_t out = EXTRACT16(SATURATE(ADD32(EXTEND32(in),PSHR32(mem[0],LPC_SHIFT)),32767));
+      const spx_word16_t neg_out = NEG16(out);
+      for (k=0;k<last;k++)
+         mem[k] = MAC16_16(MAC16_16(mem[k+1], num[k],in), den[k],neg_out);
+      mem[last] = ADD32(MULT16_16(num[last],in), MULT16_16(den[last],neg_out));
+      y[n] = out;
+   }
 }
 #endif
+#else
+void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
+{
+   filter_mem2(x, num, den, y, N, ord, mem); /* non-FIXED_POINT branch: delegate to filter_mem2(); stack is not forwarded */
+}
 #endif
 
+
 #ifndef OVERRIDE_IIR_MEM2
 #ifdef PRECISION16
 void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
@@ -277,6 +370,8 @@ void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, in
    int i,j;
    spx_word32_t xi,yi,nyi;
 
+   for (i=0;i<ord;i++)
+      mem[i] = SHR32(mem[i],1);   
    for (i=0;i<N;i++)
    {
       xi=SATURATE(x[i],805306368);
@@ -289,10 +384,40 @@ void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, in
       mem[ord-1] = MULT16_32_Q15(den[ord-1],nyi);
       y[i] = yi;
    }
+   for (i=0;i<ord;i++)
+      mem[i] = SHL32(mem[i],1);   
 }
 #endif
 #endif
 
+#ifdef FIXED_POINT
+#ifndef OVERRIDE_IIR_MEM16
+void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
+{
+   /* Feedback-only filter on 16-bit samples: each output is x[n] plus mem[0]
+      (rounded down by LPC_SHIFT) passed through SATURATE(...,32767); mem[] is
+      then advanced with the den coefficients. stack is not used here. */
+   int n, k;
+   const int last = ord-1;
+   for (n=0;n<N;n++)
+   {
+      const spx_word16_t out = EXTRACT16(SATURATE(ADD32(EXTEND32(x[n]),PSHR32(mem[0],LPC_SHIFT)),32767));
+      const spx_word16_t neg_out = NEG16(out);
+      for (k=0;k<last;k++)
+         mem[k] = MAC16_16(mem[k+1],den[k],neg_out);
+      mem[last] = MULT16_16(den[last],neg_out);
+      y[n] = out;
+   }
+}
+#endif
+#else
+void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
+{
+   iir_mem2(x, den, y, N, ord, mem); /* non-FIXED_POINT branch: delegate to iir_mem2(); stack is not forwarded */
+}
+#endif
+
+
 #ifndef OVERRIDE_FIR_MEM2
 #ifdef PRECISION16
 void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
@@ -318,6 +443,8 @@ void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, in
    int i,j;
    spx_word32_t xi,yi;
 
+   for (i=0;i<ord;i++)
+      mem[i] = SHR32(mem[i],1);   
    for (i=0;i<N;i++)
    {
       xi=SATURATE(x[i],805306368);
@@ -329,11 +456,38 @@ void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, in
       mem[ord-1] = MULT16_32_Q15(num[ord-1],xi);
       y[i] = SATURATE(yi,805306368);
    }
+   for (i=0;i<ord;i++)
+      mem[i] = SHL32(mem[i],1);   
 }
 #endif
 #endif
 
+#ifdef FIXED_POINT
+#ifndef OVERRIDE_FIR_MEM16
+void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
+{
+   int n, k;
+   const int last = ord-1;
 
+   /* Feed-forward filter on 16-bit samples: output is x[n] plus mem[0] (rounded
+      down by LPC_SHIFT); mem[] is advanced with num. stack is not used here. */
+   for (n=0;n<N;n++)
+   {
+      const spx_word16_t in = x[n];
+      const spx_word16_t out = EXTRACT16(SATURATE(ADD32(EXTEND32(in),PSHR32(mem[0],LPC_SHIFT)),32767));
+      for (k=0;k<last;k++)
+         mem[k] = MAC16_16(mem[k+1], num[k],in);
+      mem[last] = MULT16_16(num[last],in);
+      y[n] = out;
+   }
+}
+#endif
+#else
+void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
+{
+   fir_mem2(x, num, y, N, ord, mem); /* non-FIXED_POINT branch: delegate to fir_mem2(); stack is not forwarded */
+}
+#endif
 
 
 
@@ -382,14 +536,13 @@ void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, cons
    i++;
    for (;i<N;i++)
       y[i] = VERY_SMALL;
-   
    for (i=0;i<ord;i++)
       mem1[i] = mem2[i] = 0;
    for (i=0;i<N;i++)
    {
       y1 = ADD16(y[i], EXTRACT16(PSHR32(mem1[0],LPC_SHIFT)));
       ny1i = NEG16(y1);
-      y[i] = ADD16(SHL16(y1,1), EXTRACT16(PSHR32(mem2[0],LPC_SHIFT)));
+      y[i] = PSHR32(ADD32(SHL32(EXTEND32(y1),LPC_SHIFT+1),mem2[0]),LPC_SHIFT);
       ny2i = NEG16(y[i]);
       for (j=0;j<ord-1;j++)
       {
@@ -426,12 +579,14 @@ void qmf_decomp(const spx_word16_t *xx, const spx_word16_t *aa, spx_sig_t *y1, s
       y2[k]=0;
       for (j=0;j<M2;j++)
       {
-         y1[k]=ADD32(y1[k],SHR(MULT16_16(a[j],ADD16(x[i+j],x2[i-j])),1));
-         y2[k]=SUB32(y2[k],SHR(MULT16_16(a[j],SUB16(x[i+j],x2[i-j])),1));
+         y1[k]=ADD32(y1[k],MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
+         y2[k]=SUB32(y2[k],MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
          j++;
-         y1[k]=ADD32(y1[k],SHR(MULT16_16(a[j],ADD16(x[i+j],x2[i-j])),1));
-         y2[k]=ADD32(y2[k],SHR(MULT16_16(a[j],SUB16(x[i+j],x2[i-j])),1));
+         y1[k]=ADD32(y1[k],MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
+         y2[k]=ADD32(y2[k],MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
       }
+      y1[k] = SHR32(y1[k],1);
+      y2[k] = SHR32(y2[k],1);
    }
    for (i=0;i<M-1;i++)
      mem[i]=SATURATE(PSHR(xx[N-i-1],1),16383);
@@ -450,7 +605,7 @@ void fir_mem_up(const spx_sig_t *x, const spx_word16_t *a, spx_sig_t *y, int N,
    ALLOC(xx, M+N-1, spx_word16_t);
 
    for (i = 0; i < N/2; i++)
-      xx[2*i] = SHR(x[N/2-1-i],SIG_SHIFT+1);
+      xx[2*i] = PSHR32(x[N/2-1-i],SIG_SHIFT);
    for (i = 0; i < M - 1; i += 2)
       xx[N+i] = mem[i+1];
 
@@ -469,19 +624,19 @@ void fir_mem_up(const spx_sig_t *x, const spx_word16_t *a, spx_sig_t *y, int N,
          a1 = a[j+1];
          x1 = xx[N-2+j-i];
 
-         y0 = ADD32(y0,SHR(MULT16_16(a0, x1),1));
-         y1 = ADD32(y1,SHR(MULT16_16(a1, x1),1));
-         y2 = ADD32(y2,SHR(MULT16_16(a0, x0),1));
-         y3 = ADD32(y3,SHR(MULT16_16(a1, x0),1));
+         y0 = ADD32(y0,SHR(MULT16_16(a0, x1),2));
+         y1 = ADD32(y1,SHR(MULT16_16(a1, x1),2));
+         y2 = ADD32(y2,SHR(MULT16_16(a0, x0),2));
+         y3 = ADD32(y3,SHR(MULT16_16(a1, x0),2));
 
          a0 = a[j+2];
          a1 = a[j+3];
          x0 = xx[N+j-i];
 
-         y0 = ADD32(y0,SHR(MULT16_16(a0, x0),1));
-         y1 = ADD32(y1,SHR(MULT16_16(a1, x0),1));
-         y2 = ADD32(y2,SHR(MULT16_16(a0, x1),1));
-         y3 = ADD32(y3,SHR(MULT16_16(a1, x1),1));
+         y0 = ADD32(y0,SHR(MULT16_16(a0, x0),2));
+         y1 = ADD32(y1,SHR(MULT16_16(a1, x0),2));
+         y2 = ADD32(y2,SHR(MULT16_16(a0, x1),2));
+         y3 = ADD32(y3,SHR(MULT16_16(a1, x1),2));
       }
       y[i] = y0;
       y[i+1] = y1;
@@ -493,113 +648,229 @@ void fir_mem_up(const spx_sig_t *x, const spx_word16_t *a, spx_sig_t *y, int N,
       mem[i+1] = xx[i];
 }
 
-void comb_filter_mem_init (CombFilterMem *mem)
-{
-   mem->last_pitch=0;
-   mem->last_pitch_gain[0]=mem->last_pitch_gain[1]=mem->last_pitch_gain[2]=0;
-   mem->smooth_gain=1;
-}
-
 #ifdef FIXED_POINT
-#define COMB_STEP 32767
+#if 0 /* alternative kernel set, compiled out */
+spx_word16_t shift_filt[3][7] = {{-33,    1043,   -4551,   19959,   19959,   -4551,    1043},
+                                 {-98,    1133,   -4425,   29179,    8895,   -2328,     444},
+                                 {444,   -2328,    8895,   29179,   -4425,    1133,     -98}};
 #else
-#define COMB_STEP 1.0
+spx_word16_t shift_filt[3][7] = {{-390,    1540,   -4993,   20123,   20123,   -4993,    1540}, /* active set: three 7-tap kernels applied by interp_pitch() */
+                                {-1064,    2817,   -6694,   31589,    6837,    -990,    -209}, /* this row and the next are mirror images of each other */
+                                 {-209,    -990,    6837,   31589,   -6694,    2817,   -1064}};
+#endif
+#else /* not FIXED_POINT: float tables; each active entry is roughly the fixed-point value divided by 32768 */
+#if 0 /* alternative kernel set, compiled out */
+float shift_filt[3][7] = {{-9.9369e-04, 3.1831e-02, -1.3889e-01, 6.0910e-01, 6.0910e-01, -1.3889e-01, 3.1831e-02},
+                          {-0.0029937, 0.0345613, -0.1350474, 0.8904793, 0.2714479, -0.0710304, 0.0135403},
+                          {0.0135403, -0.0710304, 0.2714479, 0.8904793, -0.1350474, 0.0345613,  -0.0029937}};
+#else
+float shift_filt[3][7] = {{-0.011915, 0.046995, -0.152373, 0.614108, 0.614108, -0.152373, 0.046995}, /* active set */
+                          {-0.0324855, 0.0859768, -0.2042986, 0.9640297, 0.2086420, -0.0302054, -0.0063646},
+                          {-0.0063646, -0.0302054, 0.2086420, 0.9640297, -0.2042986, 0.0859768, -0.0324855}};
+#endif
 #endif
 
-void comb_filter(
-spx_sig_t *exc,          /*decoded excitation*/
-spx_sig_t *new_exc,      /*enhanced excitation*/
+int interp_pitch(
+spx_word16_t *exc,          /*decoded excitation*/
+spx_word16_t *interp,       /*output: exc delayed by the selected lag, filtered through shift_filt when a filtered row wins*/
+int pitch,               /*pitch period*/
+int len
+)
+{
+   int n, row, col, tap;
+   spx_word32_t corr[4][7];
+   spx_word32_t best;
+   int best_row=0, best_col=0;
+   int lag;
+   /* Row 0: inner_prod() of exc against itself delayed by pitch+3 down to pitch-3 */
+   for (col=0;col<7;col++)
+      corr[0][col] = inner_prod(exc, exc-pitch-3+col, len);
+   /* Rows 1..3: row 0 run through each shift_filt kernel, taps clipped at both ends */
+   for (row=0;row<3;row++)
+   {
+      for (col=0;col<7;col++)
+      {
+         spx_word32_t acc=0;
+         int first = 3-col;
+         int end = 10-col;
+         if (first<0)
+            first = 0;
+         if (end>7)
+            end = 7;
+         for (tap=first;tap<end;tap++)
+            acc += MULT16_32_Q15(shift_filt[row][tap],corr[0][col+tap-3]);
+         corr[row+1][col] = acc;
+      }
+   }
+   /* Keep the first strictly largest entry, scanning row by row */
+   best = corr[0][0];
+   for (row=0;row<4;row++)
+   {
+      for (col=0;col<7;col++)
+      {
+         if (corr[row][col] > best)
+         {
+            best = corr[row][col];
+            best_row=row;
+            best_col=col;
+         }
+      }
+   }
+   lag = pitch-best_col+3;
+   for (n=0;n<len;n++)
+   {
+      spx_word32_t acc = 0;
+      if (best_row>0)
+      {
+         /* a filtered row won: apply its 7-tap kernel around the chosen lag */
+         for (tap=0;tap<7;tap++)
+            acc += MULT16_16(exc[n-lag+tap-3],shift_filt[best_row-1][tap]);
+      } else {
+         acc = SHL32(exc[n-lag],15);
+      }
+      interp[n] = PSHR32(acc,15);
+   }
+   return lag;
+}
+
+void multicomb( /* adds two pitch-delayed copies of exc to exc, then rescales so the output energy does not exceed the input's */
+spx_word16_t *exc,          /*decoded excitation*/
+spx_word16_t *new_exc,      /*enhanced excitation*/
 spx_coef_t *ak,           /*LPC filter coefs*/
 int p,               /*LPC order*/
 int nsf,             /*sub-frame size*/
 int pitch,           /*pitch period*/
-spx_word16_t *pitch_gain,   /*pitch gain (3-tap)*/
+int max_pitch,       /*above this lag the second tap uses 2*pitch, otherwise -pitch*/
 spx_word16_t  comb_gain,    /*gain of comb filter*/
-CombFilterMem *mem
+char *stack          /*scratch stack, presumably consumed by ALLOC() -- confirm*/
 )
 {
-   int i;
-   spx_word16_t exc_energy=0, new_exc_energy=0;
-   spx_word16_t gain;
-   spx_word16_t step;
-   spx_word16_t fact;
-
-   /*Compute excitation amplitude prior to enhancement*/
-   exc_energy = compute_rms(exc, nsf);
-   /*for (i=0;i<nsf;i++)
-     exc_energy+=((float)exc[i])*exc[i];*/
-
-   /*Some gain adjustment if pitch is too high or if unvoiced*/
-#ifdef FIXED_POINT
+   int i;
+   VARDECL(spx_word16_t *iexc);
+   spx_word16_t old_ener, new_ener;
+   int corr_pitch;
+
+   spx_word16_t iexc0_mag, iexc1_mag, exc_mag;
+   spx_word32_t corr0, corr1;
+   spx_word16_t gain0, gain1;
+   spx_word16_t pgain1, pgain2;
+   spx_word16_t c1, c2;
+   spx_word16_t g1, g2;
+   spx_word16_t ngain;
+   spx_word16_t gg1, gg2;
+
+#if 0 /* Set to 1 to enable full pitch search */
+   int nol_pitch[6];
+   spx_word16_t nol_pitch_coef[6];
+   spx_word16_t ol_pitch_coef;
+   open_loop_nbest_pitch(exc, 20, 120, nsf,
+                         nol_pitch, nol_pitch_coef, 6, stack);
+   corr_pitch=nol_pitch[0];
+   ol_pitch_coef = nol_pitch_coef[0];
+   /*Try to remove pitch multiples*/
+   for (i=1;i<6;i++)
    {
-      spx_word16_t g = gain_3tap_to_1tap(pitch_gain)+gain_3tap_to_1tap(mem->last_pitch_gain);
-      if (g > 166)
-         comb_gain = MULT16_16_Q15(DIV32_16(SHL32(EXTEND32(165),15),g), comb_gain);
-      if (g < 64)
-         comb_gain = MULT16_16_Q15(SHL16(g, 9), comb_gain);
-   }
+#ifdef FIXED_POINT
+      if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],19661)) &&
 #else
-   {
-      float g=0;
-      g = GAIN_SCALING_1*.5*(gain_3tap_to_1tap(pitch_gain)+gain_3tap_to_1tap(mem->last_pitch_gain));
-      if (g>1.3)
-         comb_gain*=1.3/g;
-      if (g<.5)
-         comb_gain*=2.*g;
-   }
+      if ((nol_pitch_coef[i]>.6*nol_pitch_coef[0]) &&
 #endif
-   step = DIV32(COMB_STEP, nsf);
-   fact=0;
-
-   /*Apply pitch comb-filter (filter out noise between pitch harmonics)*/
-   for (i=0;i<nsf;i++)
-   {
-      spx_word32_t exc1, exc2;
-
-      fact = ADD16(fact,step);
-      
-      exc1 = SHL32(MULT16_32_Q15(SHL16(pitch_gain[0],7),exc[i-pitch+1]) +
-                 MULT16_32_Q15(SHL16(pitch_gain[1],7),exc[i-pitch]) +
-                 MULT16_32_Q15(SHL16(pitch_gain[2],7),exc[i-pitch-1]) , 2);
-      exc2 = SHL32(MULT16_32_Q15(SHL16(mem->last_pitch_gain[0],7),exc[i-mem->last_pitch+1]) +
-                 MULT16_32_Q15(SHL16(mem->last_pitch_gain[1],7),exc[i-mem->last_pitch]) +
-                 MULT16_32_Q15(SHL16(mem->last_pitch_gain[2],7),exc[i-mem->last_pitch-1]),2);
-
-      new_exc[i] = exc[i] + MULT16_32_Q15(comb_gain, ADD32(MULT16_32_Q15(fact,exc1), MULT16_32_Q15(SUB16(COMB_STEP,fact), exc2)));
+         (ABS(2*nol_pitch[i]-corr_pitch)<=2 || ABS(3*nol_pitch[i]-corr_pitch)<=3 ||
+         ABS(4*nol_pitch[i]-corr_pitch)<=4 || ABS(5*nol_pitch[i]-corr_pitch)<=5))
+      {
+         corr_pitch = nol_pitch[i];
+      }
    }
-
-   mem->last_pitch_gain[0] = pitch_gain[0];
-   mem->last_pitch_gain[1] = pitch_gain[1];
-   mem->last_pitch_gain[2] = pitch_gain[2];
-   mem->last_pitch = pitch;
-
-   /*Amplitude after enhancement*/
-   new_exc_energy = compute_rms(new_exc, nsf);
-
-   if (exc_energy > new_exc_energy)
-      exc_energy = new_exc_energy;
+#else
+   corr_pitch = pitch;
+#endif
+
+   ALLOC(iexc, 2*nsf, spx_word16_t);
    
-   gain = DIV32_16(SHL32(EXTEND32(exc_energy),15),ADD16(1,new_exc_energy));
+   interp_pitch(exc, iexc, corr_pitch, nsf); /* length must be nsf: iexc only has room for 2*nsf samples */
+   if (corr_pitch>max_pitch)
+      interp_pitch(exc, iexc+nsf, 2*corr_pitch, nsf);
+   else
+      interp_pitch(exc, iexc+nsf, -corr_pitch, nsf);
 
+   /* iexc[0..nsf) now holds exc delayed by the refined pitch lag and
+      iexc[nsf..2*nsf) holds the second tap chosen above. Their magnitudes and
+      correlations with exc are measured next to derive the two comb gains. */
+   iexc0_mag = spx_sqrt(1000+inner_prod(iexc,iexc,nsf)); /* bias keeps the magnitude non-zero; it is used as a divisor below */
+   iexc1_mag = spx_sqrt(1000+inner_prod(iexc+nsf,iexc+nsf,nsf));
+   exc_mag = spx_sqrt(1+inner_prod(exc,exc,nsf));
+   corr0  = inner_prod(iexc,exc,nsf);
+   if (corr0<0)
+      corr0=0;
+   corr1 = inner_prod(iexc+nsf,exc,nsf);
+   if (corr1<0)
+      corr1=0;
 #ifdef FIXED_POINT
-   if (gain < 16384)
-      gain = 16384;
-#else
-   if (gain < .5)
-      gain=.5;
+   /* Doesn't cost much to limit the ratio and it makes the rest easier */
+   if (SHL32(EXTEND32(iexc0_mag),6) < EXTEND32(exc_mag))
+      iexc0_mag = ADD16(1,PSHR16(exc_mag,6));
+   if (SHL32(EXTEND32(iexc1_mag),6) < EXTEND32(exc_mag))
+      iexc1_mag = ADD16(1,PSHR16(exc_mag,6));
 #endif
-
+   if (corr0 > MULT16_16(iexc0_mag,exc_mag))
+      pgain1 = QCONST16(1., 14);
+   else
+      pgain1 = PDIV32_16(SHL32(PDIV32(corr0, exc_mag),14),iexc0_mag);
+   if (corr1 > MULT16_16(iexc1_mag,exc_mag))
+      pgain2 = QCONST16(1., 14);
+   else
+      pgain2 = PDIV32_16(SHL32(PDIV32(corr1, exc_mag),14),iexc1_mag);
+   gg1 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc0_mag);
+   gg2 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc1_mag);
+   if (comb_gain>0)
+   {
 #ifdef FIXED_POINT
-   for (i=0;i<nsf;i++)
+      c1 = (MULT16_16_Q15(QCONST16(.4,15),comb_gain)+QCONST16(.07,15));
+      c2 = QCONST16(.5,15)+MULT16_16_Q14(QCONST16(1.72,14),(c1-QCONST16(.07,15)));
+#else
+      c1 = .4*comb_gain+.07;
+      c2 = .5+1.72*(c1-.07);
+#endif
+   } else
    {
-      mem->smooth_gain = ADD16(MULT16_16_Q15(31457,mem->smooth_gain), MULT16_16_Q15(1311,gain));
-      new_exc[i] = MULT16_32_Q15(mem->smooth_gain, new_exc[i]);
+      c1=c2=0;
    }
+#ifdef FIXED_POINT
+   g1 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain1),pgain1);
+   g2 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain2),pgain2);
 #else
-   for (i=0;i<nsf;i++)
+   g1 = 1-c2*pgain1*pgain1;
+   g2 = 1-c2*pgain2*pgain2;
+#endif
+   if (g1<c1) /* g1,g2 are floored at c1, so the divisions below never see a divisor smaller than their numerator */
+      g1 = c1;
+   if (g2<c1)
+      g2 = c1;
+   g1 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g1);
+   g2 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g2);
+   if (corr_pitch>max_pitch)
    {
-      mem->smooth_gain = .96*mem->smooth_gain + .04*gain;
-      new_exc[i] *= mem->smooth_gain;
+      gain0 = MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q14(g1,gg1));
+      gain1 = MULT16_16_Q15(QCONST16(.3,15),MULT16_16_Q14(g2,gg2));
+   } else {
+      gain0 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g1,gg1));
+      gain1 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g2,gg2));
    }
-#endif
+   for (i=0;i<nsf;i++)
+      new_exc[i] = ADD16(exc[i], EXTRACT16(PSHR32(ADD32(MULT16_16(gain0,iexc[i]), MULT16_16(gain1,iexc[i+nsf])),8)));
+   /* FIXME: compute_rms16 is currently not quite accurate enough (but close) */
+   new_ener = compute_rms16(new_exc, nsf);
+   old_ener = compute_rms16(exc, nsf);
+
+   if (old_ener < 1)
+      old_ener = 1;
+   if (new_ener < 1)
+      new_ener = 1;
+   if (old_ener > new_ener)
+      old_ener = new_ener;
+   ngain = PDIV32_16(SHL32(EXTEND32(old_ener),14),new_ener); /* old_ener<=new_ener here, so ngain is at most 1.0 in Q14: it only attenuates */
+
+   for (i=0;i<nsf;i++)
+      new_exc[i] = MULT16_16_Q14(ngain, new_exc[i]);
 }
+