summary | refs | log | tree | commit | diff
path: root/pjmedia/src/pjmedia-codec/speex/filters_bfin.h
diff options
context:
space:
mode:
Diffstat (limited to 'pjmedia/src/pjmedia-codec/speex/filters_bfin.h')
-rw-r--r-- pjmedia/src/pjmedia-codec/speex/filters_bfin.h 342
1 files changed, 310 insertions, 32 deletions
diff --git a/pjmedia/src/pjmedia-codec/speex/filters_bfin.h b/pjmedia/src/pjmedia-codec/speex/filters_bfin.h
index 9f7ea6a9..2180ed42 100644
--- a/pjmedia/src/pjmedia-codec/speex/filters_bfin.h
+++ b/pjmedia/src/pjmedia-codec/speex/filters_bfin.h
@@ -32,14 +32,11 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <stdio.h>
-
#define OVERRIDE_NORMALIZE16
int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
{
spx_sig_t max_val=1;
int sig_shift;
-
__asm__
(
"%0 = 0;\n\t"
@@ -67,18 +64,17 @@ int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int le
(
"I0 = %0;\n\t"
"L0 = 0;\n\t"
- "I1 = %1;\n\t"
- "L1 = 0;\n\t"
+ "P1 = %1;\n\t"
"R0 = [I0++];\n\t"
- "LOOP norm_shift%= LC0 = %3 >> 1;\n\t"
+ "LOOP norm_shift%= LC0 = %3;\n\t"
"LOOP_BEGIN norm_shift%=;\n\t"
- "R1 = ASHIFT R0 by %2.L || R2 = [I0++];\n\t"
- "R3 = ASHIFT R2 by %2.L || R0 = [I0++];\n\t"
- "R3 = PACK(R3.L, R1.L);\n\t"
- "[I1++] = R3;\n\t"
+ "R1 = ASHIFT R0 by %2.L || R0 = [I0++];\n\t"
+ "W[P1++] = R1;\n\t"
"LOOP_END norm_shift%=;\n\t"
- : : "a" (x), "a" (y), "d" (-sig_shift), "a" (len)
- : "I0", "L0", "I1", "L1", "R0", "R1", "R2", "R3", "memory"
+ "R1 = ASHIFT R0 by %2.L;\n\t"
+ "W[P1++] = R1;\n\t"
+ : : "a" (x), "a" (y), "d" (-sig_shift), "a" (len-1)
+ : "I0", "L0", "P1", "R0", "R1", "memory"
);
return sig_shift;
}
@@ -103,26 +99,26 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d
"P0 = %3;\n\t"
"I0 = P0;\n\t"
- "B0 = P0;\n\t"
+ "B0 = P0;\n\t" /* numden */
"L0 = 0;\n\t"
- "P2 = %0;\n\t"
+ "P2 = %0;\n\t" /* Fused xy */
"I2 = P2;\n\t"
"L2 = 0;\n\t"
- "P4 = %6;\n\t"
- "P0 = %1;\n\t"
- "P1 = %2;\n\t"
+ "P4 = %6;\n\t" /* mem */
+ "P0 = %1;\n\t" /* _x */
+ "P1 = %2;\n\t" /* _y */
/* First sample */
"R1 = [P4++];\n\t"
- "R1 <<= 1;\n\t"
- "R2 = [P0++];\n\t"
+ "R1 <<= 1;\n\t" /* shift mem */
+ "R2 = [P0++];\n\t" /* load x[0] */
"R1 = R1 + R2;\n\t"
- "[P1++] = R1;\n\t"
+ "[P1++] = R1;\n\t" /* store y[0] */
"R1 <<= 2;\n\t"
"R2 <<= 2;\n\t"
- "R2 = PACK(R1.H, R2.H);\n\t"
+ "R2 = PACK(R1.H, R2.H);\n\t" /* pack x16 and y16 */
"[P2] = R2;\n\t"
/* Samples 1 to ord-1 (using memory) */
@@ -147,13 +143,13 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d
"LOOP_END filter_start_inner%=;\n\t"
"A0 += A1;\n\t"
"R4 = A0;\n\t"
- "R4 <<= 1;\n\t"
- "R2 = [P0++];\n\t"
+ "R4 <<= 1;\n\t" /* shift mem */
+ "R2 = [P0++];\n\t" /* load x */
"R4 = R4 + R2;\n\t"
- "[P1++] = R4;\n\t"
+ "[P1++] = R4;\n\t" /* store y */
"R4 <<= 2;\n\t"
"R2 <<= 2;\n\t"
- "R2 = PACK(R4.H, R2.H);\n\t"
+ "R2 = PACK(R4.H, R2.H);\n\t" /* pack x16 and y16 */
"[P2] = R2;\n\t"
"LOOP_END filter_start%=;\n\t"
@@ -161,14 +157,14 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d
/* Samples ord to N*/
"R0 = %5;\n\t"
"R0 <<= 1;\n\t"
- "I0 = B0;\n\t"
+ "I0 = B0;\n\t" /* numden */
"R0 <<= 1;\n\t"
"L0 = R0;\n\t"
- "R0 = %5;\n\t"
- "R2 = %4;\n\t"
+ "R0 = %5;\n\t" /* org */
+ "R2 = %4;\n\t" /* N */
"R2 = R2 - R0;\n\t"
- "R4 = [I0++];\n\t"
+ "R4 = [I0++];\n\t" /* numden */
"LC0 = R2;\n\t"
"P3 = R0;\n\t"
"R0 <<= 2;\n\t"
@@ -176,7 +172,7 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d
"I2 = P2;\n\t"
"M0 = R0;\n\t"
"A1 = A0 = 0;\n\t"
- "R5 = [I2--];\n\t"
+ "R5 = [I2--];\n\t" /* load xy */
"LOOP filter_mid%= LC0;\n\t"
"LOOP_BEGIN filter_mid%=;\n\t"
"LOOP filter_mid_inner%= LC1=P3;\n\t"
@@ -184,9 +180,9 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d
"A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
"LOOP_END filter_mid_inner%=;\n\t"
"R0 = (A0 += A1) || I2 += M0;\n\t"
- "R0 = R0 << 1 || R5 = [P0++];\n\t"
+ "R0 = R0 << 1 || R5 = [P0++];\n\t" /* load x */
"R0 = R0 + R5;\n\t"
- "R0 = R0 << 2 || [P1++] = R0;\n\t"
+ "R0 = R0 << 2 || [P1++] = R0;\n\t" /* shift y | store y */
"R5 = R5 << 2;\n\t"
"R5 = PACK(R0.H, R5.H);\n\t"
"A1 = A0 = 0 || [I2--] = R5\n\t"
@@ -222,6 +218,150 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d
}
+#define OVERRIDE_FILTER_MEM16
+void filter_mem16(const spx_word16_t *_x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *_y, int N, int ord, spx_mem_t *mem, char *stack) /* Blackfin inline-asm filter_mem16: consumes N 16-bit samples from _x, writes N to _y, reads ord taps each from num and den, and rewrites the ord words of mem */
+{ /* stack is not named directly in this body; presumably consumed by the VARDECL/ALLOC project macros -- confirm */
+ VARDECL(spx_word32_t *xy2); /* backing store for the packed x/y history */
+ VARDECL(spx_word32_t *numden_a); /* backing store for the interleaved coefficients */
+ spx_word32_t *xy; /* xy2 + 1: the history pointer handed to the asm as %0 */
+ spx_word16_t *numden; /* 16-bit view of numden_a, handed to the asm as %3 */
+ int i;
+
+ ALLOC(xy2, (N+1), spx_word32_t); /* N history words plus one spare word in front */
+ ALLOC(numden_a, (2*ord+2), spx_word32_t); /* the fill loop below writes only the first 2*ord 16-bit entries */
+ xy = xy2+1; /* spare word below xy[0]: the asm loops prefetch one [I2--] word beyond the last one they multiply */
+ numden = (spx_word16_t*) numden_a;
+
+ for (i=0;i<ord;i++) /* interleave the taps: even 16-bit slots take num, odd slots take den */
+ {
+ numden[2*i] = num[i];
+ numden[2*i+1] = den[i];
+ }
+ __asm__ __volatile__
+ (
+ /* Register setup */
+ "R0 = %5;\n\t" /* R0 = ord */
+
+ "P0 = %3;\n\t" /* P0 = numden (P0 is reloaded with _x below) */
+ "I0 = P0;\n\t" /* I0: running coefficient pointer */
+ "B0 = P0;\n\t" /* B0 keeps the start of numden */
+ "L0 = 0;\n\t" /* length 0: I0 addresses linearly for now */
+
+ "P2 = %0;\n\t" /* P2 = xy, the fused x/y history */
+ "I2 = P2;\n\t" /* I2: running history pointer */
+ "L2 = 0;\n\t" /* length 0: I2 always addresses linearly */
+
+ "P4 = %6;\n\t" /* mem */
+ "P0 = %1;\n\t" /* _x */
+ "P1 = %2;\n\t" /* _y */
+
+ /* First sample */
+ "R1 = [P4++];\n\t" /* load mem[0] */
+ "R1 <<= 3;\n\t" /* shift mem */
+ "R1.L = R1 (RND);\n\t" /* round the 32-bit value to 16 bits; with the <<3 above this is a rounded >>13 */
+ "R2 = W[P0++];\n\t" /* load x[0] */
+ "R1.L = R1.L + R2.L;\n\t" /* y[0] = x[0] + scaled mem[0] */
+ "W[P1++] = R1;\n\t" /* store y[0] */
+ "R2 = PACK(R1.L, R2.L);\n\t" /* pack x16 and y16: y half first, x half second */
+ "[P2] = R2;\n\t" /* xy[0] */
+
+ /* Samples 1 to ord-1 (using memory) */
+ "R0 += -1;\n\t" /* ord - 1 outer iterations */
+ "R3 = 0;\n\t" /* R3 counts samples already produced */
+ "LC0 = R0;\n\t"
+ "LOOP filter_start%= LC0;\n\t" /* %= gives each expansion of this asm its own label */
+ "LOOP_BEGIN filter_start%=;\n\t"
+ "R3 += 1;\n\t"
+ "LC1 = R3;\n\t" /* inner trip count = number of history words available so far */
+
+ "R1 = [P4++];\n\t" /* next mem word */
+ "A1 = R1;\n\t" /* A1 starts from the stored state */
+ "A0 = 0;\n\t"
+ "I0 = B0;\n\t" /* rewind to the first coefficient pair */
+ "I2 = P2;\n\t" /* start from the newest xy word */
+ "P2 += 4;\n\t" /* P2 now points at the slot for this sample */
+ "R4 = [I0++] || R5 = [I2--];\n\t" /* prefetch first coefficient pair and newest xy word */
+ "LOOP filter_start_inner%= LC1;\n\t"
+ "LOOP_BEGIN filter_start_inner%=;\n\t"
+ "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" /* signed-integer dual MAC on both halves, fetching the next operands in parallel */
+ "LOOP_END filter_start_inner%=;\n\t"
+ "A0 += A1;\n\t" /* combine the two accumulators */
+ "R4 = A0;\n\t"
+ "R4 <<= 3;\n\t" /* shift mem */
+ "R4.L = R4 (RND);\n\t" /* round to 16 bits, as for the first sample */
+ "R2 = W[P0++];\n\t" /* load x */
+ "R4.L = R4.L + R2.L;\n\t"
+ "W[P1++] = R4;\n\t" /* store y */
+ //"R4 <<= 2;\n\t"
+ //"R2 <<= 2;\n\t"
+ "R2 = PACK(R4.L, R2.L);\n\t" /* pack x16 and y16 */
+ "[P2] = R2;\n\t" /* append to the xy history */
+
+ "LOOP_END filter_start%=;\n\t"
+
+ /* Samples ord to N*/
+ "R0 = %5;\n\t" /* ord */
+ "R0 <<= 1;\n\t"
+ "I0 = B0;\n\t" /* numden */
+ "R0 <<= 1;\n\t" /* R0 = ord*4 bytes, one 32-bit word per tap pair */
+ "L0 = R0;\n\t" /* I0 now wraps inside the ord-word numden buffer based at B0 */
+
+ "R0 = %5;\n\t" /* ord */
+ "R2 = %4;\n\t" /* N */
+ "R2 = R2 - R0;\n\t" /* N - ord samples remain */
+ "R4 = [I0++];\n\t" /* numden */
+ "LC0 = R2;\n\t"
+ "P3 = R0;\n\t" /* P3 = ord, the inner trip count */
+ "R0 <<= 2;\n\t"
+ "R0 += 8;\n\t" /* ord*4 + 8 bytes */
+ "I2 = P2;\n\t"
+ "M0 = R0;\n\t" /* added to I2 after each inner loop to step it back up to the next free xy word */
+ "A1 = A0 = 0;\n\t"
+ "R5 = [I2--];\n\t" /* load xy */
+ "LOOP filter_mid%= LC0;\n\t"
+ "LOOP_BEGIN filter_mid%=;\n\t"
+ "LOOP filter_mid_inner%= LC1=P3;\n\t"
+ "LOOP_BEGIN filter_mid_inner%=;\n\t"
+ "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" /* same dual MAC as above, over all ord taps */
+ "LOOP_END filter_mid_inner%=;\n\t"
+ "R0 = (A0 += A1) || I2 += M0;\n\t" /* sum the accumulators | move I2 to the output slot */
+ "R0 = R0 << 3 || R5 = W[P0++];\n\t" /* load x */
+ "R0.L = R0 (RND);\n\t"
+ "R0.L = R0.L + R5.L;\n\t"
+ "R5 = PACK(R0.L, R5.L) || W[P1++] = R0;\n\t" /* pack y16 with x16 | store y (nothing is shifted on this line) */
+ "A1 = A0 = 0 || [I2--] = R5\n\t" /* clear accumulators | append xy; NOTE(review): no ; before the newline, unlike the other lines -- confirm the assembler accepts it */
+ "LOOP_END filter_mid%=;\n\t"
+ "I2 += 4;\n\t" /* undo the last post-decrement */
+ "P2 = I2;\n\t" /* P2 = newest xy word */
+ /* Update memory */
+ "P4 = %6;\n\t" /* back to mem[0] */
+ "R0 = %5;\n\t"
+ "LC0 = R0;\n\t" /* one outer pass per mem word */
+ "P0 = B0;\n\t" /* P0 now walks the coefficient start offset */
+ "A1 = A0 = 0;\n\t"
+ "LOOP mem_update%= LC0;\n\t"
+ "LOOP_BEGIN mem_update%=;\n\t"
+ "I2 = P2;\n\t" /* every pass restarts from the newest xy word */
+ "I0 = P0;\n\t"
+ "P0 += 4;\n\t" /* the next pass starts one coefficient pair later */
+ "R0 = LC0;\n\t"
+ "LC1 = R0;\n\t" /* inner trip count = current value of the outer loop counter */
+ "R5 = [I2--] || R4 = [I0++];\n\t"
+ "LOOP mem_accum%= LC1;\n\t"
+ "LOOP_BEGIN mem_accum%=;\n\t"
+ "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
+ "LOOP_END mem_accum%=;\n\t"
+ "R0 = (A0 += A1);\n\t"
+ "A1 = A0 = 0 || [P4++] = R0;\n\t" /* clear accumulators | store the new mem word */
+ "LOOP_END mem_update%=;\n\t"
+ "L0 = 0;\n\t" /* switch I0 back to linear addressing before leaving */
+ : : "m" (xy), "m" (_x), "m" (_y), "m" (numden), "m" (N), "m" (ord), "m" (mem) /* inputs only: %0 xy, %1 _x, %2 _y, %3 numden, %4 N, %5 ord, %6 mem */
+ : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B0", "I0", "I2", "L0", "L2", "M0", "memory" /* memory clobber covers the stores through P1, P2/I2 and P4 */
+ );
+
+}
+
+
#define OVERRIDE_IIR_MEM2
@@ -346,6 +486,132 @@ void iir_mem2(const spx_sig_t *_x, const spx_coef_t *den, spx_sig_t *_y, int N,
}
+
+#define OVERRIDE_IIR_MEM16
+void iir_mem16(const spx_word16_t *_x, const spx_coef_t *den, spx_word16_t *_y, int N, int ord, spx_mem_t *mem, char *stack) /* Blackfin inline-asm iir_mem16: consumes N 16-bit samples from _x, writes N to _y, reads ord taps from den, and rewrites the ord words of mem */
+{ /* stack is not named directly in this body; presumably consumed by the VARDECL/ALLOC project macros -- confirm */
+ VARDECL(spx_word16_t *y); /* backing store for the private output history */
+ spx_word16_t *yy; /* y + 2: the history pointer handed to the asm as %0 */
+
+ ALLOC(y, (N+2), spx_word16_t); /* N history entries plus two spare entries in front */
+ yy = y+2; /* spare entries below yy[0]: the filter_mid prefetch reads one entry beyond the last one it multiplies */
+
+ __asm__ __volatile__
+ (
+ /* Register setup */
+ "R0 = %5;\n\t" /*ord */
+
+ "P1 = %3;\n\t" /* P1 = den (P1 is reloaded with _y below) */
+ "I1 = P1;\n\t" /* I1: running coefficient pointer */
+ "B1 = P1;\n\t" /* B1 keeps the start of den */
+ "L1 = 0;\n\t" /* length 0: I1 addresses linearly for now */
+
+ "P3 = %0;\n\t" /* P3 = yy, the newest history entry */
+ "I3 = P3;\n\t" /* I3: running history pointer */
+ "L3 = 0;\n\t" /* length 0: I3 always addresses linearly */
+
+ "P4 = %6;\n\t" /* mem */
+ "P0 = %1;\n\t" /* _x */
+ "P1 = %2;\n\t" /* _y */
+
+ /* First sample */
+ "R1 = [P4++];\n\t" /* load mem[0] */
+ "R1 = R1 << 3 (S);\n\t" /* saturating shift of mem */
+ "R1.L = R1 (RND);\n\t" /* round the 32-bit value to 16 bits */
+ "R2 = W[P0++];\n\t" /* load x[0] */
+ "R1 = R1 + R2;\n\t" /* only the low half is stored below */
+ "W[P1++] = R1;\n\t" /* store y[0] to the caller */
+ "W[P3] = R1;\n\t" /* and to the private history */
+
+ /* Samples 1 to ord-1 (using memory) */
+ "R0 += -1;\n\t" /* ord - 1 outer iterations */
+ "R3 = 0;\n\t" /* R3 counts samples already produced */
+ "LC0 = R0;\n\t"
+ "LOOP filter_start%= LC0;\n\t" /* %= gives each expansion of this asm its own label */
+ "LOOP_BEGIN filter_start%=;\n\t"
+ "R3 += 1;\n\t"
+ "LC1 = R3;\n\t" /* inner trip count = number of history entries available so far */
+
+ "R1 = [P4++];\n\t" /* next mem word */
+ "A1 = R1;\n\t" /* A1 starts from the stored state */
+ "I1 = B1;\n\t" /* rewind to den[0] */
+ "I3 = P3;\n\t" /* start from the newest history entry */
+ "P3 += 2;\n\t" /* P3 now points at the slot for this sample */
+ "LOOP filter_start_inner%= LC1;\n\t"
+ "LOOP_BEGIN filter_start_inner%=;\n\t"
+ "R4.L = W[I1++];\n\t" /* coefficient, walking forward */
+ "R5.L = W[I3--];\n\t" /* history, walking backward */
+ "A1 -= R4.L*R5.L (IS);\n\t" /* signed-integer multiply-subtract */
+ "LOOP_END filter_start_inner%=;\n\t"
+
+ "R1 = A1;\n\t"
+ "R1 <<= 3;\n\t" /* unlike the first sample, this shift carries no (S) option */
+ "R1.L = R1 (RND);\n\t"
+ "R2 = W[P0++];\n\t" /* load x */
+ "R1 = R1 + R2;\n\t"
+ "W[P1++] = R1;\n\t" /* store y */
+ "W[P3] = R1;\n\t" /* append to the history */
+ "LOOP_END filter_start%=;\n\t"
+
+ /* Samples ord to N*/
+ "R0 = %5;\n\t" /* ord */
+ "R0 <<= 1;\n\t" /* ord*2 bytes, one 16-bit entry per tap */
+ "I1 = B1;\n\t"
+ "L1 = R0;\n\t" /* I1 now wraps inside the ord-entry den buffer based at B1 */
+
+ "R0 = %5;\n\t" /* ord */
+ "R2 = %4;\n\t" /* N */
+ "R2 = R2 - R0;\n\t" /* N - ord samples remain */
+ "R4.L = W[I1++];\n\t" /* prefetch den[0] */
+ "LC0 = R2;\n\t"
+ "LOOP filter_mid%= LC0;\n\t"
+ "LOOP_BEGIN filter_mid%=;\n\t"
+ "LC1 = R0;\n\t" /* R0 still holds ord here; nothing in this loop writes it */
+ "A1 = 0;\n\t"
+ "I3 = P3;\n\t"
+ "P3 += 2;\n\t" /* slot for this sample */
+ "R5.L = W[I3--];\n\t" /* prefetch the newest history entry */
+ "LOOP filter_mid_inner%= LC1;\n\t"
+ "LOOP_BEGIN filter_mid_inner%=;\n\t"
+ "A1 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t" /* MAC with the next operands fetched in parallel */
+ "LOOP_END filter_mid_inner%=;\n\t"
+ "R1 = A1;\n\t"
+ "R1 = R1 << 3 || R2 = W[P0++];\n\t" /* shift the sum | load x */
+ "R1.L = R1 (RND);\n\t"
+ "R1 = R1 + R2;\n\t"
+ "W[P1++] = R1;\n\t" /* store y */
+ "W[P3] = R1;\n\t" /* append to the history */
+ "LOOP_END filter_mid%=;\n\t"
+
+ /* Update memory */
+ "P4 = %6;\n\t" /* back to mem[0] */
+ "R0 = %5;\n\t"
+ "LC0 = R0;\n\t" /* one outer pass per mem word */
+ "P1 = B1;\n\t" /* P1 now walks the coefficient start offset */
+ "LOOP mem_update%= LC0;\n\t"
+ "LOOP_BEGIN mem_update%=;\n\t"
+ "A0 = 0;\n\t"
+ "I3 = P3;\n\t" /* every pass restarts from the newest history entry */
+ "I1 = P1;\n\t"
+ "P1 += 2;\n\t" /* the next pass starts one coefficient later */
+ "R0 = LC0;\n\t"
+ "LC1=R0;\n\t" /* inner trip count = current value of the outer loop counter */
+ "R5.L = W[I3--] || R4.L = W[I1++];\n\t"
+ "LOOP mem_accum%= LC1;\n\t"
+ "LOOP_BEGIN mem_accum%=;\n\t"
+ "A0 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t"
+ "LOOP_END mem_accum%=;\n\t"
+ "R0 = A0;\n\t"
+ "[P4++] = R0;\n\t" /* store the new mem word */
+ "LOOP_END mem_update%=;\n\t"
+ "L1 = 0;\n\t" /* switch I1 back to linear addressing before leaving */
+ : : "m" (yy), "m" (_x), "m" (_y), "m" (den), "m" (N), "m" (ord), "m" (mem) /* inputs only: %0 yy, %1 _x, %2 _y, %3 den, %4 N, %5 ord, %6 mem */
+ : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B1", "I1", "I3", "L1", "L3", "memory" /* P2 is listed although no instruction above references it */
+ );
+
+}
+
+
#define OVERRIDE_FIR_MEM2
void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
{
@@ -358,6 +624,18 @@ void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, in
filter_mem2(x, num, den, y, N, ord, mem);
}
+#define OVERRIDE_FIR_MEM16
+void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) /* FIR wrapper: forwards to filter_mem16 with a local den array whose first 10 entries are zero */
+{
+ int i;
+ spx_coef_t den2[12]; /* oversized so den can be moved up to a 4-byte boundary; assuming 16-bit spx_coef_t the move is at most 2 entries, leaving at least 10 -- confirm the type width */
+ spx_coef_t *den;
+ den = (spx_coef_t*)((((int)den2)+4)&0xfffffffc); /* next multiple of 4 strictly above the address of den2; the int cast assumes 32-bit pointers */
+ for (i=0;i<10;i++) /* only 10 taps are zeroed, while filter_mem16 reads den[0..ord-1]; NOTE(review): looks like callers must keep ord <= 10 -- confirm */
+ den[i] = 0;
+ filter_mem16(x, num, den, y, N, ord, mem, stack); /* all other arguments pass through unchanged */
+}
+
#define OVERRIDE_COMPUTE_IMPULSE_RESPONSE
void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)