diff options
Diffstat (limited to 'pjmedia/src/pjmedia-codec/speex/filters_bfin.h')
-rw-r--r-- | pjmedia/src/pjmedia-codec/speex/filters_bfin.h | 342 |
1 files changed, 310 insertions, 32 deletions
diff --git a/pjmedia/src/pjmedia-codec/speex/filters_bfin.h b/pjmedia/src/pjmedia-codec/speex/filters_bfin.h index 9f7ea6a9..2180ed42 100644 --- a/pjmedia/src/pjmedia-codec/speex/filters_bfin.h +++ b/pjmedia/src/pjmedia-codec/speex/filters_bfin.h @@ -32,14 +32,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <stdio.h> - #define OVERRIDE_NORMALIZE16 int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len) { spx_sig_t max_val=1; int sig_shift; - __asm__ ( "%0 = 0;\n\t" @@ -67,18 +64,17 @@ int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int le ( "I0 = %0;\n\t" "L0 = 0;\n\t" - "I1 = %1;\n\t" - "L1 = 0;\n\t" + "P1 = %1;\n\t" "R0 = [I0++];\n\t" - "LOOP norm_shift%= LC0 = %3 >> 1;\n\t" + "LOOP norm_shift%= LC0 = %3;\n\t" "LOOP_BEGIN norm_shift%=;\n\t" - "R1 = ASHIFT R0 by %2.L || R2 = [I0++];\n\t" - "R3 = ASHIFT R2 by %2.L || R0 = [I0++];\n\t" - "R3 = PACK(R3.L, R1.L);\n\t" - "[I1++] = R3;\n\t" + "R1 = ASHIFT R0 by %2.L || R0 = [I0++];\n\t" + "W[P1++] = R1;\n\t" "LOOP_END norm_shift%=;\n\t" - : : "a" (x), "a" (y), "d" (-sig_shift), "a" (len) - : "I0", "L0", "I1", "L1", "R0", "R1", "R2", "R3", "memory" + "R1 = ASHIFT R0 by %2.L;\n\t" + "W[P1++] = R1;\n\t" + : : "a" (x), "a" (y), "d" (-sig_shift), "a" (len-1) + : "I0", "L0", "P1", "R0", "R1", "memory" ); return sig_shift; } @@ -103,26 +99,26 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d "P0 = %3;\n\t" "I0 = P0;\n\t" - "B0 = P0;\n\t" + "B0 = P0;\n\t" /* numden */ "L0 = 0;\n\t" - "P2 = %0;\n\t" + "P2 = %0;\n\t" /* Fused xy */ "I2 = P2;\n\t" "L2 = 0;\n\t" - "P4 = %6;\n\t" - "P0 = %1;\n\t" - "P1 = %2;\n\t" + "P4 = %6;\n\t" /* mem */ + "P0 = %1;\n\t" /* _x */ + "P1 = %2;\n\t" /* _y */ /* First sample */ "R1 = [P4++];\n\t" - "R1 <<= 1;\n\t" - "R2 = [P0++];\n\t" + "R1 <<= 1;\n\t" /* shift mem */ + "R2 = [P0++];\n\t" /* load x[0] */ "R1 = R1 + R2;\n\t" - "[P1++] = R1;\n\t" + "[P1++] = R1;\n\t" /* store y[0] */ "R1 <<= 2;\n\t" "R2 <<= 2;\n\t" - "R2 = PACK(R1.H, R2.H);\n\t" + "R2 = PACK(R1.H, R2.H);\n\t" /* pack x16 and y16 */ "[P2] = R2;\n\t" /* Samples 1 to ord-1 (using memory) */ @@ -147,13 +143,13 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d "LOOP_END filter_start_inner%=;\n\t" "A0 += A1;\n\t" "R4 = A0;\n\t" - "R4 <<= 1;\n\t" - "R2 = [P0++];\n\t" + "R4 <<= 1;\n\t" /* shift mem */ + "R2 = [P0++];\n\t" /* load x */ "R4 = R4 + R2;\n\t" - "[P1++] = R4;\n\t" + "[P1++] = R4;\n\t" /* store y */ "R4 <<= 2;\n\t" "R2 <<= 2;\n\t" - "R2 = PACK(R4.H, R2.H);\n\t" + "R2 = PACK(R4.H, R2.H);\n\t" /* pack x16 and y16 */ "[P2] = R2;\n\t" "LOOP_END filter_start%=;\n\t" @@ -161,14 +157,14 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d /* Samples ord to N*/ "R0 = %5;\n\t" "R0 <<= 1;\n\t" - "I0 = B0;\n\t" + "I0 = B0;\n\t" /* numden */ "R0 <<= 1;\n\t" "L0 = R0;\n\t" - "R0 = %5;\n\t" - "R2 = %4;\n\t" + "R0 = %5;\n\t" /* org */ + "R2 = %4;\n\t" /* N */ "R2 = R2 - R0;\n\t" - "R4 = [I0++];\n\t" + "R4 = [I0++];\n\t" /* numden */ "LC0 = R2;\n\t" "P3 = R0;\n\t" "R0 <<= 2;\n\t" @@ -176,7 +172,7 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d "I2 = P2;\n\t" "M0 = R0;\n\t" "A1 = A0 = 0;\n\t" - "R5 = [I2--];\n\t" + "R5 = [I2--];\n\t" /* load xy */ "LOOP filter_mid%= LC0;\n\t" "LOOP_BEGIN filter_mid%=;\n\t" "LOOP filter_mid_inner%= LC1=P3;\n\t" @@ -184,9 +180,9 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" "LOOP_END filter_mid_inner%=;\n\t" "R0 = (A0 += A1) || I2 += M0;\n\t" - "R0 = R0 << 1 || R5 = [P0++];\n\t" + "R0 = R0 << 1 || R5 = [P0++];\n\t" /* load x */ "R0 = R0 + R5;\n\t" - "R0 = R0 << 2 || [P1++] = R0;\n\t" + "R0 = R0 << 2 || [P1++] = R0;\n\t" /* shift y | store y */ "R5 = R5 << 2;\n\t" "R5 = PACK(R0.H, R5.H);\n\t" "A1 = A0 = 0 || [I2--] = R5\n\t" @@ -222,6 +218,150 @@ void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *d } +#define OVERRIDE_FILTER_MEM16 +void filter_mem16(const spx_word16_t *_x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *_y, int N, int ord, spx_mem_t *mem, char *stack) +{ + VARDECL(spx_word32_t *xy2); + VARDECL(spx_word32_t *numden_a); + spx_word32_t *xy; + spx_word16_t *numden; + int i; + + ALLOC(xy2, (N+1), spx_word32_t); + ALLOC(numden_a, (2*ord+2), spx_word32_t); + xy = xy2+1; + numden = (spx_word16_t*) numden_a; + + for (i=0;i<ord;i++) + { + numden[2*i] = num[i]; + numden[2*i+1] = den[i]; + } + __asm__ __volatile__ + ( + /* Register setup */ + "R0 = %5;\n\t" /*ord */ + + "P0 = %3;\n\t" + "I0 = P0;\n\t" + "B0 = P0;\n\t" /* numden */ + "L0 = 0;\n\t" + + "P2 = %0;\n\t" /* Fused xy */ + "I2 = P2;\n\t" + "L2 = 0;\n\t" + + "P4 = %6;\n\t" /* mem */ + "P0 = %1;\n\t" /* _x */ + "P1 = %2;\n\t" /* _y */ + + /* First sample */ + "R1 = [P4++];\n\t" + "R1 <<= 3;\n\t" /* shift mem */ + "R1.L = R1 (RND);\n\t" + "R2 = W[P0++];\n\t" /* load x[0] */ + "R1.L = R1.L + R2.L;\n\t" + "W[P1++] = R1;\n\t" /* store y[0] */ + "R2 = PACK(R1.L, R2.L);\n\t" /* pack x16 and y16 */ + "[P2] = R2;\n\t" + + /* Samples 1 to ord-1 (using memory) */ + "R0 += -1;\n\t" + "R3 = 0;\n\t" + "LC0 = R0;\n\t" + "LOOP filter_start%= LC0;\n\t" + "LOOP_BEGIN filter_start%=;\n\t" + "R3 += 1;\n\t" + "LC1 = R3;\n\t" + + "R1 = [P4++];\n\t" + "A1 = R1;\n\t" + "A0 = 0;\n\t" + "I0 = B0;\n\t" + "I2 = P2;\n\t" + "P2 += 4;\n\t" + "R4 = [I0++] || R5 = [I2--];\n\t" + "LOOP filter_start_inner%= LC1;\n\t" + "LOOP_BEGIN filter_start_inner%=;\n\t" + "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" + "LOOP_END filter_start_inner%=;\n\t" + "A0 += A1;\n\t" + "R4 = A0;\n\t" + "R4 <<= 3;\n\t" /* shift mem */ + "R4.L = R4 (RND);\n\t" + "R2 = W[P0++];\n\t" /* load x */ + "R4.L = R4.L + R2.L;\n\t" + "W[P1++] = R4;\n\t" /* store y */ + //"R4 <<= 2;\n\t" + //"R2 <<= 2;\n\t" + "R2 = PACK(R4.L, R2.L);\n\t" /* pack x16 and y16 */ + "[P2] = R2;\n\t" + + "LOOP_END filter_start%=;\n\t" + + /* Samples ord to N*/ + "R0 = %5;\n\t" + "R0 <<= 1;\n\t" + "I0 = B0;\n\t" /* numden */ + "R0 <<= 1;\n\t" + "L0 = R0;\n\t" + + "R0 = %5;\n\t" /* org */ + "R2 = %4;\n\t" /* N */ + "R2 = R2 - R0;\n\t" + "R4 = [I0++];\n\t" /* numden */ + "LC0 = R2;\n\t" + "P3 = R0;\n\t" + "R0 <<= 2;\n\t" + "R0 += 8;\n\t" + "I2 = P2;\n\t" + "M0 = R0;\n\t" + "A1 = A0 = 0;\n\t" + "R5 = [I2--];\n\t" /* load xy */ + "LOOP filter_mid%= LC0;\n\t" + "LOOP_BEGIN filter_mid%=;\n\t" + "LOOP filter_mid_inner%= LC1=P3;\n\t" + "LOOP_BEGIN filter_mid_inner%=;\n\t" + "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" + "LOOP_END filter_mid_inner%=;\n\t" + "R0 = (A0 += A1) || I2 += M0;\n\t" + "R0 = R0 << 3 || R5 = W[P0++];\n\t" /* load x */ + "R0.L = R0 (RND);\n\t" + "R0.L = R0.L + R5.L;\n\t" + "R5 = PACK(R0.L, R5.L) || W[P1++] = R0;\n\t" /* shift y | store y */ + "A1 = A0 = 0 || [I2--] = R5\n\t" + "LOOP_END filter_mid%=;\n\t" + "I2 += 4;\n\t" + "P2 = I2;\n\t" + /* Update memory */ + "P4 = %6;\n\t" + "R0 = %5;\n\t" + "LC0 = R0;\n\t" + "P0 = B0;\n\t" + "A1 = A0 = 0;\n\t" + "LOOP mem_update%= LC0;\n\t" + "LOOP_BEGIN mem_update%=;\n\t" + "I2 = P2;\n\t" + "I0 = P0;\n\t" + "P0 += 4;\n\t" + "R0 = LC0;\n\t" + "LC1 = R0;\n\t" + "R5 = [I2--] || R4 = [I0++];\n\t" + "LOOP mem_accum%= LC1;\n\t" + "LOOP_BEGIN mem_accum%=;\n\t" + "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" + "LOOP_END mem_accum%=;\n\t" + "R0 = (A0 += A1);\n\t" + "A1 = A0 = 0 || [P4++] = R0;\n\t" + "LOOP_END mem_update%=;\n\t" + "L0 = 0;\n\t" + : : "m" (xy), "m" (_x), "m" (_y), "m" (numden), "m" (N), "m" (ord), "m" (mem) + : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B0", "I0", "I2", "L0", "L2", "M0", "memory" + ); + +} + + #define OVERRIDE_IIR_MEM2 @@ -346,6 +486,132 @@ void iir_mem2(const spx_sig_t *_x, const spx_coef_t *den, spx_sig_t *_y, int N, } + +#define OVERRIDE_IIR_MEM16 +void iir_mem16(const spx_word16_t *_x, const spx_coef_t *den, spx_word16_t *_y, int N, int ord, spx_mem_t *mem, char *stack) +{ + VARDECL(spx_word16_t *y); + spx_word16_t *yy; + + ALLOC(y, (N+2), spx_word16_t); + yy = y+2; + + __asm__ __volatile__ + ( + /* Register setup */ + "R0 = %5;\n\t" /*ord */ + + "P1 = %3;\n\t" + "I1 = P1;\n\t" + "B1 = P1;\n\t" + "L1 = 0;\n\t" + + "P3 = %0;\n\t" + "I3 = P3;\n\t" + "L3 = 0;\n\t" + + "P4 = %6;\n\t" + "P0 = %1;\n\t" + "P1 = %2;\n\t" + + /* First sample */ + "R1 = [P4++];\n\t" + "R1 = R1 << 3 (S);\n\t" + "R1.L = R1 (RND);\n\t" + "R2 = W[P0++];\n\t" + "R1 = R1 + R2;\n\t" + "W[P1++] = R1;\n\t" + "W[P3] = R1;\n\t" + + /* Samples 1 to ord-1 (using memory) */ + "R0 += -1;\n\t" + "R3 = 0;\n\t" + "LC0 = R0;\n\t" + "LOOP filter_start%= LC0;\n\t" + "LOOP_BEGIN filter_start%=;\n\t" + "R3 += 1;\n\t" + "LC1 = R3;\n\t" + + "R1 = [P4++];\n\t" + "A1 = R1;\n\t" + "I1 = B1;\n\t" + "I3 = P3;\n\t" + "P3 += 2;\n\t" + "LOOP filter_start_inner%= LC1;\n\t" + "LOOP_BEGIN filter_start_inner%=;\n\t" + "R4.L = W[I1++];\n\t" + "R5.L = W[I3--];\n\t" + "A1 -= R4.L*R5.L (IS);\n\t" + "LOOP_END filter_start_inner%=;\n\t" + + "R1 = A1;\n\t" + "R1 <<= 3;\n\t" + "R1.L = R1 (RND);\n\t" + "R2 = W[P0++];\n\t" + "R1 = R1 + R2;\n\t" + "W[P1++] = R1;\n\t" + "W[P3] = R1;\n\t" + "LOOP_END filter_start%=;\n\t" + + /* Samples ord to N*/ + "R0 = %5;\n\t" + "R0 <<= 1;\n\t" + "I1 = B1;\n\t" + "L1 = R0;\n\t" + + "R0 = %5;\n\t" + "R2 = %4;\n\t" + "R2 = R2 - R0;\n\t" + "R4.L = W[I1++];\n\t" + "LC0 = R2;\n\t" + "LOOP filter_mid%= LC0;\n\t" + "LOOP_BEGIN filter_mid%=;\n\t" + "LC1 = R0;\n\t" + "A1 = 0;\n\t" + "I3 = P3;\n\t" + "P3 += 2;\n\t" + "R5.L = W[I3--];\n\t" + "LOOP filter_mid_inner%= LC1;\n\t" + "LOOP_BEGIN filter_mid_inner%=;\n\t" + "A1 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t" + "LOOP_END filter_mid_inner%=;\n\t" + "R1 = A1;\n\t" + "R1 = R1 << 3 || R2 = W[P0++];\n\t" + "R1.L = R1 (RND);\n\t" + "R1 = R1 + R2;\n\t" + "W[P1++] = R1;\n\t" + "W[P3] = R1;\n\t" + "LOOP_END filter_mid%=;\n\t" + + /* Update memory */ + "P4 = %6;\n\t" + "R0 = %5;\n\t" + "LC0 = R0;\n\t" + "P1 = B1;\n\t" + "LOOP mem_update%= LC0;\n\t" + "LOOP_BEGIN mem_update%=;\n\t" + "A0 = 0;\n\t" + "I3 = P3;\n\t" + "I1 = P1;\n\t" + "P1 += 2;\n\t" + "R0 = LC0;\n\t" + "LC1=R0;\n\t" + "R5.L = W[I3--] || R4.L = W[I1++];\n\t" + "LOOP mem_accum%= LC1;\n\t" + "LOOP_BEGIN mem_accum%=;\n\t" + "A0 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t" + "LOOP_END mem_accum%=;\n\t" + "R0 = A0;\n\t" + "[P4++] = R0;\n\t" + "LOOP_END mem_update%=;\n\t" + "L1 = 0;\n\t" + : : "m" (yy), "m" (_x), "m" (_y), "m" (den), "m" (N), "m" (ord), "m" (mem) + : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B1", "I1", "I3", "L1", "L3", "memory" + ); + +} + + #define OVERRIDE_FIR_MEM2 void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem) { @@ -358,6 +624,18 @@ void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, in filter_mem2(x, num, den, y, N, ord, mem); } +#define OVERRIDE_FIR_MEM16 +void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) +{ + int i; + spx_coef_t den2[12]; + spx_coef_t *den; + den = (spx_coef_t*)((((int)den2)+4)&0xfffffffc); + for (i=0;i<10;i++) + den[i] = 0; + filter_mem16(x, num, den, y, N, ord, mem, stack); +} + #define OVERRIDE_COMPUTE_IMPULSE_RESPONSE void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack) |