diff options
author | Kevin P. Fleming <kpfleming@digium.com> | 2008-05-21 15:11:48 +0000 |
---|---|---|
committer | Kevin P. Fleming <kpfleming@digium.com> | 2008-05-21 15:11:48 +0000 |
commit | 802b567e6c7ba7803a950324cbed13f7d57944cb (patch) | |
tree | 6b90ca3119aaa2e4073d3b651ac965dea5d3430e /drivers/dahdi/arith.h | |
parent | ec5ce88e015b41c2f46f6c9b783339b945f9502a (diff) |
start copying kernel bits
git-svn-id: http://svn.asterisk.org/svn/dahdi/linux/trunk@4315 a0bf4364-ded3-4de4-8d8a-66a801d63aff
Diffstat (limited to 'drivers/dahdi/arith.h')
-rw-r--r-- | drivers/dahdi/arith.h | 364 |
1 files changed, 364 insertions, 0 deletions
diff --git a/drivers/dahdi/arith.h b/drivers/dahdi/arith.h new file mode 100644 index 0000000..3059db5 --- /dev/null +++ b/drivers/dahdi/arith.h @@ -0,0 +1,364 @@ +#ifndef _ZAPTEL_ARITH_H +#define _ZAPTEL_ARITH_H +/* + * Handy add/subtract functions to operate on chunks of shorts. + * Feel free to add customizations for additional architectures + * + */ + +#ifdef CONFIG_ZAPTEL_MMX +#ifdef ZT_CHUNKSIZE +static inline void __ACSS(volatile short *dst, const short *src) +{ + __asm__ __volatile__ ( + "movq 0(%0), %%mm0;\n" + "movq 0(%1), %%mm1;\n" + "movq 8(%0), %%mm2;\n" + "movq 8(%1), %%mm3;\n" + "paddsw %%mm1, %%mm0;\n" + "paddsw %%mm3, %%mm2;\n" + "movq %%mm0, 0(%0);\n" + "movq %%mm2, 8(%0);\n" + : "=r" (dst) + : "r" (src), "0" (dst) + : "memory" +#ifdef CLOBBERMMX + , "%mm0", "%mm1", "%mm2", "%mm3" +#endif + ); + +} +static inline void __SCSS(volatile short *dst, const short *src) +{ + __asm__ __volatile__ ( + "movq 0(%0), %%mm0;\n" + "movq 0(%1), %%mm1;\n" + "movq 8(%0), %%mm2;\n" + "movq 8(%1), %%mm3;\n" + "psubsw %%mm1, %%mm0;\n" + "psubsw %%mm3, %%mm2;\n" + "movq %%mm0, 0(%0);\n" + "movq %%mm2, 8(%0);\n" + : "=r" (dst) + : "r" (src), "0" (dst) + : "memory" +#ifdef CLOBBERMMX + , "%mm0", "%mm1", "%mm2", "%mm3" +#endif + ); + +} + +#if (ZT_CHUNKSIZE == 8) +#define ACSS(a,b) __ACSS(a,b) +#define SCSS(a,b) __SCSS(a,b) +#elif (ZT_CHUNKSIZE > 8) +static inline void ACSS(volatile short *dst, const short *src) +{ + int x; + for (x=0;x<ZT_CHUNKSIZE;x+=8) + __ACSS(dst + x, src + x); +} +static inline void SCSS(volatile short *dst, const short *src) +{ + int x; + for (x=0;x<ZT_CHUNKSIZE;x+=8) + __SCSS(dst + x, src + x); +} +#else +#error No MMX for ZT_CHUNKSIZE < 8 +#endif +#endif +static inline int CONVOLVE(const int *coeffs, const short *hist, int len) +{ + int sum; + /* Divide length by 16 */ + len >>= 4; + + /* Clear our accumulator, mm4 */ + + /* + + For every set of eight... + + Load 16 coefficients into four registers... + Shift each word right 16 to make them shorts... + Pack the resulting shorts into two registers... + With the coefficients now in mm0 and mm2, load the + history into mm1 and mm3... + Multiply/add mm1 into mm0, and mm3 into mm2... + Add mm2 into mm0 (without saturation, alas). Now we have two half-results. + Accumulate in mm4 (again, without saturation, alas) + */ + __asm__ ( + "pxor %%mm4, %%mm4;\n" + "mov %1, %%edi;\n" + "mov %2, %%esi;\n" + "mov %3, %%ecx;\n" + "1:" + "movq 0(%%edi), %%mm0;\n" + "movq 8(%%edi), %%mm1;\n" + "movq 16(%%edi), %%mm2;\n" + "movq 24(%%edi), %%mm3;\n" + /* can't use 4/5 since 4 is the accumulator for us */ + "movq 32(%%edi), %%mm6;\n" + "movq 40(%%edi), %%mm7;\n" + "psrad $16, %%mm0;\n" + "psrad $16, %%mm1;\n" + "psrad $16, %%mm2;\n" + "psrad $16, %%mm3;\n" + "psrad $16, %%mm6;\n" + "psrad $16, %%mm7;\n" + "packssdw %%mm1, %%mm0;\n" + "packssdw %%mm3, %%mm2;\n" + "packssdw %%mm7, %%mm6;\n" + "movq 0(%%esi), %%mm1;\n" + "movq 8(%%esi), %%mm3;\n" + "movq 16(%%esi), %%mm7;\n" + "pmaddwd %%mm1, %%mm0;\n" + "pmaddwd %%mm3, %%mm2;\n" + "pmaddwd %%mm7, %%mm6;\n" + "paddd %%mm6, %%mm4;\n" + "paddd %%mm2, %%mm4;\n" + "paddd %%mm0, %%mm4;\n" + /* Come back and do for the last few bytes */ + "movq 48(%%edi), %%mm6;\n" + "movq 56(%%edi), %%mm7;\n" + "psrad $16, %%mm6;\n" + "psrad $16, %%mm7;\n" + "packssdw %%mm7, %%mm6;\n" + "movq 24(%%esi), %%mm7;\n" + "pmaddwd %%mm7, %%mm6;\n" + "paddd %%mm6, %%mm4;\n" + "add $64, %%edi;\n" + "add $32, %%esi;\n" + "dec %%ecx;\n" + "jnz 1b;\n" + "movq %%mm4, %%mm0;\n" + "psrlq $32, %%mm0;\n" + "paddd %%mm0, %%mm4;\n" + "movd %%mm4, %0;\n" + : "=r" (sum) + : "r" (coeffs), "r" (hist), "r" (len) + : "%ecx", "%edi", "%esi" + ); + + return sum; +} + +static inline void UPDATE(volatile int *taps, const short *history, const int nsuppr, const int ntaps) +{ + int i; + int correction; + for (i=0;i<ntaps;i++) { + correction = history[i] * nsuppr; + taps[i] += correction; + } +} + +static inline void UPDATE2(volatile int *taps, volatile short *taps_short, const short *history, const int nsuppr, const int ntaps) +{ + int i; + int correction; +#if 0 + ntaps >>= 4; + /* First, load up taps, */ + __asm__ ( + "pxor %%mm4, %%mm4;\n" + "mov %0, %%edi;\n" + "mov %1, %%esi;\n" + "mov %3, %%ecx;\n" + "1:" + "jnz 1b;\n" + "movq %%mm4, %%mm0;\n" + "psrlq $32, %%mm0;\n" + "paddd %%mm0, %%mm4;\n" + "movd %%mm4, %0;\n" + : "=r" (taps), "=r" (taps_short) + : "r" (history), "r" (nsuppr), "r" (ntaps), "0" (taps) + : "%ecx", "%edi", "%esi" + ); +#endif +#if 1 + for (i=0;i<ntaps;i++) { + correction = history[i] * nsuppr; + taps[i] += correction; + taps_short[i] = taps[i] >> 16; + } +#endif +} + +static inline int CONVOLVE2(const short *coeffs, const short *hist, int len) +{ + int sum; + /* Divide length by 16 */ + len >>= 4; + + /* Clear our accumulator, mm4 */ + + /* + + For every set of eight... + Load in eight coefficients and eight historic samples, multliply add and + accumulate the result + */ + __asm__ ( + "pxor %%mm4, %%mm4;\n" + "mov %1, %%edi;\n" + "mov %2, %%esi;\n" + "mov %3, %%ecx;\n" + "1:" + "movq 0(%%edi), %%mm0;\n" + "movq 8(%%edi), %%mm2;\n" + "movq 0(%%esi), %%mm1;\n" + "movq 8(%%esi), %%mm3;\n" + "pmaddwd %%mm1, %%mm0;\n" + "pmaddwd %%mm3, %%mm2;\n" + "paddd %%mm2, %%mm4;\n" + "paddd %%mm0, %%mm4;\n" + "movq 16(%%edi), %%mm0;\n" + "movq 24(%%edi), %%mm2;\n" + "movq 16(%%esi), %%mm1;\n" + "movq 24(%%esi), %%mm3;\n" + "pmaddwd %%mm1, %%mm0;\n" + "pmaddwd %%mm3, %%mm2;\n" + "paddd %%mm2, %%mm4;\n" + "paddd %%mm0, %%mm4;\n" + "add $32, %%edi;\n" + "add $32, %%esi;\n" + "dec %%ecx;\n" + "jnz 1b;\n" + "movq %%mm4, %%mm0;\n" + "psrlq $32, %%mm0;\n" + "paddd %%mm0, %%mm4;\n" + "movd %%mm4, %0;\n" + : "=r" (sum) + : "r" (coeffs), "r" (hist), "r" (len) + : "%ecx", "%edi", "%esi" + ); + + return sum; +} +static inline short MAX16(const short *y, int len, int *pos) +{ + int k; + short max = 0; + int bestpos = 0; + for (k=0;k<len;k++) { + if (max < y[k]) { + bestpos = k; + max = y[k]; + } + } + *pos = (len - 1 - bestpos); + return max; +} + + + +#else + +#ifdef ZT_CHUNKSIZE +static inline void ACSS(short *dst, short *src) +{ + int x; + + /* Add src to dst with saturation, storing in dst */ + +#ifdef BFIN + for (x = 0; x < ZT_CHUNKSIZE; x++) + dst[x] = __builtin_bfin_add_fr1x16(dst[x], src[x]); +#else + int sum; + + for (x = 0; x < ZT_CHUNKSIZE; x++) { + sum = dst[x] + src[x]; + if (sum > 32767) + sum = 32767; + else if (sum < -32768) + sum = -32768; + dst[x] = sum; + } +#endif +} + +static inline void SCSS(short *dst, short *src) +{ + int x; + + /* Subtract src from dst with saturation, storing in dst */ +#ifdef BFIN + for (x = 0; x < ZT_CHUNKSIZE; x++) + dst[x] = __builtin_bfin_sub_fr1x16(dst[x], src[x]); +#else + int sum; + + for (x = 0; x < ZT_CHUNKSIZE; x++) { + sum = dst[x] - src[x]; + if (sum > 32767) + sum = 32767; + else if (sum < -32768) + sum = -32768; + dst[x] = sum; + } +#endif +} + +#endif /* ZT_CHUNKSIZE */ + +static inline int CONVOLVE(const int *coeffs, const short *hist, int len) +{ + int x; + int sum = 0; + for (x=0;x<len;x++) + sum += (coeffs[x] >> 16) * hist[x]; + return sum; +} + +static inline int CONVOLVE2(const short *coeffs, const short *hist, int len) +{ + int x; + int sum = 0; + for (x=0;x<len;x++) + sum += coeffs[x] * hist[x]; + return sum; +} + +static inline void UPDATE(int *taps, const short *history, const int nsuppr, const int ntaps) +{ + int i; + int correction; + for (i=0;i<ntaps;i++) { + correction = history[i] * nsuppr; + taps[i] += correction; + } +} + +static inline void UPDATE2(int *taps, short *taps_short, const short *history, const int nsuppr, const int ntaps) +{ + int i; + int correction; + for (i=0;i<ntaps;i++) { + correction = history[i] * nsuppr; + taps[i] += correction; + taps_short[i] = taps[i] >> 16; + } +} + +static inline short MAX16(const short *y, int len, int *pos) +{ + int k; + short max = 0; + int bestpos = 0; + for (k=0;k<len;k++) { + if (max < y[k]) { + bestpos = k; + max = y[k]; + } + } + *pos = (len - 1 - bestpos); + return max; +} + +#endif /* MMX */ +#endif /* _ZAPTEL_ARITH_H */ |