summary | refs | log | tree | commit | diff
path: root/arith.h
diff options
context:
space:
mode:
author markster <markster@5390a7c7-147a-4af0-8ec9-7488f05a26cb> 2002-10-28 22:08:48 +0000
committer markster <markster@5390a7c7-147a-4af0-8ec9-7488f05a26cb> 2002-10-28 22:08:48 +0000
commit 7f5c6e162a9c5ff654115136ccc6783a67749a72 (patch)
tree e78add6b051106cf07f5d7fe67b0a591574377de /arith.h
parent cd0da1d15c640b4a63fe9e0222b216bebaf0b08f (diff)
Version 0.3.2 from FTP
git-svn-id: http://svn.digium.com/svn/zaptel/trunk@124 5390a7c7-147a-4af0-8ec9-7488f05a26cb
Diffstat (limited to 'arith.h')
-rwxr-xr-x arith.h 252
1 files changed, 250 insertions, 2 deletions
diff --git a/arith.h b/arith.h
index 7ae859d..96541c8 100755
--- a/arith.h
+++ b/arith.h
@@ -1,3 +1,5 @@
+#ifndef _ZAPTEL_ARITH_H
+#define _ZAPTEL_ARITH_H
/*
* Handy add/subtract functions to operate on chunks of shorts.
* Feel free to add customizations for additional architectures
@@ -5,7 +7,7 @@
*/
#ifdef CONFIG_ZAPTEL_MMX
-
+#ifdef ZT_CHUNKSIZE
static inline void __ACSS(volatile short *dst, const short *src)
{
__asm__ __volatile__ (
@@ -46,6 +48,7 @@ static inline void __SCSS(volatile short *dst, const short *src)
);
}
+
#if (ZT_CHUNKSIZE == 8)
#define ACSS(a,b) __ACSS(a,b)
#define SCSS(a,b) __SCSS(a,b)
@@ -65,9 +68,197 @@ static inline void SCSS(volatile short *dst, const short *src)
#else
#error No MMX for ZT_CHUNKSIZE < 8
#endif
+#endif
+static inline int CONVOLVE(const int *coeffs, const short *hist, int len) /* MMX build: sums (coeffs[i] >> 16) * hist[i], 16 elements per loop pass */
+{
+ int sum; /* filled in by the final movd from the low dword of mm4 */
+ /* Divide length by 16: each loop pass consumes 16 coefficients and 16 samples */
+ len >>= 4;
+
+ /* Clear our accumulator, mm4 */
+
+ /*
+
+ For every set of sixteen...
+
+ Load 16 coefficients into four registers...
+ Shift each word right 16 to make them shorts...
+ Pack the resulting shorts into two registers...
+ With the coefficients now in mm0 and mm2, load the
+ history into mm1 and mm3...
+ Multiply/add mm1 into mm0, and mm3 into mm2...
+ Add mm2 into mm0 (without saturation, alas). Now we have two half-results.
+ Accumulate in mm4 (again, without saturation, alas)
+ */
+ __asm__ (
+ "pxor %%mm4, %%mm4;\n"
+ "mov %1, %%edi;\n" /* edi walks coeffs */
+ "mov %2, %%esi;\n" /* esi walks hist */
+ "mov %3, %%ecx;\n" /* ecx counts loop passes (len after the shift above) */
+ "1:"
+ "movq 0(%%edi), %%mm0;\n"
+ "movq 8(%%edi), %%mm1;\n"
+ "movq 16(%%edi), %%mm2;\n"
+ "movq 24(%%edi), %%mm3;\n"
+ /* can't use 4/5 since 4 is the accumulator for us */
+ "movq 32(%%edi), %%mm6;\n"
+ "movq 40(%%edi), %%mm7;\n"
+ "psrad $16, %%mm0;\n" /* keep the upper 16 bits of every 32-bit coefficient */
+ "psrad $16, %%mm1;\n"
+ "psrad $16, %%mm2;\n"
+ "psrad $16, %%mm3;\n"
+ "psrad $16, %%mm6;\n"
+ "psrad $16, %%mm7;\n"
+ "packssdw %%mm1, %%mm0;\n" /* pack two registers of dwords into one register of four words */
+ "packssdw %%mm3, %%mm2;\n"
+ "packssdw %%mm7, %%mm6;\n"
+ "movq 0(%%esi), %%mm1;\n" /* first 12 history samples go into mm1, mm3, mm7 */
+ "movq 8(%%esi), %%mm3;\n"
+ "movq 16(%%esi), %%mm7;\n"
+ "pmaddwd %%mm1, %%mm0;\n"
+ "pmaddwd %%mm3, %%mm2;\n"
+ "pmaddwd %%mm7, %%mm6;\n"
+ "paddd %%mm6, %%mm4;\n"
+ "paddd %%mm2, %%mm4;\n"
+ "paddd %%mm0, %%mm4;\n"
+ /* Come back and do for the last few bytes: coefficients 12..15 and samples 12..15 of this pass */
+ "movq 48(%%edi), %%mm6;\n"
+ "movq 56(%%edi), %%mm7;\n"
+ "psrad $16, %%mm6;\n"
+ "psrad $16, %%mm7;\n"
+ "packssdw %%mm7, %%mm6;\n"
+ "movq 24(%%esi), %%mm7;\n"
+ "pmaddwd %%mm7, %%mm6;\n"
+ "paddd %%mm6, %%mm4;\n"
+ "add $64, %%edi;\n" /* 16 ints consumed */
+ "add $32, %%esi;\n" /* 16 shorts consumed */
+ "dec %%ecx;\n" /* NOTE(review): a len below 16 reaches here as 0, so this dec wraps and jnz keeps looping - confirm callers always pass len >= 16 */
+ "jnz 1b;\n"
+ "movq %%mm4, %%mm0;\n" /* fold the two dword partial sums of mm4 into its low dword */
+ "psrlq $32, %%mm0;\n"
+ "paddd %%mm0, %%mm4;\n"
+ "movd %%mm4, %0;\n"
+ : "=r" (sum)
+ : "r" (coeffs), "r" (hist), "r" (len)
+ : "%ecx", "%edi", "%esi" /* NOTE(review): no MMX register or memory clobber is declared and no emms is issued in this block - confirm this is handled elsewhere */
+ );
+
+ return sum;
+}
+
+static inline void UPDATE(volatile int *taps, const short *history, const int nsuppr, const int ntaps) /* MMX build: plain C loop adding history[i] * nsuppr to taps[i] */
+{
+ int i;
+ int correction; /* per-tap increment, computed in int arithmetic */
+ for (i=0;i<ntaps;i++) { /* covers taps[0] .. taps[ntaps-1]; does nothing when ntaps <= 0 */
+ correction = history[i] * nsuppr;
+ taps[i] += correction;
+ }
+}
+
+static inline void UPDATE2(volatile int *taps, volatile short *taps_short, const short *history, const int nsuppr, const int ntaps) /* MMX build: updates taps[] like UPDATE and mirrors taps[i] >> 16 into taps_short[] */
+{
+ int i;
+ int correction; /* per-tap increment, computed in int arithmetic */
+#if 0 /* NOTE(review): disabled and apparently unfinished MMX attempt - it assigns to the const parameter ntaps and its loop has no body */
+ ntaps >>= 4;
+ /* First, load up taps, */
+ __asm__ (
+ "pxor %%mm4, %%mm4;\n"
+ "mov %0, %%edi;\n"
+ "mov %1, %%esi;\n"
+ "mov %3, %%ecx;\n"
+ "1:"
+ "jnz 1b;\n"
+ "movq %%mm4, %%mm0;\n"
+ "psrlq $32, %%mm0;\n"
+ "paddd %%mm0, %%mm4;\n"
+ "movd %%mm4, %0;\n"
+ : "=r" (taps), "=r" (taps_short)
+ : "r" (history), "r" (nsuppr), "r" (ntaps), "0" (taps)
+ : "%ecx", "%edi", "%esi"
+ );
+#endif
+#if 1 /* the C loop below is the code that is actually compiled */
+ for (i=0;i<ntaps;i++) {
+ correction = history[i] * nsuppr;
+ taps[i] += correction;
+ taps_short[i] = taps[i] >> 16; /* updated tap shifted right by 16, stored as a short */
+ }
+#endif
+}
+
+static inline int CONVOLVE2(const short *coeffs, const short *hist, int len) /* MMX build: sums coeffs[i] * hist[i] over 16-bit inputs, 16 pairs per loop pass */
+{
+ int sum; /* filled in by the final movd from the low dword of mm4 */
+ /* Divide length by 16: each loop pass handles 16 pairs, as two groups of eight */
+ len >>= 4;
+
+ /* Clear our accumulator, mm4 */
+
+ /*
+
+ For every set of eight...
+ Load in eight coefficients and eight historic samples, multiply-add and
+ accumulate the result
+ */
+ __asm__ (
+ "pxor %%mm4, %%mm4;\n"
+ "mov %1, %%edi;\n" /* edi walks coeffs */
+ "mov %2, %%esi;\n" /* esi walks hist */
+ "mov %3, %%ecx;\n" /* ecx counts loop passes (len after the shift above) */
+ "1:"
+ "movq 0(%%edi), %%mm0;\n" /* first group: coefficients 0..7 against samples 0..7 */
+ "movq 8(%%edi), %%mm2;\n"
+ "movq 0(%%esi), %%mm1;\n"
+ "movq 8(%%esi), %%mm3;\n"
+ "pmaddwd %%mm1, %%mm0;\n"
+ "pmaddwd %%mm3, %%mm2;\n"
+ "paddd %%mm2, %%mm4;\n"
+ "paddd %%mm0, %%mm4;\n"
+ "movq 16(%%edi), %%mm0;\n" /* second group: coefficients 8..15 against samples 8..15 */
+ "movq 24(%%edi), %%mm2;\n"
+ "movq 16(%%esi), %%mm1;\n"
+ "movq 24(%%esi), %%mm3;\n"
+ "pmaddwd %%mm1, %%mm0;\n"
+ "pmaddwd %%mm3, %%mm2;\n"
+ "paddd %%mm2, %%mm4;\n"
+ "paddd %%mm0, %%mm4;\n"
+ "add $32, %%edi;\n" /* 16 shorts consumed from each array */
+ "add $32, %%esi;\n"
+ "dec %%ecx;\n" /* NOTE(review): a len below 16 reaches here as 0, so this dec wraps and jnz keeps looping - confirm callers always pass len >= 16 */
+ "jnz 1b;\n"
+ "movq %%mm4, %%mm0;\n" /* fold the two dword partial sums of mm4 into its low dword */
+ "psrlq $32, %%mm0;\n"
+ "paddd %%mm0, %%mm4;\n"
+ "movd %%mm4, %0;\n"
+ : "=r" (sum)
+ : "r" (coeffs), "r" (hist), "r" (len)
+ : "%ecx", "%edi", "%esi" /* NOTE(review): no MMX register or memory clobber is declared and no emms is issued in this block - confirm this is handled elsewhere */
+ );
+
+ return sum;
+}
+static inline short MAX16(const short *y, int len, int *pos) /* MMX build: returns the largest y[k] above 0 (0 if none) and reports its position through pos */
+{
+ int k;
+ short max = 0; /* starting at 0 means values <= 0 are never selected */
+ int bestpos = 0; /* stays 0 when no element exceeds 0 */
+ for (k=0;k<len;k++) {
+ if (max < y[k]) { /* strict compare keeps the first occurrence of the maximum */
+ bestpos = k;
+ max = y[k];
+ }
+ }
+ *pos = (len - 1 - bestpos); /* position is reported as a distance back from the last element */
+ return max;
+}
+
+
#else
+#ifdef ZT_CHUNKSIZE
static inline void ACSS(short *dst, short *src)
{
int x,sum;
@@ -96,4 +287,61 @@ static inline void SCSS(short *dst, short *src)
}
}
-#endif
+#endif /* ZT_CHUNKSIZE */
+
+static inline int CONVOLVE(const int *coeffs, const short *hist, int len) /* portable build: sums (coeffs[x] >> 16) * hist[x] for x in [0, len) */
+{
+ int x;
+ int sum = 0; /* returned unchanged when len <= 0 */
+ for (x=0;x<len;x++)
+ sum += (coeffs[x] >> 16) * hist[x]; /* each coefficient is shifted right by 16 before the multiply */
+ return sum;
+}
+
+static inline int CONVOLVE2(const short *coeffs, const short *hist, int len) /* portable build: sums coeffs[x] * hist[x] for x in [0, len) */
+{
+ int x;
+ int sum = 0; /* returned unchanged when len <= 0 */
+ for (x=0;x<len;x++)
+ sum += coeffs[x] * hist[x]; /* accumulated in int with no saturation */
+ return sum;
+}
+
+static inline void UPDATE(int *taps, const short *history, const int nsuppr, const int ntaps) /* portable build: adds history[i] * nsuppr to taps[i] for i in [0, ntaps) */
+{
+ int i;
+ int correction; /* per-tap increment, computed in int arithmetic */
+ for (i=0;i<ntaps;i++) { /* does nothing when ntaps <= 0 */
+ correction = history[i] * nsuppr;
+ taps[i] += correction;
+ }
+}
+
+static inline void UPDATE2(int *taps, short *taps_short, const short *history, const int nsuppr, const int ntaps) /* portable build: updates taps[] and mirrors taps[i] >> 16 into taps_short[] */
+{
+ int i;
+ int correction; /* per-tap increment, computed in int arithmetic */
+ for (i=0;i<ntaps;i++) { /* does nothing when ntaps <= 0 */
+ correction = history[i] * nsuppr;
+ taps[i] += correction;
+ taps_short[i] = taps[i] >> 16; /* updated tap shifted right by 16, stored as a short */
+ }
+}
+
+static inline short MAX16(const short *y, int len, int *pos) /* portable build: returns the largest y[k] above 0 (0 if none) and reports its position through pos */
+{
+ int k;
+ short max = 0; /* starting at 0 means values <= 0 are never selected */
+ int bestpos = 0; /* stays 0 when no element exceeds 0 */
+ for (k=0;k<len;k++) {
+ if (max < y[k]) { /* strict compare keeps the first occurrence of the maximum */
+ bestpos = k;
+ max = y[k];
+ }
+ }
+ *pos = (len - 1 - bestpos); /* position is reported as a distance back from the last element */
+ return max;
+}
+
+#endif /* MMX */
+#endif /* _ZAPTEL_ARITH_H */