diff options
-rwxr-xr-x | arith.h | 99 |
1 files changed, 99 insertions, 0 deletions
@@ -0,0 +1,99 @@ +/* + * Handy add/subtract functions to operate on chunks of shorts. + * Feel free to add customizations for additional architectures + * + */ + +#ifdef CONFIG_ZAPTEL_MMX + +static inline void __ACSS(volatile short *dst, const short *src) +{ + __asm__ __volatile__ ( + "movq 0(%0), %%mm0;\n" + "movq 0(%1), %%mm1;\n" + "movq 8(%0), %%mm2;\n" + "movq 8(%1), %%mm3;\n" + "paddsw %%mm1, %%mm0;\n" + "paddsw %%mm3, %%mm2;\n" + "movq %%mm0, 0(%0);\n" + "movq %%mm2, 8(%0);\n" + : "=r" (dst) + : "r" (src), "0" (dst) + : "memory" +#if CLOBBERMMX + , "%mm0", "%mm1", "%mm2", "%mm3" +#endif + ); + +} +static inline void __SCSS(volatile short *dst, const short *src) +{ + __asm__ __volatile__ ( + "movq 0(%0), %%mm0;\n" + "movq 0(%1), %%mm1;\n" + "movq 8(%0), %%mm2;\n" + "movq 8(%1), %%mm3;\n" + "psubsw %%mm1, %%mm0;\n" + "psubsw %%mm3, %%mm2;\n" + "movq %%mm0, 0(%0);\n" + "movq %%mm2, 8(%0);\n" + : "=r" (dst) + : "r" (src), "0" (dst) + : "memory" +#if CLOBBERMMX + , "%mm0", "%mm1", "%mm2", "%mm3" +#endif + ); + +} +#if (ZT_CHUNKSIZE == 8) +#define ACSS(a,b) __ACSS(a,b) +#define SCSS(a,b) __SCSS(a,b) +#elif (ZT_CHUNKSIZE > 8) +static inline void ACSS(volatile short *dst, const short *src) +{ + int x; + for (x=0;x<ZT_CHUNKSIZE;x+=8) + __ACSS(dst + x, src + x); +} +static inline void SCSS(volatile short *dst, const short *src) +{ + int x; + for (x=0;x<ZT_CHUNKSIZE;x+=8) + __SCSS(dst + x, src + x); +} +#else +#error No MMX for ZT_CHUNKSIZE < 8 +#endif + +#else + +static inline void ACSS(short *dst, short *src) +{ + int x,sum; + /* Add src to dst with saturation, storing in dst */ + for (x=0;x<ZT_CHUNKSIZE;x++) { + sum = dst[x]+src[x]; + if (sum > 32767) + sum = 32767; + else if (sum < -32768) + sum = -32768; + dst[x] = sum; + } +} + +static inline void SCSS(short *dst, short *src) +{ + int x,sum; + /* Add src to dst with saturation, storing in dst */ + for (x=0;x<ZT_CHUNKSIZE;x++) { + sum = dst[x]-src[x]; + if (sum > 32767) + sum = 32767; + else if (sum < -32768) + sum = -32768; + dst[x] = sum; + } +} + +#endif |