/*
 * Handy add/subtract functions to operate on chunks of shorts.
 * Feel free to add customizations for additional architectures
 *
 * ACSS(dst, src): dst[i] = saturate16(dst[i] + src[i]) for one chunk.
 * SCSS(dst, src): dst[i] = saturate16(dst[i] - src[i]) for one chunk.
 *
 * A chunk is ZT_CHUNKSIZE samples; that macro must be defined by the
 * including file before this header is used.
 */

#ifdef CONFIG_ZAPTEL_MMX

/*
 * Add 8 shorts from src into dst with signed saturation (paddsw).
 *
 * Two 64-bit MMX loads per operand cover 16 bytes, i.e. exactly 8 samples.
 * NOTE(review): no emms is issued here; presumably the caller brackets MMX
 * use and restores FPU state -- confirm against the call sites.
 */
static inline void __ACSS(volatile short *dst, const short *src)
{
	__asm__ __volatile__ (
		"movq 0(%0), %%mm0;\n"
		"movq 0(%1), %%mm1;\n"
		"movq 8(%0), %%mm2;\n"
		"movq 8(%1), %%mm3;\n"
		"paddsw %%mm1, %%mm0;\n"
		"paddsw %%mm3, %%mm2;\n"
		"movq %%mm0, 0(%0);\n"
		"movq %%mm2, 8(%0);\n"
	    : "=r" (dst)
	    : "r" (src), "0" (dst)
	    : "memory"
#if CLOBBERMMX
	    , "%mm0", "%mm1", "%mm2", "%mm3"
#endif
	);
}

/*
 * Subtract 8 shorts of src from dst with signed saturation (psubsw).
 * Same layout and caveats as __ACSS.
 */
static inline void __SCSS(volatile short *dst, const short *src)
{
	__asm__ __volatile__ (
		"movq 0(%0), %%mm0;\n"
		"movq 0(%1), %%mm1;\n"
		"movq 8(%0), %%mm2;\n"
		"movq 8(%1), %%mm3;\n"
		"psubsw %%mm1, %%mm0;\n"
		"psubsw %%mm3, %%mm2;\n"
		"movq %%mm0, 0(%0);\n"
		"movq %%mm2, 8(%0);\n"
	    : "=r" (dst)
	    : "r" (src), "0" (dst)
	    : "memory"
#if CLOBBERMMX
	    , "%mm0", "%mm1", "%mm2", "%mm3"
#endif
	);
}

#if (ZT_CHUNKSIZE == 8)
/* One MMX kernel invocation covers the whole chunk. */
#define ACSS(a,b) __ACSS(a,b)
#define SCSS(a,b) __SCSS(a,b)
#elif (ZT_CHUNKSIZE > 8)
/*
 * Larger chunks are processed 8 samples at a time.
 * NOTE(review): assumes ZT_CHUNKSIZE is a multiple of 8; otherwise the last
 * kernel invocation would touch samples past the end of the chunk.
 */
static inline void ACSS(volatile short *dst, const short *src)
{
	int x;

	for (x = 0; x < ZT_CHUNKSIZE; x += 8)
		__ACSS(dst + x, src + x);
}

static inline void SCSS(volatile short *dst, const short *src)
{
	int x;

	for (x = 0; x < ZT_CHUNKSIZE; x += 8)
		__SCSS(dst + x, src + x);
}
#endif /* ZT_CHUNKSIZE */

#else /* !CONFIG_ZAPTEL_MMX */

#ifdef ZT_CHUNKSIZE
/*
 * Portable fallback: add src to dst with saturation, storing in dst.
 * The arithmetic is done in int so the 16-bit overflow can be detected
 * and clamped to [-32768, 32767] before storing back.
 */
static inline void ACSS(short *dst, short *src)
{
	int x, sum;

	for (x = 0; x < ZT_CHUNKSIZE; x++) {
		sum = dst[x] + src[x];
		if (sum > 32767)
			sum = 32767;
		else if (sum < -32768)
			sum = -32768;
		dst[x] = (short)sum;
	}
}

/*
 * Portable fallback: subtract src from dst with saturation, storing in dst.
 */
static inline void SCSS(short *dst, short *src)
{
	int x, sum;

	for (x = 0; x < ZT_CHUNKSIZE; x++) {
		sum = dst[x] - src[x];
		if (sum > 32767)
			sum = 32767;
		else if (sum < -32768)
			sum = -32768;
		dst[x] = (short)sum;
	}
}
#endif /* ZT_CHUNKSIZE */

#endif /* CONFIG_ZAPTEL_MMX */