summaryrefslogtreecommitdiff
path: root/arith.h
blob: 7ae859d2ce792e80176c0ddf92be0cbb5af7449d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
/*
 * Handy add/subtract functions to operate on chunks of shorts.
 * Feel free to add customizations for additional architectures
 *
 */

#ifdef CONFIG_ZAPTEL_MMX

static inline void __ACSS(volatile short *dst, const short *src)
{
	__asm__ __volatile__ (
	        "movq 0(%0), %%mm0;\n"
	        "movq 0(%1), %%mm1;\n"
                "movq 8(%0), %%mm2;\n"
	        "movq 8(%1), %%mm3;\n"
	        "paddsw %%mm1, %%mm0;\n"
	        "paddsw %%mm3, %%mm2;\n"
                "movq %%mm0, 0(%0);\n"
                "movq %%mm2, 8(%0);\n"
	    : "=r" (dst)
	    : "r" (src), "0" (dst)
	    : "memory"
#if CLOBBERMMX
	    , "%mm0", "%mm1", "%mm2", "%mm3"
#endif
      );

}
static inline void __SCSS(volatile short *dst, const short *src)
{
	__asm__ __volatile__ (
	        "movq 0(%0), %%mm0;\n"
	        "movq 0(%1), %%mm1;\n"
                "movq 8(%0), %%mm2;\n"
	        "movq 8(%1), %%mm3;\n"
	        "psubsw %%mm1, %%mm0;\n"
	        "psubsw %%mm3, %%mm2;\n"
                "movq %%mm0, 0(%0);\n"
                "movq %%mm2, 8(%0);\n"
	    : "=r" (dst)
	    : "r" (src), "0" (dst)
	    : "memory"
#if CLOBBERMMX
	    , "%mm0", "%mm1", "%mm2", "%mm3"
#endif
      );

}
#if (ZT_CHUNKSIZE == 8)
/* A chunk is exactly 8 samples here, so one MMX helper call covers it. */
#define ACSS(a,b) __ACSS(a,b)
#define SCSS(a,b) __SCSS(a,b)
#elif (ZT_CHUNKSIZE > 8)
/*
 * ACSS - add one chunk of src into dst with signed saturation (MMX build).
 *
 * dst: ZT_CHUNKSIZE shorts, updated in place
 * src: ZT_CHUNKSIZE shorts, only read
 *
 * The chunk is walked in groups of 8 samples because each __ACSS() call
 * always reads and writes exactly 8 shorts. A chunk size that is not a
 * multiple of 8 would make the final call run past the end of both
 * buffers, so that configuration is rejected at compile time.
 */
#if (ZT_CHUNKSIZE % 8) != 0
#error MMX ACSS requires ZT_CHUNKSIZE to be a multiple of 8
#endif
static inline void ACSS(volatile short *dst, const short *src)
{
	int x;

	for (x = 0; x < ZT_CHUNKSIZE; x += 8)
		__ACSS(dst + x, src + x);
}
/*
 * SCSS - subtract one chunk of src from dst with signed saturation
 * (MMX build).
 *
 * dst: ZT_CHUNKSIZE shorts, updated in place (dst = dst - src)
 * src: ZT_CHUNKSIZE shorts, only read
 *
 * The chunk is walked in groups of 8 samples because each __SCSS() call
 * always reads and writes exactly 8 shorts. A chunk size that is not a
 * multiple of 8 would make the final call run past the end of both
 * buffers, so that configuration is rejected at compile time.
 */
#if (ZT_CHUNKSIZE % 8) != 0
#error MMX SCSS requires ZT_CHUNKSIZE to be a multiple of 8
#endif
static inline void SCSS(volatile short *dst, const short *src)
{
	int x;

	for (x = 0; x < ZT_CHUNKSIZE; x += 8)
		__SCSS(dst + x, src + x);
}
#else
#error No MMX for ZT_CHUNKSIZE < 8
#endif

#else

/*
 * ACSS - add one chunk of src into dst with signed saturation (portable C).
 *
 * dst: ZT_CHUNKSIZE shorts, updated in place
 * src: ZT_CHUNKSIZE shorts, only read
 *
 * Each sum is formed as an int and clamped to [-32768, 32767] before it is
 * stored back. src is const-qualified because it is never written, which
 * also matches the src parameter of the MMX variant.
 */
static inline void ACSS(short *dst, const short *src)
{
	int x, sum;

	for (x = 0; x < ZT_CHUNKSIZE; x++) {
		sum = dst[x] + src[x];
		/* Clamp to the 16-bit signed range */
		if (sum > 32767)
			sum = 32767;
		else if (sum < -32768)
			sum = -32768;
		dst[x] = sum;
	}
}

/*
 * SCSS - subtract one chunk of src from dst with signed saturation
 * (portable C).
 *
 * dst: ZT_CHUNKSIZE shorts, updated in place (dst = dst - src)
 * src: ZT_CHUNKSIZE shorts, only read
 *
 * Each difference is formed as an int and clamped to [-32768, 32767] before
 * it is stored back. src is const-qualified because it is never written,
 * which also matches the src parameter of the MMX variant.
 */
static inline void SCSS(short *dst, const short *src)
{
	int x, sum;

	for (x = 0; x < ZT_CHUNKSIZE; x++) {
		/* Subtract src from dst; "sum" holds the difference */
		sum = dst[x] - src[x];
		/* Clamp to the 16-bit signed range */
		if (sum > 32767)
			sum = 32767;
		else if (sum < -32768)
			sum = -32768;
		dst[x] = sum;
	}
}

#endif