From 7a2104ae7bc5cf93a60180d3cc5e3a1d59277870 Mon Sep 17 00:00:00 2001 From: Tzafrir Cohen Date: Tue, 5 Jan 2010 20:07:13 +0200 Subject: Bring OSLEC up-to-date with out-of-tree version * Bring back MMX support. * This is done in a quick-and-dirty way, - copy over existing files. * TODO: reduce the changes in this diff to only include MMX support. --- drivers/staging/echo/fir.h | 149 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 114 insertions(+), 35 deletions(-) (limited to 'drivers/staging/echo/fir.h') diff --git a/drivers/staging/echo/fir.h b/drivers/staging/echo/fir.h index 7b9fabf..5645cb1 100644 --- a/drivers/staging/echo/fir.h +++ b/drivers/staging/echo/fir.h @@ -21,8 +21,18 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * $Id: fir.h,v 1.8 2006/10/24 13:45:28 steveu Exp $ */ +/*! \page fir_page FIR filtering +\section fir_page_sec_1 What does it do? +???. + +\section fir_page_sec_2 How does it work? +???. +*/ + #if !defined(_FIR_H_) #define _FIR_H_ @@ -54,47 +64,51 @@ can. */ -/* - * 16 bit integer FIR descriptor. This defines the working state for a single - * instance of an FIR filter using 16 bit integer coefficients. - */ -struct fir16_state_t { +#if defined(USE_MMX) || defined(USE_SSE2) +#include "mmx.h" +#endif + +/*! + 16 bit integer FIR descriptor. This defines the working state for a single + instance of an FIR filter using 16 bit integer coefficients. +*/ +typedef struct { int taps; int curr_pos; const int16_t *coeffs; int16_t *history; -}; +} fir16_state_t; -/* - * 32 bit integer FIR descriptor. This defines the working state for a single - * instance of an FIR filter using 32 bit integer coefficients, and filtering - * 16 bit integer data. - */ -struct fir32_state_t { +/*! + 32 bit integer FIR descriptor. This defines the working state for a single + instance of an FIR filter using 32 bit integer coefficients, and filtering + 16 bit integer data. +*/ +typedef struct { int taps; int curr_pos; const int32_t *coeffs; int16_t *history; -}; +} fir32_state_t; -/* - * Floating point FIR descriptor. This defines the working state for a single - * instance of an FIR filter using floating point coefficients and data. - */ -struct fir_float_state_t { +/*! + Floating point FIR descriptor. This defines the working state for a single + instance of an FIR filter using floating point coefficients and data. +*/ +typedef struct { int taps; int curr_pos; const float *coeffs; float *history; -}; +} fir_float_state_t; -static inline const int16_t *fir16_create(struct fir16_state_t *fir, - const int16_t *coeffs, int taps) +static __inline__ const int16_t *fir16_create(fir16_state_t * fir, + const int16_t * coeffs, int taps) { fir->taps = taps; fir->curr_pos = taps - 1; fir->coeffs = coeffs; -#if defined(__bfin__) +#if defined(USE_MMX) || defined(USE_SSE2) || defined(__bfin__) fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL); #else fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); @@ -102,16 +116,16 @@ static inline const int16_t *fir16_create(struct fir16_state_t *fir, return fir->history; } -static inline void fir16_flush(struct fir16_state_t *fir) +static __inline__ void fir16_flush(fir16_state_t * fir) { -#if defined(__bfin__) +#if defined(USE_MMX) || defined(USE_SSE2) || defined(__bfin__) memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t)); #else memset(fir->history, 0, fir->taps * sizeof(int16_t)); #endif } -static inline void fir16_free(struct fir16_state_t *fir) +static __inline__ void fir16_free(fir16_state_t * fir) { kfree(fir->history); } @@ -134,19 +148,83 @@ static inline int32_t dot_asm(short *x, short *y, int len) "A0 += R0.L*R1.L (IS);\n\t" "R0 = A0;\n\t" "%0 = R0;\n\t" - : "=&d"(dot) - : "a"(x), "a"(y), "a"(len) - : "I0", "I1", "A1", "A0", "R0", "R1" + :"=&d"(dot) + :"a"(x), "a"(y), "a"(len) + :"I0", "I1", "A1", "A0", "R0", "R1" ); return dot; } #endif -static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample) +static __inline__ int16_t fir16(fir16_state_t * fir, int16_t sample) { int32_t y; -#if defined(__bfin__) +#if defined(USE_MMX) + int i; + mmx_t *mmx_coeffs; + mmx_t *mmx_hist; + + fir->history[fir->curr_pos] = sample; + fir->history[fir->curr_pos + fir->taps] = sample; + + mmx_coeffs = (mmx_t *) fir->coeffs; + mmx_hist = (mmx_t *) & fir->history[fir->curr_pos]; + i = fir->taps; + pxor_r2r(mm4, mm4); + /* 8 samples per iteration, so the filter must be a multiple of 8 long. */ + while (i > 0) { + movq_m2r(mmx_coeffs[0], mm0); + movq_m2r(mmx_coeffs[1], mm2); + movq_m2r(mmx_hist[0], mm1); + movq_m2r(mmx_hist[1], mm3); + mmx_coeffs += 2; + mmx_hist += 2; + pmaddwd_r2r(mm1, mm0); + pmaddwd_r2r(mm3, mm2); + paddd_r2r(mm0, mm4); + paddd_r2r(mm2, mm4); + i -= 8; + } + movq_r2r(mm4, mm0); + psrlq_i2r(32, mm0); + paddd_r2r(mm0, mm4); + movd_r2m(mm4, y); + emms(); +#elif defined(USE_SSE2) + int i; + xmm_t *xmm_coeffs; + xmm_t *xmm_hist; + + fir->history[fir->curr_pos] = sample; + fir->history[fir->curr_pos + fir->taps] = sample; + + xmm_coeffs = (xmm_t *) fir->coeffs; + xmm_hist = (xmm_t *) & fir->history[fir->curr_pos]; + i = fir->taps; + pxor_r2r(xmm4, xmm4); + /* 16 samples per iteration, so the filter must be a multiple of 16 long. */ + while (i > 0) { + movdqu_m2r(xmm_coeffs[0], xmm0); + movdqu_m2r(xmm_coeffs[1], xmm2); + movdqu_m2r(xmm_hist[0], xmm1); + movdqu_m2r(xmm_hist[1], xmm3); + xmm_coeffs += 2; + xmm_hist += 2; + pmaddwd_r2r(xmm1, xmm0); + pmaddwd_r2r(xmm3, xmm2); + paddd_r2r(xmm0, xmm4); + paddd_r2r(xmm2, xmm4); + i -= 16; + } + movdqa_r2r(xmm4, xmm0); + psrldq_i2r(8, xmm0); + paddd_r2r(xmm0, xmm4); + movdqa_r2r(xmm4, xmm0); + psrldq_i2r(4, xmm0); + paddd_r2r(xmm0, xmm4); + movd_r2m(xmm4, y); +#elif defined(__bfin__) fir->history[fir->curr_pos] = sample; fir->history[fir->curr_pos + fir->taps] = sample; y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos], @@ -172,8 +250,8 @@ static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample) return (int16_t) (y >> 15); } -static inline const int16_t *fir32_create(struct fir32_state_t *fir, - const int32_t *coeffs, int taps) +static __inline__ const int16_t *fir32_create(fir32_state_t * fir, + const int32_t * coeffs, int taps) { fir->taps = taps; fir->curr_pos = taps - 1; @@ -182,17 +260,17 @@ static inline const int16_t *fir32_create(struct fir32_state_t *fir, return fir->history; } -static inline void fir32_flush(struct fir32_state_t *fir) +static __inline__ void fir32_flush(fir32_state_t * fir) { memset(fir->history, 0, fir->taps * sizeof(int16_t)); } -static inline void fir32_free(struct fir32_state_t *fir) +static __inline__ void fir32_free(fir32_state_t * fir) { kfree(fir->history); } -static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample) +static __inline__ int16_t fir32(fir32_state_t * fir, int16_t sample) { int i; int32_t y; @@ -214,3 +292,4 @@ static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample) } #endif +/*- End of file ------------------------------------------------------------*/ -- cgit v1.2.3