summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tzafrir Cohen <tzafrir@cohens.org.il> 2010-01-05 20:07:13 +0200
committer Tzafrir Cohen <tzafrir@cohens.org.il> 2010-01-05 20:07:13 +0200
commit 7a2104ae7bc5cf93a60180d3cc5e3a1d59277870 (patch)
tree e0af0e9c61b0c2c19af15f8e578719a644319847
parent b1cc4a4d1cc5b62dd465b4a4a167306f9a337f10 (diff)
Bring OSLEC up-to-date with out-of-tree version
* Bring back MMX support. * This is done in a quick-and-dirty way: the existing out-of-tree files are copied over as-is. * TODO: reduce the changes in this diff so that it only includes MMX support.
-rw-r--r-- drivers/staging/echo/Kbuild 6
-rw-r--r-- drivers/staging/echo/echo.h 21
-rw-r--r-- drivers/staging/echo/fir.h 149
3 files changed, 129 insertions, 47 deletions
diff --git a/drivers/staging/echo/Kbuild b/drivers/staging/echo/Kbuild
new file mode 100644
index 0000000..8e19659
--- /dev/null
+++ b/drivers/staging/echo/Kbuild
@@ -0,0 +1,6 @@
+ifdef DAHDI_USE_MMX
+EXTRA_CFLAGS += -DUSE_MMX
+endif
+
+# Explicit 'obj-m' (unlike the Makefile); USE_MMX above selects the MMX path in fir.h
+obj-m += echo.o
diff --git a/drivers/staging/echo/echo.h b/drivers/staging/echo/echo.h
index 754e66d..9fb9543 100644
--- a/drivers/staging/echo/echo.h
+++ b/drivers/staging/echo/echo.h
@@ -23,22 +23,20 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * $Id: echo.h,v 1.9 2006/10/24 13:45:28 steveu Exp $
*/
#ifndef __ECHO_H
#define __ECHO_H
-/*
-Line echo cancellation for voice
-
-What does it do?
+/*! \page echo_can_page Line echo cancellation for voice
+\section echo_can_page_sec_1 What does it do?
This module aims to provide G.168-2002 compliant echo cancellation, to remove
electrical echoes (e.g. from 2-4 wire hybrids) from voice calls.
-
-How does it work?
-
+\section echo_can_page_sec_2 How does it work?
The heart of the echo cancellor is FIR filter. This is adapted to match the
echo impulse response of the telephone line. It must be long enough to
adequately cover the duration of that impulse response. The signal transmitted
@@ -112,8 +110,7 @@ major mis-convergence in the adaption process. An assessment algorithm is
needed which produces a fairly accurate result from a very short burst of far
end energy.
-How do I use it?
-
+\section echo_can_page_sec_3 How do I use it?
The echo cancellor processes both the transmit and receive streams sample by
sample. The processing function is not declared inline. Unfortunately,
cancellation requires many operations per sample, so the call overhead is only
@@ -123,7 +120,7 @@ a minor burden.
#include "fir.h"
#include "oslec.h"
-/*
+/*!
G.168 echo canceller descriptor. This defines the working state for a line
echo canceller.
*/
@@ -152,8 +149,8 @@ struct oslec_state {
int Lbgn, Lbgn_acc, Lbgn_upper, Lbgn_upper_acc;
/* foreground and background filter states */
- struct fir16_state_t fir_state;
- struct fir16_state_t fir_state_bg;
+ fir16_state_t fir_state;
+ fir16_state_t fir_state_bg;
int16_t *fir_taps16[2];
/* DC blocking filter states */
diff --git a/drivers/staging/echo/fir.h b/drivers/staging/echo/fir.h
index 7b9fabf..5645cb1 100644
--- a/drivers/staging/echo/fir.h
+++ b/drivers/staging/echo/fir.h
@@ -21,8 +21,18 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * $Id: fir.h,v 1.8 2006/10/24 13:45:28 steveu Exp $
*/
+/*! \page fir_page FIR filtering
+\section fir_page_sec_1 What does it do?
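+Implements FIR filters over 16 bit integer samples, using 16 bit (fir16) or 32 bit (fir32) integer coefficients.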
+
+\section fir_page_sec_2 How does it work?
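+fir16() stores each new sample in the history buffer, sums the products of the coefficients and the history, and returns that sum shifted right by 15 bits.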
+*/
+
#if !defined(_FIR_H_)
#define _FIR_H_
@@ -54,47 +64,51 @@
can.
*/
-/*
- * 16 bit integer FIR descriptor. This defines the working state for a single
- * instance of an FIR filter using 16 bit integer coefficients.
- */
-struct fir16_state_t {
+#if defined(USE_MMX) || defined(USE_SSE2)
+#include "mmx.h"
+#endif
+
+/*!
+ 16 bit integer FIR descriptor. This defines the working state for a single
+ instance of an FIR filter using 16 bit integer coefficients.
+*/
+typedef struct {
int taps;
int curr_pos;
const int16_t *coeffs;
int16_t *history;
-};
+} fir16_state_t;
-/*
- * 32 bit integer FIR descriptor. This defines the working state for a single
- * instance of an FIR filter using 32 bit integer coefficients, and filtering
- * 16 bit integer data.
- */
-struct fir32_state_t {
+/*!
+ 32 bit integer FIR descriptor. This defines the working state for a single
+ instance of an FIR filter using 32 bit integer coefficients, and filtering
+ 16 bit integer data.
+*/
+typedef struct {
int taps;
int curr_pos;
const int32_t *coeffs;
int16_t *history;
-};
+} fir32_state_t;
-/*
- * Floating point FIR descriptor. This defines the working state for a single
- * instance of an FIR filter using floating point coefficients and data.
- */
-struct fir_float_state_t {
+/*!
+ Floating point FIR descriptor. This defines the working state for a single
+ instance of an FIR filter using floating point coefficients and data.
+*/
+typedef struct {
int taps;
int curr_pos;
const float *coeffs;
float *history;
-};
+} fir_float_state_t;
-static inline const int16_t *fir16_create(struct fir16_state_t *fir,
- const int16_t *coeffs, int taps)
+static __inline__ const int16_t *fir16_create(fir16_state_t * fir,
+ const int16_t * coeffs, int taps)
{
fir->taps = taps;
fir->curr_pos = taps - 1;
fir->coeffs = coeffs;
-#if defined(__bfin__)
+#if defined(USE_MMX) || defined(USE_SSE2) || defined(__bfin__)
fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL);
#else
fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
@@ -102,16 +116,16 @@ static inline const int16_t *fir16_create(struct fir16_state_t *fir,
return fir->history;
}
-static inline void fir16_flush(struct fir16_state_t *fir)
+static __inline__ void fir16_flush(fir16_state_t * fir)
{
-#if defined(__bfin__)
+#if defined(USE_MMX) || defined(USE_SSE2) || defined(__bfin__)
memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t));
#else
memset(fir->history, 0, fir->taps * sizeof(int16_t));
#endif
}
-static inline void fir16_free(struct fir16_state_t *fir)
+static __inline__ void fir16_free(fir16_state_t * fir)
{
kfree(fir->history);
}
@@ -134,19 +148,83 @@ static inline int32_t dot_asm(short *x, short *y, int len)
"A0 += R0.L*R1.L (IS);\n\t"
"R0 = A0;\n\t"
"%0 = R0;\n\t"
- : "=&d"(dot)
- : "a"(x), "a"(y), "a"(len)
- : "I0", "I1", "A1", "A0", "R0", "R1"
+ :"=&d"(dot)
+ :"a"(x), "a"(y), "a"(len)
+ :"I0", "I1", "A1", "A0", "R0", "R1"
);
return dot;
}
#endif
-static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample)
+static __inline__ int16_t fir16(fir16_state_t * fir, int16_t sample)
{
int32_t y;
-#if defined(__bfin__)
+#if defined(USE_MMX)
+ int i;
+ mmx_t *mmx_coeffs;
+ mmx_t *mmx_hist;
+
+ fir->history[fir->curr_pos] = sample;
+ fir->history[fir->curr_pos + fir->taps] = sample;
+
+ mmx_coeffs = (mmx_t *) fir->coeffs;
+ mmx_hist = (mmx_t *) & fir->history[fir->curr_pos];
+ i = fir->taps;
+ pxor_r2r(mm4, mm4);
+ /* 8 samples per iteration, so the filter must be a multiple of 8 long. */
+ while (i > 0) {
+ movq_m2r(mmx_coeffs[0], mm0);
+ movq_m2r(mmx_coeffs[1], mm2);
+ movq_m2r(mmx_hist[0], mm1);
+ movq_m2r(mmx_hist[1], mm3);
+ mmx_coeffs += 2;
+ mmx_hist += 2;
+ pmaddwd_r2r(mm1, mm0);
+ pmaddwd_r2r(mm3, mm2);
+ paddd_r2r(mm0, mm4);
+ paddd_r2r(mm2, mm4);
+ i -= 8;
+ }
+ movq_r2r(mm4, mm0);
+ psrlq_i2r(32, mm0);
+ paddd_r2r(mm0, mm4);
+ movd_r2m(mm4, y);
+ emms();
+#elif defined(USE_SSE2)
+ int i;
+ xmm_t *xmm_coeffs;
+ xmm_t *xmm_hist;
+
+ fir->history[fir->curr_pos] = sample;
+ fir->history[fir->curr_pos + fir->taps] = sample;
+
+ xmm_coeffs = (xmm_t *) fir->coeffs;
+ xmm_hist = (xmm_t *) & fir->history[fir->curr_pos];
+ i = fir->taps;
+ pxor_r2r(xmm4, xmm4);
+ /* 16 samples per iteration, so the filter must be a multiple of 16 long. */
+ while (i > 0) {
+ movdqu_m2r(xmm_coeffs[0], xmm0);
+ movdqu_m2r(xmm_coeffs[1], xmm2);
+ movdqu_m2r(xmm_hist[0], xmm1);
+ movdqu_m2r(xmm_hist[1], xmm3);
+ xmm_coeffs += 2;
+ xmm_hist += 2;
+ pmaddwd_r2r(xmm1, xmm0);
+ pmaddwd_r2r(xmm3, xmm2);
+ paddd_r2r(xmm0, xmm4);
+ paddd_r2r(xmm2, xmm4);
+ i -= 16;
+ }
+ movdqa_r2r(xmm4, xmm0);
+ psrldq_i2r(8, xmm0);
+ paddd_r2r(xmm0, xmm4);
+ movdqa_r2r(xmm4, xmm0);
+ psrldq_i2r(4, xmm0);
+ paddd_r2r(xmm0, xmm4);
+ movd_r2m(xmm4, y);
+#elif defined(__bfin__)
fir->history[fir->curr_pos] = sample;
fir->history[fir->curr_pos + fir->taps] = sample;
y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos],
@@ -172,8 +250,8 @@ static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample)
return (int16_t) (y >> 15);
}
-static inline const int16_t *fir32_create(struct fir32_state_t *fir,
- const int32_t *coeffs, int taps)
+static __inline__ const int16_t *fir32_create(fir32_state_t * fir,
+ const int32_t * coeffs, int taps)
{
fir->taps = taps;
fir->curr_pos = taps - 1;
@@ -182,17 +260,17 @@ static inline const int16_t *fir32_create(struct fir32_state_t *fir,
return fir->history;
}
-static inline void fir32_flush(struct fir32_state_t *fir)
+static __inline__ void fir32_flush(fir32_state_t * fir)
{
memset(fir->history, 0, fir->taps * sizeof(int16_t));
}
-static inline void fir32_free(struct fir32_state_t *fir)
+static __inline__ void fir32_free(fir32_state_t * fir)
{
kfree(fir->history);
}
-static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample)
+static __inline__ int16_t fir32(fir32_state_t * fir, int16_t sample)
{
int i;
int32_t y;
@@ -214,3 +292,4 @@ static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample)
}
#endif
+/*- End of file ------------------------------------------------------------*/