summaryrefslogtreecommitdiff
path: root/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c')
-rw-r--r--third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c1002
1 files changed, 1002 insertions, 0 deletions
diff --git a/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c
new file mode 100644
index 00000000..d99be872
--- /dev/null
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/ns/nsx_core_mips.c
@@ -0,0 +1,1002 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+
+static const int16_t kIndicatorTable[17] = {
+ 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
+ 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
+};
+
+// Compute speech/noise probability
+// speech/noise probability is returned in: probSpeechFinal
+//snrLocPrior is the prior SNR for each frequency (in Q11)
+//snrLocPost is the post SNR for each frequency (in Q11)
+void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
+ uint16_t* nonSpeechProbFinal,
+ uint32_t* priorLocSnr,
+ uint32_t* postLocSnr) {
+ uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
+ int32_t indPriorFX, tmp32no1;
+ int32_t logLrtTimeAvgKsumFX;
+ int16_t indPriorFX16;
+ int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac;
+ size_t i;
+ int normTmp, nShifts;
+
+ int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
+ int32_t const_max = 0x7fffffff;
+ int32_t const_neg43 = -43;
+ int32_t const_5412 = 5412;
+ int32_t const_11rsh12 = (11 << 12);
+ int32_t const_178 = 178;
+
+
+ // compute feature based on average LR factor
+ // this is the average over all frequencies of the smooth log LRT
+ logLrtTimeAvgKsumFX = 0;
+ for (i = 0; i < inst->magnLen; i++) {
+ r0 = postLocSnr[i]; // Q11
+ r1 = priorLocSnr[i];
+ r2 = inst->logLrtTimeAvgW32[i];
+
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "clz %[r3], %[r0] \n\t"
+ "clz %[r5], %[r1] \n\t"
+ "slti %[r4], %[r3], 32 \n\t"
+ "slti %[r6], %[r5], 32 \n\t"
+ "movz %[r3], $0, %[r4] \n\t"
+ "movz %[r5], $0, %[r6] \n\t"
+ "slti %[r4], %[r3], 11 \n\t"
+ "addiu %[r6], %[r3], -11 \n\t"
+ "neg %[r7], %[r6] \n\t"
+ "sllv %[r6], %[r1], %[r6] \n\t"
+ "srav %[r7], %[r1], %[r7] \n\t"
+ "movn %[r6], %[r7], %[r4] \n\t"
+ "sllv %[r1], %[r1], %[r5] \n\t"
+ "and %[r1], %[r1], %[const_max] \n\t"
+ "sra %[r1], %[r1], 19 \n\t"
+ "mul %[r7], %[r1], %[r1] \n\t"
+ "sllv %[r3], %[r0], %[r3] \n\t"
+ "divu %[r8], %[r3], %[r6] \n\t"
+ "slti %[r6], %[r6], 1 \n\t"
+ "mul %[r7], %[r7], %[const_neg43] \n\t"
+ "sra %[r7], %[r7], 19 \n\t"
+ "movz %[r3], %[r8], %[r6] \n\t"
+ "subu %[r0], %[r0], %[r3] \n\t"
+ "movn %[r0], $0, %[r6] \n\t"
+ "mul %[r1], %[r1], %[const_5412] \n\t"
+ "sra %[r1], %[r1], 12 \n\t"
+ "addu %[r7], %[r7], %[r1] \n\t"
+ "addiu %[r1], %[r7], 37 \n\t"
+ "addiu %[r5], %[r5], -31 \n\t"
+ "neg %[r5], %[r5] \n\t"
+ "sll %[r5], %[r5], 12 \n\t"
+ "addu %[r5], %[r5], %[r1] \n\t"
+ "subu %[r7], %[r5], %[const_11rsh12] \n\t"
+ "mul %[r7], %[r7], %[const_178] \n\t"
+ "sra %[r7], %[r7], 8 \n\t"
+ "addu %[r7], %[r7], %[r2] \n\t"
+ "sra %[r7], %[r7], 1 \n\t"
+ "subu %[r2], %[r2], %[r7] \n\t"
+ "addu %[r2], %[r2], %[r0] \n\t"
+ ".set pop \n\t"
+ : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
+ [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+ [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8)
+ : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43),
+ [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12),
+ [const_178] "r" (const_178)
+ : "hi", "lo"
+ );
+ inst->logLrtTimeAvgW32[i] = r2;
+ logLrtTimeAvgKsumFX += r2;
+ }
+
+ inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >>
+ (inst->stages + 11);
+
+ // done with computation of LR factor
+
+ //
+ // compute the indicator functions
+ //
+
+ // average LRT feature
+ // FLOAT code
+ // indicator0 = 0.5 * (tanh(widthPrior *
+ // (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
+ tmpIndFX = 16384; // Q14(1.0)
+ tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
+ nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
+ //use larger width in tanh map for pause regions
+ if (tmp32no1 < 0) {
+ tmpIndFX = 0;
+ tmp32no1 = -tmp32no1;
+ //widthPrior = widthPrior * 2.0;
+ nShifts++;
+ }
+ tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
+ // compute indicator function: sigmoid map
+ tableIndex = (int16_t)(tmp32no1 >> 14);
+ if ((tableIndex < 16) && (tableIndex >= 0)) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
+ if (tmpIndFX == 0) {
+ tmpIndFX = 8192 - tmp16no2; // Q14
+ } else {
+ tmpIndFX = 8192 + tmp16no2; // Q14
+ }
+ }
+ indPriorFX = inst->weightLogLrt * tmpIndFX; // 6*Q14
+
+ //spectral flatness feature
+ if (inst->weightSpecFlat) {
+ tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
+ tmpIndFX = 16384; // Q14(1.0)
+ //use larger width in tanh map for pause regions
+ tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
+ nShifts = 4;
+ if (inst->thresholdSpecFlat < tmpU32no1) {
+ tmpIndFX = 0;
+ tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
+ //widthPrior = widthPrior * 2.0;
+ nShifts++;
+ }
+ tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); //Q14
+ // compute indicator function: sigmoid map
+ // FLOAT code
+ // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
+ // (threshPrior1 - tmpFloat1)) + 1.0);
+ tableIndex = (int16_t)(tmpU32no1 >> 14);
+ if (tableIndex < 16) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
+ if (tmpIndFX) {
+ tmpIndFX = 8192 + tmp16no2; // Q14
+ } else {
+ tmpIndFX = 8192 - tmp16no2; // Q14
+ }
+ }
+ indPriorFX += inst->weightSpecFlat * tmpIndFX; // 6*Q14
+ }
+
+ //for template spectral-difference
+ if (inst->weightSpecDiff) {
+ tmpU32no1 = 0;
+ if (inst->featureSpecDiff) {
+ normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
+ WebRtcSpl_NormU32(inst->featureSpecDiff));
+ assert(normTmp >= 0);
+ tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages)
+ tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp);
+ if (tmpU32no2 > 0) {
+ // Q(20 - inst->stages)
+ tmpU32no1 /= tmpU32no2;
+ } else {
+ tmpU32no1 = (uint32_t)(0x7fffffff);
+ }
+ }
+ tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25;
+ tmpU32no2 = tmpU32no1 - tmpU32no3;
+ nShifts = 1;
+ tmpIndFX = 16384; // Q14(1.0)
+ //use larger width in tanh map for pause regions
+ if (tmpU32no2 & 0x80000000) {
+ tmpIndFX = 0;
+ tmpU32no2 = tmpU32no3 - tmpU32no1;
+ //widthPrior = widthPrior * 2.0;
+ nShifts--;
+ }
+ tmpU32no1 = tmpU32no2 >> nShifts;
+ // compute indicator function: sigmoid map
+ /* FLOAT code
+ indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
+ */
+ tableIndex = (int16_t)(tmpU32no1 >> 14);
+ if (tableIndex < 16) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ tmp16no1, frac, 14);
+ if (tmpIndFX) {
+ tmpIndFX = 8192 + tmp16no2;
+ } else {
+ tmpIndFX = 8192 - tmp16no2;
+ }
+ }
+ indPriorFX += inst->weightSpecDiff * tmpIndFX; // 6*Q14
+ }
+
+ //combine the indicator function with the feature weights
+ // FLOAT code
+ // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
+ // indicator1 + weightIndPrior2 * indicator2);
+ indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
+ // done with computing indicator function
+
+ //compute the prior probability
+ // FLOAT code
+ // inst->priorNonSpeechProb += PRIOR_UPDATE *
+ // (indPriorNonSpeech - inst->priorNonSpeechProb);
+ tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
+ inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14);
+
+ //final speech probability: combine prior model with LR factor:
+
+ memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
+
+ if (inst->priorNonSpeechProb > 0) {
+ r0 = inst->priorNonSpeechProb;
+ r1 = 16384 - r0;
+ int32_t const_23637 = 23637;
+ int32_t const_44 = 44;
+ int32_t const_84 = 84;
+ int32_t const_1 = 1;
+ int32_t const_neg8 = -8;
+ for (i = 0; i < inst->magnLen; i++) {
+ r2 = inst->logLrtTimeAvgW32[i];
+ if (r2 < 65300) {
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "mul %[r2], %[r2], %[const_23637] \n\t"
+ "sll %[r6], %[r1], 16 \n\t"
+ "clz %[r7], %[r6] \n\t"
+ "clo %[r8], %[r6] \n\t"
+ "slt %[r9], %[r6], $0 \n\t"
+ "movn %[r7], %[r8], %[r9] \n\t"
+ "sra %[r2], %[r2], 14 \n\t"
+ "andi %[r3], %[r2], 0xfff \n\t"
+ "mul %[r4], %[r3], %[r3] \n\t"
+ "mul %[r3], %[r3], %[const_84] \n\t"
+ "sra %[r2], %[r2], 12 \n\t"
+ "slt %[r5], %[r2], %[const_neg8] \n\t"
+ "movn %[r2], %[const_neg8], %[r5] \n\t"
+ "mul %[r4], %[r4], %[const_44] \n\t"
+ "sra %[r3], %[r3], 7 \n\t"
+ "addiu %[r7], %[r7], -1 \n\t"
+ "slti %[r9], %[r7], 31 \n\t"
+ "movz %[r7], $0, %[r9] \n\t"
+ "sra %[r4], %[r4], 19 \n\t"
+ "addu %[r4], %[r4], %[r3] \n\t"
+ "addiu %[r3], %[r2], 8 \n\t"
+ "addiu %[r2], %[r2], -4 \n\t"
+ "neg %[r5], %[r2] \n\t"
+ "sllv %[r6], %[r4], %[r2] \n\t"
+ "srav %[r5], %[r4], %[r5] \n\t"
+ "slt %[r2], %[r2], $0 \n\t"
+ "movn %[r6], %[r5], %[r2] \n\t"
+ "sllv %[r3], %[const_1], %[r3] \n\t"
+ "addu %[r2], %[r3], %[r6] \n\t"
+ "clz %[r4], %[r2] \n\t"
+ "clo %[r5], %[r2] \n\t"
+ "slt %[r8], %[r2], $0 \n\t"
+ "movn %[r4], %[r5], %[r8] \n\t"
+ "addiu %[r4], %[r4], -1 \n\t"
+ "slt %[r5], $0, %[r2] \n\t"
+ "or %[r5], %[r5], %[r7] \n\t"
+ "movz %[r4], $0, %[r5] \n\t"
+ "addiu %[r6], %[r7], -7 \n\t"
+ "addu %[r6], %[r6], %[r4] \n\t"
+ "bltz %[r6], 1f \n\t"
+ " nop \n\t"
+ "addiu %[r4], %[r6], -8 \n\t"
+ "neg %[r3], %[r4] \n\t"
+ "srav %[r5], %[r2], %[r3] \n\t"
+ "mul %[r5], %[r5], %[r1] \n\t"
+ "mul %[r2], %[r2], %[r1] \n\t"
+ "slt %[r4], %[r4], $0 \n\t"
+ "srav %[r5], %[r5], %[r6] \n\t"
+ "sra %[r2], %[r2], 8 \n\t"
+ "movn %[r2], %[r5], %[r4] \n\t"
+ "sll %[r3], %[r0], 8 \n\t"
+ "addu %[r2], %[r0], %[r2] \n\t"
+ "divu %[r3], %[r3], %[r2] \n\t"
+ "1: \n\t"
+ ".set pop \n\t"
+ : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4),
+ [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
+ [r8] "=&r" (r8), [r9] "=&r" (r9)
+ : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637),
+ [const_neg8] "r" (const_neg8), [const_84] "r" (const_84),
+ [const_1] "r" (const_1), [const_44] "r" (const_44)
+ : "hi", "lo"
+ );
+ nonSpeechProbFinal[i] = r3;
+ }
+ }
+ }
+}
+
+// Update analysis buffer for lower band, and window data before FFT.
+void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst,
+ int16_t* out,
+ int16_t* new_speech) {
+ int iters, after;
+ int anaLen = (int)inst->anaLen;
+ int *window = (int*)inst->window;
+ int *anaBuf = (int*)inst->analysisBuffer;
+ int *outBuf = (int*)out;
+ int r0, r1, r2, r3, r4, r5, r6, r7;
+#if defined(MIPS_DSP_R1_LE)
+ int r8;
+#endif
+
+ // For lower band update analysis buffer.
+ memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
+ (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer));
+ memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech,
+ inst->blockLen10ms * sizeof(*inst->analysisBuffer));
+
+ // Window data before FFT.
+#if defined(MIPS_DSP_R1_LE)
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "sra %[iters], %[anaLen], 3 \n\t"
+ "1: \n\t"
+ "blez %[iters], 2f \n\t"
+ " nop \n\t"
+ "lw %[r0], 0(%[window]) \n\t"
+ "lw %[r1], 0(%[anaBuf]) \n\t"
+ "lw %[r2], 4(%[window]) \n\t"
+ "lw %[r3], 4(%[anaBuf]) \n\t"
+ "lw %[r4], 8(%[window]) \n\t"
+ "lw %[r5], 8(%[anaBuf]) \n\t"
+ "lw %[r6], 12(%[window]) \n\t"
+ "lw %[r7], 12(%[anaBuf]) \n\t"
+ "muleq_s.w.phl %[r8], %[r0], %[r1] \n\t"
+ "muleq_s.w.phr %[r0], %[r0], %[r1] \n\t"
+ "muleq_s.w.phl %[r1], %[r2], %[r3] \n\t"
+ "muleq_s.w.phr %[r2], %[r2], %[r3] \n\t"
+ "muleq_s.w.phl %[r3], %[r4], %[r5] \n\t"
+ "muleq_s.w.phr %[r4], %[r4], %[r5] \n\t"
+ "muleq_s.w.phl %[r5], %[r6], %[r7] \n\t"
+ "muleq_s.w.phr %[r6], %[r6], %[r7] \n\t"
+#if defined(MIPS_DSP_R2_LE)
+ "precr_sra_r.ph.w %[r8], %[r0], 15 \n\t"
+ "precr_sra_r.ph.w %[r1], %[r2], 15 \n\t"
+ "precr_sra_r.ph.w %[r3], %[r4], 15 \n\t"
+ "precr_sra_r.ph.w %[r5], %[r6], 15 \n\t"
+ "sw %[r8], 0(%[outBuf]) \n\t"
+ "sw %[r1], 4(%[outBuf]) \n\t"
+ "sw %[r3], 8(%[outBuf]) \n\t"
+ "sw %[r5], 12(%[outBuf]) \n\t"
+#else
+ "shra_r.w %[r8], %[r8], 15 \n\t"
+ "shra_r.w %[r0], %[r0], 15 \n\t"
+ "shra_r.w %[r1], %[r1], 15 \n\t"
+ "shra_r.w %[r2], %[r2], 15 \n\t"
+ "shra_r.w %[r3], %[r3], 15 \n\t"
+ "shra_r.w %[r4], %[r4], 15 \n\t"
+ "shra_r.w %[r5], %[r5], 15 \n\t"
+ "shra_r.w %[r6], %[r6], 15 \n\t"
+ "sll %[r0], %[r0], 16 \n\t"
+ "sll %[r2], %[r2], 16 \n\t"
+ "sll %[r4], %[r4], 16 \n\t"
+ "sll %[r6], %[r6], 16 \n\t"
+ "packrl.ph %[r0], %[r8], %[r0] \n\t"
+ "packrl.ph %[r2], %[r1], %[r2] \n\t"
+ "packrl.ph %[r4], %[r3], %[r4] \n\t"
+ "packrl.ph %[r6], %[r5], %[r6] \n\t"
+ "sw %[r0], 0(%[outBuf]) \n\t"
+ "sw %[r2], 4(%[outBuf]) \n\t"
+ "sw %[r4], 8(%[outBuf]) \n\t"
+ "sw %[r6], 12(%[outBuf]) \n\t"
+#endif
+ "addiu %[window], %[window], 16 \n\t"
+ "addiu %[anaBuf], %[anaBuf], 16 \n\t"
+ "addiu %[outBuf], %[outBuf], 16 \n\t"
+ "b 1b \n\t"
+ " addiu %[iters], %[iters], -1 \n\t"
+ "2: \n\t"
+ "andi %[after], %[anaLen], 7 \n\t"
+ "3: \n\t"
+ "blez %[after], 4f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[anaBuf]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "addiu %[window], %[window], 2 \n\t"
+ "addiu %[anaBuf], %[anaBuf], 2 \n\t"
+ "addiu %[outBuf], %[outBuf], 2 \n\t"
+ "shra_r.w %[r0], %[r0], 14 \n\t"
+ "sh %[r0], -2(%[outBuf]) \n\t"
+ "b 3b \n\t"
+ " addiu %[after], %[after], -1 \n\t"
+ "4: \n\t"
+ ".set pop \n\t"
+ : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+ [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+ [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8),
+ [iters] "=&r" (iters), [after] "=&r" (after),
+ [window] "+r" (window),[anaBuf] "+r" (anaBuf),
+ [outBuf] "+r" (outBuf)
+ : [anaLen] "r" (anaLen)
+ : "memory", "hi", "lo"
+ );
+#else
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "sra %[iters], %[anaLen], 2 \n\t"
+ "1: \n\t"
+ "blez %[iters], 2f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[anaBuf]) \n\t"
+ "lh %[r2], 2(%[window]) \n\t"
+ "lh %[r3], 2(%[anaBuf]) \n\t"
+ "lh %[r4], 4(%[window]) \n\t"
+ "lh %[r5], 4(%[anaBuf]) \n\t"
+ "lh %[r6], 6(%[window]) \n\t"
+ "lh %[r7], 6(%[anaBuf]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "mul %[r2], %[r2], %[r3] \n\t"
+ "mul %[r4], %[r4], %[r5] \n\t"
+ "mul %[r6], %[r6], %[r7] \n\t"
+ "addiu %[window], %[window], 8 \n\t"
+ "addiu %[anaBuf], %[anaBuf], 8 \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "addiu %[r2], %[r2], 0x2000 \n\t"
+ "addiu %[r4], %[r4], 0x2000 \n\t"
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "sra %[r2], %[r2], 14 \n\t"
+ "sra %[r4], %[r4], 14 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+ "sh %[r0], 0(%[outBuf]) \n\t"
+ "sh %[r2], 2(%[outBuf]) \n\t"
+ "sh %[r4], 4(%[outBuf]) \n\t"
+ "sh %[r6], 6(%[outBuf]) \n\t"
+ "addiu %[outBuf], %[outBuf], 8 \n\t"
+ "b 1b \n\t"
+ " addiu %[iters], %[iters], -1 \n\t"
+ "2: \n\t"
+ "andi %[after], %[anaLen], 3 \n\t"
+ "3: \n\t"
+ "blez %[after], 4f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[anaBuf]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "addiu %[window], %[window], 2 \n\t"
+ "addiu %[anaBuf], %[anaBuf], 2 \n\t"
+ "addiu %[outBuf], %[outBuf], 2 \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "sh %[r0], -2(%[outBuf]) \n\t"
+ "b 3b \n\t"
+ " addiu %[after], %[after], -1 \n\t"
+ "4: \n\t"
+ ".set pop \n\t"
+ : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+ [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+ [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters),
+ [after] "=&r" (after), [window] "+r" (window),
+ [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf)
+ : [anaLen] "r" (anaLen)
+ : "memory", "hi", "lo"
+ );
+#endif
+}
+
+// For the noise supression process, synthesis, read out fully processed
+// segment, and update synthesis buffer.
+void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst,
+ int16_t* out_frame,
+ int16_t gain_factor) {
+ int iters = (int)inst->blockLen10ms >> 2;
+ int after = inst->blockLen10ms & 3;
+ int r0, r1, r2, r3, r4, r5, r6, r7;
+ int16_t *window = (int16_t*)inst->window;
+ int16_t *real = inst->real;
+ int16_t *synthBuf = inst->synthesisBuffer;
+ int16_t *out = out_frame;
+ int sat_pos = 0x7fff;
+ int sat_neg = 0xffff8000;
+ int block10 = (int)inst->blockLen10ms;
+ int anaLen = (int)inst->anaLen;
+
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "1: \n\t"
+ "blez %[iters], 2f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[real]) \n\t"
+ "lh %[r2], 2(%[window]) \n\t"
+ "lh %[r3], 2(%[real]) \n\t"
+ "lh %[r4], 4(%[window]) \n\t"
+ "lh %[r5], 4(%[real]) \n\t"
+ "lh %[r6], 6(%[window]) \n\t"
+ "lh %[r7], 6(%[real]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "mul %[r2], %[r2], %[r3] \n\t"
+ "mul %[r4], %[r4], %[r5] \n\t"
+ "mul %[r6], %[r6], %[r7] \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "addiu %[r2], %[r2], 0x2000 \n\t"
+ "addiu %[r4], %[r4], 0x2000 \n\t"
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "sra %[r2], %[r2], 14 \n\t"
+ "sra %[r4], %[r4], 14 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+ "mul %[r0], %[r0], %[gain_factor] \n\t"
+ "mul %[r2], %[r2], %[gain_factor] \n\t"
+ "mul %[r4], %[r4], %[gain_factor] \n\t"
+ "mul %[r6], %[r6], %[gain_factor] \n\t"
+ "addiu %[r0], %[r0], 0x1000 \n\t"
+ "addiu %[r2], %[r2], 0x1000 \n\t"
+ "addiu %[r4], %[r4], 0x1000 \n\t"
+ "addiu %[r6], %[r6], 0x1000 \n\t"
+ "sra %[r0], %[r0], 13 \n\t"
+ "sra %[r2], %[r2], 13 \n\t"
+ "sra %[r4], %[r4], 13 \n\t"
+ "sra %[r6], %[r6], 13 \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "slt %[r3], %[r2], %[sat_pos] \n\t"
+ "slt %[r5], %[r4], %[sat_pos] \n\t"
+ "slt %[r7], %[r6], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "movz %[r2], %[sat_pos], %[r3] \n\t"
+ "movz %[r4], %[sat_pos], %[r5] \n\t"
+ "movz %[r6], %[sat_pos], %[r7] \n\t"
+ "lh %[r1], 0(%[synthBuf]) \n\t"
+ "lh %[r3], 2(%[synthBuf]) \n\t"
+ "lh %[r5], 4(%[synthBuf]) \n\t"
+ "lh %[r7], 6(%[synthBuf]) \n\t"
+ "addu %[r0], %[r0], %[r1] \n\t"
+ "addu %[r2], %[r2], %[r3] \n\t"
+ "addu %[r4], %[r4], %[r5] \n\t"
+ "addu %[r6], %[r6], %[r7] \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "slt %[r3], %[r2], %[sat_pos] \n\t"
+ "slt %[r5], %[r4], %[sat_pos] \n\t"
+ "slt %[r7], %[r6], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "movz %[r2], %[sat_pos], %[r3] \n\t"
+ "movz %[r4], %[sat_pos], %[r5] \n\t"
+ "movz %[r6], %[sat_pos], %[r7] \n\t"
+ "slt %[r1], %[r0], %[sat_neg] \n\t"
+ "slt %[r3], %[r2], %[sat_neg] \n\t"
+ "slt %[r5], %[r4], %[sat_neg] \n\t"
+ "slt %[r7], %[r6], %[sat_neg] \n\t"
+ "movn %[r0], %[sat_neg], %[r1] \n\t"
+ "movn %[r2], %[sat_neg], %[r3] \n\t"
+ "movn %[r4], %[sat_neg], %[r5] \n\t"
+ "movn %[r6], %[sat_neg], %[r7] \n\t"
+ "sh %[r0], 0(%[synthBuf]) \n\t"
+ "sh %[r2], 2(%[synthBuf]) \n\t"
+ "sh %[r4], 4(%[synthBuf]) \n\t"
+ "sh %[r6], 6(%[synthBuf]) \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "sh %[r2], 2(%[out]) \n\t"
+ "sh %[r4], 4(%[out]) \n\t"
+ "sh %[r6], 6(%[out]) \n\t"
+ "addiu %[window], %[window], 8 \n\t"
+ "addiu %[real], %[real], 8 \n\t"
+ "addiu %[synthBuf],%[synthBuf], 8 \n\t"
+ "addiu %[out], %[out], 8 \n\t"
+ "b 1b \n\t"
+ " addiu %[iters], %[iters], -1 \n\t"
+ "2: \n\t"
+ "blez %[after], 3f \n\t"
+ " subu %[block10], %[anaLen], %[block10] \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[real]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "addiu %[window], %[window], 2 \n\t"
+ "addiu %[real], %[real], 2 \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "mul %[r0], %[r0], %[gain_factor] \n\t"
+ "addiu %[r0], %[r0], 0x1000 \n\t"
+ "sra %[r0], %[r0], 13 \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "lh %[r1], 0(%[synthBuf]) \n\t"
+ "addu %[r0], %[r0], %[r1] \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "slt %[r1], %[r0], %[sat_neg] \n\t"
+ "movn %[r0], %[sat_neg], %[r1] \n\t"
+ "sh %[r0], 0(%[synthBuf]) \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "addiu %[synthBuf],%[synthBuf], 2 \n\t"
+ "addiu %[out], %[out], 2 \n\t"
+ "b 2b \n\t"
+ " addiu %[after], %[after], -1 \n\t"
+ "3: \n\t"
+ "sra %[iters], %[block10], 2 \n\t"
+ "4: \n\t"
+ "blez %[iters], 5f \n\t"
+ " andi %[after], %[block10], 3 \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[real]) \n\t"
+ "lh %[r2], 2(%[window]) \n\t"
+ "lh %[r3], 2(%[real]) \n\t"
+ "lh %[r4], 4(%[window]) \n\t"
+ "lh %[r5], 4(%[real]) \n\t"
+ "lh %[r6], 6(%[window]) \n\t"
+ "lh %[r7], 6(%[real]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "mul %[r2], %[r2], %[r3] \n\t"
+ "mul %[r4], %[r4], %[r5] \n\t"
+ "mul %[r6], %[r6], %[r7] \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "addiu %[r2], %[r2], 0x2000 \n\t"
+ "addiu %[r4], %[r4], 0x2000 \n\t"
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "sra %[r2], %[r2], 14 \n\t"
+ "sra %[r4], %[r4], 14 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+ "mul %[r0], %[r0], %[gain_factor] \n\t"
+ "mul %[r2], %[r2], %[gain_factor] \n\t"
+ "mul %[r4], %[r4], %[gain_factor] \n\t"
+ "mul %[r6], %[r6], %[gain_factor] \n\t"
+ "addiu %[r0], %[r0], 0x1000 \n\t"
+ "addiu %[r2], %[r2], 0x1000 \n\t"
+ "addiu %[r4], %[r4], 0x1000 \n\t"
+ "addiu %[r6], %[r6], 0x1000 \n\t"
+ "sra %[r0], %[r0], 13 \n\t"
+ "sra %[r2], %[r2], 13 \n\t"
+ "sra %[r4], %[r4], 13 \n\t"
+ "sra %[r6], %[r6], 13 \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "slt %[r3], %[r2], %[sat_pos] \n\t"
+ "slt %[r5], %[r4], %[sat_pos] \n\t"
+ "slt %[r7], %[r6], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "movz %[r2], %[sat_pos], %[r3] \n\t"
+ "movz %[r4], %[sat_pos], %[r5] \n\t"
+ "movz %[r6], %[sat_pos], %[r7] \n\t"
+ "lh %[r1], 0(%[synthBuf]) \n\t"
+ "lh %[r3], 2(%[synthBuf]) \n\t"
+ "lh %[r5], 4(%[synthBuf]) \n\t"
+ "lh %[r7], 6(%[synthBuf]) \n\t"
+ "addu %[r0], %[r0], %[r1] \n\t"
+ "addu %[r2], %[r2], %[r3] \n\t"
+ "addu %[r4], %[r4], %[r5] \n\t"
+ "addu %[r6], %[r6], %[r7] \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "slt %[r3], %[r2], %[sat_pos] \n\t"
+ "slt %[r5], %[r4], %[sat_pos] \n\t"
+ "slt %[r7], %[r6], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "movz %[r2], %[sat_pos], %[r3] \n\t"
+ "movz %[r4], %[sat_pos], %[r5] \n\t"
+ "movz %[r6], %[sat_pos], %[r7] \n\t"
+ "slt %[r1], %[r0], %[sat_neg] \n\t"
+ "slt %[r3], %[r2], %[sat_neg] \n\t"
+ "slt %[r5], %[r4], %[sat_neg] \n\t"
+ "slt %[r7], %[r6], %[sat_neg] \n\t"
+ "movn %[r0], %[sat_neg], %[r1] \n\t"
+ "movn %[r2], %[sat_neg], %[r3] \n\t"
+ "movn %[r4], %[sat_neg], %[r5] \n\t"
+ "movn %[r6], %[sat_neg], %[r7] \n\t"
+ "sh %[r0], 0(%[synthBuf]) \n\t"
+ "sh %[r2], 2(%[synthBuf]) \n\t"
+ "sh %[r4], 4(%[synthBuf]) \n\t"
+ "sh %[r6], 6(%[synthBuf]) \n\t"
+ "addiu %[window], %[window], 8 \n\t"
+ "addiu %[real], %[real], 8 \n\t"
+ "addiu %[synthBuf],%[synthBuf], 8 \n\t"
+ "b 4b \n\t"
+ " addiu %[iters], %[iters], -1 \n\t"
+ "5: \n\t"
+ "blez %[after], 6f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[real]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "addiu %[window], %[window], 2 \n\t"
+ "addiu %[real], %[real], 2 \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "mul %[r0], %[r0], %[gain_factor] \n\t"
+ "addiu %[r0], %[r0], 0x1000 \n\t"
+ "sra %[r0], %[r0], 13 \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "lh %[r1], 0(%[synthBuf]) \n\t"
+ "addu %[r0], %[r0], %[r1] \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "slt %[r1], %[r0], %[sat_neg] \n\t"
+ "movn %[r0], %[sat_neg], %[r1] \n\t"
+ "sh %[r0], 0(%[synthBuf]) \n\t"
+ "addiu %[synthBuf],%[synthBuf], 2 \n\t"
+ "b 2b \n\t"
+ " addiu %[after], %[after], -1 \n\t"
+ "6: \n\t"
+ ".set pop \n\t"
+ : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+ [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+ [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters),
+ [after] "+r" (after), [block10] "+r" (block10),
+ [window] "+r" (window), [real] "+r" (real),
+ [synthBuf] "+r" (synthBuf), [out] "+r" (out)
+ : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos),
+ [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen)
+ : "memory", "hi", "lo"
+ );
+
+ // update synthesis buffer
+ memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
+ (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer));
+ WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
+ + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
+}
+
+// Filter the data in the frequency domain, and create spectrum.
+void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst,
+ int16_t* freq_buf) {
+ uint16_t *noiseSupFilter = inst->noiseSupFilter;
+ int16_t *real = inst->real;
+ int16_t *imag = inst->imag;
+ int32_t loop_count = 2;
+ int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6;
+ int16_t tmp16 = (int16_t)(inst->anaLen << 1) - 4;
+ int16_t* freq_buf_f = freq_buf;
+ int16_t* freq_buf_s = &freq_buf[tmp16];
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ //first sample
+ "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_2], 0(%[real]) \n\t"
+ "lh %[tmp_3], 0(%[imag]) \n\t"
+ "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t"
+ "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t"
+ "sra %[tmp_2], %[tmp_2], 14 \n\t"
+ "sra %[tmp_3], %[tmp_3], 14 \n\t"
+ "sh %[tmp_2], 0(%[real]) \n\t"
+ "sh %[tmp_3], 0(%[imag]) \n\t"
+ "negu %[tmp_3], %[tmp_3] \n\t"
+ "sh %[tmp_2], 0(%[freq_buf_f]) \n\t"
+ "sh %[tmp_3], 2(%[freq_buf_f]) \n\t"
+ "addiu %[real], %[real], 2 \n\t"
+ "addiu %[imag], %[imag], 2 \n\t"
+ "addiu %[noiseSupFilter], %[noiseSupFilter], 2 \n\t"
+ "addiu %[freq_buf_f], %[freq_buf_f], 4 \n\t"
+ "1: \n\t"
+ "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_2], 0(%[real]) \n\t"
+ "lh %[tmp_3], 0(%[imag]) \n\t"
+ "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_5], 2(%[real]) \n\t"
+ "lh %[tmp_6], 2(%[imag]) \n\t"
+ "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t"
+ "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t"
+ "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t"
+ "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t"
+ "addiu %[loop_count], %[loop_count], 2 \n\t"
+ "sra %[tmp_2], %[tmp_2], 14 \n\t"
+ "sra %[tmp_3], %[tmp_3], 14 \n\t"
+ "sra %[tmp_5], %[tmp_5], 14 \n\t"
+ "sra %[tmp_6], %[tmp_6], 14 \n\t"
+ "addiu %[noiseSupFilter], %[noiseSupFilter], 4 \n\t"
+ "sh %[tmp_2], 0(%[real]) \n\t"
+ "sh %[tmp_2], 4(%[freq_buf_s]) \n\t"
+ "sh %[tmp_3], 0(%[imag]) \n\t"
+ "sh %[tmp_3], 6(%[freq_buf_s]) \n\t"
+ "negu %[tmp_3], %[tmp_3] \n\t"
+ "sh %[tmp_5], 2(%[real]) \n\t"
+ "sh %[tmp_5], 0(%[freq_buf_s]) \n\t"
+ "sh %[tmp_6], 2(%[imag]) \n\t"
+ "sh %[tmp_6], 2(%[freq_buf_s]) \n\t"
+ "negu %[tmp_6], %[tmp_6] \n\t"
+ "addiu %[freq_buf_s], %[freq_buf_s], -8 \n\t"
+ "addiu %[real], %[real], 4 \n\t"
+ "addiu %[imag], %[imag], 4 \n\t"
+ "sh %[tmp_2], 0(%[freq_buf_f]) \n\t"
+ "sh %[tmp_3], 2(%[freq_buf_f]) \n\t"
+ "sh %[tmp_5], 4(%[freq_buf_f]) \n\t"
+ "sh %[tmp_6], 6(%[freq_buf_f]) \n\t"
+ "blt %[loop_count], %[loop_size], 1b \n\t"
+ " addiu %[freq_buf_f], %[freq_buf_f], 8 \n\t"
+ //last two samples:
+ "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_2], 0(%[real]) \n\t"
+ "lh %[tmp_3], 0(%[imag]) \n\t"
+ "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_5], 2(%[real]) \n\t"
+ "lh %[tmp_6], 2(%[imag]) \n\t"
+ "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t"
+ "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t"
+ "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t"
+ "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t"
+ "sra %[tmp_2], %[tmp_2], 14 \n\t"
+ "sra %[tmp_3], %[tmp_3], 14 \n\t"
+ "sra %[tmp_5], %[tmp_5], 14 \n\t"
+ "sra %[tmp_6], %[tmp_6], 14 \n\t"
+ "sh %[tmp_2], 0(%[real]) \n\t"
+ "sh %[tmp_2], 4(%[freq_buf_s]) \n\t"
+ "sh %[tmp_3], 0(%[imag]) \n\t"
+ "sh %[tmp_3], 6(%[freq_buf_s]) \n\t"
+ "negu %[tmp_3], %[tmp_3] \n\t"
+ "sh %[tmp_2], 0(%[freq_buf_f]) \n\t"
+ "sh %[tmp_3], 2(%[freq_buf_f]) \n\t"
+ "sh %[tmp_5], 4(%[freq_buf_f]) \n\t"
+ "sh %[tmp_6], 6(%[freq_buf_f]) \n\t"
+ "sh %[tmp_5], 2(%[real]) \n\t"
+ "sh %[tmp_6], 2(%[imag]) \n\t"
+ ".set pop \n\t"
+ : [real] "+r" (real), [imag] "+r" (imag),
+ [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s),
+ [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter),
+ [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3),
+ [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6)
+ : [loop_size] "r" (inst->anaLen2)
+ : "memory", "hi", "lo"
+ );
+}
+
+#if defined(MIPS_DSP_R1_LE)
+// Denormalize the real-valued signal |in|, the output from inverse FFT.
+void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst,
+ int16_t* in,
+ int factor) {
+ int32_t r0, r1, r2, r3, t0;
+ int len = (int)inst->anaLen;
+ int16_t *out = &inst->real[0];
+ int shift = factor - inst->normData;
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "beqz %[len], 8f \n\t"
+ " nop \n\t"
+ "bltz %[shift], 4f \n\t"
+ " sra %[t0], %[len], 2 \n\t"
+ "beqz %[t0], 2f \n\t"
+ " andi %[len], %[len], 3 \n\t"
+ "1: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "lh %[r1], 2(%[in]) \n\t"
+ "lh %[r2], 4(%[in]) \n\t"
+ "lh %[r3], 6(%[in]) \n\t"
+ "shllv_s.ph %[r0], %[r0], %[shift] \n\t"
+ "shllv_s.ph %[r1], %[r1], %[shift] \n\t"
+ "shllv_s.ph %[r2], %[r2], %[shift] \n\t"
+ "shllv_s.ph %[r3], %[r3], %[shift] \n\t"
+ "addiu %[in], %[in], 8 \n\t"
+ "addiu %[t0], %[t0], -1 \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "sh %[r1], 2(%[out]) \n\t"
+ "sh %[r2], 4(%[out]) \n\t"
+ "sh %[r3], 6(%[out]) \n\t"
+ "bgtz %[t0], 1b \n\t"
+ " addiu %[out], %[out], 8 \n\t"
+ "2: \n\t"
+ "beqz %[len], 8f \n\t"
+ " nop \n\t"
+ "3: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "addiu %[in], %[in], 2 \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "shllv_s.ph %[r0], %[r0], %[shift] \n\t"
+ "addiu %[out], %[out], 2 \n\t"
+ "bgtz %[len], 3b \n\t"
+ " sh %[r0], -2(%[out]) \n\t"
+ "b 8f \n\t"
+ "4: \n\t"
+ "negu %[shift], %[shift] \n\t"
+ "beqz %[t0], 6f \n\t"
+ " andi %[len], %[len], 3 \n\t"
+ "5: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "lh %[r1], 2(%[in]) \n\t"
+ "lh %[r2], 4(%[in]) \n\t"
+ "lh %[r3], 6(%[in]) \n\t"
+ "srav %[r0], %[r0], %[shift] \n\t"
+ "srav %[r1], %[r1], %[shift] \n\t"
+ "srav %[r2], %[r2], %[shift] \n\t"
+ "srav %[r3], %[r3], %[shift] \n\t"
+ "addiu %[in], %[in], 8 \n\t"
+ "addiu %[t0], %[t0], -1 \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "sh %[r1], 2(%[out]) \n\t"
+ "sh %[r2], 4(%[out]) \n\t"
+ "sh %[r3], 6(%[out]) \n\t"
+ "bgtz %[t0], 5b \n\t"
+ " addiu %[out], %[out], 8 \n\t"
+ "6: \n\t"
+ "beqz %[len], 8f \n\t"
+ " nop \n\t"
+ "7: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "addiu %[in], %[in], 2 \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "srav %[r0], %[r0], %[shift] \n\t"
+ "addiu %[out], %[out], 2 \n\t"
+ "bgtz %[len], 7b \n\t"
+ " sh %[r0], -2(%[out]) \n\t"
+ "8: \n\t"
+ ".set pop \n\t"
+ : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
+ [r2] "=&r" (r2), [r3] "=&r" (r3)
+ : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
+ [out] "r" (out)
+ : "memory"
+ );
+}
+#endif
+
+// Normalize the real-valued signal |in|, the input to forward FFT.
+void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst,
+ const int16_t* in,
+ int16_t* out) {
+ int32_t r0, r1, r2, r3, t0;
+ int len = (int)inst->anaLen;
+ int shift = inst->normData;
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "beqz %[len], 4f \n\t"
+ " sra %[t0], %[len], 2 \n\t"
+ "beqz %[t0], 2f \n\t"
+ " andi %[len], %[len], 3 \n\t"
+ "1: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "lh %[r1], 2(%[in]) \n\t"
+ "lh %[r2], 4(%[in]) \n\t"
+ "lh %[r3], 6(%[in]) \n\t"
+ "sllv %[r0], %[r0], %[shift] \n\t"
+ "sllv %[r1], %[r1], %[shift] \n\t"
+ "sllv %[r2], %[r2], %[shift] \n\t"
+ "sllv %[r3], %[r3], %[shift] \n\t"
+ "addiu %[in], %[in], 8 \n\t"
+ "addiu %[t0], %[t0], -1 \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "sh %[r1], 2(%[out]) \n\t"
+ "sh %[r2], 4(%[out]) \n\t"
+ "sh %[r3], 6(%[out]) \n\t"
+ "bgtz %[t0], 1b \n\t"
+ " addiu %[out], %[out], 8 \n\t"
+ "2: \n\t"
+ "beqz %[len], 4f \n\t"
+ " nop \n\t"
+ "3: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "addiu %[in], %[in], 2 \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "sllv %[r0], %[r0], %[shift] \n\t"
+ "addiu %[out], %[out], 2 \n\t"
+ "bgtz %[len], 3b \n\t"
+ " sh %[r0], -2(%[out]) \n\t"
+ "4: \n\t"
+ ".set pop \n\t"
+ : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
+ [r2] "=&r" (r2), [r3] "=&r" (r3)
+ : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
+ [out] "r" (out)
+ : "memory"
+ );
+}
+