summary | refs | log | tree | commit | diff
path: root/third_party/g7221/decode
diff options
context:
space:
mode:
author: Benny Prijono <bennylp@teluu.com> 2009-04-18 14:29:28 +0000
committer: Benny Prijono <bennylp@teluu.com> 2009-04-18 14:29:28 +0000
commit: 345cd27b4a25e0c326ed1efc6f7f8372bde4ed6b (patch)
tree: 23f45ad5fcde6553d7d0dcfa5065b22c94edc790 /third_party/g7221/decode
parent: a1af7e95f02cb3744d53b17671ce52cfb5edc7f8 (diff)
More ticket #774: optimization for siren7/siren14 codecs
git-svn-id: http://svn.pjsip.org/repos/pjproject/trunk@2616 74dad513-b988-da41-8d7b-12977e46ad98
Diffstat (limited to 'third_party/g7221/decode')
-rw-r--r-- third_party/g7221/decode/coef2sam.c | 10
-rw-r--r-- third_party/g7221/decode/dct4_s.c | 54
-rw-r--r-- third_party/g7221/decode/decoder.c | 28
3 files changed, 58 insertions, 34 deletions
diff --git a/third_party/g7221/decode/coef2sam.c b/third_party/g7221/decode/coef2sam.c
index a52095d1..87deab0e 100644
--- a/third_party/g7221/decode/coef2sam.c
+++ b/third_party/g7221/decode/coef2sam.c
@@ -88,7 +88,7 @@ void rmlt_coefs_to_samples(Word16 *coefs,
- half_dct_size = shr(dct_length,1);
+ half_dct_size = shr_nocheck(dct_length,1);
/* Perform a Type IV (inverse) DCT on the coefficients */
dct_type_iv_s(coefs, new_samples, dct_length);
@@ -98,7 +98,7 @@ void rmlt_coefs_to_samples(Word16 *coefs,
{
for(index=0;index<dct_length;index++)
{
- new_samples[index] = shr(new_samples[index],mag_shift);
+ new_samples[index] = shr_nocheck(new_samples[index],mag_shift);
move16();
}
}
@@ -110,7 +110,7 @@ void rmlt_coefs_to_samples(Word16 *coefs,
mag_shift = negate(mag_shift);
for(index=0;index<dct_length;index++)
{
- new_samples[index] = shl(new_samples[index],mag_shift);
+ new_samples[index] = shl_nocheck(new_samples[index],mag_shift);
move16();
}
}
@@ -147,7 +147,7 @@ void rmlt_coefs_to_samples(Word16 *coefs,
move32();
sum = L_mac(sum,*win_new++, *--new_ptr);
sum = L_mac(sum,*--win_old, *old_ptr++);
- *out_ptr++ = itu_round(L_shl(sum,2));
+ *out_ptr++ = itu_round(L_shl_nocheck(sum,2));
move16();
}
@@ -160,7 +160,7 @@ void rmlt_coefs_to_samples(Word16 *coefs,
move32();
sum = L_mac(sum,*win_new++, *new_ptr++);
sum = L_mac(sum,negate(*--win_old), *--old_ptr);
- *out_ptr++ = itu_round(L_shl(sum,2));
+ *out_ptr++ = itu_round(L_shl_nocheck(sum,2));
move16();
}
diff --git a/third_party/g7221/decode/dct4_s.c b/third_party/g7221/decode/dct4_s.c
index 82405a31..0123a13b 100644
--- a/third_party/g7221/decode/dct4_s.c
+++ b/third_party/g7221/decode/dct4_s.c
@@ -140,9 +140,9 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length)
/*===========================================================*/
/* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */
- set_span = shr(dct_length,set_count_log);
+ set_span = shr_nocheck(dct_length,set_count_log);
- set_count = shl(1,set_count_log);
+ set_count = shl_nocheck(1,set_count_log);
in_ptr = in_buffer;
move16();
next_out_base = out_buffer;
@@ -185,12 +185,18 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length)
/* IF THIS WORKS, IT'S PREFERABLE */
dummy = add(in_val_low,dither_ptr[i++]);
- acca = L_add(dummy,in_val_high);
- out_val_low = extract_l(L_shr(acca,1));
+ // blp: addition of two 16bits vars, there's no way
+ // they'll overflow a 32bit var
+ //acca = L_add(dummy,in_val_high);
+ acca = dummy + in_val_high;
+ out_val_low = extract_l(L_shr_nocheck(acca,1));
dummy = add(in_val_low,dither_ptr[i++]);
- acca = L_add(dummy,-in_val_high);
- out_val_high = extract_l(L_shr(acca,1));
+ // blp: addition of two 16bits vars, there's no way
+ // they'll overflow a 32bit var
+ //acca = L_add(dummy,-in_val_high);
+ acca = dummy - in_val_high;
+ out_val_high = extract_l(L_shr_nocheck(acca,1));
*out_ptr_low++ = out_val_low;
move16();
@@ -284,6 +290,17 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length)
{
for ( k=0; k<CORE_SIZE; k++ )
{
+#if PJ_HAS_INT64
+ /* blp: danger danger! not really compatible but faster */
+ pj_int64_t sum64=0;
+ move32();
+
+ for ( i=0; i<CORE_SIZE; i++ )
+ {
+ sum64 += L_mult(pair_ptr[i], dct_core_s[i][k]);
+ }
+ sum = L_saturate(sum64);
+#else
sum=0L;
move32();
@@ -291,6 +308,7 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length)
{
sum = L_mac(sum, pair_ptr[i],dct_core_s[i][k]);
}
+#endif
buffer_swap[k] = itu_round(sum);
}
@@ -323,9 +341,9 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length)
/*===========================================================*/
/* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */
- set_span = shr(dct_length,set_count_log);
+ set_span = shr_nocheck(dct_length,set_count_log);
- set_count = shl(1,set_count_log);
+ set_count = shl_nocheck(1,set_count_log);
next_in_base = in_buffer;
move16();
test();
@@ -354,7 +372,7 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length)
in_ptr_low = next_in_base;
move16();
- temp = shr(set_span,1);
+ temp = shr_nocheck(set_span,1);
in_ptr_high = in_ptr_low + temp;
move16();
@@ -401,25 +419,25 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length)
sum = L_mac(sum,cos_even,in_low_even);
sum = L_mac(sum,negate(msin_even),in_high_even);
- out_low_even = itu_round(L_shl(sum,1));
+ out_low_even = itu_round(L_shl_nocheck(sum,1));
sum = 0L;
move32();
sum = L_mac(sum,msin_even,in_low_even);
sum = L_mac(sum,cos_even,in_high_even);
- out_high_even = itu_round(L_shl(sum,1));
+ out_high_even = itu_round(L_shl_nocheck(sum,1));
sum = 0L;
move32();
sum = L_mac(sum,cos_odd,in_low_odd);
sum = L_mac(sum,msin_odd,in_high_odd);
- out_low_odd = itu_round(L_shl(sum,1));
+ out_low_odd = itu_round(L_shl_nocheck(sum,1));
sum = 0L;
move32();
sum = L_mac(sum,msin_odd,in_low_odd);
sum = L_mac(sum,negate(cos_odd),in_high_odd);
- out_high_odd = itu_round(L_shl(sum,1));
+ out_high_odd = itu_round(L_shl_nocheck(sum,1));
*out_ptr_low++ = out_low_even;
move16();
@@ -458,7 +476,10 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length)
{
for(i=0;i<320;i++)
{
- sum = L_add(output[i],syn_bias_7khz[i]);
+ // blp: addition of two 16bits vars, there's no way
+ // they'll overflow a 32bit var
+ //sum = L_add(output[i],syn_bias_7khz[i]);
+ sum = output[i] + syn_bias_7khz[i];
acca = L_sub(sum,32767);
test();
if (acca > 0)
@@ -466,7 +487,10 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length)
sum = 32767L;
move32();
}
- acca = L_add(sum,32768L);
+ // blp: addition of two 16bits vars, there's no way
+ // they'll overflow 32bit var
+ //acca = L_add(sum,32768L);
+ acca = sum + 32768;
test();
if (acca < 0)
{
diff --git a/third_party/g7221/decode/decoder.c b/third_party/g7221/decode/decoder.c
index c6b8c065..d642a910 100644
--- a/third_party/g7221/decode/decoder.c
+++ b/third_party/g7221/decode/decoder.c
@@ -136,7 +136,7 @@ void decoder(Bit_Obj *bitobj,
for (i=0; i<num_categorization_control_bits; i++)
{
get_next_bit(bitobj);
- categorization_control = shl(categorization_control,1);
+ categorization_control = shl_nocheck(categorization_control,1);
categorization_control = add(categorization_control,bitobj->next_bit);
}
@@ -246,7 +246,7 @@ void decode_envelope(Bit_Obj *bitobj,
for (i=0; i<5; i++)
{
get_next_bit(bitobj);
- index = shl(index,1);
+ index = shl_nocheck(index,1);
index = add(index,bitobj->next_bit);
}
bitobj->number_of_bits_left = sub(bitobj->number_of_bits_left,5);
@@ -332,7 +332,7 @@ void decode_envelope(Bit_Obj *bitobj,
while ((i >= 0) && ((temp1 >= 0) || (temp2 > 0)))
{
i = sub(i,1);
- temp = shr(temp,1);
+ temp = shr_nocheck(temp,1);
max_index = sub(max_index,2);
temp1 = sub(temp,8);
temp2 = sub(max_index,28);
@@ -530,13 +530,13 @@ void decode_vector_quantized_mlt_indices(Bit_Obj *bitobj,
test();
if (bitobj->next_bit == 0)
{
- temp = shl(index,1);
+ temp = shl_nocheck(index,1);
index = (Word16)*(decoder_table_ptr + temp);
move16();
}
else
{
- temp = shl(index,1);
+ temp = shl_nocheck(index,1);
index = (Word16)*(decoder_table_ptr + temp + 1);
move16();
}
@@ -567,18 +567,18 @@ void decode_vector_quantized_mlt_indices(Bit_Obj *bitobj,
for (j=0; j<num_sign_bits; j++)
{
get_next_bit(bitobj);
- signs_index = shl(signs_index,1);
+ signs_index = shl_nocheck(signs_index,1);
signs_index = add(signs_index,bitobj->next_bit);
bitobj->number_of_bits_left = sub(bitobj->number_of_bits_left,1);
}
temp = sub(num_sign_bits,1);
- bit = shl(1,(temp));
+ bit = shl_nocheck(1,(temp));
}
for (j=0; j<vec_dim; j++)
{
acca = L_mult0(standard_deviation,mlt_quant_centroid[category][k[j]]);
- acca = L_shr(acca,12);
+ acca = L_shr_nocheck(acca,12);
decoder_mlt_value = extract_l(acca);
test();
@@ -587,7 +587,7 @@ void decode_vector_quantized_mlt_indices(Bit_Obj *bitobj,
test();
if ((signs_index & bit) == 0)
decoder_mlt_value = negate(decoder_mlt_value);
- bit = shr(bit,1);
+ bit = shr_nocheck(bit,1);
}
*decoder_mlt_ptr++ = decoder_mlt_value;
move16();
@@ -652,7 +652,7 @@ void decode_vector_quantized_mlt_indices(Bit_Obj *bitobj,
}
*decoder_mlt_ptr = temp1;
move16();
- random_word = shr(random_word,1);
+ random_word = shr_nocheck(random_word,1);
}
/* pointer arithmetic */
decoder_mlt_ptr++;
@@ -677,7 +677,7 @@ void decode_vector_quantized_mlt_indices(Bit_Obj *bitobj,
}
*decoder_mlt_ptr = temp1;
move16();
- random_word = shr(random_word,1);
+ random_word = shr_nocheck(random_word,1);
}
/* pointer arithmetic */
decoder_mlt_ptr++;
@@ -710,7 +710,7 @@ void decode_vector_quantized_mlt_indices(Bit_Obj *bitobj,
}
*decoder_mlt_ptr++ = temp1;
move16();
- random_word = shr(random_word,1);
+ random_word = shr_nocheck(random_word,1);
}
random_word = get_rand(randobj);
for (j=0; j<10; j++)
@@ -730,7 +730,7 @@ void decode_vector_quantized_mlt_indices(Bit_Obj *bitobj,
*decoder_mlt_ptr++ = temp1;
move16();
- random_word = shr(random_word,1);
+ random_word = shr_nocheck(random_word,1);
}
}
}
@@ -1059,7 +1059,7 @@ void get_next_bit(Bit_Obj *bitobj)
move16();
}
bitobj->code_bit_count = sub(bitobj->code_bit_count,1);
- temp = shr(bitobj->current_word,bitobj->code_bit_count);
+ temp = shr_nocheck(bitobj->current_word,bitobj->code_bit_count);
logic16();
bitobj->next_bit = (Word16 )(temp & 1);