diff options
author | Benny Prijono <bennylp@teluu.com> | 2009-04-18 14:29:28 +0000 |
---|---|---|
committer | Benny Prijono <bennylp@teluu.com> | 2009-04-18 14:29:28 +0000 |
commit | 345cd27b4a25e0c326ed1efc6f7f8372bde4ed6b (patch) | |
tree | 23f45ad5fcde6553d7d0dcfa5065b22c94edc790 /third_party/g7221/decode | |
parent | a1af7e95f02cb3744d53b17671ce52cfb5edc7f8 (diff) |
More ticket #774: optimization for siren7/siren14 codecs
git-svn-id: http://svn.pjsip.org/repos/pjproject/trunk@2616 74dad513-b988-da41-8d7b-12977e46ad98
Diffstat (limited to 'third_party/g7221/decode')
-rw-r--r-- | third_party/g7221/decode/coef2sam.c | 10 | ||||
-rw-r--r-- | third_party/g7221/decode/dct4_s.c | 54 | ||||
-rw-r--r-- | third_party/g7221/decode/decoder.c | 28 |
3 files changed, 58 insertions, 34 deletions
diff --git a/third_party/g7221/decode/coef2sam.c b/third_party/g7221/decode/coef2sam.c index a52095d1..87deab0e 100644 --- a/third_party/g7221/decode/coef2sam.c +++ b/third_party/g7221/decode/coef2sam.c @@ -88,7 +88,7 @@ void rmlt_coefs_to_samples(Word16 *coefs, - half_dct_size = shr(dct_length,1); + half_dct_size = shr_nocheck(dct_length,1); /* Perform a Type IV (inverse) DCT on the coefficients */ dct_type_iv_s(coefs, new_samples, dct_length); @@ -98,7 +98,7 @@ void rmlt_coefs_to_samples(Word16 *coefs, { for(index=0;index<dct_length;index++) { - new_samples[index] = shr(new_samples[index],mag_shift); + new_samples[index] = shr_nocheck(new_samples[index],mag_shift); move16(); } } @@ -110,7 +110,7 @@ void rmlt_coefs_to_samples(Word16 *coefs, mag_shift = negate(mag_shift); for(index=0;index<dct_length;index++) { - new_samples[index] = shl(new_samples[index],mag_shift); + new_samples[index] = shl_nocheck(new_samples[index],mag_shift); move16(); } } @@ -147,7 +147,7 @@ void rmlt_coefs_to_samples(Word16 *coefs, move32(); sum = L_mac(sum,*win_new++, *--new_ptr); sum = L_mac(sum,*--win_old, *old_ptr++); - *out_ptr++ = itu_round(L_shl(sum,2)); + *out_ptr++ = itu_round(L_shl_nocheck(sum,2)); move16(); } @@ -160,7 +160,7 @@ void rmlt_coefs_to_samples(Word16 *coefs, move32(); sum = L_mac(sum,*win_new++, *new_ptr++); sum = L_mac(sum,negate(*--win_old), *--old_ptr); - *out_ptr++ = itu_round(L_shl(sum,2)); + *out_ptr++ = itu_round(L_shl_nocheck(sum,2)); move16(); } diff --git a/third_party/g7221/decode/dct4_s.c b/third_party/g7221/decode/dct4_s.c index 82405a31..0123a13b 100644 --- a/third_party/g7221/decode/dct4_s.c +++ b/third_party/g7221/decode/dct4_s.c @@ -140,9 +140,9 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length) /*===========================================================*/ /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */ - set_span = shr(dct_length,set_count_log); + set_span = shr_nocheck(dct_length,set_count_log); - set_count = shl(1,set_count_log); + set_count = shl_nocheck(1,set_count_log); in_ptr = in_buffer; move16(); next_out_base = out_buffer; @@ -185,12 +185,18 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length) /* IF THIS WORKS, IT'S PREFERABLE */ dummy = add(in_val_low,dither_ptr[i++]); - acca = L_add(dummy,in_val_high); - out_val_low = extract_l(L_shr(acca,1)); + // blp: addition of two 16bits vars, there's no way + // they'll overflow a 32bit var + //acca = L_add(dummy,in_val_high); + acca = dummy + in_val_high; + out_val_low = extract_l(L_shr_nocheck(acca,1)); dummy = add(in_val_low,dither_ptr[i++]); - acca = L_add(dummy,-in_val_high); - out_val_high = extract_l(L_shr(acca,1)); + // blp: addition of two 16bits vars, there's no way + // they'll overflow a 32bit var + //acca = L_add(dummy,-in_val_high); + acca = dummy - in_val_high; + out_val_high = extract_l(L_shr_nocheck(acca,1)); *out_ptr_low++ = out_val_low; move16(); @@ -284,6 +290,17 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length) { for ( k=0; k<CORE_SIZE; k++ ) { +#if PJ_HAS_INT64 + /* blp: danger danger! not really compatible but faster */ + pj_int64_t sum64=0; + move32(); + + for ( i=0; i<CORE_SIZE; i++ ) + { + sum64 += L_mult(pair_ptr[i], dct_core_s[i][k]); + } + sum = L_saturate(sum64); +#else sum=0L; move32(); @@ -291,6 +308,7 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length) { sum = L_mac(sum, pair_ptr[i],dct_core_s[i][k]); } +#endif buffer_swap[k] = itu_round(sum); } @@ -323,9 +341,9 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length) /*===========================================================*/ /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */ - set_span = shr(dct_length,set_count_log); + set_span = shr_nocheck(dct_length,set_count_log); - set_count = shl(1,set_count_log); + set_count = shl_nocheck(1,set_count_log); next_in_base = in_buffer; move16(); test(); @@ -354,7 +372,7 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length) in_ptr_low = next_in_base; move16(); - temp = shr(set_span,1); + temp = shr_nocheck(set_span,1); in_ptr_high = in_ptr_low + temp; move16(); @@ -401,25 +419,25 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length) sum = L_mac(sum,cos_even,in_low_even); sum = L_mac(sum,negate(msin_even),in_high_even); - out_low_even = itu_round(L_shl(sum,1)); + out_low_even = itu_round(L_shl_nocheck(sum,1)); sum = 0L; move32(); sum = L_mac(sum,msin_even,in_low_even); sum = L_mac(sum,cos_even,in_high_even); - out_high_even = itu_round(L_shl(sum,1)); + out_high_even = itu_round(L_shl_nocheck(sum,1)); sum = 0L; move32(); sum = L_mac(sum,cos_odd,in_low_odd); sum = L_mac(sum,msin_odd,in_high_odd); - out_low_odd = itu_round(L_shl(sum,1)); + out_low_odd = itu_round(L_shl_nocheck(sum,1)); sum = 0L; move32(); sum = L_mac(sum,msin_odd,in_low_odd); sum = L_mac(sum,negate(cos_odd),in_high_odd); - out_high_odd = itu_round(L_shl(sum,1)); + out_high_odd = itu_round(L_shl_nocheck(sum,1)); *out_ptr_low++ = out_low_even; move16(); @@ -458,7 +476,10 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length) { for(i=0;i<320;i++) { - sum = L_add(output[i],syn_bias_7khz[i]); + // blp: addition of two 16bits vars, there's no way + // they'll overflow a 32bit var + //sum = L_add(output[i],syn_bias_7khz[i]); + sum = output[i] + syn_bias_7khz[i]; acca = L_sub(sum,32767); test(); if (acca > 0) @@ -466,7 +487,10 @@ void dct_type_iv_s (Word16 *input,Word16 *output,Word16 dct_length) sum = 32767L; move32(); } - acca = L_add(sum,32768L); + // blp: addition of two 16bits vars, there's no way + // they'll overflow 32bit var + //acca = L_add(sum,32768L); + acca = sum + 32768; test(); if (acca < 0) { diff --git a/third_party/g7221/decode/decoder.c b/third_party/g7221/decode/decoder.c index c6b8c065..d642a910 100644 --- a/third_party/g7221/decode/decoder.c +++ b/third_party/g7221/decode/decoder.c @@ -136,7 +136,7 @@ void decoder(Bit_Obj *bitobj, for (i=0; i<num_categorization_control_bits; i++) { get_next_bit(bitobj); - categorization_control = shl(categorization_control,1); + categorization_control = shl_nocheck(categorization_control,1); categorization_control = add(categorization_control,bitobj->next_bit); } @@ -246,7 +246,7 @@ void decode_envelope(Bit_Obj *bitobj, for (i=0; i<5; i++) { get_next_bit(bitobj); - index = shl(index,1); + index = shl_nocheck(index,1); index = add(index,bitobj->next_bit); } bitobj->number_of_bits_left = sub(bitobj->number_of_bits_left,5); @@ -332,7 +332,7 @@ void decode_envelope(Bit_Obj *bitobj, while ((i >= 0) && ((temp1 >= 0) || (temp2 > 0))) { i = sub(i,1); - temp = shr(temp,1); + temp = shr_nocheck(temp,1); max_index = sub(max_index,2); temp1 = sub(temp,8); temp2 = sub(max_index,28); @@ -530,13 +530,13 @@ void decode_vector_quantized_mlt_indices(Bit_Obj *bitobj, test(); if (bitobj->next_bit == 0) { - temp = shl(index,1); + temp = shl_nocheck(index,1); index = (Word16)*(decoder_table_ptr + temp); move16(); } else { - temp = shl(index,1); + temp = shl_nocheck(index,1); index = (Word16)*(decoder_table_ptr + temp + 1); move16(); } @@ -567,18 +567,18 @@ void decode_vector_quantized_mlt_indices(Bit_Obj *bitobj, for (j=0; j<num_sign_bits; j++) { get_next_bit(bitobj); - signs_index = shl(signs_index,1); + signs_index = shl_nocheck(signs_index,1); signs_index = add(signs_index,bitobj->next_bit); bitobj->number_of_bits_left = sub(bitobj->number_of_bits_left,1); } temp = sub(num_sign_bits,1); - bit = shl(1,(temp)); + bit = shl_nocheck(1,(temp)); } for (j=0; j<vec_dim; j++) { acca = L_mult0(standard_deviation,mlt_quant_centroid[category][k[j]]); - acca = L_shr(acca,12); + acca = L_shr_nocheck(acca,12); decoder_mlt_value = extract_l(acca); test(); @@ -587,7 +587,7 @@ void decode_vector_quantized_mlt_indices(Bit_Obj *bitobj, test(); if ((signs_index & bit) == 0) decoder_mlt_value = negate(decoder_mlt_value); - bit = shr(bit,1); + bit = shr_nocheck(bit,1); } *decoder_mlt_ptr++ = decoder_mlt_value; move16(); @@ -652,7 +652,7 @@ void decode_vector_quantized_mlt_indices(Bit_Obj *bitobj, } *decoder_mlt_ptr = temp1; move16(); - random_word = shr(random_word,1); + random_word = shr_nocheck(random_word,1); } /* pointer arithmetic */ decoder_mlt_ptr++; @@ -677,7 +677,7 @@ void decode_vector_quantized_mlt_indices(Bit_Obj *bitobj, } *decoder_mlt_ptr = temp1; move16(); - random_word = shr(random_word,1); + random_word = shr_nocheck(random_word,1); } /* pointer arithmetic */ decoder_mlt_ptr++; @@ -710,7 +710,7 @@ void decode_vector_quantized_mlt_indices(Bit_Obj *bitobj, } *decoder_mlt_ptr++ = temp1; move16(); - random_word = shr(random_word,1); + random_word = shr_nocheck(random_word,1); } random_word = get_rand(randobj); for (j=0; j<10; j++) @@ -730,7 +730,7 @@ void decode_vector_quantized_mlt_indices(Bit_Obj *bitobj, *decoder_mlt_ptr++ = temp1; move16(); - random_word = shr(random_word,1); + random_word = shr_nocheck(random_word,1); } } } @@ -1059,7 +1059,7 @@ void get_next_bit(Bit_Obj *bitobj) move16(); } bitobj->code_bit_count = sub(bitobj->code_bit_count,1); - temp = shr(bitobj->current_word,bitobj->code_bit_count); + temp = shr_nocheck(bitobj->current_word,bitobj->code_bit_count); logic16(); bitobj->next_bit = (Word16 )(temp & 1); |