From 345cd27b4a25e0c326ed1efc6f7f8372bde4ed6b Mon Sep 17 00:00:00 2001 From: Benny Prijono Date: Sat, 18 Apr 2009 14:29:28 +0000 Subject: More ticket #774: optimization for siren7/siren14 codecs git-svn-id: http://svn.pjsip.org/repos/pjproject/trunk@2616 74dad513-b988-da41-8d7b-12977e46ad98 --- third_party/g7221/encode/dct4_a.c | 33 ++++++++++++++------ third_party/g7221/encode/encoder.c | 62 ++++++++++++++++++------------------- third_party/g7221/encode/sam2coef.c | 10 +++--- 3 files changed, 60 insertions(+), 45 deletions(-) (limited to 'third_party/g7221/encode') diff --git a/third_party/g7221/encode/dct4_a.c b/third_party/g7221/encode/dct4_a.c index 4d7c7650..94ddf9fe 100644 --- a/third_party/g7221/encode/dct4_a.c +++ b/third_party/g7221/encode/dct4_a.c @@ -135,9 +135,9 @@ void dct_type_iv_a (Word16 *input,Word16 *output,Word16 dct_length) /*===========================================================*/ /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */ - set_span = shr(dct_length,set_count_log); + set_span = shr_nocheck(dct_length,set_count_log); - set_count = shl(1,set_count_log); + set_count = shl_nocheck(1,set_count_log); in_ptr = in_buffer; move16(); @@ -168,12 +168,15 @@ void dct_type_iv_a (Word16 *input,Word16 *output,Word16 dct_length) { in_val_low = *in_ptr++; in_val_high = *in_ptr++; - acca = L_add(in_val_low,in_val_high); - acca = L_shr(acca,1); + // blp: addition of two 16bits vars, there's no way + // they'll overflow a 32bit var + //acca = L_add(in_val_low,in_val_high); + acca = (in_val_low + in_val_high); + acca = L_shr_nocheck(acca,1); out_val_low = extract_l(acca); acca = L_sub(in_val_low,in_val_high); - acca = L_shr(acca,1); + acca = L_shr_nocheck(acca,1); out_val_high = extract_l(acca); *out_ptr_low++ = out_val_low; @@ -213,18 +216,30 @@ void dct_type_iv_a (Word16 *input,Word16 *output,Word16 dct_length) move16(); temp = sub(dct_length_log,1); - temp = shl(1,temp); + temp = shl_nocheck(1,temp); for (pairs_left=temp; pairs_left > 0; 
pairs_left--) { for ( k=0; k= 0) { - temp = extract_l(L_shr(current_word,j)); + temp = extract_l(L_shr_nocheck(current_word,j)); out_word = add(out_word,temp); out_words[out_word_index++] = out_word; @@ -290,12 +290,12 @@ void bits_to_words(UWord32 *region_mlt_bits, for (region=0;region 0) @@ -472,14 +472,14 @@ void adjust_abs_region_power_index(Word16 *absolute_region_power_index,Word16 *m for (i=0; i 0) { test(); - long_accumulator = L_shr(long_accumulator,1); + long_accumulator = L_shr_nocheck(long_accumulator,1); acca = (long_accumulator & 0x7fff0000L); logic32(); @@ -587,12 +587,12 @@ Word16 compute_region_powers(Word16 *mlt_coefs, test(); logic16(); - long_accumulator = L_shl(long_accumulator,1); + long_accumulator = L_shl_nocheck(long_accumulator,1); acca = L_sub(long_accumulator,32767); power_shift--; temp = add(power_shift,15); } - long_accumulator = L_shr(long_accumulator,1); + long_accumulator = L_shr_nocheck(long_accumulator,1); /* 28963 corresponds to square root of 2 times REGION_SIZE(20). */ acca = L_sub(long_accumulator,28963); @@ -601,7 +601,7 @@ Word16 compute_region_powers(Word16 *mlt_coefs, power_shift = add(power_shift,1); acca = L_deposit_l(mag_shift); - acca = L_shl(acca,1); + acca = L_shl_nocheck(acca,1); acca = L_sub(power_shift,acca); acca = L_add(35,acca); acca = L_sub(acca,REGION_POWER_TABLE_NUM_NEGATIVES); @@ -785,7 +785,7 @@ void vector_quantize_mlts(Word16 number_of_available_bits, Word16 temp2; /* Start in the middle of the categorization control range. 
*/ - temp = shr(num_categorization_control_possibilities,1); + temp = shr_nocheck(num_categorization_control_possibilities,1); temp = sub(temp,1); for (*p_categorization_control = 0; *p_categorization_control < temp; (*p_categorization_control)++) { @@ -808,7 +808,7 @@ void vector_quantize_mlts(Word16 number_of_available_bits, { region_mlt_bit_counts[region] = vector_huffman(category, absolute_region_power_index[region],raw_mlt_ptr, - &region_mlt_bits[shl(region,2)]); + &region_mlt_bits[shl_nocheck(region,2)]); } else { @@ -849,7 +849,7 @@ void vector_quantize_mlts(Word16 number_of_available_bits, { region_mlt_bit_counts[region] = vector_huffman(category, absolute_region_power_index[region],raw_mlt_ptr, - &region_mlt_bits[shl(region,2)]); + &region_mlt_bits[shl_nocheck(region,2)]); } else { @@ -895,7 +895,7 @@ void vector_quantize_mlts(Word16 number_of_available_bits, { region_mlt_bit_counts[region] = vector_huffman(category, absolute_region_power_index[region],raw_mlt_ptr, - &region_mlt_bits[shl(region,2)]); + &region_mlt_bits[shl_nocheck(region,2)]); } else { @@ -1002,16 +1002,16 @@ Word16 vector_huffman(Word16 category, /* compute inverse of step size * standard deviation */ acca = L_mult(step_size_inverse_table[category],standard_deviation_inverse_table[power_index]); - acca = L_shr(acca,1); + acca = L_shr_nocheck(acca,1); acca = L_add(acca,4096); - acca = L_shr(acca,13); + acca = L_shr_nocheck(acca,13); /* * The next two lines are new to Release 1.2 */ mytemp = acca & 0x3; - acca = L_shr(acca,2); + acca = L_shr_nocheck(acca,2); inv_of_step_size_times_std_dev = extract_l(acca); @@ -1032,16 +1032,16 @@ Word16 vector_huffman(Word16 category, k = abs_s(*raw_mlt_ptr); acca = L_mult(k,inv_of_step_size_times_std_dev); - acca = L_shr(acca,1); + acca = L_shr_nocheck(acca,1); /* * The next four lines are new to Release 1.2 */ myacca = (Word16)L_mult(k,mytemp); - myacca = (Word16)L_shr(myacca,1); + myacca = (Word16)L_shr_nocheck(myacca,1); myacca = 
(Word16)L_add(myacca,int_dead_zone_low_bits[category]); - myacca = (Word16)L_shr(myacca,2); + myacca = (Word16)L_shr_nocheck(myacca,2); acca = L_add(acca,int_dead_zone[category]); @@ -1050,7 +1050,7 @@ Word16 vector_huffman(Word16 category, */ acca = L_add(acca,myacca); - acca = L_shr(acca,13); + acca = L_shr_nocheck(acca,13); k = extract_l(acca); @@ -1058,7 +1058,7 @@ Word16 vector_huffman(Word16 category, if (k != 0) { number_of_non_zero = add(number_of_non_zero,1); - signs_index = shl(signs_index,1); + signs_index = shl_nocheck(signs_index,1); test(); if (*raw_mlt_ptr > 0) @@ -1074,7 +1074,7 @@ Word16 vector_huffman(Word16 category, move16(); } } - acca = L_shr(L_mult(index,(kmax_plus_one)),1); + acca = L_shr_nocheck(L_mult(index,(kmax_plus_one)),1); index = extract_l(acca); index = add(index,k); raw_mlt_ptr++; @@ -1104,7 +1104,7 @@ Word16 vector_huffman(Word16 category, else { j = negate(j); - acca = L_shr(code_bits,j); + acca = L_shr_nocheck(code_bits,j); current_word = L_add(current_word,acca); *word_ptr++ = current_word; diff --git a/third_party/g7221/encode/sam2coef.c b/third_party/g7221/encode/sam2coef.c index 57c8fce8..fc369e0c 100644 --- a/third_party/g7221/encode/sam2coef.c +++ b/third_party/g7221/encode/sam2coef.c @@ -84,7 +84,7 @@ Word16 samples_to_rmlt_coefs(Word16 *new_samples,Word16 *old_samples,Word16 *coe Word16 temp2; Word16 temp5; - half_dct_size = shr(dct_length,1); + half_dct_size = shr_nocheck(dct_length,1); /*++++++++++++++++++++++++++++++++++++++++++++*/ /* Get the first half of the windowed samples */ @@ -208,7 +208,7 @@ Word16 samples_to_rmlt_coefs(Word16 *new_samples,Word16 *old_samples,Word16 *coe move16(); } accb = L_mult(temp,9587); - acca = L_shr(accb,20); + acca = L_shr_nocheck(accb,20); temp5 = extract_l(acca); temp = norm_s(temp5); test(); @@ -230,7 +230,7 @@ Word16 samples_to_rmlt_coefs(Word16 *new_samples,Word16 *old_samples,Word16 *coe acca = L_add(acca,temp); } - acca = L_shr(acca,7); + acca = L_shr_nocheck(acca,7); test(); 
if (temp1 < acca) @@ -243,7 +243,7 @@ Word16 samples_to_rmlt_coefs(Word16 *new_samples,Word16 *old_samples,Word16 *coe { for(index=0;index