diff options
author | Benny Prijono <bennylp@teluu.com> | 2009-04-18 14:29:28 +0000 |
---|---|---|
committer | Benny Prijono <bennylp@teluu.com> | 2009-04-18 14:29:28 +0000 |
commit | 345cd27b4a25e0c326ed1efc6f7f8372bde4ed6b (patch) | |
tree | 23f45ad5fcde6553d7d0dcfa5065b22c94edc790 /third_party/g7221/encode/dct4_a.c | |
parent | a1af7e95f02cb3744d53b17671ce52cfb5edc7f8 (diff) |
More ticket #774: optimization for siren7/siren14 codecs
git-svn-id: http://svn.pjsip.org/repos/pjproject/trunk@2616 74dad513-b988-da41-8d7b-12977e46ad98
Diffstat (limited to 'third_party/g7221/encode/dct4_a.c')
-rw-r--r-- | third_party/g7221/encode/dct4_a.c | 33 |
1 files changed, 24 insertions, 9 deletions
```diff
diff --git a/third_party/g7221/encode/dct4_a.c b/third_party/g7221/encode/dct4_a.c
index 4d7c7650..94ddf9fe 100644
--- a/third_party/g7221/encode/dct4_a.c
+++ b/third_party/g7221/encode/dct4_a.c
@@ -135,9 +135,9 @@ void dct_type_iv_a (Word16 *input,Word16 *output,Word16 dct_length)
     /*===========================================================*/
 
     /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */
-    set_span = shr(dct_length,set_count_log);
+    set_span = shr_nocheck(dct_length,set_count_log);
 
-    set_count = shl(1,set_count_log);
+    set_count = shl_nocheck(1,set_count_log);
 
     in_ptr = in_buffer;
     move16();
@@ -168,12 +168,15 @@ void dct_type_iv_a (Word16 *input,Word16 *output,Word16 dct_length)
     {
         in_val_low = *in_ptr++;
         in_val_high = *in_ptr++;
-        acca = L_add(in_val_low,in_val_high);
-        acca = L_shr(acca,1);
+        // blp: addition of two 16bits vars, there's no way
+        // they'll overflow a 32bit var
+        //acca = L_add(in_val_low,in_val_high);
+        acca = (in_val_low + in_val_high);
+        acca = L_shr_nocheck(acca,1);
         out_val_low = extract_l(acca);
 
         acca = L_sub(in_val_low,in_val_high);
-        acca = L_shr(acca,1);
+        acca = L_shr_nocheck(acca,1);
         out_val_high = extract_l(acca);
 
         *out_ptr_low++ = out_val_low;
@@ -213,18 +216,30 @@ void dct_type_iv_a (Word16 *input,Word16 *output,Word16 dct_length)
     move16();
 
     temp = sub(dct_length_log,1);
-    temp = shl(1,temp);
+    temp = shl_nocheck(1,temp);
 
     for (pairs_left=temp; pairs_left > 0; pairs_left--)
     {
         for ( k=0; k<CORE_SIZE; k++ )
         {
+#if PJ_HAS_INT64
+            /* blp: danger danger! not really compatible but faster */
+            pj_int64_t sum64=0;
+            move32();
+
+            for ( i=0; i<CORE_SIZE; i++ )
+            {
+                sum64 += L_mult(pair_ptr[i], dct_core_a[i][k]);
+            }
+            sum = L_saturate(sum64);
+#else
             sum=0L;
             move32();
             for ( i=0; i<CORE_SIZE; i++ )
             {
                 sum = L_mac(sum, pair_ptr[i],dct_core_a[i][k]);
             }
+#endif
             buffer_swap[k] = itu_round(sum);
         }
         /* address arithmetic */
@@ -250,9 +265,9 @@ void dct_type_iv_a (Word16 *input,Word16 *output,Word16 dct_length)
     /* Initialization for the loop over sets at the current size */
     /*===========================================================*/
     /* set_span = 1 << (DCT_LENGTH_LOG - set_count_log); */
-    set_span = shr(dct_length,set_count_log);
+    set_span = shr_nocheck(dct_length,set_count_log);
 
-    set_count = shl(1,set_count_log);
+    set_count = shl_nocheck(1,set_count_log);
 
     next_in_base = in_buffer;
     move16();
@@ -277,7 +292,7 @@ void dct_type_iv_a (Word16 *input,Word16 *output,Word16 dct_length)
     /*|||||||||||||||||||||||||||||||||||||||||*/
     in_ptr_low = next_in_base;
     move16();
-    temp = shr(set_span,1);
+    temp = shr_nocheck(set_span,1);
 
     /* address arithmetic */
     in_ptr_high = in_ptr_low + temp;
```