diff options
Diffstat (limited to 'pjmedia/src/pjmedia-codec/speex/speex_preprocess.h')
-rw-r--r-- | pjmedia/src/pjmedia-codec/speex/speex_preprocess.h | 141 |
1 files changed, 69 insertions, 72 deletions
diff --git a/pjmedia/src/pjmedia-codec/speex/speex_preprocess.h b/pjmedia/src/pjmedia-codec/speex/speex_preprocess.h index 5bb3a2c4..a6845b42 100644 --- a/pjmedia/src/pjmedia-codec/speex/speex_preprocess.h +++ b/pjmedia/src/pjmedia-codec/speex/speex_preprocess.h @@ -1,8 +1,10 @@ /* Copyright (C) 2003 Epic Games Written by Jean-Marc Valin */ /** - @file speex_preprocess.h - @brief Speex preprocessor + * @file speex_preprocess.h + * @brief Speex preprocessor. The preprocess can do noise suppression, + * residual echo suppression (after using the echo canceller), automatic + * gain control (AGC) and voice activity detection (VAD). */ /* Redistribution and use in source and binary forms, with or without @@ -34,91 +36,61 @@ #ifndef SPEEX_PREPROCESS_H #define SPEEX_PREPROCESS_H +/** @defgroup SpeexPreprocessState SpeexPreprocessState: The Speex preprocessor + * This is the Speex preprocessor. The preprocess can do noise suppression, + * residual echo suppression (after using the echo canceller), automatic + * gain control (AGC) and voice activity detection (VAD). + * @{ + */ #include "speex/speex_types.h" #ifdef __cplusplus extern "C" { #endif + +/** State of the preprocessor (one per channel). Should never be accessed directly. */ +struct SpeexPreprocessState_; -struct drft_lookup; +/** State of the preprocessor (one per channel). Should never be accessed directly. */ +typedef struct SpeexPreprocessState_ SpeexPreprocessState; -/** Speex pre-processor state. 
*/ -typedef struct SpeexPreprocessState { - int frame_size; /**< Number of samples processed each time */ - int ps_size; /**< Number of points in the power spectrum */ - int sampling_rate; /**< Sampling rate of the input/output */ - - /* parameters */ - int denoise_enabled; - int agc_enabled; - float agc_level; - int vad_enabled; - int dereverb_enabled; - float reverb_decay; - float reverb_level; - float speech_prob_start; - float speech_prob_continue; - - float *frame; /**< Processing frame (2*ps_size) */ - float *ps; /**< Current power spectrum */ - float *gain2; /**< Adjusted gains */ - float *window; /**< Analysis/Synthesis window */ - float *noise; /**< Noise estimate */ - float *reverb_estimate; /**< Estimate of reverb energy */ - float *old_ps; /**< Power spectrum for last frame */ - float *gain; /**< Ephraim Malah gain */ - float *prior; /**< A-priori SNR */ - float *post; /**< A-posteriori SNR */ - - float *S; /**< Smoothed power spectrum */ - float *Smin; /**< See Cohen paper */ - float *Stmp; /**< See Cohen paper */ - float *update_prob; /**< Propability of speech presence for noise update */ - - float *zeta; /**< Smoothed a priori SNR */ - float Zpeak; - float Zlast; - - float *loudness_weight; /**< Perceptual loudness curve */ - - float *echo_noise; - - float *noise_bands; - float *noise_bands2; - int noise_bandsN; - float *speech_bands; - float *speech_bands2; - int speech_bandsN; - - float *inbuf; /**< Input buffer (overlapped analysis) */ - float *outbuf; /**< Output buffer (for overlap and add) */ - - float speech_prob; - int last_speech; - float loudness; /**< loudness estimate */ - float loudness2; /**< loudness estimate */ - int nb_adapt; /**< Number of frames used for adaptation so far */ - int nb_loudness_adapt; /**< Number of frames used for loudness adaptation so far */ - int consec_noise; /**< Number of consecutive noise frames */ - int nb_preprocess; /**< Number of frames processed so far */ - struct drft_lookup *fft_lookup; /**< Lookup 
table for the FFT */ - -} SpeexPreprocessState; - -/** Creates a new preprocessing state */ + +/** Creates a new preprocessing state. You MUST create one state per channel processed. + * @param frame_size Number of samples to process at one time (should correspond to 10-20 ms). Must be + * the same value as that used for the echo canceller for residual echo cancellation to work. + * @param sampling_rate Sampling rate used for the input. + * @return Newly created preprocessor state +*/ SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate); -/** Destroys a denoising state */ +/** Destroys a preprocessor state + * @param st Preprocessor state to destroy +*/ void speex_preprocess_state_destroy(SpeexPreprocessState *st); -/** Preprocess a frame */ +/** Preprocess a frame + * @param st Preprocessor state + * @param x Audio sample vector (in and out). Must be same size as specified in speex_preprocess_state_init(). + * @return Bool value for voice activity (1 for speech, 0 for noise/silence), ONLY if VAD turned on. +*/ +int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x); + +/** Preprocess a frame (deprecated, use speex_preprocess_run() instead)*/ int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo); -/** Preprocess a frame */ -void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo); +/** Update preprocessor state, but do not compute the output + * @param st Preprocessor state + * @param x Audio sample vector (in only). Must be same size as specified in speex_preprocess_state_init(). 
+*/ +void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x); -/** Used like the ioctl function to control the preprocessor parameters */ +/** Used like the ioctl function to control the preprocessor parameters + * @param st Preprocessor state + * @param request ioctl-type request (one of the SPEEX_PREPROCESS_* macros) + * @param ptr Data exchanged to-from function + * @return 0 if no error, -1 if request is unknown +*/ int speex_preprocess_ctl(SpeexPreprocessState *st, int request, void *ptr); @@ -158,14 +130,39 @@ int speex_preprocess_ctl(SpeexPreprocessState *st, int request, void *ptr); /** Get preprocessor dereverb decay */ #define SPEEX_PREPROCESS_GET_DEREVERB_DECAY 13 +/** Set probability required for the VAD to go from silence to voice */ #define SPEEX_PREPROCESS_SET_PROB_START 14 +/** Get probability required for the VAD to go from silence to voice */ #define SPEEX_PREPROCESS_GET_PROB_START 15 +/** Set probability required for the VAD to stay in the voice state (integer percent) */ #define SPEEX_PREPROCESS_SET_PROB_CONTINUE 16 +/** Get probability required for the VAD to stay in the voice state (integer percent) */ #define SPEEX_PREPROCESS_GET_PROB_CONTINUE 17 +/** Set maximum attenuation of the noise in dB (negative number) */ +#define SPEEX_PREPROCESS_SET_NOISE_SUPPRESS 18 +/** Get maximum attenuation of the noise in dB (negative number) */ +#define SPEEX_PREPROCESS_GET_NOISE_SUPPRESS 19 + +/** Set maximum attenuation of the residual echo in dB (negative number) */ +#define SPEEX_PREPROCESS_SET_ECHO_SUPPRESS 20 +/** Get maximum attenuation of the residual echo in dB (negative number) */ +#define SPEEX_PREPROCESS_GET_ECHO_SUPPRESS 21 + +/** Set maximum attenuation of the residual echo in dB when near end is active (negative number) */ +#define SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE 22 +/** Get maximum attenuation of the residual echo in dB when near end is active (negative number) */ +#define
SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE 23 + +/** Set the corresponding echo canceller state so that residual echo suppression can be performed (NULL for no residual echo suppression) */ +#define SPEEX_PREPROCESS_SET_ECHO_STATE 24 +/** Get the corresponding echo canceller state */ +#define SPEEX_PREPROCESS_GET_ECHO_STATE 25 + #ifdef __cplusplus } #endif +/** @}*/ #endif |