Skip to content

Commit

Permalink
audio: integrate audio processor preferences
Browse files Browse the repository at this point in the history
* add check for PulseAudio system echo cancel module
* audio processor preferences can currently only be changed in dring.yml
  * not yet integrated into daemon/client signaling system

Gitlab: #692
Change-Id: I57af6e844acbbfdb5a78d95a87a98873757c506d
  • Loading branch information
Tobias Hildebrandt authored and aberaud committed Aug 9, 2022
1 parent 869c3fe commit aa9a39e
Show file tree
Hide file tree
Showing 13 changed files with 244 additions and 77 deletions.
5 changes: 5 additions & 0 deletions src/media/audio/audio-processing/audio_processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ class AudioProcessor
*/
virtual void enableAutomaticGainControl(bool enabled) = 0;

/**
* @brief Set the status of voice activity detection
*/
virtual void enableVoiceActivityDetection(bool enabled) = 0;

protected:
AudioFrameResizer playbackQueue_;
AudioFrameResizer recordQueue_;
Expand Down
2 changes: 2 additions & 0 deletions src/media/audio/audio-processing/null_audio_processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ class NullAudioProcessor final : public AudioProcessor
// no-op: the null processor performs no noise suppression
void enableNoiseSuppression(bool) override {};

// no-op: the null processor performs no automatic gain control
void enableAutomaticGainControl(bool) override {};

// no-op: the null processor performs no voice activity detection
void enableVoiceActivityDetection(bool) override {};
};

} // namespace jami
17 changes: 16 additions & 1 deletion src/media/audio/audio-processing/speex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,21 @@ SpeexAudioProcessor::enableAutomaticGainControl(bool enabled)
}
}

void
SpeexAudioProcessor::enableVoiceActivityDetection(bool enabled)
{
JAMI_DBG("[speex-dsp] enableVoiceActivityDetection %d", enabled);

shouldDetectVoice = enabled;

spx_int32_t speexSetValue = (spx_int32_t) enabled;
for (auto& channelPreprocessorState : preprocessorStates) {
speex_preprocess_ctl(channelPreprocessorState.get(),
SPEEX_PREPROCESS_SET_VAD,
&speexSetValue);
}
}

std::shared_ptr<AudioFrame>
SpeexAudioProcessor::getProcessed()
{
Expand Down Expand Up @@ -212,7 +227,7 @@ SpeexAudioProcessor::getProcessed()
iProcBuffer.interleave((AudioSample*) processed->pointer()->data[0]);

// add stabilized voice activity to the AudioFrame
processed->has_voice = getStabilizedVoiceActivity(overallVad);
processed->has_voice = shouldDetectVoice && getStabilizedVoiceActivity(overallVad);

return processed;
}
Expand Down
5 changes: 5 additions & 0 deletions src/media/audio/audio-processing/speex.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class SpeexAudioProcessor final : public AudioProcessor
void enableEchoCancel(bool enabled) override;
void enableNoiseSuppression(bool enabled) override;
void enableAutomaticGainControl(bool enabled) override;
void enableVoiceActivityDetection(bool enabled) override;

private:
using SpeexEchoStatePtr = std::unique_ptr<SpeexEchoState, void (*)(SpeexEchoState*)>;
Expand All @@ -59,5 +60,9 @@ class SpeexAudioProcessor final : public AudioProcessor

// if we should do echo cancellation
bool shouldAEC {false};

// if we should do voice activity detection
// preprocess_run returns 1 if vad is disabled, so we have to know whether or not to ignore it
bool shouldDetectVoice {false};
};
} // namespace jami
34 changes: 19 additions & 15 deletions src/media/audio/audio-processing/webrtc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,6 @@ WebRTCAudioProcessor::WebRTCAudioProcessor(AudioFormat format, unsigned frameSiz
JAMI_ERR("[webrtc-ap] Error initialising audio processing module");
}

// voice activity
if (apm->voice_detection()->Enable(true) != webrtcNoError) {
JAMI_ERR("[webrtc-ap] Error enabling voice detection");
}
// TODO: change likelihood?
if (apm->voice_detection()->set_likelihood(webrtc::VoiceDetection::kVeryLowLikelihood)
!= webrtcNoError) {
JAMI_ERR("[webrtc-ap] Error setting voice detection likelihood");
}
// asserted to be 10 in voice_detection_impl.cc
if (apm->voice_detection()->set_frame_size_ms(10) != webrtcNoError) {
JAMI_ERR("[webrtc-ap] Error setting voice detection frame size");
}

JAMI_INFO("[webrtc-ap] Done initializing");
}

Expand Down Expand Up @@ -118,6 +104,23 @@ WebRTCAudioProcessor::enableEchoCancel(bool enabled)
}
}

void
WebRTCAudioProcessor::enableVoiceActivityDetection(bool enabled)
{
    JAMI_DBG("[webrtc-ap] enableVoiceActivityDetection %d", enabled);

    if (apm->voice_detection()->Enable(enabled) != webrtcNoError) {
        JAMI_ERR("[webrtc-ap] Error enabling voice activation detection");
    }

    // TODO: change likelihood? — presumably "very low" keeps the detector
    // sensitive; confirm against webrtc::VoiceDetection docs
    constexpr auto likelihood = webrtc::VoiceDetection::kVeryLowLikelihood;
    if (apm->voice_detection()->set_likelihood(likelihood) != webrtcNoError) {
        JAMI_ERR("[webrtc-ap] Error setting voice detection likelihood");
    }

    // voice_detection_impl.cc asserts the frame size to be 10 ms
    if (apm->voice_detection()->set_frame_size_ms(10) != webrtcNoError) {
        JAMI_ERR("[webrtc-ap] Error setting voice detection frame size");
    }
}

std::shared_ptr<AudioFrame>
WebRTCAudioProcessor::getProcessed()
{
Expand Down Expand Up @@ -193,7 +196,8 @@ WebRTCAudioProcessor::getProcessed()
format_.nb_channels);
iRecordBuffer_.interleave((AudioSample*) processed->pointer()->data[0]);

processed->has_voice = getStabilizedVoiceActivity(apm->voice_detection()->stream_has_voice());
processed->has_voice = apm->voice_detection()->is_enabled()
&& getStabilizedVoiceActivity(apm->voice_detection()->stream_has_voice());

return processed;
}
Expand Down
1 change: 1 addition & 0 deletions src/media/audio/audio-processing/webrtc.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class WebRTCAudioProcessor final : public AudioProcessor
void enableEchoCancel(bool enabled) override;
void enableNoiseSuppression(bool enabled) override;
void enableAutomaticGainControl(bool enabled) override;
void enableVoiceActivityDetection(bool enabled) override;

private:
std::unique_ptr<webrtc::AudioProcessing> apm;
Expand Down
92 changes: 65 additions & 27 deletions src/media/audio/audiolayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,12 @@
#include "tonecontrol.h"
#include "client/ring_signal.h"

// TODO: decide which library to use/how to decide (compile time? runtime?)
#include "audio-processing/null_audio_processor.h"
#if HAVE_WEBRTC_AP
#include "audio-processing/webrtc.h"
#elif HAVE_SPEEXDSP
#endif
#if HAVE_SPEEXDSP
#include "audio-processing/speex.h"
#else
#include "audio-processing/null_audio_processor.h"
#endif

#include <ctime>
Expand All @@ -54,8 +53,16 @@ AudioLayer::AudioLayer(const AudioPreference& pref)
, urgentRingBuffer_("urgentRingBuffer_id", SIZEBUF, audioFormat_)
, resampler_(new Resampler)
, lastNotificationTime_()
, pref_(pref)
{
urgentRingBuffer_.createReadOffset(RingBufferPool::DEFAULT_ID);

JAMI_INFO("[audiolayer] AGC: %d, noiseReduce: %d, VAD: %d, echoCancel: %s, audioProcessor: %s",
pref_.isAGCEnabled(),
pref.getNoiseReduce(),
pref.getVadEnabled(),
pref.getEchoCanceller().c_str(),
pref.getAudioProcessor().c_str());
}

AudioLayer::~AudioLayer() {}
Expand Down Expand Up @@ -120,14 +127,27 @@ AudioLayer::recordChanged(bool started)
recordStarted_ = started;
}

/**
 * @brief Decide whether the software audio processor's echo canceller
 *        should be active.
 *
 * @param hasNativeAEC      whether a native/system echo canceller is available
 * @param echoCancellerPref the user's echo-cancel preference
 *                          ("auto", "audioProcessor", ...)
 * @return true when the audio processor should perform echo cancellation
 */
static inline bool
shouldUseAudioProcessorEchoCancel(bool hasNativeAEC, const std::string& echoCancellerPref)
{
    return
        // user doesn't care which one; fall back to the audio processor
        // only when no native/system AEC is available
        (echoCancellerPref == "auto" && !hasNativeAEC)
        // user specifically wants the audio processor's echo canceller
        or (echoCancellerPref == "audioProcessor");
}

void
AudioLayer::setHasNativeAEC(bool hasEAC)
AudioLayer::setHasNativeAEC(bool hasNativeAEC)
{
JAMI_INFO("[audiolayer] setHasNativeAEC: %d", hasNativeAEC);
std::lock_guard<std::mutex> lock(audioProcessorMutex);
hasNativeAEC_ = hasEAC;
hasNativeAEC_ = hasNativeAEC;
// if we have a current audio processor, tell it to enable/disable its own AEC
if (audioProcessor) {
audioProcessor->enableEchoCancel(!hasEAC);
audioProcessor->enableEchoCancel(
shouldUseAudioProcessorEchoCancel(hasNativeAEC, pref_.getEchoCanceller()));
}
}

Expand All @@ -145,14 +165,14 @@ AudioLayer::createAudioProcessor()

AudioFormat formatForProcessor {sample_rate, nb_channels};

#if HAVE_SPEEXDSP && !HAVE_WEBRTC_AP
// we are using speex
// TODO: maybe force this to be equivalent to 20ms? as expected by speex
auto frame_size = sample_rate / 50u;
#else
// we are using either webrtc-ap or null
auto frame_size = sample_rate / 100u;
#endif
unsigned int frame_size;
if (pref_.getAudioProcessor() == "speex") {
// TODO: maybe force this to be equivalent to 20ms? as expected by speex
frame_size = sample_rate / 50u;
} else {
frame_size = sample_rate / 100u;
}

JAMI_WARN("Input {%d Hz, %d channels}",
audioInputFormat_.sample_rate,
audioInputFormat_.nb_channels);
Expand All @@ -162,23 +182,41 @@ AudioLayer::createAudioProcessor()
nb_channels,
frame_size);

if (pref_.getAudioProcessor() == "webrtc") {
#if HAVE_WEBRTC_AP
JAMI_INFO("[audiolayer] using webrtc audio processor");
audioProcessor.reset(new WebRTCAudioProcessor(formatForProcessor, frame_size));
#elif HAVE_SPEEXDSP
JAMI_INFO("[audiolayer] using speex audio processor");
audioProcessor.reset(new SpeexAudioProcessor(formatForProcessor, frame_size));
JAMI_WARN("[audiolayer] using WebRTCAudioProcessor");
audioProcessor.reset(new WebRTCAudioProcessor(formatForProcessor, frame_size));
#else
JAMI_ERR("[audiolayer] audioProcessor preference is webrtc, but library not linked! "
"using NullAudioProcessor instead");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
#endif
} else if (pref_.getAudioProcessor() == "speex") {
#if HAVE_SPEEXDSP
JAMI_WARN("[audiolayer] using SpeexAudioProcessor");
audioProcessor.reset(new SpeexAudioProcessor(formatForProcessor, frame_size));
#else
JAMI_INFO("[audiolayer] using null audio processor");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
JAMI_ERR("[audiolayer] audioProcessor preference is speex, but library not linked! "
"using NullAudioProcessor instead");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
#endif
} else if (pref_.getAudioProcessor() == "null") {
JAMI_WARN("[audiolayer] using NullAudioProcessor");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
} else {
JAMI_ERR("[audiolayer] audioProcessor preference not recognized, using NullAudioProcessor "
"instead");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
}

audioProcessor->enableNoiseSuppression(pref_.getNoiseReduce());

audioProcessor->enableAutomaticGainControl(pref_.isAGCEnabled());

audioProcessor->enableNoiseSuppression(true);
// TODO: enable AGC?
audioProcessor->enableAutomaticGainControl(false);
audioProcessor->enableEchoCancel(
shouldUseAudioProcessorEchoCancel(hasNativeAEC_, pref_.getEchoCanceller()));

// can also be updated after creation via setHasNativeAEC
audioProcessor->enableEchoCancel(!hasNativeAEC_);
audioProcessor->enableVoiceActivityDetection(pref_.getVadEnabled());
}

// must acquire lock beforehand
Expand Down
3 changes: 3 additions & 0 deletions src/media/audio/audiolayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,9 @@ class AudioLayer
*/
double playbackGain_;

// audio processor preferences
const AudioPreference& pref_;

/**
* Buffers for audio processing
*/
Expand Down
Loading

0 comments on commit aa9a39e

Please sign in to comment.