Skip to content

Commit

Permalink
audio: integrate audio processor preferences
Browse files Browse the repository at this point in the history
* add check for PulseAudio system echo cancel module
* audio processor preferences can currently only be changed in dring.yml
  * not yet integrated into daemon/client signaling system

Gitlab: #692
Change-Id: I57af6e844acbbfdb5a78d95a87a98873757c506d
  • Loading branch information
Tobias Hildebrandt authored and aberaud committed Aug 9, 2022
1 parent 869c3fe commit aa9a39e
Show file tree
Hide file tree
Showing 13 changed files with 244 additions and 77 deletions.
5 changes: 5 additions & 0 deletions src/media/audio/audio-processing/audio_processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ class AudioProcessor
*/
virtual void enableAutomaticGainControl(bool enabled) = 0;

/**
* @brief Set the status of voice activity detection
*/
virtual void enableVoiceActivityDetection(bool enabled) = 0;

protected:
AudioFrameResizer playbackQueue_;
AudioFrameResizer recordQueue_;
Expand Down
2 changes: 2 additions & 0 deletions src/media/audio/audio-processing/null_audio_processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ class NullAudioProcessor final : public AudioProcessor
// no-op: the null processor performs no noise suppression
void enableNoiseSuppression(bool) override {};

// no-op: the null processor performs no automatic gain control
void enableAutomaticGainControl(bool) override {};

// no-op: the null processor performs no voice activity detection
void enableVoiceActivityDetection(bool) override {};
};

} // namespace jami
17 changes: 16 additions & 1 deletion src/media/audio/audio-processing/speex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,21 @@ SpeexAudioProcessor::enableAutomaticGainControl(bool enabled)
}
}

void
SpeexAudioProcessor::enableVoiceActivityDetection(bool enabled)
{
JAMI_DBG("[speex-dsp] enableVoiceActivityDetection %d", enabled);

shouldDetectVoice = enabled;

spx_int32_t speexSetValue = (spx_int32_t) enabled;
for (auto& channelPreprocessorState : preprocessorStates) {
speex_preprocess_ctl(channelPreprocessorState.get(),
SPEEX_PREPROCESS_SET_VAD,
&speexSetValue);
}
}

std::shared_ptr<AudioFrame>
SpeexAudioProcessor::getProcessed()
{
Expand Down Expand Up @@ -212,7 +227,7 @@ SpeexAudioProcessor::getProcessed()
iProcBuffer.interleave((AudioSample*) processed->pointer()->data[0]);

// add stabilized voice activity to the AudioFrame
processed->has_voice = getStabilizedVoiceActivity(overallVad);
processed->has_voice = shouldDetectVoice && getStabilizedVoiceActivity(overallVad);

return processed;
}
Expand Down
5 changes: 5 additions & 0 deletions src/media/audio/audio-processing/speex.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class SpeexAudioProcessor final : public AudioProcessor
void enableEchoCancel(bool enabled) override;
void enableNoiseSuppression(bool enabled) override;
void enableAutomaticGainControl(bool enabled) override;
void enableVoiceActivityDetection(bool enabled) override;

private:
using SpeexEchoStatePtr = std::unique_ptr<SpeexEchoState, void (*)(SpeexEchoState*)>;
Expand All @@ -59,5 +60,9 @@ class SpeexAudioProcessor final : public AudioProcessor

// if we should do echo cancellation
bool shouldAEC {false};

// if we should do voice activity detection
// preprocess_run returns 1 if vad is disabled, so we have to know whether or not to ignore it
bool shouldDetectVoice {false};
};
} // namespace jami
34 changes: 19 additions & 15 deletions src/media/audio/audio-processing/webrtc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,6 @@ WebRTCAudioProcessor::WebRTCAudioProcessor(AudioFormat format, unsigned frameSiz
JAMI_ERR("[webrtc-ap] Error initialising audio processing module");
}

// voice activity
if (apm->voice_detection()->Enable(true) != webrtcNoError) {
JAMI_ERR("[webrtc-ap] Error enabling voice detection");
}
// TODO: change likelihood?
if (apm->voice_detection()->set_likelihood(webrtc::VoiceDetection::kVeryLowLikelihood)
!= webrtcNoError) {
JAMI_ERR("[webrtc-ap] Error setting voice detection likelihood");
}
// asserted to be 10 in voice_detection_impl.cc
if (apm->voice_detection()->set_frame_size_ms(10) != webrtcNoError) {
JAMI_ERR("[webrtc-ap] Error setting voice detection frame size");
}

JAMI_INFO("[webrtc-ap] Done initializing");
}

Expand Down Expand Up @@ -118,6 +104,23 @@ WebRTCAudioProcessor::enableEchoCancel(bool enabled)
}
}

void
WebRTCAudioProcessor::enableVoiceActivityDetection(bool enabled)
{
    JAMI_DBG("[webrtc-ap] enableVoiceActivityDetection %d", enabled);

    if (apm->voice_detection()->Enable(enabled) != webrtcNoError) {
        JAMI_ERR("[webrtc-ap] Error enabling voice activation detection");
    }

    // TODO: change likelihood? — presumably "very low" keeps the detector
    // sensitive; confirm against webrtc::VoiceDetection docs
    constexpr auto likelihood = webrtc::VoiceDetection::kVeryLowLikelihood;
    if (apm->voice_detection()->set_likelihood(likelihood) != webrtcNoError) {
        JAMI_ERR("[webrtc-ap] Error setting voice detection likelihood");
    }

    // voice_detection_impl.cc asserts the frame size to be 10 ms
    if (apm->voice_detection()->set_frame_size_ms(10) != webrtcNoError) {
        JAMI_ERR("[webrtc-ap] Error setting voice detection frame size");
    }
}

std::shared_ptr<AudioFrame>
WebRTCAudioProcessor::getProcessed()
{
Expand Down Expand Up @@ -193,7 +196,8 @@ WebRTCAudioProcessor::getProcessed()
format_.nb_channels);
iRecordBuffer_.interleave((AudioSample*) processed->pointer()->data[0]);

processed->has_voice = getStabilizedVoiceActivity(apm->voice_detection()->stream_has_voice());
processed->has_voice = apm->voice_detection()->is_enabled()
&& getStabilizedVoiceActivity(apm->voice_detection()->stream_has_voice());

return processed;
}
Expand Down
1 change: 1 addition & 0 deletions src/media/audio/audio-processing/webrtc.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class WebRTCAudioProcessor final : public AudioProcessor
void enableEchoCancel(bool enabled) override;
void enableNoiseSuppression(bool enabled) override;
void enableAutomaticGainControl(bool enabled) override;
void enableVoiceActivityDetection(bool enabled) override;

private:
std::unique_ptr<webrtc::AudioProcessing> apm;
Expand Down
92 changes: 65 additions & 27 deletions src/media/audio/audiolayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,12 @@
#include "tonecontrol.h"
#include "client/ring_signal.h"

// TODO: decide which library to use/how to decide (compile time? runtime?)
#include "audio-processing/null_audio_processor.h"
#if HAVE_WEBRTC_AP
#include "audio-processing/webrtc.h"
#elif HAVE_SPEEXDSP
#endif
#if HAVE_SPEEXDSP
#include "audio-processing/speex.h"
#else
#include "audio-processing/null_audio_processor.h"
#endif

#include <ctime>
Expand All @@ -54,8 +53,16 @@ AudioLayer::AudioLayer(const AudioPreference& pref)
, urgentRingBuffer_("urgentRingBuffer_id", SIZEBUF, audioFormat_)
, resampler_(new Resampler)
, lastNotificationTime_()
, pref_(pref)
{
urgentRingBuffer_.createReadOffset(RingBufferPool::DEFAULT_ID);

JAMI_INFO("[audiolayer] AGC: %d, noiseReduce: %d, VAD: %d, echoCancel: %s, audioProcessor: %s",
pref_.isAGCEnabled(),
pref.getNoiseReduce(),
pref.getVadEnabled(),
pref.getEchoCanceller().c_str(),
pref.getAudioProcessor().c_str());
}

AudioLayer::~AudioLayer() {}
Expand Down Expand Up @@ -120,14 +127,27 @@ AudioLayer::recordChanged(bool started)
recordStarted_ = started;
}

/**
 * @brief Decide whether the software audio processor's echo canceller
 *        should be active.
 *
 * @param hasNativeAEC      whether a native/system echo canceller is available
 * @param echoCancellerPref the user's echo-cancel preference
 *                          ("auto", "audioProcessor", ...)
 * @return true when the audio processor should perform echo cancellation
 */
static inline bool
shouldUseAudioProcessorEchoCancel(bool hasNativeAEC, const std::string& echoCancellerPref)
{
    return
        // user doesn't care which one; fall back to the audio processor
        // only when no native/system AEC is available
        (echoCancellerPref == "auto" && !hasNativeAEC)
        // user specifically wants the audio processor's echo canceller
        or (echoCancellerPref == "audioProcessor");
}

void
AudioLayer::setHasNativeAEC(bool hasEAC)
AudioLayer::setHasNativeAEC(bool hasNativeAEC)
{
JAMI_INFO("[audiolayer] setHasNativeAEC: %d", hasNativeAEC);
std::lock_guard<std::mutex> lock(audioProcessorMutex);
hasNativeAEC_ = hasEAC;
hasNativeAEC_ = hasNativeAEC;
// if we have a current audio processor, tell it to enable/disable its own AEC
if (audioProcessor) {
audioProcessor->enableEchoCancel(!hasEAC);
audioProcessor->enableEchoCancel(
shouldUseAudioProcessorEchoCancel(hasNativeAEC, pref_.getEchoCanceller()));
}
}

Expand All @@ -145,14 +165,14 @@ AudioLayer::createAudioProcessor()

AudioFormat formatForProcessor {sample_rate, nb_channels};

#if HAVE_SPEEXDSP && !HAVE_WEBRTC_AP
// we are using speex
// TODO: maybe force this to be equivalent to 20ms? as expected by speex
auto frame_size = sample_rate / 50u;
#else
// we are using either webrtc-ap or null
auto frame_size = sample_rate / 100u;
#endif
unsigned int frame_size;
if (pref_.getAudioProcessor() == "speex") {
// TODO: maybe force this to be equivalent to 20ms? as expected by speex
frame_size = sample_rate / 50u;
} else {
frame_size = sample_rate / 100u;
}

JAMI_WARN("Input {%d Hz, %d channels}",
audioInputFormat_.sample_rate,
audioInputFormat_.nb_channels);
Expand All @@ -162,23 +182,41 @@ AudioLayer::createAudioProcessor()
nb_channels,
frame_size);

if (pref_.getAudioProcessor() == "webrtc") {
#if HAVE_WEBRTC_AP
JAMI_INFO("[audiolayer] using webrtc audio processor");
audioProcessor.reset(new WebRTCAudioProcessor(formatForProcessor, frame_size));
#elif HAVE_SPEEXDSP
JAMI_INFO("[audiolayer] using speex audio processor");
audioProcessor.reset(new SpeexAudioProcessor(formatForProcessor, frame_size));
JAMI_WARN("[audiolayer] using WebRTCAudioProcessor");
audioProcessor.reset(new WebRTCAudioProcessor(formatForProcessor, frame_size));
#else
JAMI_ERR("[audiolayer] audioProcessor preference is webrtc, but library not linked! "
"using NullAudioProcessor instead");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
#endif
} else if (pref_.getAudioProcessor() == "speex") {
#if HAVE_SPEEXDSP
JAMI_WARN("[audiolayer] using SpeexAudioProcessor");
audioProcessor.reset(new SpeexAudioProcessor(formatForProcessor, frame_size));
#else
JAMI_INFO("[audiolayer] using null audio processor");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
JAMI_ERR("[audiolayer] audioProcessor preference is speex, but library not linked! "
"using NullAudioProcessor instead");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
#endif
} else if (pref_.getAudioProcessor() == "null") {
JAMI_WARN("[audiolayer] using NullAudioProcessor");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
} else {
JAMI_ERR("[audiolayer] audioProcessor preference not recognized, using NullAudioProcessor "
"instead");
audioProcessor.reset(new NullAudioProcessor(formatForProcessor, frame_size));
}

audioProcessor->enableNoiseSuppression(pref_.getNoiseReduce());

audioProcessor->enableAutomaticGainControl(pref_.isAGCEnabled());

audioProcessor->enableNoiseSuppression(true);
// TODO: enable AGC?
audioProcessor->enableAutomaticGainControl(false);
audioProcessor->enableEchoCancel(
shouldUseAudioProcessorEchoCancel(hasNativeAEC_, pref_.getEchoCanceller()));

// can also be updated after creation via setHasNativeAEC
audioProcessor->enableEchoCancel(!hasNativeAEC_);
audioProcessor->enableVoiceActivityDetection(pref_.getVadEnabled());
}

// must acquire lock beforehand
Expand Down
3 changes: 3 additions & 0 deletions src/media/audio/audiolayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,9 @@ class AudioLayer
*/
double playbackGain_;

// audio processor preferences
const AudioPreference& pref_;

/**
* Buffers for audio processing
*/
Expand Down
Loading

0 comments on commit aa9a39e

Please sign in to comment.