Improved audio processing with basic mono-to-stereo channel up-mixing to avoid the need for an additional audio converter

devopvoid · devopvoid · commit e665a4a69aaf · 2021-11-23T16:52:39.000+01:00
diff --git a/webrtc-jni/src/main/cpp/src/JNI_AudioProcessing.cpp b/webrtc-jni/src/main/cpp/src/JNI_AudioProcessing.cpp
@@ -27,6 +27,7 @@
 #include "media/audio/AudioProcessing.h"
 #include "media/audio/AudioProcessingConfig.h"
 #include "media/audio/AudioProcessingStreamConfig.h"
+#include "api/audio/audio_frame.h"
 #include "api/scoped_refptr.h"
 #include "modules/audio_processing/include/audio_processing.h"
 #include "rtc_base/logging.h"
@@ -72,7 +73,35 @@ JNIEXPORT jint JNICALL Java_dev_onvoid_webrtc_media_audio_AudioProcessing_proces
 	jbyte * srcPtr = env->GetByteArrayElements(src, nullptr);
 	jbyte * dstPtr = env->GetByteArrayElements(dest, &isDstCopy);
 
-	int result = apm->ProcessStream(reinterpret_cast<const int16_t *>(srcPtr), srcConfig, dstConfig, reinterpret_cast<int16_t *>(dstPtr));
+	int result;
+
+	if (srcConfig.num_channels() == 1 && dstConfig.num_channels() == 2) {
+		// Up-mixing, only mono to stereo.
+		// For complex channel layouts an audio converter is required.
+
+		const size_t srcNumSamples = srcConfig.num_samples();
+		const size_t dstNumChannels = dstConfig.num_channels();
+		const size_t frameSize = srcNumSamples * dstNumChannels;
+
+		if (frameSize > webrtc::AudioFrame::kMaxDataSizeSamples) {
+			return -9;
+		}
+
+		const int16_t * srcFrame = reinterpret_cast<const int16_t *>(srcPtr);
+		int16_t * dstFrame = reinterpret_cast<int16_t*>(dstPtr);
+
+		for (int i = srcNumSamples - 1; i >= 0; i--) {
+			for (size_t j = 0; j < dstNumChannels; ++j) {
+				dstFrame[dstNumChannels * i + j] = srcFrame[i];
+			}
+		}
+
+		result = apm->ProcessStream(dstFrame, srcConfig, dstConfig, dstFrame);
+	}
+	else {
+		// Will also down-mix if required, e.g. from stereo to mono.
+		result = apm->ProcessStream(reinterpret_cast<const int16_t *>(srcPtr), srcConfig, dstConfig, reinterpret_cast<int16_t *>(dstPtr));
+	}
 
 	if (isDstCopy == JNI_TRUE) {
 		jsize dstLength = env->GetArrayLength(dest);
@@ -100,7 +129,35 @@ JNIEXPORT jint JNICALL Java_dev_onvoid_webrtc_media_audio_AudioProcessing_proces
 	jbyte * srcPtr = env->GetByteArrayElements(src, nullptr);
 	jbyte * dstPtr = env->GetByteArrayElements(dest, &isDstCopy);
 
-	int result = apm->ProcessReverseStream(reinterpret_cast<int16_t *>(srcPtr), srcConfig, dstConfig, reinterpret_cast<int16_t *>(dstPtr));
+	int result;
+
+	if (srcConfig.num_channels() == 1 && dstConfig.num_channels() == 2) {
+		// Up-mixing, only mono to stereo.
+		// For complex channel layouts an audio converter is required.
+
+		const size_t srcNumSamples = srcConfig.num_samples();
+		const size_t dstNumChannels = dstConfig.num_channels();
+		const size_t frameSize = srcNumSamples * dstNumChannels;
+
+		if (frameSize > webrtc::AudioFrame::kMaxDataSizeSamples) {
+			return -9;
+		}
+
+		const int16_t * srcFrame = reinterpret_cast<const int16_t *>(srcPtr);
+		int16_t * dstFrame = reinterpret_cast<int16_t *>(dstPtr);
+
+		for (int i = srcNumSamples - 1; i >= 0; i--) {
+			for (size_t j = 0; j < dstNumChannels; ++j) {
+				dstFrame[dstNumChannels * i + j] = srcFrame[i];
+			}
+		}
+
+		result = apm->ProcessStream(dstFrame, srcConfig, dstConfig, dstFrame);
+	}
+	else {
+		// Will also down-mix if required, e.g. from stereo to mono.
+		result = apm->ProcessStream(reinterpret_cast<const int16_t *>(srcPtr), srcConfig, dstConfig, reinterpret_cast<int16_t *>(dstPtr));
+	}
 
 	if (isDstCopy == JNI_TRUE) {
 		jsize dstLength = env->GetArrayLength(dest);
diff --git a/webrtc/src/main/java/dev/onvoid/webrtc/media/audio/AudioConverter.java b/webrtc/src/main/java/dev/onvoid/webrtc/media/audio/AudioConverter.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2021 Alex Andres
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package dev.onvoid.webrtc.media.audio;
+
+import dev.onvoid.webrtc.internal.DisposableNativeObject;
+
+/**
+ * Java-side handle for a native audio converter that is configured with a
+ * source and a destination format, each given as a sample rate and a channel
+ * count.
+ * <p>
+ * All work is delegated to native methods; this class contains no conversion
+ * logic of its own. NOTE(review): the exact conversion semantics are defined by
+ * the native implementation, which is not visible from this file.
+ */
+public class AudioConverter extends DisposableNativeObject {
+
+	/**
+	 * Creates a converter by forwarding the given source and destination
+	 * formats to the native {@code initialize} method.
+	 *
+	 * @param srcSampleRate The sample rate of the source audio (presumably in
+	 *                      Hz; confirm against the native implementation).
+	 * @param srcChannels   The number of channels of the source audio.
+	 * @param dstSampleRate The sample rate of the destination audio
+	 *                      (presumably in Hz; confirm).
+	 * @param dstChannels   The number of channels of the destination audio.
+	 */
+	public AudioConverter(int srcSampleRate, int srcChannels, int dstSampleRate,
+			int dstChannels) {
+		// Native call made from the constructor; the native side receives all
+		// four format values unchanged.
+		initialize(srcSampleRate, srcChannels, dstSampleRate, dstChannels);
+	}
+
+	/**
+	 * Native conversion entry point.
+	 * <p>
+	 * NOTE(review): presumably reads {@code nSrcSamples} samples from
+	 * {@code src} and writes up to {@code nDstSamples} converted samples into
+	 * {@code dst}. Sample encoding, whether the counts are per channel, and
+	 * the meaning of the return value must be confirmed against the native
+	 * implementation.
+	 *
+	 * @param src         The buffer holding the source audio bytes.
+	 * @param nSrcSamples The number of source samples.
+	 * @param dst         The buffer receiving the destination audio bytes.
+	 * @param nDstSamples The number of destination samples.
+	 *
+	 * @return an integer result supplied by the native code (meaning to be
+	 *         confirmed).
+	 */
+	public native int convert(byte[] src, int nSrcSamples, byte[] dst, int nDstSamples);
+
+	/**
+	 * Native override of {@code dispose()}. Presumably releases the native
+	 * resources backing this converter; confirm against the native
+	 * implementation.
+	 */
+	@Override
+	public native void dispose();
+
+	/**
+	 * Native initializer, called from the constructor with the source and
+	 * destination formats.
+	 *
+	 * @param srcSampleRate The sample rate of the source audio.
+	 * @param srcChannels   The number of channels of the source audio.
+	 * @param dstSampleRate The sample rate of the destination audio.
+	 * @param dstChannels   The number of channels of the destination audio.
+	 */
+	private native void initialize(int srcSampleRate, int srcChannels,
+			int dstSampleRate, int dstChannels);
+
+}