Skip to content

In-app audio capture format converter #598

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 41 additions & 21 deletions Sources/LiveKit/Audio/DefaultMixerAudioObserver.swift
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ public final class DefaultMixerAudioObserver: AudioEngineObserver, Loggable {
_state.read { $0.micNode }
}

var isConnected: Bool {
_state.read { $0.isConnected }
}

struct State {
var next: (any AudioEngineObserver)?

Expand All @@ -65,7 +61,10 @@ public final class DefaultMixerAudioObserver: AudioEngineObserver, Loggable {
public let micNode = AVAudioPlayerNode()
public let micMixerNode = AVAudioMixerNode()

public var isConnected: Bool = false
// Internal states
var isConnected: Bool = false
var appAudioConverter: AudioConverter?
var engineFormat: AVAudioFormat?
}

let _state = StateSync(State())
Expand Down Expand Up @@ -117,16 +116,9 @@ public final class DefaultMixerAudioObserver: AudioEngineObserver, Loggable {
($0.appNode, $0.appMixerNode, $0.micNode, $0.micMixerNode)
}

// TODO: Investigate if possible to get this format prior to starting screen capture.
// <AVAudioFormat 0x600003055180: 2 ch, 48000 Hz, Float32, deinterleaved>
let appAudioNodeFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32,
sampleRate: format.sampleRate, // Assume same sample rate
channels: 2,
interleaved: false)

log("Connecting app -> appMixer -> mainMixer")
// appAudio -> appAudioMixer -> mainMixer
engine.connect(appNode, to: appMixerNode, format: appAudioNodeFormat)
engine.connect(appNode, to: appMixerNode, format: format)
engine.connect(appMixerNode, to: mainMixerNode, format: format)

// src is not null if device rendering mode.
Expand All @@ -136,21 +128,49 @@ public final class DefaultMixerAudioObserver: AudioEngineObserver, Loggable {
engine.connect(src, to: micMixerNode, format: format)
}

// TODO: Investigate if possible to get this format prior to starting screen capture.
let micNodeFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32,
sampleRate: format.sampleRate, // Assume same sample rate
channels: 1, // Mono
interleaved: false)

log("Connecting micAudio (player) to micMixer -> mainMixer")
// mic (player) -> micMixer -> mainMixer
engine.connect(micNode, to: micMixerNode, format: micNodeFormat)
engine.connect(micNode, to: micMixerNode, format: format)
// Always connect micMixer to mainMixer
engine.connect(micMixerNode, to: mainMixerNode, format: format)

_state.mutate { $0.isConnected = true }
_state.mutate {
$0.engineFormat = format
$0.isConnected = true
}

// Invoke next
next?.engineWillConnectInput(engine, src: src, dst: dst, format: format, context: context)
}
}

extension DefaultMixerAudioObserver {
    /// Schedules captured in-app audio onto the app player node, converting it
    /// to the engine's connection format when the formats differ.
    ///
    /// No-op until the engine graph is connected and the engine is running.
    /// - Parameter inputBuffer: PCM buffer captured from the app (e.g. screen-share audio).
    func capture(appAudio inputBuffer: AVAudioPCMBuffer) {
        let (isConnected, appNode, cachedConverter, engineFormat) = _state.read {
            ($0.isConnected, $0.appNode, $0.appAudioConverter, $0.engineFormat)
        }

        guard isConnected, let engineFormat, let engine = appNode.engine, engine.isRunning else { return }

        // Reuse the cached converter only when BOTH ends still match. The engine
        // format can change across reconnects, so checking the input format alone
        // could reuse a converter whose output format is stale.
        let converter: AudioConverter?
        if let cachedConverter,
           cachedConverter.inputFormat == inputBuffer.format,
           cachedConverter.outputFormat == engineFormat
        {
            converter = cachedConverter
        } else {
            // Converter creation can fail for unsupported format pairs; avoid a
            // force-unwrap crash and simply drop this buffer instead. Storing the
            // (possibly nil) result also clears any stale cached converter.
            converter = AudioConverter(from: inputBuffer.format, to: engineFormat)
            _state.mutate { $0.appAudioConverter = converter }
        }

        guard let converter else { return }

        converter.convert(from: inputBuffer)
        // Copy the converted segment from buffer and schedule it.
        let segment = converter.outputBuffer.copySegment()
        appNode.scheduleBuffer(segment)

        if !appNode.isPlaying {
            appNode.play()
        }
    }
}
30 changes: 28 additions & 2 deletions Sources/LiveKit/Convenience/AudioProcessing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@
*/

import Accelerate
import AVFoundation
import Foundation
import AVFAudio

#if os(iOS) && targetEnvironment(macCatalyst)
// Required for UnsafeMutableAudioBufferListPointer.
import CoreAudio
#endif

public struct AudioLevel {
/// Linear Scale RMS Value
Expand Down Expand Up @@ -68,6 +72,28 @@ public extension CMSampleBuffer {
}

public extension AVAudioPCMBuffer {
/// Copies a range of frames of an AVAudioPCMBuffer into a newly allocated
/// buffer with the same format.
///
/// - Parameters:
///   - startFrame: Inclusive first frame to copy; must be `>= 0`.
///   - endFrame: Exclusive end frame; must satisfy `startFrame <= endFrame <= frameLength`.
/// - Returns: A new buffer whose `frameLength` equals `endFrame - startFrame`.
func copySegment(from startFrame: AVAudioFramePosition, to endFrame: AVAudioFramePosition) -> AVAudioPCMBuffer {
    // Fail fast on invalid ranges instead of silently reading past the
    // allocated region (the previous behavior was undefined).
    precondition(startFrame >= 0 && startFrame <= endFrame, "Invalid frame range")
    precondition(endFrame <= AVAudioFramePosition(frameLength), "Range exceeds frameLength")

    let framesToCopy = AVAudioFrameCount(endFrame - startFrame)
    // max(_, 1) keeps allocation valid for an empty range; frameLength is
    // still set to 0 below in that case.
    let segment = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: max(framesToCopy, 1))!

    // mBytesPerFrame is per channel buffer for deinterleaved formats and
    // covers all channels for interleaved ones, matching the per-buffer loop.
    let sampleSize = format.streamDescription.pointee.mBytesPerFrame

    if framesToCopy > 0 {
        let srcPtr = UnsafeMutableAudioBufferListPointer(mutableAudioBufferList)
        let dstPtr = UnsafeMutableAudioBufferListPointer(segment.mutableAudioBufferList)
        for (src, dst) in zip(srcPtr, dstPtr) {
            // Skip channels with missing storage rather than passing nil to memcpy.
            guard let srcData = src.mData, let dstData = dst.mData else { continue }
            memcpy(dstData, srcData.advanced(by: Int(startFrame) * Int(sampleSize)), Int(framesToCopy) * Int(sampleSize))
        }
    }

    segment.frameLength = framesToCopy
    return segment
}

/// Convenience overload that duplicates the entire valid region of the buffer
/// (frames `0..<frameLength`); the copy's `frameCapacity` equals `frameLength`.
func copySegment() -> AVAudioPCMBuffer {
    let end = AVAudioFramePosition(frameLength)
    return copySegment(from: 0, to: end)
}

/// Computes Peak and Linear Scale RMS Value (Average) for all channels.
func audioLevels() -> [AudioLevel] {
var result: [AudioLevel] = []
Expand Down
99 changes: 99 additions & 0 deletions Sources/LiveKit/Support/Audio/AVAudioPCMRingBuffer.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
* Copyright 2025 LiveKit
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import AVFAudio

#if os(iOS) && targetEnvironment(macCatalyst)
// Required for UnsafeMutableAudioBufferListPointer.
import CoreAudio
#endif

/// A fixed-capacity ring (circular) buffer of PCM audio frames backed by a
/// single pre-allocated `AVAudioPCMBuffer`.
///
/// Frames are appended at a write position and consumed from a read position;
/// both wrap around `capacity`. Storage is accessed through the raw
/// `mutableAudioBufferList`, so the backing buffer's `frameLength` remains 0
/// and does not reflect the amount of buffered audio — `availableFrames` does.
///
/// NOTE(review): there is no locking here — this appears to assume append/read
/// are called from a single thread or externally synchronized; confirm with callers.
class AVAudioPCMRingBuffer {
    // Backing storage; its frameLength is intentionally left at 0 (see above).
    let buffer: AVAudioPCMBuffer
    // Maximum number of frames the ring can hold at once.
    let capacity: AVAudioFrameCount
    // Next frame slot to write to, modulo capacity.
    private var writeIndex: AVAudioFramePosition = 0
    // Next frame slot to read from, modulo capacity.
    private var readIndex: AVAudioFramePosition = 0
    // Number of frames currently buffered (written but not yet read).
    private var availableFrames: AVAudioFrameCount = 0

    /// Creates a ring buffer holding up to `frameCapacity` frames of `format` audio.
    /// - Parameters:
    ///   - format: PCM format of the frames to be stored.
    ///   - frameCapacity: Maximum frames held at once (default 10240).
    init(format: AVAudioFormat, frameCapacity: AVAudioFrameCount = 1024 * 10) {
        capacity = frameCapacity
        let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCapacity)!
        buffer.frameLength = 0
        self.buffer = buffer
    }

    /// Appends as many frames from `srcBuffer` as will fit; frames beyond the
    /// remaining free space are silently dropped.
    /// Assumes `srcBuffer.format` matches the ring's format (same channel count
    /// and bytes-per-frame) — not checked here.
    func append(audioBuffer srcBuffer: AVAudioPCMBuffer) {
        let framesToCopy = min(srcBuffer.frameLength, capacity - availableFrames) // Prevent overflow

        // mBytesPerFrame is per channel buffer for deinterleaved formats.
        let sampleSize = buffer.format.streamDescription.pointee.mBytesPerFrame
        let srcPtr = UnsafeMutableAudioBufferListPointer(srcBuffer.mutableAudioBufferList)
        let dstPtr = UnsafeMutableAudioBufferListPointer(buffer.mutableAudioBufferList)

        // One iteration per channel buffer (deinterleaved) or one total (interleaved).
        for (src, dst) in zip(srcPtr, dstPtr) {
            guard let srcData = src.mData, let dstData = dst.mData else { continue }

            let firstCopyFrames = min(framesToCopy, capacity - AVAudioFrameCount(writeIndex % AVAudioFramePosition(capacity))) // First segment
            let remainingFrames = framesToCopy - firstCopyFrames // Remaining after wrap

            // First copy
            let dstOffset = Int(writeIndex % AVAudioFramePosition(capacity)) * Int(sampleSize)
            memcpy(dstData.advanced(by: dstOffset), srcData, Int(firstCopyFrames) * Int(sampleSize))

            // Wrap copy if needed
            if remainingFrames > 0 {
                memcpy(dstData, srcData.advanced(by: Int(firstCopyFrames) * Int(sampleSize)), Int(remainingFrames) * Int(sampleSize))
            }
        }

        // Update write index and available frames
        writeIndex = (writeIndex + AVAudioFramePosition(framesToCopy)) % AVAudioFramePosition(capacity)
        availableFrames += framesToCopy
    }

    /// Reads exactly `frames` frames into a freshly allocated buffer.
    /// - Returns: A new buffer with `frameLength == frames`, or `nil` when
    ///   fewer than `frames` frames are buffered (no partial reads).
    func read(frames: AVAudioFrameCount) -> AVAudioPCMBuffer? {
        guard frames <= availableFrames else { return nil } // Not enough data

        let format = buffer.format
        guard let outputBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frames) else { return nil }
        outputBuffer.frameLength = frames

        let sampleSize = Int(buffer.format.streamDescription.pointee.mBytesPerFrame)
        let srcPtr = UnsafeMutableAudioBufferListPointer(buffer.mutableAudioBufferList)
        let dstPtr = UnsafeMutableAudioBufferListPointer(outputBuffer.mutableAudioBufferList)

        for (src, dst) in zip(srcPtr, dstPtr) {
            guard let srcData = src.mData, let dstData = dst.mData else { continue }

            let firstReadFrames = min(frames, capacity - AVAudioFrameCount(readIndex % AVAudioFramePosition(capacity))) // First segment
            let remainingFrames = frames - firstReadFrames

            // First copy
            let srcOffset = Int(readIndex % AVAudioFramePosition(capacity)) * sampleSize
            memcpy(dstData, srcData.advanced(by: srcOffset), Int(firstReadFrames) * sampleSize)

            // Wrap copy if needed
            if remainingFrames > 0 {
                memcpy(dstData.advanced(by: Int(firstReadFrames) * sampleSize), srcData, Int(remainingFrames) * sampleSize)
            }
        }

        // Update read index and available frames
        readIndex = (readIndex + AVAudioFramePosition(frames)) % AVAudioFramePosition(capacity)
        availableFrames -= frames

        return outputBuffer
    }
}
60 changes: 60 additions & 0 deletions Sources/LiveKit/Support/Audio/AudioConverter.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright 2025 LiveKit
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

@preconcurrency import AVFAudio

/// Pairs an `AVAudioConverter` with a reusable pre-allocated output buffer so
/// one converter instance can be applied repeatedly to incoming PCM buffers
/// without allocating per call. All stored properties are immutable references.
final class AudioConverter: Sendable {
    let converter: AVAudioConverter
    let inputFormat: AVAudioFormat
    let outputFormat: AVAudioFormat
    let outputBuffer: AVAudioPCMBuffer

    /// Computes required frame capacity for output buffer.
    static func frameCapacity(from inputFormat: AVAudioFormat, to outputFormat: AVAudioFormat, inputFrameCount: AVAudioFrameCount) -> AVAudioFrameCount {
        // Scale the input frame count by the resampling ratio.
        let ratio = outputFormat.sampleRate / inputFormat.sampleRate
        return AVAudioFrameCount(Double(inputFrameCount) * ratio)
    }

    /// Fails when the format pair is unsupported or the output buffer cannot be allocated.
    init?(from inputFormat: AVAudioFormat, to outputFormat: AVAudioFormat, outputBufferCapacity: AVAudioFrameCount = 9600) {
        guard let coreConverter = AVAudioConverter(from: inputFormat, to: outputFormat) else { return nil }
        guard let reusableBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: outputBufferCapacity) else { return nil }

        converter = coreConverter
        outputBuffer = reusableBuffer
        self.inputFormat = inputFormat
        self.outputFormat = outputFormat
    }

    /// Converts `inputBuffer` into the shared `outputBuffer`, overwriting its
    /// previous contents. The input block hands the buffer over exactly once,
    /// then reports `.noDataNow` so the converter stops pulling.
    func convert(from inputBuffer: AVAudioPCMBuffer) {
        var conversionError: NSError?
        var didProvideInput = false

        converter.convert(to: outputBuffer, error: &conversionError) { _, statusPtr in
            guard !didProvideInput else {
                statusPtr.pointee = .noDataNow
                return nil
            }
            didProvideInput = true
            statusPtr.pointee = .haveData
            return inputBuffer
        }
    }
}
10 changes: 1 addition & 9 deletions Sources/LiveKit/Track/Capturers/MacOSScreenCapturer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -219,15 +219,7 @@ extension MacOSScreenCapturer: SCStreamOutput {

if case .audio = outputType {
guard let pcm = sampleBuffer.toAVAudioPCMBuffer() else { return }
let mixer = AudioManager.shared.mixer
let node = mixer.appAudioNode
guard mixer.isConnected, let engine = node.engine, engine.isRunning else { return }

node.scheduleBuffer(pcm)
if !node.isPlaying {
node.play()
}

AudioManager.shared.mixer.capture(appAudio: pcm)
} else if case .screen = outputType {
// Retrieve the array of metadata attachments from the sample buffer.
guard let attachmentsArray = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer,
Expand Down
Loading
Loading