Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement MediaReader/Writer Classes Using AVFoundation #1005

Merged
merged 13 commits into from
Jun 24, 2024
Prev Previous commit
Next Next commit
feat: Implement audio input using AVF
  • Loading branch information
yuto-trd committed Jun 24, 2024
commit fc4317b7527f7c0c1950a4d586194ccac07d60e9
15 changes: 11 additions & 4 deletions src/Beutl.Extensions.AVFoundation/Encoding/AVFEncoderInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,18 @@ namespace Beutl.Extensions.AVFoundation.Encoding;

public sealed class AVFEncoderInfo(AVFEncodingExtension extension) : IEncoderInfo
{
/// <summary>Name reported for this encoder.</summary>
public string Name => "AVFoundation";

/// <summary>
/// Creates an <see cref="AVFWriter"/> for <paramref name="file"/>.
/// </summary>
/// <param name="file">Path passed through to the writer.</param>
/// <param name="videoConfig">Video settings; cast to <see cref="AVFVideoEncoderSettings"/>.</param>
/// <param name="audioConfig">Audio settings; cast to <see cref="AVFAudioEncoderSettings"/>.</param>
/// <returns>The writer, or <c>null</c> when construction (including either cast) fails.</returns>
public MediaWriter? Create(string file, VideoEncoderSettings videoConfig, AudioEncoderSettings audioConfig)
{
    try
    {
        return new AVFWriter(file, (AVFVideoEncoderSettings)videoConfig, (AVFAudioEncoderSettings)audioConfig);
    }
    catch (Exception e)
    {
        // Returning null is the failure signal of this method, but the cause must not vanish silently.
        System.Diagnostics.Debug.WriteLine($"AVFEncoderInfo.Create failed: {e}");
        return null;
    }
}

public IEnumerable<string> SupportExtensions()
Expand All @@ -29,11 +36,11 @@ public IEnumerable<string> SupportExtensions()

/// <summary>
/// Returns the default video settings for this encoder.
/// </summary>
/// <returns>
/// A new <see cref="AVFVideoEncoderSettings"/>; <c>Create</c> casts the video settings to that type,
/// so a plain <see cref="VideoEncoderSettings"/> would make that cast fail.
/// </returns>
public VideoEncoderSettings DefaultVideoConfig()
{
    return new AVFVideoEncoderSettings();
}

/// <summary>
/// Returns the default audio settings for this encoder.
/// </summary>
/// <returns>
/// A new <see cref="AVFAudioEncoderSettings"/>; <c>Create</c> casts the audio settings to that type,
/// so a plain <see cref="AudioEncoderSettings"/> would make that cast fail.
/// </returns>
public AudioEncoderSettings DefaultAudioConfig()
{
    return new AVFAudioEncoderSettings();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ static AVFAudioEncoderSettings()
.Register();

LinearPcmBitDepthProperty = ConfigureProperty<BitDepth, AVFAudioEncoderSettings>(nameof(LinearPcmBitDepth))
.DefaultValue(BitDepth.Default)
.DefaultValue(BitDepth.Bits16)
.Register();

LinearPcmBigEndianProperty = ConfigureProperty<bool, AVFAudioEncoderSettings>(nameof(LinearPcmBigEndian))
Expand Down Expand Up @@ -91,11 +91,10 @@ public AudioQuality SampleRateConverterQuality

/// <summary>
/// Linear PCM bit depth. Each member's numeric value is the number of bits, so the value
/// can be cast directly to an integer bit count.
/// </summary>
public enum BitDepth
{
    Bits8 = 8,
    Bits16 = 16,
    Bits24 = 24,
    Bits32 = 32
}

public enum AudioQuality
Expand Down
258 changes: 184 additions & 74 deletions src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
using Beutl.Media;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using Beutl.Media;
using Beutl.Media.Encoding;
using Beutl.Media.Music;
using Beutl.Media.Music.Samples;
using Beutl.Media.Pixel;
using MonoMac.AudioToolbox;
using MonoMac.AVFoundation;
Expand All @@ -19,6 +23,8 @@ public class AVFWriter : MediaWriter
private long _numberOfFrames;
private readonly AVAssetWriterInput _audioInput;
private readonly AudioSettings _audioSettings;
private AudioConverter? _audioConverter;
private AudioStreamBasicDescription? _audioSourceFormat;
private long _numberOfSamples;

public AVFWriter(string file, AVFVideoEncoderSettings videoConfig, AVFAudioEncoderSettings audioConfig)
Expand All @@ -32,28 +38,13 @@ public AVFWriter(string file, AVFVideoEncoderSettings videoConfig, AVFAudioEncod
{
Width = videoConfig.DestinationSize.Width,
Height = videoConfig.DestinationSize.Height,
Codec = videoConfig.Codec switch
{
AVFVideoEncoderSettings.VideoCodec.H264 => AVVideoCodec.H264,
AVFVideoEncoderSettings.VideoCodec.JPEG => AVVideoCodec.JPEG,
_ => null,
},
Codec = ToAVVideoCodec(videoConfig.Codec),
CodecSettings = new AVVideoCodecSettings
{
AverageBitRate = videoConfig.Bitrate == -1 ? null : videoConfig.Bitrate,
MaxKeyFrameInterval = videoConfig.KeyframeRate == -1 ? null : videoConfig.KeyframeRate,
JPEGQuality = videoConfig.JPEGQuality < 0 ? null : videoConfig.JPEGQuality,
ProfileLevelH264 = videoConfig.ProfileLevelH264 switch
{
AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline30 => AVVideoProfileLevelH264.Baseline30,
AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline31 => AVVideoProfileLevelH264.Baseline31,
AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline41 => AVVideoProfileLevelH264.Baseline41,
AVFVideoEncoderSettings.VideoProfileLevelH264.Main30 => AVVideoProfileLevelH264.Main30,
AVFVideoEncoderSettings.VideoProfileLevelH264.Main31 => AVVideoProfileLevelH264.Main31,
AVFVideoEncoderSettings.VideoProfileLevelH264.Main32 => AVVideoProfileLevelH264.Main32,
AVFVideoEncoderSettings.VideoProfileLevelH264.Main41 => AVVideoProfileLevelH264.Main41,
_ => null,
},
ProfileLevelH264 = ToAVVideoProfileLevelH264(videoConfig.ProfileLevelH264),
},
});

Expand All @@ -64,45 +55,11 @@ public AVFWriter(string file, AVFVideoEncoderSettings videoConfig, AVFAudioEncod
SampleRate = audioConfig.SampleRate,
EncoderBitRate = audioConfig.Bitrate == -1 ? null : audioConfig.Bitrate,
NumberChannels = audioConfig.Channels,
Format = audioConfig.Format switch
{
AVFAudioEncoderSettings.AudioFormatType.MPEGLayer1 => AudioFormatType.MPEGLayer1,
AVFAudioEncoderSettings.AudioFormatType.MPEGLayer2 => AudioFormatType.MPEGLayer2,
AVFAudioEncoderSettings.AudioFormatType.MPEGLayer3 => AudioFormatType.MPEGLayer3,
AVFAudioEncoderSettings.AudioFormatType.Audible => AudioFormatType.Audible,
AVFAudioEncoderSettings.AudioFormatType.MACE3 => AudioFormatType.MACE3,
AVFAudioEncoderSettings.AudioFormatType.MACE6 => AudioFormatType.MACE6,
AVFAudioEncoderSettings.AudioFormatType.QDesign2 => AudioFormatType.QDesign2,
AVFAudioEncoderSettings.AudioFormatType.QDesign => AudioFormatType.QDesign,
AVFAudioEncoderSettings.AudioFormatType.QUALCOMM => AudioFormatType.QUALCOMM,
AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC => AudioFormatType.MPEG4AAC,
AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD => AudioFormatType.MPEG4AAC_ELD,
AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD_SBR => AudioFormatType.MPEG4AAC_ELD_SBR,
AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD_V2 => AudioFormatType.MPEG4AAC_ELD_V2,
AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_HE => AudioFormatType.MPEG4AAC_HE,
AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_LD => AudioFormatType.MPEG4AAC_LD,
AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_HE_V2 => AudioFormatType.MPEG4AAC_HE_V2,
AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_Spatial => AudioFormatType.MPEG4AAC_Spatial,
AVFAudioEncoderSettings.AudioFormatType.AC3 => AudioFormatType.AC3,
AVFAudioEncoderSettings.AudioFormatType.AES3 => AudioFormatType.AES3,
AVFAudioEncoderSettings.AudioFormatType.AppleLossless => AudioFormatType.AppleLossless,
AVFAudioEncoderSettings.AudioFormatType.ALaw => AudioFormatType.ALaw,
AVFAudioEncoderSettings.AudioFormatType.ParameterValueStream => AudioFormatType.ParameterValueStream,
AVFAudioEncoderSettings.AudioFormatType.CAC3 => AudioFormatType.CAC3,
AVFAudioEncoderSettings.AudioFormatType.MPEG4CELP => AudioFormatType.MPEG4CELP,
AVFAudioEncoderSettings.AudioFormatType.MPEG4HVXC => AudioFormatType.MPEG4HVXC,
AVFAudioEncoderSettings.AudioFormatType.iLBC => AudioFormatType.iLBC,
AVFAudioEncoderSettings.AudioFormatType.AppleIMA4 => AudioFormatType.AppleIMA4,
AVFAudioEncoderSettings.AudioFormatType.LinearPCM => AudioFormatType.LinearPCM,
AVFAudioEncoderSettings.AudioFormatType.MIDIStream => AudioFormatType.MIDIStream,
AVFAudioEncoderSettings.AudioFormatType.DVIIntelIMA => AudioFormatType.DVIIntelIMA,
AVFAudioEncoderSettings.AudioFormatType.MicrosoftGSM => AudioFormatType.MicrosoftGSM,
AVFAudioEncoderSettings.AudioFormatType.AMR => AudioFormatType.AMR,
AVFAudioEncoderSettings.AudioFormatType.TimeCode => AudioFormatType.TimeCode,
AVFAudioEncoderSettings.AudioFormatType.MPEG4TwinVQ => AudioFormatType.MPEG4TwinVQ,
AVFAudioEncoderSettings.AudioFormatType.ULaw => AudioFormatType.ULaw,
_ => null,
},
LinearPcmFloat = audioConfig.LinearPcmFloat,
LinearPcmBigEndian = audioConfig.LinearPcmBigEndian,
LinearPcmBitDepth = (int?)audioConfig.LinearPcmBitDepth,
LinearPcmNonInterleaved = audioConfig.LinearPcmNonInterleaved,
Format = ToAudioFormatType(audioConfig.Format),
AudioQuality =
audioConfig.Quality == AVFAudioEncoderSettings.AudioQuality.Default
? null
Expand Down Expand Up @@ -171,33 +128,186 @@ public override bool AddVideo(IBitmap image)
return true;
}

public override bool AddAudio(IPcm sound)
/// <summary>
/// P/Invoke declaration for CoreMedia's <c>CMBlockBufferReplaceDataBytes</c>, which copies
/// bytes from <paramref name="sourceBytes"/> into an existing block buffer.
/// </summary>
/// <param name="sourceBytes">Pointer to the bytes to copy.</param>
/// <param name="handle">Native handle of the destination block buffer.</param>
/// <param name="offsetIntoDestination">Byte offset inside the destination buffer.</param>
/// <param name="dataLength">Number of bytes to copy.</param>
/// <remarks>
/// The two size parameters are <c>size_t</c> in the native signature, so they are declared as
/// <c>nuint</c>; a 32-bit <c>uint</c> leaves the upper half of the argument undefined on 64-bit targets.
/// </remarks>
[DllImport("/System/Library/PrivateFrameworks/CoreMedia.framework/Versions/A/CoreMedia")]
private static extern CMBlockBufferError CMBlockBufferReplaceDataBytes(
    IntPtr sourceBytes,
    IntPtr handle,
    nuint offsetIntoDestination,
    nuint dataLength);

/// <summary>
/// P/Invoke declaration for AudioToolbox's <c>AudioConverterConvertBuffer</c>.
/// </summary>
/// <param name="handle">Native handle of the audio converter.</param>
/// <param name="inInputDataSize">Size of the input data in bytes.</param>
/// <param name="inInputData">Pointer to the input data.</param>
/// <param name="ioOutputDataSize">
/// Pointer to the output size: the caller passes the capacity of <paramref name="outOutputData"/>;
/// presumably updated with the number of bytes written (in/out, per the "io" prefix) — confirm against the native documentation.
/// </param>
/// <param name="outOutputData">Pointer to the buffer that receives the converted data.</param>
[DllImport("/System/Library/Frameworks/AudioToolbox.framework/AudioToolbox")]
private static unsafe extern AudioConverterError AudioConverterConvertBuffer(
IntPtr handle,
uint inInputDataSize, IntPtr inInputData,
uint* ioOutputDataSize, IntPtr outOutputData);

/// <summary>
/// Reads the non-public field named <c>handle</c> of an <see cref="AudioConverter"/> through an
/// unsafe accessor, so the value can be passed to <c>AudioConverterConvertBuffer</c>.
/// </summary>
/// <param name="self">The converter whose <c>handle</c> field is read.</param>
[UnsafeAccessor(UnsafeAccessorKind.Field, Name = "handle")]
private static extern IntPtr GetHandle(AudioConverter self);

/// <summary>
/// Converts one chunk of PCM audio to the configured linear PCM output format and appends it
/// to the audio writer input.
/// </summary>
/// <param name="sound">
/// Source samples. Only <c>Pcm&lt;Stereo16BitInteger&gt;</c>, <c>Pcm&lt;Stereo32BitInteger&gt;</c>
/// and <c>Pcm&lt;Stereo32BitFloat&gt;</c> are accepted.
/// </param>
/// <returns>
/// <c>true</c> when the converted samples were appended; <c>false</c> when the audio input is not
/// ready for more data or rejects the sample buffer.
/// </returns>
/// <exception cref="NotSupportedException"><paramref name="sound"/> has an unsupported sample layout.</exception>
/// <exception cref="InvalidOperationException">
/// The converter could not be created, the conversion failed, or a CoreMedia buffer could not be built.
/// </exception>
public override unsafe bool AddAudio(IPcm sound)
{
    if (!_audioInput.ReadyForMoreMediaData)
    {
        return false;
    }

    var audioConfig = (AVFAudioEncoderSettings)AudioConfig;
    int sourceBits = GetBits();
    AudioFormatFlags sourceFlags = GetFormatFlags();

    // Rebuild the converter whenever the incoming layout differs from the one it was created for.
    // The flags are compared as well because 32-bit float and 32-bit integer input share a bit depth.
    if (_audioConverter == null
        || !_audioSourceFormat.HasValue
        || (int)_audioSourceFormat.Value.SampleRate != sound.SampleRate
        || _audioSourceFormat.Value.BitsPerChannel != sourceBits
        || _audioSourceFormat.Value.FormatFlags != sourceFlags
        || _audioSourceFormat.Value.ChannelsPerFrame != sound.NumChannels)
    {
        var sourceFormat = AudioStreamBasicDescription.CreateLinearPCM(sound.SampleRate, (uint)sound.NumChannels);
        sourceFormat.FormatFlags = sourceFlags;
        sourceFormat.BitsPerChannel = sourceBits;
        // NOTE(review): the byte-per-frame/packet fields are left as CreateLinearPCM produced them;
        // confirm they match 32-bit input.
        _audioSourceFormat = sourceFormat;

        var destinationFormat =
            AudioStreamBasicDescription.CreateLinearPCM(AudioConfig.SampleRate, (uint)AudioConfig.Channels,
                (uint)audioConfig.LinearPcmBitDepth, audioConfig.LinearPcmBigEndian);

        AudioFormatFlags destinationFlags =
            (audioConfig.LinearPcmFloat ? AudioFormatFlags.IsFloat : AudioFormatFlags.IsSignedInteger) |
            AudioFormatFlags.IsPacked;
        if (audioConfig.LinearPcmBigEndian)
        {
            // Assigning FormatFlags replaces whatever CreateLinearPCM set, so endianness is re-applied here.
            destinationFlags |= AudioFormatFlags.IsBigEndian;
        }

        destinationFormat.FormatFlags = destinationFlags;

        _audioConverter?.Dispose();
        _audioConverter = AudioConverter.Create(sourceFormat, destinationFormat);
    }

    if (_audioConverter == null)
    {
        throw new InvalidOperationException("Failed to create the audio converter.");
    }

    uint inputDataSize = (uint)(sound.SampleSize * sound.NumSamples * sound.NumChannels);
    uint bytesPerSample = (uint)audioConfig.LinearPcmBitDepth / 8;

    // Multiply in floating point: an integer product such as 48000 * 48000 would exceed Int32.MaxValue.
    uint outputSamples =
        (uint)Math.Ceiling((double)AudioConfig.SampleRate * sound.NumSamples / sound.SampleRate);
    uint expectedOutputSize = bytesPerSample * outputSamples * (uint)AudioConfig.Channels;
    uint outputDataSize = expectedOutputSize;

    void* outputData = NativeMemory.Alloc(outputDataSize);
    try
    {
        AudioConverterError convertError = AudioConverterConvertBuffer(
            GetHandle(_audioConverter),
            inputDataSize, sound.Data,
            &outputDataSize, (IntPtr)outputData);
        if (convertError != 0)
        {
            throw new InvalidOperationException(convertError.ToString());
        }

        Debug.Assert(outputDataSize == expectedOutputSize);

        var time = new CMTime(_numberOfSamples, AudioConfig.SampleRate);
        using var dataBuffer = CMBlockBuffer.CreateEmpty(
            outputDataSize,
            CMBlockBufferFlags.AlwaysCopyData, out var blockError);
        if (blockError != CMBlockBufferError.None)
        {
            throw new InvalidOperationException(blockError.ToString());
        }

        var replaceError =
            CMBlockBufferReplaceDataBytes((IntPtr)outputData, dataBuffer.Handle, 0, dataBuffer.DataLength);
        if (replaceError != CMBlockBufferError.None)
        {
            throw new InvalidOperationException(replaceError.ToString());
        }

        using var formatDescription =
            CMFormatDescription.Create(CMMediaType.Audio, (uint)AudioFormatType.LinearPCM, out var formatError);
        if (formatError != CMFormatDescriptionError.None)
        {
            throw new InvalidOperationException(formatError.ToString());
        }

        using var sampleBuffer = CMSampleBuffer.CreateWithPacketDescriptions(dataBuffer, formatDescription,
            (int)outputSamples, time, null, out var sampleError);
        if (sampleError != CMSampleBufferError.None)
        {
            throw new InvalidOperationException(sampleError.ToString());
        }

        if (!_audioInput.AppendSampleBuffer(sampleBuffer))
        {
            return false;
        }

        _numberOfSamples += outputSamples;
        return true;
    }
    finally
    {
        // The scratch buffer is unmanaged memory; release it on every exit path.
        NativeMemory.Free(outputData);
    }

    // Bit depth of one channel sample of the incoming PCM.
    int GetBits()
    {
        return sound switch
        {
            Pcm<Stereo32BitFloat> or Pcm<Stereo32BitInteger> => 32,
            Pcm<Stereo16BitInteger> => 16,
            _ => throw new NotSupportedException()
        };
    }

    // Format flags describing the incoming PCM; float input must be flagged as float, not integer.
    AudioFormatFlags GetFormatFlags()
    {
        return sound switch
        {
            Pcm<Stereo32BitFloat> => AudioFormatFlags.IsFloat | AudioFormatFlags.IsPacked,
            Pcm<Stereo16BitInteger> or Pcm<Stereo32BitInteger> => AudioFormatFlags.IsSignedInteger |
                                                                  AudioFormatFlags.IsPacked,
            _ => throw new NotSupportedException()
        };
    }
}

/// <summary>
/// Marks both writer inputs as finished, ends the session at the time of the last written video
/// frame, finishes writing, and releases the audio converter.
/// </summary>
/// <param name="disposing">Forwarded unchanged to the base implementation.</param>
protected override void Dispose(bool disposing)
{
    base.Dispose(disposing);
    _videoInput.MarkAsFinished();
    _audioInput.MarkAsFinished();

    // End time = frame count * frame duration, written as the rational (frames * denominator) / numerator.
    _assetWriter.EndSessionAtSourceTime(new CMTime(_numberOfFrames * VideoConfig.FrameRate.Denominator,
        (int)VideoConfig.FrameRate.Numerator));
    _assetWriter.FinishWriting();

    // The converter is created lazily by AddAudio and was previously never released.
    _audioConverter?.Dispose();
    _audioConverter = null;
}

/// <summary>
/// Maps the settings-level video codec to the matching <see cref="AVVideoCodec"/>.
/// </summary>
/// <param name="codec">Codec selected in the encoder settings.</param>
/// <returns>The AVFoundation codec, or <c>null</c> for any value without a mapping.</returns>
private AVVideoCodec? ToAVVideoCodec(AVFVideoEncoderSettings.VideoCodec codec)
{
    switch (codec)
    {
        case AVFVideoEncoderSettings.VideoCodec.H264:
            return AVVideoCodec.H264;

        case AVFVideoEncoderSettings.VideoCodec.JPEG:
            return AVVideoCodec.JPEG;

        default:
            return null;
    }
}

/// <summary>
/// Maps the settings-level H.264 profile/level to the matching <see cref="AVVideoProfileLevelH264"/>.
/// </summary>
/// <param name="profile">Profile/level selected in the encoder settings.</param>
/// <returns>The AVFoundation profile/level, or <c>null</c> for any value without a mapping.</returns>
private AVVideoProfileLevelH264? ToAVVideoProfileLevelH264(AVFVideoEncoderSettings.VideoProfileLevelH264 profile)
    => profile switch
    {
        // Baseline profile
        AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline30 => AVVideoProfileLevelH264.Baseline30,
        AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline31 => AVVideoProfileLevelH264.Baseline31,
        AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline41 => AVVideoProfileLevelH264.Baseline41,

        // Main profile
        AVFVideoEncoderSettings.VideoProfileLevelH264.Main30 => AVVideoProfileLevelH264.Main30,
        AVFVideoEncoderSettings.VideoProfileLevelH264.Main31 => AVVideoProfileLevelH264.Main31,
        AVFVideoEncoderSettings.VideoProfileLevelH264.Main32 => AVVideoProfileLevelH264.Main32,
        AVFVideoEncoderSettings.VideoProfileLevelH264.Main41 => AVVideoProfileLevelH264.Main41,

        _ => null
    };

/// <summary>
/// Maps the settings-level audio format to the AudioToolbox <see cref="AudioFormatType"/> member
/// with the same name.
/// </summary>
/// <param name="format">Audio format selected in the encoder settings.</param>
/// <returns>The AudioToolbox format, or <c>null</c> for any value without a mapping.</returns>
private AudioFormatType? ToAudioFormatType(AVFAudioEncoderSettings.AudioFormatType format)
    => format switch
    {
        AVFAudioEncoderSettings.AudioFormatType.MPEGLayer1 => AudioFormatType.MPEGLayer1,
        AVFAudioEncoderSettings.AudioFormatType.MPEGLayer2 => AudioFormatType.MPEGLayer2,
        AVFAudioEncoderSettings.AudioFormatType.MPEGLayer3 => AudioFormatType.MPEGLayer3,
        AVFAudioEncoderSettings.AudioFormatType.Audible => AudioFormatType.Audible,
        AVFAudioEncoderSettings.AudioFormatType.MACE3 => AudioFormatType.MACE3,
        AVFAudioEncoderSettings.AudioFormatType.MACE6 => AudioFormatType.MACE6,
        AVFAudioEncoderSettings.AudioFormatType.QDesign2 => AudioFormatType.QDesign2,
        AVFAudioEncoderSettings.AudioFormatType.QDesign => AudioFormatType.QDesign,
        AVFAudioEncoderSettings.AudioFormatType.QUALCOMM => AudioFormatType.QUALCOMM,

        AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC => AudioFormatType.MPEG4AAC,
        AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD => AudioFormatType.MPEG4AAC_ELD,
        AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD_SBR => AudioFormatType.MPEG4AAC_ELD_SBR,
        AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD_V2 => AudioFormatType.MPEG4AAC_ELD_V2,
        AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_HE => AudioFormatType.MPEG4AAC_HE,
        AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_LD => AudioFormatType.MPEG4AAC_LD,
        AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_HE_V2 => AudioFormatType.MPEG4AAC_HE_V2,
        AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_Spatial => AudioFormatType.MPEG4AAC_Spatial,

        AVFAudioEncoderSettings.AudioFormatType.AC3 => AudioFormatType.AC3,
        AVFAudioEncoderSettings.AudioFormatType.AES3 => AudioFormatType.AES3,
        AVFAudioEncoderSettings.AudioFormatType.AppleLossless => AudioFormatType.AppleLossless,
        AVFAudioEncoderSettings.AudioFormatType.ALaw => AudioFormatType.ALaw,
        AVFAudioEncoderSettings.AudioFormatType.ParameterValueStream => AudioFormatType.ParameterValueStream,
        AVFAudioEncoderSettings.AudioFormatType.CAC3 => AudioFormatType.CAC3,
        AVFAudioEncoderSettings.AudioFormatType.MPEG4CELP => AudioFormatType.MPEG4CELP,
        AVFAudioEncoderSettings.AudioFormatType.MPEG4HVXC => AudioFormatType.MPEG4HVXC,
        AVFAudioEncoderSettings.AudioFormatType.iLBC => AudioFormatType.iLBC,
        AVFAudioEncoderSettings.AudioFormatType.AppleIMA4 => AudioFormatType.AppleIMA4,
        AVFAudioEncoderSettings.AudioFormatType.LinearPCM => AudioFormatType.LinearPCM,
        AVFAudioEncoderSettings.AudioFormatType.MIDIStream => AudioFormatType.MIDIStream,
        AVFAudioEncoderSettings.AudioFormatType.DVIIntelIMA => AudioFormatType.DVIIntelIMA,
        AVFAudioEncoderSettings.AudioFormatType.MicrosoftGSM => AudioFormatType.MicrosoftGSM,
        AVFAudioEncoderSettings.AudioFormatType.AMR => AudioFormatType.AMR,
        AVFAudioEncoderSettings.AudioFormatType.TimeCode => AudioFormatType.TimeCode,
        AVFAudioEncoderSettings.AudioFormatType.MPEG4TwinVQ => AudioFormatType.MPEG4TwinVQ,
        AVFAudioEncoderSettings.AudioFormatType.ULaw => AudioFormatType.ULaw,

        // Anything else has no AudioToolbox counterpart here.
        _ => null
    };
}
Loading