From 9b84535531c3eaf80f7311defff9765c0ec0c6e6 Mon Sep 17 00:00:00 2001 From: "Yuto Terada (indigo-san)" Date: Thu, 28 Mar 2024 13:56:12 +0900 Subject: [PATCH 01/13] aaa --- Beutl.sln | 13 ++ Directory.Packages.props | 3 +- .../AVFDecoderInfo.cs | 62 +++++++++ .../AVFDecodingExtension.cs | 27 ++++ .../AVFReader.cs | 131 ++++++++++++++++++ .../Beutl.Extensions.AVFoundation.csproj | 15 ++ .../CustomAVPlayerItemOutputPullDelegate.cs | 16 +++ src/Beutl/Beutl.csproj | 5 + .../LoadPrimitiveExtensionTask.cs | 34 +++++ 9 files changed, 305 insertions(+), 1 deletion(-) create mode 100644 src/Beutl.Extensions.AVFoundation/AVFDecoderInfo.cs create mode 100644 src/Beutl.Extensions.AVFoundation/AVFDecodingExtension.cs create mode 100644 src/Beutl.Extensions.AVFoundation/AVFReader.cs create mode 100644 src/Beutl.Extensions.AVFoundation/Beutl.Extensions.AVFoundation.csproj create mode 100644 src/Beutl.Extensions.AVFoundation/CustomAVPlayerItemOutputPullDelegate.cs diff --git a/Beutl.sln b/Beutl.sln index cd6fbac31..2f4f19235 100644 --- a/Beutl.sln +++ b/Beutl.sln @@ -109,6 +109,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Beutl.Extensions.MediaFound EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Beutl.PackageTools.UI", "src\Beutl.PackageTools.UI\Beutl.PackageTools.UI.csproj", "{D8A8061C-CE79-4DF7-B9E8-2002BAD47DD8}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{F678E681-3128-4853-BAAC-2732B1C3E5C3}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beutl.Extensions.AVFoundation", "src\Beutl.Extensions.AVFoundation\Beutl.Extensions.AVFoundation.csproj", "{8B040DCA-6C9C-4009-8FE5-8F764D80907B}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -371,6 +375,14 @@ Global {D8A8061C-CE79-4DF7-B9E8-2002BAD47DD8}.Release|Any CPU.Build.0 = Release|Any CPU {D8A8061C-CE79-4DF7-B9E8-2002BAD47DD8}.Release|x64.ActiveCfg = Release|Any CPU {D8A8061C-CE79-4DF7-B9E8-2002BAD47DD8}.Release|x64.Build.0 = Release|Any CPU + {8B040DCA-6C9C-4009-8FE5-8F764D80907B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {8B040DCA-6C9C-4009-8FE5-8F764D80907B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {8B040DCA-6C9C-4009-8FE5-8F764D80907B}.Debug|x64.ActiveCfg = Debug|Any CPU + {8B040DCA-6C9C-4009-8FE5-8F764D80907B}.Debug|x64.Build.0 = Debug|Any CPU + {8B040DCA-6C9C-4009-8FE5-8F764D80907B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {8B040DCA-6C9C-4009-8FE5-8F764D80907B}.Release|Any CPU.Build.0 = Release|Any CPU + {8B040DCA-6C9C-4009-8FE5-8F764D80907B}.Release|x64.ActiveCfg = Release|Any CPU + {8B040DCA-6C9C-4009-8FE5-8F764D80907B}.Release|x64.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -391,6 +403,7 @@ Global {ED8EECC3-4BDC-4B29-A17B-1044C7534BB2} = {464CEA08-D77A-4CB8-9B03-BEF0C852B907} {EED4AB04-13CD-4382-BD1A-64D7A258957F} = {791EB029-7801-45FA-9AB5-5E953FFA003A} {AC7E6F97-061C-47DA-B264-04A4EAE666B9} = {791EB029-7801-45FA-9AB5-5E953FFA003A} + {8B040DCA-6C9C-4009-8FE5-8F764D80907B} = {F678E681-3128-4853-BAAC-2732B1C3E5C3} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {CC3F8F37-EB22-4418-978D-C844095F8C4D} diff --git a/Directory.Packages.props b/Directory.Packages.props index dba524fa5..75b1ca314 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -27,6 +27,7 @@ + @@ -80,4 +81,4 @@ - \ No newline at end of file + diff --git a/src/Beutl.Extensions.AVFoundation/AVFDecoderInfo.cs b/src/Beutl.Extensions.AVFoundation/AVFDecoderInfo.cs new file mode 100644 index 000000000..d1822197b --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/AVFDecoderInfo.cs @@ -0,0 +1,62 @@ +using System.Diagnostics.CodeAnalysis; +using Beutl.Extensions.AVFoundation; +using Beutl.Media; +using Beutl.Media.Decoding; +using Beutl.Media.Music; + +namespace Beutl.Extensions.AVFoundation.Decoding; + +public sealed class AVFDecoderInfo(AVFDecodingExtension extension) : IDecoderInfo +{ + public string Name => "AVFoundation"; + + //https://learn.microsoft.com/ja-jp/windows/win32/medfound/supported-media-formats-in-media-foundation + public IEnumerable AudioExtensions() + { + yield return ".mp3"; + yield return ".wav"; + yield return ".m4a"; + yield return ".aac"; + yield return ".wma"; + yield return ".sami"; + yield return ".smi"; + yield return ".m4v"; + yield return ".mov"; + yield return ".mp4"; + yield return ".avi"; + yield return ".adts"; + yield return ".asf"; + yield return ".wmv"; + yield return ".3gp"; + yield return ".3gp2"; + yield return ".3gpp"; + } + + public MediaReader? Open(string file, MediaOptions options) + { + try + { + return new AVFReader(file, options, extension); + } + catch + { + return null; + } + } + + public IEnumerable VideoExtensions() + { + yield return ".mp4"; + yield return ".mov"; + yield return ".m4v"; + yield return ".avi"; + yield return ".wmv"; + yield return ".sami"; + yield return ".smi"; + yield return ".adts"; + yield return ".asf"; + yield return ".3gp"; + yield return ".3gp2"; + yield return ".3gpp"; + } +} diff --git a/src/Beutl.Extensions.AVFoundation/AVFDecodingExtension.cs b/src/Beutl.Extensions.AVFoundation/AVFDecodingExtension.cs new file mode 100644 index 000000000..1c96a1700 --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/AVFDecodingExtension.cs @@ -0,0 +1,27 @@ +using Beutl.Extensibility; +using Beutl.Media.Decoding; +using MonoMac.AppKit; + +namespace Beutl.Extensions.AVFoundation.Decoding; + +[Export] +public class AVFDecodingExtension : DecodingExtension +{ + public override string Name => "AVFoundation Decoding"; + + public override string DisplayName => "AVFoundation Decoding"; + + public override IDecoderInfo GetDecoderInfo() + { + return new AVFDecoderInfo(this); + } + + public override void Load() + { + if (OperatingSystem.IsMacOS()) + { + NSApplication.Init(); + DecoderRegistry.Register(GetDecoderInfo()); + } + } +} diff --git a/src/Beutl.Extensions.AVFoundation/AVFReader.cs b/src/Beutl.Extensions.AVFoundation/AVFReader.cs new file mode 100644 index 000000000..c3fa28d5c --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/AVFReader.cs @@ -0,0 +1,131 @@ +using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; +using Beutl.Media; +using Beutl.Media.Decoding; +using Beutl.Media.Music; +using Beutl.Media.Pixel; +using MonoMac.AVFoundation; +using MonoMac.CoreGraphics; +using MonoMac.CoreMedia; +using MonoMac.CoreVideo; +using MonoMac.Foundation; + +namespace Beutl.Extensions.AVFoundation.Decoding; + +public unsafe sealed class AVFReader : MediaReader +{ + private readonly AVAsset _asset; + private readonly AVPlayerItem _playerItem; + private readonly AVPlayerItemVideoOutput _videoOutput; + private readonly AVPlayer _player; + private readonly AVAssetTrack _videoTrack; + private string _file; + private MediaOptions _options; + private AVFDecodingExtension _extension; + + public AVFReader(string file, MediaOptions options, AVFDecodingExtension extension) + { + _file = file; + _options = options; + _extension = extension; + var url = NSUrl.FromFilename(file); + _asset = AVAsset.FromUrl(url); + _playerItem = AVPlayerItem.FromAsset(_asset); + _videoOutput = new AVPlayerItemVideoOutput(); + // _videoOutput.Delegate= new AVPlayerItemOutputPullDelegate() + + + _playerItem.AddOutput(_videoOutput); + + _player = new AVPlayer(_playerItem); + + _videoTrack = _asset.TracksWithMediaType(AVMediaType.Video)[0]; + + var fmtdesc = _videoTrack.FormatDescriptions[0]; + var frameSize = new PixelSize(fmtdesc.VideoDimensions.Width, fmtdesc.VideoDimensions.Height); + var codec = fmtdesc.VideoCodecType.ToString(); + var framerate = _videoTrack.NominalFrameRate; + var duration = _videoTrack.TotalSampleDataLength / _videoTrack.EstimatedDataRate * 8d; + VideoInfo = new VideoStreamInfo( + codec, + Rational.FromDouble(duration), + frameSize, + Rational.FromSingle(framerate)); + } + + public override VideoStreamInfo VideoInfo { get; } + + public override AudioStreamInfo AudioInfo => throw new NotImplementedException(); + + public override bool HasVideo => true; + + public override bool HasAudio => false; + + public override bool ReadAudio(int start, int length, [NotNullWhen(true)] out IPcm? sound) + { + throw new NotImplementedException(); + } + + public override bool ReadVideo(int frame, [NotNullWhen(true)] out IBitmap? image) + { + image = null; + _player.Seek(CMTime.FromSeconds(frame / (double)_videoTrack.NominalFrameRate, 1)) + // _assetReader.TimeRange = new CMTimeRange + // { + // Start = CMTime.FromSeconds(frame / (double)_videoTrack.NominalFrameRate, 1), + // Duration = CMTime.PositiveInfinity + // }; + _videoOutput. + + using var buffer = _videoReader.CopyNextSampleBuffer(); + if (buffer.DataIsReady && buffer.IsValid) + { + using var imgbuf = buffer.GetImageBuffer(); + var d = buffer.GetDataBuffer(); + // CMBlockBuffer + if (imgbuf is CVPixelBuffer pixbuf) + { + var r = pixbuf.Lock(CVOptionFlags.None); + if (r != CVReturn.Success) return false; + + var ptr = pixbuf.GetBaseAddress(0); + var bytesPerRow = pixbuf.BytesPerRow; + var width = pixbuf.Width; + var height = pixbuf.Height; + using CGColorSpace colorSpace = CGColorSpace.CreateDeviceRGB(); + + using var newContext = new CGBitmapContext( + ptr, width, height, + 8, bytesPerRow, colorSpace, + CGBitmapFlags.ByteOrder32Little | CGBitmapFlags.PremultipliedFirst); + + using CGImage cgimage = newContext.ToImage(); + using var data = cgimage.DataProvider.CopyData(); + + var bitmap = new Bitmap(cgimage.Width, cgimage.Height); + Debug.Assert(bitmap.ByteCount == (int)data.Length); + Buffer.MemoryCopy((void*)data.Bytes, (void*)bitmap.Data, (long)data.Length, bitmap.ByteCount); + Parallel.For(0, bitmap.DataSpan.Length, i => + { + ref var p = ref bitmap.DataSpan[i]; + p.A = 255; + }); + + pixbuf.Unlock(CVOptionFlags.None); + + image = bitmap; + return true; + } + } + + return false; + } + + protected override void Dispose(bool disposing) + { + base.Dispose(disposing); + _asset.Dispose(); + _assetReader.Dispose(); + _videoReader.Dispose(); + } +} diff --git a/src/Beutl.Extensions.AVFoundation/Beutl.Extensions.AVFoundation.csproj b/src/Beutl.Extensions.AVFoundation/Beutl.Extensions.AVFoundation.csproj new file mode 100644 index 000000000..6fa8fdb69 --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/Beutl.Extensions.AVFoundation.csproj @@ -0,0 +1,15 @@ + + + + false + + + + + + + + + + + diff --git a/src/Beutl.Extensions.AVFoundation/CustomAVPlayerItemOutputPullDelegate.cs b/src/Beutl.Extensions.AVFoundation/CustomAVPlayerItemOutputPullDelegate.cs new file mode 100644 index 000000000..9afeae3ee --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/CustomAVPlayerItemOutputPullDelegate.cs @@ -0,0 +1,16 @@ +using MonoMac.AVFoundation; + +namespace Beutl.Extensions.AVFoundation.Decoding; + +internal sealed class CustomAVPlayerItemOutputPullDelegate : AVPlayerItemOutputPullDelegate +{ + public override void OutputMediaDataWillChange(AVPlayerItemOutput sender) + { + base.OutputMediaDataWillChange(sender); + } + + public override void OutputSequenceWasFlushed(AVPlayerItemOutput output) + { + base.OutputSequenceWasFlushed(output); + } +} diff --git a/src/Beutl/Beutl.csproj b/src/Beutl/Beutl.csproj index 376295be7..0ea888920 100644 --- a/src/Beutl/Beutl.csproj +++ b/src/Beutl/Beutl.csproj @@ -99,6 +99,11 @@ Include="..\Beutl.Extensions.MediaFoundation\Beutl.Extensions.MediaFoundation.csproj" /> + + + + TextTemplatingFileGenerator diff --git a/src/Beutl/Services/StartupTasks/LoadPrimitiveExtensionTask.cs b/src/Beutl/Services/StartupTasks/LoadPrimitiveExtensionTask.cs index 72ba33c6c..f0e4bc4f1 100644 --- a/src/Beutl/Services/StartupTasks/LoadPrimitiveExtensionTask.cs +++ b/src/Beutl/Services/StartupTasks/LoadPrimitiveExtensionTask.cs @@ -117,6 +117,40 @@ public LoadPrimitiveExtensionTask(PackageManager manager) } #pragma warning restore CS0436 #endif + +#pragma warning disable CS0436 + if (OperatingSystem.IsMacOS()) + { + activity?.AddEvent(new("Loading_AVFoundation")); + + // Beutl.Extensions.FFmpeg.csproj + var pkg = new LocalPackage + { + ShortDescription = "AVFoundation for beutl", + Name = "Beutl.Embedding.AVFoundation", + DisplayName = "Beutl.Embedding.AVFoundation", + InstalledPath = AppContext.BaseDirectory, + Tags = { "macos", "avfoundation", "decoder", "decoding", "encoder", "encoding", "video", "audio" }, + Version = GitVersionInformation.NuGetVersionV2, + WebSite = "https://github.com/b-editor/beutl", + Publisher = "b-editor" + }; + try + { + var decoding = new Extensions.AVFoundation.Decoding.AVFDecodingExtension(); + _manager.SetupExtensionSettings(decoding); + decoding.Load(); + + provider.AddExtensions(pkg.LocalId, [decoding]); + } + catch (Exception ex) + { + Failures.Add((pkg, ex)); + } + + activity?.AddEvent(new("Loaded_AVFoundation")); + } +#pragma warning restore CS0436 } }); } From 4d8062c0681ebeea43ae19aa4df2c39c759d2186 Mon Sep 17 00:00:00 2001 From: Yuto Terada Date: Wed, 12 Jun 2024 14:47:07 +0900 Subject: [PATCH 02/13] update --- Beutl.sln | 3 - .../AVFReader.cs | 192 ++++++++++++------ .../AVFSampleCache.cs | 74 +++++++ .../AVFSampleCacheOptions.cs | 5 + .../AVFSampleUtilities.cs | 59 ++++++ .../CMTimeUtilities.cs | 16 ++ .../CustomAVPlayerItemOutputPullDelegate.cs | 16 -- 7 files changed, 281 insertions(+), 84 deletions(-) create mode 100644 src/Beutl.Extensions.AVFoundation/AVFSampleCache.cs create mode 100644 src/Beutl.Extensions.AVFoundation/AVFSampleCacheOptions.cs create mode 100644 src/Beutl.Extensions.AVFoundation/AVFSampleUtilities.cs create mode 100644 src/Beutl.Extensions.AVFoundation/CMTimeUtilities.cs delete mode 100644 src/Beutl.Extensions.AVFoundation/CustomAVPlayerItemOutputPullDelegate.cs diff --git a/Beutl.sln b/Beutl.sln index 2f4f19235..c7cccd5f6 100644 --- a/Beutl.sln +++ b/Beutl.sln @@ -109,8 +109,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Beutl.Extensions.MediaFound EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Beutl.PackageTools.UI", "src\Beutl.PackageTools.UI\Beutl.PackageTools.UI.csproj", "{D8A8061C-CE79-4DF7-B9E8-2002BAD47DD8}" EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{F678E681-3128-4853-BAAC-2732B1C3E5C3}" -EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Beutl.Extensions.AVFoundation", "src\Beutl.Extensions.AVFoundation\Beutl.Extensions.AVFoundation.csproj", "{8B040DCA-6C9C-4009-8FE5-8F764D80907B}" EndProject Global @@ -403,7 +401,6 @@ Global {ED8EECC3-4BDC-4B29-A17B-1044C7534BB2} = {464CEA08-D77A-4CB8-9B03-BEF0C852B907} {EED4AB04-13CD-4382-BD1A-64D7A258957F} = {791EB029-7801-45FA-9AB5-5E953FFA003A} {AC7E6F97-061C-47DA-B264-04A4EAE666B9} = {791EB029-7801-45FA-9AB5-5E953FFA003A} - {8B040DCA-6C9C-4009-8FE5-8F764D80907B} = {F678E681-3128-4853-BAAC-2732B1C3E5C3} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {CC3F8F37-EB22-4418-978D-C844095F8C4D} diff --git a/src/Beutl.Extensions.AVFoundation/AVFReader.cs b/src/Beutl.Extensions.AVFoundation/AVFReader.cs index c3fa28d5c..a215b7717 100644 --- a/src/Beutl.Extensions.AVFoundation/AVFReader.cs +++ b/src/Beutl.Extensions.AVFoundation/AVFReader.cs @@ -1,11 +1,10 @@ -using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using Beutl.Logging; using Beutl.Media; using Beutl.Media.Decoding; using Beutl.Media.Music; -using Beutl.Media.Pixel; +using Microsoft.Extensions.Logging; using MonoMac.AVFoundation; -using MonoMac.CoreGraphics; using MonoMac.CoreMedia; using MonoMac.CoreVideo; using MonoMac.Foundation; @@ -14,38 +13,48 @@ namespace Beutl.Extensions.AVFoundation.Decoding; public unsafe sealed class AVFReader : MediaReader { + private readonly ILogger _logger = Log.CreateLogger(); private readonly AVAsset _asset; - private readonly AVPlayerItem _playerItem; - private readonly AVPlayerItemVideoOutput _videoOutput; - private readonly AVPlayer _player; private readonly AVAssetTrack _videoTrack; + private AVAssetReader _assetReader; + private AVAssetReaderTrackOutput _videoReaderOutput; private string _file; private MediaOptions _options; private AVFDecodingExtension _extension; + private CMTime _currentVideoTimestamp; + private AVFSampleCache _sampleCache; + + // 現在のフレームからどれくらいの範囲ならシーケンシャル読み込みさせるかの閾値 + private readonly int _thresholdFrameCount = 30; public AVFReader(string file, MediaOptions options, AVFDecodingExtension extension) { _file = file; _options = options; _extension = extension; + + _sampleCache = new AVFSampleCache(new AVFSampleCacheOptions()); var url = NSUrl.FromFilename(file); _asset = AVAsset.FromUrl(url); - _playerItem = AVPlayerItem.FromAsset(_asset); - _videoOutput = new AVPlayerItemVideoOutput(); - // _videoOutput.Delegate= new AVPlayerItemOutputPullDelegate() - - - _playerItem.AddOutput(_videoOutput); - - _player = new AVPlayer(_playerItem); + _assetReader = AVAssetReader.FromAsset(_asset, out var error); + if (error != null) throw new Exception(error.LocalizedDescription); _videoTrack = _asset.TracksWithMediaType(AVMediaType.Video)[0]; - - var fmtdesc = _videoTrack.FormatDescriptions[0]; - var frameSize = new PixelSize(fmtdesc.VideoDimensions.Width, fmtdesc.VideoDimensions.Height); - var codec = fmtdesc.VideoCodecType.ToString(); - var framerate = _videoTrack.NominalFrameRate; - var duration = _videoTrack.TotalSampleDataLength / _videoTrack.EstimatedDataRate * 8d; + _videoReaderOutput = new AVAssetReaderTrackOutput( + _videoTrack, + NSDictionary.FromObjectsAndKeys( + [CVPixelFormatType.CV32ARGB], + [CVPixelBuffer.PixelFormatTypeKey])); + _videoReaderOutput.AlwaysCopiesSampleData = false; + _assetReader.AddOutput(_videoReaderOutput); + + _assetReader.StartReading(); + + var desc = _videoTrack.FormatDescriptions[0]; + var frameSize = new PixelSize(desc.VideoDimensions.Width, desc.VideoDimensions.Height); + string codec = desc.VideoCodecType.ToString(); + float framerate = _videoTrack.NominalFrameRate; + double duration = _videoTrack.TotalSampleDataLength / _videoTrack.EstimatedDataRate * 8d; VideoInfo = new VideoStreamInfo( codec, Rational.FromDouble(duration), @@ -66,58 +75,109 @@ public override bool ReadAudio(int start, int length, [NotNullWhen(true)] out IP throw new NotImplementedException(); } - public override bool ReadVideo(int frame, [NotNullWhen(true)] out IBitmap? image) + private CMSampleBuffer? ReadSample() { - image = null; - _player.Seek(CMTime.FromSeconds(frame / (double)_videoTrack.NominalFrameRate, 1)) - // _assetReader.TimeRange = new CMTimeRange - // { - // Start = CMTime.FromSeconds(frame / (double)_videoTrack.NominalFrameRate, 1), - // Duration = CMTime.PositiveInfinity - // }; - _videoOutput. - - using var buffer = _videoReader.CopyNextSampleBuffer(); - if (buffer.DataIsReady && buffer.IsValid) + var buffer = _videoReaderOutput.CopyNextSampleBuffer(); + if (!buffer.DataIsReady) { - using var imgbuf = buffer.GetImageBuffer(); - var d = buffer.GetDataBuffer(); - // CMBlockBuffer - if (imgbuf is CVPixelBuffer pixbuf) - { - var r = pixbuf.Lock(CVOptionFlags.None); - if (r != CVReturn.Success) return false; - - var ptr = pixbuf.GetBaseAddress(0); - var bytesPerRow = pixbuf.BytesPerRow; - var width = pixbuf.Width; - var height = pixbuf.Height; - using CGColorSpace colorSpace = CGColorSpace.CreateDeviceRGB(); - - using var newContext = new CGBitmapContext( - ptr, width, height, - 8, bytesPerRow, colorSpace, - CGBitmapFlags.ByteOrder32Little | CGBitmapFlags.PremultipliedFirst); - - using CGImage cgimage = newContext.ToImage(); - using var data = cgimage.DataProvider.CopyData(); - - var bitmap = new Bitmap(cgimage.Width, cgimage.Height); - Debug.Assert(bitmap.ByteCount == (int)data.Length); - Buffer.MemoryCopy((void*)data.Bytes, (void*)bitmap.Data, (long)data.Length, bitmap.ByteCount); - Parallel.For(0, bitmap.DataSpan.Length, i => - { - ref var p = ref bitmap.DataSpan[i]; - p.A = 255; - }); + _logger.LogTrace("buffer.DataIsReady = false"); + return null; + } + + if (!buffer.IsValid) + { + _logger.LogTrace("buffer is invalid."); + return null; + } + + // success! + // add cache + // timestamp -= _firstGapTimeStamp; + int frame = CMTimeUtilities.ConvertFrameFromTimeStamp(_currentVideoTimestamp, _videoTrack.NominalFrameRate); + _sampleCache.AddFrameSample(frame, buffer); + _currentVideoTimestamp = buffer.PresentationTimeStamp; + + return buffer; + } + + private void Seek(CMTime timestamp) + { + _sampleCache.ResetVideo(); + _assetReader.Dispose(); + _videoReaderOutput.Dispose(); + + _assetReader = AVAssetReader.FromAsset(_asset, out var error); + if (error != null) throw new Exception(error.LocalizedDescription); + _assetReader.TimeRange = new CMTimeRange { Start = timestamp, Duration = CMTime.PositiveInfinity }; - pixbuf.Unlock(CVOptionFlags.None); + _videoReaderOutput = new AVAssetReaderTrackOutput( + _videoTrack, + NSDictionary.FromObjectsAndKeys( + [CVPixelFormatType.CV32ARGB], + [CVPixelBuffer.PixelFormatTypeKey])); + _videoReaderOutput.AlwaysCopiesSampleData = false; + _assetReader.AddOutput(_videoReaderOutput); - image = bitmap; + _assetReader.StartReading(); + } + + public override bool ReadVideo(int frame, [NotNullWhen(true)] out IBitmap? image) + { + CMSampleBuffer? sample = _sampleCache.SearchFrameSample(frame); + if (sample != null) + { + image = AVFSampleUtilities.ConvertToBgra(sample); + if (image != null) return true; + } + + int currentFrame = _sampleCache.LastFrameNumber(); + + if (currentFrame == -1) + { + currentFrame = + CMTimeUtilities.ConvertFrameFromTimeStamp(_currentVideoTimestamp, _videoTrack.NominalFrameRate); + } + + if (frame < currentFrame || (currentFrame + _thresholdFrameCount) < frame) + { + CMTime destTimePosition = CMTimeUtilities.ConvertTimeStampFromFrame(frame, _videoTrack.NominalFrameRate); + Seek(destTimePosition); + _logger.LogDebug( + "ReadFrame Seek currentFrame: {currentFrame}, destFrame: {destFrame} - destTimePos: {destTimePos} relativeFrame: {relativeFrame}", + currentFrame, frame, destTimePosition.Seconds, frame - currentFrame); + } + + sample = ReadSample(); + while (sample != null) + { + try + { + int readSampleFrame = _sampleCache.LastFrameNumber(); + + if (frame <= readSampleFrame) + { + if ((readSampleFrame - frame) > 0) + { + _logger.LogWarning( + "wrong frame currentFrame: {currentFrame} targetFrame: {frame} readSampleFrame: {readSampleFrame} distance: {distance}", + currentFrame, frame, readSampleFrame, readSampleFrame - frame); + } + + image = AVFSampleUtilities.ConvertToBgra(sample); + if (image != null) + return true; + } + + sample = ReadSample(); + } + catch + { + break; } } + image = null; return false; } @@ -126,6 +186,8 @@ protected override void Dispose(bool disposing) base.Dispose(disposing); _asset.Dispose(); _assetReader.Dispose(); - _videoReader.Dispose(); + _sampleCache.ResetVideo(); + _videoTrack.Dispose(); + _videoReaderOutput.Dispose(); } } diff --git a/src/Beutl.Extensions.AVFoundation/AVFSampleCache.cs b/src/Beutl.Extensions.AVFoundation/AVFSampleCache.cs new file mode 100644 index 000000000..da4958c30 --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/AVFSampleCache.cs @@ -0,0 +1,74 @@ +using Beutl.Collections; +using Beutl.Logging; +using Microsoft.Extensions.Logging; +using MonoMac.CoreMedia; + +namespace Beutl.Extensions.AVFoundation; + +public class AVFSampleCache(AVFSampleCacheOptions options) +{ + private readonly ILogger _logger = Log.CreateLogger(); + + public const int FrameWaringGapCount = 1; + + private CircularBuffer _videoCircularBuffer = new(options.MaxVideoBufferSize); + + private readonly record struct VideoCache(int Frame, CMSampleBuffer Sample); + + public void ResetVideo() + { + CircularBuffer old = _videoCircularBuffer; + _videoCircularBuffer = new CircularBuffer(options.MaxVideoBufferSize); + foreach (VideoCache item in old) + { + item.Sample.Dispose(); + } + } + + public void AddFrameSample(int frame, CMSampleBuffer pSample) + { + int lastFrameNum = LastFrameNumber(); + if (lastFrameNum != -1) + { + if (Math.Abs(lastFrameNum + 1 - frame) > FrameWaringGapCount) + { + //_logger.LogWarning("frame error - frame: {frame} actual frame: {actual}", frame, lastFrameNum + 1); + } + + frame = lastFrameNum + 1; + } + + if (_videoCircularBuffer.IsFull) + { + _videoCircularBuffer.Front().Sample.Dispose(); + _videoCircularBuffer.PopFront(); + } + + var videoCache = new VideoCache(frame, pSample); + _videoCircularBuffer.PushBack(videoCache); + } + + public int LastFrameNumber() + { + if (_videoCircularBuffer.Size > 0) + { + VideoCache prevVideoCache = _videoCircularBuffer.Back(); + return prevVideoCache.Frame; + } + + return -1; + } + + public CMSampleBuffer? SearchFrameSample(int frame) + { + foreach (VideoCache videoCache in _videoCircularBuffer.Reverse()) + { + if (videoCache.Frame == frame) + { + return videoCache.Sample; + } + } + + return null; + } +} diff --git a/src/Beutl.Extensions.AVFoundation/AVFSampleCacheOptions.cs b/src/Beutl.Extensions.AVFoundation/AVFSampleCacheOptions.cs new file mode 100644 index 000000000..c5824b0ed --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/AVFSampleCacheOptions.cs @@ -0,0 +1,5 @@ +namespace Beutl.Extensions.AVFoundation; + +public record AVFSampleCacheOptions( + int MaxVideoBufferSize = 4, // あまり大きな値を設定するとReadSampleで停止する + int MaxAudioBufferSize = 20); diff --git a/src/Beutl.Extensions.AVFoundation/AVFSampleUtilities.cs b/src/Beutl.Extensions.AVFoundation/AVFSampleUtilities.cs new file mode 100644 index 000000000..42af5a639 --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/AVFSampleUtilities.cs @@ -0,0 +1,59 @@ +using Beutl.Media; +using Beutl.Media.Pixel; +using MonoMac.CoreGraphics; +using MonoMac.CoreImage; +using MonoMac.CoreMedia; +using MonoMac.CoreVideo; +using MonoMac.Foundation; + +namespace Beutl.Extensions.AVFoundation; + +public class AVFSampleUtilities +{ + public static unsafe Bitmap? ConvertToBgra(CMSampleBuffer buffer) + { + using var imageBuffer = buffer.GetImageBuffer(); + if (imageBuffer is not CVPixelBuffer pixelBuffer) return null; + + var r = pixelBuffer.Lock(CVOptionFlags.None); + if (r != CVReturn.Success) return null; + + int width = pixelBuffer.Width; + int height = pixelBuffer.Height; + var bitmap = new Bitmap(width, height); + if (pixelBuffer.ColorSpace.Model == CGColorSpaceModel.RGB && pixelBuffer.BytesPerRow == width * 4) + { + Buffer.MemoryCopy( + (void*)pixelBuffer.GetBaseAddress(0), (void*)bitmap.Data, + bitmap.ByteCount, bitmap.ByteCount); + pixelBuffer.Unlock(CVOptionFlags.None); + Parallel.For(0, width * height, i => + { + // argb + // bgra + var o = bitmap.DataSpan[i]; + bitmap.DataSpan[i] = new Bgra8888(o.G, o.R, o.A, o.B); + }); + return bitmap; + } + + int bytesPerRow = width * height * 4; + using (CGColorSpace colorSpace = CGColorSpace.CreateDeviceRGB()) + using (var cgContext = new CGBitmapContext( + bitmap.Data, width, height, + 8, bytesPerRow, colorSpace, + CGBitmapFlags.ByteOrderDefault | CGBitmapFlags.PremultipliedFirst)) + using (var ciImage = CIImage.FromImageBuffer(imageBuffer)) + using (var ciContext = new CIContext(NSObjectFlag.Empty)) + // CreateCGImageで落ちる、例外なしに + using (var cgImage = ciContext.CreateCGImage( + ciImage, new CGRect(0, 0, width, height), (long)CIFormat.ARGB8, colorSpace)) + { + cgContext.DrawImage(new CGRect(0, 0, width, height), cgImage); + } + + pixelBuffer.Unlock(CVOptionFlags.None); + + return bitmap; + } +} diff --git a/src/Beutl.Extensions.AVFoundation/CMTimeUtilities.cs b/src/Beutl.Extensions.AVFoundation/CMTimeUtilities.cs new file mode 100644 index 000000000..5e0a391e2 --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/CMTimeUtilities.cs @@ -0,0 +1,16 @@ +using MonoMac.CoreMedia; + +namespace Beutl.Extensions.AVFoundation.Decoding; + +internal static class CMTimeUtilities +{ + public static int ConvertFrameFromTimeStamp(CMTime timestamp, double rate) + { + return (int)Math.Round(timestamp.Seconds * rate, MidpointRounding.AwayFromZero); + } + + public static CMTime ConvertTimeStampFromFrame(int frame, double rate) + { + return CMTime.FromSeconds(frame / rate, 1); + } +} diff --git a/src/Beutl.Extensions.AVFoundation/CustomAVPlayerItemOutputPullDelegate.cs b/src/Beutl.Extensions.AVFoundation/CustomAVPlayerItemOutputPullDelegate.cs deleted file mode 100644 index 9afeae3ee..000000000 --- a/src/Beutl.Extensions.AVFoundation/CustomAVPlayerItemOutputPullDelegate.cs +++ /dev/null @@ -1,16 +0,0 @@ -using MonoMac.AVFoundation; - -namespace Beutl.Extensions.AVFoundation.Decoding; - -internal sealed class CustomAVPlayerItemOutputPullDelegate : AVPlayerItemOutputPullDelegate -{ - public override void OutputMediaDataWillChange(AVPlayerItemOutput sender) - { - base.OutputMediaDataWillChange(sender); - } - - public override void OutputSequenceWasFlushed(AVPlayerItemOutput output) - { - base.OutputSequenceWasFlushed(output); - } -} From 10cbb6d1c38ccf1db61dc0c6a97209eb079120ff Mon Sep 17 00:00:00 2001 From: Yuto Terada Date: Tue, 18 Jun 2024 16:36:13 +0900 Subject: [PATCH 03/13] =?UTF-8?q?=E9=9F=B3=E5=A3=B0=E5=85=A5=E5=8A=9B?= =?UTF-8?q?=E3=81=A8=E3=82=AF=E3=83=A9=E3=82=B9=E3=81=AE=E5=88=86=E5=89=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AVFAudioSampleCache.cs | 129 ++++++++++++ .../AVFAudioStreamReader.cs | 198 ++++++++++++++++++ .../AVFReader.cs | 177 +++------------- .../AVFSampleCache.cs | 74 ------- .../AVFSampleUtilities.cs | 18 +- .../AVFVideoSampleCache.cs | 75 +++++++ .../AVFVideoStreamReader.cs | 187 +++++++++++++++++ 7 files changed, 635 insertions(+), 223 deletions(-) create mode 100644 src/Beutl.Extensions.AVFoundation/AVFAudioSampleCache.cs create mode 100644 src/Beutl.Extensions.AVFoundation/AVFAudioStreamReader.cs delete mode 100644 src/Beutl.Extensions.AVFoundation/AVFSampleCache.cs create mode 100644 src/Beutl.Extensions.AVFoundation/AVFVideoSampleCache.cs create mode 100644 src/Beutl.Extensions.AVFoundation/AVFVideoStreamReader.cs diff --git a/src/Beutl.Extensions.AVFoundation/AVFAudioSampleCache.cs b/src/Beutl.Extensions.AVFoundation/AVFAudioSampleCache.cs new file mode 100644 index 000000000..dc00a9997 --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/AVFAudioSampleCache.cs @@ -0,0 +1,129 @@ +using Beutl.Collections; +using Beutl.Logging; +using Microsoft.Extensions.Logging; +using MonoMac.CoreMedia; + +namespace Beutl.Extensions.AVFoundation; + +public class AVFAudioSampleCache(AVFSampleCacheOptions options) +{ + private readonly ILogger _logger = Log.CreateLogger(); + + public const int AudioSampleWaringGapCount = 1000; + + private CircularBuffer _audioCircularBuffer = new(options.MaxAudioBufferSize); + private short _nBlockAlign; + + private readonly record struct AudioCache(int StartSampleNum, CMSampleBuffer Sample, int AudioSampleCount) + { + public bool CopyBuffer(ref int startSample, ref int copySampleLength, ref nint buffer, short nBlockAlign) + { + int querySampleEndPos = startSample + copySampleLength; + int cacheSampleEndPos = StartSampleNum + AudioSampleCount; + // キャッシュ内に startSample位置があるかどうか + if (StartSampleNum <= startSample && startSample < cacheSampleEndPos) + { + // 要求サイズがキャッシュを超えるかどうか + if (querySampleEndPos <= cacheSampleEndPos) + { + // キャッシュ内に収まる + int actualBufferPos = (startSample - StartSampleNum) * nBlockAlign; + int actualBufferSize = copySampleLength * nBlockAlign; + AVFSampleUtilities.SampleCopyToBuffer(Sample, buffer, actualBufferPos, actualBufferSize); + + startSample += copySampleLength; + copySampleLength = 0; + buffer += actualBufferSize; + + return true; + } + else + { + // 現在のキャッシュ内のデータをコピーする + int actualBufferPos = (startSample - StartSampleNum) * nBlockAlign; + int leftSampleCount = cacheSampleEndPos - startSample; + int actualleftBufferSize = leftSampleCount * nBlockAlign; + AVFSampleUtilities.SampleCopyToBuffer(Sample, buffer, actualBufferPos, actualleftBufferSize); + + startSample += leftSampleCount; + copySampleLength -= leftSampleCount; + buffer += actualleftBufferSize; + + return true; + } + } + + return false; + } + } + + public void Reset(short nBlockAlign) + { + _nBlockAlign = nBlockAlign; + CircularBuffer old = _audioCircularBuffer; + _audioCircularBuffer = new CircularBuffer(options.MaxVideoBufferSize); + foreach (AudioCache item in old) + { + item.Sample.Dispose(); + } + } + + public void Add(int startSample, CMSampleBuffer buffer) + { + int lastAudioSampleNum = LastAudioSampleNumber(); + if (lastAudioSampleNum != -1) + { + int actualAudioSampleNum = lastAudioSampleNum + _audioCircularBuffer.Back().AudioSampleCount; + if (Math.Abs(startSample - actualAudioSampleNum) > AudioSampleWaringGapCount) + { + _logger.LogWarning( + "sample laggin - lag: {lag} startSample: {startSample} lastAudioSampleNum: {lastAudioSampleNum}", + startSample - actualAudioSampleNum, + startSample, + actualAudioSampleNum); + } + + startSample = lastAudioSampleNum + _audioCircularBuffer.Back().AudioSampleCount; + } + + // int totalLength = buffer.NumSamples; + int audioSampleCount = buffer.NumSamples; + // Debug.Assert((totalLength % _nBlockAlign) == 0); + + if (_audioCircularBuffer.IsFull) + { + _audioCircularBuffer.Front().Sample.Dispose(); + _audioCircularBuffer.PopFront(); + } + + var audioCache = new AudioCache(startSample, buffer, audioSampleCount); + _audioCircularBuffer.PushBack(audioCache); + } + + public int LastAudioSampleNumber() + { + if (_audioCircularBuffer.Size > 0) + { + AudioCache prevAudioCache = _audioCircularBuffer.Back(); + return prevAudioCache.StartSampleNum; + } + + return -1; + } + + public bool SearchAudioSampleAndCopyBuffer(int startSample, int copySampleLength, nint buffer) + { + foreach (AudioCache audioCache in _audioCircularBuffer) + { + if (audioCache.CopyBuffer(ref startSample, ref copySampleLength, ref buffer, _nBlockAlign)) + { + if (copySampleLength == 0) + { + return true; + } + } + } + + return false; + } +} diff --git a/src/Beutl.Extensions.AVFoundation/AVFAudioStreamReader.cs b/src/Beutl.Extensions.AVFoundation/AVFAudioStreamReader.cs new file mode 100644 index 000000000..4e87a8a72 --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/AVFAudioStreamReader.cs @@ -0,0 +1,198 @@ +using System.Diagnostics.CodeAnalysis; +using Beutl.Logging; +using Beutl.Media.Decoding; +using Beutl.Media.Music; +using Beutl.Media.Music.Samples; +using Microsoft.Extensions.Logging; +using MonoMac.AudioToolbox; +using MonoMac.AVFoundation; +using MonoMac.CoreMedia; + +namespace Beutl.Extensions.AVFoundation.Decoding; + +public class AVFAudioStreamReader : IDisposable +{ + private readonly ILogger _logger = Log.CreateLogger(); + private readonly AVAsset _asset; + private readonly MediaOptions _options; + private readonly AVFAudioSampleCache _sampleCache; + + private readonly AVAssetTrack _audioTrack; + private AVAssetReader _assetAudioReader; + private AVAssetReaderTrackOutput _audioReaderOutput; + private CMTime _currentAudioTimestamp; + private readonly int _thresholdSampleCount = 30000; + + public AVFAudioStreamReader(AVAsset asset, MediaOptions options) + { + _asset = asset; + _options = options; + _sampleCache = new AVFAudioSampleCache(new AVFSampleCacheOptions()); + _sampleCache.Reset(4 * 2); + + _audioTrack = _asset.TracksWithMediaType(AVMediaType.Audio)[0]; + + _assetAudioReader = AVAssetReader.FromAsset(_asset, out var error); + if (error != null) throw new Exception(error.LocalizedDescription); + + _audioReaderOutput = new AVAssetReaderTrackOutput( + _audioTrack, + new AudioSettings() + { + Format = AudioFormatType.LinearPCM, + LinearPcmBitDepth = 32, + LinearPcmBigEndian = false, + LinearPcmFloat = true, + SampleRate = options.SampleRate, + NumberChannels = 2, + }.Dictionary); + _assetAudioReader.AddOutput(_audioReaderOutput); + + _assetAudioReader.StartReading(); + + var audioDesc = _audioTrack.FormatDescriptions[0]; + AudioInfo = new AudioStreamInfo( + audioDesc.AudioFormatType.ToString(), + Rational.FromDouble(_audioTrack.TotalSampleDataLength / _audioTrack.EstimatedDataRate * 8d), + _audioTrack.NaturalTimeScale, + audioDesc.AudioChannelLayout.Channels.Length); + } + + ~AVFAudioStreamReader() + { + if (!IsDisposed) + { + DisposeCore(false); + } + } + + public bool IsDisposed { get; private set; } + + public AudioStreamInfo AudioInfo { get; } + + private void SeekAudio(CMTime timestamp) + { + // _audioTrack. + _sampleCache.Reset(4 * 2); + _assetAudioReader.Dispose(); + _audioReaderOutput.Dispose(); + + _assetAudioReader = AVAssetReader.FromAsset(_asset, out var error); + if (error != null) throw new Exception(error.LocalizedDescription); + _assetAudioReader.TimeRange = new CMTimeRange { Start = timestamp, Duration = CMTime.PositiveInfinity }; + + _audioReaderOutput = new AVAssetReaderTrackOutput( + _audioTrack, + new AudioSettings() + { + Format = AudioFormatType.LinearPCM, + LinearPcmBitDepth = 32, + LinearPcmBigEndian = false, + LinearPcmFloat = true, + SampleRate = _options.SampleRate, + NumberChannels = 2, + }.Dictionary); + _assetAudioReader.AddOutput(_audioReaderOutput); + + _assetAudioReader.StartReading(); + } + + public bool ReadAudio(int start, int length, [NotNullWhen(true)] out IPcm? sound) + { + var buffer = new Pcm(_options.SampleRate, length); + bool hitCache = _sampleCache.SearchAudioSampleAndCopyBuffer(start, length, buffer.Data); + if (hitCache) + { + sound = buffer; + return true; + } + + int currentSample = _sampleCache.LastAudioSampleNumber(); + if (currentSample == -1) + { + currentSample = + CMTimeUtilities.ConvertFrameFromTimeStamp(_currentAudioTimestamp, _audioTrack.NaturalTimeScale); + } + + if (start < currentSample || (currentSample + _thresholdSampleCount) < start) + { + CMTime destTimePosition = CMTimeUtilities.ConvertTimeStampFromFrame(start, _audioTrack.NaturalTimeScale); + SeekAudio(destTimePosition); + // _logger.LogInformation( + // "ReadAudio Seek currentTimestamp: {currentTimestamp} - destTimePos: {destTimePos} relativeSample: {relativeSample}", + // TimestampUtilities.ConvertSecFrom100ns(_currentAudioTimeStamp), + // TimestampUtilities.ConvertSecFrom100ns(destTimePosition), + // start - currentSample); + } + + CMSampleBuffer? sample = ReadAudioSample(); + while (sample != null) + { + try + { + int readSampleNum = _sampleCache.LastAudioSampleNumber(); + + if (start <= readSampleNum) + { + if (_sampleCache.SearchAudioSampleAndCopyBuffer(start, length, buffer.Data)) + { + sound = buffer; + return true; + } + } + + sample = ReadAudioSample(); + } + catch + { + break; + } + } + + buffer.Dispose(); + sound = null; + return false; + } + + private CMSampleBuffer? ReadAudioSample() + { + var buffer = _audioReaderOutput.CopyNextSampleBuffer(); + if (!buffer.DataIsReady) + { + _logger.LogTrace("buffer.DataIsReady = false"); + return null; + } + + if (!buffer.IsValid) + { + _logger.LogTrace("buffer is invalid."); + return null; + } + + // success! + // add cache + // timestamp -= _firstGapTimeStamp; + int startSample = + CMTimeUtilities.ConvertFrameFromTimeStamp(_currentAudioTimestamp, _audioTrack.NaturalTimeScale); + _sampleCache.Add(startSample, buffer); + _currentAudioTimestamp = buffer.PresentationTimeStamp; + + return buffer; + } + + private void DisposeCore(bool disposing) + { + _sampleCache.Reset(0); + _audioReaderOutput.Dispose(); + _assetAudioReader.Dispose(); + } + + public void Dispose() + { + if (IsDisposed) return; + DisposeCore(true); + + GC.SuppressFinalize(this); + IsDisposed = true; + } +} diff --git a/src/Beutl.Extensions.AVFoundation/AVFReader.cs b/src/Beutl.Extensions.AVFoundation/AVFReader.cs index a215b7717..47709a95e 100644 --- a/src/Beutl.Extensions.AVFoundation/AVFReader.cs +++ b/src/Beutl.Extensions.AVFoundation/AVFReader.cs @@ -1,180 +1,60 @@ using System.Diagnostics.CodeAnalysis; -using Beutl.Logging; using Beutl.Media; using Beutl.Media.Decoding; using Beutl.Media.Music; -using Microsoft.Extensions.Logging; using MonoMac.AVFoundation; -using MonoMac.CoreMedia; -using MonoMac.CoreVideo; using MonoMac.Foundation; namespace Beutl.Extensions.AVFoundation.Decoding; -public unsafe sealed class AVFReader : MediaReader +public sealed class AVFReader : MediaReader { - private readonly ILogger _logger = Log.CreateLogger(); private readonly AVAsset _asset; - private readonly AVAssetTrack _videoTrack; - private AVAssetReader _assetReader; - private AVAssetReaderTrackOutput _videoReaderOutput; - private string _file; - private MediaOptions _options; - private AVFDecodingExtension _extension; - private CMTime _currentVideoTimestamp; - private AVFSampleCache _sampleCache; - // 現在のフレームからどれくらいの範囲ならシーケンシャル読み込みさせるかの閾値 - private readonly int _thresholdFrameCount = 30; + private AVFVideoStreamReader? _videoReader; + private AVFAudioStreamReader? _audioReader; - public AVFReader(string file, MediaOptions options, AVFDecodingExtension extension) + public AVFReader(string file, MediaOptions options) { - _file = file; - _options = options; - _extension = extension; - - _sampleCache = new AVFSampleCache(new AVFSampleCacheOptions()); var url = NSUrl.FromFilename(file); _asset = AVAsset.FromUrl(url); - _assetReader = AVAssetReader.FromAsset(_asset, out var error); - if (error != null) throw new Exception(error.LocalizedDescription); - - _videoTrack = _asset.TracksWithMediaType(AVMediaType.Video)[0]; - _videoReaderOutput = new AVAssetReaderTrackOutput( - _videoTrack, - NSDictionary.FromObjectsAndKeys( - [CVPixelFormatType.CV32ARGB], - [CVPixelBuffer.PixelFormatTypeKey])); - _videoReaderOutput.AlwaysCopiesSampleData = false; - _assetReader.AddOutput(_videoReaderOutput); - - _assetReader.StartReading(); - - var desc = _videoTrack.FormatDescriptions[0]; - var frameSize = new PixelSize(desc.VideoDimensions.Width, desc.VideoDimensions.Height); - string codec = desc.VideoCodecType.ToString(); - float framerate = _videoTrack.NominalFrameRate; - double duration = _videoTrack.TotalSampleDataLength / _videoTrack.EstimatedDataRate * 8d; - VideoInfo = new VideoStreamInfo( - codec, - Rational.FromDouble(duration), - frameSize, - Rational.FromSingle(framerate)); - } - - public override VideoStreamInfo VideoInfo { get; } - - public override AudioStreamInfo AudioInfo => throw new NotImplementedException(); - - public override bool HasVideo => true; - - public override bool HasAudio => false; - - public override bool ReadAudio(int start, int length, [NotNullWhen(true)] out IPcm? sound) - { - throw new NotImplementedException(); - } - - private CMSampleBuffer? ReadSample() - { - var buffer = _videoReaderOutput.CopyNextSampleBuffer(); - if (!buffer.DataIsReady) + if (options.StreamsToLoad.HasFlag(MediaMode.Video)) { - _logger.LogTrace("buffer.DataIsReady = false"); - return null; + _videoReader = new AVFVideoStreamReader(_asset); } - if (!buffer.IsValid) + if (options.StreamsToLoad.HasFlag(MediaMode.Audio)) { - _logger.LogTrace("buffer is invalid."); - return null; + _audioReader = new AVFAudioStreamReader(_asset, options); } - - // success! - // add cache - // timestamp -= _firstGapTimeStamp; - int frame = CMTimeUtilities.ConvertFrameFromTimeStamp(_currentVideoTimestamp, _videoTrack.NominalFrameRate); - _sampleCache.AddFrameSample(frame, buffer); - _currentVideoTimestamp = buffer.PresentationTimeStamp; - - return buffer; } - private void Seek(CMTime timestamp) - { - _sampleCache.ResetVideo(); - _assetReader.Dispose(); - _videoReaderOutput.Dispose(); + public override VideoStreamInfo VideoInfo => + _videoReader?.VideoInfo ?? throw new Exception("VideoInfo is not available."); - _assetReader = AVAssetReader.FromAsset(_asset, out var error); - if (error != null) throw new Exception(error.LocalizedDescription); - _assetReader.TimeRange = new CMTimeRange { Start = timestamp, Duration = CMTime.PositiveInfinity }; + public override AudioStreamInfo AudioInfo => + _audioReader?.AudioInfo ?? throw new Exception("AudioInfo is not available."); - _videoReaderOutput = new AVAssetReaderTrackOutput( - _videoTrack, - NSDictionary.FromObjectsAndKeys( - [CVPixelFormatType.CV32ARGB], - [CVPixelBuffer.PixelFormatTypeKey])); - _videoReaderOutput.AlwaysCopiesSampleData = false; - _assetReader.AddOutput(_videoReaderOutput); + public override bool HasVideo => _videoReader != null; - _assetReader.StartReading(); - } + public override bool HasAudio => _audioReader != null; - public override bool ReadVideo(int frame, [NotNullWhen(true)] out IBitmap? image) + public override bool ReadAudio(int start, int length, [NotNullWhen(true)] out IPcm? sound) { - CMSampleBuffer? sample = _sampleCache.SearchFrameSample(frame); - if (sample != null) + if (_audioReader != null) { - image = AVFSampleUtilities.ConvertToBgra(sample); - if (image != null) - return true; + return _audioReader.ReadAudio(start, length, out sound); } - int currentFrame = _sampleCache.LastFrameNumber(); - - if (currentFrame == -1) - { - currentFrame = - CMTimeUtilities.ConvertFrameFromTimeStamp(_currentVideoTimestamp, _videoTrack.NominalFrameRate); - } - - if (frame < currentFrame || (currentFrame + _thresholdFrameCount) < frame) - { - CMTime destTimePosition = CMTimeUtilities.ConvertTimeStampFromFrame(frame, _videoTrack.NominalFrameRate); - Seek(destTimePosition); - _logger.LogDebug( - "ReadFrame Seek currentFrame: {currentFrame}, destFrame: {destFrame} - destTimePos: {destTimePos} relativeFrame: {relativeFrame}", - currentFrame, frame, destTimePosition.Seconds, frame - currentFrame); - } + sound = null; + return false; + } - sample = ReadSample(); - while (sample != null) + public override bool ReadVideo(int frame, [NotNullWhen(true)] out IBitmap? image) + { + if (_videoReader != null) { - try - { - int readSampleFrame = _sampleCache.LastFrameNumber(); - - if (frame <= readSampleFrame) - { - if ((readSampleFrame - frame) > 0) - { - _logger.LogWarning( - "wrong frame currentFrame: {currentFrame} targetFrame: {frame} readSampleFrame: {readSampleFrame} distance: {distance}", - currentFrame, frame, readSampleFrame, readSampleFrame - frame); - } - - image = AVFSampleUtilities.ConvertToBgra(sample); - if (image != null) - return true; - } - - sample = ReadSample(); - } - catch - { - break; - } + return _videoReader.ReadVideo(frame, out image); } image = null; @@ -184,10 +64,11 @@ public override bool ReadVideo(int frame, [NotNullWhen(true)] out IBitmap? image protected override void Dispose(bool disposing) { base.Dispose(disposing); + _audioReader?.Dispose(); + _videoReader?.Dispose(); _asset.Dispose(); - _assetReader.Dispose(); - _sampleCache.ResetVideo(); - _videoTrack.Dispose(); - _videoReaderOutput.Dispose(); + + _audioReader = null; + _videoReader = null; } } diff --git a/src/Beutl.Extensions.AVFoundation/AVFSampleCache.cs b/src/Beutl.Extensions.AVFoundation/AVFSampleCache.cs deleted file mode 100644 index da4958c30..000000000 --- a/src/Beutl.Extensions.AVFoundation/AVFSampleCache.cs +++ /dev/null @@ -1,74 +0,0 @@ -using Beutl.Collections; -using Beutl.Logging; -using Microsoft.Extensions.Logging; -using MonoMac.CoreMedia; - -namespace Beutl.Extensions.AVFoundation; - -public class AVFSampleCache(AVFSampleCacheOptions options) -{ - private readonly ILogger _logger = Log.CreateLogger(); - - public const int FrameWaringGapCount = 1; - - private CircularBuffer _videoCircularBuffer = new(options.MaxVideoBufferSize); - - private readonly record struct VideoCache(int Frame, CMSampleBuffer Sample); - - public void ResetVideo() - { - CircularBuffer old = _videoCircularBuffer; - _videoCircularBuffer = new CircularBuffer(options.MaxVideoBufferSize); - foreach (VideoCache item in old) - { - item.Sample.Dispose(); - } - } - - public void AddFrameSample(int frame, CMSampleBuffer pSample) - { - int lastFrameNum = LastFrameNumber(); - if (lastFrameNum != -1) - { - if (Math.Abs(lastFrameNum + 1 - frame) > FrameWaringGapCount) - { - //_logger.LogWarning("frame error - frame: {frame} actual frame: {actual}", frame, lastFrameNum + 1); - } - - frame = lastFrameNum + 1; - } - - if (_videoCircularBuffer.IsFull) - { - _videoCircularBuffer.Front().Sample.Dispose(); - _videoCircularBuffer.PopFront(); - } - - var videoCache = new VideoCache(frame, pSample); - _videoCircularBuffer.PushBack(videoCache); - } - - public int LastFrameNumber() - { - if (_videoCircularBuffer.Size > 0) - { - VideoCache prevVideoCache = _videoCircularBuffer.Back(); - return prevVideoCache.Frame; - } - - return -1; - } - - public CMSampleBuffer? SearchFrameSample(int frame) - { - foreach (VideoCache videoCache in _videoCircularBuffer.Reverse()) - { - if (videoCache.Frame == frame) - { - return videoCache.Sample; - } - } - - return null; - } -} diff --git a/src/Beutl.Extensions.AVFoundation/AVFSampleUtilities.cs b/src/Beutl.Extensions.AVFoundation/AVFSampleUtilities.cs index 42af5a639..d84b84207 100644 --- a/src/Beutl.Extensions.AVFoundation/AVFSampleUtilities.cs +++ b/src/Beutl.Extensions.AVFoundation/AVFSampleUtilities.cs @@ -1,4 +1,5 @@ -using Beutl.Media; +using System.Diagnostics; +using Beutl.Media; using Beutl.Media.Pixel; using MonoMac.CoreGraphics; using MonoMac.CoreImage; @@ -56,4 +57,19 @@ public class AVFSampleUtilities return bitmap; } + + public static int SampleCopyToBuffer(CMSampleBuffer buffer, nint buf, int copyBufferPos, + int copyBufferSize) + { + using var dataBuffer = buffer.GetDataBuffer(); + Debug.Assert((copyBufferPos + copyBufferSize) <= dataBuffer.DataLength); + dataBuffer.CopyDataBytes((uint)copyBufferPos, (uint)copyBufferSize, buf); + + return copyBufferSize; + } + + public static int SampleCopyToBuffer(CMSampleBuffer buffer, nint buf, int copyBufferSize) + { + return SampleCopyToBuffer(buffer, buf, 0, copyBufferSize); + } } diff --git a/src/Beutl.Extensions.AVFoundation/AVFVideoSampleCache.cs b/src/Beutl.Extensions.AVFoundation/AVFVideoSampleCache.cs new file mode 100644 index 000000000..a7eedffb6 --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/AVFVideoSampleCache.cs @@ -0,0 +1,75 @@ +using System.Diagnostics; +using Beutl.Collections; +using Beutl.Logging; +using Microsoft.Extensions.Logging; +using MonoMac.CoreMedia; + +namespace Beutl.Extensions.AVFoundation; + +public class AVFVideoSampleCache(AVFSampleCacheOptions options) +{ + private readonly ILogger _logger = Log.CreateLogger(); + + public const int FrameWaringGapCount = 1; + + private CircularBuffer _buffer = new(options.MaxVideoBufferSize); + + private readonly record struct VideoCache(int Frame, CMSampleBuffer Sample); + + public void Reset() + { + CircularBuffer old = _buffer; + _buffer = new CircularBuffer(options.MaxVideoBufferSize); + foreach (VideoCache item in old) + { + item.Sample.Dispose(); + } + } + + public void Add(int frame, CMSampleBuffer pSample) + { + int lastFrameNum = LastFrameNumber(); + if (lastFrameNum != -1) + { + if (Math.Abs(lastFrameNum + 1 - frame) > FrameWaringGapCount) + { + _logger.LogWarning("frame error - frame: {frame} actual frame: {actual}", frame, lastFrameNum + 1); + } + + frame = lastFrameNum + 1; + } + + if (_buffer.IsFull) + { + _buffer.Front().Sample.Dispose(); + _buffer.PopFront(); + } + + var videoCache = new VideoCache(frame, pSample); + _buffer.PushBack(videoCache); + } + + public int LastFrameNumber() + { + if (_buffer.Size > 0) + { + VideoCache prevVideoCache = _buffer.Back(); + return prevVideoCache.Frame; + } + + return -1; + } + + public CMSampleBuffer? SearchSample(int frame) + { + foreach (VideoCache videoCache in _buffer.Reverse()) + { + if (videoCache.Frame == frame) + { + return videoCache.Sample; + } + } + + return null; + } +} diff --git a/src/Beutl.Extensions.AVFoundation/AVFVideoStreamReader.cs b/src/Beutl.Extensions.AVFoundation/AVFVideoStreamReader.cs new file mode 100644 index 000000000..37db5f7b1 --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/AVFVideoStreamReader.cs @@ -0,0 +1,187 @@ +using System.Diagnostics.CodeAnalysis; +using Beutl.Logging; +using Beutl.Media; +using Beutl.Media.Decoding; +using Microsoft.Extensions.Logging; +using MonoMac.AVFoundation; +using MonoMac.CoreMedia; +using MonoMac.CoreVideo; + +namespace Beutl.Extensions.AVFoundation.Decoding; + +public class AVFVideoStreamReader : IDisposable +{ + private readonly ILogger _logger = Log.CreateLogger(); + private readonly AVAsset _asset; + private readonly AVFVideoSampleCache _sampleCache; + + private readonly AVAssetTrack _track; + private AVAssetReader _reader; + private AVAssetReaderTrackOutput _output; + private CMTime _currentTimestamp; + + // 現在のフレームからどれくらいの範囲ならシーケンシャル読み込みさせるかの閾値 + private readonly int _thresholdFrameCount = 30; + + public AVFVideoStreamReader(AVAsset asset) + { + _asset = asset; + _sampleCache = new AVFVideoSampleCache(new AVFSampleCacheOptions()); + + _track = _asset.TracksWithMediaType(AVMediaType.Video)[0]; + + _reader = AVAssetReader.FromAsset(_asset, out var error); + if (error != null) throw new Exception(error.LocalizedDescription); + + _output = new AVAssetReaderTrackOutput( + _track, + new CVPixelBufferAttributes { PixelFormatType = CVPixelFormatType.CV32ARGB }.Dictionary); + _output.AlwaysCopiesSampleData = false; + + _reader.AddOutput(_output); + _reader.StartReading(); + + var desc = _track.FormatDescriptions[0]; + var frameSize = new PixelSize(desc.VideoDimensions.Width, desc.VideoDimensions.Height); + string codec = desc.VideoCodecType.ToString(); + float framerate = _track.NominalFrameRate; + double duration = _track.TotalSampleDataLength / _track.EstimatedDataRate * 8d; + VideoInfo = new VideoStreamInfo( + codec, + Rational.FromDouble(duration), + frameSize, + Rational.FromSingle(framerate)); + } + + ~AVFVideoStreamReader() + { + if (!IsDisposed) + { + DisposeCore(false); + } + } + + public VideoStreamInfo VideoInfo { get; } + + public bool IsDisposed { get; private set; } + + private CMSampleBuffer? ReadSample() + { + var buffer = _output.CopyNextSampleBuffer(); + if (!buffer.DataIsReady) + { + _logger.LogTrace("buffer.DataIsReady = false"); + return null; + } + + if (!buffer.IsValid) + { + _logger.LogTrace("buffer is invalid."); + return null; + } + + // success! + // add cache + // timestamp -= _firstGapTimeStamp; + int frame = CMTimeUtilities.ConvertFrameFromTimeStamp(_currentTimestamp, _track.NominalFrameRate); + _sampleCache.Add(frame, buffer); + _currentTimestamp = buffer.PresentationTimeStamp; + + return buffer; + } + + private void Seek(CMTime timestamp) + { + _sampleCache.Reset(); + _reader.Dispose(); + _output.Dispose(); + + _reader = AVAssetReader.FromAsset(_asset, out var error); + if (error != null) throw new Exception(error.LocalizedDescription); + _reader.TimeRange = new CMTimeRange { Start = timestamp, Duration = CMTime.PositiveInfinity }; + + _output = new AVAssetReaderTrackOutput( + _track, + new CVPixelBufferAttributes { PixelFormatType = CVPixelFormatType.CV32ARGB }.Dictionary); + _output.AlwaysCopiesSampleData = false; + _reader.AddOutput(_output); + + _reader.StartReading(); + } + + public bool ReadVideo(int frame, [NotNullWhen(true)] out IBitmap? image) + { + CMSampleBuffer? sample = _sampleCache.SearchSample(frame); + if (sample != null) + { + image = AVFSampleUtilities.ConvertToBgra(sample); + if (image != null) + return true; + } + + int currentFrame = _sampleCache.LastFrameNumber(); + + if (currentFrame == -1) + { + currentFrame = + CMTimeUtilities.ConvertFrameFromTimeStamp(_currentTimestamp, _track.NominalFrameRate); + } + + if (frame < currentFrame || (currentFrame + _thresholdFrameCount) < frame) + { + CMTime destTimePosition = CMTimeUtilities.ConvertTimeStampFromFrame(frame, _track.NominalFrameRate); + Seek(destTimePosition); + _logger.LogDebug( + "ReadFrame Seek currentFrame: {currentFrame}, destFrame: {destFrame} - destTimePos: {destTimePos} relativeFrame: {relativeFrame}", + currentFrame, frame, destTimePosition.Seconds, frame - currentFrame); + } + + sample = ReadSample(); + while (sample != null) + { + try + { + int readSampleFrame = _sampleCache.LastFrameNumber(); + + if (frame <= readSampleFrame) + { + if ((readSampleFrame - frame) > 0) + { + _logger.LogWarning( + "wrong frame currentFrame: {currentFrame} targetFrame: {frame} readSampleFrame: {readSampleFrame} distance: {distance}", + currentFrame, frame, readSampleFrame, readSampleFrame - frame); + } + + image = AVFSampleUtilities.ConvertToBgra(sample); + if (image != null) + return true; + } + + sample = ReadSample(); + } + catch + { + break; + } + } + + image = null; + return false; + } + + private void DisposeCore(bool disposing) + { + _sampleCache.Reset(); + _output.Dispose(); + _reader.Dispose(); + } + + public void Dispose() + { + if (IsDisposed) return; + DisposeCore(true); + + GC.SuppressFinalize(this); + IsDisposed = true; + } +} From 44c6be6a795245f96314377a3439244c4d6678a7 Mon Sep 17 00:00:00 2001 From: Yuto Terada Date: Tue, 18 Jun 2024 16:38:57 +0900 Subject: [PATCH 04/13] =?UTF-8?q?AVFoundation=E5=85=A5=E5=8A=9B=E7=B3=BB?= =?UTF-8?q?=E3=82=AF=E3=83=A9=E3=82=B9=E3=82=92Decoding=E3=83=87=E3=82=A3?= =?UTF-8?q?=E3=83=AC=E3=82=AF=E3=83=88=E3=83=AA=E3=81=AB=E7=A7=BB=E5=8B=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{ => Decoding}/AVFAudioSampleCache.cs | 2 +- .../{ => Decoding}/AVFAudioStreamReader.cs | 0 .../{ => Decoding}/AVFDecoderInfo.cs | 8 ++------ .../{ => Decoding}/AVFDecodingExtension.cs | 0 .../{ => Decoding}/AVFReader.cs | 0 .../{ => Decoding}/AVFSampleCacheOptions.cs | 2 +- .../{ => Decoding}/AVFVideoSampleCache.cs | 5 ++--- .../{ => Decoding}/AVFVideoStreamReader.cs | 0 8 files changed, 6 insertions(+), 11 deletions(-) rename src/Beutl.Extensions.AVFoundation/{ => Decoding}/AVFAudioSampleCache.cs (98%) rename src/Beutl.Extensions.AVFoundation/{ => Decoding}/AVFAudioStreamReader.cs (100%) rename src/Beutl.Extensions.AVFoundation/{ => Decoding}/AVFDecoderInfo.cs (87%) rename src/Beutl.Extensions.AVFoundation/{ => Decoding}/AVFDecodingExtension.cs (100%) rename src/Beutl.Extensions.AVFoundation/{ => Decoding}/AVFReader.cs (100%) rename src/Beutl.Extensions.AVFoundation/{ => Decoding}/AVFSampleCacheOptions.cs (76%) rename src/Beutl.Extensions.AVFoundation/{ => Decoding}/AVFVideoSampleCache.cs (95%) rename src/Beutl.Extensions.AVFoundation/{ => Decoding}/AVFVideoStreamReader.cs (100%) diff --git a/src/Beutl.Extensions.AVFoundation/AVFAudioSampleCache.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioSampleCache.cs similarity index 98% rename from src/Beutl.Extensions.AVFoundation/AVFAudioSampleCache.cs rename to src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioSampleCache.cs index dc00a9997..f83e3f7fb 100644 --- a/src/Beutl.Extensions.AVFoundation/AVFAudioSampleCache.cs +++ b/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioSampleCache.cs @@ -3,7 +3,7 @@ using Microsoft.Extensions.Logging; using MonoMac.CoreMedia; -namespace Beutl.Extensions.AVFoundation; +namespace Beutl.Extensions.AVFoundation.Decoding; public class AVFAudioSampleCache(AVFSampleCacheOptions options) { diff --git a/src/Beutl.Extensions.AVFoundation/AVFAudioStreamReader.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioStreamReader.cs similarity index 100% rename from src/Beutl.Extensions.AVFoundation/AVFAudioStreamReader.cs rename to src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioStreamReader.cs diff --git a/src/Beutl.Extensions.AVFoundation/AVFDecoderInfo.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecoderInfo.cs similarity index 87% rename from src/Beutl.Extensions.AVFoundation/AVFDecoderInfo.cs rename to src/Beutl.Extensions.AVFoundation/Decoding/AVFDecoderInfo.cs index d1822197b..6a2b1fab9 100644 --- a/src/Beutl.Extensions.AVFoundation/AVFDecoderInfo.cs +++ b/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecoderInfo.cs @@ -1,8 +1,4 @@ -using System.Diagnostics.CodeAnalysis; -using Beutl.Extensions.AVFoundation; -using Beutl.Media; -using Beutl.Media.Decoding; -using Beutl.Media.Music; +using Beutl.Media.Decoding; namespace Beutl.Extensions.AVFoundation.Decoding; @@ -36,7 +32,7 @@ public IEnumerable AudioExtensions() { try { - return new AVFReader(file, options, extension); + return new AVFReader(file, options); } catch { diff --git a/src/Beutl.Extensions.AVFoundation/AVFDecodingExtension.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecodingExtension.cs similarity index 100% rename from src/Beutl.Extensions.AVFoundation/AVFDecodingExtension.cs rename to src/Beutl.Extensions.AVFoundation/Decoding/AVFDecodingExtension.cs diff --git a/src/Beutl.Extensions.AVFoundation/AVFReader.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFReader.cs similarity index 100% rename from src/Beutl.Extensions.AVFoundation/AVFReader.cs rename to src/Beutl.Extensions.AVFoundation/Decoding/AVFReader.cs diff --git a/src/Beutl.Extensions.AVFoundation/AVFSampleCacheOptions.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFSampleCacheOptions.cs similarity index 76% rename from src/Beutl.Extensions.AVFoundation/AVFSampleCacheOptions.cs rename to src/Beutl.Extensions.AVFoundation/Decoding/AVFSampleCacheOptions.cs index c5824b0ed..c8a689bd3 100644 --- a/src/Beutl.Extensions.AVFoundation/AVFSampleCacheOptions.cs +++ b/src/Beutl.Extensions.AVFoundation/Decoding/AVFSampleCacheOptions.cs @@ -1,4 +1,4 @@ -namespace Beutl.Extensions.AVFoundation; +namespace Beutl.Extensions.AVFoundation.Decoding; public record AVFSampleCacheOptions( int MaxVideoBufferSize = 4, // あまり大きな値を設定するとReadSampleで停止する diff --git a/src/Beutl.Extensions.AVFoundation/AVFVideoSampleCache.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFVideoSampleCache.cs similarity index 95% rename from src/Beutl.Extensions.AVFoundation/AVFVideoSampleCache.cs rename to src/Beutl.Extensions.AVFoundation/Decoding/AVFVideoSampleCache.cs index a7eedffb6..2b1cc2f10 100644 --- a/src/Beutl.Extensions.AVFoundation/AVFVideoSampleCache.cs +++ b/src/Beutl.Extensions.AVFoundation/Decoding/AVFVideoSampleCache.cs @@ -1,10 +1,9 @@ -using System.Diagnostics; -using Beutl.Collections; +using Beutl.Collections; using Beutl.Logging; using Microsoft.Extensions.Logging; using MonoMac.CoreMedia; -namespace Beutl.Extensions.AVFoundation; +namespace Beutl.Extensions.AVFoundation.Decoding; public class AVFVideoSampleCache(AVFSampleCacheOptions options) { diff --git a/src/Beutl.Extensions.AVFoundation/AVFVideoStreamReader.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFVideoStreamReader.cs similarity index 100% rename from src/Beutl.Extensions.AVFoundation/AVFVideoStreamReader.cs rename to src/Beutl.Extensions.AVFoundation/Decoding/AVFVideoStreamReader.cs From 3c04b69f193d15338e8fcf3b07afa4db80d3a0e4 Mon Sep 17 00:00:00 2001 From: Yuto Terada Date: Thu, 20 Jun 2024 01:27:46 +0900 Subject: [PATCH 05/13] =?UTF-8?q?=E9=9F=B3=E5=A3=B0=E3=81=8C=E5=86=8D?= =?UTF-8?q?=E7=94=9F=E3=81=A7=E3=81=8D=E3=81=AA=E3=81=84=E3=81=AE=E3=82=92?= =?UTF-8?q?=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Decoding/AVFAudioSampleCache.cs | 4 +- .../Decoding/AVFAudioStreamReader.cs | 58 +++++++++++++------ 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioSampleCache.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioSampleCache.cs index f83e3f7fb..42d23e9e0 100644 --- a/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioSampleCache.cs +++ b/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioSampleCache.cs @@ -61,7 +61,7 @@ public void Reset(short nBlockAlign) { _nBlockAlign = nBlockAlign; CircularBuffer old = _audioCircularBuffer; - _audioCircularBuffer = new CircularBuffer(options.MaxVideoBufferSize); + _audioCircularBuffer = new CircularBuffer(options.MaxAudioBufferSize); foreach (AudioCache item in old) { item.Sample.Dispose(); @@ -86,9 +86,7 @@ public void Add(int startSample, CMSampleBuffer buffer) startSample = lastAudioSampleNum + _audioCircularBuffer.Back().AudioSampleCount; } - // int totalLength = buffer.NumSamples; int audioSampleCount = buffer.NumSamples; - // Debug.Assert((totalLength % _nBlockAlign) == 0); if (_audioCircularBuffer.IsFull) { diff --git a/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioStreamReader.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioStreamReader.cs index 4e87a8a72..95df8a422 100644 --- a/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioStreamReader.cs +++ b/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioStreamReader.cs @@ -16,12 +16,13 @@ public class AVFAudioStreamReader : IDisposable private readonly AVAsset _asset; private readonly MediaOptions _options; private readonly AVFAudioSampleCache _sampleCache; - + private readonly AVAssetTrack _audioTrack; private AVAssetReader _assetAudioReader; private AVAssetReaderTrackOutput _audioReaderOutput; - private CMTime _currentAudioTimestamp; + private CMTime _currentAudioTimestamp = CMTime.Zero; private readonly int _thresholdSampleCount = 30000; + private CMTime _firstGapTimestamp = CMTime.Zero; public AVFAudioStreamReader(AVAsset asset, MediaOptions options) { @@ -56,6 +57,8 @@ public AVFAudioStreamReader(AVAsset asset, MediaOptions options) Rational.FromDouble(_audioTrack.TotalSampleDataLength / _audioTrack.EstimatedDataRate * 8d), _audioTrack.NaturalTimeScale, audioDesc.AudioChannelLayout.Channels.Length); + + TestFirstReadSample(); } ~AVFAudioStreamReader() @@ -92,6 +95,7 @@ private void SeekAudio(CMTime timestamp) SampleRate = _options.SampleRate, NumberChannels = 2, }.Dictionary); + _audioReaderOutput.AlwaysCopiesSampleData = false; _assetAudioReader.AddOutput(_audioReaderOutput); _assetAudioReader.StartReading(); @@ -99,6 +103,8 @@ private void SeekAudio(CMTime timestamp) public bool ReadAudio(int start, int length, [NotNullWhen(true)] out IPcm? sound) { + start = (int)((long)_options.SampleRate * start / AudioInfo.SampleRate); + length = (int)((long)_options.SampleRate * length / AudioInfo.SampleRate); var buffer = new Pcm(_options.SampleRate, length); bool hitCache = _sampleCache.SearchAudioSampleAndCopyBuffer(start, length, buffer.Data); if (hitCache) @@ -110,22 +116,17 @@ public bool ReadAudio(int start, int length, [NotNullWhen(true)] out IPcm? sound int currentSample = _sampleCache.LastAudioSampleNumber(); if (currentSample == -1) { - currentSample = - CMTimeUtilities.ConvertFrameFromTimeStamp(_currentAudioTimestamp, _audioTrack.NaturalTimeScale); + currentSample = (int)_currentAudioTimestamp.Value; } if (start < currentSample || (currentSample + _thresholdSampleCount) < start) { - CMTime destTimePosition = CMTimeUtilities.ConvertTimeStampFromFrame(start, _audioTrack.NaturalTimeScale); + var destTimePosition = new CMTime(start, _options.SampleRate); SeekAudio(destTimePosition); - // _logger.LogInformation( - // "ReadAudio Seek currentTimestamp: {currentTimestamp} - destTimePos: {destTimePos} relativeSample: {relativeSample}", - // TimestampUtilities.ConvertSecFrom100ns(_currentAudioTimeStamp), - // TimestampUtilities.ConvertSecFrom100ns(destTimePosition), - // start - currentSample); } CMSampleBuffer? sample = ReadAudioSample(); + while (sample != null) { try @@ -149,9 +150,8 @@ public bool ReadAudio(int start, int length, [NotNullWhen(true)] out IPcm? sound } } - buffer.Dispose(); - sound = null; - return false; + sound = buffer; + return true; } private CMSampleBuffer? ReadAudioSample() @@ -160,7 +160,15 @@ public bool ReadAudio(int start, int length, [NotNullWhen(true)] out IPcm? sound if (!buffer.DataIsReady) { _logger.LogTrace("buffer.DataIsReady = false"); - return null; + + buffer = _audioReaderOutput.CopyNextSampleBuffer(); + if (!buffer.DataIsReady) + { + _logger.LogTrace("2 buffer.DataIsReady = false"); + return null; + } + + // return null; } if (!buffer.IsValid) @@ -171,15 +179,29 @@ public bool ReadAudio(int start, int length, [NotNullWhen(true)] out IPcm? sound // success! // add cache - // timestamp -= _firstGapTimeStamp; - int startSample = - CMTimeUtilities.ConvertFrameFromTimeStamp(_currentAudioTimestamp, _audioTrack.NaturalTimeScale); + var timestamp = buffer.PresentationTimeStamp; + timestamp -= _firstGapTimestamp; + int startSample = (int)timestamp.Value; _sampleCache.Add(startSample, buffer); - _currentAudioTimestamp = buffer.PresentationTimeStamp; + _currentAudioTimestamp = timestamp; return buffer; } + private void TestFirstReadSample() + { + _ = ReadAudioSample() ?? throw new Exception("TestFirstReadSample() failed"); + _logger.LogInformation( + "TestFirstReadSample firstTimeStamp: {currentAudioTimeStamp}", + _currentAudioTimestamp); + CMTime firstAudioTimeStamp = _currentAudioTimestamp; + SeekAudio(CMTime.Zero); + _currentAudioTimestamp = CMTime.Zero; + + _firstGapTimestamp = firstAudioTimeStamp; + _logger.LogInformation("TestFirstReadSample - firstGapTimeStamp: {firstGapTimeStamp}", _firstGapTimestamp); + } + private void DisposeCore(bool disposing) { _sampleCache.Reset(0); From b7f97731626db8ff67b60de85f2f2a3130a759bd Mon Sep 17 00:00:00 2001 From: Yuto Terada Date: Sat, 22 Jun 2024 01:13:48 +0900 Subject: [PATCH 06/13] feat: Enable editing of AVFoundation Decoder settings --- .../Decoding/AVFAudioStreamReader.cs | 10 ++- .../Decoding/AVFDecoderInfo.cs | 2 +- .../Decoding/AVFDecodingExtension.cs | 2 + .../Decoding/AVFDecodingSettings.cs | 65 +++++++++++++++++++ .../Decoding/AVFReader.cs | 6 +- .../Decoding/AVFVideoStreamReader.cs | 10 +-- 6 files changed, 84 insertions(+), 11 deletions(-) create mode 100644 src/Beutl.Extensions.AVFoundation/Decoding/AVFDecodingSettings.cs diff --git a/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioStreamReader.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioStreamReader.cs index 95df8a422..94cb3f367 100644 --- a/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioStreamReader.cs +++ b/src/Beutl.Extensions.AVFoundation/Decoding/AVFAudioStreamReader.cs @@ -16,19 +16,23 @@ public class AVFAudioStreamReader : IDisposable private readonly AVAsset _asset; private readonly MediaOptions _options; private readonly AVFAudioSampleCache _sampleCache; + private readonly int _thresholdSampleCount; private readonly AVAssetTrack _audioTrack; private AVAssetReader _assetAudioReader; private AVAssetReaderTrackOutput _audioReaderOutput; private CMTime _currentAudioTimestamp = CMTime.Zero; - private readonly int _thresholdSampleCount = 30000; private CMTime _firstGapTimestamp = CMTime.Zero; - public AVFAudioStreamReader(AVAsset asset, MediaOptions options) + public AVFAudioStreamReader(AVAsset asset, MediaOptions options, AVFDecodingExtension extension) { _asset = asset; _options = options; - _sampleCache = new AVFAudioSampleCache(new AVFSampleCacheOptions()); + _sampleCache = + new AVFAudioSampleCache( + new AVFSampleCacheOptions(MaxAudioBufferSize: extension.Settings?.MaxAudioBufferSize ?? 20)); + _thresholdSampleCount = extension.Settings?.ThresholdSampleCount ?? 30000; + _sampleCache.Reset(4 * 2); _audioTrack = _asset.TracksWithMediaType(AVMediaType.Audio)[0]; diff --git a/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecoderInfo.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecoderInfo.cs index 6a2b1fab9..31b236234 100644 --- a/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecoderInfo.cs +++ b/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecoderInfo.cs @@ -32,7 +32,7 @@ public IEnumerable AudioExtensions() { try { - return new AVFReader(file, options); + return new AVFReader(file, options, extension); } catch { diff --git a/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecodingExtension.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecodingExtension.cs index 1c96a1700..5ec3ad82c 100644 --- a/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecodingExtension.cs +++ b/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecodingExtension.cs @@ -11,6 +11,8 @@ public class AVFDecodingExtension : DecodingExtension public override string DisplayName => "AVFoundation Decoding"; + public override AVFDecodingSettings? Settings { get; } = new(); + public override IDecoderInfo GetDecoderInfo() { return new AVFDecoderInfo(this); diff --git a/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecodingSettings.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecodingSettings.cs new file mode 100644 index 000000000..9ac63a74a --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecodingSettings.cs @@ -0,0 +1,65 @@ +using System.ComponentModel.DataAnnotations; +using Beutl.Extensibility; + +namespace Beutl.Extensions.AVFoundation.Decoding; + +public sealed class AVFDecodingSettings : ExtensionSettings +{ + public static readonly CoreProperty ThresholdFrameCountProperty; + public static readonly CoreProperty ThresholdSampleCountProperty; + public static readonly CoreProperty MaxVideoBufferSizeProperty; + public static readonly CoreProperty MaxAudioBufferSizeProperty; + + static AVFDecodingSettings() + { + ThresholdFrameCountProperty = ConfigureProperty(nameof(ThresholdFrameCount)) + .DefaultValue(30) + .Register(); + + ThresholdSampleCountProperty = ConfigureProperty(nameof(ThresholdSampleCount)) + .DefaultValue(30000) + .Register(); + + MaxVideoBufferSizeProperty = ConfigureProperty(nameof(MaxVideoBufferSize)) + .DefaultValue(4) + .Register(); + + MaxAudioBufferSizeProperty = ConfigureProperty(nameof(MaxAudioBufferSize)) + .DefaultValue(20) + .Register(); + + AffectsConfig( + ThresholdFrameCountProperty, + ThresholdSampleCountProperty, + MaxVideoBufferSizeProperty, + MaxAudioBufferSizeProperty); + } + + [Range(1, int.MaxValue)] + public int ThresholdFrameCount + { + get => GetValue(ThresholdFrameCountProperty); + set => SetValue(ThresholdFrameCountProperty, value); + } + + [Range(1, int.MaxValue)] + public int ThresholdSampleCount + { + get => GetValue(ThresholdSampleCountProperty); + set => SetValue(ThresholdSampleCountProperty, value); + } + + [Range(1, int.MaxValue)] + public int MaxVideoBufferSize + { + get => GetValue(MaxVideoBufferSizeProperty); + set => SetValue(MaxVideoBufferSizeProperty, value); + } + + [Range(1, int.MaxValue)] + public int MaxAudioBufferSize + { + get => GetValue(MaxAudioBufferSizeProperty); + set => SetValue(MaxAudioBufferSizeProperty, value); + } +} diff --git a/src/Beutl.Extensions.AVFoundation/Decoding/AVFReader.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFReader.cs index 47709a95e..9abf7069d 100644 --- a/src/Beutl.Extensions.AVFoundation/Decoding/AVFReader.cs +++ b/src/Beutl.Extensions.AVFoundation/Decoding/AVFReader.cs @@ -14,18 +14,18 @@ public sealed class AVFReader : MediaReader private AVFVideoStreamReader? _videoReader; private AVFAudioStreamReader? _audioReader; - public AVFReader(string file, MediaOptions options) + public AVFReader(string file, MediaOptions options, AVFDecodingExtension extension) { var url = NSUrl.FromFilename(file); _asset = AVAsset.FromUrl(url); if (options.StreamsToLoad.HasFlag(MediaMode.Video)) { - _videoReader = new AVFVideoStreamReader(_asset); + _videoReader = new AVFVideoStreamReader(_asset, extension); } if (options.StreamsToLoad.HasFlag(MediaMode.Audio)) { - _audioReader = new AVFAudioStreamReader(_asset, options); + _audioReader = new AVFAudioStreamReader(_asset, options, extension); } } diff --git a/src/Beutl.Extensions.AVFoundation/Decoding/AVFVideoStreamReader.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFVideoStreamReader.cs index 37db5f7b1..def70a7d5 100644 --- a/src/Beutl.Extensions.AVFoundation/Decoding/AVFVideoStreamReader.cs +++ b/src/Beutl.Extensions.AVFoundation/Decoding/AVFVideoStreamReader.cs @@ -14,19 +14,21 @@ public class AVFVideoStreamReader : IDisposable private readonly ILogger _logger = Log.CreateLogger(); private readonly AVAsset _asset; private readonly AVFVideoSampleCache _sampleCache; + // 現在のフレームからどれくらいの範囲ならシーケンシャル読み込みさせるかの閾値 + private readonly int _thresholdFrameCount; private readonly AVAssetTrack _track; private AVAssetReader _reader; private AVAssetReaderTrackOutput _output; private CMTime _currentTimestamp; - // 現在のフレームからどれくらいの範囲ならシーケンシャル読み込みさせるかの閾値 - private readonly int _thresholdFrameCount = 30; - public AVFVideoStreamReader(AVAsset asset) + public AVFVideoStreamReader(AVAsset asset, AVFDecodingExtension extension) { _asset = asset; - _sampleCache = new AVFVideoSampleCache(new AVFSampleCacheOptions()); + _sampleCache = new AVFVideoSampleCache( + new AVFSampleCacheOptions(MaxVideoBufferSize: extension.Settings?.MaxVideoBufferSize ?? 4)); + _thresholdFrameCount = extension.Settings?.ThresholdFrameCount ?? 30; _track = _asset.TracksWithMediaType(AVMediaType.Video)[0]; From 86f9f76503f3907a09e9d890b35f12e15d816e94 Mon Sep 17 00:00:00 2001 From: Yuto Terada Date: Sat, 22 Jun 2024 20:46:36 +0900 Subject: [PATCH 07/13] add: AVFWriter --- .../AVFSampleUtilities.cs | 23 ++ .../Decoding/AVFDecodingExtension.cs | 9 +- .../Encoding/AVFEncoderInfo.cs | 39 ++++ .../Encoding/AVFEncodingExtension.cs | 35 +++ .../Encoding/AVFVideoEncoderSettings.cs | 213 ++++++++++++++++++ .../Encoding/AVFWriter.cs | 203 +++++++++++++++++ 6 files changed, 521 insertions(+), 1 deletion(-) create mode 100644 src/Beutl.Extensions.AVFoundation/Encoding/AVFEncoderInfo.cs create mode 100644 src/Beutl.Extensions.AVFoundation/Encoding/AVFEncodingExtension.cs create mode 100644 src/Beutl.Extensions.AVFoundation/Encoding/AVFVideoEncoderSettings.cs create mode 100644 src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs diff --git a/src/Beutl.Extensions.AVFoundation/AVFSampleUtilities.cs b/src/Beutl.Extensions.AVFoundation/AVFSampleUtilities.cs index d84b84207..d679c5997 100644 --- a/src/Beutl.Extensions.AVFoundation/AVFSampleUtilities.cs +++ b/src/Beutl.Extensions.AVFoundation/AVFSampleUtilities.cs @@ -11,6 +11,29 @@ namespace Beutl.Extensions.AVFoundation; public class AVFSampleUtilities { + public static unsafe CVPixelBuffer? ConvertToCVPixelBuffer(Bitmap bitmap) + { + int width = bitmap.Width; + int height = bitmap.Height; + var pixelBuffer = new CVPixelBuffer(width, height, CVPixelFormatType.CV32BGRA, new CVPixelBufferAttributes + { + PixelFormatType = CVPixelFormatType.CV32BGRA, + Width = width, + Height = height + }); + + var r = pixelBuffer.Lock(CVOptionFlags.None); + if (r != CVReturn.Success) return null; + + Buffer.MemoryCopy( + (void*)bitmap.Data, (void*)pixelBuffer.GetBaseAddress(0), + bitmap.ByteCount, bitmap.ByteCount); + + pixelBuffer.Unlock(CVOptionFlags.None); + + return pixelBuffer; + } + public static unsafe Bitmap? ConvertToBgra(CMSampleBuffer buffer) { using var imageBuffer = buffer.GetImageBuffer(); diff --git a/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecodingExtension.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecodingExtension.cs index 5ec3ad82c..8d522eab3 100644 --- a/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecodingExtension.cs +++ b/src/Beutl.Extensions.AVFoundation/Decoding/AVFDecodingExtension.cs @@ -22,7 +22,14 @@ public override void Load() { if (OperatingSystem.IsMacOS()) { - NSApplication.Init(); + try + { + NSApplication.Init(); + } + catch + { + } + DecoderRegistry.Register(GetDecoderInfo()); } } diff --git a/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncoderInfo.cs b/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncoderInfo.cs new file mode 100644 index 000000000..bfd9496fa --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncoderInfo.cs @@ -0,0 +1,39 @@ +using Beutl.Media.Encoding; + +namespace Beutl.Extensions.AVFoundation.Encoding; + +public sealed class AVFEncoderInfo(AVFEncodingExtension extension) : IEncoderInfo +{ + public string Name =>"AVFoundation"; + + public MediaWriter? Create(string file, VideoEncoderSettings videoConfig, AudioEncoderSettings audioConfig) + { + + } + + public IEnumerable SupportExtensions() + { + yield return ".mp4"; + yield return ".mov"; + yield return ".m4v"; + yield return ".avi"; + yield return ".wmv"; + yield return ".sami"; + yield return ".smi"; + yield return ".adts"; + yield return ".asf"; + yield return ".3gp"; + yield return ".3gp2"; + yield return ".3gpp"; + } + + public VideoEncoderSettings DefaultVideoConfig() + { + return new VideoEncoderSettings(); + } + + public AudioEncoderSettings DefaultAudioConfig() + { + return new AudioEncoderSettings(); + } +} diff --git a/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncodingExtension.cs b/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncodingExtension.cs new file mode 100644 index 000000000..28d3ce1d2 --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncodingExtension.cs @@ -0,0 +1,35 @@ +using Beutl.Extensibility; +using Beutl.Media.Decoding; +using Beutl.Media.Encoding; +using MonoMac.AppKit; + +namespace Beutl.Extensions.AVFoundation.Encoding; + +[Export] +public class AVFEncodingExtension : EncodingExtension +{ + public override string Name => "AVFoundation Encoding"; + + public override string DisplayName => "AVFoundation Encoding"; + + public override IEncoderInfo GetEncoderInfo() + { + return new AVFEncoderInfo(this); + } + + public override void Load() + { + if (OperatingSystem.IsMacOS()) + { + try + { + NSApplication.Init(); + } + catch + { + } + + EncoderRegistry.Register(GetEncoderInfo()); + } + } +} diff --git a/src/Beutl.Extensions.AVFoundation/Encoding/AVFVideoEncoderSettings.cs b/src/Beutl.Extensions.AVFoundation/Encoding/AVFVideoEncoderSettings.cs new file mode 100644 index 000000000..1f2aaf042 --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/Encoding/AVFVideoEncoderSettings.cs @@ -0,0 +1,213 @@ +using Beutl.Media.Encoding; + +namespace Beutl.Extensions.AVFoundation.Encoding; + +public sealed class AVFAudioEncoderSettings : AudioEncoderSettings +{ + public static readonly CoreProperty FormatProperty; + public static readonly CoreProperty LinearPcmBitDepthProperty; + public static readonly CoreProperty LinearPcmBigEndianProperty; + public static readonly CoreProperty LinearPcmFloatProperty; + public static readonly CoreProperty LinearPcmNonInterleavedProperty; + public static readonly CoreProperty QualityProperty; + public static readonly CoreProperty SampleRateConverterQualityProperty; + + static AVFAudioEncoderSettings() + { + FormatProperty = ConfigureProperty(nameof(Format)) + .DefaultValue(AudioFormatType.Default) + .Register(); + + LinearPcmBitDepthProperty = ConfigureProperty(nameof(LinearPcmBitDepth)) + .DefaultValue(BitDepth.Default) + .Register(); + + LinearPcmBigEndianProperty = ConfigureProperty(nameof(LinearPcmBigEndian)) + .DefaultValue(false) + .Register(); + + LinearPcmFloatProperty = ConfigureProperty(nameof(LinearPcmFloat)) + .DefaultValue(false) + .Register(); + + LinearPcmNonInterleavedProperty = + ConfigureProperty(nameof(LinearPcmNonInterleaved)) + .DefaultValue(false) + .Register(); + + QualityProperty = ConfigureProperty(nameof(Quality)) + .DefaultValue(AudioQuality.Default) + .Register(); + + SampleRateConverterQualityProperty = + ConfigureProperty(nameof(SampleRateConverterQuality)) + .DefaultValue(AudioQuality.Default) + .Register(); + + BitrateProperty.OverrideDefaultValue(-1); + } + + public AudioFormatType Format + { + get => GetValue(FormatProperty); + set => SetValue(FormatProperty, value); + } + + public BitDepth LinearPcmBitDepth + { + get => GetValue(LinearPcmBitDepthProperty); + set => SetValue(LinearPcmBitDepthProperty, value); + } + + public bool LinearPcmBigEndian + { + get => GetValue(LinearPcmBigEndianProperty); + set => SetValue(LinearPcmBigEndianProperty, value); + } + + public bool LinearPcmFloat + { + get => GetValue(LinearPcmFloatProperty); + set => SetValue(LinearPcmFloatProperty, value); + } + + public bool LinearPcmNonInterleaved + { + get => GetValue(LinearPcmNonInterleavedProperty); + set => SetValue(LinearPcmNonInterleavedProperty, value); + } + + public AudioQuality Quality + { + get => GetValue(QualityProperty); + set => SetValue(QualityProperty, value); + } + + public AudioQuality SampleRateConverterQuality + { + get => GetValue(SampleRateConverterQualityProperty); + set => SetValue(SampleRateConverterQualityProperty, value); + } + + public enum BitDepth + { + Default, + Bits8, + Bits16, + Bits24, + Bits32 + } + + public enum AudioQuality + { + Default = -1, + Min = 0, + Low = 32, // 0x00000020 + Medium = 64, // 0x00000040 + High = 96, // 0x00000060 + Max = 127, // 0x0000007F + } + + public enum AudioFormatType + { + Default = 0, + MPEGLayer1 = 778924081, // 0x2E6D7031 + MPEGLayer2 = 778924082, // 0x2E6D7032 + MPEGLayer3 = 778924083, // 0x2E6D7033 + Audible = 1096107074, // 0x41554442 + MACE3 = 1296122675, // 0x4D414333 + MACE6 = 1296122678, // 0x4D414336 + QDesign2 = 1363430706, // 0x51444D32 + QDesign = 1363430723, // 0x51444D43 + QUALCOMM = 1365470320, // 0x51636C70 + MPEG4AAC = 1633772320, // 0x61616320 + MPEG4AAC_ELD = 1633772389, // 0x61616365 + MPEG4AAC_ELD_SBR = 1633772390, // 0x61616366 + MPEG4AAC_ELD_V2 = 1633772391, // 0x61616367 + MPEG4AAC_HE = 1633772392, // 0x61616368 + MPEG4AAC_LD = 1633772396, // 0x6161636C + MPEG4AAC_HE_V2 = 1633772400, // 0x61616370 + MPEG4AAC_Spatial = 1633772403, // 0x61616373 + AC3 = 1633889587, // 0x61632D33 + AES3 = 1634038579, // 0x61657333 + AppleLossless = 1634492771, // 0x616C6163 + ALaw = 1634492791, // 0x616C6177 + ParameterValueStream = 1634760307, // 0x61707673 + CAC3 = 1667326771, // 0x63616333 + MPEG4CELP = 1667591280, // 0x63656C70 + MPEG4HVXC = 1752594531, // 0x68767863 + iLBC = 1768710755, // 0x696C6263 + AppleIMA4 = 1768775988, // 0x696D6134 + LinearPCM = 1819304813, // 0x6C70636D + MIDIStream = 1835623529, // 0x6D696469 + DVIIntelIMA = 1836253201, // 0x6D730011 + MicrosoftGSM = 1836253233, // 0x6D730031 + AMR = 1935764850, // 0x73616D72 + TimeCode = 1953066341, // 0x74696D65 + MPEG4TwinVQ = 1953986161, // 0x74777671 + ULaw = 1970037111, // 0x756C6177 + } +} + +public sealed class AVFVideoEncoderSettings : VideoEncoderSettings +{ + public static readonly CoreProperty CodecProperty; + public static readonly CoreProperty JPEGQualityProperty; + public static readonly CoreProperty ProfileLevelH264Property; + + static AVFVideoEncoderSettings() + { + CodecProperty = ConfigureProperty(nameof(Codec)) + .DefaultValue(VideoCodec.H264) + .Register(); + + JPEGQualityProperty = ConfigureProperty(nameof(JPEGQuality)) + .DefaultValue(-1) + .Register(); + + ProfileLevelH264Property = + ConfigureProperty(nameof(ProfileLevelH264)) + .DefaultValue(VideoProfileLevelH264.Default) + .Register(); + + BitrateProperty.OverrideDefaultValue(-1); + KeyframeRateProperty.OverrideDefaultValue(-1); + } + + public VideoCodec Codec + { + get => GetValue(CodecProperty); + set => SetValue(CodecProperty, value); + } + + public float JPEGQuality + { + get => GetValue(JPEGQualityProperty); + set => SetValue(JPEGQualityProperty, value); + } + + public VideoProfileLevelH264 ProfileLevelH264 + { + get => GetValue(ProfileLevelH264Property); + set => SetValue(ProfileLevelH264Property, value); + } + + public enum VideoCodec + { + Default = 0, + H264 = 1, + JPEG = 2, + } + + public enum VideoProfileLevelH264 + { + Default = 0, + Baseline30 = 1, + Baseline31 = 2, + Baseline41 = 3, + Main30 = 4, + Main31 = 5, + Main32 = 6, + Main41 = 7, + } +} diff --git a/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs b/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs new file mode 100644 index 000000000..757ffdf6b --- /dev/null +++ b/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs @@ -0,0 +1,203 @@ +using Beutl.Media; +using Beutl.Media.Encoding; +using Beutl.Media.Music; +using Beutl.Media.Pixel; +using MonoMac.AudioToolbox; +using MonoMac.AVFoundation; +using MonoMac.CoreFoundation; +using MonoMac.CoreMedia; +using MonoMac.CoreVideo; +using MonoMac.Foundation; + +namespace Beutl.Extensions.AVFoundation.Encoding; + +public class AVFWriter : MediaWriter +{ + private readonly AVAssetWriter _assetWriter; + private readonly AVAssetWriterInput _videoInput; + private readonly AVAssetWriterInputPixelBufferAdaptor _videoAdaptor; + private long _numberOfFrames; + private readonly AVAssetWriterInput _audioInput; + private readonly AudioSettings _audioSettings; + private long _numberOfSamples; + + public AVFWriter(string file, AVFVideoEncoderSettings videoConfig, AVFAudioEncoderSettings audioConfig) + : base(videoConfig, audioConfig) + { + var url = NSUrl.FromFilename(file); + _assetWriter = AVAssetWriter.FromUrl(url, AVFileType.Mpeg4, out var error); + if (error != null) throw new Exception(error.LocalizedDescription); + + _videoInput = AVAssetWriterInput.Create(AVMediaType.Video, new AVVideoSettingsCompressed + { + Width = videoConfig.DestinationSize.Width, + Height = videoConfig.DestinationSize.Height, + Codec = videoConfig.Codec switch + { + AVFVideoEncoderSettings.VideoCodec.H264 => AVVideoCodec.H264, + AVFVideoEncoderSettings.VideoCodec.JPEG => AVVideoCodec.JPEG, + _ => null, + }, + CodecSettings = new AVVideoCodecSettings + { + AverageBitRate = videoConfig.Bitrate == -1 ? null : videoConfig.Bitrate, + MaxKeyFrameInterval = videoConfig.KeyframeRate == -1 ? null : videoConfig.KeyframeRate, + JPEGQuality = videoConfig.JPEGQuality < 0 ? null : videoConfig.JPEGQuality, + ProfileLevelH264 = videoConfig.ProfileLevelH264 switch + { + AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline30 => AVVideoProfileLevelH264.Baseline30, + AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline31 => AVVideoProfileLevelH264.Baseline31, + AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline41 => AVVideoProfileLevelH264.Baseline41, + AVFVideoEncoderSettings.VideoProfileLevelH264.Main30 => AVVideoProfileLevelH264.Main30, + AVFVideoEncoderSettings.VideoProfileLevelH264.Main31 => AVVideoProfileLevelH264.Main31, + AVFVideoEncoderSettings.VideoProfileLevelH264.Main32 => AVVideoProfileLevelH264.Main32, + AVFVideoEncoderSettings.VideoProfileLevelH264.Main41 => AVVideoProfileLevelH264.Main41, + _ => null, + }, + }, + }); + + _assetWriter.AddInput(_videoInput); + + _audioSettings = new AudioSettings + { + SampleRate = audioConfig.SampleRate, + EncoderBitRate = audioConfig.Bitrate == -1 ? null : audioConfig.Bitrate, + NumberChannels = audioConfig.Channels, + Format = audioConfig.Format switch + { + AVFAudioEncoderSettings.AudioFormatType.MPEGLayer1 => AudioFormatType.MPEGLayer1, + AVFAudioEncoderSettings.AudioFormatType.MPEGLayer2 => AudioFormatType.MPEGLayer2, + AVFAudioEncoderSettings.AudioFormatType.MPEGLayer3 => AudioFormatType.MPEGLayer3, + AVFAudioEncoderSettings.AudioFormatType.Audible => AudioFormatType.Audible, + AVFAudioEncoderSettings.AudioFormatType.MACE3 => AudioFormatType.MACE3, + AVFAudioEncoderSettings.AudioFormatType.MACE6 => AudioFormatType.MACE6, + AVFAudioEncoderSettings.AudioFormatType.QDesign2 => AudioFormatType.QDesign2, + AVFAudioEncoderSettings.AudioFormatType.QDesign => AudioFormatType.QDesign, + AVFAudioEncoderSettings.AudioFormatType.QUALCOMM => AudioFormatType.QUALCOMM, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC => AudioFormatType.MPEG4AAC, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD => AudioFormatType.MPEG4AAC_ELD, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD_SBR => AudioFormatType.MPEG4AAC_ELD_SBR, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD_V2 => AudioFormatType.MPEG4AAC_ELD_V2, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_HE => AudioFormatType.MPEG4AAC_HE, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_LD => AudioFormatType.MPEG4AAC_LD, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_HE_V2 => AudioFormatType.MPEG4AAC_HE_V2, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_Spatial => AudioFormatType.MPEG4AAC_Spatial, + AVFAudioEncoderSettings.AudioFormatType.AC3 => AudioFormatType.AC3, + AVFAudioEncoderSettings.AudioFormatType.AES3 => AudioFormatType.AES3, + AVFAudioEncoderSettings.AudioFormatType.AppleLossless => AudioFormatType.AppleLossless, + AVFAudioEncoderSettings.AudioFormatType.ALaw => AudioFormatType.ALaw, + AVFAudioEncoderSettings.AudioFormatType.ParameterValueStream => AudioFormatType.ParameterValueStream, + AVFAudioEncoderSettings.AudioFormatType.CAC3 => AudioFormatType.CAC3, + AVFAudioEncoderSettings.AudioFormatType.MPEG4CELP => AudioFormatType.MPEG4CELP, + AVFAudioEncoderSettings.AudioFormatType.MPEG4HVXC => AudioFormatType.MPEG4HVXC, + AVFAudioEncoderSettings.AudioFormatType.iLBC => AudioFormatType.iLBC, + AVFAudioEncoderSettings.AudioFormatType.AppleIMA4 => AudioFormatType.AppleIMA4, + AVFAudioEncoderSettings.AudioFormatType.LinearPCM => AudioFormatType.LinearPCM, + AVFAudioEncoderSettings.AudioFormatType.MIDIStream => AudioFormatType.MIDIStream, + AVFAudioEncoderSettings.AudioFormatType.DVIIntelIMA => AudioFormatType.DVIIntelIMA, + AVFAudioEncoderSettings.AudioFormatType.MicrosoftGSM => AudioFormatType.MicrosoftGSM, + AVFAudioEncoderSettings.AudioFormatType.AMR => AudioFormatType.AMR, + AVFAudioEncoderSettings.AudioFormatType.TimeCode => AudioFormatType.TimeCode, + AVFAudioEncoderSettings.AudioFormatType.MPEG4TwinVQ => AudioFormatType.MPEG4TwinVQ, + AVFAudioEncoderSettings.AudioFormatType.ULaw => AudioFormatType.ULaw, + _ => null, + }, + AudioQuality = + audioConfig.Quality == AVFAudioEncoderSettings.AudioQuality.Default + ? null + : (AVAudioQuality?)audioConfig.Quality, + SampleRateConverterAudioQuality = + audioConfig.SampleRateConverterQuality == AVFAudioEncoderSettings.AudioQuality.Default + ? null + : (AVAudioQuality?)audioConfig.SampleRateConverterQuality, + }; + _audioInput = AVAssetWriterInput.Create(AVMediaType.Audio, _audioSettings); + _assetWriter.AddInput(_audioInput); + _audioInput.ExpectsMediaDataInRealTime = true; + + _videoAdaptor = AVAssetWriterInputPixelBufferAdaptor.Create(_videoInput, + new CVPixelBufferAttributes + { + PixelFormatType = CVPixelFormatType.CV32ARGB, + Width = videoConfig.SourceSize.Width, + Height = videoConfig.SourceSize.Width, + }); + _videoInput.ExpectsMediaDataInRealTime = true; + + if (!_assetWriter.StartWriting()) + { + throw new Exception("Failed to start writing"); + } + + _assetWriter.StartSessionAtSourceTime(CMTime.Zero); + } + + public override long NumberOfFrames => _numberOfFrames; + + public override long NumberOfSamples => _numberOfSamples; + + public override bool AddVideo(IBitmap image) + { + if (!_videoAdaptor.AssetWriterInput.ReadyForMoreMediaData) + { + return false; + } + + var time = new CMTime(_numberOfFrames * VideoConfig.FrameRate.Denominator, + (int)VideoConfig.FrameRate.Numerator); + CVPixelBuffer? pixelBuffer; + if (image is Bitmap bgra8888) + { + pixelBuffer = AVFSampleUtilities.ConvertToCVPixelBuffer(bgra8888); + } + else + { + using var copy = image.Convert(); + pixelBuffer = AVFSampleUtilities.ConvertToCVPixelBuffer(copy); + } + + if (pixelBuffer == null) + { + return false; + } + + if (!_videoAdaptor.AppendPixelBufferWithPresentationTime(pixelBuffer, time)) + { + return false; + } + + _numberOfFrames++; + return true; + } + + public override bool AddAudio(IPcm sound) + { + // if (!_audioInput.ReadyForMoreMediaData) + // { + // return false; + // } + // + // var time = new CMTime(_numberOfSamples, AudioConfig.SampleRate); + // using var dataBuffer = CMBlockBuffer.CreateEmpty( + // (uint)(sound.SampleSize * sound.NumSamples * sound.NumChannels), + // CMBlockBufferFlags.AlwaysCopyData, out var error1); + // using var formatDescription = + // CMFormatDescription.Create(CMMediaType.Audio, (uint)AudioFormatType.LinearPCM, out var error2); + // using var sampleBuffer = CMSampleBuffer.CreateWithPacketDescriptions(dataBuffer, formatDescription, + // sound.NumSamples, time, [], out var error3); + // + // sampleBuffer. + // // _numberOfSamples + return true; + } + + protected override void Dispose(bool disposing) + { + base.Dispose(disposing); + _videoInput.MarkAsFinished(); + _assetWriter.EndSessionAtSourceTime(new CMTime(_numberOfFrames * VideoConfig.FrameRate.Denominator, + (int)VideoConfig.FrameRate.Numerator)); + _assetWriter.FinishWriting(); + } +} From 516bb2d97494f33043b16600b80da7d54f49fc0a Mon Sep 17 00:00:00 2001 From: Yuto Terada Date: Sat, 22 Jun 2024 22:23:02 +0900 Subject: [PATCH 08/13] fix: Resolve black screen issue after seeking during AVF decoding --- .../Decoding/AVFVideoStreamReader.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Beutl.Extensions.AVFoundation/Decoding/AVFVideoStreamReader.cs b/src/Beutl.Extensions.AVFoundation/Decoding/AVFVideoStreamReader.cs index def70a7d5..ddb4a38af 100644 --- a/src/Beutl.Extensions.AVFoundation/Decoding/AVFVideoStreamReader.cs +++ b/src/Beutl.Extensions.AVFoundation/Decoding/AVFVideoStreamReader.cs @@ -13,7 +13,9 @@ public class AVFVideoStreamReader : IDisposable { private readonly ILogger _logger = Log.CreateLogger(); private readonly AVAsset _asset; + private readonly AVFVideoSampleCache _sampleCache; + // 現在のフレームからどれくらいの範囲ならシーケンシャル読み込みさせるかの閾値 private readonly int _thresholdFrameCount; @@ -85,9 +87,9 @@ public AVFVideoStreamReader(AVAsset asset, AVFDecodingExtension extension) // success! // add cache // timestamp -= _firstGapTimeStamp; + _currentTimestamp = buffer.PresentationTimeStamp; int frame = CMTimeUtilities.ConvertFrameFromTimeStamp(_currentTimestamp, _track.NominalFrameRate); _sampleCache.Add(frame, buffer); - _currentTimestamp = buffer.PresentationTimeStamp; return buffer; } From fc4317b7527f7c0c1950a4d586194ccac07d60e9 Mon Sep 17 00:00:00 2001 From: Yuto Terada Date: Sat, 22 Jun 2024 22:23:54 +0900 Subject: [PATCH 09/13] feat: Implement audio input using AVF --- .../Encoding/AVFEncoderInfo.cs | 15 +- .../Encoding/AVFVideoEncoderSettings.cs | 11 +- .../Encoding/AVFWriter.cs | 258 +++++++++++++----- .../LoadPrimitiveExtensionTask.cs | 33 ++- 4 files changed, 229 insertions(+), 88 deletions(-) diff --git a/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncoderInfo.cs b/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncoderInfo.cs index bfd9496fa..b585c076c 100644 --- a/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncoderInfo.cs +++ b/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncoderInfo.cs @@ -4,11 +4,18 @@ namespace Beutl.Extensions.AVFoundation.Encoding; public sealed class AVFEncoderInfo(AVFEncodingExtension extension) : IEncoderInfo { - public string Name =>"AVFoundation"; + public string Name => "AVFoundation"; public MediaWriter? Create(string file, VideoEncoderSettings videoConfig, AudioEncoderSettings audioConfig) { - + try + { + return new AVFWriter(file, (AVFVideoEncoderSettings)videoConfig, (AVFAudioEncoderSettings)audioConfig); + } + catch (Exception e) + { + return null; + } } public IEnumerable SupportExtensions() @@ -29,11 +36,11 @@ public IEnumerable SupportExtensions() public VideoEncoderSettings DefaultVideoConfig() { - return new VideoEncoderSettings(); + return new AVFVideoEncoderSettings(); } public AudioEncoderSettings DefaultAudioConfig() { - return new AudioEncoderSettings(); + return new AVFAudioEncoderSettings(); } } diff --git a/src/Beutl.Extensions.AVFoundation/Encoding/AVFVideoEncoderSettings.cs b/src/Beutl.Extensions.AVFoundation/Encoding/AVFVideoEncoderSettings.cs index 1f2aaf042..3f0706093 100644 --- a/src/Beutl.Extensions.AVFoundation/Encoding/AVFVideoEncoderSettings.cs +++ b/src/Beutl.Extensions.AVFoundation/Encoding/AVFVideoEncoderSettings.cs @@ -19,7 +19,7 @@ static AVFAudioEncoderSettings() .Register(); LinearPcmBitDepthProperty = ConfigureProperty(nameof(LinearPcmBitDepth)) - .DefaultValue(BitDepth.Default) + .DefaultValue(BitDepth.Bits16) .Register(); LinearPcmBigEndianProperty = ConfigureProperty(nameof(LinearPcmBigEndian)) @@ -91,11 +91,10 @@ public AudioQuality SampleRateConverterQuality public enum BitDepth { - Default, - Bits8, - Bits16, - Bits24, - Bits32 + Bits8 = 8, + Bits16 = 16, + Bits24 = 24, + Bits32 = 32 } public enum AudioQuality diff --git a/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs b/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs index 757ffdf6b..f73964a33 100644 --- a/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs +++ b/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs @@ -1,6 +1,10 @@ -using Beutl.Media; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using Beutl.Media; using Beutl.Media.Encoding; using Beutl.Media.Music; +using Beutl.Media.Music.Samples; using Beutl.Media.Pixel; using MonoMac.AudioToolbox; using MonoMac.AVFoundation; @@ -19,6 +23,8 @@ public class AVFWriter : MediaWriter private long _numberOfFrames; private readonly AVAssetWriterInput _audioInput; private readonly AudioSettings _audioSettings; + private AudioConverter? _audioConverter; + private AudioStreamBasicDescription? _audioSourceFormat; private long _numberOfSamples; public AVFWriter(string file, AVFVideoEncoderSettings videoConfig, AVFAudioEncoderSettings audioConfig) @@ -32,28 +38,13 @@ public AVFWriter(string file, AVFVideoEncoderSettings videoConfig, AVFAudioEncod { Width = videoConfig.DestinationSize.Width, Height = videoConfig.DestinationSize.Height, - Codec = videoConfig.Codec switch - { - AVFVideoEncoderSettings.VideoCodec.H264 => AVVideoCodec.H264, - AVFVideoEncoderSettings.VideoCodec.JPEG => AVVideoCodec.JPEG, - _ => null, - }, + Codec = ToAVVideoCodec(videoConfig.Codec), CodecSettings = new AVVideoCodecSettings { AverageBitRate = videoConfig.Bitrate == -1 ? null : videoConfig.Bitrate, MaxKeyFrameInterval = videoConfig.KeyframeRate == -1 ? null : videoConfig.KeyframeRate, JPEGQuality = videoConfig.JPEGQuality < 0 ? null : videoConfig.JPEGQuality, - ProfileLevelH264 = videoConfig.ProfileLevelH264 switch - { - AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline30 => AVVideoProfileLevelH264.Baseline30, - AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline31 => AVVideoProfileLevelH264.Baseline31, - AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline41 => AVVideoProfileLevelH264.Baseline41, - AVFVideoEncoderSettings.VideoProfileLevelH264.Main30 => AVVideoProfileLevelH264.Main30, - AVFVideoEncoderSettings.VideoProfileLevelH264.Main31 => AVVideoProfileLevelH264.Main31, - AVFVideoEncoderSettings.VideoProfileLevelH264.Main32 => AVVideoProfileLevelH264.Main32, - AVFVideoEncoderSettings.VideoProfileLevelH264.Main41 => AVVideoProfileLevelH264.Main41, - _ => null, - }, + ProfileLevelH264 = ToAVVideoProfileLevelH264(videoConfig.ProfileLevelH264), }, }); @@ -64,45 +55,11 @@ public AVFWriter(string file, AVFVideoEncoderSettings videoConfig, AVFAudioEncod SampleRate = audioConfig.SampleRate, EncoderBitRate = audioConfig.Bitrate == -1 ? null : audioConfig.Bitrate, NumberChannels = audioConfig.Channels, - Format = audioConfig.Format switch - { - AVFAudioEncoderSettings.AudioFormatType.MPEGLayer1 => AudioFormatType.MPEGLayer1, - AVFAudioEncoderSettings.AudioFormatType.MPEGLayer2 => AudioFormatType.MPEGLayer2, - AVFAudioEncoderSettings.AudioFormatType.MPEGLayer3 => AudioFormatType.MPEGLayer3, - AVFAudioEncoderSettings.AudioFormatType.Audible => AudioFormatType.Audible, - AVFAudioEncoderSettings.AudioFormatType.MACE3 => AudioFormatType.MACE3, - AVFAudioEncoderSettings.AudioFormatType.MACE6 => AudioFormatType.MACE6, - AVFAudioEncoderSettings.AudioFormatType.QDesign2 => AudioFormatType.QDesign2, - AVFAudioEncoderSettings.AudioFormatType.QDesign => AudioFormatType.QDesign, - AVFAudioEncoderSettings.AudioFormatType.QUALCOMM => AudioFormatType.QUALCOMM, - AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC => AudioFormatType.MPEG4AAC, - AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD => AudioFormatType.MPEG4AAC_ELD, - AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD_SBR => AudioFormatType.MPEG4AAC_ELD_SBR, - AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD_V2 => AudioFormatType.MPEG4AAC_ELD_V2, - AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_HE => AudioFormatType.MPEG4AAC_HE, - AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_LD => AudioFormatType.MPEG4AAC_LD, - AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_HE_V2 => AudioFormatType.MPEG4AAC_HE_V2, - AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_Spatial => AudioFormatType.MPEG4AAC_Spatial, - AVFAudioEncoderSettings.AudioFormatType.AC3 => AudioFormatType.AC3, - AVFAudioEncoderSettings.AudioFormatType.AES3 => AudioFormatType.AES3, - AVFAudioEncoderSettings.AudioFormatType.AppleLossless => AudioFormatType.AppleLossless, - AVFAudioEncoderSettings.AudioFormatType.ALaw => AudioFormatType.ALaw, - AVFAudioEncoderSettings.AudioFormatType.ParameterValueStream => AudioFormatType.ParameterValueStream, - AVFAudioEncoderSettings.AudioFormatType.CAC3 => AudioFormatType.CAC3, - AVFAudioEncoderSettings.AudioFormatType.MPEG4CELP => AudioFormatType.MPEG4CELP, - AVFAudioEncoderSettings.AudioFormatType.MPEG4HVXC => AudioFormatType.MPEG4HVXC, - AVFAudioEncoderSettings.AudioFormatType.iLBC => AudioFormatType.iLBC, - AVFAudioEncoderSettings.AudioFormatType.AppleIMA4 => AudioFormatType.AppleIMA4, - AVFAudioEncoderSettings.AudioFormatType.LinearPCM => AudioFormatType.LinearPCM, - AVFAudioEncoderSettings.AudioFormatType.MIDIStream => AudioFormatType.MIDIStream, - AVFAudioEncoderSettings.AudioFormatType.DVIIntelIMA => AudioFormatType.DVIIntelIMA, - AVFAudioEncoderSettings.AudioFormatType.MicrosoftGSM => AudioFormatType.MicrosoftGSM, - AVFAudioEncoderSettings.AudioFormatType.AMR => AudioFormatType.AMR, - AVFAudioEncoderSettings.AudioFormatType.TimeCode => AudioFormatType.TimeCode, - AVFAudioEncoderSettings.AudioFormatType.MPEG4TwinVQ => AudioFormatType.MPEG4TwinVQ, - AVFAudioEncoderSettings.AudioFormatType.ULaw => AudioFormatType.ULaw, - _ => null, - }, + LinearPcmFloat = audioConfig.LinearPcmFloat, + LinearPcmBigEndian = audioConfig.LinearPcmBigEndian, + LinearPcmBitDepth = (int?)audioConfig.LinearPcmBitDepth, + LinearPcmNonInterleaved = audioConfig.LinearPcmNonInterleaved, + Format = ToAudioFormatType(audioConfig.Format), AudioQuality = audioConfig.Quality == AVFAudioEncoderSettings.AudioQuality.Default ? null @@ -171,33 +128,186 @@ public override bool AddVideo(IBitmap image) return true; } - public override bool AddAudio(IPcm sound) + [DllImport("/System/Library/PrivateFrameworks/CoreMedia.framework/Versions/A/CoreMedia")] + private static extern CMBlockBufferError CMBlockBufferReplaceDataBytes( + IntPtr sourceBytes, + IntPtr handle, + uint offsetIntoDestination, + uint dataLength); + + [DllImport("/System/Library/Frameworks/AudioToolbox.framework/AudioToolbox")] + private static unsafe extern AudioConverterError AudioConverterConvertBuffer( + IntPtr handle, + uint inInputDataSize, IntPtr inInputData, + uint* ioOutputDataSize, IntPtr outOutputData); + + [UnsafeAccessor(UnsafeAccessorKind.Field, Name = "handle")] + private static extern IntPtr GetHandle(AudioConverter self); + + public override unsafe bool AddAudio(IPcm sound) { - // if (!_audioInput.ReadyForMoreMediaData) - // { - // return false; - // } - // - // var time = new CMTime(_numberOfSamples, AudioConfig.SampleRate); - // using var dataBuffer = CMBlockBuffer.CreateEmpty( - // (uint)(sound.SampleSize * sound.NumSamples * sound.NumChannels), - // CMBlockBufferFlags.AlwaysCopyData, out var error1); - // using var formatDescription = - // CMFormatDescription.Create(CMMediaType.Audio, (uint)AudioFormatType.LinearPCM, out var error2); - // using var sampleBuffer = CMSampleBuffer.CreateWithPacketDescriptions(dataBuffer, formatDescription, - // sound.NumSamples, time, [], out var error3); - // - // sampleBuffer. - // // _numberOfSamples + if (!_audioInput.ReadyForMoreMediaData) + { + return false; + } + + var audioConfig = (AVFAudioEncoderSettings)AudioConfig; + if (_audioConverter == null + || !_audioSourceFormat.HasValue + || (int)_audioSourceFormat.Value.SampleRate != sound.SampleRate + || _audioSourceFormat.Value.BitsPerChannel != GetBits() + || _audioSourceFormat.Value.ChannelsPerFrame != sound.NumChannels) + { + var sourceFormat = AudioStreamBasicDescription.CreateLinearPCM(sound.SampleRate, (uint)sound.NumChannels); + sourceFormat.FormatFlags = GetFormatFlags(); + sourceFormat.BitsPerChannel = GetBits(); + _audioSourceFormat = sourceFormat; + + var destinationFormat = + AudioStreamBasicDescription.CreateLinearPCM(AudioConfig.SampleRate, (uint)AudioConfig.Channels, + (uint)audioConfig.LinearPcmBitDepth, audioConfig.LinearPcmBigEndian); + destinationFormat.FormatFlags = + (audioConfig.LinearPcmFloat ? AudioFormatFlags.IsFloat : AudioFormatFlags.IsSignedInteger) | + AudioFormatFlags.IsPacked; + + _audioConverter?.Dispose(); + _audioConverter = AudioConverter.Create(_audioSourceFormat.Value, destinationFormat); + } + + uint inputDataSize = (uint)(sound.SampleSize * sound.NumSamples * sound.NumChannels); + uint bytes = (uint)audioConfig.LinearPcmBitDepth / 8; + uint outputSamples = (uint)Math.Ceiling(AudioConfig.SampleRate * sound.NumSamples / (double)sound.SampleRate); + uint outputDataSize = bytes * outputSamples * (uint)AudioConfig.Channels; + var outputData = NativeMemory.Alloc(outputDataSize); + + AudioConverterConvertBuffer( + GetHandle(_audioConverter), + inputDataSize, sound.Data, + &outputDataSize, (IntPtr)outputData); + Debug.Assert(outputDataSize == bytes * outputSamples * (uint)AudioConfig.Channels); + + var time = new CMTime(_numberOfSamples, AudioConfig.SampleRate); + using var dataBuffer = CMBlockBuffer.CreateEmpty( + outputDataSize, + CMBlockBufferFlags.AlwaysCopyData, out var error1); + if (error1 != CMBlockBufferError.None) throw new Exception(error1.ToString()); + + var error2 = CMBlockBufferReplaceDataBytes((IntPtr)outputData, dataBuffer.Handle, 0, dataBuffer.DataLength); + if (error2 != CMBlockBufferError.None) throw new Exception(error2.ToString()); + + using var formatDescription = + CMFormatDescription.Create(CMMediaType.Audio, (uint)AudioFormatType.LinearPCM, out var error3); + if (error3 != CMFormatDescriptionError.None) throw new Exception(error3.ToString()); + + using var sampleBuffer = CMSampleBuffer.CreateWithPacketDescriptions(dataBuffer, formatDescription, + (int)outputSamples, time, null, out var error4); + if (error4 != CMSampleBufferError.None) throw new Exception(error4.ToString()); + + if (!_audioInput.AppendSampleBuffer(sampleBuffer)) + { + return false; + } + + _numberOfSamples += outputSamples; return true; + + int GetBits() + { + return sound switch + { + Pcm or Pcm => 32, + Pcm => 16, + _ => throw new NotSupportedException() + }; + } + + AudioFormatFlags GetFormatFlags() + { + return sound switch + { + Pcm => AudioFormatFlags.IsSignedInteger | AudioFormatFlags.IsPacked, + Pcm or Pcm => AudioFormatFlags.IsSignedInteger | + AudioFormatFlags.IsPacked, + _ => throw new NotSupportedException() + }; + } } protected override void Dispose(bool disposing) { base.Dispose(disposing); _videoInput.MarkAsFinished(); + _audioInput.MarkAsFinished(); _assetWriter.EndSessionAtSourceTime(new CMTime(_numberOfFrames * VideoConfig.FrameRate.Denominator, (int)VideoConfig.FrameRate.Numerator)); _assetWriter.FinishWriting(); } + + private AVVideoCodec? ToAVVideoCodec(AVFVideoEncoderSettings.VideoCodec codec) + { + return codec switch + { + AVFVideoEncoderSettings.VideoCodec.H264 => AVVideoCodec.H264, + AVFVideoEncoderSettings.VideoCodec.JPEG => AVVideoCodec.JPEG, + _ => null + }; + } + + private AVVideoProfileLevelH264? ToAVVideoProfileLevelH264(AVFVideoEncoderSettings.VideoProfileLevelH264 profile) + { + return profile switch + { + AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline30 => AVVideoProfileLevelH264.Baseline30, + AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline31 => AVVideoProfileLevelH264.Baseline31, + AVFVideoEncoderSettings.VideoProfileLevelH264.Baseline41 => AVVideoProfileLevelH264.Baseline41, + AVFVideoEncoderSettings.VideoProfileLevelH264.Main30 => AVVideoProfileLevelH264.Main30, + AVFVideoEncoderSettings.VideoProfileLevelH264.Main31 => AVVideoProfileLevelH264.Main31, + AVFVideoEncoderSettings.VideoProfileLevelH264.Main32 => AVVideoProfileLevelH264.Main32, + AVFVideoEncoderSettings.VideoProfileLevelH264.Main41 => AVVideoProfileLevelH264.Main41, + _ => null + }; + } + + private AudioFormatType? ToAudioFormatType(AVFAudioEncoderSettings.AudioFormatType format) + { + return format switch + { + AVFAudioEncoderSettings.AudioFormatType.MPEGLayer1 => AudioFormatType.MPEGLayer1, + AVFAudioEncoderSettings.AudioFormatType.MPEGLayer2 => AudioFormatType.MPEGLayer2, + AVFAudioEncoderSettings.AudioFormatType.MPEGLayer3 => AudioFormatType.MPEGLayer3, + AVFAudioEncoderSettings.AudioFormatType.Audible => AudioFormatType.Audible, + AVFAudioEncoderSettings.AudioFormatType.MACE3 => AudioFormatType.MACE3, + AVFAudioEncoderSettings.AudioFormatType.MACE6 => AudioFormatType.MACE6, + AVFAudioEncoderSettings.AudioFormatType.QDesign2 => AudioFormatType.QDesign2, + AVFAudioEncoderSettings.AudioFormatType.QDesign => AudioFormatType.QDesign, + AVFAudioEncoderSettings.AudioFormatType.QUALCOMM => AudioFormatType.QUALCOMM, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC => AudioFormatType.MPEG4AAC, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD => AudioFormatType.MPEG4AAC_ELD, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD_SBR => AudioFormatType.MPEG4AAC_ELD_SBR, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_ELD_V2 => AudioFormatType.MPEG4AAC_ELD_V2, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_HE => AudioFormatType.MPEG4AAC_HE, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_LD => AudioFormatType.MPEG4AAC_LD, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_HE_V2 => AudioFormatType.MPEG4AAC_HE_V2, + AVFAudioEncoderSettings.AudioFormatType.MPEG4AAC_Spatial => AudioFormatType.MPEG4AAC_Spatial, + AVFAudioEncoderSettings.AudioFormatType.AC3 => AudioFormatType.AC3, + AVFAudioEncoderSettings.AudioFormatType.AES3 => AudioFormatType.AES3, + AVFAudioEncoderSettings.AudioFormatType.AppleLossless => AudioFormatType.AppleLossless, + AVFAudioEncoderSettings.AudioFormatType.ALaw => AudioFormatType.ALaw, + AVFAudioEncoderSettings.AudioFormatType.ParameterValueStream => AudioFormatType.ParameterValueStream, + AVFAudioEncoderSettings.AudioFormatType.CAC3 => AudioFormatType.CAC3, + AVFAudioEncoderSettings.AudioFormatType.MPEG4CELP => AudioFormatType.MPEG4CELP, + AVFAudioEncoderSettings.AudioFormatType.MPEG4HVXC => AudioFormatType.MPEG4HVXC, + AVFAudioEncoderSettings.AudioFormatType.iLBC => AudioFormatType.iLBC, + AVFAudioEncoderSettings.AudioFormatType.AppleIMA4 => AudioFormatType.AppleIMA4, + AVFAudioEncoderSettings.AudioFormatType.LinearPCM => AudioFormatType.LinearPCM, + AVFAudioEncoderSettings.AudioFormatType.MIDIStream => AudioFormatType.MIDIStream, + AVFAudioEncoderSettings.AudioFormatType.DVIIntelIMA => AudioFormatType.DVIIntelIMA, + AVFAudioEncoderSettings.AudioFormatType.MicrosoftGSM => AudioFormatType.MicrosoftGSM, + AVFAudioEncoderSettings.AudioFormatType.AMR => AudioFormatType.AMR, + AVFAudioEncoderSettings.AudioFormatType.TimeCode => AudioFormatType.TimeCode, + AVFAudioEncoderSettings.AudioFormatType.MPEG4TwinVQ => AudioFormatType.MPEG4TwinVQ, + AVFAudioEncoderSettings.AudioFormatType.ULaw => AudioFormatType.ULaw, + _ => null + }; + } } diff --git a/src/Beutl/Services/StartupTasks/LoadPrimitiveExtensionTask.cs b/src/Beutl/Services/StartupTasks/LoadPrimitiveExtensionTask.cs index f0e4bc4f1..9f80d2284 100644 --- a/src/Beutl/Services/StartupTasks/LoadPrimitiveExtensionTask.cs +++ b/src/Beutl/Services/StartupTasks/LoadPrimitiveExtensionTask.cs @@ -6,6 +6,7 @@ namespace Beutl.Services.StartupTasks; public sealed class LoadPrimitiveExtensionTask : StartupTask { private readonly PackageManager _manager; + public static readonly Extension[] PrimitiveExtensions = [ EditPageExtension.Instance, @@ -40,6 +41,7 @@ public LoadPrimitiveExtensionTask(PackageManager manager) _manager.SetupExtensionSettings(item); item.Load(); } + provider.AddExtensions(LocalPackage.Reserved0, PrimitiveExtensions); activity?.AddEvent(new("Loaded_Extensions")); @@ -56,7 +58,16 @@ public LoadPrimitiveExtensionTask(PackageManager manager) Name = "Beutl.Embedding.FFmpeg", DisplayName = "Beutl.Embedding.FFmpeg", InstalledPath = AppContext.BaseDirectory, - Tags = { "ffmpeg", "decoder", "decoding", "encoder", "encoding", "video", "audio" }, + Tags = + { + "ffmpeg", + "decoder", + "decoding", + "encoder", + "encoding", + "video", + "audio" + }, Version = GitVersionInformation.NuGetVersionV2, WebSite = "https://github.com/b-editor/beutl", Publisher = "b-editor" @@ -95,7 +106,8 @@ public LoadPrimitiveExtensionTask(PackageManager manager) Name = "Beutl.Embedding.MediaFoundation", DisplayName = "Beutl.Embedding.MediaFoundation", InstalledPath = AppContext.BaseDirectory, - Tags = { "windows", "media-foundation", "decoder", "decoding", "encoder", "encoding", "video", "audio" }, + Tags = + { "windows", "media-foundation", "decoder", "decoding", "encoder", "encoding", "video", "audio" }, Version = GitVersionInformation.NuGetVersionV2, WebSite = "https://github.com/b-editor/beutl", Publisher = "b-editor" @@ -130,7 +142,17 @@ public LoadPrimitiveExtensionTask(PackageManager manager) Name = "Beutl.Embedding.AVFoundation", DisplayName = "Beutl.Embedding.AVFoundation", InstalledPath = AppContext.BaseDirectory, - Tags = { "macos", "avfoundation", "decoder", "decoding", "encoder", "encoding", "video", "audio" }, + Tags = + { + "macos", + "avfoundation", + "decoder", + "decoding", + "encoder", + "encoding", + "video", + "audio" + }, Version = GitVersionInformation.NuGetVersionV2, WebSite = "https://github.com/b-editor/beutl", Publisher = "b-editor" @@ -138,10 +160,13 @@ public LoadPrimitiveExtensionTask(PackageManager manager) try { var decoding = new Extensions.AVFoundation.Decoding.AVFDecodingExtension(); + var encoding = new Extensions.AVFoundation.Encoding.AVFEncodingExtension(); _manager.SetupExtensionSettings(decoding); + _manager.SetupExtensionSettings(encoding); decoding.Load(); + encoding.Load(); - provider.AddExtensions(pkg.LocalId, [decoding]); + provider.AddExtensions(pkg.LocalId, [decoding, encoding]); } catch (Exception ex) { From ce2d18638eebf4efe4ca46265831a921eb13733a Mon Sep 17 00:00:00 2001 From: Yuto Terada Date: Sun, 23 Jun 2024 02:14:53 +0900 Subject: [PATCH 10/13] fix: Modify audio input handling --- .../Encoding/AVFEncoderInfo.cs | 4 +- .../Encoding/AVFEncodingExtension.cs | 6 +- .../Encoding/AVFVideoEncoderSettings.cs | 3 +- .../Encoding/AVFWriter.cs | 199 ++++++++++-------- 4 files changed, 116 insertions(+), 96 deletions(-) diff --git a/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncoderInfo.cs b/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncoderInfo.cs index b585c076c..da2353fd2 100644 --- a/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncoderInfo.cs +++ b/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncoderInfo.cs @@ -1,7 +1,9 @@ -using Beutl.Media.Encoding; +using System.Runtime.Versioning; +using Beutl.Media.Encoding; namespace Beutl.Extensions.AVFoundation.Encoding; +[SupportedOSPlatform("macos")] public sealed class AVFEncoderInfo(AVFEncodingExtension extension) : IEncoderInfo { public string Name => "AVFoundation"; diff --git a/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncodingExtension.cs b/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncodingExtension.cs index 28d3ce1d2..fbe4b5a17 100644 --- a/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncodingExtension.cs +++ b/src/Beutl.Extensions.AVFoundation/Encoding/AVFEncodingExtension.cs @@ -1,4 +1,7 @@ -using Beutl.Extensibility; +using System.Runtime.InteropServices; +using System.Runtime.InteropServices.ObjectiveC; +using System.Runtime.Versioning; +using Beutl.Extensibility; using Beutl.Media.Decoding; using Beutl.Media.Encoding; using MonoMac.AppKit; @@ -12,6 +15,7 @@ public class AVFEncodingExtension : EncodingExtension public override string DisplayName => "AVFoundation Encoding"; + [SupportedOSPlatform("macos")] public override IEncoderInfo GetEncoderInfo() { return new AVFEncoderInfo(this); diff --git a/src/Beutl.Extensions.AVFoundation/Encoding/AVFVideoEncoderSettings.cs b/src/Beutl.Extensions.AVFoundation/Encoding/AVFVideoEncoderSettings.cs index 3f0706093..96f237775 100644 --- a/src/Beutl.Extensions.AVFoundation/Encoding/AVFVideoEncoderSettings.cs +++ b/src/Beutl.Extensions.AVFoundation/Encoding/AVFVideoEncoderSettings.cs @@ -15,7 +15,7 @@ public sealed class AVFAudioEncoderSettings : AudioEncoderSettings static AVFAudioEncoderSettings() { FormatProperty = ConfigureProperty(nameof(Format)) - .DefaultValue(AudioFormatType.Default) + .DefaultValue(AudioFormatType.MPEG4AAC) .Register(); LinearPcmBitDepthProperty = ConfigureProperty(nameof(LinearPcmBitDepth)) @@ -109,7 +109,6 @@ public enum AudioQuality public enum AudioFormatType { - Default = 0, MPEGLayer1 = 778924081, // 0x2E6D7031 MPEGLayer2 = 778924082, // 0x2E6D7032 MPEGLayer3 = 778924083, // 0x2E6D7033 diff --git a/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs b/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs index f73964a33..79c3d1c90 100644 --- a/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs +++ b/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs @@ -1,6 +1,6 @@ -using System.Diagnostics; -using System.Runtime.CompilerServices; +using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Versioning; using Beutl.Media; using Beutl.Media.Encoding; using Beutl.Media.Music; @@ -8,13 +8,13 @@ using Beutl.Media.Pixel; using MonoMac.AudioToolbox; using MonoMac.AVFoundation; -using MonoMac.CoreFoundation; using MonoMac.CoreMedia; using MonoMac.CoreVideo; using MonoMac.Foundation; namespace Beutl.Extensions.AVFoundation.Encoding; +[SupportedOSPlatform("macos")] public class AVFWriter : MediaWriter { private readonly AVAssetWriter _assetWriter; @@ -22,9 +22,6 @@ public class AVFWriter : MediaWriter private readonly AVAssetWriterInputPixelBufferAdaptor _videoAdaptor; private long _numberOfFrames; private readonly AVAssetWriterInput _audioInput; - private readonly AudioSettings _audioSettings; - private AudioConverter? _audioConverter; - private AudioStreamBasicDescription? _audioSourceFormat; private long _numberOfSamples; public AVFWriter(string file, AVFVideoEncoderSettings videoConfig, AVFAudioEncoderSettings audioConfig) @@ -47,18 +44,21 @@ public AVFWriter(string file, AVFVideoEncoderSettings videoConfig, AVFAudioEncod ProfileLevelH264 = ToAVVideoProfileLevelH264(videoConfig.ProfileLevelH264), }, }); - + _videoInput.ExpectsMediaDataInRealTime = true; + _videoAdaptor = AVAssetWriterInputPixelBufferAdaptor.Create(_videoInput, + new CVPixelBufferAttributes + { + PixelFormatType = CVPixelFormatType.CV32ARGB, + Width = videoConfig.SourceSize.Width, + Height = videoConfig.SourceSize.Width, + }); _assetWriter.AddInput(_videoInput); - _audioSettings = new AudioSettings + var audioSettings = new AudioSettings { SampleRate = audioConfig.SampleRate, EncoderBitRate = audioConfig.Bitrate == -1 ? null : audioConfig.Bitrate, NumberChannels = audioConfig.Channels, - LinearPcmFloat = audioConfig.LinearPcmFloat, - LinearPcmBigEndian = audioConfig.LinearPcmBigEndian, - LinearPcmBitDepth = (int?)audioConfig.LinearPcmBitDepth, - LinearPcmNonInterleaved = audioConfig.LinearPcmNonInterleaved, Format = ToAudioFormatType(audioConfig.Format), AudioQuality = audioConfig.Quality == AVFAudioEncoderSettings.AudioQuality.Default @@ -69,18 +69,17 @@ public AVFWriter(string file, AVFVideoEncoderSettings videoConfig, AVFAudioEncod ? null : (AVAudioQuality?)audioConfig.SampleRateConverterQuality, }; - _audioInput = AVAssetWriterInput.Create(AVMediaType.Audio, _audioSettings); - _assetWriter.AddInput(_audioInput); - _audioInput.ExpectsMediaDataInRealTime = true; + if (audioSettings.Format == AudioFormatType.LinearPCM) + { + audioSettings.LinearPcmFloat = audioConfig.LinearPcmFloat; + audioSettings.LinearPcmBigEndian = audioConfig.LinearPcmBigEndian; + audioSettings.LinearPcmBitDepth = (int?)audioConfig.LinearPcmBitDepth; + audioSettings.LinearPcmNonInterleaved = audioConfig.LinearPcmNonInterleaved; + } - _videoAdaptor = AVAssetWriterInputPixelBufferAdaptor.Create(_videoInput, - new CVPixelBufferAttributes - { - PixelFormatType = CVPixelFormatType.CV32ARGB, - Width = videoConfig.SourceSize.Width, - Height = videoConfig.SourceSize.Width, - }); - _videoInput.ExpectsMediaDataInRealTime = true; + _audioInput = AVAssetWriterInput.Create(AVMediaType.Audio, audioSettings); + _audioInput.ExpectsMediaDataInRealTime = true; + _assetWriter.AddInput(_audioInput); if (!_assetWriter.StartWriting()) { @@ -96,9 +95,15 @@ public AVFWriter(string file, AVFVideoEncoderSettings videoConfig, AVFAudioEncod public override bool AddVideo(IBitmap image) { - if (!_videoAdaptor.AssetWriterInput.ReadyForMoreMediaData) + int count = 0; + while (!_videoAdaptor.AssetWriterInput.ReadyForMoreMediaData) { - return false; + Thread.Sleep(10); + count++; + if (count > 100) + { + return false; + } } var time = new CMTime(_numberOfFrames * VideoConfig.FrameRate.Denominator, @@ -129,78 +134,88 @@ public override bool AddVideo(IBitmap image) } [DllImport("/System/Library/PrivateFrameworks/CoreMedia.framework/Versions/A/CoreMedia")] - private static extern CMBlockBufferError CMBlockBufferReplaceDataBytes( - IntPtr sourceBytes, - IntPtr handle, - uint offsetIntoDestination, - uint dataLength); - - [DllImport("/System/Library/Frameworks/AudioToolbox.framework/AudioToolbox")] - private static unsafe extern AudioConverterError AudioConverterConvertBuffer( - IntPtr handle, - uint inInputDataSize, IntPtr inInputData, - uint* ioOutputDataSize, IntPtr outOutputData); - - [UnsafeAccessor(UnsafeAccessorKind.Field, Name = "handle")] - private static extern IntPtr GetHandle(AudioConverter self); - - public override unsafe bool AddAudio(IPcm sound) + private static extern unsafe CMFormatDescriptionError CMAudioFormatDescriptionCreate( + IntPtr allocator, + void* asbd, + uint layoutSize, + void* layout, + uint magicCookieSize, + void* magicCookie, + IntPtr extensions, + out IntPtr handle); + + [UnsafeAccessor(UnsafeAccessorKind.Constructor)] + private static extern CMAudioFormatDescription NewCMAudioFormatDescription(IntPtr handle); + + [UnsafeAccessor(UnsafeAccessorKind.Constructor)] + private static extern CMBlockBuffer NewCMBlockBuffer(IntPtr handle); + + [DllImport("/System/Library/PrivateFrameworks/CoreMedia.framework/Versions/A/CoreMedia")] + private static extern CMBlockBufferError CMBlockBufferCreateWithMemoryBlock( + IntPtr allocator, + IntPtr memoryBlock, + uint blockLength, + IntPtr blockAllocator, + IntPtr customBlockSource, + uint offsetToData, + uint dataLength, + CMBlockBufferFlags flags, + out IntPtr handle); + + private static unsafe CMAudioFormatDescription CreateAudioFormatDescription(AudioStreamBasicDescription asbd) + { + var error = CMAudioFormatDescriptionCreate( + IntPtr.Zero, + &asbd, + 0, + null, + 0, + null, + IntPtr.Zero, + out var handle); + if (error != CMFormatDescriptionError.None) throw new Exception(error.ToString()); + return NewCMAudioFormatDescription(handle); + } + + private static CMBlockBuffer CreateCMBlockBufferWithMemoryBlock(uint length, IntPtr memoryBlock, + CMBlockBufferFlags flags) + { + var error = CMBlockBufferCreateWithMemoryBlock( + IntPtr.Zero, + memoryBlock, + length, + IntPtr.Zero, + IntPtr.Zero, + 0, + length, + flags, + out var handle); + if (error != CMBlockBufferError.None) throw new Exception(error.ToString()); + return NewCMBlockBuffer(handle); + } + + public override bool AddAudio(IPcm sound) { if (!_audioInput.ReadyForMoreMediaData) { return false; } - var audioConfig = (AVFAudioEncoderSettings)AudioConfig; - if (_audioConverter == null - || !_audioSourceFormat.HasValue - || (int)_audioSourceFormat.Value.SampleRate != sound.SampleRate - || _audioSourceFormat.Value.BitsPerChannel != GetBits() - || _audioSourceFormat.Value.ChannelsPerFrame != sound.NumChannels) - { - var sourceFormat = AudioStreamBasicDescription.CreateLinearPCM(sound.SampleRate, (uint)sound.NumChannels); - sourceFormat.FormatFlags = GetFormatFlags(); - sourceFormat.BitsPerChannel = GetBits(); - _audioSourceFormat = sourceFormat; - - var destinationFormat = - AudioStreamBasicDescription.CreateLinearPCM(AudioConfig.SampleRate, (uint)AudioConfig.Channels, - (uint)audioConfig.LinearPcmBitDepth, audioConfig.LinearPcmBigEndian); - destinationFormat.FormatFlags = - (audioConfig.LinearPcmFloat ? AudioFormatFlags.IsFloat : AudioFormatFlags.IsSignedInteger) | - AudioFormatFlags.IsPacked; - - _audioConverter?.Dispose(); - _audioConverter = AudioConverter.Create(_audioSourceFormat.Value, destinationFormat); - } + var sourceFormat = AudioStreamBasicDescription.CreateLinearPCM(sound.SampleRate, (uint)sound.NumChannels); + sourceFormat.FormatFlags = GetFormatFlags(); + sourceFormat.BitsPerChannel = GetBits(); + var fmtError = AudioStreamBasicDescription.GetFormatInfo(ref sourceFormat); + if (fmtError != AudioFormatError.None) throw new Exception(fmtError.ToString()); + + uint inputDataSize = (uint)(sound.SampleSize * sound.NumSamples); + var time = new CMTime(_numberOfSamples, sound.SampleRate); + using var dataBuffer = + CreateCMBlockBufferWithMemoryBlock(inputDataSize, sound.Data, CMBlockBufferFlags.AlwaysCopyData); + + using var formatDescription = CreateAudioFormatDescription(sourceFormat); - uint inputDataSize = (uint)(sound.SampleSize * sound.NumSamples * sound.NumChannels); - uint bytes = (uint)audioConfig.LinearPcmBitDepth / 8; - uint outputSamples = (uint)Math.Ceiling(AudioConfig.SampleRate * sound.NumSamples / (double)sound.SampleRate); - uint outputDataSize = bytes * outputSamples * (uint)AudioConfig.Channels; - var outputData = NativeMemory.Alloc(outputDataSize); - - AudioConverterConvertBuffer( - GetHandle(_audioConverter), - inputDataSize, sound.Data, - &outputDataSize, (IntPtr)outputData); - Debug.Assert(outputDataSize == bytes * outputSamples * (uint)AudioConfig.Channels); - - var time = new CMTime(_numberOfSamples, AudioConfig.SampleRate); - using var dataBuffer = CMBlockBuffer.CreateEmpty( - outputDataSize, - CMBlockBufferFlags.AlwaysCopyData, out var error1); - if (error1 != CMBlockBufferError.None) throw new Exception(error1.ToString()); - - var error2 = CMBlockBufferReplaceDataBytes((IntPtr)outputData, dataBuffer.Handle, 0, dataBuffer.DataLength); - if (error2 != CMBlockBufferError.None) throw new Exception(error2.ToString()); - - using var formatDescription = - CMFormatDescription.Create(CMMediaType.Audio, (uint)AudioFormatType.LinearPCM, out var error3); - if (error3 != CMFormatDescriptionError.None) throw new Exception(error3.ToString()); - - using var sampleBuffer = CMSampleBuffer.CreateWithPacketDescriptions(dataBuffer, formatDescription, - (int)outputSamples, time, null, out var error4); + var sampleBuffer = CMSampleBuffer.CreateWithPacketDescriptions(dataBuffer, formatDescription, + sound.NumSamples, time, null, out var error4); if (error4 != CMSampleBufferError.None) throw new Exception(error4.ToString()); if (!_audioInput.AppendSampleBuffer(sampleBuffer)) @@ -208,7 +223,7 @@ public override unsafe bool AddAudio(IPcm sound) return false; } - _numberOfSamples += outputSamples; + _numberOfSamples += sound.NumSamples; return true; int GetBits() @@ -225,7 +240,7 @@ AudioFormatFlags GetFormatFlags() { return sound switch { - Pcm => AudioFormatFlags.IsSignedInteger | AudioFormatFlags.IsPacked, + Pcm => AudioFormatFlags.IsFloat | AudioFormatFlags.IsPacked, Pcm or Pcm => AudioFormatFlags.IsSignedInteger | AudioFormatFlags.IsPacked, _ => throw new NotSupportedException() From 47ebad973560bfdd68df1e4bfdcf046fe0ad42b7 Mon Sep 17 00:00:00 2001 From: Yuto Terada Date: Mon, 24 Jun 2024 18:11:16 +0900 Subject: [PATCH 11/13] change: Delegate buffer release to GC --- src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs b/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs index 79c3d1c90..4e3662a78 100644 --- a/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs +++ b/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs @@ -209,10 +209,10 @@ public override bool AddAudio(IPcm sound) uint inputDataSize = (uint)(sound.SampleSize * sound.NumSamples); var time = new CMTime(_numberOfSamples, sound.SampleRate); - using var dataBuffer = - CreateCMBlockBufferWithMemoryBlock(inputDataSize, sound.Data, CMBlockBufferFlags.AlwaysCopyData); + var dataBuffer = CreateCMBlockBufferWithMemoryBlock( + inputDataSize, sound.Data, CMBlockBufferFlags.AlwaysCopyData); - using var formatDescription = CreateAudioFormatDescription(sourceFormat); + var formatDescription = CreateAudioFormatDescription(sourceFormat); var sampleBuffer = CMSampleBuffer.CreateWithPacketDescriptions(dataBuffer, formatDescription, sound.NumSamples, time, null, out var error4); From 3022d321cf354f51c0148ece21c2df9acd06046c Mon Sep 17 00:00:00 2001 From: Yuto Terada Date: Mon, 24 Jun 2024 18:11:51 +0900 Subject: [PATCH 12/13] feat: Specify ChannelLayout in CMAudioFormatDescription --- .../Encoding/AVFWriter.cs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs b/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs index 4e3662a78..a43817d58 100644 --- a/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs +++ b/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs @@ -164,11 +164,19 @@ private static extern CMBlockBufferError CMBlockBufferCreateWithMemoryBlock( private static unsafe CMAudioFormatDescription CreateAudioFormatDescription(AudioStreamBasicDescription asbd) { + var channelLayout = new AudioChannelLayout + { + AudioTag = asbd.ChannelsPerFrame == 2 ? AudioChannelLayoutTag.Stereo : AudioChannelLayoutTag.Mono, + Channels = [], + Bitmap = 0, + }; + var data = channelLayout.AsData(); + var error = CMAudioFormatDescriptionCreate( IntPtr.Zero, &asbd, - 0, - null, + (uint)data.Length, + (void*)data.Bytes, 0, null, IntPtr.Zero, From b9ea235656d00066fbb0837e9144a4370c1307bf Mon Sep 17 00:00:00 2001 From: Yuto Terada Date: Mon, 24 Jun 2024 18:12:41 +0900 Subject: [PATCH 13/13] feat: Add waiting mechanism when ReadyForMoreMediaData is false --- .../Encoding/AVFWriter.cs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs b/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs index a43817d58..81e6013d2 100644 --- a/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs +++ b/src/Beutl.Extensions.AVFoundation/Encoding/AVFWriter.cs @@ -204,9 +204,15 @@ private static CMBlockBuffer CreateCMBlockBufferWithMemoryBlock(uint length, Int public override bool AddAudio(IPcm sound) { - if (!_audioInput.ReadyForMoreMediaData) + int count = 0; + while (!_audioInput.ReadyForMoreMediaData) { - return false; + Thread.Sleep(10); + count++; + if (count > 100) + { + return false; + } } var sourceFormat = AudioStreamBasicDescription.CreateLinearPCM(sound.SampleRate, (uint)sound.NumChannels);