-
Notifications
You must be signed in to change notification settings - Fork 46
Refactor audio sample conversion in encoder #704
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -288,10 +288,13 @@ void AudioEncoder::encode() { | |
// encoded frame would contain more samples than necessary and our results | ||
// wouldn't match the ffmpeg CLI. | ||
avFrame->nb_samples = numSamplesToEncode; | ||
encodeInnerLoop(autoAVPacket, avFrame); | ||
|
||
avFrame->pts += static_cast<int64_t>(numSamplesToEncode); | ||
UniqueAVFrame convertedAVFrame = maybeConvertAVFrame(avFrame); | ||
encodeInnerLoop(autoAVPacket, convertedAVFrame); | ||
|
||
numEncodedSamples += numSamplesToEncode; | ||
// TODO-ENCODING set frame pts correctly, and test against it. | ||
// avFrame->pts += static_cast<int64_t>(numSamplesToEncode); | ||
} | ||
TORCH_CHECK(numEncodedSamples == numSamples, "Hmmmmmm something went wrong."); | ||
|
||
|
@@ -304,42 +307,43 @@ void AudioEncoder::encode() { | |
getFFMPEGErrorStringFromErrorCode(status)); | ||
} | ||
|
||
void AudioEncoder::encodeInnerLoop( | ||
AutoAVPacket& autoAVPacket, | ||
const UniqueAVFrame& srcAVFrame) { | ||
bool mustConvert = | ||
(srcAVFrame != nullptr && | ||
(avCodecContext_->sample_fmt != AV_SAMPLE_FMT_FLTP || | ||
getNumChannels(srcAVFrame) != outNumChannels_)); | ||
|
||
UniqueAVFrame convertedAVFrame; | ||
if (mustConvert) { | ||
if (!swrContext_) { | ||
swrContext_.reset(createSwrContext( | ||
AV_SAMPLE_FMT_FLTP, | ||
avCodecContext_->sample_fmt, | ||
srcAVFrame->sample_rate, // No sample rate conversion | ||
srcAVFrame->sample_rate, | ||
srcAVFrame, | ||
outNumChannels_)); | ||
} | ||
convertedAVFrame = convertAudioAVFrameSamples( | ||
swrContext_, | ||
srcAVFrame, | ||
UniqueAVFrame AudioEncoder::maybeConvertAVFrame(const UniqueAVFrame& avFrame) { | ||
if (static_cast<AVSampleFormat>(avFrame->format) == | ||
avCodecContext_->sample_fmt && | ||
getNumChannels(avFrame) == outNumChannels_) { | ||
// Note: the clone references the same underlying data, it's a cheap copy. | ||
return UniqueAVFrame(av_frame_clone(avFrame.get())); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The main logic change is this one above: the call to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Huh, I would have thought that using In order for that to work, though, we'd have to change how we pass the frame into There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the review! As discussed offline I'll merge now but I want to dig a little bit more about what we could do differently |
||
|
||
if (!swrContext_) { | ||
swrContext_.reset(createSwrContext( | ||
static_cast<AVSampleFormat>(avFrame->format), | ||
avCodecContext_->sample_fmt, | ||
srcAVFrame->sample_rate, // No sample rate conversion | ||
outNumChannels_); | ||
TORCH_CHECK( | ||
convertedAVFrame->nb_samples == srcAVFrame->nb_samples, | ||
"convertedAVFrame->nb_samples=", | ||
convertedAVFrame->nb_samples, | ||
" differs from ", | ||
"srcAVFrame->nb_samples=", | ||
srcAVFrame->nb_samples, | ||
"This is unexpected, please report on the TorchCodec bug tracker."); | ||
avFrame->sample_rate, // No sample rate conversion | ||
avFrame->sample_rate, | ||
avFrame, | ||
outNumChannels_)); | ||
} | ||
const UniqueAVFrame& avFrame = mustConvert ? convertedAVFrame : srcAVFrame; | ||
UniqueAVFrame convertedAVFrame = convertAudioAVFrameSamples( | ||
swrContext_, | ||
avFrame, | ||
avCodecContext_->sample_fmt, | ||
avFrame->sample_rate, // No sample rate conversion | ||
outNumChannels_); | ||
TORCH_CHECK( | ||
convertedAVFrame->nb_samples == avFrame->nb_samples, | ||
"convertedAVFrame->nb_samples=", | ||
convertedAVFrame->nb_samples, | ||
" differs from ", | ||
"avFrame->nb_samples=", | ||
avFrame->nb_samples, | ||
"This is unexpected, please report on the TorchCodec bug tracker."); | ||
return convertedAVFrame; | ||
} | ||
|
||
void AudioEncoder::encodeInnerLoop( | ||
AutoAVPacket& autoAVPacket, | ||
const UniqueAVFrame& avFrame) { | ||
auto status = avcodec_send_frame(avCodecContext_.get(), avFrame.get()); | ||
TORCH_CHECK( | ||
status == AVSUCCESS, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a drive-by. I realized that commenting this out was letting our tests pass, so this isn't load bearing. We weren't setting the pts on the convertedAVFrame anyway, so our logic was already wrong. I'm adding this TODO to investigate that later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's also create an issue.