Skip to content

Commit

Permalink
audio: Fix timestamps outputted by AudioOpusEncoder
Browse files Browse the repository at this point in the history
Make AudioOpusEncoder output correct timestamps even if the inputs'
lengths are not aligned to 60 ms.

Bug: 1166554
Change-Id: If7b48d5d7a135d6a10471ca4fe4647643486263d
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2644917
Commit-Queue: Eugene Zemtsov <eugene@chromium.org>
Reviewed-by: Dan Sanders <sandersd@chromium.org>
Cr-Commit-Position: refs/heads/master@{#846577}
  • Loading branch information
Djuffin authored and Chromium LUCI CQ committed Jan 24, 2021
1 parent e2f8934 commit 4c64dac
Show file tree
Hide file tree
Showing 6 changed files with 189 additions and 52 deletions.
170 changes: 137 additions & 33 deletions media/audio/audio_encoders_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <vector>

#include "base/bind.h"
#include "base/test/bind.h"
#include "base/time/time.h"
#include "media/audio/audio_opus_encoder.h"
#include "media/audio/audio_pcm_encoder.h"
Expand All @@ -24,9 +25,6 @@ namespace {

constexpr int kAudioSampleRate = 48000;

constexpr base::TimeDelta kBufferDuration =
base::TimeDelta::FromMilliseconds(10);

// This is the preferred opus buffer duration (60 ms), which corresponds to a
// value of 2880 frames per buffer (|kOpusFramesPerBuffer|).
constexpr base::TimeDelta kOpusBufferDuration =
Expand All @@ -36,33 +34,28 @@ constexpr int kOpusFramesPerBuffer = kOpusBufferDuration.InMicroseconds() *
base::Time::kMicrosecondsPerSecond;

struct TestAudioParams {
const media::AudioParameters::Format format;
const media::ChannelLayout channel_layout;
const AudioParameters::Format format;
const ChannelLayout channel_layout;
const int sample_rate;
};

constexpr TestAudioParams kTestAudioParams[] = {
{media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
media::CHANNEL_LAYOUT_STEREO, kAudioSampleRate},
{AudioParameters::AUDIO_PCM_LOW_LATENCY, CHANNEL_LAYOUT_STEREO,
kAudioSampleRate},
// Change to mono:
{media::AudioParameters::AUDIO_PCM_LOW_LATENCY, media::CHANNEL_LAYOUT_MONO,
{AudioParameters::AUDIO_PCM_LOW_LATENCY, CHANNEL_LAYOUT_MONO,
kAudioSampleRate},
// Different sampling rate as well:
{media::AudioParameters::AUDIO_PCM_LOW_LATENCY, media::CHANNEL_LAYOUT_MONO,
24000},
{media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
media::CHANNEL_LAYOUT_STEREO, 8000},
{AudioParameters::AUDIO_PCM_LOW_LATENCY, CHANNEL_LAYOUT_MONO, 24000},
{AudioParameters::AUDIO_PCM_LOW_LATENCY, CHANNEL_LAYOUT_STEREO, 8000},
// Using a non-default Opus sampling rate (48, 24, 16, 12, or 8 kHz).
{media::AudioParameters::AUDIO_PCM_LOW_LATENCY, media::CHANNEL_LAYOUT_MONO,
22050},
{media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
media::CHANNEL_LAYOUT_STEREO, 44100},
{media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
media::CHANNEL_LAYOUT_STEREO, 96000},
{media::AudioParameters::AUDIO_PCM_LOW_LATENCY, media::CHANNEL_LAYOUT_MONO,
{AudioParameters::AUDIO_PCM_LOW_LATENCY, CHANNEL_LAYOUT_MONO, 22050},
{AudioParameters::AUDIO_PCM_LOW_LATENCY, CHANNEL_LAYOUT_STEREO, 44100},
{AudioParameters::AUDIO_PCM_LOW_LATENCY, CHANNEL_LAYOUT_STEREO, 96000},
{AudioParameters::AUDIO_PCM_LOW_LATENCY, CHANNEL_LAYOUT_MONO,
kAudioSampleRate},
{AudioParameters::AUDIO_PCM_LOW_LATENCY, CHANNEL_LAYOUT_STEREO,
kAudioSampleRate},
{media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
media::CHANNEL_LAYOUT_STEREO, kAudioSampleRate},
};

} // namespace
Expand Down Expand Up @@ -90,24 +83,28 @@ class AudioEncodersTest : public ::testing::TestWithParam<TestAudioParams> {
encode_callback_count_ = 0;
}

// Produces audio data that corresponds to a |kBufferDuration| and the
// Produces audio data that corresponds to a |buffer_duration| and the
// sample rate of the current |input_params_|. The produced data is sent to
// |encoder_| to be encoded, and the number of frames generated is returned.
int ProduceAudioAndEncode() {
int ProduceAudioAndEncode(
base::TimeTicks timestamp = base::TimeTicks::Now()) {
DCHECK(encoder_);
const int num_frames =
input_params_.sample_rate() * kBufferDuration.InSecondsF();
current_audio_bus_ =
media::AudioBus::Create(input_params_.channels(), num_frames);
const auto capture_time = base::TimeTicks::Now();
input_params_.sample_rate() * buffer_duration.InSecondsF();
base::TimeTicks capture_time = timestamp + buffer_duration;
current_audio_bus_ = AudioBus::Create(input_params_.channels(), num_frames);
audio_source_.OnMoreData(base::TimeDelta(), capture_time, 0,
current_audio_bus_.get());
encoder_->EncodeAudio(*current_audio_bus_, capture_time);
return num_frames;
}

// Used to verify we get no errors.
void OnErrorCallback(Status error) { FAIL() << error.message(); }
void OnErrorCallback(Status error) {
DCHECK(!error.is_ok());
FAIL() << error.message();
NOTREACHED();
}

// Used as the callback of the PCM encoder.
void VerifyPcmEncoding(EncodedAudioBuffer output) {
Expand Down Expand Up @@ -143,22 +140,24 @@ class AudioEncodersTest : public ::testing::TestWithParam<TestAudioParams> {
kOpusFramesPerBuffer, 0));
}

private:
protected:
// The input params as initialized from the test's parameter.
const AudioParameters input_params_;

// The audio source used to fill in the data of the |current_audio_bus_|.
media::SineWaveAudioSource audio_source_;
SineWaveAudioSource audio_source_;

// The encoder the test is verifying.
std::unique_ptr<AudioEncoder> encoder_;

// The audio bus that was most recently generated and sent to the |encoder_|
// by ProduceAudioAndEncode().
std::unique_ptr<media::AudioBus> current_audio_bus_;
std::unique_ptr<AudioBus> current_audio_bus_;

// The number of encoder callbacks received.
int encode_callback_count_ = 0;

base::TimeDelta buffer_duration = base::TimeDelta::FromMilliseconds(10);
};

TEST_P(AudioEncodersTest, PcmEncoder) {
Expand All @@ -176,6 +175,108 @@ TEST_P(AudioEncodersTest, PcmEncoder) {
EXPECT_EQ(kCount, encode_callback_count());
}

TEST_P(AudioEncodersTest, OpusTimestamps) {
constexpr int kCount = 12;
for (base::TimeDelta duration :
{kOpusBufferDuration * 10, kOpusBufferDuration,
kOpusBufferDuration * 2 / 3}) {
buffer_duration = duration;
size_t expected_outputs = (buffer_duration * kCount) / kOpusBufferDuration;
base::TimeTicks current_timestamp;
std::vector<base::TimeTicks> timestamps;

auto output_cb = base::BindLambdaForTesting([&](EncodedAudioBuffer output) {
timestamps.push_back(output.timestamp);
});

SetEncoder(std::make_unique<AudioOpusEncoder>(
input_params(), std::move(output_cb),
base::BindRepeating(&AudioEncodersTest::OnErrorCallback,
base::Unretained(this)),
/*opus_bitrate=*/0));

for (int i = 0; i < kCount; ++i) {
ProduceAudioAndEncode(current_timestamp);
current_timestamp += buffer_duration;
}
encoder()->Flush();
EXPECT_EQ(expected_outputs, timestamps.size());

current_timestamp = base::TimeTicks();
for (auto& ts : timestamps) {
EXPECT_EQ(current_timestamp, ts);
current_timestamp += kOpusBufferDuration;
}
}
}

// Check how Opus encoder reacts to breaks in continuity of incoming sound.
// Capture times are expected to be exactly buffer durations apart,
// but the encoder should be ready to handle situations when it's not the case.
TEST_P(AudioEncodersTest, OpusTimeContinuityBreak) {
base::TimeTicks current_timestamp;
base::TimeDelta small_gap = base::TimeDelta::FromMicroseconds(500);
base::TimeDelta large_gap = base::TimeDelta::FromMicroseconds(1500);
std::vector<base::TimeTicks> timestamps;

auto output_cb = base::BindLambdaForTesting([&](EncodedAudioBuffer output) {
timestamps.push_back(output.timestamp);
});

SetEncoder(std::make_unique<AudioOpusEncoder>(
input_params(), std::move(output_cb),
base::BindRepeating(&AudioEncodersTest::OnErrorCallback,
base::Unretained(this)),
/*opus_bitrate=*/0));

// Encode first normal buffer and immediately get an output for it.
buffer_duration = kOpusBufferDuration;
auto ts0 = current_timestamp;
ProduceAudioAndEncode(current_timestamp);
current_timestamp += buffer_duration;
EXPECT_EQ(1u, timestamps.size());
EXPECT_EQ(ts0, timestamps[0]);

// Add another buffer which is too small and will be buffered
buffer_duration = kOpusBufferDuration / 2;
auto ts1 = current_timestamp;
ProduceAudioAndEncode(current_timestamp);
current_timestamp += buffer_duration;
EXPECT_EQ(1u, timestamps.size());

// Add another large buffer after a large gap, 2 outputs are expected
// because large gap should trigger a flush.
current_timestamp += large_gap;
buffer_duration = kOpusBufferDuration;
auto ts2 = current_timestamp;
ProduceAudioAndEncode(current_timestamp);
current_timestamp += buffer_duration;
EXPECT_EQ(3u, timestamps.size());
EXPECT_EQ(ts1, timestamps[1]);
EXPECT_EQ(ts2, timestamps[2]);

// Add another buffer which is too small and will be buffered
buffer_duration = kOpusBufferDuration / 2;
auto ts3 = current_timestamp;
ProduceAudioAndEncode(current_timestamp);
current_timestamp += buffer_duration;
EXPECT_EQ(3u, timestamps.size());

// Add a small gap and a large buffer, only one output is expected because
// small gap doesn't trigger a flush.
// Small gap itself is not counted in output timestamps.
auto ts4 = current_timestamp + kOpusBufferDuration / 2;
current_timestamp += small_gap;
buffer_duration = kOpusBufferDuration;
ProduceAudioAndEncode(current_timestamp);
EXPECT_EQ(4u, timestamps.size());
EXPECT_EQ(ts3, timestamps[3]);

encoder()->Flush();
EXPECT_EQ(5u, timestamps.size());
EXPECT_EQ(ts4, timestamps[4]);
}

TEST_P(AudioEncodersTest, OpusEncoder) {
int error;
OpusDecoder* opus_decoder =
Expand All @@ -195,8 +296,11 @@ TEST_P(AudioEncodersTest, OpusEncoder) {
const int frames_in_60_ms =
kOpusBufferDuration.InSecondsF() * input_params().sample_rate();
int total_frames = 0;
while (total_frames < frames_in_60_ms)
total_frames += ProduceAudioAndEncode();
base::TimeTicks time;
while (total_frames < frames_in_60_ms) {
time += buffer_duration;
total_frames += ProduceAudioAndEncode(time);
}

EXPECT_EQ(1, encode_callback_count());

Expand Down
31 changes: 26 additions & 5 deletions media/audio/audio_opus_encoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "base/logging.h"
#include "base/stl_util.h"
#include "base/strings/stringprintf.h"
#include "media/base/audio_timestamp_helper.h"
#include "media/base/status.h"
#include "media/base/status_codes.h"

Expand Down Expand Up @@ -218,6 +219,23 @@ void AudioOpusEncoder::EncodeAudioImpl(const AudioBus& audio_bus,
if (!opus_encoder_)
return;

// If |capture_time| - |audio_bus|'s duration is not equal to the expected
// timestamp for the next audio sample, it means there is a gap/overlap;
// if it's big enough we flush and start anew, otherwise we ignore it.
auto capture_ts = ComputeTimestamp(audio_bus.frames(), capture_time);
auto end_of_existing_buffer_ts =
next_timestamp_ +
AudioTimestampHelper::FramesToTime(fifo_.queued_frames(),
audio_input_params().sample_rate());
base::TimeDelta gap = (capture_ts - end_of_existing_buffer_ts).magnitude();
constexpr base::TimeDelta max_gap = base::TimeDelta::FromMilliseconds(1);
if (gap > max_gap) {
DLOG(ERROR) << "Large gap in sound. Forced flush. "
<< "Gap/overlap duration: " << gap;
FlushImpl();
next_timestamp_ = capture_ts;
}

// The |fifo_| won't trigger OnFifoOutput() until we have enough frames
// suitable for the converter.
fifo_.Push(audio_bus);
Expand Down Expand Up @@ -251,11 +269,14 @@ void AudioOpusEncoder::OnFifoOutput(const AudioBus& output_bus,
converted_params_.frames_per_buffer(), &encoded_data,
&encoded_data_size)) {
DCHECK_GT(encoded_data_size, 1u);
encode_callback().Run(EncodedAudioBuffer(
converted_params_, std::move(encoded_data), encoded_data_size,
ComputeTimestamp(
number_of_flushed_frames_.value_or(output_bus.frames()),
last_capture_time())));
auto reported_duration = AudioTimestampHelper::FramesToTime(
number_of_flushed_frames_.value_or(output_bus.frames()),
audio_input_params().sample_rate());

encode_callback().Run(
EncodedAudioBuffer(converted_params_, std::move(encoded_data),
encoded_data_size, next_timestamp_));
next_timestamp_ += reported_duration;
}
}

Expand Down
3 changes: 3 additions & 0 deletions media/audio/audio_opus_encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ class MEDIA_EXPORT AudioOpusEncoder : public AudioEncoder {
// this will be the number of flushed frames, which is used to compute the
// timestamp provided in the output |EncodedAudioBuffer|.
base::Optional<int> number_of_flushed_frames_;

// Timestamp that should be reported by the next call to encode_callback().
base::TimeTicks next_timestamp_;
};

} // namespace media
Expand Down
2 changes: 1 addition & 1 deletion media/base/audio_encoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ void AudioEncoder::EncodeAudio(const AudioBus& audio_bus,
!last_capture_time_.is_null() &&
((capture_time - last_capture_time_).InSecondsF() >
1.5f * audio_bus.frames() / audio_input_params().sample_rate()))
<< "Possibly frames were skipped, which may result in inaccuarate "
<< "Possibly frames were skipped, which may result in inaccurate "
"timestamp calculation.";

last_capture_time_ = capture_time;
Expand Down
16 changes: 11 additions & 5 deletions third_party/blink/renderer/modules/webcodecs/audio_encoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "media/audio/audio_opus_encoder.h"
#include "media/base/audio_parameters.h"
#include "media/base/audio_timestamp_helper.h"
#include "third_party/blink/public/mojom/web_feature/web_feature.mojom-blink.h"
#include "third_party/blink/renderer/bindings/modules/v8/v8_audio_encoder_config.h"
#include "third_party/blink/renderer/bindings/modules/v8/v8_audio_frame_init.h"
Expand Down Expand Up @@ -81,11 +82,16 @@ void AudioEncoder::ProcessEncode(Request* request) {
DCHECK_GT(requested_encodes_, 0);

auto* frame = request->frame.Release();

base::TimeTicks time =
base::TimeTicks() + base::TimeDelta::FromMicroseconds(frame->timestamp());

auto* buffer = frame->buffer();

// Converting time at the beginning of the frame (aka timestamp) into
// time at the end of the frame (aka capture time) that is expected by
// media::AudioEncoder.
base::TimeTicks capture_time =
base::TimeTicks() +
base::TimeDelta::FromMicroseconds(frame->timestamp()) +
media::AudioTimestampHelper::FramesToTime(buffer->length(),
active_config_->sample_rate);
DCHECK(buffer);
{
auto audio_bus = media::AudioBus::CreateWrapper(buffer->numberOfChannels());
Expand All @@ -95,7 +101,7 @@ void AudioEncoder::ProcessEncode(Request* request) {
audio_bus->SetChannelData(channel, data);
}
audio_bus->set_frames(buffer->length());
media_encoder_->EncodeAudio(*audio_bus, time);
media_encoder_->EncodeAudio(*audio_bus, capture_time);
}

frame->close();
Expand Down
Loading

0 comments on commit 4c64dac

Please sign in to comment.