-
Notifications
You must be signed in to change notification settings - Fork 376
Open
Description
I need to close one question.
There is such a thing as clipping.
And if I understood correctly, then clipping is the distortion of sound due to too high a volume level.
So, how do I avoid this clipping?
By reducing/limiting the volume level?
Or how do I adjust the volume so that clipping doesn't occur at all (even with 10 players communicating at the same time)?
The code that processes the voice:
void CAudio::ProcessVoiceOutput(opus_int16* out, unsigned long framesCount) {
memset(out, 0, framesCount * sizeof(opus_int16));
if (!codec || !codec->decoder) return;
std::lock_guard<std::mutex> lock(jitter_mutex);
if (jitter_buffers.empty()) return;
const float BASE_GAIN = 1.0f;
const int MAX_SIMULTANEOUS_VOICES = 4;
const float TARGET_PEAK = 0.8f;
std::vector<float> mixBuffer(framesCount, 0.0f);
uint32_t currentTime = static_cast<uint32_t>(
std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now().time_since_epoch()).count());
struct SimpleVoice {
std::vector<float> samples;
float gain = 0.0f;
};
std::vector<SimpleVoice> voices;
for (auto it = jitter_buffers.begin(); it != jitter_buffers.end(); ) {
auto& buffer = it->second;
if (buffer.empty()) {
++it;
continue;
}
AudioFrame& frame = buffer.front();
uint32_t packetAge = currentTime - frame.receivedTime;
if (packetAge > 1000) {
buffer.pop_front();
++it;
continue;
}
float volume = frame.volume;
float volumeNorm = volume;
volumeNorm = sqrtf(max(0.0f, volumeNorm));
volumeNorm = std::clamp(volumeNorm, 0.0f, 0.9f);
if (volumeNorm < 0.05f) {
buffer.pop_front();
++it;
continue;
}
float totalGain = BASE_GAIN * volumeNorm;
if (volumeNorm < 0.3f) {
totalGain *= 1.5f;
}
totalGain = min(totalGain, 3.0f);
SimpleVoice voice;
voice.gain = totalGain;
voice.samples.resize(framesCount, 0.0f);
size_t copySize = min(frame.pcm.size(), framesCount);
for (size_t i = 0; i < copySize; i++) {
voice.samples[i] = static_cast<float>(frame.pcm[i]) / 32768.0f;
}
voices.push_back(std::move(voice));
buffer.pop_front();
++it;
}
if (voices.empty()) return;
if (voices.size() > MAX_SIMULTANEOUS_VOICES) {
voices.resize(MAX_SIMULTANEOUS_VOICES);
}
for (const auto& voice : voices) {
for (size_t i = 0; i < framesCount; i++) {
float sample = voice.samples[i] * voice.gain;
sample = compressor.process(sample);
if (sample > 0.8f) sample = 0.8f;
if (sample < -0.8f) sample = -0.8f;
mixBuffer[i] += sample;
}
}
float peak = 0.0f;
for (size_t i = 0; i < framesCount; i++) {
peak = max(peak, fabsf(mixBuffer[i]));
}
float autoGain = 1.0f;
if (peak > 0.001f && peak < 0.5f) {
autoGain = 0.7f / peak;
autoGain = min(autoGain, 4.0f);
}
int clippedSamples = 0;
for (size_t i = 0; i < framesCount; i++) {
float sample = mixBuffer[i] * autoGain;
if (sample > 0.9f) {
sample = 0.9f;
clippedSamples++;
}
if (sample < -0.9f) {
sample = -0.9f;
clippedSamples++;
}
out[i] = static_cast<opus_int16>(sample * 32767.0f);
}
static int logCounter = 0;
if (logCounter++ % 50 == 0) {
printf("[Audio] Voices: %zu, Gain: x%.1f\n", voices.size(), autoGain);
}
for (auto it = jitter_buffers.begin(); it != jitter_buffers.end(); ) {
if (it->second.empty()) {
it = jitter_buffers.erase(it);
}
else {
++it;
}
}
}
Metadata
Metadata
Assignees
Labels
No labels