Skip to content

How to achieve clean audio when doing VoIP (part 2) #1110

@DigitalOneThe

Description

@DigitalOneThe

I have problems with the quality of audio playback from other players.

I'm writing a voice chat for GTA SAMP.

I've reduced the clipping, but it has not gone away completely. How do I get rid of it for good?

Current code:

// CAudio handles voice audio: it initializes PortAudio and uses it for microphone capture and playback.

#include "CAudio.h"
#include "pch.h"
#include "CMultiplayer.h"
#include <algorithm>
#include <cmath>

// Stops and closes the PortAudio stream, if one is currently held.
// Safe to call repeatedly: once the handle is cleared, later calls do nothing.
void CAudio::Stop()
{
    // No stream handle means there is nothing to shut down.
    if (!stream) {
        return;
    }

    // Stop first, then close. The PaError results of both calls are ignored.
    Pa_StopStream(stream);
    Pa_CloseStream(stream);

    // Forget the handle so it is never passed to PortAudio again.
    stream = nullptr;
}

void CAudio::setVolume(float volume)
{
    float gain_db = 20.0f * log10f(volume);
    opus_int32 gain_q8 = (opus_int32)(gain_db * 256.0f);
    opus_decoder_ctl(this->codec->decoder, OPUS_SET_GAIN(gain_q8));
}

void CAudio::ProcessVoiceInput(const opus_int16* in, unsigned long framesCount)
{
    PaError err = Pa_IsStreamActive(audio->stream);
    if (err == 1) {
        if (!is_silent(in, framesCount)) {
            const uint32_t current_time = static_cast<uint32_t>(
                std::chrono::duration_cast<std::chrono::milliseconds>(
                    std::chrono::steady_clock::now().time_since_epoch()).count()
            );

            std::vector<unsigned char> encoded_buffer(MAX_VOICE_PACKET_SIZE);
            const int encoded_len = opus_encode(audio->codec->encoder,
                in,
                framesCount,
                encoded_buffer.data(),
                encoded_buffer.size());

            if (encoded_len > 0) {
                std::vector<unsigned char> packet;
                packet.reserve(encoded_len + 6);

                const uint16_t playerId = samp->GetLocalPlayerID();
                const uint32_t timestamp = current_time;

                packet.insert(packet.end(),
                    reinterpret_cast<const unsigned char*>(&playerId),
                    reinterpret_cast<const unsigned char*>(&playerId) + sizeof(playerId)
                );

                packet.insert(packet.end(),
                    reinterpret_cast<const unsigned char*>(&timestamp),
                    reinterpret_cast<const unsigned char*>(&timestamp) + sizeof(timestamp)
                );

                packet.insert(packet.end(),
                    encoded_buffer.begin(),
                    encoded_buffer.begin() + encoded_len);

                std::lock_guard<std::mutex> lock(audio->queueMutex);
                audio->audioQueue.push(std::move(packet));
            }
        }
    }
}

void CAudio::ProcessVoiceOutput(opus_int16* out, unsigned long framesCount)
{
    memset(out, 0, framesCount * sizeof(opus_int16));
    std::lock_guard<std::mutex> lock(audio->jitter_mutex);

    std::vector<float> mixBuffer(framesCount, 0.0f);
    int mixedPlayers = 0;

    const uint32_t current_time = static_cast<uint32_t>(
        std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::steady_clock::now().time_since_epoch()).count()
    );

    unsigned long samplesNeeded = framesCount;

    for (auto& [playerId, buffer] : audio->jitter_buffers) {
        if (buffer.empty()) continue;

        unsigned long samplesMixed = 0;

        while (samplesMixed < samplesNeeded && !buffer.empty()) {
            AudioFrame& frame = buffer.front();

            uint32_t packetAge = current_time - frame.receivedTime;
            uint32_t allowedLatency = 2000 + frame.playerPing;

            if (packetAge > allowedLatency) {
                printf("[Voice] Player %d: drop packet | Ping: %i, Packet delay: %i, AllowedLatency: %i \n", frame.playerId, frame.playerPing, packetAge, allowedLatency);
                buffer.pop_front();
                continue;
            }

            size_t frameSamples = frame.pcm.size();
            size_t samplesToTake = min(frameSamples, samplesNeeded - samplesMixed);

            float rawGain = frame.volume / 30.0f;
            float gain = 0.5f + rawGain * 4.5f;

            if (gain > 3.5f) gain = 3.5f;
            if (gain < 0.5f) gain = 0.5f;

            for (size_t i = 0; i < samplesToTake; ++i) {
                float sample = static_cast<float>(frame.pcm[i]) * gain;
                mixBuffer[samplesMixed + i] += sample;
            }

            samplesMixed += samplesToTake;

            if (samplesToTake == frameSamples) {
                buffer.pop_front();
            }
            else {
                std::vector<opus_int16> remaining(frame.pcm.begin() + samplesToTake, frame.pcm.end());
                frame.pcm = std::move(remaining);
            }

            mixedPlayers++;
        }
    }

    for (unsigned long i = 0; i < framesCount; ++i) {
        float sample = mixBuffer[i];

        if (mixedPlayers > 1) {
            if (mixedPlayers >= 3) {
                sample /= 1.5f;
            }
        }

        const float limit = 31000.0f;
        if (sample > limit) {
            float overshoot = sample - limit;
            sample = limit + overshoot / (1.0f + overshoot / 800.0f);
        }
        else if (sample < -limit) {
            float overshoot = sample + limit;
            sample = -limit + overshoot / (1.0f - overshoot / 800.0f);
        }

        if (sample > 32700.0f) sample = 32700.0f;
        if (sample < -32700.0f) sample = -32700.0f;

        out[i] = static_cast<opus_int16>(sample);
    }

    for (auto it = audio->jitter_buffers.begin(); it != audio->jitter_buffers.end();) {
        if (it->second.empty()) {
            it = audio->jitter_buffers.erase(it);
        }
        else {
            ++it;
        }
    }
}

int CAudio::ProcessAudioStream(const void* input, void* output, unsigned long frameCount,
    const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void* userData)
{
    CAudio* audio = static_cast<CAudio*>(userData);
    const opus_int16* in = static_cast<const opus_int16*>(input);
    opus_int16* out = static_cast<opus_int16*>(output);

    if (output) {
        audio->ProcessVoiceOutput(out, frameCount);
    }

    if (input && AVSSync::microphone) {
        audio->ProcessVoiceInput(in, frameCount);
    }

    return paContinue;
}

// Reports whether a block of PCM samples is quiet enough to be skipped.
//
// @param buffer     16-bit PCM samples.
// @param frame_size Number of samples in `buffer`.
// @return 1 when the mean absolute sample value is below 150, otherwise 0.
//         A null or empty buffer is reported as silent: previously an empty
//         buffer produced 0/0 = NaN and was treated as "not silent".
int CAudio::is_silent(const opus_int16* buffer, int frame_size) {
    if (buffer == nullptr || frame_size <= 0) {
        return 1;
    }

    // 64-bit accumulator: a 32-bit sum can overflow on large buffers.
    long long sum = 0;
    for (int i = 0; i < frame_size; i++) {
        const int sample = buffer[i];
        sum += (sample < 0) ? -sample : sample;
    }

    const float avg = static_cast<float>(sum) / static_cast<float>(frame_size);
    return avg < 150.0f ? 1 : 0;
}

void CAudio::SendAudioPackets() {
    if (!AVSSync::getRakClientIntf() || !AVSSync::getRakClientIntf()->IsConnected()) return;

    std::lock_guard<std::mutex> lock(queueMutex);

    while (!audioQueue.empty()) {
        auto& packet = audioQueue.front();
        if (packet.size() >= 6) {
            AVSSync::SendVoicePacket(packet);
        }
        audioQueue.pop();
    }
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions