how to achieve clean audio when doing VoIP 2

I have problems with the quality of audio playback from other players.

I'm writing a voice chat for GTA SAMP.

I've reduced the clipping, but it still occurs. How do I get rid of it completely?

Current code:

```
// This class is designed to work with audio. It mainly initializes PortAudio and works with it.

#include "CAudio.h"
#include "pch.h"
#include "CMultiplayer.h"
#include <algorithm>
#include <cmath>

// Stops and closes the PortAudio stream if one is open, then clears the handle
// so that a repeated call is a no-op.
void CAudio::Stop()
{
    if (!stream) {
        return;
    }

    Pa_StopStream(stream);
    Pa_CloseStream(stream);
    stream = nullptr;
}

void CAudio::setVolume(float volume)
{
    float gain_db = 20.0f * log10f(volume);
    opus_int32 gain_q8 = (opus_int32)(gain_db * 256.0f);
    opus_decoder_ctl(this->codec->decoder, OPUS_SET_GAIN(gain_q8));
}

void CAudio::ProcessVoiceInput(const opus_int16* in, unsigned long framesCount)
{
    PaError err = Pa_IsStreamActive(audio->stream);
    if (err == 1) {
        if (!is_silent(in, framesCount)) {
            const uint32_t current_time = static_cast<uint32_t>(
                std::chrono::duration_cast<std::chrono::milliseconds>(
                    std::chrono::steady_clock::now().time_since_epoch()).count()
            );

            std::vector<unsigned char> encoded_buffer(MAX_VOICE_PACKET_SIZE);
            const int encoded_len = opus_encode(audio->codec->encoder,
                in,
                framesCount,
                encoded_buffer.data(),
                encoded_buffer.size());

            if (encoded_len > 0) {
                std::vector<unsigned char> packet;
                packet.reserve(encoded_len + 6);

                const uint16_t playerId = samp->GetLocalPlayerID();
                const uint32_t timestamp = current_time;

                packet.insert(packet.end(),
                    reinterpret_cast<const unsigned char*>(&playerId),
                    reinterpret_cast<const unsigned char*>(&playerId) + sizeof(playerId)
                );

                packet.insert(packet.end(),
                    reinterpret_cast<const unsigned char*>(&timestamp),
                    reinterpret_cast<const unsigned char*>(&timestamp) + sizeof(timestamp)
                );

                packet.insert(packet.end(),
                    encoded_buffer.begin(),
                    encoded_buffer.begin() + encoded_len);

                std::lock_guard<std::mutex> lock(audio->queueMutex);
                audio->audioQueue.push(std::move(packet));
            }
        }
    }
}

// Mixes the queued voice frames of all remote players into the output buffer.
//
// Invoked from ProcessAudioStream. For each player's jitter buffer, frames
// older than 2000 ms plus the frame's ping are dropped; the remaining frames
// are scaled by a gain derived from frame.volume and summed into a float mix
// buffer. The mix is then attenuated when three or more players are talking,
// soft-limited above +/-31000 and written out as 16-bit PCM. Jitter buffers
// that end up empty are removed from the map.
//
// out         - destination buffer; exactly framesCount samples are written
// framesCount - number of samples to produce
//               NOTE(review): one frame is treated as one sample, i.e. the
//               code assumes a mono output stream - confirm.
//
// NOTE(review): this path allocates the mix buffer, takes jitter_mutex and may
// call printf on every invocation while running inside the audio callback.
void CAudio::ProcessVoiceOutput(opus_int16* out, unsigned long framesCount)
{
    // Start from silence so the buffer is well-defined even on an early exit.
    memset(out, 0, framesCount * sizeof(opus_int16));
    std::lock_guard<std::mutex> lock(audio->jitter_mutex);

    std::vector<float> mixBuffer(framesCount, 0.0f);
    int mixedPlayers = 0;

    const uint32_t current_time = static_cast<uint32_t>(
        std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::steady_clock::now().time_since_epoch()).count()
    );

    for (auto& [playerId, buffer] : audio->jitter_buffers) {
        unsigned long samplesMixed = 0;

        while (samplesMixed < framesCount && !buffer.empty()) {
            AudioFrame& frame = buffer.front();

            // Unsigned subtraction stays correct across a 32-bit wrap of the clock.
            const uint32_t packetAge = current_time - frame.receivedTime;
            const uint32_t allowedLatency = 2000 + frame.playerPing;

            if (packetAge > allowedLatency) {
                printf("[Voice] Player %d: drop packet | Ping: %i, Packet delay: %u, AllowedLatency: %u \n", frame.playerId, frame.playerPing, packetAge, allowedLatency);
                buffer.pop_front();
                continue;
            }

            const size_t frameSamples = frame.pcm.size();
            const size_t samplesToTake = std::min<size_t>(frameSamples, framesCount - samplesMixed);

            // Map frame.volume onto a gain of 0.5 .. 3.5.
            const float rawGain = frame.volume / 30.0f;
            const float gain = std::clamp(0.5f + rawGain * 4.5f, 0.5f, 3.5f);

            for (size_t i = 0; i < samplesToTake; ++i) {
                mixBuffer[samplesMixed + i] += static_cast<float>(frame.pcm[i]) * gain;
            }

            samplesMixed += samplesToTake;

            if (samplesToTake == frameSamples) {
                buffer.pop_front();
            }
            else {
                // Keep the unplayed tail for the next callback. Erasing in place
                // avoids allocating a second vector.
                frame.pcm.erase(frame.pcm.begin(), frame.pcm.begin() + samplesToTake);
            }
        }

        // Count each contributing player once. Counting per consumed frame made
        // a single speaker with several short frames look like a crowd and
        // triggered the attenuation below, so the level jumped between callbacks.
        if (samplesMixed > 0) {
            ++mixedPlayers;
        }
    }

    const bool attenuate = mixedPlayers >= 3;
    const float limit = 31000.0f;

    for (unsigned long i = 0; i < framesCount; ++i) {
        float sample = mixBuffer[i];

        if (attenuate) {
            sample /= 1.5f;
        }

        // Soft limiter: the part above the threshold is compressed so that it
        // approaches, but never reaches, limit + 800.
        if (sample > limit) {
            const float overshoot = sample - limit;
            sample = limit + overshoot / (1.0f + overshoot / 800.0f);
        }
        else if (sample < -limit) {
            const float overshoot = sample + limit;
            sample = -limit + overshoot / (1.0f - overshoot / 800.0f);
        }

        // Final hard clamp as a safety net before the integer conversion.
        sample = std::clamp(sample, -32700.0f, 32700.0f);

        out[i] = static_cast<opus_int16>(sample);
    }

    // Drop the map entries of players that have nothing left to play.
    for (auto it = audio->jitter_buffers.begin(); it != audio->jitter_buffers.end();) {
        if (it->second.empty()) {
            it = audio->jitter_buffers.erase(it);
        }
        else {
            ++it;
        }
    }
}

int CAudio::ProcessAudioStream(const void* input, void* output, unsigned long frameCount,
    const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void* userData)
{
    CAudio* audio = static_cast<CAudio*>(userData);
    const opus_int16* in = static_cast<const opus_int16*>(input);
    opus_int16* out = static_cast<opus_int16*>(output);

    if (output) {
        audio->ProcessVoiceOutput(out, frameCount);
    }

    if (input && AVSSync::microphone) {
        audio->ProcessVoiceInput(in, frameCount);
    }

    return paContinue;
}

// Reports whether a PCM buffer is quiet enough to be treated as silence.
//
// The buffer counts as silent when the mean absolute sample value is below 150.
//
// buffer     - 16-bit PCM samples
// frame_size - number of samples in `buffer`
//
// Returns 1 if the buffer is silent, 0 otherwise. A null or empty buffer is
// reported as silent; previously an empty buffer divided 0 by 0, and the
// resulting NaN compared as "not silent".
int CAudio::is_silent(const opus_int16* buffer, int frame_size) {
    if (buffer == nullptr || frame_size <= 0) {
        return 1;
    }

    // 64-bit accumulator: a 32-bit int can overflow once the buffer holds more
    // than about 65k full-scale samples.
    long long sum = 0;
    for (int i = 0; i < frame_size; i++) {
        const int sample = buffer[i];  // widen first so that -32768 negates safely
        sum += (sample < 0) ? -sample : sample;
    }

    const float avg = static_cast<float>(sum) / static_cast<float>(frame_size);
    return avg < 150.0f;
}

void CAudio::SendAudioPackets() {
    if (!AVSSync::getRakClientIntf() || !AVSSync::getRakClientIntf()->IsConnected()) return;

    std::lock_guard<std::mutex> lock(queueMutex);

    while (!audioQueue.empty()) {
        auto& packet = audioQueue.front();
        if (packet.size() >= 6) {
            AVSSync::SendVoicePacket(packet);
        }
        audioQueue.pop();
    }
}
```

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

how to achieve clean audio when doing VoIP 2 #1110

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

how to achieve clean audio when doing VoIP 2 #1110

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions