DSP HLE: Get ADPCM audio decoding working (#499)

* Start decoding ADPCM

* Fix accidentally skipping ADPCM samples

* DSP HLE: ADPCM weights are signed

* Format

* Format

* Fix broken amend
This commit is contained in:
wheremyfoodat 2024-04-26 21:53:17 +00:00 committed by GitHub
parent 2fc66fd3ba
commit 609eb6d880
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 156 additions and 16 deletions

View file

@ -1,6 +1,9 @@
#pragma once
#include <array>
#include <cassert>
#include <deque>
#include <queue>
#include <span>
#include <vector>
#include "audio/dsp_core.hpp"
@ -18,7 +21,7 @@ namespace Audio {
u32 paddr; // Physical address of the buffer
u32 sampleCount; // Total number of samples
u8 adpcmScale; // ADPCM predictor/scale
u8 pad1; // Unknown
u8 pad1; // Unknown
std::array<s16, 2> previousSamples; // ADPCM y[n-1] and y[n-2]
bool adpcmDirty;
@ -39,17 +42,40 @@ namespace Audio {
return this->bufferID > other.bufferID;
}
};
// Buffer of decoded PCM16 samples. TODO: Are there better alternatives to use over deque?
using SampleBuffer = std::deque<std::array<s16, 2>>;
using BufferQueue = std::priority_queue<Buffer>;
BufferQueue buffers;
std::array<float, 3> gain0, gain1, gain2;
u16 syncCount;
bool enabled;
bool enabled; // Is the source enabled?
BufferQueue buffers;
// ADPCM decoding info:
// An array of fixed point S5.11 coefficients. These provide "weights" for the history samples.
// The equation describing how an ADPCM output sample is generated is
// y[n] = x[n] + 0.5 + coeff1 * y[n-1] + coeff2 * y[n-2]
// where y[n] is the output sample we're generating, x[n] is the ADPCM "differential" of the current sample,
// and coeff1/coeff2 are the coefficients from this array that are used for weighting the history samples
std::array<s16, 16> adpcmCoefficients;
s16 history1; // y[n-1], the previous output sample
s16 history2; // y[n-2], the previous previous output sample
SampleBuffer currentSamples;
int index = 0; // Index of the voice in [0, 23] for debugging
void reset();
// Remove the buffer at the top of the priority queue and hand it back to the caller.
// The queue must not be empty; this is only checked with an assert.
Buffer popBuffer() {
	assert(!buffers.empty());

	auto next = buffers.top();  // Copy out first: pop() destroys the queue's element
	buffers.pop();
	return next;
}
DSPSource() { reset(); }
};
@ -61,7 +87,7 @@ namespace Audio {
template <typename T, usize channelCount>
using Frame = std::array<Sample<T, channelCount>, 160>;
template <typename T>
using MonoFrame = Frame<T, 1>;
@ -72,6 +98,8 @@ namespace Audio {
using QuadFrame = Frame<T, 4>;
using Source = Audio::DSPSource;
using SampleBuffer = Source::SampleBuffer;
private:
enum class DSPState : u32 {
Off,
@ -91,7 +119,7 @@ namespace Audio {
SourceType sourceType = SourceType::Stereo;
void resetAudioPipe();
bool loaded = false; // Have we loaded a component?
bool loaded = false; // Have we loaded a component?
// Get the index for the current region we'll be reading. Returns the region with the highest frame counter
// Accounting for whether one of the frame counters has wrapped around
@ -130,10 +158,13 @@ namespace Audio {
}
}
void updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config);
void updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients);
void generateFrame(StereoFrame<s16>& frame);
void outputFrame();
void dumpBuffer(const Source::Buffer& buffer);
// Decode an entire buffer worth of audio
void decodeBuffer(DSPSource& source);
SampleBuffer decodeADPCM(const u8* data, usize sampleCount, Source& source);
public:
HLE_DSP(Memory& mem, Scheduler& scheduler, DSPService& dspService);
~HLE_DSP() override {}

View file

@ -1,5 +1,7 @@
#include "audio/hle_core.hpp"
#include <algorithm>
#include <cassert>
#include <thread>
#include <utility>
@ -105,7 +107,7 @@ namespace Audio {
outputFrame();
scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::cyclesPerFrame);
}
u16 HLE_DSP::recvData(u32 regId) {
if (regId != 0) {
Helpers::panic("Audio: invalid register in HLE frontend");
@ -139,14 +141,11 @@ namespace Audio {
// TODO: Other initialization stuff here
dspState = DSPState::On;
resetAudioPipe();
dspService.triggerPipeEvent(DSPPipeType::Audio);
break;
case StateChange::Shutdown:
dspState = DSPState::Off;
break;
case StateChange::Shutdown: dspState = DSPState::Off; break;
default: Helpers::panic("Unimplemented DSP audio pipe state change %d", state);
}
}
@ -210,7 +209,7 @@ namespace Audio {
// Update source configuration from the read region of shared memory
auto& config = read.sourceConfigurations.config[i];
auto& source = sources[i];
updateSourceConfig(source, config);
updateSourceConfig(source, config, read.adpcmCoefficients.coeff[i]);
// Generate audio
if (source.enabled && !source.buffers.empty()) {
@ -229,7 +228,7 @@ namespace Audio {
}
}
void HLE_DSP::updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config) {
void HLE_DSP::updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients) {
// Check if the any dirty bit is set, otherwise exit early
if (!config.dirtyRaw) {
return;
@ -245,6 +244,15 @@ namespace Audio {
source.syncCount = config.syncCount;
}
if (config.adpcmCoefficientsDirty) {
config.adpcmCoefficientsDirty = 0;
// Convert the ADPCM coefficients in DSP shared memory from s16_le to s16 and cache them in source.adpcmCoefficients
std::transform(
adpcmCoefficients, adpcmCoefficients + source.adpcmCoefficients.size(), source.adpcmCoefficients.begin(),
[](const s16_le& input) -> s16 { return s16(input); }
);
}
if (config.resetFlag) {
config.resetFlag = 0;
source.reset();
@ -254,7 +262,7 @@ namespace Audio {
config.partialResetFlag = 0;
source.buffers = {};
}
// TODO: Should we check bufferQueueDirty here too?
if (config.formatDirty || config.embeddedBufferDirty) {
sampleFormat = config.format;
@ -302,6 +310,107 @@ namespace Audio {
config.dirtyRaw = 0;
}
// Take the next queued buffer of a source and decode it into source.currentSamples.
// Does nothing if the source has no queued buffers. If the buffer's physical address cannot be
// resolved to a host pointer, the buffer is dropped and currentSamples is left untouched.
void HLE_DSP::decodeBuffer(DSPSource& source) {
	// Nothing queued means nothing to decode
	if (source.buffers.empty()) {
		return;
	}

	const DSPSource::Buffer buffer = source.popBuffer();

	// When the buffer is flagged as ADPCM-dirty, reload the decoder history (y[n-1], y[n-2]) from it
	if (buffer.adpcmDirty) {
		source.history1 = buffer.previousSamples[0];
		source.history2 = buffer.previousSamples[1];
	}

	const u8* const data = getPointerPhys<u8>(buffer.paddr);
	if (data == nullptr) {
		return;
	}

	// Only ADPCM is decoded for now; the PCM formats are recognized but not implemented yet
	if (buffer.format == SampleFormat::ADPCM) {
		source.currentSamples = decodeADPCM(data, buffer.sampleCount, source);
	} else if (buffer.format == SampleFormat::PCM8 || buffer.format == SampleFormat::PCM16) {
		Helpers::warn("Unimplemented sample format!");
	} else {
		Helpers::warn("Invalid DSP sample format");
	}
}
// Decode a run of ADPCM data into PCM16 samples.
//
// data:        Pointer to the first ADPCM block. The caller must guarantee that enough bytes are readable to
//              cover ceil(sampleCount / 14) blocks; no bounds checking is performed here.
// sampleCount: Number of samples to decode.
// source:      Supplies the coefficient table and the starting history samples. Its history1/history2
//              are updated with the last two decoded samples before returning.
//
// Returns a buffer of stereo samples in which both channels carry the same decoded mono value.
// Samples are produced in pairs, so an odd sampleCount yields sampleCount + 1 entries.
HLE_DSP::SampleBuffer HLE_DSP::decodeADPCM(const u8* data, usize sampleCount, Source& source) {
	// An ADPCM block is comprised of a single header byte which contains the scale and predictor value for the block,
	// followed by 14 4bpp samples packed two per byte
	static constexpr uint samplesPerBlock = 14;

	static constexpr s32 ONE = 0x800;     // 1.0 in S5.11 fixed point
	static constexpr s32 HALF = ONE / 2;  // 0.5 similarly

	// How many ADPCM blocks we'll be consuming. It's sampleCount / samplesPerBlock, rounded up.
	const usize blockCount = (sampleCount + (samplesPerBlock - 1)) / samplesPerBlock;
	const usize outputSize = sampleCount + (sampleCount & 1);  // Bump the output size to a multiple of 2

	usize outputCount = 0;  // How many stereo samples have we output thus far?
	SampleBuffer decodedSamples(outputSize);

	s16 history1 = source.history1;
	s16 history2 = source.history2;

	// Decode samples block by block. Stop when we reach sampleCount samples
	for (usize blockIndex = 0; blockIndex < blockCount; blockIndex++) {
		const u8 scaleAndPredictor = *data++;
		// Kept signed so that multiplying it with a negative nibble stays in signed arithmetic
		const s32 scale = s32(1) << (scaleAndPredictor & 0xF);
		// This is referred to as 4-bit in some documentation, but I am pretty sure that's a mistake
		const u32 predictor = (scaleAndPredictor >> 4) & 0x7;

		// Fixed point (s5.11) coefficients for the history samples
		const s32 weight1 = source.adpcmCoefficients[predictor * 2];
		const s32 weight2 = source.adpcmCoefficients[predictor * 2 + 1];

		// Decodes a single 4-bit differential (passed in as 0..15) and advances the history samples
		const auto decode = [&](s32 nibble) -> s16 {
			// Sign extend our nibble from s4 to s32
			nibble = (nibble ^ 0x8) - 0x8;
			// Scale the extended nibble by the scale specified in the ADPCM block header, to get the real value of the sample's differential
			const s32 diff = nibble * scale;

			// Convert ADPCM to PCM using y[n] = x[n] + 0.5 + coeff1 * y[n - 1] + coeff2 * y[n - 2]
			// The coefficients are in s5.11 fixed point so we also perform the proper conversions.
			// The sum is accumulated in 64 bits: with extreme coefficients and history values the two products alone
			// can add up to 2^31, which would overflow a 32-bit signed accumulator before the clamp gets a chance to run
			const long long accumulator = static_cast<long long>(diff) * ONE + HALF + static_cast<long long>(weight1) * history1 +
										  static_cast<long long>(weight2) * history2;
			const s16 output = s16(std::clamp<long long>(accumulator >> 11, -32768, 32767));

			// Write back new history samples
			history2 = history1;  // y[n-2] = y[n-1]
			history1 = output;    // y[n-1] = y[n]
			return output;
		};

		// Decode samples in batches of 2
		// Each 4 bit ADPCM differential corresponds to 1 mono sample which will be output from both the left and right channel
		// So each byte of ADPCM data ends up generating 2 stereo samples
		for (uint sampleIndex = 0; sampleIndex < samplesPerBlock && outputCount < sampleCount; sampleIndex += 2) {
			const u8 samples = *data++;                   // Fetch the byte containing 2 4-bpp samples
			const s32 topNibble = s32(samples) >> 4;      // First sample
			const s32 bottomNibble = s32(samples) & 0xF;  // Second sample

			// Decode and write first sample, then the second one. The order matters as each decode updates the history
			const s16 sample1 = decode(topNibble);
			decodedSamples[outputCount].fill(sample1);

			const s16 sample2 = decode(bottomNibble);
			decodedSamples[outputCount + 1].fill(sample2);

			outputCount += 2;
		}
	}

	// Store new history samples in the DSP source and return samples
	source.history1 = history1;
	source.history2 = history2;
	return decodedSamples;
}
void DSPSource::reset() {
enabled = false;
syncCount = 0;