From 609eb6d880e808e7e1e5b8f0cb510a3a0bcd2065 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 26 Apr 2024 21:53:17 +0000 Subject: [PATCH] DSP HLE: Get ADPCM audio decoding working (#499) * Start decoding ADPCM * Fix accidentally skipping ADPCM samples * DSP HLE: ADPCM weights are signed * Format * Format * Fix broken amend --- include/audio/hle_core.hpp | 45 +++++++++++-- src/core/audio/hle_core.cpp | 127 +++++++++++++++++++++++++++++++++--- 2 files changed, 156 insertions(+), 16 deletions(-) diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index 6d7b3ad1..b533727b 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -1,6 +1,9 @@ #pragma once #include +#include +#include #include +#include #include #include "audio/dsp_core.hpp" @@ -18,7 +21,7 @@ namespace Audio { u32 paddr; // Physical address of the buffer u32 sampleCount; // Total number of samples u8 adpcmScale; // ADPCM predictor/scale - u8 pad1; // Unknown + u8 pad1; // Unknown std::array previousSamples; // ADPCM y[n-1] and y[n-2] bool adpcmDirty; @@ -39,17 +42,40 @@ namespace Audio { return this->bufferID > other.bufferID; } }; + // Buffer of decoded PCM16 samples. TODO: Are there better alternatives to use over deque? + using SampleBuffer = std::deque>; using BufferQueue = std::priority_queue; + BufferQueue buffers; std::array gain0, gain1, gain2; u16 syncCount; - bool enabled; + bool enabled; // Is the source enabled? - BufferQueue buffers; + // ADPCM decoding info: + // An array of fixed point S5.11 coefficients. These provide "weights" for the history samples + // The system describing how an ADPCM output sample is generated is + // y[n] = x[n] + 0.5 + coeff1 * y[n-1] + coeff2 * y[n-2] + // Where y[n] is the output sample we're generating, x[n] is the ADPCM "differential" of the current sample + // And coeff1/coeff2 are the coefficients from this array that are used for weighing the history samples + std::array adpcmCoefficients; + s16 history1; // y[n-1], the previous output sample + s16 history2; // y[n-2], the previous previous output sample + + SampleBuffer currentSamples; int index = 0; // Index of the voice in [0, 23] for debugging void reset(); + // Pop a buffer from the buffer queue and return it + Buffer popBuffer() { + assert(!buffers.empty()); + + Buffer ret = buffers.top(); + buffers.pop(); + + return ret; + } + DSPSource() { reset(); } }; @@ -61,7 +87,7 @@ namespace Audio { template using Frame = std::array, 160>; - + template using MonoFrame = Frame; @@ -72,6 +98,8 @@ namespace Audio { using QuadFrame = Frame; using Source = Audio::DSPSource; + using SampleBuffer = Source::SampleBuffer; + private: enum class DSPState : u32 { Off, @@ -91,7 +119,7 @@ namespace Audio { SourceType sourceType = SourceType::Stereo; void resetAudioPipe(); - bool loaded = false; // Have we loaded a component? + bool loaded = false; // Have we loaded a component? // Get the index for the current region we'll be reading. Returns the region with the highest frame counter // Accounting for whether one of the frame counters has wrapped around @@ -130,10 +158,13 @@ namespace Audio { } } - void updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config); + void updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients); void generateFrame(StereoFrame& frame); void outputFrame(); - void dumpBuffer(const Source::Buffer& buffer); + // Decode an entire buffer worth of audio + void decodeBuffer(DSPSource& source); + SampleBuffer decodeADPCM(const u8* data, usize sampleCount, Source& source); + public: HLE_DSP(Memory& mem, Scheduler& scheduler, DSPService& dspService); ~HLE_DSP() override {} diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 245894ce..4ee5a1dc 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -1,5 +1,7 @@ #include "audio/hle_core.hpp" +#include +#include #include #include @@ -105,7 +107,7 @@ namespace Audio { outputFrame(); scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::cyclesPerFrame); } - + u16 HLE_DSP::recvData(u32 regId) { if (regId != 0) { Helpers::panic("Audio: invalid register in HLE frontend"); @@ -139,14 +141,11 @@ namespace Audio { // TODO: Other initialization stuff here dspState = DSPState::On; resetAudioPipe(); - + dspService.triggerPipeEvent(DSPPipeType::Audio); break; - case StateChange::Shutdown: - dspState = DSPState::Off; - break; - + case StateChange::Shutdown: dspState = DSPState::Off; break; default: Helpers::panic("Unimplemented DSP audio pipe state change %d", state); } } @@ -210,7 +209,7 @@ namespace Audio { // Update source configuration from the read region of shared memory auto& config = read.sourceConfigurations.config[i]; auto& source = sources[i]; - updateSourceConfig(source, config); + updateSourceConfig(source, config, read.adpcmCoefficients.coeff[i]); // Generate audio if (source.enabled && !source.buffers.empty()) { @@ -229,7 +228,7 @@ namespace Audio { } } - void HLE_DSP::updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config) { + void HLE_DSP::updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients) { // Check if the any dirty bit is set, otherwise exit early if (!config.dirtyRaw) { return; @@ -245,6 +244,15 @@ namespace Audio { source.syncCount = config.syncCount; } + if (config.adpcmCoefficientsDirty) { + config.adpcmCoefficientsDirty = 0; + // Convert the ADPCM coefficients in DSP shared memory from s16_le to s16 and cache them in source.adpcmCoefficients + std::transform( + adpcmCoefficients, adpcmCoefficients + source.adpcmCoefficients.size(), source.adpcmCoefficients.begin(), + [](const s16_le& input) -> s16 { return s16(input); } + ); + } + if (config.resetFlag) { config.resetFlag = 0; source.reset(); @@ -254,7 +262,7 @@ namespace Audio { config.partialResetFlag = 0; source.buffers = {}; } - + // TODO: Should we check bufferQueueDirty here too? if (config.formatDirty || config.embeddedBufferDirty) { sampleFormat = config.format; @@ -302,6 +310,107 @@ namespace Audio { config.dirtyRaw = 0; } + void HLE_DSP::decodeBuffer(DSPSource& source) { + if (source.buffers.empty()) { + // No queued buffers, there's nothing to decode so return + return; + } + + DSPSource::Buffer buffer = source.popBuffer(); + if (buffer.adpcmDirty) { + source.history1 = buffer.previousSamples[0]; + source.history2 = buffer.previousSamples[1]; + } + + const u8* data = getPointerPhys(buffer.paddr); + if (data == nullptr) { + return; + } + + switch (buffer.format) { + case SampleFormat::PCM8: + case SampleFormat::PCM16: Helpers::warn("Unimplemented sample format!"); break; + + case SampleFormat::ADPCM: source.currentSamples = decodeADPCM(data, buffer.sampleCount, source); break; + default: Helpers::warn("Invalid DSP sample format"); break; + } + } + + HLE_DSP::SampleBuffer HLE_DSP::decodeADPCM(const u8* data, usize sampleCount, Source& source) { + static constexpr uint samplesPerBlock = 14; + // An ADPCM block is comprised of a single header which contains the scale and predictor value for the block, and then 14 4bpp samples (hence + // the / 2) + static constexpr usize blockSize = sizeof(u8) + samplesPerBlock / 2; + + // How many ADPCM blocks we'll be consuming. It's sampleCount / samplesPerBlock, rounded up. + const usize blockCount = (sampleCount + (samplesPerBlock - 1)) / samplesPerBlock; + const usize outputSize = sampleCount + (sampleCount & 1); // Bump the output size to a multiple of 2 + + usize outputCount = 0; // How many stereo samples have we output thus far? + SampleBuffer decodedSamples(outputSize); + + s16 history1 = source.history1; + s16 history2 = source.history2; + + // Decode samples in frames. Stop when we reach sampleCount samples + for (uint blockIndex = 0; blockIndex < blockCount; blockIndex++) { + const u8 scaleAndPredictor = *data++; + + const u32 scale = 1 << u32(scaleAndPredictor & 0xF); + // This is referred to as 4-bit in some documentation, but I am pretty sure that's a mistake + const u32 predictor = (scaleAndPredictor >> 4) & 0x7; + + // Fixed point (s5.11) coefficients for the history samples + const s32 weight1 = source.adpcmCoefficients[predictor * 2]; + const s32 weight2 = source.adpcmCoefficients[predictor * 2 + 1]; + + // Decode samples in batches of 2 + // Each 4 bit ADPCM differential corresponds to 1 mono sample which will be output from both the left and right channel + // So each byte of ADPCM data ends up generating 2 stereo samples + for (uint sampleIndex = 0; sampleIndex < samplesPerBlock && outputCount < sampleCount; sampleIndex += 2) { + const auto decode = [&](s32 nibble) -> s16 { + static constexpr s32 ONE = 0x800; // 1.0 in S5.11 fixed point + static constexpr s32 HALF = ONE / 2; // 0.5 similarly + + // Sign extend our nibble from s4 to s32 + nibble = (nibble << 28) >> 28; + + // Scale the extended nibble by the scale specified in the ADPCM block header, to get the real value of the sample's differential + const s32 diff = nibble * scale; + + // Convert ADPCM to PCM using y[n] = x[n] + 0.5 + coeff1 * y[n - 1] + coeff2 * y[n - 2] + // The coefficients are in s5.11 fixed point so we also perform the proper conversions + s32 output = ((diff << 11) + HALF + weight1 * history1 + weight2 * history2) >> 11; + output = std::clamp(output, -32768, 32767); + + // Write back new history samples + history2 = history1; // y[n-2] = y[n-1] + history1 = output; // y[n-1] = y[n] + + return s16(output); + }; + + const u8 samples = *data++; // Fetch the byte containing 2 4-bpp samples + const s32 topNibble = s32(samples) >> 4; // First sample + const s32 bottomNibble = s32(samples) & 0xF; // Second sample + + // Decode and write first sample, then the second one + const s16 sample1 = decode(topNibble); + decodedSamples[outputCount].fill(sample1); + + const s16 sample2 = decode(bottomNibble); + decodedSamples[outputCount + 1].fill(sample2); + + outputCount += 2; + } + } + + // Store new history samples in the DSP source and return samples + source.history1 = history1; + source.history2 = history2; + return decodedSamples; + } + void DSPSource::reset() { enabled = false; syncCount = 0;