mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-06 14:15:41 +12:00
DSP HLE: Get ADPCM audio decoding working (#499)
* Start decoding ADPCM * Fix accidentally skipping ADPCM samples * DSP HLE: ADPCM weights are signed * Format * Format * Fix broken amend
This commit is contained in:
parent
2fc66fd3ba
commit
609eb6d880
2 changed files with 156 additions and 16 deletions
|
@ -1,6 +1,9 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <deque>
|
||||
#include <queue>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "audio/dsp_core.hpp"
|
||||
|
@ -18,7 +21,7 @@ namespace Audio {
|
|||
u32 paddr; // Physical address of the buffer
|
||||
u32 sampleCount; // Total number of samples
|
||||
u8 adpcmScale; // ADPCM predictor/scale
|
||||
u8 pad1; // Unknown
|
||||
u8 pad1; // Unknown
|
||||
|
||||
std::array<s16, 2> previousSamples; // ADPCM y[n-1] and y[n-2]
|
||||
bool adpcmDirty;
|
||||
|
@ -39,17 +42,40 @@ namespace Audio {
|
|||
return this->bufferID > other.bufferID;
|
||||
}
|
||||
};
|
||||
// Buffer of decoded PCM16 samples. TODO: Are there better alternatives to use over deque?
|
||||
using SampleBuffer = std::deque<std::array<s16, 2>>;
|
||||
|
||||
using BufferQueue = std::priority_queue<Buffer>;
|
||||
BufferQueue buffers;
|
||||
|
||||
std::array<float, 3> gain0, gain1, gain2;
|
||||
u16 syncCount;
|
||||
bool enabled;
|
||||
bool enabled; // Is the source enabled?
|
||||
|
||||
BufferQueue buffers;
|
||||
// ADPCM decoding info:
|
||||
// An array of fixed point S5.11 coefficients. These provide "weights" for the history samples
|
||||
// The system describing how an ADPCM output sample is generated is
|
||||
// y[n] = x[n] + 0.5 + coeff1 * y[n-1] + coeff2 * y[n-2]
|
||||
// Where y[n] is the output sample we're generating, x[n] is the ADPCM "differential" of the current sample
|
||||
// And coeff1/coeff2 are the coefficients from this array that are used for weighting the history samples
|
||||
std::array<s16, 16> adpcmCoefficients;
|
||||
s16 history1; // y[n-1], the previous output sample
|
||||
s16 history2; // y[n-2], the previous previous output sample
|
||||
|
||||
SampleBuffer currentSamples;
|
||||
int index = 0; // Index of the voice in [0, 23] for debugging
|
||||
|
||||
void reset();
|
||||
// Pop a buffer from the buffer queue and return it
|
||||
Buffer popBuffer() {
|
||||
assert(!buffers.empty());
|
||||
|
||||
Buffer ret = buffers.top();
|
||||
buffers.pop();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
DSPSource() { reset(); }
|
||||
};
|
||||
|
||||
|
@ -61,7 +87,7 @@ namespace Audio {
|
|||
|
||||
template <typename T, usize channelCount>
|
||||
using Frame = std::array<Sample<T, channelCount>, 160>;
|
||||
|
||||
|
||||
template <typename T>
|
||||
using MonoFrame = Frame<T, 1>;
|
||||
|
||||
|
@ -72,6 +98,8 @@ namespace Audio {
|
|||
using QuadFrame = Frame<T, 4>;
|
||||
|
||||
using Source = Audio::DSPSource;
|
||||
using SampleBuffer = Source::SampleBuffer;
|
||||
|
||||
private:
|
||||
enum class DSPState : u32 {
|
||||
Off,
|
||||
|
@ -91,7 +119,7 @@ namespace Audio {
|
|||
SourceType sourceType = SourceType::Stereo;
|
||||
|
||||
void resetAudioPipe();
|
||||
bool loaded = false; // Have we loaded a component?
|
||||
bool loaded = false; // Have we loaded a component?
|
||||
|
||||
// Get the index for the current region we'll be reading. Returns the region with the highest frame counter
|
||||
// Accounting for whether one of the frame counters has wrapped around
|
||||
|
@ -130,10 +158,13 @@ namespace Audio {
|
|||
}
|
||||
}
|
||||
|
||||
void updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config);
|
||||
void updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients);
|
||||
void generateFrame(StereoFrame<s16>& frame);
|
||||
void outputFrame();
|
||||
void dumpBuffer(const Source::Buffer& buffer);
|
||||
// Decode an entire buffer worth of audio
|
||||
void decodeBuffer(DSPSource& source);
|
||||
SampleBuffer decodeADPCM(const u8* data, usize sampleCount, Source& source);
|
||||
|
||||
public:
|
||||
HLE_DSP(Memory& mem, Scheduler& scheduler, DSPService& dspService);
|
||||
~HLE_DSP() override {}
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
#include "audio/hle_core.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
|
||||
|
@ -105,7 +107,7 @@ namespace Audio {
|
|||
outputFrame();
|
||||
scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::cyclesPerFrame);
|
||||
}
|
||||
|
||||
|
||||
u16 HLE_DSP::recvData(u32 regId) {
|
||||
if (regId != 0) {
|
||||
Helpers::panic("Audio: invalid register in HLE frontend");
|
||||
|
@ -139,14 +141,11 @@ namespace Audio {
|
|||
// TODO: Other initialization stuff here
|
||||
dspState = DSPState::On;
|
||||
resetAudioPipe();
|
||||
|
||||
|
||||
dspService.triggerPipeEvent(DSPPipeType::Audio);
|
||||
break;
|
||||
|
||||
case StateChange::Shutdown:
|
||||
dspState = DSPState::Off;
|
||||
break;
|
||||
|
||||
case StateChange::Shutdown: dspState = DSPState::Off; break;
|
||||
default: Helpers::panic("Unimplemented DSP audio pipe state change %d", state);
|
||||
}
|
||||
}
|
||||
|
@ -210,7 +209,7 @@ namespace Audio {
|
|||
// Update source configuration from the read region of shared memory
|
||||
auto& config = read.sourceConfigurations.config[i];
|
||||
auto& source = sources[i];
|
||||
updateSourceConfig(source, config);
|
||||
updateSourceConfig(source, config, read.adpcmCoefficients.coeff[i]);
|
||||
|
||||
// Generate audio
|
||||
if (source.enabled && !source.buffers.empty()) {
|
||||
|
@ -229,7 +228,7 @@ namespace Audio {
|
|||
}
|
||||
}
|
||||
|
||||
void HLE_DSP::updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config) {
|
||||
void HLE_DSP::updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients) {
|
||||
// Check if any dirty bit is set, otherwise exit early
|
||||
if (!config.dirtyRaw) {
|
||||
return;
|
||||
|
@ -245,6 +244,15 @@ namespace Audio {
|
|||
source.syncCount = config.syncCount;
|
||||
}
|
||||
|
||||
if (config.adpcmCoefficientsDirty) {
|
||||
config.adpcmCoefficientsDirty = 0;
|
||||
// Convert the ADPCM coefficients in DSP shared memory from s16_le to s16 and cache them in source.adpcmCoefficients
|
||||
std::transform(
|
||||
adpcmCoefficients, adpcmCoefficients + source.adpcmCoefficients.size(), source.adpcmCoefficients.begin(),
|
||||
[](const s16_le& input) -> s16 { return s16(input); }
|
||||
);
|
||||
}
|
||||
|
||||
if (config.resetFlag) {
|
||||
config.resetFlag = 0;
|
||||
source.reset();
|
||||
|
@ -254,7 +262,7 @@ namespace Audio {
|
|||
config.partialResetFlag = 0;
|
||||
source.buffers = {};
|
||||
}
|
||||
|
||||
|
||||
// TODO: Should we check bufferQueueDirty here too?
|
||||
if (config.formatDirty || config.embeddedBufferDirty) {
|
||||
sampleFormat = config.format;
|
||||
|
@ -302,6 +310,107 @@ namespace Audio {
|
|||
config.dirtyRaw = 0;
|
||||
}
|
||||
|
||||
// Pop the next queued buffer of a source and decode it into source.currentSamples.
// Returns without decoding if the source has no queued buffers or if the buffer's physical address can't be resolved to a pointer.
// Only ADPCM is decoded at the moment, other formats just emit a warning.
void HLE_DSP::decodeBuffer(DSPSource& source) {
	// Nothing queued for this source, so there is nothing to decode
	if (source.buffers.empty()) {
		return;
	}

	DSPSource::Buffer next = source.popBuffer();
	if (next.adpcmDirty) {
		// Load the ADPCM history (y[n-1] and y[n-2]) that was supplied alongside this buffer
		source.history1 = next.previousSamples[0];
		source.history2 = next.previousSamples[1];
	}

	const u8* samples = getPointerPhys<u8>(next.paddr);
	if (samples == nullptr) {
		return;
	}

	if (next.format == SampleFormat::ADPCM) {
		source.currentSamples = decodeADPCM(samples, next.sampleCount, source);
	} else if (next.format == SampleFormat::PCM8 || next.format == SampleFormat::PCM16) {
		Helpers::warn("Unimplemented sample format!");
	} else {
		Helpers::warn("Invalid DSP sample format");
	}
}
|
||||
|
||||
// Decode a buffer of DSP ADPCM data into stereo PCM16 samples.
//
// The input is a sequence of blocks. Each block starts with a 1-byte header (bits 0-3: scale shift, bits 4-6: predictor index)
// followed by up to 14 4-bit differentials, packed 2 per byte with the first sample in the top nibble.
// Every decoded mono sample is written to both channels of its output entry.
//
// data: Pointer to the first block header. No bounds checking is done here, the caller must make sure enough bytes are readable
// sampleCount: Number of samples to decode
// source: Voice providing the ADPCM coefficients and the y[n-1]/y[n-2] history. The history is written back before returning
//
// Returns a buffer holding sampleCount entries, rounded up to an even number since samples are always decoded in pairs
HLE_DSP::SampleBuffer HLE_DSP::decodeADPCM(const u8* data, usize sampleCount, Source& source) {
	// An ADPCM block is comprised of a single header byte, followed by 14 4bpp samples
	static constexpr uint samplesPerBlock = 14;

	// How many ADPCM blocks we'll be consuming. It's sampleCount / samplesPerBlock, rounded up.
	const usize blockCount = (sampleCount + (samplesPerBlock - 1)) / samplesPerBlock;
	const usize outputSize = sampleCount + (sampleCount & 1);  // Bump the output size to a multiple of 2

	usize outputCount = 0;  // How many stereo samples have we output thus far?
	SampleBuffer decodedSamples(outputSize);

	s16 history1 = source.history1;
	s16 history2 = source.history2;

	// Decode samples in blocks. Stop when we reach sampleCount samples
	for (usize blockIndex = 0; blockIndex < blockCount; blockIndex++) {
		const u8 scaleAndPredictor = *data++;

		const s32 scale = s32(1) << (scaleAndPredictor & 0xF);
		// This is referred to as 4-bit in some documentation, but I am pretty sure that's a mistake
		const u32 predictor = (scaleAndPredictor >> 4) & 0x7;

		// Fixed point (s5.11) coefficients for the history samples
		const s32 weight1 = source.adpcmCoefficients[predictor * 2];
		const s32 weight2 = source.adpcmCoefficients[predictor * 2 + 1];

		// Turns one 4-bit differential (passed in as a value in [0, 15]) into a PCM16 sample and updates the history.
		// Scale and weights are fixed for the whole block, so the decoder is created once per block.
		const auto decode = [&](s32 nibble) -> s16 {
			// The coefficients come from guest memory and can be any s16 value, so weight * history can be as big as 2^30.
			// Summing two such products in 32 bits can overflow, so the prediction is accumulated in a type that is at least 64 bits wide.
			using Wide = long long;
			static constexpr Wide ONE = 0x800;     // 1.0 in S5.11 fixed point
			static constexpr Wide HALF = ONE / 2;  // 0.5 similarly

			// Sign extend our nibble from s4 to s32
			nibble = (nibble ^ 0x8) - 0x8;

			// Scale the extended nibble by the scale specified in the ADPCM block header, to get the real value of the sample's differential
			const Wide diff = Wide(nibble) * scale;

			// Convert ADPCM to PCM using y[n] = x[n] + 0.5 + coeff1 * y[n - 1] + coeff2 * y[n - 2]
			// The coefficients are in s5.11 fixed point, so x[n] is converted to s5.11 too and the sum is shifted back down at the end
			const Wide sum = diff * ONE + HALF + Wide(weight1) * history1 + Wide(weight2) * history2;
			const s16 output = s16(std::clamp<Wide>(sum >> 11, -32768, 32767));

			// Write back new history samples
			history2 = history1;  // y[n-2] = y[n-1]
			history1 = output;    // y[n-1] = y[n]

			return output;
		};

		// Decode samples in batches of 2
		// Each 4 bit ADPCM differential corresponds to 1 mono sample which will be output from both the left and right channel
		// So each byte of ADPCM data ends up generating 2 stereo samples
		for (uint sampleIndex = 0; sampleIndex < samplesPerBlock && outputCount < sampleCount; sampleIndex += 2) {
			const u8 samples = *data++;                   // Fetch the byte containing 2 4-bpp samples
			const s32 topNibble = s32(samples) >> 4;      // First sample
			const s32 bottomNibble = s32(samples) & 0xF;  // Second sample

			// Decode and write first sample, then the second one. The order matters as each decode updates the history
			const s16 sample1 = decode(topNibble);
			decodedSamples[outputCount].fill(sample1);

			// outputSize was rounded up to an even number, so outputCount + 1 is always in range
			const s16 sample2 = decode(bottomNibble);
			decodedSamples[outputCount + 1].fill(sample2);

			outputCount += 2;
		}
	}

	// Store new history samples in the DSP source and return samples
	source.history1 = history1;
	source.history2 = history2;
	return decodedSamples;
}
|
||||
|
||||
void DSPSource::reset() {
|
||||
enabled = false;
|
||||
syncCount = 0;
|
||||
|
|
Loading…
Add table
Reference in a new issue