mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-06 06:05:40 +12:00
DSP: Add SSE quad-conversion code
Co-Authored-By: Kelpsy <138107494+kelpsyberry@users.noreply.github.com>
This commit is contained in:
parent
33f45cf1f5
commit
878ff419fd
4 changed files with 73 additions and 13 deletions
|
@ -355,7 +355,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
|
|||
include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp
|
||||
include/PICA/pica_vert_config.hpp include/sdl_sensors.hpp include/PICA/draw_acceleration.hpp include/renderdoc.hpp
|
||||
include/align.hpp include/audio/aac_decoder.hpp include/PICA/pica_simd.hpp include/services/fonts.hpp
|
||||
include/audio/audio_interpolation.hpp include/audio/hle_mixer.hpp
|
||||
include/audio/audio_interpolation.hpp include/audio/hle_mixer.hpp include/audio/dsp_simd.hpp
|
||||
)
|
||||
|
||||
cmrc_add_resource_library(
|
||||
|
|
62
include/audio/dsp_simd.hpp
Normal file
62
include/audio/dsp_simd.hpp
Normal file
|
@ -0,0 +1,62 @@
|
|||
#pragma once
|
||||
|
||||
#include "audio/hle_mixer.hpp"
|
||||
#include "compiler_builtins.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
#if defined(_M_AMD64) || defined(__x86_64__)
|
||||
#define DSP_SIMD_X64
|
||||
#include <immintrin.h>
|
||||
#elif defined(_M_ARM64) || defined(__aarch64__)
|
||||
#define DSP_SIMD_ARM64
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
// Optimized SIMD functions for mixing the stereo output of a DSP voice into a quadraphonic intermediate mix
|
||||
namespace DSP::MixIntoQuad {
|
||||
using IntermediateMix = Audio::DSPMixer::IntermediateMix;
|
||||
using StereoFrame16 = Audio::DSPMixer::StereoFrame<s16>;
|
||||
|
||||
// Non-SIMD, portable algorithm
|
||||
// Scalar fallback with no SIMD requirements.
// Accumulates a voice's stereo frame into the quad intermediate mix, one sample at a time:
// quad channels 0 and 2 receive the left input scaled by gains[0] and gains[2],
// quad channels 1 and 3 receive the right input scaled by gains[1] and gains[3].
// Every product is cast to s32 before being added to the mix.
ALWAYS_INLINE static void mixPortable(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
	for (usize i = 0; i < Audio::samplesInFrame; i++) {
		// Each input sample is stored as a (left, right) pair
		const auto left = frame[i][0];
		const auto right = frame[i][1];

		mix[i][0] += s32(left * gains[0]);
		mix[i][1] += s32(right * gains[1]);
		mix[i][2] += s32(left * gains[2]);
		mix[i][3] += s32(right * gains[3]);
	}
}
|
||||
|
||||
#if defined(DSP_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
|
||||
// SSE4.1 implementation: processes one quad sample (4 x s32) per loop iteration.
// mix:   quad intermediate mix, accumulated into in place. It is accessed with aligned 16-byte
//        loads/stores, so every quad sample must sit on a 16-byte boundary
// frame: stereo s16 output of the voice. Each (l, r) pair is fetched with a single 32-bit load
// gains: pointer to 4 floats, read with an aligned 16-byte load, so it must be 16-byte aligned
ALWAYS_INLINE static void mixSSE4_1(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
	// The gains are constant for the whole frame, so load them once rather than once per sample
	const __m128 gains_ = _mm_load_ps(gains);

	for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) {
		// The stereo samples, repeated every 4 bytes inside the vector register
		// NOTE(review): this reads the s16 pair through a float*, which is a type pun - confirm this is acceptable for all supported compilers
		__m128i stereoSamples = _mm_castps_si128(_mm_load1_ps((float*)&frame[sampleIndex][0]));

		// Sign-extend the low 4 s16 lanes (l, r, l, r) to s32, then convert them to float
		__m128 currentFrame = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(stereoSamples));
		// Use the truncating conversion (cvttps) so the result matches the s32(float) casts in mixPortable.
		// _mm_cvtps_epi32 rounds according to MXCSR (round-to-nearest by default) and would produce different samples
		__m128i offset = _mm_cvttps_epi32(_mm_mul_ps(currentFrame, gains_));
		__m128i intermediateMixPrev = _mm_load_si128((__m128i*)&mix[sampleIndex][0]);
		__m128i result = _mm_add_epi32(intermediateMixPrev, offset);
		_mm_store_si128((__m128i*)&mix[sampleIndex][0], result);
	}
}
|
||||
#endif
|
||||
|
||||
#ifdef DSP_SIMD_ARM64
|
||||
// ARM64 entry point. Currently has no NEON-specific code and simply forwards to the portable implementation
ALWAYS_INLINE static void mixNEON(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
	mixPortable(mix, frame, gains);
}
|
||||
#endif
|
||||
|
||||
// Mixes the stereo output of a DSP voice into a quadraphonic intermediate mix
|
||||
// Entry point: accumulates the stereo output of a DSP voice into a quadraphonic intermediate mix.
// The implementation is picked at compile time: mixNEON on ARM64, mixSSE4_1 on x64 builds
// compiled with SSE4.1 or AVX enabled, and mixPortable everywhere else.
static void mix(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
#if defined(DSP_SIMD_ARM64)
	mixNEON(mix, frame, gains);
#elif defined(DSP_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
	mixSSE4_1(mix, frame, gains);
#else
	mixPortable(mix, frame, gains);
#endif
}
|
||||
} // namespace DSP::MixIntoQuad
|
|
@ -50,7 +50,9 @@ namespace Audio {
|
|||
using InterpolationMode = HLE::SourceConfiguration::Configuration::InterpolationMode;
|
||||
using InterpolationState = Audio::Interpolation::State;
|
||||
|
||||
DSPMixer::StereoFrame<s16> currentFrame;
|
||||
// The samples this voice output for this audio frame.
|
||||
// Aligned to 4 for SIMD purposes.
|
||||
alignas(4) DSPMixer::StereoFrame<s16> currentFrame;
|
||||
BufferQueue buffers;
|
||||
|
||||
SampleFormat sampleFormat = SampleFormat::ADPCM;
|
||||
|
@ -60,7 +62,8 @@ namespace Audio {
|
|||
|
||||
// There's one gain configuration for each of the 3 intermediate mixing stages
|
||||
// And each gain configuration is composed of 4 gain values, one for each sample in a quad-channel sample
|
||||
std::array<std::array<float, 4>, 3> gains;
|
||||
// Aligned to 16 for SIMD purposes
|
||||
alignas(16) std::array<std::array<float, 4>, 3> gains;
|
||||
// Of the 3 intermediate mix stages, typically only the first one is actually enabled and the other ones do nothing
|
||||
// Ie their gain is vec4(0.0). We track which stages are disabled (have a gain of all 0s) using this bitfield and skip them
|
||||
// In order to save up on CPU time.
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <utility>
|
||||
|
||||
#include "audio/aac_decoder.hpp"
|
||||
#include "audio/dsp_simd.hpp"
|
||||
#include "services/dsp.hpp"
|
||||
|
||||
namespace Audio {
|
||||
|
@ -228,7 +229,9 @@ namespace Audio {
|
|||
// The DSP checks the DSP configuration dirty bits on every frame, applies them, and clears them
|
||||
read.dspConfiguration.dirtyRaw = 0;
|
||||
read.dspConfiguration.dirtyRaw2 = 0;
|
||||
std::array<IntermediateMix, 3> mixes{};
|
||||
|
||||
// The intermediate mix buffer is aligned to 16 for SIMD purposes
|
||||
alignas(16) std::array<IntermediateMix, 3> mixes{};
|
||||
|
||||
for (int i = 0; i < sourceCount; i++) {
|
||||
// Update source configuration from the read region of shared memory
|
||||
|
@ -263,15 +266,7 @@ namespace Audio {
|
|||
IntermediateMix& intermediateMix = mixes[mix];
|
||||
const std::array<float, 4>& gains = source.gains[mix];
|
||||
|
||||
// TODO: SIMD implementations
|
||||
for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) {
|
||||
// Mono samples are in the format: (l, r)
|
||||
// When converting to quad, gain0 and gain2 are applied to the left sample, gain1 and gain3 to the right one
|
||||
intermediateMix[sampleIndex][0] += s32(source.currentFrame[sampleIndex][0] * gains[0]);
|
||||
intermediateMix[sampleIndex][1] += s32(source.currentFrame[sampleIndex][1] * gains[1]);
|
||||
intermediateMix[sampleIndex][2] += s32(source.currentFrame[sampleIndex][0] * gains[2]);
|
||||
intermediateMix[sampleIndex][3] += s32(source.currentFrame[sampleIndex][1] * gains[3]);
|
||||
}
|
||||
DSP::MixIntoQuad::mix(intermediateMix, source.currentFrame, gains.data());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue