Merge branch 'master' into open-bp-cpp

This commit is contained in:
sylvieee-iot 2024-12-31 16:28:40 +02:00 committed by GitHub
commit 606158fb27
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
289 changed files with 18600 additions and 1763 deletions

View file

@ -0,0 +1,45 @@
#pragma once
#include <array>
#include "helpers.hpp"
namespace PICA {
struct DrawAcceleration {
static constexpr u32 maxAttribCount = 16;
static constexpr u32 maxLoaderCount = 12;
struct AttributeInfo {
u32 offset;
u32 stride;
u8 type;
u8 componentCount;
std::array<float, 4> fixedValue; // For fixed attributes
};
struct Loader {
// Data to upload for this loader
u8* data;
usize size;
};
u8* indexBuffer;
// Minimum and maximum index in the index buffer for a draw call
u16 minimumIndex, maximumIndex;
u32 totalAttribCount;
u32 totalLoaderCount;
u32 enabledAttributeMask;
u32 fixedAttributes;
u32 vertexDataSize;
std::array<AttributeInfo, maxAttribCount> attributeInfo;
std::array<Loader, maxLoaderCount> loaders;
bool canBeAccelerated;
bool indexed;
bool useShortIndices;
};
} // namespace PICA

View file

@ -2,7 +2,7 @@
#include "helpers.hpp"
#include "vertex_loader_rec.hpp"
// Common file for our PICA JITs (From vertex config -> CPU assembly and from PICA shader -> CPU assembly)
// Common file for our PICA JITs (From PICA shader -> CPU assembly)
namespace Dynapica {
#ifdef PANDA3DS_DYNAPICA_SUPPORTED

View file

@ -22,8 +22,11 @@ class ShaderJIT {
ShaderCache cache;
#endif
bool accurateMul = false;
public:
void setAccurateMul(bool value) { accurateMul = value; }
#ifdef PANDA3DS_SHADER_JIT_SUPPORTED
// Call this before starting to process a batch of vertices
// This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader
@ -36,11 +39,11 @@ class ShaderJIT {
static constexpr bool isAvailable() { return true; }
#else
void prepare(PICAShader& shaderUnit) {
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
}
void run(PICAShader& shaderUnit) {
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
}
// Define dummy callback. This should never be called if the shader JIT is not supported

View file

@ -37,6 +37,8 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
// Shows whether the loaded shader has any log2 and exp2 instructions
bool codeHasLog2 = false;
bool codeHasExp2 = false;
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
bool useSafeMUL = false;
oaknut::Label log2Func, exp2Func;
oaknut::Label emitLog2Func();
@ -123,7 +125,7 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
PrologueCallback prologueCb = nullptr;
// Initialize our emitter with "allocSize" bytes of memory allocated for the code buffer
ShaderEmitter() : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {}
ShaderEmitter(bool useSafeMUL) : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()), useSafeMUL(useSafeMUL) {}
// PC must be a valid entrypoint here. It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does
InstructionCallback getInstructionCallback(u32 pc) { return getLabelPointer<InstructionCallback>(instructionLabels.at(pc)); }

View file

@ -32,6 +32,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
Label negateVector;
// Vector value of (1.0, 1.0, 1.0, 1.0) for SLT(i)/SGE(i)
Label onesVector;
// Vector value of (0xFF, 0xFF, 0xFF, 0) for setting the w component to 0 in DP3
Label dp3Vector;
u32 recompilerPC = 0; // PC the recompiler is currently recompiling @
u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop)
@ -43,12 +45,17 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
// Shows whether the loaded shader has any log2 and exp2 instructions
bool codeHasLog2 = false;
bool codeHasExp2 = false;
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
bool useSafeMUL = false;
Xbyak::Label log2Func, exp2Func;
Xbyak::Label emitLog2Func();
Xbyak::Label emitExp2Func();
Xbyak::util::Cpu cpuCaps;
// Emit a PICA200-compliant multiplication that handles "0 * inf = 0"
void emitSafeMUL(Xbyak::Xmm src1, Xbyak::Xmm src2, Xbyak::Xmm scratch);
// Compile all instructions from [current recompiler PC, end)
void compileUntil(const PICAShader& shaderUnit, u32 endPC);
// Compile instruction "instr"
@ -125,7 +132,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
PrologueCallback prologueCb = nullptr;
// Initialize our emitter with "allocSize" bytes of RWX memory
ShaderEmitter() : Xbyak::CodeGenerator(allocSize) {
ShaderEmitter(bool useSafeMUL) : Xbyak::CodeGenerator(allocSize), useSafeMUL(useSafeMUL) {
cpuCaps = Xbyak::util::Cpu();
haveSSE4_1 = cpuCaps.has(Xbyak::util::Cpu::tSSE41);

View file

@ -1,6 +1,7 @@
#pragma once
#include <array>
#include "PICA/draw_acceleration.hpp"
#include "PICA/dynapica/shader_rec.hpp"
#include "PICA/float_types.hpp"
#include "PICA/pica_vertex.hpp"
@ -13,6 +14,12 @@
#include "memory.hpp"
#include "renderer.hpp"
enum class ShaderExecMode {
Interpreter, // Interpret shaders on the CPU
JIT, // Recompile shaders to CPU machine code
Hardware, // Recompiler shaders to host shaders and run them on the GPU
};
class GPU {
static constexpr u32 regNum = 0x300;
static constexpr u32 extRegNum = 0x1000;
@ -45,7 +52,7 @@ class GPU {
uint immediateModeVertIndex;
uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading
template <bool indexed, bool useShaderJIT>
template <bool indexed, ShaderExecMode mode>
void drawArrays();
// Silly method of avoiding linking problems. TODO: Change to something less silly
@ -81,6 +88,7 @@ class GPU {
std::unique_ptr<Renderer> renderer;
PICA::Vertex getImmediateModeVertex();
void getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed);
public:
// 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT
// Encoded in PICA native format
@ -92,6 +100,9 @@ class GPU {
// Set to false by the renderer when the lighting_lut is uploaded ot the GPU
bool lightingLUTDirty = false;
bool fogLUTDirty = false;
std::array<uint32_t, 128> fogLUT;
GPU(Memory& mem, EmulatorConfig& config);
void display() { renderer->display(); }
void screenshot(const std::string& name) { renderer->screenshot(name); }
@ -164,7 +175,8 @@ class GPU {
u32 index = paddr - PhysicalAddrs::VRAM;
return (T*)&vram[index];
} else [[unlikely]] {
Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr);
Helpers::warn("[GPU] Tried to access unknown physical address: %08X", paddr);
return nullptr;
}
}

View file

@ -0,0 +1,257 @@
#pragma once
#include <array>
#include <cstring>
#include <type_traits>
#include <unordered_map>
#include "PICA/pica_hash.hpp"
#include "PICA/regs.hpp"
#include "bitfield.hpp"
#include "helpers.hpp"
namespace PICA {
struct OutputConfig {
union {
u32 raw{};
// Merge the enable + compare function into 1 field to avoid duplicate shaders
// enable == off means a CompareFunction of Always
BitField<0, 3, CompareFunction> alphaTestFunction;
BitField<3, 1, u32> depthMapEnable;
BitField<4, 4, LogicOpMode> logicOpMode;
};
};
struct TextureConfig {
u32 texUnitConfig;
u32 texEnvUpdateBuffer;
// There's 6 TEV stages, and each one is configured via 4 word-sized registers
// (+ the constant color register, which we don't include here, otherwise we'd generate too many shaders)
std::array<u32, 4 * 6> tevConfigs;
};
struct FogConfig {
union {
u32 raw{};
BitField<0, 3, FogMode> mode;
BitField<3, 1, u32> flipDepth;
};
};
struct Light {
union {
u16 raw;
BitField<0, 3, u16> num;
BitField<3, 1, u16> directional;
BitField<4, 1, u16> twoSidedDiffuse;
BitField<5, 1, u16> distanceAttenuationEnable;
BitField<6, 1, u16> spotAttenuationEnable;
BitField<7, 1, u16> geometricFactor0;
BitField<8, 1, u16> geometricFactor1;
BitField<9, 1, u16> shadowEnable;
};
};
struct LightingLUTConfig {
union {
u32 raw;
BitField<0, 1, u32> enable;
BitField<1, 1, u32> absInput;
BitField<2, 3, u32> type;
BitField<5, 3, u32> scale;
};
};
struct LightingConfig {
union {
u32 raw{};
BitField<0, 1, u32> enable;
BitField<1, 4, u32> lightNum;
BitField<5, 2, u32> bumpMode;
BitField<7, 2, u32> bumpSelector;
BitField<9, 1, u32> bumpRenorm;
BitField<10, 1, u32> clampHighlights;
BitField<11, 4, u32> config;
BitField<15, 1, u32> enablePrimaryAlpha;
BitField<16, 1, u32> enableSecondaryAlpha;
BitField<17, 1, u32> enableShadow;
BitField<18, 1, u32> shadowPrimary;
BitField<19, 1, u32> shadowSecondary;
BitField<20, 1, u32> shadowInvert;
BitField<21, 1, u32> shadowAlpha;
BitField<22, 2, u32> shadowSelector;
};
std::array<LightingLUTConfig, 7> luts{};
std::array<Light, 8> lights{};
LightingConfig(const std::array<u32, 0x300>& regs) {
// Ignore lighting registers if it's disabled
if ((regs[InternalRegs::LightingEnable] & 1) == 0) {
return;
}
const u32 config0 = regs[InternalRegs::LightConfig0];
const u32 config1 = regs[InternalRegs::LightConfig1];
const u32 totalLightCount = Helpers::getBits<0, 3>(regs[InternalRegs::LightNumber]) + 1;
enable = 1;
lightNum = totalLightCount;
enableShadow = Helpers::getBit<0>(config0);
if (enableShadow) [[unlikely]] {
shadowPrimary = Helpers::getBit<16>(config0);
shadowSecondary = Helpers::getBit<17>(config0);
shadowInvert = Helpers::getBit<18>(config0);
shadowAlpha = Helpers::getBit<19>(config0);
shadowSelector = Helpers::getBits<24, 2>(config0);
}
enablePrimaryAlpha = Helpers::getBit<2>(config0);
enableSecondaryAlpha = Helpers::getBit<3>(config0);
config = Helpers::getBits<4, 4>(config0);
bumpSelector = Helpers::getBits<22, 2>(config0);
clampHighlights = Helpers::getBit<27>(config0);
bumpMode = Helpers::getBits<28, 2>(config0);
bumpRenorm = Helpers::getBit<30>(config0) ^ 1; // 0 = enable so flip it with xor
for (int i = 0; i < totalLightCount; i++) {
auto& light = lights[i];
light.num = (regs[InternalRegs::LightPermutation] >> (i * 4)) & 0x7;
const u32 lightConfig = regs[InternalRegs::Light0Config + 0x10 * light.num];
light.directional = Helpers::getBit<0>(lightConfig);
light.twoSidedDiffuse = Helpers::getBit<1>(lightConfig);
light.geometricFactor0 = Helpers::getBit<2>(lightConfig);
light.geometricFactor1 = Helpers::getBit<3>(lightConfig);
light.shadowEnable = ((config1 >> light.num) & 1) ^ 1; // This also does 0 = enabled
light.spotAttenuationEnable = ((config1 >> (8 + light.num)) & 1) ^ 1; // Same here
light.distanceAttenuationEnable = ((config1 >> (24 + light.num)) & 1) ^ 1; // Of course same here
}
LightingLUTConfig& d0 = luts[Lights::LUT_D0];
LightingLUTConfig& d1 = luts[Lights::LUT_D1];
LightingLUTConfig& sp = luts[spotlightLutIndex];
LightingLUTConfig& fr = luts[Lights::LUT_FR];
LightingLUTConfig& rb = luts[Lights::LUT_RB];
LightingLUTConfig& rg = luts[Lights::LUT_RG];
LightingLUTConfig& rr = luts[Lights::LUT_RR];
d0.enable = Helpers::getBit<16>(config1) == 0;
d1.enable = Helpers::getBit<17>(config1) == 0;
fr.enable = Helpers::getBit<19>(config1) == 0;
rb.enable = Helpers::getBit<20>(config1) == 0;
rg.enable = Helpers::getBit<21>(config1) == 0;
rr.enable = Helpers::getBit<22>(config1) == 0;
sp.enable = 1;
const u32 lutAbs = regs[InternalRegs::LightLUTAbs];
const u32 lutSelect = regs[InternalRegs::LightLUTSelect];
const u32 lutScale = regs[InternalRegs::LightLUTScale];
if (d0.enable) {
d0.absInput = Helpers::getBit<1>(lutAbs) == 0;
d0.type = Helpers::getBits<0, 3>(lutSelect);
d0.scale = Helpers::getBits<0, 3>(lutScale);
}
if (d1.enable) {
d1.absInput = Helpers::getBit<5>(lutAbs) == 0;
d1.type = Helpers::getBits<4, 3>(lutSelect);
d1.scale = Helpers::getBits<4, 3>(lutScale);
}
sp.absInput = Helpers::getBit<9>(lutAbs) == 0;
sp.type = Helpers::getBits<8, 3>(lutSelect);
sp.scale = Helpers::getBits<8, 3>(lutScale);
if (fr.enable) {
fr.absInput = Helpers::getBit<13>(lutAbs) == 0;
fr.type = Helpers::getBits<12, 3>(lutSelect);
fr.scale = Helpers::getBits<12, 3>(lutScale);
}
if (rb.enable) {
rb.absInput = Helpers::getBit<17>(lutAbs) == 0;
rb.type = Helpers::getBits<16, 3>(lutSelect);
rb.scale = Helpers::getBits<16, 3>(lutScale);
}
if (rg.enable) {
rg.absInput = Helpers::getBit<21>(lutAbs) == 0;
rg.type = Helpers::getBits<20, 3>(lutSelect);
rg.scale = Helpers::getBits<20, 3>(lutScale);
}
if (rr.enable) {
rr.absInput = Helpers::getBit<25>(lutAbs) == 0;
rr.type = Helpers::getBits<24, 3>(lutSelect);
rr.scale = Helpers::getBits<24, 3>(lutScale);
}
}
};
// Config used for identifying unique fragment pipeline configurations
struct FragmentConfig {
OutputConfig outConfig;
TextureConfig texConfig;
FogConfig fogConfig;
LightingConfig lighting;
bool operator==(const FragmentConfig& config) const {
// Hash function and equality operator required by std::unordered_map
return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
}
FragmentConfig(const std::array<u32, 0x300>& regs) : lighting(regs) {
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);
outConfig.alphaTestFunction =
(alphaTestConfig & 1) ? static_cast<PICA::CompareFunction>(alphaTestFunction) : PICA::CompareFunction::Always;
outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1;
// Shows if blending is enabled. If it is not enabled, then logic ops are enabled instead
const bool blendingEnabled = (regs[InternalRegs::ColourOperation] & (1 << 8)) != 0;
outConfig.logicOpMode = blendingEnabled ? LogicOpMode::Copy : LogicOpMode(Helpers::getBits<0, 4>(regs[InternalRegs::LogicOp]));
texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
// Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like
// {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO
#define setupTevStage(stage) \
std::memcpy(&texConfig.tevConfigs[stage * 4], &regs[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \
texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 4];
setupTevStage(0);
setupTevStage(1);
setupTevStage(2);
setupTevStage(3);
setupTevStage(4);
setupTevStage(5);
#undef setupTevStage
fogConfig.mode = (FogMode)Helpers::getBits<0, 3>(regs[InternalRegs::TexEnvUpdateBuffer]);
if (fogConfig.mode == FogMode::Fog) {
fogConfig.flipDepth = Helpers::getBit<16>(regs[InternalRegs::TexEnvUpdateBuffer]);
}
}
};
static_assert(
std::has_unique_object_representations<OutputConfig>() && std::has_unique_object_representations<TextureConfig>() &&
std::has_unique_object_representations<FogConfig>() && std::has_unique_object_representations<Light>()
);
} // namespace PICA
// Override std::hash for our fragment config class
template <>
struct std::hash<PICA::FragmentConfig> {
std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); }
};

View file

@ -0,0 +1,47 @@
#pragma once
#include <array>
#include <cstddef>
#include <type_traits>
#include "helpers.hpp"
namespace PICA {
struct LightUniform {
using vec3 = std::array<float, 3>;
// std140 requires vec3s be aligned to 16 bytes
alignas(16) vec3 specular0;
alignas(16) vec3 specular1;
alignas(16) vec3 diffuse;
alignas(16) vec3 ambient;
alignas(16) vec3 position;
alignas(16) vec3 spotlightDirection;
float distanceAttenuationBias;
float distanceAttenuationScale;
};
struct FragmentUniforms {
using vec3 = std::array<float, 3>;
using vec4 = std::array<float, 4>;
static constexpr usize tevStageCount = 6;
s32 alphaReference;
float depthScale;
float depthOffset;
alignas(16) vec4 constantColors[tevStageCount];
alignas(16) vec4 tevBufferColor;
alignas(16) vec4 clipCoords;
// Note: We upload these as a u32 and decode on GPU.
// Particularly the fog colour since fog is really uncommon and it doesn't matter if we decode on GPU.
u32 globalAmbientLight;
u32 fogColor;
// NOTE: THIS MUST BE LAST so that if lighting is disabled we can potentially omit uploading it
LightUniform lightUniforms[8];
};
// Assert that lightUniforms is the last member of the structure
static_assert(offsetof(FragmentUniforms, lightUniforms) + 8 * sizeof(LightUniform) == sizeof(FragmentUniforms));
} // namespace PICA

275
include/PICA/pica_simd.hpp Normal file
View file

@ -0,0 +1,275 @@
#pragma once
#include <algorithm>
#include <limits>
#include <utility>
#include "compiler_builtins.hpp"
#include "helpers.hpp"
#if defined(_M_AMD64) || defined(__x86_64__)
#define PICA_SIMD_X64
#include <immintrin.h>
#elif defined(_M_ARM64) || defined(__aarch64__)
#define PICA_SIMD_ARM64
#include <arm_neon.h>
#endif
// Optimized functions for analyzing PICA index buffers (Finding minimum and maximum index values inside them)
namespace PICA::IndexBuffer {
// Non-SIMD, portable algorithm
template <bool useShortIndices>
std::pair<u16, u16> analyzePortable(u8* indexBuffer, u32 vertexCount) {
u16 minimumIndex = std::numeric_limits<u16>::max();
u16 maximumIndex = 0;
// Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them
if constexpr (useShortIndices) {
u16* indexBuffer16 = reinterpret_cast<u16*>(indexBuffer);
for (u32 i = 0; i < vertexCount; i++) {
u16 index = indexBuffer16[i];
minimumIndex = std::min(minimumIndex, index);
maximumIndex = std::max(maximumIndex, index);
}
} else {
for (u32 i = 0; i < vertexCount; i++) {
u16 index = u16(indexBuffer[i]);
minimumIndex = std::min(minimumIndex, index);
maximumIndex = std::max(maximumIndex, index);
}
}
return {minimumIndex, maximumIndex};
}
#ifdef PICA_SIMD_ARM64
template <bool useShortIndices>
ALWAYS_INLINE std::pair<u16, u16> analyzeNEON(u8* indexBuffer, u32 vertexCount) {
// We process 16 bytes per iteration, which is 8 vertices if we're using u16 indices or 16 vertices if we're using u8 indices
constexpr u32 vertsPerLoop = (useShortIndices) ? 8 : 16;
if (vertexCount < vertsPerLoop) {
return analyzePortable<useShortIndices>(indexBuffer, vertexCount);
}
u16 minimumIndex, maximumIndex;
if constexpr (useShortIndices) {
// 16-bit indices
uint16x8_t minima = vdupq_n_u16(0xffff);
uint16x8_t maxima = vdupq_n_u16(0);
while (vertexCount >= vertsPerLoop) {
const uint16x8_t data = vld1q_u16(reinterpret_cast<u16*>(indexBuffer));
minima = vminq_u16(data, minima);
maxima = vmaxq_u16(data, maxima);
indexBuffer += 16;
vertexCount -= vertsPerLoop;
}
// Do horizontal min/max operations to get the actual minimum and maximum from all the vertices we processed with SIMD
// We want to gather the actual minimum and maximum in the line bottom lane of the minima/maxima vectors
// uint16x4_t foldedMinima1 = vmin_u16(vget_high_u16(minima), vget_low_u16(minima));
// uint16x4_t foldedMaxima1 = vmax_u16(vget_high_u16(maxima), vget_low_u16(maxima));
uint16x8_t foldedMinima1 = vpminq_u16(minima, minima);
uint16x8_t foldedMinima2 = vpminq_u16(foldedMinima1, foldedMinima1);
uint16x8_t foldedMinima3 = vpminq_u16(foldedMinima2, foldedMinima2);
uint16x8_t foldedMaxima1 = vpmaxq_u16(maxima, maxima);
uint16x8_t foldedMaxima2 = vpmaxq_u16(foldedMaxima1, foldedMaxima1);
uint16x8_t foldedMaxima3 = vpmaxq_u16(foldedMaxima2, foldedMaxima2);
minimumIndex = vgetq_lane_u16(foldedMinima3, 0);
maximumIndex = vgetq_lane_u16(foldedMaxima3, 0);
} else {
// 8-bit indices
uint8x16_t minima = vdupq_n_u8(0xff);
uint8x16_t maxima = vdupq_n_u8(0);
while (vertexCount >= vertsPerLoop) {
uint8x16_t data = vld1q_u8(indexBuffer);
minima = vminq_u8(data, minima);
maxima = vmaxq_u8(data, maxima);
indexBuffer += 16;
vertexCount -= vertsPerLoop;
}
// Do a similar horizontal min/max as in the u16 case, except now we're working uint8x16 instead of uint16x4 so we need 4 folds
uint8x16_t foldedMinima1 = vpminq_u8(minima, minima);
uint8x16_t foldedMinima2 = vpminq_u8(foldedMinima1, foldedMinima1);
uint8x16_t foldedMinima3 = vpminq_u8(foldedMinima2, foldedMinima2);
uint8x16_t foldedMinima4 = vpminq_u8(foldedMinima3, foldedMinima3);
uint8x16_t foldedMaxima1 = vpmaxq_u8(maxima, maxima);
uint8x16_t foldedMaxima2 = vpmaxq_u8(foldedMaxima1, foldedMaxima1);
uint8x16_t foldedMaxima3 = vpmaxq_u8(foldedMaxima2, foldedMaxima2);
uint8x16_t foldedMaxima4 = vpmaxq_u8(foldedMaxima3, foldedMaxima3);
minimumIndex = u16(vgetq_lane_u8(foldedMinima4, 0));
maximumIndex = u16(vgetq_lane_u8(foldedMaxima4, 0));
}
// If any indices could not be processed cause the buffer size is not 16-byte aligned, process them the naive way
// Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them
while (vertexCount > 0) {
if constexpr (useShortIndices) {
u16 index = *reinterpret_cast<u16*>(indexBuffer);
minimumIndex = std::min(minimumIndex, index);
maximumIndex = std::max(maximumIndex, index);
indexBuffer += 2;
} else {
u16 index = u16(*indexBuffer++);
minimumIndex = std::min(minimumIndex, index);
maximumIndex = std::max(maximumIndex, index);
}
vertexCount -= 1;
}
return {minimumIndex, maximumIndex};
}
#endif
#if defined(PICA_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
template <bool useShortIndices>
ALWAYS_INLINE std::pair<u16, u16> analyzeSSE4_1(u8* indexBuffer, u32 vertexCount) {
// We process 16 bytes per iteration, which is 8 vertices if we're using u16
// indices or 16 vertices if we're using u8 indices
constexpr u32 vertsPerLoop = (useShortIndices) ? 8 : 16;
if (vertexCount < vertsPerLoop) {
return analyzePortable<useShortIndices>(indexBuffer, vertexCount);
}
u16 minimumIndex, maximumIndex;
if constexpr (useShortIndices) {
// Calculate the horizontal minimum/maximum value across an SSE vector of 16-bit unsigned integers.
// Based on https://stackoverflow.com/a/22259607
auto horizontalMin16 = [](__m128i vector) -> u16 { return u16(_mm_cvtsi128_si32(_mm_minpos_epu16(vector))); };
auto horizontalMax16 = [](__m128i vector) -> u16 {
// We have an instruction to compute horizontal minimum but not maximum, so we use it.
// To use it, we have to subtract each value from 0xFFFF (which we do with an xor), then execute a horizontal minimum
__m128i flipped = _mm_xor_si128(vector, _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu));
u16 min = u16(_mm_cvtsi128_si32(_mm_minpos_epu16(flipped)));
return u16(min ^ 0xffff);
};
// 16-bit indices
// Initialize the minima vector to all FFs (So 0xFFFF for each 16-bit lane)
// And the maxima vector to all 0s (0 for each 16-bit lane)
__m128i minima = _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu);
__m128i maxima = _mm_set_epi32(0, 0, 0, 0);
while (vertexCount >= vertsPerLoop) {
const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(indexBuffer));
minima = _mm_min_epu16(data, minima);
maxima = _mm_max_epu16(data, maxima);
indexBuffer += 16;
vertexCount -= vertsPerLoop;
}
minimumIndex = u16(horizontalMin16(minima));
maximumIndex = u16(horizontalMax16(maxima));
} else {
// Calculate the horizontal minimum/maximum value across an SSE vector of 8-bit unsigned integers.
// Based on https://stackoverflow.com/a/22259607
auto horizontalMin8 = [](__m128i vector) -> u8 {
vector = _mm_min_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(3, 2, 3, 2)));
vector = _mm_min_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(1, 1, 1, 1)));
vector = _mm_min_epu8(vector, _mm_shufflelo_epi16(vector, _MM_SHUFFLE(1, 1, 1, 1)));
vector = _mm_min_epu8(vector, _mm_srli_epi16(vector, 8));
return u8(_mm_cvtsi128_si32(vector));
};
auto horizontalMax8 = [](__m128i vector) -> u8 {
vector = _mm_max_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(3, 2, 3, 2)));
vector = _mm_max_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(1, 1, 1, 1)));
vector = _mm_max_epu8(vector, _mm_shufflelo_epi16(vector, _MM_SHUFFLE(1, 1, 1, 1)));
vector = _mm_max_epu8(vector, _mm_srli_epi16(vector, 8));
return u8(_mm_cvtsi128_si32(vector));
};
// 8-bit indices
// Initialize the minima vector to all FFs (So 0xFF for each 8-bit lane)
// And the maxima vector to all 0s (0 for each 8-bit lane)
__m128i minima = _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu);
__m128i maxima = _mm_set_epi32(0, 0, 0, 0);
while (vertexCount >= vertsPerLoop) {
const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(indexBuffer));
minima = _mm_min_epu8(data, minima);
maxima = _mm_max_epu8(data, maxima);
indexBuffer += 16;
vertexCount -= vertsPerLoop;
}
minimumIndex = u16(horizontalMin8(minima));
maximumIndex = u16(horizontalMax8(maxima));
}
// If any indices could not be processed cause the buffer size
// is not 16-byte aligned, process them the naive way
// Calculate the minimum and maximum indices used in the index
// buffer, so we'll only upload them
while (vertexCount > 0) {
if constexpr (useShortIndices) {
u16 index = *reinterpret_cast<u16*>(indexBuffer);
minimumIndex = std::min(minimumIndex, index);
maximumIndex = std::max(maximumIndex, index);
indexBuffer += 2;
} else {
u16 index = u16(*indexBuffer++);
minimumIndex = std::min(minimumIndex, index);
maximumIndex = std::max(maximumIndex, index);
}
vertexCount -= 1;
}
return {minimumIndex, maximumIndex};
}
#endif
// Analyzes a PICA index buffer to get the minimum and maximum indices in the
// buffer, and returns them in a pair in the form [min, max]. Takes a template
// parameter to decide whether the indices in the buffer are u8 or u16
template <bool useShortIndices>
std::pair<u16, u16> analyze(u8* indexBuffer, u32 vertexCount) {
#if defined(PICA_SIMD_ARM64)
return analyzeNEON<useShortIndices>(indexBuffer, vertexCount);
#elif defined(PICA_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
// Annoyingly, MSVC refuses to define __SSE4_1__ even when we're building with AVX
return analyzeSSE4_1<useShortIndices>(indexBuffer, vertexCount);
#else
return analyzePortable<useShortIndices>(indexBuffer, vertexCount);
#endif
}
// In some really unfortunate scenarios (eg Android Studio emulator), we don't have access to glDrawRangeElementsBaseVertex
// So we need to subtract the base vertex index from every index in the index buffer ourselves
// This is not really common, so we do it without SIMD for the moment, just to be able to run on Android Studio
template <bool useShortIndices>
void subtractBaseIndex(u8* indexBuffer, u32 indexCount, u16 baseIndex) {
// Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them
if constexpr (useShortIndices) {
u16* indexBuffer16 = reinterpret_cast<u16*>(indexBuffer);
for (u32 i = 0; i < indexCount; i++) {
indexBuffer16[i] -= baseIndex;
}
} else {
u8 baseIndex8 = u8(baseIndex);
for (u32 i = 0; i < indexCount; i++) {
indexBuffer[i] -= baseIndex8;
}
}
}
} // namespace PICA::IndexBuffer

View file

@ -0,0 +1,57 @@
#pragma once
#include <array>
#include <cassert>
#include <cstring>
#include <type_traits>
#include <unordered_map>
#include "PICA/pica_hash.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader.hpp"
#include "bitfield.hpp"
#include "helpers.hpp"
namespace PICA {
// Configuration struct used
struct VertConfig {
PICAHash::HashType shaderHash;
PICAHash::HashType opdescHash;
u32 entrypoint;
// PICA registers for configuring shader output->fragment semantic mapping
std::array<u32, 7> outmaps{};
u16 outputMask;
u8 outputCount;
bool usingUbershader;
// Pad to 56 bytes so that the compiler won't insert unnecessary padding, which in turn will affect our unordered_map lookup
// As the padding will get hashed and memcmp'd...
u32 pad{};
bool operator==(const VertConfig& config) const {
// Hash function and equality operator required by std::unordered_map
return std::memcmp(this, &config, sizeof(VertConfig)) == 0;
}
VertConfig(PICAShader& shader, const std::array<u32, 0x300>& regs, bool usingUbershader) : usingUbershader(usingUbershader) {
shaderHash = shader.getCodeHash();
opdescHash = shader.getOpdescHash();
entrypoint = shader.entrypoint;
outputCount = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
outputMask = regs[PICA::InternalRegs::VertexShaderOutputMask];
for (int i = 0; i < outputCount; i++) {
// Mask out unused bits
outmaps[i] = regs[PICA::InternalRegs::ShaderOutmap0 + i] & 0x1F1F1F1F;
}
}
};
} // namespace PICA
static_assert(sizeof(PICA::VertConfig) == 56);
// Override std::hash for our vertex config class
template <>
struct std::hash<PICA::VertConfig> {
std::size_t operator()(const PICA::VertConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); }
};

View file

@ -1,7 +1,8 @@
#pragma once
#include "PICA/float_types.hpp"
#include <array>
#include "PICA/float_types.hpp"
namespace PICA {
// A representation of the output vertex as it comes out of the vertex shader, with padding and all
struct Vertex {

View file

@ -51,6 +51,18 @@ namespace PICA {
#undef defineTexEnv
// clang-format on
// Fog registers
FogColor = 0xE1,
FogLUTIndex = 0xE6,
FogLUTData0 = 0xE8,
FogLUTData1 = 0xE9,
FogLUTData2 = 0xEA,
FogLUTData3 = 0xEB,
FogLUTData4 = 0xEC,
FogLUTData5 = 0xED,
FogLUTData6 = 0xEE,
FogLUTData7 = 0xEF,
// Framebuffer registers
ColourOperation = 0x100,
BlendFunc = 0x101,
@ -67,7 +79,29 @@ namespace PICA {
ColourBufferLoc = 0x11D,
FramebufferSize = 0x11E,
//LightingRegs
// Lighting registers
LightingEnable = 0x8F,
Light0Specular0 = 0x140,
Light0Specular1 = 0x141,
Light0Diffuse = 0x142,
Light0Ambient = 0x143,
Light0XY = 0x144,
Light0Z = 0x145,
Light0SpotlightXY = 0x146,
Light0SpotlightZ = 0x147,
Light0Config = 0x149,
Light0AttenuationBias = 0x14A,
Light0AttenuationScale = 0x14B,
LightGlobalAmbient = 0x1C0,
LightNumber = 0x1C2,
LightConfig0 = 0x1C3,
LightConfig1 = 0x1C4,
LightPermutation = 0x1D9,
LightLUTAbs = 0x1D0,
LightLUTSelect = 0x1D1,
LightLUTScale = 0x1D2,
LightingLUTIndex = 0x01C5,
LightingLUTData0 = 0x01C8,
LightingLUTData1 = 0x01C9,
@ -231,7 +265,8 @@ namespace PICA {
enum : u32 {
LUT_D0 = 0,
LUT_D1,
LUT_FR,
// LUT 2 is not used, the emulator internally uses it for referring to the current source's spotlight in shaders
LUT_FR = 0x3,
LUT_RB,
LUT_RG,
LUT_RR,
@ -255,6 +290,11 @@ namespace PICA {
};
}
// There's actually 8 different LUTs (SP0-SP7), one for each light with different indices (8-15)
// We use an unused LUT value for "this light source's spotlight" instead and figure out which light source to use in compileLutLookup
// This is particularly intuitive in several places, such as checking if a LUT is enabled
static constexpr int spotlightLutIndex = 2;
enum class TextureFmt : u32 {
RGBA8 = 0x0,
RGB8 = 0x1,
@ -345,4 +385,156 @@ namespace PICA {
GeometryPrimitive = 3,
};
enum class CompareFunction : u32 {
Never = 0,
Always = 1,
Equal = 2,
NotEqual = 3,
Less = 4,
LessOrEqual = 5,
Greater = 6,
GreaterOrEqual = 7,
};
enum class LogicOpMode : u32 {
Clear = 0,
And = 1,
ReverseAnd = 2,
Copy = 3,
Set = 4,
InvertedCopy = 5,
Nop = 6,
Invert = 7,
Nand = 8,
Or = 9,
Nor = 10,
Xor = 11,
Equiv = 12,
InvertedAnd = 13,
ReverseOr = 14,
InvertedOr = 15,
};
enum class FogMode : u32 {
Disabled = 0,
Fog = 5,
Gas = 7,
};
struct TexEnvConfig {
enum class Source : u8 {
PrimaryColor = 0x0,
PrimaryFragmentColor = 0x1,
SecondaryFragmentColor = 0x2,
Texture0 = 0x3,
Texture1 = 0x4,
Texture2 = 0x5,
Texture3 = 0x6,
// TODO: Inbetween values are unknown
PreviousBuffer = 0xD,
Constant = 0xE,
Previous = 0xF,
};
enum class ColorOperand : u8 {
SourceColor = 0x0,
OneMinusSourceColor = 0x1,
SourceAlpha = 0x2,
OneMinusSourceAlpha = 0x3,
SourceRed = 0x4,
OneMinusSourceRed = 0x5,
// TODO: Inbetween values are unknown
SourceGreen = 0x8,
OneMinusSourceGreen = 0x9,
// Inbetween values are unknown
SourceBlue = 0xC,
OneMinusSourceBlue = 0xD,
};
enum class AlphaOperand : u8 {
SourceAlpha = 0x0,
OneMinusSourceAlpha = 0x1,
SourceRed = 0x2,
OneMinusSourceRed = 0x3,
SourceGreen = 0x4,
OneMinusSourceGreen = 0x5,
SourceBlue = 0x6,
OneMinusSourceBlue = 0x7,
};
enum class Operation : u8 {
Replace = 0,
Modulate = 1,
Add = 2,
AddSigned = 3,
Lerp = 4,
Subtract = 5,
Dot3RGB = 6,
Dot3RGBA = 7,
MultiplyAdd = 8,
AddMultiply = 9,
};
// RGB sources
Source colorSource1, colorSource2, colorSource3;
// Alpha sources
Source alphaSource1, alphaSource2, alphaSource3;
// RGB operands
ColorOperand colorOperand1, colorOperand2, colorOperand3;
// Alpha operands
AlphaOperand alphaOperand1, alphaOperand2, alphaOperand3;
// Texture environment operations for this stage
Operation colorOp, alphaOp;
u32 constColor;
private:
// These are the only private members since their value doesn't actually reflect the scale
// So we make them public so we'll always use the appropriate member functions instead
u8 colorScale;
u8 alphaScale;
public:
// Create texture environment object from TEV registers
TexEnvConfig(u32 source, u32 operand, u32 combiner, u32 color, u32 scale) : constColor(color) {
colorSource1 = Helpers::getBits<0, 4, Source>(source);
colorSource2 = Helpers::getBits<4, 4, Source>(source);
colorSource3 = Helpers::getBits<8, 4, Source>(source);
alphaSource1 = Helpers::getBits<16, 4, Source>(source);
alphaSource2 = Helpers::getBits<20, 4, Source>(source);
alphaSource3 = Helpers::getBits<24, 4, Source>(source);
colorOperand1 = Helpers::getBits<0, 4, ColorOperand>(operand);
colorOperand2 = Helpers::getBits<4, 4, ColorOperand>(operand);
colorOperand3 = Helpers::getBits<8, 4, ColorOperand>(operand);
alphaOperand1 = Helpers::getBits<12, 3, AlphaOperand>(operand);
alphaOperand2 = Helpers::getBits<16, 3, AlphaOperand>(operand);
alphaOperand3 = Helpers::getBits<20, 3, AlphaOperand>(operand);
colorOp = Helpers::getBits<0, 4, Operation>(combiner);
alphaOp = Helpers::getBits<16, 4, Operation>(combiner);
colorScale = Helpers::getBits<0, 2>(scale);
alphaScale = Helpers::getBits<16, 2>(scale);
}
u32 getColorScale() { return (colorScale <= 2) ? (1 << colorScale) : 1; }
u32 getAlphaScale() { return (alphaScale <= 2) ? (1 << alphaScale) : 1; }
bool isPassthroughStage() {
// clang-format off
// Thank you to the Citra dev that wrote this out
return (
colorOp == Operation::Replace && alphaOp == Operation::Replace &&
colorSource1 == Source::Previous && alphaSource1 == Source::Previous &&
colorOperand1 == ColorOperand::SourceColor && alphaOperand1 == AlphaOperand::SourceAlpha &&
getColorScale() == 1 && getAlphaScale() == 1
);
// clang-format on
}
};
} // namespace PICA

View file

@ -1,6 +1,8 @@
#pragma once
#include <algorithm>
#include <array>
#include <cassert>
#include <cstddef>
#include <cstring>
#include "PICA/float_types.hpp"
@ -21,7 +23,7 @@ namespace ShaderOpcodes {
DST = 0x04,
EX2 = 0x05,
LG2 = 0x06,
LIT = 0x07,
LITP = 0x07,
MUL = 0x08,
SGE = 0x09,
SLT = 0x0A,
@ -56,6 +58,10 @@ namespace ShaderOpcodes {
};
}
namespace PICA::ShaderGen {
class ShaderDecompiler;
};
// Note: All PICA f24 vec4 registers must have the alignas(16) specifier to make them easier to access in SSE/NEON code in the JIT
class PICAShader {
using f24 = Floats::f24;
@ -90,14 +96,22 @@ class PICAShader {
public:
// These are placed close to the temp registers and co because it helps the JIT generate better code
u32 entrypoint = 0; // Initial shader PC
u32 boolUniform;
std::array<std::array<u8, 4>, 4> intUniforms;
// We want these registers in this order & with this alignment for uploading them directly to a UBO
// When emulating shaders on the GPU. Plus this alignment for float uniforms is necessary for doing SIMD in the shader->CPU recompilers.
alignas(16) std::array<vec4f, 96> floatUniforms;
alignas(16) std::array<std::array<u8, 4>, 4> intUniforms;
u32 boolUniform;
alignas(16) std::array<vec4f, 16> fixedAttributes; // Fixed vertex attributes
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
alignas(16) std::array<vec4f, 16> outputs;
alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); // Dummy register used by the JIT
// We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT
// We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
using Hash = PICAHash::HashType;
protected:
std::array<u32, 128> operandDescriptors;
@ -116,20 +130,20 @@ class PICAShader {
std::array<CallInfo, 4> callInfo;
ShaderType type;
// We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT
// We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
using Hash = PICAHash::HashType;
Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism)
Hash lastOpdescHash = 0; // Last hash computed for the operand descriptors (Also used for the JIT)
public:
bool uniformsDirty = false;
protected:
bool codeHashDirty = false;
bool opdescHashDirty = false;
// Add these as friend classes for the JIT so it has access to all important state
friend class ShaderJIT;
friend class ShaderEmitter;
friend class PICA::ShaderGen::ShaderDecompiler;
vec4f getSource(u32 source);
vec4f& getDest(u32 dest);
@ -151,6 +165,7 @@ class PICAShader {
void jmpc(u32 instruction);
void jmpu(u32 instruction);
void lg2(u32 instruction);
void litp(u32 instruction);
void loop(u32 instruction);
void mad(u32 instruction);
void madi(u32 instruction);
@ -220,13 +235,9 @@ class PICAShader {
public:
static constexpr size_t maxInstructionCount = 4096;
std::array<u32, maxInstructionCount> loadedShader; // Currently loaded & active shader
std::array<u32, maxInstructionCount> bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
PICAShader(ShaderType type) : type(type) {}
// Theese functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them
void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); }
void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; }
void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; }
@ -235,7 +246,7 @@ class PICAShader {
Helpers::panic("o no, shader upload overflew");
}
bufferedShader[bufferIndex++] = word;
loadedShader[bufferIndex++] = word;
bufferIndex &= 0xfff;
codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
@ -277,6 +288,7 @@ class PICAShader {
uniform[2] = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
uniform[3] = f24::fromRaw(floatUniformBuffer[0] >> 8);
}
uniformsDirty = true;
}
}
@ -288,6 +300,12 @@ class PICAShader {
u[1] = getBits<8, 8>(word);
u[2] = getBits<16, 8>(word);
u[3] = getBits<24, 8>(word);
uniformsDirty = true;
}
void uploadBoolUniform(u32 value) {
boolUniform = value;
uniformsDirty = true;
}
void run();
@ -295,4 +313,13 @@ class PICAShader {
Hash getCodeHash();
Hash getOpdescHash();
};
// Returns how big the PICA uniforms are combined. Used for hw accelerated shaders where we upload the uniforms to our GPU.
static constexpr usize totalUniformSize() { return sizeof(floatUniforms) + sizeof(intUniforms) + sizeof(boolUniform); }
void* getUniformPointer() { return static_cast<void*>(&floatUniforms); }
};
static_assert(
offsetof(PICAShader, intUniforms) == offsetof(PICAShader, floatUniforms) + 96 * sizeof(float) * 4 &&
offsetof(PICAShader, boolUniform) == offsetof(PICAShader, intUniforms) + 4 * sizeof(u8) * 4
);

View file

@ -0,0 +1,131 @@
#pragma once
#include <fmt/format.h>
#include <map>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include "PICA/shader.hpp"
#include "PICA/shader_gen_types.hpp"
struct EmulatorConfig;
namespace PICA::ShaderGen {
// Control flow analysis is partially based on
// https://github.com/PabloMK7/citra/blob/d0179559466ff09731d74474322ee880fbb44b00/src/video_core/shader/generator/glsl_shader_decompiler.cpp#L33
struct ControlFlow {
// A continuous range of addresses
struct AddressRange {
u32 start, end;
AddressRange(u32 start, u32 end) : start(start), end(end) {}
// Use lexicographic comparison for functions in order to sort them in a set
bool operator<(const AddressRange& other) const { return std::tie(start, end) < std::tie(other.start, other.end); }
};
struct Function {
using Labels = std::set<u32>;
enum class ExitMode {
Unknown, // Can't guarantee whether we'll exit properly, fall back to CPU shaders (can happen with jmp shenanigans)
AlwaysReturn, // All paths reach the return point.
Conditional, // One or more code paths reach the return point or an END instruction conditionally.
AlwaysEnd, // All paths reach an END instruction.
};
u32 start; // Starting PC of the function
u32 end; // End PC of the function
Labels outLabels{}; // Labels this function can "goto" (jump) to
ExitMode exitMode = ExitMode::Unknown;
explicit Function(u32 start, u32 end) : start(start), end(end) {}
bool operator<(const Function& other) const { return AddressRange(start, end) < AddressRange(other.start, other.end); }
std::string getIdentifier() const { return fmt::format("fn_{}_{}", start, end); }
// To handle weird control flow, we have to return from each function a bool that indicates whether or not the shader reached an end
// instruction and should thus terminate. This is necessary for games like Rayman and Gravity Falls, which have "END" instructions called
// from within functions deep in the callstack
std::string getForwardDecl() const { return fmt::format("bool fn_{}_{}();\n", start, end); }
std::string getCallStatement() const { return fmt::format("fn_{}_{}()", start, end); }
};
std::set<Function> functions{};
std::map<AddressRange, Function::ExitMode> exitMap{};
// Tells us whether analysis of the shader we're trying to compile failed, in which case we'll need to fail back to shader emulation
// On the CPU
bool analysisFailed = false;
// This will recursively add all functions called by the function too, as analyzeFunction will call addFunction on control flow instructions
const Function* addFunction(const PICAShader& shader, u32 start, u32 end) {
auto searchIterator = functions.find(Function(start, end));
if (searchIterator != functions.end()) {
return &(*searchIterator);
}
// Add this function and analyze it if it doesn't already exist
Function function(start, end);
function.exitMode = analyzeFunction(shader, start, end, function.outLabels);
// This function could not be fully analyzed, report failure
if (function.exitMode == Function::ExitMode::Unknown) {
analysisFailed = true;
return nullptr;
}
// Add function to our function list
auto [it, added] = functions.insert(std::move(function));
return &(*it);
}
void analyze(const PICAShader& shader, u32 entrypoint);
Function::ExitMode analyzeFunction(const PICAShader& shader, u32 start, u32 end, Function::Labels& labels);
};
class ShaderDecompiler {
using AddressRange = ControlFlow::AddressRange;
using Function = ControlFlow::Function;
ControlFlow controlFlow{};
PICAShader& shader;
EmulatorConfig& config;
std::string decompiledShader;
u32 entrypoint;
API api;
Language language;
bool compilationError = false;
void compileInstruction(u32& pc, bool& finished);
// Compile range "range" and returns the end PC or if we're "finished" with the program (called an END instruction)
std::pair<u32, bool> compileRange(const AddressRange& range);
void callFunction(const Function& function);
const Function* findFunction(const AddressRange& range);
void writeAttributes();
std::string getSource(u32 source, u32 index) const;
std::string getDest(u32 dest) const;
std::string getSwizzlePattern(u32 swizzle) const;
std::string getDestSwizzle(u32 destinationMask) const;
const char* getCondition(u32 cond, u32 refX, u32 refY);
void setDest(u32 operandDescriptor, const std::string& dest, const std::string& value);
// Returns if the instruction uses the typical register encodings most instructions use
// With some exceptions like MAD/MADI, and the control flow instructions which are completely different
bool usesCommonEncoding(u32 instruction) const;
public:
ShaderDecompiler(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language)
: shader(shader), entrypoint(entrypoint), config(config), api(api), language(language), decompiledShader("") {}
std::string decompile();
};
std::string decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language);
} // namespace PICA::ShaderGen

View file

@ -0,0 +1,43 @@
#pragma once
#include <string>
#include "PICA/gpu.hpp"
#include "PICA/pica_frag_config.hpp"
#include "PICA/pica_vert_config.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_gen_types.hpp"
#include "helpers.hpp"
namespace PICA::ShaderGen {
class FragmentGenerator {
API api;
Language language;
void compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config);
void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config);
void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config);
void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index, const PICA::FragmentConfig& config);
void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
void applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config);
void compileLights(std::string& shader, const PICA::FragmentConfig& config);
void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID);
bool isSamplerEnabled(u32 environmentID, u32 lutID);
void compileFog(std::string& shader, const PICA::FragmentConfig& config);
void compileLogicOps(std::string& shader, const PICA::FragmentConfig& config);
public:
FragmentGenerator(API api, Language language) : api(api), language(language) {}
std::string generate(const PICA::FragmentConfig& config, void* driverInfo = nullptr);
std::string getDefaultVertexShader();
// For when PICA shader is acceleration is enabled. Turn the PICA shader source into a proper vertex shader
std::string getVertexShaderAccelerated(const std::string& picaSource, const PICA::VertConfig& vertConfig, bool usingUbershader);
void setTarget(API api, Language language) {
this->api = api;
this->language = language;
}
};
}; // namespace PICA::ShaderGen

View file

@ -0,0 +1,9 @@
#pragma once
namespace PICA::ShaderGen {
// Graphics API this shader is targetting
enum class API { GL, GLES, Vulkan };
// Shading language to use (Only GLSL for the time being)
enum class Language { GLSL };
} // namespace PICA::ShaderGen

View file

@ -2,10 +2,9 @@
#include "PICA/shader.hpp"
class ShaderUnit {
public:
PICAShader vs; // Vertex shader
PICAShader gs; // Geometry shader
public:
PICAShader vs; // Vertex shader
PICAShader gs; // Geometry shader
ShaderUnit() : vs(ShaderType::Vertex), gs(ShaderType::Geometry) {}
void reset();

100
include/align.hpp Normal file
View file

@ -0,0 +1,100 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include <cstdlib>
#include "compiler_builtins.hpp"
#include "helpers.hpp"
#ifdef _WIN32
#include <malloc.h>
#endif
namespace Common {
template <typename T>
constexpr bool isAligned(T value, unsigned int alignment) {
return (value % static_cast<T>(alignment)) == 0;
}
template <typename T>
constexpr T alignUp(T value, unsigned int alignment) {
return (value + static_cast<T>(alignment - 1)) / static_cast<T>(alignment) * static_cast<T>(alignment);
}
template <typename T>
constexpr T alignDown(T value, unsigned int alignment) {
return value / static_cast<T>(alignment) * static_cast<T>(alignment);
}
template <typename T>
constexpr bool isAlignedPow2(T value, unsigned int alignment) {
return (value & static_cast<T>(alignment - 1)) == 0;
}
template <typename T>
constexpr T alignUpPow2(T value, unsigned int alignment) {
return (value + static_cast<T>(alignment - 1)) & static_cast<T>(~static_cast<T>(alignment - 1));
}
template <typename T>
constexpr T alignDownPow2(T value, unsigned int alignment) {
return value & static_cast<T>(~static_cast<T>(alignment - 1));
}
template <typename T>
constexpr bool isPow2(T value) {
return (value & (value - 1)) == 0;
}
template <typename T>
constexpr T previousPow2(T value) {
if (value == static_cast<T>(0)) return 0;
value |= (value >> 1);
value |= (value >> 2);
value |= (value >> 4);
if constexpr (sizeof(T) >= 16) value |= (value >> 8);
if constexpr (sizeof(T) >= 32) value |= (value >> 16);
if constexpr (sizeof(T) >= 64) value |= (value >> 32);
return value - (value >> 1);
}
template <typename T>
constexpr T nextPow2(T value) {
// https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
if (value == static_cast<T>(0)) return 0;
value--;
value |= (value >> 1);
value |= (value >> 2);
value |= (value >> 4);
if constexpr (sizeof(T) >= 16) value |= (value >> 8);
if constexpr (sizeof(T) >= 32) value |= (value >> 16);
if constexpr (sizeof(T) >= 64) value |= (value >> 32);
value++;
return value;
}
ALWAYS_INLINE static void* alignedMalloc(size_t size, size_t alignment) {
#ifdef _WIN32
return _aligned_malloc(size, alignment);
#else
// Unaligned sizes are slow on macOS.
#ifdef __APPLE__
if (isPow2(alignment)) size = (size + alignment - 1) & ~(alignment - 1);
#endif
void* ret = nullptr;
return (posix_memalign(&ret, alignment, size) == 0) ? ret : nullptr;
#endif
}
ALWAYS_INLINE static void alignedFree(void* ptr) {
#ifdef _MSC_VER
_aligned_free(ptr);
#else
free(ptr);
#endif
}
} // namespace Common

View file

@ -54,6 +54,15 @@ namespace Audio::AAC {
u32_le sampleCount;
};
struct DecodeRequest {
u32_le address; // Address of input AAC stream
u32_le size; // Size of input AAC stream
u32_le destAddrLeft; // Output address for left channel samples
u32_le destAddrRight; // Output address for right channel samples
u32_le unknown1;
u32_le unknown2;
};
struct Message {
u16_le mode = Mode::None; // Encode or decode AAC?
u16_le command = Command::Init;
@ -62,7 +71,9 @@ namespace Audio::AAC {
// Info on the AAC request
union {
std::array<u8, 24> commandData{};
DecodeResponse decodeResponse;
DecodeRequest decodeRequest;
};
};

View file

@ -0,0 +1,25 @@
#pragma once
#include <functional>
#include "audio/aac.hpp"
#include "helpers.hpp"
struct AAC_DECODER_INSTANCE;
namespace Audio::AAC {
class Decoder {
using DecoderHandle = AAC_DECODER_INSTANCE*;
using PaddrCallback = std::function<u8*(u32)>;
DecoderHandle decoderHandle = nullptr;
bool isInitialized() { return decoderHandle != nullptr; }
void initialize();
public:
// Decode function. Takes in a reference to the AAC response & request, and a callback for paddr -> pointer conversions
// We also allow for optionally muting the AAC output (setting all of it to 0) instead of properly decoding it, for debug/research purposes
void decode(AAC::Message& response, const AAC::Message& request, PaddrCallback paddrCallback, bool enableAudio = true);
~Decoder();
};
} // namespace Audio::AAC

View file

@ -0,0 +1,58 @@
// Copyright 2016 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <deque>
#include "audio/hle_mixer.hpp"
#include "helpers.hpp"
namespace Audio::Interpolation {
// A variable length buffer of signed PCM16 stereo samples.
using StereoBuffer16 = std::deque<std::array<s16, 2>>;
using StereoFrame16 = Audio::DSPMixer::StereoFrame<s16>;
struct State {
// Two historical samples.
std::array<s16, 2> xn1 = {}; //< x[n-1]
std::array<s16, 2> xn2 = {}; //< x[n-2]
// Current fractional position.
u64 fposition = 0;
};
/**
* No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay.
* @param state Interpolation state.
* @param input Input buffer.
* @param rate Stretch factor. Must be a positive non-zero value.
* rate > 1.0 performs decimation and rate < 1.0 performs upsampling.
* @param output The resampled audio buffer.
* @param outputi The index of output to start writing to.
*/
void none(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi);
/**
* Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay.
* @param state Interpolation state.
* @param input Input buffer.
* @param rate Stretch factor. Must be a positive non-zero value.
* rate > 1.0 performs decimation and rate < 1.0 performs upsampling.
* @param output The resampled audio buffer.
* @param outputi The index of output to start writing to.
*/
void linear(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi);
/**
* Polyphase interpolation. This is currently stubbed to just perform linear interpolation
* @param state Interpolation state.
* @param input Input buffer.
* @param rate Stretch factor. Must be a positive non-zero value.
* rate > 1.0 performs decimation and rate < 1.0 performs upsampling.
* @param output The resampled audio buffer.
* @param outputi The index of output to start writing to.
*/
void polyphase(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi);
} // namespace Audio::Interpolation

View file

@ -8,12 +8,13 @@
#include "helpers.hpp"
#include "logger.hpp"
#include "scheduler.hpp"
#include "ring_buffer.hpp"
#include "scheduler.hpp"
// The DSP core must have access to the DSP service to be able to trigger interrupts properly
class DSPService;
class Memory;
struct EmulatorConfig;
namespace Audio {
// There are 160 stereo samples in 1 audio frame, so 320 samples total
@ -24,12 +25,14 @@ namespace Audio {
static constexpr u64 lleSlice = 16384;
class DSPCore {
using Samples = Common::RingBuffer<s16, 1024>;
// 0x2000 stereo (= 2 channel) samples
using Samples = Common::RingBuffer<s16, 0x2000 * 2>;
protected:
Memory& mem;
Scheduler& scheduler;
DSPService& dspService;
EmulatorConfig& settings;
Samples sampleBuffer;
bool audioEnabled = false;
@ -38,12 +41,12 @@ namespace Audio {
public:
enum class Type { Null, Teakra, HLE };
DSPCore(Memory& mem, Scheduler& scheduler, DSPService& dspService)
: mem(mem), scheduler(scheduler), dspService(dspService) {}
DSPCore(Memory& mem, Scheduler& scheduler, DSPService& dspService, EmulatorConfig& settings)
: mem(mem), scheduler(scheduler), dspService(dspService), settings(settings) {}
virtual ~DSPCore() {}
virtual void reset() = 0;
virtual void runAudioFrame() = 0;
virtual void runAudioFrame(u64 eventTimestamp) = 0;
virtual u8* getDspMemory() = 0;
virtual u16 recvData(u32 regId) = 0;
@ -62,5 +65,5 @@ namespace Audio {
virtual void setAudioEnabled(bool enable) { audioEnabled = enable; }
};
std::unique_ptr<DSPCore> makeDSPCore(DSPCore::Type type, Memory& mem, Scheduler& scheduler, DSPService& dspService);
std::unique_ptr<DSPCore> makeDSPCore(EmulatorConfig& config, Memory& mem, Scheduler& scheduler, DSPService& dspService);
} // namespace Audio

View file

@ -324,8 +324,8 @@ namespace Audio::HLE {
BitField<15, 1, u32> outputBufferCountDirty;
BitField<16, 1, u32> masterVolumeDirty;
BitField<24, 1, u32> auxReturnVolume0Dirty;
BitField<25, 1, u32> auxReturnVolume1Dirty;
BitField<24, 1, u32> auxVolume0Dirty;
BitField<25, 1, u32> auxVolume1Dirty;
BitField<26, 1, u32> outputFormatDirty;
BitField<27, 1, u32> clippingModeDirty;
BitField<28, 1, u32> headphonesConnectedDirty;
@ -337,7 +337,7 @@ namespace Audio::HLE {
/// The DSP has three intermediate audio mixers. This controls the volume level (0.0-1.0) for
/// each at the final mixer.
float_le masterVolume;
std::array<float_le, 2> auxReturnVolume;
std::array<float_le, 2> auxVolumes;
u16_le outputBufferCount;
u16 pad1[2];
@ -422,7 +422,7 @@ namespace Audio::HLE {
struct DspStatus {
u16_le unknown;
u16_le dropped_frames;
u16_le droppedFrames;
u16 pad0[0xE];
};
ASSERT_DSP_STRUCT(DspStatus, 32);

View file

@ -0,0 +1,78 @@
#pragma once
#include "audio/hle_mixer.hpp"
#include "compiler_builtins.hpp"
#include "helpers.hpp"
#if defined(_M_AMD64) || defined(__x86_64__)
#define DSP_SIMD_X64
#include <immintrin.h>
#elif defined(_M_ARM64) || defined(__aarch64__)
#define DSP_SIMD_ARM64
#include <arm_neon.h>
#endif
// Optimized SIMD functions for mixing the stereo output of a DSP voice into a quadraphonic intermediate mix
namespace DSP::MixIntoQuad {
using IntermediateMix = Audio::DSPMixer::IntermediateMix;
using StereoFrame16 = Audio::DSPMixer::StereoFrame<s16>;
// Non-SIMD, portable algorithm
ALWAYS_INLINE static void mixPortable(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) {
// Mono samples are in the format: (l, r)
// When converting to quad, gain0 and gain2 are applied to the left sample, gain1 and gain3 to the right one
mix[sampleIndex][0] += s32(frame[sampleIndex][0] * gains[0]);
mix[sampleIndex][1] += s32(frame[sampleIndex][1] * gains[1]);
mix[sampleIndex][2] += s32(frame[sampleIndex][0] * gains[2]);
mix[sampleIndex][3] += s32(frame[sampleIndex][1] * gains[3]);
}
}
#if defined(DSP_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
ALWAYS_INLINE static void mixSSE4_1(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
__m128 gains_ = _mm_load_ps(gains);
for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) {
// The stereo samples, repeated every 4 bytes inside the vector register
__m128i stereoSamples = _mm_castps_si128(_mm_load1_ps((float*)&frame[sampleIndex][0]));
__m128 currentFrame = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(stereoSamples));
__m128i offset = _mm_cvttps_epi32(_mm_mul_ps(currentFrame, gains_));
__m128i intermediateMixPrev = _mm_load_si128((__m128i*)&mix[sampleIndex][0]);
__m128i result = _mm_add_epi32(intermediateMixPrev, offset);
_mm_store_si128((__m128i*)&mix[sampleIndex][0], result);
}
}
#endif
#ifdef DSP_SIMD_ARM64
ALWAYS_INLINE static void mixNEON(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
float32x4_t gains_ = vld1q_f32(gains);
for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) {
// Load l and r samples and repeat them every 4 bytes
int32x4_t stereoSamples = vld1q_dup_s32((s32*)&frame[sampleIndex][0]);
// Expand the bottom 4 s16 samples into an int32x4 with sign extension, then convert them to float32x4
float32x4_t currentFrame = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vreinterpretq_s16_s32(stereoSamples))));
// Multiply samples by their respective gains, truncate the result, and add it into the intermediate mix buffer
int32x4_t offset = vcvtq_s32_f32(vmulq_f32(currentFrame, gains_));
int32x4_t intermediateMixPrev = vld1q_s32((s32*)&mix[sampleIndex][0]);
int32x4_t result = vaddq_s32(intermediateMixPrev, offset);
vst1q_s32((s32*)&mix[sampleIndex][0], result);
}
}
#endif
// Mixes the stereo output of a DSP voice into a quadraphonic intermediate mix
static void mix(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
#if defined(DSP_SIMD_ARM64)
return mixNEON(mix, frame, gains);
#elif defined(DSP_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
return mixSSE4_1(mix, frame, gains);
#else
return mixPortable(mix, frame, gains);
#endif
}
} // namespace DSP::MixIntoQuad

View file

@ -2,18 +2,19 @@
#include <array>
#include <cassert>
#include <deque>
#include <memory>
#include <queue>
#include <vector>
#include "audio/aac.hpp"
#include "audio/aac_decoder.hpp"
#include "audio/audio_interpolation.hpp"
#include "audio/dsp_core.hpp"
#include "audio/dsp_shared_mem.hpp"
#include "audio/hle_mixer.hpp"
#include "memory.hpp"
namespace Audio {
using SampleFormat = HLE::SourceConfiguration::Configuration::Format;
using SourceType = HLE::SourceConfiguration::Configuration::MonoOrStereo;
struct DSPSource {
// Audio buffer information
// https://www.3dbrew.org/wiki/DSP_Memory_Region
@ -33,8 +34,8 @@ namespace Audio {
SampleFormat format;
SourceType sourceType;
bool fromQueue = false; // Is this buffer from the buffer queue or an embedded buffer?
bool hasPlayedOnce = false; // Has the buffer been played at least once before?
bool fromQueue = false; // Is this buffer from the buffer queue or an embedded buffer?
bool hasPlayedOnce = false; // Has the buffer been played at least once before?
bool operator<(const Buffer& other) const {
// Lower ID = Higher priority
@ -42,17 +43,34 @@ namespace Audio {
return this->bufferID > other.bufferID;
}
};
// Buffer of decoded PCM16 samples. TODO: Are there better alternatives to use over deque?
using SampleBuffer = std::deque<std::array<s16, 2>>;
using BufferQueue = std::priority_queue<Buffer>;
using InterpolationMode = HLE::SourceConfiguration::Configuration::InterpolationMode;
using InterpolationState = Audio::Interpolation::State;
// The samples this voice output for this audio frame.
// Aligned to 4 for SIMD purposes.
alignas(4) DSPMixer::StereoFrame<s16> currentFrame;
BufferQueue buffers;
SampleFormat sampleFormat = SampleFormat::ADPCM;
SourceType sourceType = SourceType::Stereo;
InterpolationMode interpolationMode = InterpolationMode::Linear;
InterpolationState interpolationState;
// There's one gain configuration for each of the 3 intermediate mixing stages
// And each gain configuration is composed of 4 gain values, one for each sample in a quad-channel sample
// Aligned to 16 for SIMD purposes
alignas(16) std::array<std::array<float, 4>, 3> gains;
// Of the 3 intermediate mix stages, typically only the first one is actually enabled and the other ones do nothing
// Ie their gain is vec4(0.0). We track which stages are disabled (have a gain of all 0s) using this bitfield and skip them
// In order to save up on CPU time.
uint enabledMixStages = 0;
std::array<float, 3> gain0, gain1, gain2;
u32 samplePosition; // Sample number into the current audio buffer
float rateMultiplier;
u16 syncCount;
u16 currentBufferID;
u16 previousBufferID;
@ -95,22 +113,23 @@ namespace Audio {
// The audio frame types are public in case we want to use them for unit tests
public:
template <typename T, usize channelCount = 1>
using Sample = std::array<T, channelCount>;
using Sample = DSPMixer::Sample<T, channelCount>;
template <typename T, usize channelCount>
using Frame = std::array<Sample<T, channelCount>, 160>;
using Frame = DSPMixer::Frame<T, channelCount>;
template <typename T>
using MonoFrame = Frame<T, 1>;
using MonoFrame = DSPMixer::MonoFrame<T>;
template <typename T>
using StereoFrame = Frame<T, 2>;
using StereoFrame = DSPMixer::StereoFrame<T>;
template <typename T>
using QuadFrame = Frame<T, 4>;
using QuadFrame = DSPMixer::QuadFrame<T>;
using Source = Audio::DSPSource;
using SampleBuffer = Source::SampleBuffer;
using IntermediateMix = DSPMixer::IntermediateMix;
private:
enum class DSPState : u32 {
@ -127,6 +146,9 @@ namespace Audio {
std::array<Source, Audio::HLE::sourceCount> sources; // DSP voices
Audio::HLE::DspMemory dspRam;
Audio::DSPMixer mixer;
std::unique_ptr<Audio::AAC::Decoder> aacDecoder;
void resetAudioPipe();
bool loaded = false; // Have we loaded a component?
@ -142,7 +164,7 @@ namespace Audio {
} else if (counter1 == 0xffff && counter0 != 0xfffe) {
return 0;
} else {
return counter0 > counter1 ? 0 : 0;
return (counter0 > counter1) ? 0 : 1;
}
}
@ -169,9 +191,12 @@ namespace Audio {
void handleAACRequest(const AAC::Message& request);
void updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients);
void updateMixerConfig(HLE::SharedMemory& sharedMem);
void generateFrame(StereoFrame<s16>& frame);
void generateFrame(DSPSource& source);
void outputFrame();
// Perform the final mix, mixing the quadraphonic samples from all voices into the output audio frame
void performMix(Audio::HLE::SharedMemory& readRegion, Audio::HLE::SharedMemory& writeRegion);
// Decode an entire buffer worth of audio
void decodeBuffer(DSPSource& source);
@ -181,11 +206,11 @@ namespace Audio {
SampleBuffer decodeADPCM(const u8* data, usize sampleCount, Source& source);
public:
HLE_DSP(Memory& mem, Scheduler& scheduler, DSPService& dspService);
HLE_DSP(Memory& mem, Scheduler& scheduler, DSPService& dspService, EmulatorConfig& config);
~HLE_DSP() override {}
void reset() override;
void runAudioFrame() override;
void runAudioFrame(u64 eventTimestamp) override;
u8* getDspMemory() override { return dspRam.rawMemory.data(); }
@ -199,5 +224,4 @@ namespace Audio {
void setSemaphore(u16 value) override {}
void setSemaphoreMask(u16 value) override {}
};
} // namespace Audio

View file

@ -0,0 +1,50 @@
#pragma once
#include <array>
#include "audio/dsp_shared_mem.hpp"
#include "helpers.hpp"
namespace Audio {
using SampleFormat = HLE::SourceConfiguration::Configuration::Format;
using SourceType = HLE::SourceConfiguration::Configuration::MonoOrStereo;
class DSPMixer {
public:
template <typename T, usize channelCount = 1>
using Sample = std::array<T, channelCount>;
template <typename T, usize channelCount>
using Frame = std::array<Sample<T, channelCount>, 160>;
template <typename T>
using MonoFrame = Frame<T, 1>;
template <typename T>
using StereoFrame = Frame<T, 2>;
template <typename T>
using QuadFrame = Frame<T, 4>;
// Internally the DSP uses four channels when mixing.
// Neatly, QuadFrame<s32> means that every sample is a uint32x4 value, which is particularly nice for SIMD mixing
using IntermediateMix = QuadFrame<s32>;
private:
using ChannelFormat = HLE::DspConfiguration::OutputFormat;
// The audio from each DSP voice is converted to quadraphonic and then fed into 3 intermediate mixing stages
// Two of these intermediate mixers (second and third) are used for effects, including custom effects done on the CPU
static constexpr usize mixerStageCount = 3;
public:
ChannelFormat channelFormat = ChannelFormat::Stereo;
std::array<float, mixerStageCount> volumes;
std::array<bool, 2> enableAuxStages;
void reset() {
channelFormat = ChannelFormat::Stereo;
volumes.fill(0.0);
enableAuxStages.fill(false);
}
};
} // namespace Audio

View file

@ -3,29 +3,39 @@
#include <string>
#include <vector>
#include "config.hpp"
#include "helpers.hpp"
#include "miniaudio.h"
#include "ring_buffer.hpp"
class MiniAudioDevice {
using Samples = Common::RingBuffer<ma_int16, 1024>;
using Samples = Common::RingBuffer<ma_int16, 0x2000 * 2>;
static constexpr ma_uint32 sampleRate = 32768; // 3DS sample rate
static constexpr ma_uint32 channelCount = 2; // Audio output is stereo
ma_device device;
ma_context context;
ma_device_config deviceConfig;
ma_device device;
ma_resampler resampler;
Samples* samples = nullptr;
const AudioDeviceConfig& audioSettings;
bool initialized = false;
bool running = false;
// Store the last stereo sample we output. We play this when underruning to avoid pops.
std::array<s16, 2> lastStereoSample;
std::vector<std::string> audioDevices;
public:
MiniAudioDevice();
MiniAudioDevice(const AudioDeviceConfig& audioSettings);
// If safe is on, we create a null audio device
void init(Samples& samples, bool safe = false);
void close();
void start();
void stop();
bool isInitialized() const { return initialized; }
};

View file

@ -20,14 +20,14 @@ namespace Audio {
std::array<u8, Memory::DSP_RAM_SIZE> dspRam;
void resetAudioPipe();
bool loaded = false; // Have we loaded a component?
bool loaded = false; // Have we loaded a component?
public:
NullDSP(Memory& mem, Scheduler& scheduler, DSPService& dspService) : DSPCore(mem, scheduler, dspService) {}
NullDSP(Memory& mem, Scheduler& scheduler, DSPService& dspService, EmulatorConfig& config) : DSPCore(mem, scheduler, dspService, config) {}
~NullDSP() override {}
void reset() override;
void runAudioFrame() override;
void runAudioFrame(u64 eventTimestamp) override;
u8* getDspMemory() override { return dspRam.data(); }

View file

@ -77,13 +77,13 @@ namespace Audio {
}
public:
TeakraDSP(Memory& mem, Scheduler& scheduler, DSPService& dspService);
TeakraDSP(Memory& mem, Scheduler& scheduler, DSPService& dspService, EmulatorConfig& config);
~TeakraDSP() override {}
void reset() override;
// Run 1 slice of DSP instructions and schedule the next audio frame
void runAudioFrame() override {
void runAudioFrame(u64 eventTimestamp) override {
runSlice();
scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::lleSlice * 2);
}

View file

@ -1,8 +1,35 @@
#pragma once
#include <filesystem>
#include <string>
#include "audio/dsp_core.hpp"
#include "frontend_settings.hpp"
#include "renderer.hpp"
#include "services/region_codes.hpp"
struct AudioDeviceConfig {
// Audio curve to use for volumes between 0-100
enum class VolumeCurve : int {
Cubic = 0, // Samples are scaled by volume ^ 3
Linear = 1, // Samples are scaled by volume
};
float volumeRaw = 1.0f;
VolumeCurve volumeCurve = VolumeCurve::Cubic;
bool muteAudio = false;
float getVolume() const {
if (muteAudio) {
return 0.0f;
}
return volumeRaw;
}
static VolumeCurve volumeCurveFromString(std::string inString);
static const char* volumeCurveToString(VolumeCurve curve);
};
// Remember to initialize every field here to its default value otherwise bad things will happen
struct EmulatorConfig {
@ -13,27 +40,80 @@ struct EmulatorConfig {
static constexpr bool shaderJitDefault = false;
#endif
// For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are
// horrible. On other platforms we default to ubershader + shadergen fallback for lights
#if defined(__ANDROID__) || defined(__APPLE__)
static constexpr bool ubershaderDefault = false;
#else
static constexpr bool ubershaderDefault = true;
#endif
static constexpr bool accelerateShadersDefault = true;
#if defined(__LIBRETRO__)
static constexpr bool audioEnabledDefault = true;
#else
static constexpr bool audioEnabledDefault = false;
#endif
bool shaderJitEnabled = shaderJitDefault;
bool useUbershaders = ubershaderDefault;
bool accelerateShaders = accelerateShadersDefault;
bool accurateShaderMul = false;
bool discordRpcEnabled = false;
// Toggles whether to force shadergen when there's more than N lights active and we're using the ubershader, for better performance
bool forceShadergenForLights = true;
int lightShadergenThreshold = 1;
RendererType rendererType = RendererType::OpenGL;
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null;
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::HLE;
bool sdCardInserted = true;
bool sdWriteProtected = false;
bool usePortableBuild = false;
bool audioEnabled = false;
bool audioEnabled = audioEnabledDefault;
bool vsyncEnabled = true;
bool aacEnabled = true; // Enable AAC audio?
bool enableRenderdoc = false;
bool printAppVersion = true;
bool printDSPFirmware = false;
bool chargerPlugged = true;
// Default to 3% battery to make users suffer
int batteryPercentage = 3;
LanguageCodes systemLanguage = LanguageCodes::English;
// Default ROM path to open in Qt and misc frontends
std::filesystem::path defaultRomPath = "";
std::filesystem::path filePath;
// Frontend window settings
struct WindowSettings {
static constexpr int defaultX = 200;
static constexpr int defaultY = 200;
static constexpr int defaultWidth = 800;
static constexpr int defaultHeight = 240 * 2;
bool rememberPosition = false; // Remember window position & size
bool showAppVersion = false;
int x = defaultX;
int y = defaultY;
int width = defaultHeight;
int height = defaultHeight;
};
WindowSettings windowSettings;
AudioDeviceConfig audioDeviceConfig;
FrontendSettings frontendSettings;
EmulatorConfig(const std::filesystem::path& path);
void load();
void save();
static LanguageCodes languageCodeFromString(std::string inString);
static const char* languageCodeToString(LanguageCodes code);
};

View file

@ -1,20 +1,29 @@
#pragma once
#include <array>
#include <cstring>
#include <cstdint>
#include <climits>
#include <cstdint>
#include <cstring>
#include <filesystem>
#include <optional>
#include <vector>
#include "helpers.hpp"
#include "io_file.hpp"
#include "swap.hpp"
namespace Crypto {
constexpr std::size_t AesKeySize = 0x10;
constexpr usize AesKeySize = 0x10;
using AESKey = std::array<u8, AesKeySize>;
template <std::size_t N>
static std::array<u8, N> rolArray(const std::array<u8, N>& value, std::size_t bits) {
struct Seed {
u64_le titleID;
AESKey seed;
std::array<u8, 8> pad;
};
template <usize N>
static std::array<u8, N> rolArray(const std::array<u8, N>& value, usize bits) {
const auto bitWidth = N * CHAR_BIT;
bits %= bitWidth;
@ -24,18 +33,18 @@ namespace Crypto {
std::array<u8, N> result;
for (std::size_t i = 0; i < N; i++) {
for (usize i = 0; i < N; i++) {
result[i] = ((value[(i + byteShift) % N] << bitShift) | (value[(i + byteShift + 1) % N] >> (CHAR_BIT - bitShift))) & UINT8_MAX;
}
return result;
}
template <std::size_t N>
template <usize N>
static std::array<u8, N> addArray(const std::array<u8, N>& a, const std::array<u8, N>& b) {
std::array<u8, N> result;
std::size_t sum = 0;
std::size_t carry = 0;
usize sum = 0;
usize carry = 0;
for (std::int64_t i = N - 1; i >= 0; i--) {
sum = a[i] + b[i] + carry;
@ -46,11 +55,11 @@ namespace Crypto {
return result;
}
template <std::size_t N>
template <usize N>
static std::array<u8, N> xorArray(const std::array<u8, N>& a, const std::array<u8, N>& b) {
std::array<u8, N> result;
for (std::size_t i = 0; i < N; i++) {
for (usize i = 0; i < N; i++) {
result[i] = a[i] ^ b[i];
}
@ -63,7 +72,7 @@ namespace Crypto {
}
AESKey rawKey;
for (std::size_t i = 0; i < rawKey.size(); i++) {
for (usize i = 0; i < rawKey.size(); i++) {
rawKey[i] = static_cast<u8>(std::stoi(hex.substr(i * 2, 2), 0, 16));
}
@ -76,7 +85,7 @@ namespace Crypto {
std::optional<AESKey> normalKey = std::nullopt;
};
enum KeySlotId : std::size_t {
enum KeySlotId : usize {
NCCHKey0 = 0x2C,
NCCHKey1 = 0x25,
NCCHKey2 = 0x18,
@ -84,14 +93,17 @@ namespace Crypto {
};
class AESEngine {
private:
constexpr static std::size_t AesKeySlotCount = 0x40;
private:
constexpr static usize AesKeySlotCount = 0x40;
std::optional<AESKey> m_generator = std::nullopt;
std::array<AESKeySlot, AesKeySlotCount> m_slots;
bool keysLoaded = false;
constexpr void updateNormalKey(std::size_t slotId) {
std::vector<Seed> seeds;
IOFile seedDatabase;
constexpr void updateNormalKey(usize slotId) {
if (m_generator.has_value() && hasKeyX(slotId) && hasKeyY(slotId)) {
auto& keySlot = m_slots.at(slotId);
AESKey keyX = keySlot.keyX.value();
@ -101,13 +113,17 @@ namespace Crypto {
}
}
public:
public:
AESEngine() {}
void loadKeys(const std::filesystem::path& path);
void setSeedPath(const std::filesystem::path& path);
// Returns true on success, false on failure
bool loadSeeds();
bool haveKeys() { return keysLoaded; }
bool haveGenerator() { return m_generator.has_value(); }
constexpr bool hasKeyX(std::size_t slotId) {
constexpr bool hasKeyX(usize slotId) {
if (slotId >= AesKeySlotCount) {
return false;
}
@ -115,18 +131,16 @@ namespace Crypto {
return m_slots.at(slotId).keyX.has_value();
}
constexpr AESKey getKeyX(std::size_t slotId) {
return m_slots.at(slotId).keyX.value_or(AESKey{});
}
constexpr AESKey getKeyX(usize slotId) { return m_slots.at(slotId).keyX.value_or(AESKey{}); }
constexpr void setKeyX(std::size_t slotId, const AESKey &key) {
constexpr void setKeyX(usize slotId, const AESKey& key) {
if (slotId < AesKeySlotCount) {
m_slots.at(slotId).keyX = key;
updateNormalKey(slotId);
}
}
constexpr bool hasKeyY(std::size_t slotId) {
constexpr bool hasKeyY(usize slotId) {
if (slotId >= AesKeySlotCount) {
return false;
}
@ -134,18 +148,16 @@ namespace Crypto {
return m_slots.at(slotId).keyY.has_value();
}
constexpr AESKey getKeyY(std::size_t slotId) {
return m_slots.at(slotId).keyY.value_or(AESKey{});
}
constexpr AESKey getKeyY(usize slotId) { return m_slots.at(slotId).keyY.value_or(AESKey{}); }
constexpr void setKeyY(std::size_t slotId, const AESKey &key) {
constexpr void setKeyY(usize slotId, const AESKey& key) {
if (slotId < AesKeySlotCount) {
m_slots.at(slotId).keyY = key;
updateNormalKey(slotId);
}
}
constexpr bool hasNormalKey(std::size_t slotId) {
constexpr bool hasNormalKey(usize slotId) {
if (slotId >= AesKeySlotCount) {
return false;
}
@ -153,14 +165,14 @@ namespace Crypto {
return m_slots.at(slotId).normalKey.has_value();
}
constexpr AESKey getNormalKey(std::size_t slotId) {
return m_slots.at(slotId).normalKey.value_or(AESKey{});
}
constexpr AESKey getNormalKey(usize slotId) { return m_slots.at(slotId).normalKey.value_or(AESKey{}); }
constexpr void setNormalKey(std::size_t slotId, const AESKey &key) {
constexpr void setNormalKey(usize slotId, const AESKey& key) {
if (slotId < AesKeySlotCount) {
m_slots.at(slotId).normalKey = key;
}
}
std::optional<AESKey> getSeedFromDB(u64 titleID);
};
}
} // namespace Crypto

View file

@ -17,6 +17,8 @@ namespace Discord {
void init();
void update(RPCStatus status, const std::string& title);
void stop();
bool running() const { return enabled; }
};
} // namespace Discord

View file

@ -66,7 +66,6 @@ class Emulator {
#ifdef PANDA3DS_ENABLE_DISCORD_RPC
Discord::RPC discordRpc;
#endif
void setAudioEnabled(bool enable);
void updateDiscord();
// Keep the handle for the ROM here to reload when necessary and to prevent deleting it
@ -90,7 +89,6 @@ class Emulator {
~Emulator();
void step();
void render();
void reset(ReloadOption reload);
void runFrame();
// Poll the scheduler for events
@ -99,6 +97,7 @@ class Emulator {
void resume(); // Resume the emulator
void pause(); // Pause the emulator
void togglePause();
void setAudioEnabled(bool enable);
bool loadAmiibo(const std::filesystem::path& path);
bool loadROM(const std::filesystem::path& path);
@ -118,6 +117,9 @@ class Emulator {
void setOutputSize(u32 width, u32 height) { gpu.setOutputSize(width, height); }
void deinitGraphicsContext() { gpu.deinitGraphicsContext(); }
// Reloads some settings that require special handling, such as audio enable
void reloadSettings();
EmulatorConfig& getConfig() { return config; }
Cheats& getCheats() { return cheats; }
ServiceManager& getServiceManager() { return kernel.getServiceManager(); }
@ -135,4 +137,7 @@ class Emulator {
std::filesystem::path getAppDataRoot();
std::span<u8> getSMDH();
private:
void loadRenderdoc();
};

View file

@ -0,0 +1,34 @@
#pragma once
#include <string>
// Some UI settings that aren't fully frontend-dependent. Note: Not all frontends will support the same settings.
// Note: Any enums should ideally be ordered in the same order we want to show them in UI dropdown menus, so that we can cast indices to enums
// directly.
struct FrontendSettings {
enum class Theme : int {
System = 0,
Light = 1,
Dark = 2,
GreetingsCat = 3,
Cream = 4,
};
// Different panda-themed window icons
enum class WindowIcon : int {
Rpog = 0,
Rsyn = 1,
Rnap = 2,
Rcow = 3,
SkyEmu = 4,
};
Theme theme = Theme::Dark;
WindowIcon icon = WindowIcon::Rpog;
std::string language = "en";
static Theme themeFromString(std::string inString);
static const char* themeToString(Theme theme);
static WindowIcon iconFromString(std::string inString);
static const char* iconToString(WindowIcon icon);
};

View file

@ -7,6 +7,7 @@
#include <string>
#include <type_traits>
#include <vector>
#include "helpers.hpp"
#include "memory.hpp"
#include "result.hpp"
@ -15,13 +16,13 @@
using Result::HorizonResult;
namespace PathType {
enum : u32 {
Invalid = 0,
Empty = 1,
Binary = 2,
ASCII = 3,
UTF16 = 4,
};
enum : u32 {
Invalid = 0,
Empty = 1,
Binary = 2,
ASCII = 3,
UTF16 = 4,
};
}
namespace ArchiveID {
@ -34,91 +35,103 @@ namespace ArchiveID {
SDMC = 9,
SDMCWriteOnly = 0xA,
CardSPI = 0x12345679,
SavedataAndNcch = 0x2345678A,
// 3DBrew: This is the same as the regular SaveData archive, except with this the savedata ID and mediatype is loaded from the input archive
// lowpath.
UserSaveData1 = 0x567890B2,
// 3DBrew: Similar to 0x567890B2 but can only access Accessible Save specified in exheader?
UserSaveData2 = 0x567890B4,
TwlPhoto = 0x567890AC,
TwlSound = 0x567890AD,
};
static std::string toString(u32 id) {
switch (id) {
case SelfNCCH: return "SelfNCCH";
case SaveData: return "SaveData";
case ExtSaveData: return "ExtSaveData";
case SharedExtSaveData: return "SharedExtSaveData";
case SystemSaveData: return "SystemSaveData";
case SDMC: return "SDMC";
case SDMCWriteOnly: return "SDMC (Write-only)";
case SavedataAndNcch: return "Savedata & NCCH (archive 0x2345678A)";
default: return "Unknown archive";
}
}
}
static std::string toString(u32 id) {
switch (id) {
case SelfNCCH: return "SelfNCCH";
case SaveData: return "SaveData";
case ExtSaveData: return "ExtSaveData";
case SharedExtSaveData: return "SharedExtSaveData";
case SystemSaveData: return "SystemSaveData";
case SDMC: return "SDMC";
case SDMCWriteOnly: return "SDMC (Write-only)";
case SavedataAndNcch: return "Savedata & NCCH (archive 0x2345678A)";
case TwlPhoto: return "TWL_PHOTO";
case TwlSound: return "TWL_SOUND";
default: return "Unknown archive";
}
}
} // namespace ArchiveID
struct FSPath {
u32 type = PathType::Invalid;
u32 type = PathType::Invalid;
std::vector<u8> binary; // Path data for binary paths
std::string string; // Path data for ASCII paths
std::u16string utf16_string;
std::vector<u8> binary; // Path data for binary paths
std::string string; // Path data for ASCII paths
std::u16string utf16_string;
FSPath() {}
FSPath() {}
FSPath(u32 type, const std::vector<u8>& vec) : type(type) {
switch (type) {
case PathType::Binary:
binary = std::move(vec);
break;
FSPath(u32 type, const std::vector<u8>& vec) : type(type) {
switch (type) {
case PathType::Binary: binary = std::move(vec); break;
case PathType::ASCII:
string.resize(vec.size() - 1); // -1 because of the null terminator
std::memcpy(string.data(), vec.data(), vec.size() - 1); // Copy string data
break;
case PathType::ASCII:
string.resize(vec.size() - 1); // -1 because of the null terminator
std::memcpy(string.data(), vec.data(), vec.size() - 1); // Copy string data
break;
case PathType::UTF16: {
const size_t size = vec.size() / sizeof(u16) - 1; // Character count. -1 because null terminator here too
utf16_string.resize(size);
std::memcpy(utf16_string.data(), vec.data(), size * sizeof(u16));
break;
}
; }
}
case PathType::UTF16: {
const size_t size = vec.size() / sizeof(u16) - 1; // Character count. -1 because null terminator here too
utf16_string.resize(size);
std::memcpy(utf16_string.data(), vec.data(), size * sizeof(u16));
break;
};
}
}
bool isUTF16() const { return type == PathType::UTF16; }
bool isASCII() const { return type == PathType::ASCII; }
bool isBinary() const { return type == PathType::Binary; }
// This is not called "isEmpty()" to make obvious that we're talking about an empty-type path, NOT an empty text path
bool isEmptyType() const { return type == PathType::Empty; }
bool isTextPath() const { return isUTF16() || isASCII(); }
};
struct FilePerms {
u32 raw;
u32 raw;
FilePerms(u32 val) : raw(val) {}
bool read() const { return (raw & 1) != 0; }
bool write() const { return (raw & 2) != 0; }
bool create() const { return (raw & 4) != 0; }
FilePerms(u32 val) : raw(val) {}
bool read() const { return (raw & 1) != 0; }
bool write() const { return (raw & 2) != 0; }
bool create() const { return (raw & 4) != 0; }
};
class ArchiveBase;
struct FileSession {
ArchiveBase* archive = nullptr;
FILE* fd = nullptr; // File descriptor for file sessions that require them.
FSPath path;
FSPath archivePath;
u32 priority = 0; // TODO: What does this even do
bool isOpen;
ArchiveBase* archive = nullptr;
FILE* fd = nullptr; // File descriptor for file sessions that require them.
FSPath path;
FSPath archivePath;
u32 priority = 0; // TODO: What does this even do
bool isOpen;
FileSession(ArchiveBase* archive, const FSPath& filePath, const FSPath& archivePath, FILE* fd, bool isOpen = true) :
archive(archive), path(filePath), archivePath(archivePath), fd(fd), isOpen(isOpen), priority(0) {}
FileSession(ArchiveBase* archive, const FSPath& filePath, const FSPath& archivePath, FILE* fd, bool isOpen = true)
: archive(archive), path(filePath), archivePath(archivePath), fd(fd), isOpen(isOpen), priority(0) {}
// For cloning a file session
FileSession(const FileSession& other) : archive(other.archive), path(other.path),
archivePath(other.archivePath), fd(other.fd), isOpen(other.isOpen), priority(other.priority) {}
// For cloning a file session
FileSession(const FileSession& other)
: archive(other.archive), path(other.path), archivePath(other.archivePath), fd(other.fd), isOpen(other.isOpen), priority(other.priority) {}
};
struct ArchiveSession {
ArchiveBase* archive = nullptr;
FSPath path;
bool isOpen;
ArchiveBase* archive = nullptr;
FSPath path;
bool isOpen;
ArchiveSession(ArchiveBase* archive, const FSPath& filePath, bool isOpen = true) : archive(archive), path(filePath), isOpen(isOpen) {}
ArchiveSession(ArchiveBase* archive, const FSPath& filePath, bool isOpen = true) : archive(archive), path(filePath), isOpen(isOpen) {}
};
struct DirectoryEntry {
@ -156,106 +169,125 @@ struct DirectorySession {
using FileDescriptor = std::optional<FILE*>;
class ArchiveBase {
public:
struct FormatInfo {
u32 size; // Archive size
u32 numOfDirectories; // Number of directories
u32 numOfFiles; // Number of files
bool duplicateData; // Whether to duplicate data or not
};
public:
struct FormatInfo {
u32 size; // Archive size
u32 numOfDirectories; // Number of directories
u32 numOfFiles; // Number of files
bool duplicateData; // Whether to duplicate data or not
};
protected:
using Handle = u32;
protected:
using Handle = u32;
static constexpr FileDescriptor NoFile = nullptr;
static constexpr FileDescriptor FileError = std::nullopt;
Memory& mem;
static constexpr FileDescriptor NoFile = nullptr;
static constexpr FileDescriptor FileError = std::nullopt;
Memory& mem;
// Returns if a specified 3DS path in UTF16 or ASCII format is safe or not
// A 3DS path is considered safe if its first character is '/' which means we're not trying to access anything outside the root of the fs
// And if it doesn't contain enough instances of ".." (Indicating "climb up a folder" in filesystems) to let the software climb up the directory tree
// And access files outside of the emulator's app data folder
template <u32 format>
bool isPathSafe(const FSPath& path) {
static_assert(format == PathType::ASCII || format == PathType::UTF16);
using String = typename std::conditional<format == PathType::UTF16, std::u16string, std::string>::type; // String type for the path
using Char = typename String::value_type; // Char type for the path
// Returns if a specified 3DS path in UTF16 or ASCII format is safe or not
// A 3DS path is considered safe if its first character is '/' which means we're not trying to access anything outside the root of the fs
// And if it doesn't contain enough instances of ".." (Indicating "climb up a folder" in filesystems) to let the software climb up the directory
// tree And access files outside of the emulator's app data folder
template <u32 format>
bool isPathSafe(const FSPath& path) {
static_assert(format == PathType::ASCII || format == PathType::UTF16);
using String = typename std::conditional<format == PathType::UTF16, std::u16string, std::string>::type; // String type for the path
using Char = typename String::value_type; // Char type for the path
String pathString, dots;
if constexpr (std::is_same<String, std::u16string>()) {
pathString = path.utf16_string;
dots = u"..";
} else {
pathString = path.string;
dots = "..";
}
String pathString, dots;
if constexpr (std::is_same<String, std::u16string>()) {
pathString = path.utf16_string;
dots = u"..";
} else {
pathString = path.string;
dots = "..";
}
// If the path string doesn't begin with / then that means it's accessing outside the FS root, which is invalid & unsafe
if (pathString[0] != Char('/')) return false;
// If the path string doesn't begin with / then that means it's accessing outside the FS root, which is invalid & unsafe
if (pathString[0] != Char('/')) return false;
// Counts how many folders sans the root our file is nested under.
// If it's < 0 at any point of parsing, then the path is unsafe and tries to crawl outside our file sandbox.
// If it's 0 then this is the FS root.
// If it's > 0 then we're in a subdirectory of the root.
int level = 0;
// Counts how many folders sans the root our file is nested under.
// If it's < 0 at any point of parsing, then the path is unsafe and tries to crawl outside our file sandbox.
// If it's 0 then this is the FS root.
// If it's > 0 then we're in a subdirectory of the root.
int level = 0;
// Split the string on / characters and see how many of the substrings are ".."
size_t pos = 0;
while ((pos = pathString.find(Char('/'))) != String::npos) {
String token = pathString.substr(0, pos);
pathString.erase(0, pos + 1);
// Split the string on / characters and see how many of the substrings are ".."
size_t pos = 0;
while ((pos = pathString.find(Char('/'))) != String::npos) {
String token = pathString.substr(0, pos);
pathString.erase(0, pos + 1);
if (token == dots) {
level--;
if (level < 0) return false;
} else {
level++;
}
}
if (token == dots) {
level--;
if (level < 0) return false;
} else {
level++;
}
}
return true;
}
return true;
}
public:
virtual std::string name() = 0;
virtual u64 getFreeBytes() = 0;
virtual HorizonResult createFile(const FSPath& path, u64 size) = 0;
virtual HorizonResult deleteFile(const FSPath& path) = 0;
public:
virtual std::string name() = 0;
virtual u64 getFreeBytes() = 0;
virtual HorizonResult createFile(const FSPath& path, u64 size) = 0;
virtual HorizonResult deleteFile(const FSPath& path) = 0;
virtual Rust::Result<FormatInfo, HorizonResult> getFormatInfo(const FSPath& path) {
Helpers::panic("Unimplemented GetFormatInfo for %s archive", name().c_str());
// Return a dummy struct just to avoid the UB of not returning anything, even if we panic
return Ok(FormatInfo{ .size = 0, .numOfDirectories = 0, .numOfFiles = 0, .duplicateData = false });
}
virtual Rust::Result<FormatInfo, HorizonResult> getFormatInfo(const FSPath& path) {
Helpers::panic("Unimplemented GetFormatInfo for %s archive", name().c_str());
// Return a dummy struct just to avoid the UB of not returning anything, even if we panic
return Ok(FormatInfo{.size = 0, .numOfDirectories = 0, .numOfFiles = 0, .duplicateData = false});
}
virtual HorizonResult createDirectory(const FSPath& path) {
Helpers::panic("Unimplemented CreateDirectory for %s archive", name().c_str());
return Result::FS::AlreadyExists;
}
virtual HorizonResult createDirectory(const FSPath& path) {
Helpers::panic("Unimplemented CreateDirectory for %s archive", name().c_str());
return Result::FS::AlreadyExists;
}
// Returns nullopt if opening the file failed, otherwise returns a file descriptor to it (nullptr if none is needed)
virtual FileDescriptor openFile(const FSPath& path, const FilePerms& perms) = 0;
virtual Rust::Result<ArchiveBase*, HorizonResult> openArchive(const FSPath& path) = 0;
// Returns nullopt if opening the file failed, otherwise returns a file descriptor to it (nullptr if none is needed)
virtual FileDescriptor openFile(const FSPath& path, const FilePerms& perms) = 0;
virtual Rust::Result<ArchiveBase*, HorizonResult> openArchive(const FSPath& path) = 0;
virtual Rust::Result<DirectorySession, HorizonResult> openDirectory(const FSPath& path) {
Helpers::panic("Unimplemented OpenDirectory for %s archive", name().c_str());
return Err(Result::FS::FileNotFoundAlt);
}
virtual Rust::Result<DirectorySession, HorizonResult> openDirectory(const FSPath& path) {
Helpers::panic("Unimplemented OpenDirectory for %s archive", name().c_str());
return Err(Result::FS::FileNotFoundAlt);
}
virtual void format(const FSPath& path, const FormatInfo& info) {
Helpers::panic("Unimplemented Format for %s archive", name().c_str());
}
virtual void format(const FSPath& path, const FormatInfo& info) { Helpers::panic("Unimplemented Format for %s archive", name().c_str()); }
virtual HorizonResult renameFile(const FSPath& oldPath, const FSPath& newPath) {
virtual HorizonResult renameFile(const FSPath& oldPath, const FSPath& newPath) {
Helpers::panic("Unimplemented RenameFile for %s archive", name().c_str());
return Result::Success;
}
}
// Read size bytes from a file starting at offset "offset" into a certain buffer in memory
// Returns the number of bytes read, or nullopt if the read failed
virtual std::optional<u32> readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) = 0;
// Read size bytes from a file starting at offset "offset" into a certain buffer in memory
// Returns the number of bytes read, or nullopt if the read failed
virtual std::optional<u32> readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) = 0;
ArchiveBase(Memory& mem) : mem(mem) {}
ArchiveBase(Memory& mem) : mem(mem) {}
bool isSafeTextPath(const FSPath& path) {
if (path.type == PathType::UTF16) {
return isPathSafe<PathType::UTF16>(path);
} else if (path.type == PathType::ASCII){
return isPathSafe<PathType::ASCII>(path);
}
Helpers::panic("ArchiveBase::IsSafeTextPath: Invalid path");
}
// Appends a 3DS path to an std::filesystem::path
void appendPath(std::filesystem::path& diskPath, const FSPath& guestPath) {
if (guestPath.type == PathType::UTF16) {
diskPath += std::filesystem::path(guestPath.utf16_string).make_preferred();
} else if (guestPath.type == PathType::ASCII) {
diskPath += std::filesystem::path(guestPath.string).make_preferred();
} else [[unlikely]] {
Helpers::panic("ArchiveBase::AppendPath: Invalid 3DS path");
}
}
};
struct ArchiveResource {

View file

@ -0,0 +1,30 @@
#pragma once
#include "archive_base.hpp"
#include "result/result.hpp"
using Result::HorizonResult;
class CardSPIArchive : public ArchiveBase {
public:
CardSPIArchive(Memory& mem) : ArchiveBase(mem) {}
std::string name() override { return "Card SPI"; }
u64 getFreeBytes() override {
Helpers::warn("Unimplemented GetFreeBytes for Card SPI archive");
return 0_MB;
}
HorizonResult createDirectory(const FSPath& path) override;
HorizonResult createFile(const FSPath& path, u64 size) override;
HorizonResult deleteFile(const FSPath& path) override;
Rust::Result<ArchiveBase*, HorizonResult> openArchive(const FSPath& path) override;
Rust::Result<DirectorySession, HorizonResult> openDirectory(const FSPath& path) override;
FileDescriptor openFile(const FSPath& path, const FilePerms& perms) override;
std::optional<u32> readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) override {
Helpers::panic("Unimplemented ReadFile for Card SPI archive");
return {};
};
};

View file

@ -0,0 +1,30 @@
#pragma once
#include "archive_base.hpp"
#include "result/result.hpp"
using Result::HorizonResult;
class TWLPhotoArchive : public ArchiveBase {
public:
TWLPhotoArchive(Memory& mem) : ArchiveBase(mem) {}
std::string name() override { return "TWL_PHOTO"; }
u64 getFreeBytes() override {
Helpers::warn("Unimplemented GetFreeBytes for TWLPhoto archive");
return 32_MB;
}
HorizonResult createDirectory(const FSPath& path) override;
HorizonResult createFile(const FSPath& path, u64 size) override;
HorizonResult deleteFile(const FSPath& path) override;
Rust::Result<ArchiveBase*, HorizonResult> openArchive(const FSPath& path) override;
Rust::Result<DirectorySession, HorizonResult> openDirectory(const FSPath& path) override;
FileDescriptor openFile(const FSPath& path, const FilePerms& perms) override;
std::optional<u32> readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) override {
Helpers::panic("Unimplemented ReadFile for TWL_PHOTO archive");
return {};
};
};

View file

@ -0,0 +1,30 @@
#pragma once
#include "archive_base.hpp"
#include "result/result.hpp"
using Result::HorizonResult;
class TWLSoundArchive : public ArchiveBase {
public:
TWLSoundArchive(Memory& mem) : ArchiveBase(mem) {}
std::string name() override { return "TWL_SOUND"; }
u64 getFreeBytes() override {
Helpers::warn("Unimplemented GetFreeBytes for TWLSound archive");
return 32_MB;
}
HorizonResult createDirectory(const FSPath& path) override;
HorizonResult createFile(const FSPath& path, u64 size) override;
HorizonResult deleteFile(const FSPath& path) override;
Rust::Result<ArchiveBase*, HorizonResult> openArchive(const FSPath& path) override;
Rust::Result<DirectorySession, HorizonResult> openDirectory(const FSPath& path) override;
FileDescriptor openFile(const FSPath& path, const FilePerms& perms) override;
std::optional<u32> readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) override {
Helpers::panic("Unimplemented ReadFile for TWL_SOUND archive");
return {};
};
};

View file

@ -4,7 +4,6 @@
#include <cstdint>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <vector>
#include <memory>
@ -162,19 +161,6 @@ namespace Helpers {
return std::bit_cast<To, From>(from);
}
#endif
static std::vector<std::string> split(const std::string& s, const char c) {
std::istringstream tmp(s);
std::vector<std::string> result(1);
while (std::getline(tmp, *result.rbegin(), c)) {
result.emplace_back();
}
// Remove temporary slot
result.pop_back();
return result;
}
}; // namespace Helpers
// UDLs for memory size values

View file

@ -2,8 +2,19 @@
#include <cstdint>
namespace IPC {
namespace BufferType {
enum : std::uint32_t {
Send = 1,
Receive = 2,
};
}
constexpr std::uint32_t responseHeader(std::uint32_t commandID, std::uint32_t normalResponses, std::uint32_t translateResponses) {
// TODO: Maybe validate the response count stuff fits in 6 bits
return (commandID << 16) | (normalResponses << 6) | translateResponses;
}
}
constexpr std::uint32_t pointerHeader(std::uint32_t index, std::uint32_t size, std::uint32_t type) {
return (size << 14) | (index << 10) | (type << 1);
}
} // namespace IPC

View file

@ -8,6 +8,7 @@ namespace ConfigMem {
KernelVersionMajor = 0x1FF80003,
SyscoreVer = 0x1FF80010,
EnvInfo = 0x1FF80014,
PrevFirm = 0x1FF80016,
AppMemAlloc = 0x1FF80040,
FirmUnknown = 0x1FF80060,
FirmRevision = 0x1FF80061,
@ -30,6 +31,11 @@ namespace ConfigMem {
// Shows what type of hardware we're running on
namespace HardwareCodes {
enum : u8 { Product = 1, Devboard = 2, Debugger = 3, Capture = 4 };
enum : u8 {
Product = 1,
Devboard = 2,
Debugger = 3,
Capture = 4,
};
}
} // namespace ConfigMem

View file

@ -1,7 +1,7 @@
#pragma once
#include "helpers.hpp"
using Handle = u32;
using HorizonHandle = u32;
namespace KernelHandles {
enum : u32 {
@ -20,6 +20,7 @@ namespace KernelHandles {
CFG_U, // CFG service (Console & region info)
CFG_I,
CFG_S, // Used by most system apps in lieu of cfg:u
CFG_NOR, // Used by system settings app
CSND, // Plays audio directly from PCM samples
DLP_SRVR, // Download Play: Server. Used for network play.
DSP, // DSP service (Used for audio decoding and output)
@ -38,11 +39,14 @@ namespace KernelHandles {
NIM, // Updates, DLC, etc
NDM, // ?????
NS_S, // Nintendo Shell service
NWM_EXT, // ?????
NWM_UDS, // Local multiplayer
NEWS_U, // This service literally has 1 command (AddNotification) and I don't even understand what it does
NEWS_S, // news:u on steroids
NEWS_U, // This service literally has 1 command (AddNotification)
PTM_U, // PTM service (Used for accessing various console info, such as battery, shell and pedometer state)
PTM_SYSM, // PTM system service
PTM_PLAY, // PTM Play service, used for retrieving play history
PTM_GETS, // PTM RTC service (GetSystemTime)
SOC, // Socket service
SSL, // SSL service (Totally didn't expect that)
Y2R, // Also does camera stuff
@ -61,17 +65,17 @@ namespace KernelHandles {
};
// Returns whether "handle" belongs to one of the OS services
static constexpr bool isServiceHandle(Handle handle) {
static constexpr bool isServiceHandle(HorizonHandle handle) {
return handle >= MinServiceHandle && handle <= MaxServiceHandle;
}
// Returns whether "handle" belongs to one of the OS services' shared memory areas
static constexpr bool isSharedMemHandle(Handle handle) {
static constexpr bool isSharedMemHandle(HorizonHandle handle) {
return handle >= MinSharedMemHandle && handle <= MaxSharedMemHandle;
}
// Returns the name of a handle as a string based on the given handle
static const char* getServiceName(Handle handle) {
static const char* getServiceName(HorizonHandle handle) {
switch (handle) {
case AC: return "AC";
case ACT: return "ACT";
@ -82,6 +86,8 @@ namespace KernelHandles {
case CECD: return "CECD";
case CFG_U: return "CFG:U";
case CFG_I: return "CFG:I";
case CFG_S: return "CFG:S";
case CFG_NOR: return "CFG:NOR";
case CSND: return "CSND";
case DSP: return "DSP";
case DLP_SRVR: return "DLP::SRVR";
@ -97,13 +103,16 @@ namespace KernelHandles {
case MCU_HWC: return "MCU::HWC";
case MIC: return "MIC";
case NDM: return "NDM";
case NEWS_S: return "NEWS_S";
case NEWS_U: return "NEWS_U";
case NWM_EXT: return "nwm::EXT";
case NWM_UDS: return "nwm::UDS";
case NFC: return "NFC";
case NIM: return "NIM";
case PTM_U: return "PTM:U";
case PTM_SYSM: return "PTM:SYSM";
case PTM_PLAY: return "PTM:PLAY";
case PTM_GETS: return "PTM:GETS";
case SOC: return "SOC";
case SSL: return "SSL";
case Y2R: return "Y2R";

View file

@ -18,6 +18,8 @@ class CPU;
struct Scheduler;
class Kernel {
using Handle = HorizonHandle;
std::span<u32, 16> regs;
CPU& cpu;
Memory& mem;

View file

@ -47,7 +47,7 @@ enum class ProcessorID : s32 {
struct AddressArbiter {};
struct ResourceLimits {
Handle handle;
HorizonHandle handle;
s32 currentCommit = 0;
};
@ -91,6 +91,8 @@ struct Port {
};
struct Session {
using Handle = HorizonHandle;
Handle portHandle; // The port this session is subscribed to
Session(Handle portHandle) : portHandle(portHandle) {}
};
@ -109,6 +111,8 @@ enum class ThreadStatus {
};
struct Thread {
using Handle = HorizonHandle;
u32 initialSP; // Initial r13 value
u32 entrypoint; // Initial r15 value
u32 priority;
@ -161,6 +165,8 @@ static const char* kernelObjectTypeToString(KernelObjectType t) {
}
struct Mutex {
using Handle = HorizonHandle;
u64 waitlist; // Refer to the getWaitlist function below for documentation
Handle ownerThread = 0; // Index of the thread that holds the mutex if it's locked
Handle handle; // Handle of the mutex itself
@ -203,6 +209,8 @@ struct MemoryBlock {
// Generic kernel object class
struct KernelObject {
using Handle = HorizonHandle;
Handle handle = 0; // A u32 the OS will use to identify objects
void* data = nullptr;
KernelObjectType type;

View file

@ -50,6 +50,7 @@ struct NCCH {
static constexpr u64 mediaUnit = 0x200;
u64 size = 0; // Size of NCCH converted to bytes
u64 saveDataSize = 0;
u32 stackSize = 0;
u32 bssSize = 0;
u32 exheaderSize = 0;
@ -64,8 +65,6 @@ struct NCCH {
// Contents of the .code file in the ExeFS
std::vector<u8> codeFile;
// Contains of the cart's save data
std::vector<u8> saveData;
// The cart region. Only the CXI's region matters to us. Necessary to get past region locking
std::optional<Regions> region = std::nullopt;
std::vector<u8> smdh;
@ -78,7 +77,7 @@ struct NCCH {
bool hasExeFS() { return exeFS.size != 0; }
bool hasRomFS() { return romFS.size != 0; }
bool hasCode() { return codeFile.size() != 0; }
bool hasSaveData() { return saveData.size() != 0; }
bool hasSaveData() { return saveDataSize != 0; }
// Parse SMDH for region info and such. Returns false on failure, true on success
bool parseSMDH(const std::vector<u8> &smdh);

View file

@ -65,6 +65,7 @@ namespace Log {
static Logger<false> nwmUdsLogger;
static Logger<false> nimLogger;
static Logger<false> ndmLogger;
static Logger<false> nsLogger;
static Logger<false> ptmLogger;
static Logger<false> socLogger;
static Logger<false> sslLogger;

View file

@ -102,6 +102,8 @@ namespace KernelMemoryTypes {
}
class Memory {
using Handle = HorizonHandle;
u8* fcram;
u8* dspRam; // Provided to us by Audio
u8* vram; // Provided to the memory class by the GPU class
@ -213,8 +215,14 @@ private:
}
enum class BatteryLevel {
Empty = 0, AlmostEmpty, OneBar, TwoBars, ThreeBars, FourBars
Empty = 0,
AlmostEmpty,
OneBar,
TwoBars,
ThreeBars,
FourBars,
};
u8 getBatteryState(bool adapterConnected, bool charging, BatteryLevel batteryLevel) {
u8 value = static_cast<u8>(batteryLevel) << 2; // Bits 2:4 are the battery level from 0 to 5
if (adapterConnected) value |= 1 << 0; // Bit 0 shows if the charger is connected
@ -290,5 +298,5 @@ private:
bool allocateMainThreadStack(u32 size);
Regions getConsoleRegion();
void copySharedFont(u8* ptr);
void copySharedFont(u8* ptr, u32 vaddr);
};

View file

@ -1,6 +1,13 @@
#pragma once
#include <QAction>
#include <QCheckBox>
#include <QDialog>
#include <QLabel>
#include <QLineEdit>
#include <QListWidget>
#include <QPushButton>
#include <QTextEdit>
#include <QWidget>
#include <filesystem>
#include <memory>
@ -24,3 +31,60 @@ class CheatsWindow final : public QWidget {
std::filesystem::path cheatPath;
Emulator* emu;
};
struct CheatMetadata {
u32 handle = Cheats::badCheatHandle;
std::string name = "New cheat";
std::string code;
bool enabled = true;
};
class CheatEntryWidget : public QWidget {
Q_OBJECT
public:
CheatEntryWidget(Emulator* emu, CheatMetadata metadata, QListWidget* parent);
void Update() {
name->setText(metadata.name.c_str());
enabled->setChecked(metadata.enabled);
update();
}
void Remove() {
emu->getCheats().removeCheat(metadata.handle);
cheatList->takeItem(cheatList->row(listItem));
deleteLater();
}
const CheatMetadata& getMetadata() { return metadata; }
void setMetadata(const CheatMetadata& metadata) { this->metadata = metadata; }
private:
void checkboxChanged(int state);
void editClicked();
Emulator* emu;
CheatMetadata metadata;
u32 handle;
QLabel* name;
QCheckBox* enabled;
QListWidget* cheatList;
QListWidgetItem* listItem;
};
class CheatEditDialog : public QDialog {
Q_OBJECT
public:
CheatEditDialog(Emulator* emu, CheatEntryWidget& cheatEntry);
void accepted();
void rejected();
private:
Emulator* emu;
CheatEntryWidget& cheatEntry;
QTextEdit* codeEdit;
QLineEdit* nameEdit;
};

View file

@ -1,30 +1,58 @@
#pragma once
#include <QApplication>
#include <QCheckBox>
#include <QComboBox>
#include <QDialog>
#include <QListWidget>
#include <QPalette>
#include <QStackedWidget>
#include <QTextEdit>
#include <QWidget>
#include <QtWidgets>
#include <array>
#include <functional>
#include <utility>
#include "emulator.hpp"
#include "frontend_settings.hpp"
class ConfigWindow : public QDialog {
Q_OBJECT
private:
enum class Theme : int {
System = 0,
Light = 1,
Dark = 2,
GreetingsCat = 3,
Cream = 4,
};
using ConfigCallback = std::function<void()>;
using MainWindowCallback = std::function<QWidget*()>;
Theme currentTheme;
QComboBox* themeSelect = nullptr;
using Theme = FrontendSettings::Theme;
using WindowIcon = FrontendSettings::WindowIcon;
void setTheme(Theme theme);
QTextEdit* helpText = nullptr;
QListWidget* widgetList = nullptr;
QStackedWidget* widgetContainer = nullptr;
static constexpr size_t settingWidgetCount = 6;
std::array<QString, settingWidgetCount> helpTexts;
// The config class holds a copy of the emulator config which it edits and sends
// over to the emulator in a thread-safe manner
EmulatorConfig config;
ConfigCallback updateConfig;
MainWindowCallback getMainWindow;
void addWidget(QWidget* widget, QString title, QString icon, QString helpText);
void setTheme(FrontendSettings::Theme theme);
void setIcon(FrontendSettings::WindowIcon icon);
QComboBox* createLanguageSelect();
public:
ConfigWindow(QWidget* parent = nullptr);
ConfigWindow(ConfigCallback configCallback, MainWindowCallback windowCallback, const EmulatorConfig& config, QWidget* parent = nullptr);
~ConfigWindow();
EmulatorConfig& getConfig() { return config; }
private:
Emulator* emu;
};

View file

@ -50,6 +50,8 @@ class MainWindow : public QMainWindow {
PressTouchscreen,
ReleaseTouchscreen,
ReloadUbershader,
SetScreenSize,
UpdateConfig,
};
// Tagged union representing our message queue messages
@ -81,6 +83,11 @@ class MainWindow : public QMainWindow {
u16 x;
u16 y;
} touchscreen;
struct {
u32 width;
u32 height;
} screenSize;
};
};
@ -95,7 +102,7 @@ class MainWindow : public QMainWindow {
QMenuBar* menuBar = nullptr;
InputMappings keyboardMappings;
ScreenWidget screen;
ScreenWidget* screen;
AboutWindow* aboutWindow;
ConfigWindow* configWindow;
CheatsWindow* cheatsEditor;
@ -116,12 +123,15 @@ class MainWindow : public QMainWindow {
void showAboutMenu();
void initControllers();
void pollControllers();
void setupControllerSensors(SDL_GameController* controller);
void sendMessage(const EmulatorMessage& message);
void dispatchMessage(const EmulatorMessage& message);
void loadTranslation();
// Tracks whether we are using an OpenGL-backed renderer or a Vulkan-backed renderer
bool usingGL = false;
bool usingVk = false;
bool usingMtl = false;
// Variables to keep track of whether the user is controlling the 3DS analog stick with their keyboard
// This is done so when a gamepad is connected, we won't automatically override the 3DS analog stick settings with the gamepad's state
@ -133,12 +143,18 @@ class MainWindow : public QMainWindow {
MainWindow(QApplication* app, QWidget* parent = nullptr);
~MainWindow();
void closeEvent(QCloseEvent* event) override;
void keyPressEvent(QKeyEvent* event) override;
void keyReleaseEvent(QKeyEvent* event) override;
void mousePressEvent(QMouseEvent* event) override;
void mouseReleaseEvent(QMouseEvent* event) override;
void mouseMoveEvent(QMouseEvent* event) override;
void loadLuaScript(const std::string& code);
void reloadShader(const std::string& shader);
void editCheat(u32 handle, const std::vector<uint8_t>& cheat, const std::function<void(u32)>& callback);
void handleScreenResize(u32 width, u32 height);
void handleTouchscreenPress(QMouseEvent* event);
};

View file

@ -1,5 +1,6 @@
#pragma once
#include <QWidget>
#include <functional>
#include <memory>
#include "gl/context.h"
@ -10,15 +11,28 @@ class ScreenWidget : public QWidget {
Q_OBJECT
public:
ScreenWidget(QWidget* parent = nullptr);
using ResizeCallback = std::function<void(u32, u32)>;
ScreenWidget(ResizeCallback resizeCallback, QWidget* parent = nullptr);
void resizeEvent(QResizeEvent* event) override;
// Called by the emulator thread for resizing the actual GL surface, since the emulator thread owns the GL context
void resizeSurface(u32 width, u32 height);
GL::Context* getGLContext() { return glContext.get(); }
// Dimensions of our output surface
u32 surfaceWidth = 0;
u32 surfaceHeight = 0;
WindowInfo windowInfo;
// Cached "previous" dimensions, used when resizing our window
u32 previousWidth = 0;
u32 previousHeight = 0;
private:
std::unique_ptr<GL::Context> glContext = nullptr;
ResizeCallback resizeCallback;
bool createGLContext();
qreal devicePixelRatioFromScreen() const;

View file

@ -23,6 +23,8 @@ class FrontendSDL {
SDL_GameController* gameController = nullptr;
InputMappings keyboardMappings;
u32 windowWidth = 400;
u32 windowHeight = 480;
int gameControllerID;
bool programRunning = true;
@ -35,4 +37,6 @@ class FrontendSDL {
// And so the user can still use the keyboard to control the analog
bool keyboardAnalogX = false;
bool keyboardAnalogY = false;
void setupControllerSensors(SDL_GameController* controller);
};

73
include/renderdoc.hpp Normal file
View file

@ -0,0 +1,73 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <string>
#include "helpers.hpp"
#ifdef PANDA3DS_ENABLE_RENDERDOC
namespace Renderdoc {
// Loads renderdoc dynamic library module.
void loadRenderdoc();
// Begins a capture if a renderdoc instance is attached.
void startCapture();
// Ends current renderdoc capture.
void endCapture();
// Triggers capturing process.
void triggerCapture();
// Sets output directory for captures
void setOutputDir(const std::string& path, const std::string& prefix);
// Returns whether Renderdoc has been loaded
bool isLoaded();
// Returns whether we've compiled with Renderdoc support
static constexpr bool isSupported() { return true; }
} // namespace Renderdoc
#else
namespace Renderdoc {
static void loadRenderdoc() {}
static void startCapture() { Helpers::panic("Tried to start a Renderdoc capture while support for renderdoc is disabled"); }
static void endCapture() { Helpers::panic("Tried to end a Renderdoc capture while support for renderdoc is disabled"); }
static void triggerCapture() { Helpers::panic("Tried to trigger a Renderdoc capture while support for renderdoc is disabled"); }
static void setOutputDir(const std::string& path, const std::string& prefix) {}
static constexpr bool isSupported() { return false; }
static constexpr bool isLoaded() { return false; }
} // namespace Renderdoc
#endif
namespace Renderdoc {
// RAII scope class that encloses a Renderdoc capture, as long as it's triggered by triggerCapture
struct Scope {
Scope() { Renderdoc::startCapture(); }
~Scope() { Renderdoc::endCapture(); }
Scope(const Scope&) = delete;
Scope& operator=(const Scope&) = delete;
Scope(Scope&&) = delete;
Scope& operator=(const Scope&&) = delete;
};
// RAII scope class that encloses a Renderdoc capture. Unlike regular Scope it doesn't wait for a trigger, it will always issue the capture
// trigger on its own and take a capture
struct InstantScope {
InstantScope() {
Renderdoc::triggerCapture();
Renderdoc::startCapture();
}
~InstantScope() { Renderdoc::endCapture(); }
InstantScope(const InstantScope&) = delete;
InstantScope& operator=(const InstantScope&) = delete;
InstantScope(InstantScope&&) = delete;
InstantScope& operator=(const InstantScope&&) = delete;
};
} // namespace Renderdoc

View file

@ -1,9 +1,10 @@
#pragma once
#include <array>
#include <optional>
#include <span>
#include <string>
#include <optional>
#include "PICA/draw_acceleration.hpp"
#include "PICA/pica_vertex.hpp"
#include "PICA/regs.hpp"
#include "helpers.hpp"
@ -17,12 +18,16 @@ enum class RendererType : s8 {
Null = 0,
OpenGL = 1,
Vulkan = 2,
Software = 3,
Metal = 3,
Software = 4,
};
class GPU;
struct EmulatorConfig;
struct SDL_Window;
class GPU;
class ShaderUnit;
class Renderer {
protected:
GPU& gpu;
@ -46,6 +51,8 @@ class Renderer {
u32 outputWindowWidth = 400;
u32 outputWindowHeight = 240 * 2;
EmulatorConfig* emulatorConfig = nullptr;
public:
Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
virtual ~Renderer();
@ -74,6 +81,16 @@ class Renderer {
virtual std::string getUbershader() { return ""; }
virtual void setUbershader(const std::string& shader) {}
// Only relevant for OpenGL renderer and other OpenGL-based backends (eg software)
// Called to notify the core to use OpenGL ES and not desktop GL
virtual void setupGLES() {}
// This function is called on every draw call before parsing vertex data.
// It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between
// ubershaders and shadergen, and so on.
// Returns whether this draw is eligible for using hardware-accelerated shaders or if shaders should run on the CPU
virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) { return false; }
// Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window
#ifdef PANDA3DS_FRONTEND_QT
virtual void initGraphicsContext(GL::Context* context) { Helpers::panic("Tried to initialize incompatible renderer with GL context"); }
@ -99,4 +116,6 @@ class Renderer {
outputWindowWidth = width;
outputWindowHeight = height;
}
void setConfig(EmulatorConfig* config) { emulatorConfig = config; }
};

View file

@ -0,0 +1,13 @@
#pragma once
// Information about our OpenGL/OpenGL ES driver that we should keep track of
// Stuff like whether specific extensions are supported, and potentially things like OpenGL context information
namespace OpenGL {
struct Driver {
bool usingGLES = false;
bool supportsExtFbFetch = false;
bool supportsArmFbFetch = false;
bool supportFbFetch() const { return supportsExtFbFetch || supportsArmFbFetch; }
};
} // namespace OpenGL

View file

@ -38,11 +38,14 @@ struct GLStateManager {
GLuint stencilMask;
GLuint boundVAO;
GLuint boundVBO;
GLuint currentProgram;
GLuint boundUBO;
GLenum depthFunc;
GLenum logicOp;
GLenum blendEquationRGB, blendEquationAlpha;
GLenum blendFuncSourceRGB, blendFuncSourceAlpha;
GLenum blendFuncDestRGB, blendFuncDestAlpha;
void reset();
void resetBlend();
@ -51,7 +54,7 @@ struct GLStateManager {
void resetColourMask();
void resetDepth();
void resetVAO();
void resetVBO();
void resetBuffers();
void resetProgram();
void resetScissor();
void resetStencil();
@ -169,13 +172,6 @@ struct GLStateManager {
}
}
void bindVBO(GLuint handle) {
if (boundVBO != handle) {
boundVBO = handle;
glBindBuffer(GL_ARRAY_BUFFER, handle);
}
}
void useProgram(GLuint handle) {
if (currentProgram != handle) {
currentProgram = handle;
@ -183,8 +179,14 @@ struct GLStateManager {
}
}
void bindUBO(GLuint handle) {
if (boundUBO != handle) {
boundUBO = handle;
glBindBuffer(GL_UNIFORM_BUFFER, boundUBO);
}
}
void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); }
void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); }
void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); }
void setColourMask(bool r, bool g, bool b, bool a) {
@ -224,6 +226,41 @@ struct GLStateManager {
}
void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast<GLenum>(func)); }
// Counterpart to glBlendEquationSeparate
void setBlendEquation(GLenum modeRGB, GLenum modeAlpha) {
if (blendEquationRGB != modeRGB || blendEquationAlpha != modeAlpha) {
blendEquationRGB = modeRGB;
blendEquationAlpha = modeAlpha;
glBlendEquationSeparate(modeRGB, modeAlpha);
}
}
// Counterpart to glBlendFuncSeparate
void setBlendFunc(GLenum sourceRGB, GLenum destRGB, GLenum sourceAlpha, GLenum destAlpha) {
if (blendFuncSourceRGB != sourceRGB || blendFuncDestRGB != destRGB || blendFuncSourceAlpha != sourceAlpha ||
blendFuncDestAlpha != destAlpha) {
blendFuncSourceRGB = sourceRGB;
blendFuncDestRGB = destRGB;
blendFuncSourceAlpha = sourceAlpha;
blendFuncDestAlpha = destAlpha;
glBlendFuncSeparate(sourceRGB, destRGB,sourceAlpha, destAlpha);
}
}
// Counterpart to regular glBlendEquation
void setBlendEquation(GLenum mode) { setBlendEquation(mode, mode); }
void setBlendEquation(OpenGL::BlendEquation modeRGB, OpenGL::BlendEquation modeAlpha) {
setBlendEquation(static_cast<GLenum>(modeRGB), static_cast<GLenum>(modeAlpha));
}
void setBlendEquation(OpenGL::BlendEquation mode) {
setBlendEquation(static_cast<GLenum>(mode));
}
};
static_assert(std::is_trivially_constructible<GLStateManager>(), "OpenGL State Manager class is not trivially constructible!");

View file

@ -1,11 +1,23 @@
#pragma once
#include <array>
#include <cstring>
#include <functional>
#include <memory>
#include <optional>
#include <span>
#include <unordered_map>
#include <utility>
#include "PICA/float_types.hpp"
#include "PICA/pica_frag_config.hpp"
#include "PICA/pica_hash.hpp"
#include "PICA/pica_vert_config.hpp"
#include "PICA/pica_vertex.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_gen.hpp"
#include "gl/stream_buffer.h"
#include "gl_driver.hpp"
#include "gl_state.hpp"
#include "helpers.hpp"
#include "logger.hpp"
@ -22,27 +34,48 @@ class RendererGL final : public Renderer {
OpenGL::Program triangleProgram;
OpenGL::Program displayProgram;
OpenGL::VertexArray vao;
// VAO for when not using accelerated vertex shaders. Contains attribute declarations matching to the PICA fixed function fragment attributes
OpenGL::VertexArray defaultVAO;
// VAO for when using accelerated vertex shaders. The PICA vertex shader inputs are passed as attributes without CPU processing.
OpenGL::VertexArray hwShaderVAO;
OpenGL::VertexBuffer vbo;
// TEV configuration uniform locations
GLint textureEnvSourceLoc = -1;
GLint textureEnvOperandLoc = -1;
GLint textureEnvCombinerLoc = -1;
GLint textureEnvColorLoc = -1;
GLint textureEnvScaleLoc = -1;
// Data
struct {
// TEV configuration uniform locations
GLint textureEnvSourceLoc = -1;
GLint textureEnvOperandLoc = -1;
GLint textureEnvCombinerLoc = -1;
GLint textureEnvColorLoc = -1;
GLint textureEnvScaleLoc = -1;
// Uniform of PICA registers
GLint picaRegLoc = -1;
// Uniform of PICA registers
GLint picaRegLoc = -1;
// Depth configuration uniform locations
GLint depthOffsetLoc = -1;
GLint depthScaleLoc = -1;
GLint depthmapEnableLoc = -1;
// Depth configuration uniform locations
GLint depthOffsetLoc = -1;
GLint depthScaleLoc = -1;
GLint depthmapEnableLoc = -1;
} ubershaderData;
float oldDepthScale = -1.0;
float oldDepthOffset = 0.0;
bool oldDepthmapEnable = false;
// Set by prepareForDraw, tells us whether the current draw is using hw-accelerated shader
bool usingAcceleratedShader = false;
bool performIndexedRender = false;
bool usingShortIndices = false;
// Set by prepareForDraw, metadata for indexed renders
GLuint minimumIndex = 0;
GLuint maximumIndex = 0;
void* hwIndexBufferOffset = nullptr;
// When doing hw shaders, we cache which attributes are enabled in our VAO to avoid having to enable/disable all attributes on each draw
u32 previousAttributeMask = 0;
// Cached pointer to the current vertex shader when using HW accelerated shaders
OpenGL::Shader* generatedVertexShader = nullptr;
SurfaceCache<DepthBuffer, 16, true> depthBufferCache;
SurfaceCache<ColourBuffer, 16, true> colourBufferCache;
@ -53,25 +86,82 @@ class RendererGL final : public Renderer {
OpenGL::VertexBuffer dummyVBO;
OpenGL::Texture screenTexture;
GLuint lightLUTTextureArray;
OpenGL::Texture LUTTexture;
OpenGL::Framebuffer screenFramebuffer;
OpenGL::Texture blankTexture;
// The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation
// We can compile this once and then link it with all other generated fragment shaders
OpenGL::Shader defaultShadergenVs;
GLuint shadergenFragmentUBO;
// UBO for uploading the PICA uniforms when using hw shaders
GLuint hwShaderUniformUBO;
using StreamBuffer = OpenGLStreamBuffer;
std::unique_ptr<StreamBuffer> hwVertexBuffer;
std::unique_ptr<StreamBuffer> hwIndexBuffer;
// Cache of fixed attribute values so that we don't do any duplicate updates
std::array<std::array<float, 4>, 16> fixedAttrValues;
// Cached recompiled fragment shader
struct CachedProgram {
OpenGL::Program program;
};
struct ShaderCache {
std::unordered_map<PICA::VertConfig, std::optional<OpenGL::Shader>> vertexShaderCache;
std::unordered_map<PICA::FragmentConfig, OpenGL::Shader> fragmentShaderCache;
// Program cache indexed by GLuints for the vertex and fragment shader to use
// Top 32 bits are the vertex shader GLuint, bottom 32 bits are the fs GLuint
std::unordered_map<u64, CachedProgram> programCache;
void clear() {
for (auto& it : programCache) {
CachedProgram& cachedProgram = it.second;
cachedProgram.program.free();
}
for (auto& it : vertexShaderCache) {
if (it.second.has_value()) {
it.second->free();
}
}
for (auto& it : fragmentShaderCache) {
it.second.free();
}
programCache.clear();
vertexShaderCache.clear();
fragmentShaderCache.clear();
}
};
ShaderCache shaderCache;
OpenGL::Framebuffer getColourFBO();
OpenGL::Texture getTexture(Texture& tex);
OpenGL::Program& getSpecializedShader();
PICA::ShaderGen::FragmentGenerator fragShaderGen;
OpenGL::Driver driverInfo;
MAKE_LOG_FUNCTION(log, rendererLogger)
void setupBlending();
void setupStencilTest(bool stencilEnable);
void bindDepthBuffer();
void setupTextureEnvState();
void setupUbershaderTexEnv();
void bindTexturesToSlots();
void updateLightingLUT();
void updateFogLUT();
void initGraphicsContextInternal();
void accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel);
void compileDisplayShader();
public:
RendererGL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs)
: Renderer(gpu, internalRegs, externalRegs) {}
: Renderer(gpu, internalRegs, externalRegs), fragShaderGen(PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL) {}
~RendererGL() override;
void reset() override;
@ -80,12 +170,14 @@ class RendererGL final : public Renderer {
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; // Clear a GPU buffer in VRAM
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; // Perform display transfer
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override; // Draw the given vertices
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override; // Draw the given vertices
void deinitGraphicsContext() override;
virtual bool supportsShaderReload() override { return true; }
virtual std::string getUbershader() override;
virtual void setUbershader(const std::string& shader) override;
virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) override;
virtual void setupGLES() override;
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);
@ -100,4 +192,4 @@ class RendererGL final : public Renderer {
// Take a screenshot of the screen and store it in a file
void screenshot(const std::string& name) override;
};
};

View file

@ -19,8 +19,6 @@ template <typename SurfaceType, size_t capacity, bool evictOnOverflow = false>
class SurfaceCache {
// Vanilla std::optional can't hold actual references
using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>;
static_assert(std::is_same<SurfaceType, ColourBuffer>() || std::is_same<SurfaceType, DepthBuffer>() ||
std::is_same<SurfaceType, Texture>(), "Invalid surface type");
size_t size;
size_t evictionIndex;

View file

@ -0,0 +1,74 @@
#pragma once
#include <map>
#include "objc_helper.hpp"
#include "pica_to_mtl.hpp"
using namespace PICA;
namespace Metal {
struct BlitPipelineHash {
// Formats
ColorFmt colorFmt;
DepthFmt depthFmt;
};
// This pipeline only caches the pipeline with all of its color and depth attachment variations
class BlitPipelineCache {
public:
BlitPipelineCache() = default;
~BlitPipelineCache() {
reset();
vertexFunction->release();
fragmentFunction->release();
}
void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) {
device = dev;
vertexFunction = vert;
fragmentFunction = frag;
}
MTL::RenderPipelineState* get(BlitPipelineHash hash) {
u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt;
auto& pipeline = pipelineCache[intHash];
if (!pipeline) {
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
desc->setVertexFunction(vertexFunction);
desc->setFragmentFunction(fragmentFunction);
auto colorAttachment = desc->colorAttachments()->object(0);
colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));
desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt));
NS::Error* error = nullptr;
desc->setLabel(toNSString("Blit pipeline"));
pipeline = device->newRenderPipelineState(desc, &error);
if (error) {
Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
desc->release();
}
return pipeline;
}
void reset() {
for (auto& pair : pipelineCache) {
pair.second->release();
}
pipelineCache.clear();
}
private:
std::map<u8, MTL::RenderPipelineState*> pipelineCache;
MTL::Device* device;
MTL::Function* vertexFunction;
MTL::Function* fragmentFunction;
};
} // namespace Metal

View file

@ -0,0 +1,56 @@
#pragma once
#include <Metal/Metal.hpp>
namespace Metal {
struct RenderState {
MTL::RenderPipelineState* renderPipelineState = nullptr;
MTL::DepthStencilState* depthStencilState = nullptr;
MTL::Texture* textures[3] = {nullptr};
MTL::SamplerState* samplerStates[3] = {nullptr};
};
class CommandEncoder {
public:
void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) {
renderCommandEncoder = rce;
// Reset the render state
renderState = RenderState{};
}
// Resource binding
void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) {
if (renderPipelineState != renderState.renderPipelineState) {
renderCommandEncoder->setRenderPipelineState(renderPipelineState);
renderState.renderPipelineState = renderPipelineState;
}
}
void setDepthStencilState(MTL::DepthStencilState* depthStencilState) {
if (depthStencilState != renderState.depthStencilState) {
renderCommandEncoder->setDepthStencilState(depthStencilState);
renderState.depthStencilState = depthStencilState;
}
}
void setFragmentTexture(MTL::Texture* texture, u32 index) {
if (texture != renderState.textures[index]) {
renderCommandEncoder->setFragmentTexture(texture, index);
renderState.textures[index] = texture;
}
}
void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) {
if (samplerState != renderState.samplerStates[index]) {
renderCommandEncoder->setFragmentSamplerState(samplerState, index);
renderState.samplerStates[index] = samplerState;
}
}
private:
MTL::RenderCommandEncoder* renderCommandEncoder = nullptr;
RenderState renderState;
};
} // namespace Metal

View file

@ -0,0 +1,6 @@
#pragma once
#include <Metal/Metal.hpp>
#define GET_HELPER_TEXTURE_BINDING(binding) (30 - binding)
#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - binding)

View file

@ -0,0 +1,80 @@
#pragma once
#include <map>
#include "pica_to_mtl.hpp"
using namespace PICA;
namespace Metal {
struct DepthStencilHash {
u32 stencilConfig;
u16 stencilOpConfig;
bool depthStencilWrite;
u8 depthFunc;
};
class DepthStencilCache {
public:
DepthStencilCache() = default;
~DepthStencilCache() { reset(); }
void set(MTL::Device* dev) { device = dev; }
MTL::DepthStencilState* get(DepthStencilHash hash) {
u64 intHash =
((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig;
auto& depthStencilState = depthStencilCache[intHash];
if (!depthStencilState) {
MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init();
desc->setDepthWriteEnabled(hash.depthStencilWrite);
desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc));
const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig);
MTL::StencilDescriptor* stencilDesc = nullptr;
if (stencilEnable) {
const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig);
const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig);
const u32 stencilBufferMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0;
const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig);
const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig);
const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig);
stencilDesc = MTL::StencilDescriptor::alloc()->init();
stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp));
stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp));
stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp));
stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc));
stencilDesc->setReadMask(stencilRefMask);
stencilDesc->setWriteMask(stencilBufferMask);
desc->setFrontFaceStencil(stencilDesc);
desc->setBackFaceStencil(stencilDesc);
}
depthStencilState = device->newDepthStencilState(desc);
desc->release();
if (stencilDesc) {
stencilDesc->release();
}
}
return depthStencilState;
}
void reset() {
for (auto& pair : depthStencilCache) {
pair.second->release();
}
depthStencilCache.clear();
}
private:
std::map<u64, MTL::DepthStencilState*> depthStencilCache;
MTL::Device* device;
};
} // namespace Metal

View file

@ -0,0 +1,162 @@
#pragma once
#include <map>
#include "objc_helper.hpp"
#include "pica_to_mtl.hpp"
using namespace PICA;
namespace Metal {
struct DrawFragmentFunctionHash {
u32 lightingConfig1; // 32 bits (TODO: check this)
bool lightingEnabled; // 1 bit
u8 lightingNumLights; // 3 bits
// | ref | func | on |
u16 alphaControl; // 12 bits (mask: 11111111 0111 0001)
};
inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) {
if (!l.lightingEnabled && r.lightingEnabled) return true;
if (l.lightingNumLights < r.lightingNumLights) return true;
if (l.lightingConfig1 < r.lightingConfig1) return true;
if (l.alphaControl < r.alphaControl) return true;
return false;
}
struct DrawPipelineHash { // 56 bits
// Formats
ColorFmt colorFmt; // 3 bits
DepthFmt depthFmt; // 3 bits
// Blending
bool blendEnabled; // 1 bit
// | functions | aeq | ceq |
u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111)
u8 colorWriteMask; // 4 bits
DrawFragmentFunctionHash fragHash;
};
inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) {
if ((u32)l.colorFmt < (u32)r.colorFmt) return true;
if ((u32)l.depthFmt < (u32)r.depthFmt) return true;
if (!l.blendEnabled && r.blendEnabled) return true;
if (l.blendControl < r.blendControl) return true;
if (l.colorWriteMask < r.colorWriteMask) return true;
if (l.fragHash < r.fragHash) return true;
return false;
}
// This pipeline only caches the pipeline with all of its color and depth attachment variations
class DrawPipelineCache {
public:
DrawPipelineCache() = default;
~DrawPipelineCache() {
reset();
vertexDescriptor->release();
vertexFunction->release();
}
void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) {
device = dev;
library = lib;
vertexFunction = vert;
vertexDescriptor = vertDesc;
}
MTL::RenderPipelineState* get(DrawPipelineHash hash) {
auto& pipeline = pipelineCache[hash];
if (!pipeline) {
auto& fragmentFunction = fragmentFunctionCache[hash.fragHash];
if (!fragmentFunction) {
MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init();
constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0));
constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1));
constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2));
constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3));
NS::Error* error = nullptr;
fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error);
if (error) {
Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
constants->release();
}
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
desc->setVertexFunction(vertexFunction);
desc->setFragmentFunction(fragmentFunction);
desc->setVertexDescriptor(vertexDescriptor);
auto colorAttachment = desc->colorAttachments()->object(0);
colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));
MTL::ColorWriteMask writeMask = 0;
if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed;
if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen;
if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue;
if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha;
colorAttachment->setWriteMask(writeMask);
if (hash.blendEnabled) {
const u8 rgbEquation = hash.blendControl & 0x7;
const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl);
// Get blending functions
const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl);
const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl);
const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl);
const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl);
colorAttachment->setBlendingEnabled(true);
colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation));
colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation));
colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc));
colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc));
colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc));
colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc));
}
MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt);
desc->setDepthAttachmentPixelFormat(depthFormat);
if (hash.depthFmt == DepthFmt::Depth24Stencil8) desc->setStencilAttachmentPixelFormat(depthFormat);
NS::Error* error = nullptr;
desc->setLabel(toNSString("Draw pipeline"));
pipeline = device->newRenderPipelineState(desc, &error);
if (error) {
Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
desc->release();
}
return pipeline;
}
void reset() {
for (auto& pair : pipelineCache) {
pair.second->release();
}
pipelineCache.clear();
for (auto& pair : fragmentFunctionCache) {
pair.second->release();
}
fragmentFunctionCache.clear();
}
private:
std::map<DrawPipelineHash, MTL::RenderPipelineState*> pipelineCache;
std::map<DrawFragmentFunctionHash, MTL::Function*> fragmentFunctionCache;
MTL::Device* device;
MTL::Library* library;
MTL::Function* vertexFunction;
MTL::VertexDescriptor* vertexDescriptor;
};
} // namespace Metal

View file

@ -0,0 +1,20 @@
#pragma once
#include <Metal/Metal.hpp>
namespace Metal {
class LutTexture {
public:
LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name);
~LutTexture();
u32 getNextIndex();
MTL::Texture* getTexture() { return texture; }
u32 getCurrentIndex() { return currentIndex; }
private:
MTL::Texture* texture;
u32 currentIndex = 0;
};
} // namespace Metal

View file

@ -0,0 +1,91 @@
#pragma once
#include <Metal/Metal.hpp>
#include <array>
#include <string>
#include "boost/icl/interval.hpp"
#include "helpers.hpp"
#include "math_util.hpp"
#include "objc_helper.hpp"
#include "opengl.hpp"
#include "pica_to_mtl.hpp"
template <typename T>
using Interval = boost::icl::right_open_interval<T>;
namespace Metal {
template <typename Format_t>
struct RenderTarget {
MTL::Device* device;
u32 location;
Format_t format;
OpenGL::uvec2 size;
bool valid;
// Range of VRAM taken up by buffer
Interval<u32> range;
MTL::Texture* texture = nullptr;
RenderTarget() : valid(false) {}
RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true)
: device(dev), location(loc), format(format), size({x, y}), valid(valid) {
u64 endLoc = (u64)loc + sizeInBytes();
// Check if start and end are valid here
range = Interval<u32>(loc, (u32)endLoc);
}
Math::Rect<u32> getSubRect(u32 inputAddress, u32 width, u32 height) {
const u32 startOffset = (inputAddress - location) / sizePerPixel(format);
const u32 x0 = (startOffset % (size.x() * 8)) / 8;
const u32 y0 = (startOffset / (size.x() * 8)) * 8;
return Math::Rect<u32>{x0, size.y() - y0, x0 + width, size.y() - height - y0};
}
// For 2 textures to "match" we only care about their locations, formats, and dimensions to match
// For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture
bool matches(RenderTarget& other) {
return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y();
}
void allocate() {
MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid;
if (std::is_same<Format_t, PICA::ColorFmt>::value) {
pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format);
} else if (std::is_same<Format_t, PICA::DepthFmt>::value) {
pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format);
} else {
panic("Invalid format type");
}
MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();
descriptor->setTextureType(MTL::TextureType2D);
descriptor->setPixelFormat(pixelFormat);
descriptor->setWidth(size.u());
descriptor->setHeight(size.v());
descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead);
descriptor->setStorageMode(MTL::StorageModePrivate);
texture = device->newTexture(descriptor);
texture->setLabel(toNSString(
std::string(std::is_same<Format_t, PICA::ColorFmt>::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" +
std::to_string(size.v())
));
descriptor->release();
}
void free() {
valid = false;
if (texture) {
texture->release();
}
}
u64 sizeInBytes() { return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); }
};
using ColorRenderTarget = RenderTarget<PICA::ColorFmt>;
using DepthStencilRenderTarget = RenderTarget<PICA::DepthFmt>;
} // namespace Metal

View file

@ -0,0 +1,73 @@
#pragma once
#include <Metal/Metal.hpp>
#include <array>
#include <string>
#include "PICA/regs.hpp"
#include "boost/icl/interval.hpp"
#include "helpers.hpp"
#include "math_util.hpp"
#include "opengl.hpp"
#include "renderer_mtl/pica_to_mtl.hpp"
template <typename T>
using Interval = boost::icl::right_open_interval<T>;
namespace Metal {
struct Texture {
MTL::Device* device;
u32 location;
u32 config; // Magnification/minification filter, wrapping configs, etc
PICA::TextureFmt format;
OpenGL::uvec2 size;
bool valid;
// Range of VRAM taken up by buffer
Interval<u32> range;
PICA::PixelFormatInfo formatInfo;
MTL::Texture* texture = nullptr;
MTL::SamplerState* sampler = nullptr;
Texture() : valid(false) {}
Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true)
: device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) {
u64 endLoc = (u64)loc + sizeInBytes();
// Check if start and end are valid here
range = Interval<u32>(loc, (u32)endLoc);
}
// For 2 textures to "match" we only care about their locations, formats, and dimensions to match
// For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture
bool matches(Texture& other) {
return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y();
}
void allocate();
void setNewConfig(u32 newConfig);
void decodeTexture(std::span<const u8> data);
void free();
u64 sizeInBytes();
u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
// Get the morton interleave offset of a texel based on its U and V values
static u32 mortonInterleave(u32 u, u32 v);
// Get the byte offset of texel (u, v) in the texture
static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel);
static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width);
// Returns the format of this texture as a string
std::string_view formatToString() { return PICA::textureFormatToString(format); }
// Returns the texel at coordinates (u, v) of an ETC1(A4) texture
// TODO: Make hasAlpha a template parameter
u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data);
u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
};
} // namespace Metal

View file

@ -0,0 +1,83 @@
#pragma once
#include <cstring>
#include "helpers.hpp"
#include "pica_to_mtl.hpp"
using namespace PICA;
namespace Metal {
struct BufferHandle {
MTL::Buffer* buffer;
usize offset;
};
class VertexBufferCache {
// 128MB buffer for caching vertex data
static constexpr usize CACHE_BUFFER_SIZE = 128 * 1024 * 1024;
public:
VertexBufferCache() = default;
~VertexBufferCache() {
endFrame();
buffer->release();
}
void set(MTL::Device* dev) {
device = dev;
create();
}
void endFrame() {
ptr = 0;
for (auto buffer : additionalAllocations) {
buffer->release();
}
additionalAllocations.clear();
}
BufferHandle get(const void* data, usize size) {
// If the vertex buffer is too large, just create a new one
if (ptr + size > CACHE_BUFFER_SIZE) {
MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared);
newBuffer->setLabel(toNSString("Additional vertex buffer"));
additionalAllocations.push_back(newBuffer);
Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer");
return BufferHandle{newBuffer, 0};
}
// Copy the data into the buffer
std::memcpy((char*)buffer->contents() + ptr, data, size);
auto oldPtr = ptr;
ptr += size;
return BufferHandle{buffer, oldPtr};
}
void reset() {
endFrame();
if (buffer) {
buffer->release();
create();
}
}
private:
MTL::Buffer* buffer = nullptr;
usize ptr = 0;
std::vector<MTL::Buffer*> additionalAllocations;
MTL::Device* device;
void create() {
buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared);
buffer->setLabel(toNSString("Shared vertex buffer"));
}
};
} // namespace Metal

View file

@ -0,0 +1,12 @@
#pragma once
#include <string>
#include "mtl_common.hpp"
namespace Metal {
dispatch_data_t createDispatchData(const void* data, size_t size);
} // namespace Metal
// Cast from std::string to NS::String*
inline NS::String* toNSString(const std::string& str) { return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); }

View file

@ -0,0 +1,152 @@
#pragma once
#include <Metal/Metal.hpp>
#include "PICA/regs.hpp"
namespace PICA {
struct PixelFormatInfo {
MTL::PixelFormat pixelFormat;
size_t bytesPerTexel;
};
constexpr PixelFormatInfo pixelFormatInfos[14] = {
{MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8
{MTL::PixelFormatRGBA8Unorm, 4}, // RGB8
{MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551
{MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565
{MTL::PixelFormatABGR4Unorm, 2}, // RGBA4
{MTL::PixelFormatRGBA8Unorm, 4}, // IA8
{MTL::PixelFormatRG8Unorm, 2}, // RG8
{MTL::PixelFormatRGBA8Unorm, 4}, // I8
{MTL::PixelFormatA8Unorm, 1}, // A8
{MTL::PixelFormatABGR4Unorm, 2}, // IA4
{MTL::PixelFormatABGR4Unorm, 2}, // I4
{MTL::PixelFormatA8Unorm, 1}, // A4
{MTL::PixelFormatRGBA8Unorm, 4}, // ETC1
{MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4
};
inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast<int>(format)]; }
inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) {
switch (format) {
case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm;
case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm;
case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm?
case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm?
case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm;
}
}
inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) {
switch (format) {
case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm;
case DepthFmt::Unknown1: return MTL::PixelFormatInvalid;
case DepthFmt::Depth24:
return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats
// Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead
case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8;
}
}
inline MTL::CompareFunction toMTLCompareFunc(u8 func) {
switch (func) {
case 0: return MTL::CompareFunctionNever;
case 1: return MTL::CompareFunctionAlways;
case 2: return MTL::CompareFunctionEqual;
case 3: return MTL::CompareFunctionNotEqual;
case 4: return MTL::CompareFunctionLess;
case 5: return MTL::CompareFunctionLessEqual;
case 6: return MTL::CompareFunctionGreater;
case 7: return MTL::CompareFunctionGreaterEqual;
default: Helpers::panic("Unknown compare function %u", func);
}
return MTL::CompareFunctionAlways;
}
inline MTL::BlendOperation toMTLBlendOperation(u8 op) {
switch (op) {
case 0: return MTL::BlendOperationAdd;
case 1: return MTL::BlendOperationSubtract;
case 2: return MTL::BlendOperationReverseSubtract;
case 3: return MTL::BlendOperationMin;
case 4: return MTL::BlendOperationMax;
case 5: return MTL::BlendOperationAdd; // Unused (same as 0)
case 6: return MTL::BlendOperationAdd; // Unused (same as 0)
case 7: return MTL::BlendOperationAdd; // Unused (same as 0)
default: Helpers::panic("Unknown blend operation %u", op);
}
return MTL::BlendOperationAdd;
}
inline MTL::BlendFactor toMTLBlendFactor(u8 factor) {
switch (factor) {
case 0: return MTL::BlendFactorZero;
case 1: return MTL::BlendFactorOne;
case 2: return MTL::BlendFactorSourceColor;
case 3: return MTL::BlendFactorOneMinusSourceColor;
case 4: return MTL::BlendFactorDestinationColor;
case 5: return MTL::BlendFactorOneMinusDestinationColor;
case 6: return MTL::BlendFactorSourceAlpha;
case 7: return MTL::BlendFactorOneMinusSourceAlpha;
case 8: return MTL::BlendFactorDestinationAlpha;
case 9: return MTL::BlendFactorOneMinusDestinationAlpha;
case 10: return MTL::BlendFactorBlendColor;
case 11: return MTL::BlendFactorOneMinusBlendColor;
case 12: return MTL::BlendFactorBlendAlpha;
case 13: return MTL::BlendFactorOneMinusBlendAlpha;
case 14: return MTL::BlendFactorSourceAlphaSaturated;
case 15: return MTL::BlendFactorOne; // Undocumented
default: Helpers::panic("Unknown blend factor %u", factor);
}
return MTL::BlendFactorOne;
}
inline MTL::StencilOperation toMTLStencilOperation(u8 op) {
switch (op) {
case 0: return MTL::StencilOperationKeep;
case 1: return MTL::StencilOperationZero;
case 2: return MTL::StencilOperationReplace;
case 3: return MTL::StencilOperationIncrementClamp;
case 4: return MTL::StencilOperationDecrementClamp;
case 5: return MTL::StencilOperationInvert;
case 6: return MTL::StencilOperationIncrementWrap;
case 7: return MTL::StencilOperationDecrementWrap;
default: Helpers::panic("Unknown stencil operation %u", op);
}
return MTL::StencilOperationKeep;
}
inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) {
switch (primType) {
case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle;
case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip;
case PrimType::TriangleFan:
Helpers::warn("Triangle fans are not supported on Metal, using triangles instead");
return MTL::PrimitiveTypeTriangle;
case PrimType::GeometryPrimitive:
return MTL::PrimitiveTypeTriangle;
}
}
inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) {
switch (addrMode) {
case 0: return MTL::SamplerAddressModeClampToEdge;
case 1: return MTL::SamplerAddressModeClampToBorderColor;
case 2: return MTL::SamplerAddressModeRepeat;
case 3: return MTL::SamplerAddressModeMirrorRepeat;
case 4: return MTL::SamplerAddressModeClampToEdge;
case 5: return MTL::SamplerAddressModeClampToBorderColor;
case 6: return MTL::SamplerAddressModeRepeat;
case 7: return MTL::SamplerAddressModeRepeat;
default: Helpers::panic("Unknown sampler address mode %u", addrMode);
}
return MTL::SamplerAddressModeClampToEdge;
}
} // namespace PICA

View file

@ -0,0 +1,207 @@
#pragma once
#include <Metal/Metal.hpp>
#include <QuartzCore/QuartzCore.hpp>
#include "mtl_blit_pipeline_cache.hpp"
#include "mtl_command_encoder.hpp"
#include "mtl_depth_stencil_cache.hpp"
#include "mtl_draw_pipeline_cache.hpp"
#include "mtl_lut_texture.hpp"
#include "mtl_render_target.hpp"
#include "mtl_texture.hpp"
#include "mtl_vertex_buffer_cache.hpp"
#include "renderer.hpp"
// HACK: use the OpenGL cache
#include "../renderer_gl/surface_cache.hpp"
class GPU;
struct Color4 {
float r, g, b, a;
};
class RendererMTL final : public Renderer {
public:
RendererMTL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
~RendererMTL() override;
void reset() override;
void display() override;
void initGraphicsContext(SDL_Window* window) override;
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override;
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override;
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override;
void screenshot(const std::string& name) override;
void deinitGraphicsContext() override;
#ifdef PANDA3DS_FRONTEND_QT
virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {}
#endif
private:
CA::MetalLayer* metalLayer;
MTL::Device* device;
MTL::CommandQueue* commandQueue;
Metal::CommandEncoder commandEncoder;
// Libraries
MTL::Library* library;
// Caches
SurfaceCache<Metal::ColorRenderTarget, 16, true> colorRenderTargetCache;
SurfaceCache<Metal::DepthStencilRenderTarget, 16, true> depthStencilRenderTargetCache;
SurfaceCache<Metal::Texture, 256, true> textureCache;
Metal::BlitPipelineCache blitPipelineCache;
Metal::DrawPipelineCache drawPipelineCache;
Metal::DepthStencilCache depthStencilCache;
Metal::VertexBufferCache vertexBufferCache;
// Resources
MTL::SamplerState* nearestSampler;
MTL::SamplerState* linearSampler;
MTL::Texture* nullTexture;
MTL::DepthStencilState* defaultDepthStencilState;
Metal::LutTexture* lutLightingTexture;
Metal::LutTexture* lutFogTexture;
// Pipelines
MTL::RenderPipelineState* displayPipeline;
// MTL::RenderPipelineState* copyToLutTexturePipeline;
// Clears
std::map<MTL::Texture*, Color4> colorClearOps;
std::map<MTL::Texture*, float> depthClearOps;
std::map<MTL::Texture*, u8> stencilClearOps;
// Active state
MTL::CommandBuffer* commandBuffer = nullptr;
MTL::RenderCommandEncoder* renderCommandEncoder = nullptr;
MTL::Texture* lastColorTexture = nullptr;
MTL::Texture* lastDepthTexture = nullptr;
// Debug
std::string nextRenderPassName;
void createCommandBufferIfNeeded() {
if (!commandBuffer) {
commandBuffer = commandQueue->commandBuffer();
}
}
void endRenderPass() {
if (renderCommandEncoder) {
renderCommandEncoder->endEncoding();
renderCommandEncoder = nullptr;
}
}
void beginRenderPassIfNeeded(
MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr
);
void commitCommandBuffer() {
if (renderCommandEncoder) {
renderCommandEncoder->endEncoding();
renderCommandEncoder->release();
renderCommandEncoder = nullptr;
}
if (commandBuffer) {
commandBuffer->commit();
// HACK
commandBuffer->waitUntilCompleted();
commandBuffer->release();
commandBuffer = nullptr;
}
}
template <typename AttachmentT, typename ClearDataT, typename GetAttachmentT, typename SetClearDataT>
inline void clearAttachment(
MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment,
SetClearDataT setClearData
) {
bool beginRenderPass = (renderPassDescriptor == nullptr);
if (!renderPassDescriptor) {
renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
}
AttachmentT* attachment = getAttachment(renderPassDescriptor);
attachment->setTexture(texture);
setClearData(attachment, clearData);
attachment->setLoadAction(MTL::LoadActionClear);
attachment->setStoreAction(MTL::StoreActionStore);
if (beginRenderPass) {
if (std::is_same<AttachmentT, MTL::RenderPassColorAttachmentDescriptor>::value)
beginRenderPassIfNeeded(renderPassDescriptor, true, texture);
else
beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture);
}
}
template <typename AttachmentT, typename ClearDataT, typename GetAttachmentT, typename SetClearDataT>
inline bool clearAttachment(
MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map<MTL::Texture*, ClearDataT>& clearOps,
GetAttachmentT getAttachment, SetClearDataT setClearData
) {
auto it = clearOps.find(texture);
if (it != clearOps.end()) {
clearAttachment<AttachmentT>(renderPassDescriptor, texture, it->second, getAttachment, setClearData);
clearOps.erase(it);
return true;
}
if (renderPassDescriptor) {
AttachmentT* attachment = getAttachment(renderPassDescriptor);
attachment->setTexture(texture);
attachment->setLoadAction(MTL::LoadActionLoad);
attachment->setStoreAction(MTL::StoreActionStore);
}
return false;
}
bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
return clearAttachment<MTL::RenderPassColorAttachmentDescriptor, Color4>(
renderPassDescriptor, texture, colorClearOps,
[](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); },
[](auto attachment, auto& color) { attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); }
);
}
bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
return clearAttachment<MTL::RenderPassDepthAttachmentDescriptor, float>(
renderPassDescriptor, texture, depthClearOps,
[](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); },
[](auto attachment, auto& depth) { attachment->setClearDepth(depth); }
);
}
bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
return clearAttachment<MTL::RenderPassStencilAttachmentDescriptor, u8>(
renderPassDescriptor, texture, stencilClearOps,
[](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); },
[](auto attachment, auto& stencil) { attachment->setClearStencil(stencil); }
);
}
std::optional<Metal::ColorRenderTarget> getColorRenderTarget(
u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true
);
Metal::DepthStencilRenderTarget& getDepthRenderTarget();
Metal::Texture& getTexture(Metal::Texture& tex);
void setupTextureEnvState(MTL::RenderCommandEncoder* encoder);
void bindTexturesToSlots();
void updateLightingLUT(MTL::RenderCommandEncoder* encoder);
void updateFogLUT(MTL::RenderCommandEncoder* encoder);
void textureCopyImpl(
Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect,
const Math::Rect<u32>& destRect
);
};

View file

@ -17,6 +17,10 @@ class RendererNull final : public Renderer {
void screenshot(const std::string& name) override;
void deinitGraphicsContext() override;
// Tell the GPU core that we'll handle vertex fetch & shader execution in the renderer in order to speed up execution.
// Of course, we don't do this and geometry is never actually processed, since this is the null renderer.
virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) override { return true; };
#ifdef PANDA3DS_FRONTEND_QT
virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {}
#endif

38
include/sdl_sensors.hpp Normal file
View file

@ -0,0 +1,38 @@
#pragma once
#include <cmath>
#include <glm/glm.hpp>
#include "helpers.hpp"
#include "services/hid.hpp"
// Convert SDL sensor readings to 3DS format
// We use the same code for Android as well, since the values we get from Android are in the same format as SDL (m/s^2 for acceleration, rad/s for
// rotation)
namespace Sensors::SDL {
// Convert the rotation data we get from SDL sensor events to rotation data we can feed right to HID
// Returns [pitch, roll, yaw]
static glm::vec3 convertRotation(glm::vec3 rotation) {
// Annoyingly, Android doesn't support the <numbers> header yet so we define pi ourselves
static constexpr double pi = 3.141592653589793;
// Convert the rotation from rad/s to deg/s and scale by the gyroscope coefficient in HID
constexpr float scale = 180.f / pi * HIDService::gyroscopeCoeff;
// The axes are also inverted, so invert scale before the multiplication.
return rotation * -scale;
}
static glm::vec3 convertAcceleration(float* data) {
// Set our cap to ~9 m/s^2. The 3DS sensors cap at -930 and +930, so values above this value will get clamped to 930
// At rest (3DS laid flat on table), hardware reads around ~0 for x and z axis, and around ~480 for y axis due to gravity.
// This code tries to mimic this approximately, with offsets based on measurements from my DualShock 4.
static constexpr float accelMax = 9.f;
// We define standard gravity(g) ourself instead of using the SDL one in order for the code to work on Android too.
static constexpr float standardGravity = 9.80665f;
s16 x = std::clamp<s16>(s16(data[0] / accelMax * 930.f), -930, +930);
s16 y = std::clamp<s16>(s16(data[1] / (standardGravity * accelMax) * 930.f - 350.f), -930, +930);
s16 z = std::clamp<s16>(s16((data[2] - 2.1f) / accelMax * 930.f), -930, +930);
return glm::vec3(x, y, z);
}
} // namespace Sensors::SDL

View file

@ -8,6 +8,8 @@
#include "result/result.hpp"
class ACService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::AC;
Memory& mem;
MAKE_LOG_FUNCTION(log, acLogger)
@ -17,6 +19,7 @@ class ACService {
void closeAsync(u32 messagePointer);
void createDefaultConfig(u32 messagePointer);
void getConnectingInfraPriority(u32 messagePointer);
void getNZoneBeaconNotFoundEvent(u32 messagePointer);
void getStatus(u32 messagePointer);
void getLastErrorCode(u32 messagePointer);
void getWifiStatus(u32 messagePointer);

View file

@ -6,6 +6,8 @@
#include "result/result.hpp"
class ACTService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::ACT;
Memory& mem;
MAKE_LOG_FUNCTION(log, actLogger)
@ -15,7 +17,7 @@ class ACTService {
void generateUUID(u32 messagePointer);
void getAccountDataBlock(u32 messagePointer);
public:
public:
ACTService(Memory& mem) : mem(mem) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -6,6 +6,8 @@
#include "result/result.hpp"
class AMService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::AM;
Memory& mem;
MAKE_LOG_FUNCTION(log, amLogger)
@ -15,7 +17,7 @@ class AMService {
void getPatchTitleInfo(u32 messagePointer);
void listTitleInfo(u32 messagePointer);
public:
public:
AMService(Memory& mem) : mem(mem) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -12,7 +12,8 @@
class Kernel;
enum class ConsoleModel : u32 {
Old3DS, New3DS
Old3DS,
New3DS,
};
// https://www.3dbrew.org/wiki/NS_and_APT_Services#Command
@ -41,6 +42,8 @@ namespace APT::Transitions {
}
class APTService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::APT;
Memory& mem;
Kernel& kernel;
@ -99,7 +102,7 @@ class APTService {
u32 screencapPostPermission;
public:
public:
APTService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel), appletManager(mem) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -6,36 +6,46 @@
#include "result/result.hpp"
class BOSSService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::BOSS;
Memory& mem;
MAKE_LOG_FUNCTION(log, bossLogger)
// Service commands
void cancelTask(u32 messagePointer);
void deleteNsData(u32 messagePointer);
void initializeSession(u32 messagePointer);
void getAppNewFlag(u32 messagePointer);
void getErrorCode(u32 messagePointer);
void getNsDataHeaderInfo(u32 messagePointer);
void getNewArrivalFlag(u32 messagePointer);
void getNsDataIdList(u32 messagePointer, u32 commandWord);
void getNsDataLastUpdated(u32 messagePointer);
void getOptoutFlag(u32 messagePointer);
void getStorageEntryInfo(u32 messagePointer); // Unknown what this is, name taken from Citra
void getStorageEntryInfo(u32 messagePointer); // Unknown what this is, name taken from Citra
void getTaskIdList(u32 messagePointer);
void getTaskInfo(u32 messagePointer);
void getTaskServiceStatus(u32 messagePointer);
void getTaskState(u32 messagePointer);
void getTaskStatus(u32 messagePointer);
void getTaskStorageInfo(u32 messagePointer);
void readNsData(u32 messagePointer);
void receiveProperty(u32 messagePointer);
void registerNewArrivalEvent(u32 messagePointer);
void registerStorageEntry(u32 messagePointer);
void registerTask(u32 messagePointer);
void sendProperty(u32 messagePointer);
void setAppNewFlag(u32 messagePointer);
void setOptoutFlag(u32 messagePointer);
void startBgImmediate(u32 messagePointer);
void startTask(u32 messagePointer);
void unregisterStorage(u32 messagePointer);
void unregisterTask(u32 messagePointer);
s8 optoutFlag;
public:
public:
BOSSService(Memory& mem) : mem(mem) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -12,6 +12,7 @@
class Kernel;
class CAMService {
using Handle = HorizonHandle;
using Event = std::optional<Handle>;
struct Port {

View file

@ -1,5 +1,6 @@
#pragma once
#include <optional>
#include "helpers.hpp"
#include "kernel_types.hpp"
#include "logger.hpp"
@ -9,6 +10,8 @@
class Kernel;
class CECDService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::CECD;
Memory& mem;
Kernel& kernel;
@ -20,7 +23,7 @@ class CECDService {
void getInfoEventHandle(u32 messagePointer);
void openAndRead(u32 messagePointer);
public:
public:
CECDService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -1,5 +1,7 @@
#pragma once
#include <cstring>
#include "config.hpp"
#include "helpers.hpp"
#include "logger.hpp"
#include "memory.hpp"
@ -7,15 +9,19 @@
#include "result/result.hpp"
class CFGService {
using Handle = HorizonHandle;
Memory& mem;
CountryCodes country = CountryCodes::US; // Default to USA
const EmulatorConfig& settings;
CountryCodes country = CountryCodes::US; // Default to USA
MAKE_LOG_FUNCTION(log, cfgLogger)
void writeStringU16(u32 pointer, const std::u16string& string);
// Service functions
void getConfigInfoBlk2(u32 messagePointer);
void getConfigInfoBlk8(u32 messagePointer);
void getConfigInfoBlk8(u32 messagePointer, u32 commandWord);
void getCountryCodeID(u32 messagePointer);
void getLocalFriendCodeSeed(u32 messagePointer);
void getRegionCanadaUSA(u32 messagePointer);
@ -23,19 +29,26 @@ class CFGService {
void genUniqueConsoleHash(u32 messagePointer);
void secureInfoGetByte101(u32 messagePointer);
void secureInfoGetRegion(u32 messagePointer);
void setConfigInfoBlk4(u32 messagePointer);
void updateConfigNANDSavegame(u32 messagePointer);
void translateCountryInfo(u32 messagePointer);
void isFangateSupported(u32 messagePointer);
// cfg:nor functions
void norInitialize(u32 messagePointer);
void norReadData(u32 messagePointer);
void getConfigInfo(u32 output, u32 blockID, u32 size, u32 permissionMask);
public:
public:
enum class Type {
U, // cfg:u
I, // cfg:i
S, // cfg:s
NOR, // cfg:nor
U, // cfg:u
I, // cfg:i
S, // cfg:s
NOR, // cfg:nor
};
CFGService(Memory& mem) : mem(mem) {}
CFGService(Memory& mem, const EmulatorConfig& settings) : mem(mem), settings(settings) {}
void reset();
void handleSyncRequest(u32 messagePointer, Type type);
};

View file

@ -10,6 +10,8 @@
class Kernel;
class CSNDService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::CSND;
Memory& mem;
Kernel& kernel;
@ -30,7 +32,5 @@ class CSNDService {
void reset();
void handleSyncRequest(u32 messagePointer);
void setSharedMemory(u8* ptr) {
sharedMemory = ptr;
}
void setSharedMemory(u8* ptr) { sharedMemory = ptr; }
};

View file

@ -8,6 +8,8 @@
// Please forgive me for how everything in this file is named
// "dlp:SRVR" is not a nice name to work with
class DlpSrvrService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::DLP_SRVR;
Memory& mem;
MAKE_LOG_FUNCTION(log, dlpSrvrLogger)
@ -15,7 +17,7 @@ class DlpSrvrService {
// Service commands
void isChild(u32 messagePointer);
public:
public:
DlpSrvrService(Memory& mem) : mem(mem) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -10,14 +10,19 @@
#include "memory.hpp"
#include "result/result.hpp"
struct EmulatorConfig;
// Circular dependencies!
class Kernel;
class DSPService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::DSP;
Memory& mem;
Kernel& kernel;
const EmulatorConfig& config;
Audio::DSPCore* dsp = nullptr;
MAKE_LOG_FUNCTION(log, dspServiceLogger)
// Number of DSP pipes
@ -39,9 +44,12 @@ class DSPService {
size_t totalEventCount;
std::vector<u8> loadedComponent;
bool headphonesInserted = true;
// Service functions
void convertProcessAddressFromDspDram(u32 messagePointer); // Nice function name
void flushDataCache(u32 messagePointer);
void forceHeadphoneOut(u32 messagePointer);
void getHeadphoneStatus(u32 messagePointer);
void getSemaphoreEventHandle(u32 messagePointer);
void invalidateDCache(u32 messagePointer);
@ -56,7 +64,7 @@ class DSPService {
void writeProcessPipe(u32 messagePointer);
public:
DSPService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {}
DSPService(Memory& mem, Kernel& kernel, const EmulatorConfig& config) : mem(mem), kernel(kernel), config(config) {}
void reset();
void handleSyncRequest(u32 messagePointer);
void setDSPCore(Audio::DSPCore* pointer) { dsp = pointer; }
@ -82,4 +90,5 @@ class DSPService {
void triggerInterrupt1();
ComponentDumpResult dumpComponent(const std::filesystem::path& path);
void printFirmwareInfo();
};

View file

@ -0,0 +1,76 @@
#pragma once
#include <array>
#include "helpers.hpp"
namespace DSP {
struct FirmwareInfo {
using Hash = std::array<u8, 32>;
Hash hash; // Firmware hash (SHA-256)
u32 size; // Firmware size in bytes
bool supportsAAC; // Does this firmware support AAC decoding?
const char* notes; // Miscellaneous notes about the firmware
explicit constexpr FirmwareInfo(const Hash& hash, u32 size, bool supportsAAC, const char* notes)
: hash(hash), size(size), supportsAAC(supportsAAC), notes(notes) {}
};
static constexpr std::array<FirmwareInfo, 9> firmwareDB = {
FirmwareInfo(
{0x47, 0xD6, 0x6C, 0xD2, 0x13, 0x1, 0xFF, 0x62, 0xAD, 0x16, 0x98, 0x2, 0x46, 0x67, 0xF3, 0x9,
0xDA, 0x7, 0x20, 0x9E, 0xFB, 0xB, 0x6A, 0x81, 0x98, 0xFF, 0x9B, 0xE0, 0x51, 0x67, 0xC9, 0xA6},
48480, false, "Spotted in some versions of Activity Log potentially other apps"
),
FirmwareInfo(
{0xF5, 0xDA, 0x79, 0xE7, 0x24, 0x6C, 0x51, 0x9A, 0x28, 0x6C, 0x50, 0xC9, 0x9F, 0xA1, 0xE6, 0x4D,
0xA5, 0x72, 0x96, 0x5F, 0xEA, 0x14, 0x20, 0xA7, 0x70, 0x90, 0x57, 0x42, 0x34, 0x6E, 0x18, 0xD1},
49674, false, "One of the most common firmwares. Found in NSMB2 and others"
),
FirmwareInfo(
{0x94, 0x4B, 0x40, 0xB5, 0x46, 0x93, 0xF4, 0xB1, 0xD9, 0x52, 0xBE, 0x84, 0x87, 0xE9, 0xE9, 0x1F,
0x66, 0x7F, 0xC4, 0x89, 0xF8, 0x15, 0x79, 0xF, 0x3D, 0x3E, 0x89, 0x26, 0x5F, 0xE0, 0x89, 0xC4},
49800, false, "One of the most common firmwares. Found in Majora's Mask and others"
),
FirmwareInfo(
{0x8E, 0x21, 0x3F, 0x3E, 0x71, 0xD2, 0xE3, 0xE4, 0x5D, 0x11, 0x69, 0xBA, 0xC6, 0x46, 0x5A, 0x70,
0xEA, 0xBE, 0xB2, 0x2B, 0x30, 0x3F, 0x1F, 0xA6, 0xD7, 0x67, 0x93, 0x70, 0xFF, 0xAD, 0xF, 0x54},
49756, false, "Fairly common firmware. Found in PSMD and others"
),
FirmwareInfo(
{0xA2, 0x6C, 0x74, 0xD1, 0xEF, 0x7F, 0x4F, 0xA5, 0xFF, 0xFF, 0xFB, 0xEC, 0x75, 0x8A, 0x40, 0x8D,
0x8F, 0x22, 0x87, 0x72, 0x78, 0x1B, 0x81, 0x88, 0x86, 0x5F, 0x83, 0x4D, 0x1D, 0x90, 0x6B, 0xAA},
48804, false, "Spotted in MK7"
),
FirmwareInfo(
{0x75, 0x12, 0x70, 0xB2, 0x43, 0xB0, 0xCA, 0xFB, 0x51, 0x99, 0xF2, 0x98, 0x2, 0x2, 0xC9, 0xB4,
0xC7, 0x7A, 0x67, 0x5E, 0xF0, 0x43, 0x8F, 0xD5, 0xA8, 0x9E, 0x83, 0xAA, 0xB9, 0xA8, 0x7, 0x9B},
48652, false, "One of the most common firmwares. Found in OoT, Pokemon Rumble Blast, and others"
),
FirmwareInfo(
{0xF2, 0x96, 0xE2, 0xE5, 0xEC, 0x34, 0x9F, 0x6A, 0x6C, 0xF3, 0xE1, 0xC7, 0xC, 0xDD, 0x65, 0xC2,
0x2, 0x72, 0xB6, 0xE7, 0xFF, 0xE5, 0x57, 0x92, 0x69, 0x4E, 0x83, 0xAE, 0x24, 0xF1, 0x68, 0xBF},
217976, true, "Most common AAC-enabled firmware. Found in Rhythm Heaven, Fire Emblem Fates/Echoes, Pokemon X/Y, and others"
),
FirmwareInfo(
{0xF0, 0x6C, 0x1B, 0x59, 0x23, 0xE1, 0x71, 0x19, 0x5, 0x66, 0x59, 0xCB, 0x3D, 0x9B, 0xF0, 0x26,
0x62, 0x84, 0xE9, 0xA6, 0xC0, 0x8, 0x23, 0x99, 0xD7, 0x45, 0x8D, 0x7C, 0x52, 0xAE, 0x32, 0x1C},
48708, false, "Spotted in Super Mario 3D Land"
),
FirmwareInfo(
{0x7E, 0xA3, 0xC4, 0x4A, 0x1C, 0x57, 0x51, 0x4B, 0xEB, 0xBE, 0xBC, 0xE8, 0xA7, 0x99, 0x5F, 0x7F,
0x3A, 0x29, 0x1, 0x70, 0xEA, 0x3B, 0x6C, 0x14, 0x57, 0x49, 0xAD, 0x93, 0x58, 0x67, 0x2C, 0x97},
49716, false, "Spotted in PMD: GTI"
),
};
} // namespace DSP

View file

@ -0,0 +1,84 @@
// Copyright 2016 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
// Adapted from https://github.com/PabloMK7/citra/blob/master/src/core/hle/service/apt/bcfnt/bcfnt.h
#pragma once
#include <memory>
#include "helpers.hpp"
#include "swap.hpp"
namespace HLE::Fonts {
struct CFNT {
u8 magic[4];
u16_le endianness;
u16_le headerSize;
u32_le version;
u32_le fileSize;
u32_le numBlocks;
};
struct SectionHeader {
u8 magic[4];
u32_le sectionSize;
};
struct FINF {
u8 magic[4];
u32_le sectionSize;
u8 fontType;
u8 lineFeed;
u16_le alterCharIndex;
u8 default_width[3];
u8 encoding;
u32_le tglpOffset;
u32_le cwdhOffset;
u32_le cmapOffset;
u8 height;
u8 width;
u8 ascent;
u8 reserved;
};
struct TGLP {
u8 magic[4];
u32_le sectionSize;
u8 cellWidth;
u8 cellHeight;
u8 baselinePosition;
u8 maxCharacterWidth;
u32_le sheetSize;
u16_le numSheets;
u16_le sheetImageFormat;
u16_le numColumns;
u16_le numRows;
u16_le sheetWidth;
u16_le sheetHeight;
u32_le sheetDataOffset;
};
struct CMAP {
u8 magic[4];
u32_le sectionSize;
u16_le codeBegin;
u16_le codeEnd;
u16_le mappingMethod;
u16_le reserved;
u32_le nextCmapOffset;
};
struct CWDH {
u8 magic[4];
u32_le sectionSize;
u16_le startIndex;
u16_le endIndex;
u32_le nextCwdhOffset;
};
// Relocates the internal addresses of the BCFNT Shared Font to the new base. The current base will
// be auto-detected based on the file headers.
void relocateSharedFont(u8* sharedFont, u32 newAddress);
} // namespace HLE::Fonts

View file

@ -1,5 +1,6 @@
#pragma once
#include <cassert>
#include "helpers.hpp"
#include "kernel_types.hpp"
#include "logger.hpp"
@ -15,6 +16,8 @@ struct FriendKey {
static_assert(sizeof(FriendKey) == 16);
class FRDService {
using Handle = HorizonHandle;
Memory& mem;
MAKE_LOG_FUNCTION(log, frdLogger)
@ -51,11 +54,11 @@ class FRDService {
};
static_assert(sizeof(Profile) == 8);
public:
public:
enum class Type {
A, // frd:a
N, // frd:n
U, // frd:u
A, // frd:a
N, // frd:n
U, // frd:u
};
FRDService(Memory& mem) : mem(mem) {}

View file

@ -1,11 +1,14 @@
#pragma once
#include "config.hpp"
#include "fs/archive_card_spi.hpp"
#include "fs/archive_ext_save_data.hpp"
#include "fs/archive_ncch.hpp"
#include "fs/archive_save_data.hpp"
#include "fs/archive_sdmc.hpp"
#include "fs/archive_self_ncch.hpp"
#include "fs/archive_system_save_data.hpp"
#include "fs/archive_twl_photo.hpp"
#include "fs/archive_twl_sound.hpp"
#include "fs/archive_user_save_data.hpp"
#include "helpers.hpp"
#include "kernel_types.hpp"
@ -16,6 +19,8 @@
class Kernel;
class FSService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::FS;
Memory& mem;
Kernel& kernel;
@ -37,6 +42,10 @@ class FSService {
ExtSaveDataArchive sharedExtSaveData_nand;
SystemSaveDataArchive systemSaveData;
TWLPhotoArchive twlPhoto;
TWLSoundArchive twlSound;
CardSPIArchive cardSpi;
ArchiveBase* getArchiveFromID(u32 id, const FSPath& archivePath);
Rust::Result<Handle, HorizonResult> openArchiveHandle(u32 archiveID, const FSPath& path);
Rust::Result<Handle, HorizonResult> openDirectoryHandle(ArchiveBase* archive, const FSPath& path);
@ -81,11 +90,12 @@ class FSService {
// Used for set/get priority: Not sure what sort of priority this is referring to
u32 priority;
public:
public:
FSService(Memory& mem, Kernel& kernel, const EmulatorConfig& config)
: mem(mem), saveData(mem), sharedExtSaveData_nand(mem, "../SharedFiles/NAND", true), extSaveData_sdmc(mem, "SDMC"), sdmc(mem),
sdmcWriteOnly(mem, true), selfNcch(mem), ncch(mem), userSaveData1(mem, ArchiveID::UserSaveData1),
userSaveData2(mem, ArchiveID::UserSaveData2), kernel(kernel), config(config), systemSaveData(mem) {}
userSaveData2(mem, ArchiveID::UserSaveData2), systemSaveData(mem), twlPhoto(mem), twlSound(mem), cardSpi(mem), kernel(kernel),
config(config) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -1,6 +1,7 @@
#pragma once
#include <cstring>
#include <optional>
#include "PICA/gpu.hpp"
#include "helpers.hpp"
#include "kernel_types.hpp"
@ -9,12 +10,12 @@
#include "result/result.hpp"
enum class GPUInterrupt : u8 {
PSC0 = 0, // Memory fill completed
PSC1 = 1, // ?
VBlank0 = 2, // ?
VBlank1 = 3, // ?
PPF = 4, // Display transfer finished
P3D = 5, // Command list processing finished
PSC0 = 0, // Memory fill completed
PSC1 = 1, // ?
VBlank0 = 2, // ?
VBlank1 = 3, // ?
PPF = 4, // Display transfer finished
P3D = 5, // Command list processing finished
DMA = 6
};
@ -22,12 +23,14 @@ enum class GPUInterrupt : u8 {
class Kernel;
class GPUService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::GPU;
Memory& mem;
GPU& gpu;
Kernel& kernel;
u32& currentPID; // Process ID of the current process
u8* sharedMem; // Pointer to GSP shared memory
u32& currentPID; // Process ID of the current process
u8* sharedMem; // Pointer to GSP shared memory
// At any point in time only 1 process has privileges to use rendering functions
// This is the PID of that process
@ -62,8 +65,8 @@ class GPUService {
// Used for saving and restoring GPU state via ImportDisplayCaptureInfo
struct CaptureInfo {
u32 leftFramebuffer; // Left framebuffer VA
u32 rightFramebuffer; // Right framebuffer VA (Top screen only)
u32 leftFramebuffer; // Left framebuffer VA
u32 rightFramebuffer; // Right framebuffer VA (Top screen only)
u32 format;
u32 stride;
};
@ -72,6 +75,7 @@ class GPUService {
// Service commands
void acquireRight(u32 messagePointer);
void flushDataCache(u32 messagePointer);
void invalidateDataCache(u32 messagePointer);
void importDisplayCaptureInfo(u32 messagePointer);
void readHwRegs(u32 messagePointer);
void registerInterruptRelayQueue(u32 messagePointer);
@ -106,16 +110,15 @@ class GPUService {
FramebufferUpdate* getTopFramebufferInfo() { return getFramebufferInfo(0); }
FramebufferUpdate* getBottomFramebufferInfo() { return getFramebufferInfo(1); }
public:
GPUService(Memory& mem, GPU& gpu, Kernel& kernel, u32& currentPID) : mem(mem), gpu(gpu),
kernel(kernel), currentPID(currentPID) {}
public:
GPUService(Memory& mem, GPU& gpu, Kernel& kernel, u32& currentPID) : mem(mem), gpu(gpu), kernel(kernel), currentPID(currentPID) {}
void reset();
void handleSyncRequest(u32 messagePointer);
void requestInterrupt(GPUInterrupt type);
void setSharedMem(u8* ptr) {
sharedMem = ptr;
if (ptr != nullptr) { // Zero-fill shared memory in case the process tries to read stale service data or vice versa
if (ptr != nullptr) { // Zero-fill shared memory in case the process tries to read stale service data or vice versa
std::memset(ptr, 0, 0x1000);
}
}
};
};

View file

@ -6,13 +6,13 @@
#include "result/result.hpp"
class LCDService {
Handle handle = KernelHandles::LCD;
Memory& mem;
MAKE_LOG_FUNCTION(log, gspLCDLogger)
// Service commands
void setLedForceOff(u32 messagePointer);
public:
public:
LCDService(Memory& mem) : mem(mem) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -38,6 +38,8 @@ namespace HID::Keys {
class Kernel;
class HIDService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::HID;
Memory& mem;
Kernel& kernel;
@ -54,6 +56,7 @@ class HIDService {
s16 circlePadX, circlePadY; // Circlepad state
s16 touchScreenX, touchScreenY; // Touchscreen state
s16 roll, pitch, yaw; // Gyroscope state
s16 accelX, accelY, accelZ; // Accelerometer state
bool accelerometerEnabled;
bool eventsInitialized;
@ -85,7 +88,14 @@ class HIDService {
*(T*)&sharedMem[offset] = value;
}
template <typename T>
T* getSharedMemPointer(size_t offset) {
return (T*)&sharedMem[offset];
}
public:
static constexpr float gyroscopeCoeff = 14.375f; // Same as retail 3DS
HIDService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {}
void reset();
void handleSyncRequest(u32 messagePointer);
@ -126,6 +136,12 @@ class HIDService {
void setPitch(s16 value) { pitch = value; }
void setYaw(s16 value) { yaw = value; }
void setAccel(s16 x, s16 y, s16 z) {
accelX = x;
accelY = y;
accelZ = z;
}
void updateInputs(u64 currentTimestamp);
void setSharedMem(u8* ptr) {

View file

@ -5,6 +5,8 @@
#include "memory.hpp"
class HTTPService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::HTTP;
Memory& mem;
MAKE_LOG_FUNCTION(log, httpLogger)

View file

@ -11,6 +11,8 @@
class Kernel;
class IRUserService {
using Handle = HorizonHandle;
enum class DeviceID : u8 {
CirclePadPro = 1,
};

View file

@ -8,6 +8,8 @@
class Kernel;
class LDRService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::LDR_RO;
Memory& mem;
Kernel& kernel;
@ -22,7 +24,7 @@ class LDRService {
void loadCRR(u32 messagePointer);
void unloadCRO(u32 messagePointer);
public:
public:
LDRService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -7,6 +7,8 @@
namespace MCU {
class HWCService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::MCU_HWC;
Memory& mem;
MAKE_LOG_FUNCTION(log, mcuLogger)
@ -15,6 +17,7 @@ namespace MCU {
// Service commands
void getBatteryLevel(u32 messagePointer);
void setInfoLEDPattern(u32 messagePointer);
public:
HWCService(Memory& mem, const EmulatorConfig& config) : mem(mem), config(config) {}

View file

@ -9,6 +9,8 @@
class Kernel;
class MICService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::MIC;
Memory& mem;
Kernel& kernel;
@ -29,14 +31,14 @@ class MICService {
void unmapSharedMem(u32 messagePointer);
void theCaptainToadFunction(u32 messagePointer);
u8 gain = 0; // How loud our microphone input signal is
u8 gain = 0; // How loud our microphone input signal is
bool micEnabled = false;
bool shouldClamp = false;
bool currentlySampling = false;
std::optional<Handle> eventHandle;
public:
public:
MICService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -6,7 +6,14 @@
#include "result/result.hpp"
class NDMService {
enum class ExclusiveState : u32 { None = 0, Infrastructure = 1, LocalComms = 2, StreetPass = 3, StreetPassData = 4 };
using Handle = HorizonHandle;
enum class ExclusiveState : u32 {
None = 0,
Infrastructure = 1,
LocalComms = 2,
StreetPass = 3,
StreetPassData = 4,
};
Handle handle = KernelHandles::NDM;
Memory& mem;
@ -25,7 +32,7 @@ class NDMService {
ExclusiveState exclusiveState = ExclusiveState::None;
public:
public:
NDMService(Memory& mem) : mem(mem) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -5,6 +5,8 @@
#include "memory.hpp"
class NewsUService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::NEWS_U;
Memory& mem;
MAKE_LOG_FUNCTION(log, newsLogger)

View file

@ -12,6 +12,8 @@
class Kernel;
class NFCService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::NFC;
Memory& mem;
Kernel& kernel;

View file

@ -6,6 +6,8 @@
#include "result/result.hpp"
class NIMService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::NIM;
Memory& mem;
MAKE_LOG_FUNCTION(log, nimLogger)
@ -13,7 +15,7 @@ class NIMService {
// Service commands
void initialize(u32 messagePointer);
public:
public:
NIMService(Memory& mem) : mem(mem) {}
void reset();
void handleSyncRequest(u32 messagePointer);

25
include/services/ns.hpp Normal file
View file

@ -0,0 +1,25 @@
#pragma once
#include "helpers.hpp"
#include "kernel_types.hpp"
#include "logger.hpp"
#include "memory.hpp"
#include "result/result.hpp"
class NSService {
Memory& mem;
MAKE_LOG_FUNCTION(log, nsLogger)
// Service commands
void launchTitle(u32 messagePointer);
public:
enum class Type {
S, // ns:s
P, // ns:p
C, // ns:c
};
NSService(Memory& mem) : mem(mem) {}
void reset();
void handleSyncRequest(u32 messagePointer, Type type);
};

Some files were not shown because too many files have changed in this diff Show more