mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-05-13 07:15:07 +12:00
Merge branch 'master' into open-bp-cpp
This commit is contained in:
commit
606158fb27
289 changed files with 18600 additions and 1763 deletions
45
include/PICA/draw_acceleration.hpp
Normal file
45
include/PICA/draw_acceleration.hpp
Normal file
|
@ -0,0 +1,45 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace PICA {
|
||||
struct DrawAcceleration {
|
||||
static constexpr u32 maxAttribCount = 16;
|
||||
static constexpr u32 maxLoaderCount = 12;
|
||||
|
||||
struct AttributeInfo {
|
||||
u32 offset;
|
||||
u32 stride;
|
||||
|
||||
u8 type;
|
||||
u8 componentCount;
|
||||
|
||||
std::array<float, 4> fixedValue; // For fixed attributes
|
||||
};
|
||||
|
||||
struct Loader {
|
||||
// Data to upload for this loader
|
||||
u8* data;
|
||||
usize size;
|
||||
};
|
||||
|
||||
u8* indexBuffer;
|
||||
|
||||
// Minimum and maximum index in the index buffer for a draw call
|
||||
u16 minimumIndex, maximumIndex;
|
||||
u32 totalAttribCount;
|
||||
u32 totalLoaderCount;
|
||||
u32 enabledAttributeMask;
|
||||
u32 fixedAttributes;
|
||||
u32 vertexDataSize;
|
||||
|
||||
std::array<AttributeInfo, maxAttribCount> attributeInfo;
|
||||
std::array<Loader, maxLoaderCount> loaders;
|
||||
|
||||
bool canBeAccelerated;
|
||||
bool indexed;
|
||||
bool useShortIndices;
|
||||
};
|
||||
} // namespace PICA
|
|
@ -2,7 +2,7 @@
|
|||
#include "helpers.hpp"
|
||||
#include "vertex_loader_rec.hpp"
|
||||
|
||||
// Common file for our PICA JITs (From vertex config -> CPU assembly and from PICA shader -> CPU assembly)
|
||||
// Common file for our PICA JITs (From PICA shader -> CPU assembly)
|
||||
|
||||
namespace Dynapica {
|
||||
#ifdef PANDA3DS_DYNAPICA_SUPPORTED
|
||||
|
|
|
@ -22,8 +22,11 @@ class ShaderJIT {
|
|||
|
||||
ShaderCache cache;
|
||||
#endif
|
||||
bool accurateMul = false;
|
||||
|
||||
public:
|
||||
void setAccurateMul(bool value) { accurateMul = value; }
|
||||
|
||||
#ifdef PANDA3DS_SHADER_JIT_SUPPORTED
|
||||
// Call this before starting to process a batch of vertices
|
||||
// This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader
|
||||
|
@ -36,11 +39,11 @@ class ShaderJIT {
|
|||
static constexpr bool isAvailable() { return true; }
|
||||
#else
|
||||
void prepare(PICAShader& shaderUnit) {
|
||||
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
|
||||
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
|
||||
}
|
||||
|
||||
void run(PICAShader& shaderUnit) {
|
||||
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
|
||||
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
|
||||
}
|
||||
|
||||
// Define dummy callback. This should never be called if the shader JIT is not supported
|
||||
|
|
|
@ -37,6 +37,8 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
|
|||
// Shows whether the loaded shader has any log2 and exp2 instructions
|
||||
bool codeHasLog2 = false;
|
||||
bool codeHasExp2 = false;
|
||||
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
|
||||
bool useSafeMUL = false;
|
||||
|
||||
oaknut::Label log2Func, exp2Func;
|
||||
oaknut::Label emitLog2Func();
|
||||
|
@ -123,7 +125,7 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
|
|||
PrologueCallback prologueCb = nullptr;
|
||||
|
||||
// Initialize our emitter with "allocSize" bytes of memory allocated for the code buffer
|
||||
ShaderEmitter() : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {}
|
||||
ShaderEmitter(bool useSafeMUL) : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()), useSafeMUL(useSafeMUL) {}
|
||||
|
||||
// PC must be a valid entrypoint here. It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does
|
||||
InstructionCallback getInstructionCallback(u32 pc) { return getLabelPointer<InstructionCallback>(instructionLabels.at(pc)); }
|
||||
|
|
|
@ -32,6 +32,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
Label negateVector;
|
||||
// Vector value of (1.0, 1.0, 1.0, 1.0) for SLT(i)/SGE(i)
|
||||
Label onesVector;
|
||||
// Vector value of (0xFF, 0xFF, 0xFF, 0) for setting the w component to 0 in DP3
|
||||
Label dp3Vector;
|
||||
|
||||
u32 recompilerPC = 0; // PC the recompiler is currently recompiling @
|
||||
u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop)
|
||||
|
@ -43,12 +45,17 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
// Shows whether the loaded shader has any log2 and exp2 instructions
|
||||
bool codeHasLog2 = false;
|
||||
bool codeHasExp2 = false;
|
||||
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
|
||||
bool useSafeMUL = false;
|
||||
|
||||
Xbyak::Label log2Func, exp2Func;
|
||||
Xbyak::Label emitLog2Func();
|
||||
Xbyak::Label emitExp2Func();
|
||||
Xbyak::util::Cpu cpuCaps;
|
||||
|
||||
// Emit a PICA200-compliant multiplication that handles "0 * inf = 0"
|
||||
void emitSafeMUL(Xbyak::Xmm src1, Xbyak::Xmm src2, Xbyak::Xmm scratch);
|
||||
|
||||
// Compile all instructions from [current recompiler PC, end)
|
||||
void compileUntil(const PICAShader& shaderUnit, u32 endPC);
|
||||
// Compile instruction "instr"
|
||||
|
@ -125,7 +132,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
PrologueCallback prologueCb = nullptr;
|
||||
|
||||
// Initialize our emitter with "allocSize" bytes of RWX memory
|
||||
ShaderEmitter() : Xbyak::CodeGenerator(allocSize) {
|
||||
ShaderEmitter(bool useSafeMUL) : Xbyak::CodeGenerator(allocSize), useSafeMUL(useSafeMUL) {
|
||||
cpuCaps = Xbyak::util::Cpu();
|
||||
|
||||
haveSSE4_1 = cpuCaps.has(Xbyak::util::Cpu::tSSE41);
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
|
||||
#include "PICA/draw_acceleration.hpp"
|
||||
#include "PICA/dynapica/shader_rec.hpp"
|
||||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/pica_vertex.hpp"
|
||||
|
@ -13,6 +14,12 @@
|
|||
#include "memory.hpp"
|
||||
#include "renderer.hpp"
|
||||
|
||||
enum class ShaderExecMode {
|
||||
Interpreter, // Interpret shaders on the CPU
|
||||
JIT, // Recompile shaders to CPU machine code
|
||||
Hardware, // Recompiler shaders to host shaders and run them on the GPU
|
||||
};
|
||||
|
||||
class GPU {
|
||||
static constexpr u32 regNum = 0x300;
|
||||
static constexpr u32 extRegNum = 0x1000;
|
||||
|
@ -45,7 +52,7 @@ class GPU {
|
|||
uint immediateModeVertIndex;
|
||||
uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading
|
||||
|
||||
template <bool indexed, bool useShaderJIT>
|
||||
template <bool indexed, ShaderExecMode mode>
|
||||
void drawArrays();
|
||||
|
||||
// Silly method of avoiding linking problems. TODO: Change to something less silly
|
||||
|
@ -81,6 +88,7 @@ class GPU {
|
|||
std::unique_ptr<Renderer> renderer;
|
||||
PICA::Vertex getImmediateModeVertex();
|
||||
|
||||
void getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed);
|
||||
public:
|
||||
// 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT
|
||||
// Encoded in PICA native format
|
||||
|
@ -92,6 +100,9 @@ class GPU {
|
|||
// Set to false by the renderer when the lighting_lut is uploaded ot the GPU
|
||||
bool lightingLUTDirty = false;
|
||||
|
||||
bool fogLUTDirty = false;
|
||||
std::array<uint32_t, 128> fogLUT;
|
||||
|
||||
GPU(Memory& mem, EmulatorConfig& config);
|
||||
void display() { renderer->display(); }
|
||||
void screenshot(const std::string& name) { renderer->screenshot(name); }
|
||||
|
@ -164,7 +175,8 @@ class GPU {
|
|||
u32 index = paddr - PhysicalAddrs::VRAM;
|
||||
return (T*)&vram[index];
|
||||
} else [[unlikely]] {
|
||||
Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr);
|
||||
Helpers::warn("[GPU] Tried to access unknown physical address: %08X", paddr);
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
257
include/PICA/pica_frag_config.hpp
Normal file
257
include/PICA/pica_frag_config.hpp
Normal file
|
@ -0,0 +1,257 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "PICA/pica_hash.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "bitfield.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace PICA {
|
||||
struct OutputConfig {
|
||||
union {
|
||||
u32 raw{};
|
||||
// Merge the enable + compare function into 1 field to avoid duplicate shaders
|
||||
// enable == off means a CompareFunction of Always
|
||||
BitField<0, 3, CompareFunction> alphaTestFunction;
|
||||
BitField<3, 1, u32> depthMapEnable;
|
||||
BitField<4, 4, LogicOpMode> logicOpMode;
|
||||
};
|
||||
};
|
||||
|
||||
struct TextureConfig {
|
||||
u32 texUnitConfig;
|
||||
u32 texEnvUpdateBuffer;
|
||||
|
||||
// There's 6 TEV stages, and each one is configured via 4 word-sized registers
|
||||
// (+ the constant color register, which we don't include here, otherwise we'd generate too many shaders)
|
||||
std::array<u32, 4 * 6> tevConfigs;
|
||||
};
|
||||
|
||||
struct FogConfig {
|
||||
union {
|
||||
u32 raw{};
|
||||
|
||||
BitField<0, 3, FogMode> mode;
|
||||
BitField<3, 1, u32> flipDepth;
|
||||
};
|
||||
};
|
||||
|
||||
struct Light {
|
||||
union {
|
||||
u16 raw;
|
||||
BitField<0, 3, u16> num;
|
||||
BitField<3, 1, u16> directional;
|
||||
BitField<4, 1, u16> twoSidedDiffuse;
|
||||
BitField<5, 1, u16> distanceAttenuationEnable;
|
||||
BitField<6, 1, u16> spotAttenuationEnable;
|
||||
BitField<7, 1, u16> geometricFactor0;
|
||||
BitField<8, 1, u16> geometricFactor1;
|
||||
BitField<9, 1, u16> shadowEnable;
|
||||
};
|
||||
};
|
||||
|
||||
struct LightingLUTConfig {
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> enable;
|
||||
BitField<1, 1, u32> absInput;
|
||||
BitField<2, 3, u32> type;
|
||||
BitField<5, 3, u32> scale;
|
||||
};
|
||||
};
|
||||
|
||||
struct LightingConfig {
|
||||
union {
|
||||
u32 raw{};
|
||||
BitField<0, 1, u32> enable;
|
||||
BitField<1, 4, u32> lightNum;
|
||||
BitField<5, 2, u32> bumpMode;
|
||||
BitField<7, 2, u32> bumpSelector;
|
||||
BitField<9, 1, u32> bumpRenorm;
|
||||
BitField<10, 1, u32> clampHighlights;
|
||||
BitField<11, 4, u32> config;
|
||||
BitField<15, 1, u32> enablePrimaryAlpha;
|
||||
BitField<16, 1, u32> enableSecondaryAlpha;
|
||||
BitField<17, 1, u32> enableShadow;
|
||||
BitField<18, 1, u32> shadowPrimary;
|
||||
BitField<19, 1, u32> shadowSecondary;
|
||||
BitField<20, 1, u32> shadowInvert;
|
||||
BitField<21, 1, u32> shadowAlpha;
|
||||
BitField<22, 2, u32> shadowSelector;
|
||||
};
|
||||
|
||||
std::array<LightingLUTConfig, 7> luts{};
|
||||
|
||||
std::array<Light, 8> lights{};
|
||||
|
||||
LightingConfig(const std::array<u32, 0x300>& regs) {
|
||||
// Ignore lighting registers if it's disabled
|
||||
if ((regs[InternalRegs::LightingEnable] & 1) == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 config0 = regs[InternalRegs::LightConfig0];
|
||||
const u32 config1 = regs[InternalRegs::LightConfig1];
|
||||
const u32 totalLightCount = Helpers::getBits<0, 3>(regs[InternalRegs::LightNumber]) + 1;
|
||||
|
||||
enable = 1;
|
||||
lightNum = totalLightCount;
|
||||
|
||||
enableShadow = Helpers::getBit<0>(config0);
|
||||
if (enableShadow) [[unlikely]] {
|
||||
shadowPrimary = Helpers::getBit<16>(config0);
|
||||
shadowSecondary = Helpers::getBit<17>(config0);
|
||||
shadowInvert = Helpers::getBit<18>(config0);
|
||||
shadowAlpha = Helpers::getBit<19>(config0);
|
||||
shadowSelector = Helpers::getBits<24, 2>(config0);
|
||||
}
|
||||
|
||||
enablePrimaryAlpha = Helpers::getBit<2>(config0);
|
||||
enableSecondaryAlpha = Helpers::getBit<3>(config0);
|
||||
config = Helpers::getBits<4, 4>(config0);
|
||||
|
||||
bumpSelector = Helpers::getBits<22, 2>(config0);
|
||||
clampHighlights = Helpers::getBit<27>(config0);
|
||||
bumpMode = Helpers::getBits<28, 2>(config0);
|
||||
bumpRenorm = Helpers::getBit<30>(config0) ^ 1; // 0 = enable so flip it with xor
|
||||
|
||||
for (int i = 0; i < totalLightCount; i++) {
|
||||
auto& light = lights[i];
|
||||
light.num = (regs[InternalRegs::LightPermutation] >> (i * 4)) & 0x7;
|
||||
|
||||
const u32 lightConfig = regs[InternalRegs::Light0Config + 0x10 * light.num];
|
||||
light.directional = Helpers::getBit<0>(lightConfig);
|
||||
light.twoSidedDiffuse = Helpers::getBit<1>(lightConfig);
|
||||
light.geometricFactor0 = Helpers::getBit<2>(lightConfig);
|
||||
light.geometricFactor1 = Helpers::getBit<3>(lightConfig);
|
||||
|
||||
light.shadowEnable = ((config1 >> light.num) & 1) ^ 1; // This also does 0 = enabled
|
||||
light.spotAttenuationEnable = ((config1 >> (8 + light.num)) & 1) ^ 1; // Same here
|
||||
light.distanceAttenuationEnable = ((config1 >> (24 + light.num)) & 1) ^ 1; // Of course same here
|
||||
}
|
||||
|
||||
LightingLUTConfig& d0 = luts[Lights::LUT_D0];
|
||||
LightingLUTConfig& d1 = luts[Lights::LUT_D1];
|
||||
LightingLUTConfig& sp = luts[spotlightLutIndex];
|
||||
LightingLUTConfig& fr = luts[Lights::LUT_FR];
|
||||
LightingLUTConfig& rb = luts[Lights::LUT_RB];
|
||||
LightingLUTConfig& rg = luts[Lights::LUT_RG];
|
||||
LightingLUTConfig& rr = luts[Lights::LUT_RR];
|
||||
|
||||
d0.enable = Helpers::getBit<16>(config1) == 0;
|
||||
d1.enable = Helpers::getBit<17>(config1) == 0;
|
||||
fr.enable = Helpers::getBit<19>(config1) == 0;
|
||||
rb.enable = Helpers::getBit<20>(config1) == 0;
|
||||
rg.enable = Helpers::getBit<21>(config1) == 0;
|
||||
rr.enable = Helpers::getBit<22>(config1) == 0;
|
||||
sp.enable = 1;
|
||||
|
||||
const u32 lutAbs = regs[InternalRegs::LightLUTAbs];
|
||||
const u32 lutSelect = regs[InternalRegs::LightLUTSelect];
|
||||
const u32 lutScale = regs[InternalRegs::LightLUTScale];
|
||||
|
||||
if (d0.enable) {
|
||||
d0.absInput = Helpers::getBit<1>(lutAbs) == 0;
|
||||
d0.type = Helpers::getBits<0, 3>(lutSelect);
|
||||
d0.scale = Helpers::getBits<0, 3>(lutScale);
|
||||
}
|
||||
|
||||
if (d1.enable) {
|
||||
d1.absInput = Helpers::getBit<5>(lutAbs) == 0;
|
||||
d1.type = Helpers::getBits<4, 3>(lutSelect);
|
||||
d1.scale = Helpers::getBits<4, 3>(lutScale);
|
||||
}
|
||||
|
||||
sp.absInput = Helpers::getBit<9>(lutAbs) == 0;
|
||||
sp.type = Helpers::getBits<8, 3>(lutSelect);
|
||||
sp.scale = Helpers::getBits<8, 3>(lutScale);
|
||||
|
||||
if (fr.enable) {
|
||||
fr.absInput = Helpers::getBit<13>(lutAbs) == 0;
|
||||
fr.type = Helpers::getBits<12, 3>(lutSelect);
|
||||
fr.scale = Helpers::getBits<12, 3>(lutScale);
|
||||
}
|
||||
|
||||
if (rb.enable) {
|
||||
rb.absInput = Helpers::getBit<17>(lutAbs) == 0;
|
||||
rb.type = Helpers::getBits<16, 3>(lutSelect);
|
||||
rb.scale = Helpers::getBits<16, 3>(lutScale);
|
||||
}
|
||||
|
||||
if (rg.enable) {
|
||||
rg.absInput = Helpers::getBit<21>(lutAbs) == 0;
|
||||
rg.type = Helpers::getBits<20, 3>(lutSelect);
|
||||
rg.scale = Helpers::getBits<20, 3>(lutScale);
|
||||
}
|
||||
|
||||
if (rr.enable) {
|
||||
rr.absInput = Helpers::getBit<25>(lutAbs) == 0;
|
||||
rr.type = Helpers::getBits<24, 3>(lutSelect);
|
||||
rr.scale = Helpers::getBits<24, 3>(lutScale);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Config used for identifying unique fragment pipeline configurations
|
||||
struct FragmentConfig {
|
||||
OutputConfig outConfig;
|
||||
TextureConfig texConfig;
|
||||
FogConfig fogConfig;
|
||||
LightingConfig lighting;
|
||||
|
||||
bool operator==(const FragmentConfig& config) const {
|
||||
// Hash function and equality operator required by std::unordered_map
|
||||
return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
|
||||
}
|
||||
|
||||
FragmentConfig(const std::array<u32, 0x300>& regs) : lighting(regs) {
|
||||
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
|
||||
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);
|
||||
|
||||
outConfig.alphaTestFunction =
|
||||
(alphaTestConfig & 1) ? static_cast<PICA::CompareFunction>(alphaTestFunction) : PICA::CompareFunction::Always;
|
||||
outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1;
|
||||
|
||||
// Shows if blending is enabled. If it is not enabled, then logic ops are enabled instead
|
||||
const bool blendingEnabled = (regs[InternalRegs::ColourOperation] & (1 << 8)) != 0;
|
||||
outConfig.logicOpMode = blendingEnabled ? LogicOpMode::Copy : LogicOpMode(Helpers::getBits<0, 4>(regs[InternalRegs::LogicOp]));
|
||||
|
||||
texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
|
||||
texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
|
||||
|
||||
// Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like
|
||||
// {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO
|
||||
#define setupTevStage(stage) \
|
||||
std::memcpy(&texConfig.tevConfigs[stage * 4], ®s[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \
|
||||
texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 4];
|
||||
|
||||
setupTevStage(0);
|
||||
setupTevStage(1);
|
||||
setupTevStage(2);
|
||||
setupTevStage(3);
|
||||
setupTevStage(4);
|
||||
setupTevStage(5);
|
||||
#undef setupTevStage
|
||||
|
||||
fogConfig.mode = (FogMode)Helpers::getBits<0, 3>(regs[InternalRegs::TexEnvUpdateBuffer]);
|
||||
|
||||
if (fogConfig.mode == FogMode::Fog) {
|
||||
fogConfig.flipDepth = Helpers::getBit<16>(regs[InternalRegs::TexEnvUpdateBuffer]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static_assert(
|
||||
std::has_unique_object_representations<OutputConfig>() && std::has_unique_object_representations<TextureConfig>() &&
|
||||
std::has_unique_object_representations<FogConfig>() && std::has_unique_object_representations<Light>()
|
||||
);
|
||||
} // namespace PICA
|
||||
|
||||
// Override std::hash for our fragment config class
|
||||
template <>
|
||||
struct std::hash<PICA::FragmentConfig> {
|
||||
std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); }
|
||||
};
|
47
include/PICA/pica_frag_uniforms.hpp
Normal file
47
include/PICA/pica_frag_uniforms.hpp
Normal file
|
@ -0,0 +1,47 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace PICA {
|
||||
struct LightUniform {
|
||||
using vec3 = std::array<float, 3>;
|
||||
|
||||
// std140 requires vec3s be aligned to 16 bytes
|
||||
alignas(16) vec3 specular0;
|
||||
alignas(16) vec3 specular1;
|
||||
alignas(16) vec3 diffuse;
|
||||
alignas(16) vec3 ambient;
|
||||
alignas(16) vec3 position;
|
||||
alignas(16) vec3 spotlightDirection;
|
||||
|
||||
float distanceAttenuationBias;
|
||||
float distanceAttenuationScale;
|
||||
};
|
||||
|
||||
struct FragmentUniforms {
|
||||
using vec3 = std::array<float, 3>;
|
||||
using vec4 = std::array<float, 4>;
|
||||
static constexpr usize tevStageCount = 6;
|
||||
|
||||
s32 alphaReference;
|
||||
float depthScale;
|
||||
float depthOffset;
|
||||
|
||||
alignas(16) vec4 constantColors[tevStageCount];
|
||||
alignas(16) vec4 tevBufferColor;
|
||||
alignas(16) vec4 clipCoords;
|
||||
|
||||
// Note: We upload these as a u32 and decode on GPU.
|
||||
// Particularly the fog colour since fog is really uncommon and it doesn't matter if we decode on GPU.
|
||||
u32 globalAmbientLight;
|
||||
u32 fogColor;
|
||||
// NOTE: THIS MUST BE LAST so that if lighting is disabled we can potentially omit uploading it
|
||||
LightUniform lightUniforms[8];
|
||||
};
|
||||
|
||||
// Assert that lightUniforms is the last member of the structure
|
||||
static_assert(offsetof(FragmentUniforms, lightUniforms) + 8 * sizeof(LightUniform) == sizeof(FragmentUniforms));
|
||||
} // namespace PICA
|
275
include/PICA/pica_simd.hpp
Normal file
275
include/PICA/pica_simd.hpp
Normal file
|
@ -0,0 +1,275 @@
|
|||
#pragma once
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
|
||||
#include "compiler_builtins.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
#if defined(_M_AMD64) || defined(__x86_64__)
|
||||
#define PICA_SIMD_X64
|
||||
#include <immintrin.h>
|
||||
#elif defined(_M_ARM64) || defined(__aarch64__)
|
||||
#define PICA_SIMD_ARM64
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
// Optimized functions for analyzing PICA index buffers (Finding minimum and maximum index values inside them)
|
||||
namespace PICA::IndexBuffer {
|
||||
// Non-SIMD, portable algorithm
|
||||
template <bool useShortIndices>
|
||||
std::pair<u16, u16> analyzePortable(u8* indexBuffer, u32 vertexCount) {
|
||||
u16 minimumIndex = std::numeric_limits<u16>::max();
|
||||
u16 maximumIndex = 0;
|
||||
|
||||
// Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them
|
||||
if constexpr (useShortIndices) {
|
||||
u16* indexBuffer16 = reinterpret_cast<u16*>(indexBuffer);
|
||||
|
||||
for (u32 i = 0; i < vertexCount; i++) {
|
||||
u16 index = indexBuffer16[i];
|
||||
minimumIndex = std::min(minimumIndex, index);
|
||||
maximumIndex = std::max(maximumIndex, index);
|
||||
}
|
||||
} else {
|
||||
for (u32 i = 0; i < vertexCount; i++) {
|
||||
u16 index = u16(indexBuffer[i]);
|
||||
minimumIndex = std::min(minimumIndex, index);
|
||||
maximumIndex = std::max(maximumIndex, index);
|
||||
}
|
||||
}
|
||||
|
||||
return {minimumIndex, maximumIndex};
|
||||
}
|
||||
|
||||
#ifdef PICA_SIMD_ARM64
|
||||
template <bool useShortIndices>
|
||||
ALWAYS_INLINE std::pair<u16, u16> analyzeNEON(u8* indexBuffer, u32 vertexCount) {
|
||||
// We process 16 bytes per iteration, which is 8 vertices if we're using u16 indices or 16 vertices if we're using u8 indices
|
||||
constexpr u32 vertsPerLoop = (useShortIndices) ? 8 : 16;
|
||||
|
||||
if (vertexCount < vertsPerLoop) {
|
||||
return analyzePortable<useShortIndices>(indexBuffer, vertexCount);
|
||||
}
|
||||
|
||||
u16 minimumIndex, maximumIndex;
|
||||
|
||||
if constexpr (useShortIndices) {
|
||||
// 16-bit indices
|
||||
uint16x8_t minima = vdupq_n_u16(0xffff);
|
||||
uint16x8_t maxima = vdupq_n_u16(0);
|
||||
|
||||
while (vertexCount >= vertsPerLoop) {
|
||||
const uint16x8_t data = vld1q_u16(reinterpret_cast<u16*>(indexBuffer));
|
||||
minima = vminq_u16(data, minima);
|
||||
maxima = vmaxq_u16(data, maxima);
|
||||
|
||||
indexBuffer += 16;
|
||||
vertexCount -= vertsPerLoop;
|
||||
}
|
||||
|
||||
// Do horizontal min/max operations to get the actual minimum and maximum from all the vertices we processed with SIMD
|
||||
// We want to gather the actual minimum and maximum in the line bottom lane of the minima/maxima vectors
|
||||
// uint16x4_t foldedMinima1 = vmin_u16(vget_high_u16(minima), vget_low_u16(minima));
|
||||
// uint16x4_t foldedMaxima1 = vmax_u16(vget_high_u16(maxima), vget_low_u16(maxima));
|
||||
|
||||
uint16x8_t foldedMinima1 = vpminq_u16(minima, minima);
|
||||
uint16x8_t foldedMinima2 = vpminq_u16(foldedMinima1, foldedMinima1);
|
||||
uint16x8_t foldedMinima3 = vpminq_u16(foldedMinima2, foldedMinima2);
|
||||
|
||||
uint16x8_t foldedMaxima1 = vpmaxq_u16(maxima, maxima);
|
||||
uint16x8_t foldedMaxima2 = vpmaxq_u16(foldedMaxima1, foldedMaxima1);
|
||||
uint16x8_t foldedMaxima3 = vpmaxq_u16(foldedMaxima2, foldedMaxima2);
|
||||
|
||||
minimumIndex = vgetq_lane_u16(foldedMinima3, 0);
|
||||
maximumIndex = vgetq_lane_u16(foldedMaxima3, 0);
|
||||
} else {
|
||||
// 8-bit indices
|
||||
uint8x16_t minima = vdupq_n_u8(0xff);
|
||||
uint8x16_t maxima = vdupq_n_u8(0);
|
||||
|
||||
while (vertexCount >= vertsPerLoop) {
|
||||
uint8x16_t data = vld1q_u8(indexBuffer);
|
||||
minima = vminq_u8(data, minima);
|
||||
maxima = vmaxq_u8(data, maxima);
|
||||
|
||||
indexBuffer += 16;
|
||||
vertexCount -= vertsPerLoop;
|
||||
}
|
||||
|
||||
// Do a similar horizontal min/max as in the u16 case, except now we're working uint8x16 instead of uint16x4 so we need 4 folds
|
||||
uint8x16_t foldedMinima1 = vpminq_u8(minima, minima);
|
||||
uint8x16_t foldedMinima2 = vpminq_u8(foldedMinima1, foldedMinima1);
|
||||
uint8x16_t foldedMinima3 = vpminq_u8(foldedMinima2, foldedMinima2);
|
||||
uint8x16_t foldedMinima4 = vpminq_u8(foldedMinima3, foldedMinima3);
|
||||
|
||||
uint8x16_t foldedMaxima1 = vpmaxq_u8(maxima, maxima);
|
||||
uint8x16_t foldedMaxima2 = vpmaxq_u8(foldedMaxima1, foldedMaxima1);
|
||||
uint8x16_t foldedMaxima3 = vpmaxq_u8(foldedMaxima2, foldedMaxima2);
|
||||
uint8x16_t foldedMaxima4 = vpmaxq_u8(foldedMaxima3, foldedMaxima3);
|
||||
|
||||
minimumIndex = u16(vgetq_lane_u8(foldedMinima4, 0));
|
||||
maximumIndex = u16(vgetq_lane_u8(foldedMaxima4, 0));
|
||||
}
|
||||
|
||||
// If any indices could not be processed cause the buffer size is not 16-byte aligned, process them the naive way
|
||||
// Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them
|
||||
while (vertexCount > 0) {
|
||||
if constexpr (useShortIndices) {
|
||||
u16 index = *reinterpret_cast<u16*>(indexBuffer);
|
||||
minimumIndex = std::min(minimumIndex, index);
|
||||
maximumIndex = std::max(maximumIndex, index);
|
||||
indexBuffer += 2;
|
||||
} else {
|
||||
u16 index = u16(*indexBuffer++);
|
||||
minimumIndex = std::min(minimumIndex, index);
|
||||
maximumIndex = std::max(maximumIndex, index);
|
||||
}
|
||||
|
||||
vertexCount -= 1;
|
||||
}
|
||||
|
||||
return {minimumIndex, maximumIndex};
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(PICA_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
|
||||
template <bool useShortIndices>
|
||||
ALWAYS_INLINE std::pair<u16, u16> analyzeSSE4_1(u8* indexBuffer, u32 vertexCount) {
|
||||
// We process 16 bytes per iteration, which is 8 vertices if we're using u16
|
||||
// indices or 16 vertices if we're using u8 indices
|
||||
constexpr u32 vertsPerLoop = (useShortIndices) ? 8 : 16;
|
||||
|
||||
if (vertexCount < vertsPerLoop) {
|
||||
return analyzePortable<useShortIndices>(indexBuffer, vertexCount);
|
||||
}
|
||||
|
||||
u16 minimumIndex, maximumIndex;
|
||||
|
||||
if constexpr (useShortIndices) {
|
||||
// Calculate the horizontal minimum/maximum value across an SSE vector of 16-bit unsigned integers.
|
||||
// Based on https://stackoverflow.com/a/22259607
|
||||
auto horizontalMin16 = [](__m128i vector) -> u16 { return u16(_mm_cvtsi128_si32(_mm_minpos_epu16(vector))); };
|
||||
|
||||
auto horizontalMax16 = [](__m128i vector) -> u16 {
|
||||
// We have an instruction to compute horizontal minimum but not maximum, so we use it.
|
||||
// To use it, we have to subtract each value from 0xFFFF (which we do with an xor), then execute a horizontal minimum
|
||||
__m128i flipped = _mm_xor_si128(vector, _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu));
|
||||
u16 min = u16(_mm_cvtsi128_si32(_mm_minpos_epu16(flipped)));
|
||||
return u16(min ^ 0xffff);
|
||||
};
|
||||
|
||||
// 16-bit indices
|
||||
// Initialize the minima vector to all FFs (So 0xFFFF for each 16-bit lane)
|
||||
// And the maxima vector to all 0s (0 for each 16-bit lane)
|
||||
__m128i minima = _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu);
|
||||
__m128i maxima = _mm_set_epi32(0, 0, 0, 0);
|
||||
|
||||
while (vertexCount >= vertsPerLoop) {
|
||||
const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(indexBuffer));
|
||||
minima = _mm_min_epu16(data, minima);
|
||||
maxima = _mm_max_epu16(data, maxima);
|
||||
|
||||
indexBuffer += 16;
|
||||
vertexCount -= vertsPerLoop;
|
||||
}
|
||||
|
||||
minimumIndex = u16(horizontalMin16(minima));
|
||||
maximumIndex = u16(horizontalMax16(maxima));
|
||||
} else {
|
||||
// Calculate the horizontal minimum/maximum value across an SSE vector of 8-bit unsigned integers.
|
||||
// Based on https://stackoverflow.com/a/22259607
|
||||
auto horizontalMin8 = [](__m128i vector) -> u8 {
|
||||
vector = _mm_min_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
vector = _mm_min_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(1, 1, 1, 1)));
|
||||
vector = _mm_min_epu8(vector, _mm_shufflelo_epi16(vector, _MM_SHUFFLE(1, 1, 1, 1)));
|
||||
vector = _mm_min_epu8(vector, _mm_srli_epi16(vector, 8));
|
||||
return u8(_mm_cvtsi128_si32(vector));
|
||||
};
|
||||
|
||||
auto horizontalMax8 = [](__m128i vector) -> u8 {
|
||||
vector = _mm_max_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
vector = _mm_max_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(1, 1, 1, 1)));
|
||||
vector = _mm_max_epu8(vector, _mm_shufflelo_epi16(vector, _MM_SHUFFLE(1, 1, 1, 1)));
|
||||
vector = _mm_max_epu8(vector, _mm_srli_epi16(vector, 8));
|
||||
return u8(_mm_cvtsi128_si32(vector));
|
||||
};
|
||||
|
||||
// 8-bit indices
|
||||
// Initialize the minima vector to all FFs (So 0xFF for each 8-bit lane)
|
||||
// And the maxima vector to all 0s (0 for each 8-bit lane)
|
||||
__m128i minima = _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu);
|
||||
__m128i maxima = _mm_set_epi32(0, 0, 0, 0);
|
||||
|
||||
while (vertexCount >= vertsPerLoop) {
|
||||
const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(indexBuffer));
|
||||
minima = _mm_min_epu8(data, minima);
|
||||
maxima = _mm_max_epu8(data, maxima);
|
||||
|
||||
indexBuffer += 16;
|
||||
vertexCount -= vertsPerLoop;
|
||||
}
|
||||
|
||||
minimumIndex = u16(horizontalMin8(minima));
|
||||
maximumIndex = u16(horizontalMax8(maxima));
|
||||
}
|
||||
|
||||
// If any indices could not be processed cause the buffer size
|
||||
// is not 16-byte aligned, process them the naive way
|
||||
// Calculate the minimum and maximum indices used in the index
|
||||
// buffer, so we'll only upload them
|
||||
while (vertexCount > 0) {
|
||||
if constexpr (useShortIndices) {
|
||||
u16 index = *reinterpret_cast<u16*>(indexBuffer);
|
||||
minimumIndex = std::min(minimumIndex, index);
|
||||
maximumIndex = std::max(maximumIndex, index);
|
||||
indexBuffer += 2;
|
||||
} else {
|
||||
u16 index = u16(*indexBuffer++);
|
||||
minimumIndex = std::min(minimumIndex, index);
|
||||
maximumIndex = std::max(maximumIndex, index);
|
||||
}
|
||||
|
||||
vertexCount -= 1;
|
||||
}
|
||||
|
||||
return {minimumIndex, maximumIndex};
|
||||
}
|
||||
#endif
|
||||
|
||||
// Analyzes a PICA index buffer to get the minimum and maximum indices in the
|
||||
// buffer, and returns them in a pair in the form [min, max]. Takes a template
|
||||
// parameter to decide whether the indices in the buffer are u8 or u16
|
||||
template <bool useShortIndices>
|
||||
std::pair<u16, u16> analyze(u8* indexBuffer, u32 vertexCount) {
|
||||
#if defined(PICA_SIMD_ARM64)
|
||||
return analyzeNEON<useShortIndices>(indexBuffer, vertexCount);
|
||||
#elif defined(PICA_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
|
||||
// Annoyingly, MSVC refuses to define __SSE4_1__ even when we're building with AVX
|
||||
return analyzeSSE4_1<useShortIndices>(indexBuffer, vertexCount);
|
||||
#else
|
||||
return analyzePortable<useShortIndices>(indexBuffer, vertexCount);
|
||||
#endif
|
||||
}
|
||||
|
||||
// In some really unfortunate scenarios (eg Android Studio emulator), we don't have access to glDrawRangeElementsBaseVertex
|
||||
// So we need to subtract the base vertex index from every index in the index buffer ourselves
|
||||
// This is not really common, so we do it without SIMD for the moment, just to be able to run on Android Studio
|
||||
template <bool useShortIndices>
|
||||
void subtractBaseIndex(u8* indexBuffer, u32 indexCount, u16 baseIndex) {
|
||||
// Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them
|
||||
if constexpr (useShortIndices) {
|
||||
u16* indexBuffer16 = reinterpret_cast<u16*>(indexBuffer);
|
||||
|
||||
for (u32 i = 0; i < indexCount; i++) {
|
||||
indexBuffer16[i] -= baseIndex;
|
||||
}
|
||||
} else {
|
||||
u8 baseIndex8 = u8(baseIndex);
|
||||
|
||||
for (u32 i = 0; i < indexCount; i++) {
|
||||
indexBuffer[i] -= baseIndex8;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace PICA::IndexBuffer
|
57
include/PICA/pica_vert_config.hpp
Normal file
57
include/PICA/pica_vert_config.hpp
Normal file
|
@ -0,0 +1,57 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "PICA/pica_hash.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "PICA/shader.hpp"
|
||||
#include "bitfield.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace PICA {
|
||||
// Configuration struct used
|
||||
struct VertConfig {
|
||||
PICAHash::HashType shaderHash;
|
||||
PICAHash::HashType opdescHash;
|
||||
u32 entrypoint;
|
||||
|
||||
// PICA registers for configuring shader output->fragment semantic mapping
|
||||
std::array<u32, 7> outmaps{};
|
||||
u16 outputMask;
|
||||
u8 outputCount;
|
||||
bool usingUbershader;
|
||||
|
||||
// Pad to 56 bytes so that the compiler won't insert unnecessary padding, which in turn will affect our unordered_map lookup
|
||||
// As the padding will get hashed and memcmp'd...
|
||||
u32 pad{};
|
||||
|
||||
bool operator==(const VertConfig& config) const {
|
||||
// Hash function and equality operator required by std::unordered_map
|
||||
return std::memcmp(this, &config, sizeof(VertConfig)) == 0;
|
||||
}
|
||||
|
||||
VertConfig(PICAShader& shader, const std::array<u32, 0x300>& regs, bool usingUbershader) : usingUbershader(usingUbershader) {
|
||||
shaderHash = shader.getCodeHash();
|
||||
opdescHash = shader.getOpdescHash();
|
||||
entrypoint = shader.entrypoint;
|
||||
|
||||
outputCount = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
|
||||
outputMask = regs[PICA::InternalRegs::VertexShaderOutputMask];
|
||||
for (int i = 0; i < outputCount; i++) {
|
||||
// Mask out unused bits
|
||||
outmaps[i] = regs[PICA::InternalRegs::ShaderOutmap0 + i] & 0x1F1F1F1F;
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace PICA
|
||||
|
||||
static_assert(sizeof(PICA::VertConfig) == 56);
|
||||
|
||||
// Override std::hash for our vertex config class
|
||||
template <>
|
||||
struct std::hash<PICA::VertConfig> {
|
||||
std::size_t operator()(const PICA::VertConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); }
|
||||
};
|
|
@ -1,7 +1,8 @@
|
|||
#pragma once
|
||||
#include "PICA/float_types.hpp"
|
||||
#include <array>
|
||||
|
||||
#include "PICA/float_types.hpp"
|
||||
|
||||
namespace PICA {
|
||||
// A representation of the output vertex as it comes out of the vertex shader, with padding and all
|
||||
struct Vertex {
|
||||
|
|
|
@ -51,6 +51,18 @@ namespace PICA {
|
|||
#undef defineTexEnv
|
||||
// clang-format on
|
||||
|
||||
// Fog registers
|
||||
FogColor = 0xE1,
|
||||
FogLUTIndex = 0xE6,
|
||||
FogLUTData0 = 0xE8,
|
||||
FogLUTData1 = 0xE9,
|
||||
FogLUTData2 = 0xEA,
|
||||
FogLUTData3 = 0xEB,
|
||||
FogLUTData4 = 0xEC,
|
||||
FogLUTData5 = 0xED,
|
||||
FogLUTData6 = 0xEE,
|
||||
FogLUTData7 = 0xEF,
|
||||
|
||||
// Framebuffer registers
|
||||
ColourOperation = 0x100,
|
||||
BlendFunc = 0x101,
|
||||
|
@ -67,7 +79,29 @@ namespace PICA {
|
|||
ColourBufferLoc = 0x11D,
|
||||
FramebufferSize = 0x11E,
|
||||
|
||||
//LightingRegs
|
||||
// Lighting registers
|
||||
LightingEnable = 0x8F,
|
||||
Light0Specular0 = 0x140,
|
||||
Light0Specular1 = 0x141,
|
||||
Light0Diffuse = 0x142,
|
||||
Light0Ambient = 0x143,
|
||||
Light0XY = 0x144,
|
||||
Light0Z = 0x145,
|
||||
Light0SpotlightXY = 0x146,
|
||||
Light0SpotlightZ = 0x147,
|
||||
Light0Config = 0x149,
|
||||
Light0AttenuationBias = 0x14A,
|
||||
Light0AttenuationScale = 0x14B,
|
||||
|
||||
LightGlobalAmbient = 0x1C0,
|
||||
LightNumber = 0x1C2,
|
||||
LightConfig0 = 0x1C3,
|
||||
LightConfig1 = 0x1C4,
|
||||
LightPermutation = 0x1D9,
|
||||
LightLUTAbs = 0x1D0,
|
||||
LightLUTSelect = 0x1D1,
|
||||
LightLUTScale = 0x1D2,
|
||||
|
||||
LightingLUTIndex = 0x01C5,
|
||||
LightingLUTData0 = 0x01C8,
|
||||
LightingLUTData1 = 0x01C9,
|
||||
|
@ -231,7 +265,8 @@ namespace PICA {
|
|||
enum : u32 {
|
||||
LUT_D0 = 0,
|
||||
LUT_D1,
|
||||
LUT_FR,
|
||||
// LUT 2 is not used, the emulator internally uses it for referring to the current source's spotlight in shaders
|
||||
LUT_FR = 0x3,
|
||||
LUT_RB,
|
||||
LUT_RG,
|
||||
LUT_RR,
|
||||
|
@ -255,6 +290,11 @@ namespace PICA {
|
|||
};
|
||||
}
|
||||
|
||||
// There's actually 8 different LUTs (SP0-SP7), one for each light with different indices (8-15)
|
||||
// We use an unused LUT value for "this light source's spotlight" instead and figure out which light source to use in compileLutLookup
|
||||
// This is particularly intuitive in several places, such as checking if a LUT is enabled
|
||||
static constexpr int spotlightLutIndex = 2;
|
||||
|
||||
enum class TextureFmt : u32 {
|
||||
RGBA8 = 0x0,
|
||||
RGB8 = 0x1,
|
||||
|
@ -345,4 +385,156 @@ namespace PICA {
|
|||
GeometryPrimitive = 3,
|
||||
};
|
||||
|
||||
enum class CompareFunction : u32 {
|
||||
Never = 0,
|
||||
Always = 1,
|
||||
Equal = 2,
|
||||
NotEqual = 3,
|
||||
Less = 4,
|
||||
LessOrEqual = 5,
|
||||
Greater = 6,
|
||||
GreaterOrEqual = 7,
|
||||
};
|
||||
|
||||
enum class LogicOpMode : u32 {
|
||||
Clear = 0,
|
||||
And = 1,
|
||||
ReverseAnd = 2,
|
||||
Copy = 3,
|
||||
Set = 4,
|
||||
InvertedCopy = 5,
|
||||
Nop = 6,
|
||||
Invert = 7,
|
||||
Nand = 8,
|
||||
Or = 9,
|
||||
Nor = 10,
|
||||
Xor = 11,
|
||||
Equiv = 12,
|
||||
InvertedAnd = 13,
|
||||
ReverseOr = 14,
|
||||
InvertedOr = 15,
|
||||
};
|
||||
|
||||
enum class FogMode : u32 {
|
||||
Disabled = 0,
|
||||
Fog = 5,
|
||||
Gas = 7,
|
||||
};
|
||||
|
||||
struct TexEnvConfig {
|
||||
enum class Source : u8 {
|
||||
PrimaryColor = 0x0,
|
||||
PrimaryFragmentColor = 0x1,
|
||||
SecondaryFragmentColor = 0x2,
|
||||
Texture0 = 0x3,
|
||||
Texture1 = 0x4,
|
||||
Texture2 = 0x5,
|
||||
Texture3 = 0x6,
|
||||
// TODO: Inbetween values are unknown
|
||||
PreviousBuffer = 0xD,
|
||||
Constant = 0xE,
|
||||
Previous = 0xF,
|
||||
};
|
||||
|
||||
enum class ColorOperand : u8 {
|
||||
SourceColor = 0x0,
|
||||
OneMinusSourceColor = 0x1,
|
||||
SourceAlpha = 0x2,
|
||||
OneMinusSourceAlpha = 0x3,
|
||||
SourceRed = 0x4,
|
||||
OneMinusSourceRed = 0x5,
|
||||
// TODO: Inbetween values are unknown
|
||||
SourceGreen = 0x8,
|
||||
OneMinusSourceGreen = 0x9,
|
||||
// Inbetween values are unknown
|
||||
SourceBlue = 0xC,
|
||||
OneMinusSourceBlue = 0xD,
|
||||
};
|
||||
|
||||
enum class AlphaOperand : u8 {
|
||||
SourceAlpha = 0x0,
|
||||
OneMinusSourceAlpha = 0x1,
|
||||
SourceRed = 0x2,
|
||||
OneMinusSourceRed = 0x3,
|
||||
SourceGreen = 0x4,
|
||||
OneMinusSourceGreen = 0x5,
|
||||
SourceBlue = 0x6,
|
||||
OneMinusSourceBlue = 0x7,
|
||||
};
|
||||
|
||||
enum class Operation : u8 {
|
||||
Replace = 0,
|
||||
Modulate = 1,
|
||||
Add = 2,
|
||||
AddSigned = 3,
|
||||
Lerp = 4,
|
||||
Subtract = 5,
|
||||
Dot3RGB = 6,
|
||||
Dot3RGBA = 7,
|
||||
MultiplyAdd = 8,
|
||||
AddMultiply = 9,
|
||||
};
|
||||
|
||||
// RGB sources
|
||||
Source colorSource1, colorSource2, colorSource3;
|
||||
// Alpha sources
|
||||
Source alphaSource1, alphaSource2, alphaSource3;
|
||||
|
||||
// RGB operands
|
||||
ColorOperand colorOperand1, colorOperand2, colorOperand3;
|
||||
// Alpha operands
|
||||
AlphaOperand alphaOperand1, alphaOperand2, alphaOperand3;
|
||||
|
||||
// Texture environment operations for this stage
|
||||
Operation colorOp, alphaOp;
|
||||
|
||||
u32 constColor;
|
||||
|
||||
private:
|
||||
// These are the only private members since their value doesn't actually reflect the scale
|
||||
// So we make them public so we'll always use the appropriate member functions instead
|
||||
u8 colorScale;
|
||||
u8 alphaScale;
|
||||
|
||||
public:
|
||||
// Create texture environment object from TEV registers
|
||||
TexEnvConfig(u32 source, u32 operand, u32 combiner, u32 color, u32 scale) : constColor(color) {
|
||||
colorSource1 = Helpers::getBits<0, 4, Source>(source);
|
||||
colorSource2 = Helpers::getBits<4, 4, Source>(source);
|
||||
colorSource3 = Helpers::getBits<8, 4, Source>(source);
|
||||
|
||||
alphaSource1 = Helpers::getBits<16, 4, Source>(source);
|
||||
alphaSource2 = Helpers::getBits<20, 4, Source>(source);
|
||||
alphaSource3 = Helpers::getBits<24, 4, Source>(source);
|
||||
|
||||
colorOperand1 = Helpers::getBits<0, 4, ColorOperand>(operand);
|
||||
colorOperand2 = Helpers::getBits<4, 4, ColorOperand>(operand);
|
||||
colorOperand3 = Helpers::getBits<8, 4, ColorOperand>(operand);
|
||||
|
||||
alphaOperand1 = Helpers::getBits<12, 3, AlphaOperand>(operand);
|
||||
alphaOperand2 = Helpers::getBits<16, 3, AlphaOperand>(operand);
|
||||
alphaOperand3 = Helpers::getBits<20, 3, AlphaOperand>(operand);
|
||||
|
||||
colorOp = Helpers::getBits<0, 4, Operation>(combiner);
|
||||
alphaOp = Helpers::getBits<16, 4, Operation>(combiner);
|
||||
|
||||
colorScale = Helpers::getBits<0, 2>(scale);
|
||||
alphaScale = Helpers::getBits<16, 2>(scale);
|
||||
}
|
||||
|
||||
u32 getColorScale() { return (colorScale <= 2) ? (1 << colorScale) : 1; }
|
||||
u32 getAlphaScale() { return (alphaScale <= 2) ? (1 << alphaScale) : 1; }
|
||||
|
||||
bool isPassthroughStage() {
|
||||
// clang-format off
|
||||
// Thank you to the Citra dev that wrote this out
|
||||
return (
|
||||
colorOp == Operation::Replace && alphaOp == Operation::Replace &&
|
||||
colorSource1 == Source::Previous && alphaSource1 == Source::Previous &&
|
||||
colorOperand1 == ColorOperand::SourceColor && alphaOperand1 == AlphaOperand::SourceAlpha &&
|
||||
getColorScale() == 1 && getAlphaScale() == 1
|
||||
);
|
||||
// clang-format on
|
||||
}
|
||||
};
|
||||
} // namespace PICA
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
#pragma once
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
|
||||
#include "PICA/float_types.hpp"
|
||||
|
@ -21,7 +23,7 @@ namespace ShaderOpcodes {
|
|||
DST = 0x04,
|
||||
EX2 = 0x05,
|
||||
LG2 = 0x06,
|
||||
LIT = 0x07,
|
||||
LITP = 0x07,
|
||||
MUL = 0x08,
|
||||
SGE = 0x09,
|
||||
SLT = 0x0A,
|
||||
|
@ -56,6 +58,10 @@ namespace ShaderOpcodes {
|
|||
};
|
||||
}
|
||||
|
||||
namespace PICA::ShaderGen {
|
||||
class ShaderDecompiler;
|
||||
};
|
||||
|
||||
// Note: All PICA f24 vec4 registers must have the alignas(16) specifier to make them easier to access in SSE/NEON code in the JIT
|
||||
class PICAShader {
|
||||
using f24 = Floats::f24;
|
||||
|
@ -90,14 +96,22 @@ class PICAShader {
|
|||
public:
|
||||
// These are placed close to the temp registers and co because it helps the JIT generate better code
|
||||
u32 entrypoint = 0; // Initial shader PC
|
||||
u32 boolUniform;
|
||||
std::array<std::array<u8, 4>, 4> intUniforms;
|
||||
|
||||
// We want these registers in this order & with this alignment for uploading them directly to a UBO
|
||||
// When emulating shaders on the GPU. Plus this alignment for float uniforms is necessary for doing SIMD in the shader->CPU recompilers.
|
||||
alignas(16) std::array<vec4f, 96> floatUniforms;
|
||||
alignas(16) std::array<std::array<u8, 4>, 4> intUniforms;
|
||||
u32 boolUniform;
|
||||
|
||||
alignas(16) std::array<vec4f, 16> fixedAttributes; // Fixed vertex attributes
|
||||
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
|
||||
alignas(16) std::array<vec4f, 16> outputs;
|
||||
alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); // Dummy register used by the JIT
|
||||
|
||||
// We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT
|
||||
// We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal
|
||||
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
|
||||
using Hash = PICAHash::HashType;
|
||||
|
||||
protected:
|
||||
std::array<u32, 128> operandDescriptors;
|
||||
|
@ -116,20 +130,20 @@ class PICAShader {
|
|||
std::array<CallInfo, 4> callInfo;
|
||||
ShaderType type;
|
||||
|
||||
// We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT
|
||||
// We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal
|
||||
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
|
||||
using Hash = PICAHash::HashType;
|
||||
|
||||
Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism)
|
||||
Hash lastOpdescHash = 0; // Last hash computed for the operand descriptors (Also used for the JIT)
|
||||
|
||||
public:
|
||||
bool uniformsDirty = false;
|
||||
|
||||
protected:
|
||||
bool codeHashDirty = false;
|
||||
bool opdescHashDirty = false;
|
||||
|
||||
// Add these as friend classes for the JIT so it has access to all important state
|
||||
friend class ShaderJIT;
|
||||
friend class ShaderEmitter;
|
||||
friend class PICA::ShaderGen::ShaderDecompiler;
|
||||
|
||||
vec4f getSource(u32 source);
|
||||
vec4f& getDest(u32 dest);
|
||||
|
@ -151,6 +165,7 @@ class PICAShader {
|
|||
void jmpc(u32 instruction);
|
||||
void jmpu(u32 instruction);
|
||||
void lg2(u32 instruction);
|
||||
void litp(u32 instruction);
|
||||
void loop(u32 instruction);
|
||||
void mad(u32 instruction);
|
||||
void madi(u32 instruction);
|
||||
|
@ -220,13 +235,9 @@ class PICAShader {
|
|||
public:
|
||||
static constexpr size_t maxInstructionCount = 4096;
|
||||
std::array<u32, maxInstructionCount> loadedShader; // Currently loaded & active shader
|
||||
std::array<u32, maxInstructionCount> bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
|
||||
|
||||
PICAShader(ShaderType type) : type(type) {}
|
||||
|
||||
// Theese functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them
|
||||
void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); }
|
||||
|
||||
void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; }
|
||||
void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; }
|
||||
|
||||
|
@ -235,7 +246,7 @@ class PICAShader {
|
|||
Helpers::panic("o no, shader upload overflew");
|
||||
}
|
||||
|
||||
bufferedShader[bufferIndex++] = word;
|
||||
loadedShader[bufferIndex++] = word;
|
||||
bufferIndex &= 0xfff;
|
||||
|
||||
codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
|
||||
|
@ -277,6 +288,7 @@ class PICAShader {
|
|||
uniform[2] = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
|
||||
uniform[3] = f24::fromRaw(floatUniformBuffer[0] >> 8);
|
||||
}
|
||||
uniformsDirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -288,6 +300,12 @@ class PICAShader {
|
|||
u[1] = getBits<8, 8>(word);
|
||||
u[2] = getBits<16, 8>(word);
|
||||
u[3] = getBits<24, 8>(word);
|
||||
uniformsDirty = true;
|
||||
}
|
||||
|
||||
void uploadBoolUniform(u32 value) {
|
||||
boolUniform = value;
|
||||
uniformsDirty = true;
|
||||
}
|
||||
|
||||
void run();
|
||||
|
@ -295,4 +313,13 @@ class PICAShader {
|
|||
|
||||
Hash getCodeHash();
|
||||
Hash getOpdescHash();
|
||||
};
|
||||
|
||||
// Returns how big the PICA uniforms are combined. Used for hw accelerated shaders where we upload the uniforms to our GPU.
|
||||
static constexpr usize totalUniformSize() { return sizeof(floatUniforms) + sizeof(intUniforms) + sizeof(boolUniform); }
|
||||
void* getUniformPointer() { return static_cast<void*>(&floatUniforms); }
|
||||
};
|
||||
|
||||
static_assert(
|
||||
offsetof(PICAShader, intUniforms) == offsetof(PICAShader, floatUniforms) + 96 * sizeof(float) * 4 &&
|
||||
offsetof(PICAShader, boolUniform) == offsetof(PICAShader, intUniforms) + 4 * sizeof(u8) * 4
|
||||
);
|
131
include/PICA/shader_decompiler.hpp
Normal file
131
include/PICA/shader_decompiler.hpp
Normal file
|
@ -0,0 +1,131 @@
|
|||
#pragma once
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "PICA/shader.hpp"
|
||||
#include "PICA/shader_gen_types.hpp"
|
||||
|
||||
struct EmulatorConfig;
|
||||
|
||||
namespace PICA::ShaderGen {
|
||||
// Control flow analysis is partially based on
|
||||
// https://github.com/PabloMK7/citra/blob/d0179559466ff09731d74474322ee880fbb44b00/src/video_core/shader/generator/glsl_shader_decompiler.cpp#L33
|
||||
struct ControlFlow {
|
||||
// A continuous range of addresses
|
||||
struct AddressRange {
|
||||
u32 start, end;
|
||||
AddressRange(u32 start, u32 end) : start(start), end(end) {}
|
||||
|
||||
// Use lexicographic comparison for functions in order to sort them in a set
|
||||
bool operator<(const AddressRange& other) const { return std::tie(start, end) < std::tie(other.start, other.end); }
|
||||
};
|
||||
|
||||
struct Function {
|
||||
using Labels = std::set<u32>;
|
||||
|
||||
enum class ExitMode {
|
||||
Unknown, // Can't guarantee whether we'll exit properly, fall back to CPU shaders (can happen with jmp shenanigans)
|
||||
AlwaysReturn, // All paths reach the return point.
|
||||
Conditional, // One or more code paths reach the return point or an END instruction conditionally.
|
||||
AlwaysEnd, // All paths reach an END instruction.
|
||||
};
|
||||
|
||||
u32 start; // Starting PC of the function
|
||||
u32 end; // End PC of the function
|
||||
Labels outLabels{}; // Labels this function can "goto" (jump) to
|
||||
ExitMode exitMode = ExitMode::Unknown;
|
||||
|
||||
explicit Function(u32 start, u32 end) : start(start), end(end) {}
|
||||
bool operator<(const Function& other) const { return AddressRange(start, end) < AddressRange(other.start, other.end); }
|
||||
|
||||
std::string getIdentifier() const { return fmt::format("fn_{}_{}", start, end); }
|
||||
// To handle weird control flow, we have to return from each function a bool that indicates whether or not the shader reached an end
|
||||
// instruction and should thus terminate. This is necessary for games like Rayman and Gravity Falls, which have "END" instructions called
|
||||
// from within functions deep in the callstack
|
||||
std::string getForwardDecl() const { return fmt::format("bool fn_{}_{}();\n", start, end); }
|
||||
std::string getCallStatement() const { return fmt::format("fn_{}_{}()", start, end); }
|
||||
};
|
||||
|
||||
std::set<Function> functions{};
|
||||
std::map<AddressRange, Function::ExitMode> exitMap{};
|
||||
|
||||
// Tells us whether analysis of the shader we're trying to compile failed, in which case we'll need to fail back to shader emulation
|
||||
// On the CPU
|
||||
bool analysisFailed = false;
|
||||
|
||||
// This will recursively add all functions called by the function too, as analyzeFunction will call addFunction on control flow instructions
|
||||
const Function* addFunction(const PICAShader& shader, u32 start, u32 end) {
|
||||
auto searchIterator = functions.find(Function(start, end));
|
||||
if (searchIterator != functions.end()) {
|
||||
return &(*searchIterator);
|
||||
}
|
||||
|
||||
// Add this function and analyze it if it doesn't already exist
|
||||
Function function(start, end);
|
||||
function.exitMode = analyzeFunction(shader, start, end, function.outLabels);
|
||||
|
||||
// This function could not be fully analyzed, report failure
|
||||
if (function.exitMode == Function::ExitMode::Unknown) {
|
||||
analysisFailed = true;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Add function to our function list
|
||||
auto [it, added] = functions.insert(std::move(function));
|
||||
return &(*it);
|
||||
}
|
||||
|
||||
void analyze(const PICAShader& shader, u32 entrypoint);
|
||||
Function::ExitMode analyzeFunction(const PICAShader& shader, u32 start, u32 end, Function::Labels& labels);
|
||||
};
|
||||
|
||||
class ShaderDecompiler {
|
||||
using AddressRange = ControlFlow::AddressRange;
|
||||
using Function = ControlFlow::Function;
|
||||
|
||||
ControlFlow controlFlow{};
|
||||
|
||||
PICAShader& shader;
|
||||
EmulatorConfig& config;
|
||||
std::string decompiledShader;
|
||||
|
||||
u32 entrypoint;
|
||||
|
||||
API api;
|
||||
Language language;
|
||||
bool compilationError = false;
|
||||
|
||||
void compileInstruction(u32& pc, bool& finished);
|
||||
// Compile range "range" and returns the end PC or if we're "finished" with the program (called an END instruction)
|
||||
std::pair<u32, bool> compileRange(const AddressRange& range);
|
||||
void callFunction(const Function& function);
|
||||
const Function* findFunction(const AddressRange& range);
|
||||
|
||||
void writeAttributes();
|
||||
|
||||
std::string getSource(u32 source, u32 index) const;
|
||||
std::string getDest(u32 dest) const;
|
||||
std::string getSwizzlePattern(u32 swizzle) const;
|
||||
std::string getDestSwizzle(u32 destinationMask) const;
|
||||
const char* getCondition(u32 cond, u32 refX, u32 refY);
|
||||
|
||||
void setDest(u32 operandDescriptor, const std::string& dest, const std::string& value);
|
||||
// Returns if the instruction uses the typical register encodings most instructions use
|
||||
// With some exceptions like MAD/MADI, and the control flow instructions which are completely different
|
||||
bool usesCommonEncoding(u32 instruction) const;
|
||||
|
||||
public:
|
||||
ShaderDecompiler(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language)
|
||||
: shader(shader), entrypoint(entrypoint), config(config), api(api), language(language), decompiledShader("") {}
|
||||
|
||||
std::string decompile();
|
||||
};
|
||||
|
||||
std::string decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language);
|
||||
} // namespace PICA::ShaderGen
|
43
include/PICA/shader_gen.hpp
Normal file
43
include/PICA/shader_gen.hpp
Normal file
|
@ -0,0 +1,43 @@
|
|||
#pragma once
|
||||
#include <string>
|
||||
|
||||
#include "PICA/gpu.hpp"
|
||||
#include "PICA/pica_frag_config.hpp"
|
||||
#include "PICA/pica_vert_config.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "PICA/shader_gen_types.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace PICA::ShaderGen {
|
||||
class FragmentGenerator {
|
||||
API api;
|
||||
Language language;
|
||||
|
||||
void compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config);
|
||||
void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config);
|
||||
void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config);
|
||||
void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index, const PICA::FragmentConfig& config);
|
||||
void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
|
||||
void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
|
||||
|
||||
void applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config);
|
||||
void compileLights(std::string& shader, const PICA::FragmentConfig& config);
|
||||
void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID);
|
||||
bool isSamplerEnabled(u32 environmentID, u32 lutID);
|
||||
|
||||
void compileFog(std::string& shader, const PICA::FragmentConfig& config);
|
||||
void compileLogicOps(std::string& shader, const PICA::FragmentConfig& config);
|
||||
|
||||
public:
|
||||
FragmentGenerator(API api, Language language) : api(api), language(language) {}
|
||||
std::string generate(const PICA::FragmentConfig& config, void* driverInfo = nullptr);
|
||||
std::string getDefaultVertexShader();
|
||||
// For when PICA shader is acceleration is enabled. Turn the PICA shader source into a proper vertex shader
|
||||
std::string getVertexShaderAccelerated(const std::string& picaSource, const PICA::VertConfig& vertConfig, bool usingUbershader);
|
||||
|
||||
void setTarget(API api, Language language) {
|
||||
this->api = api;
|
||||
this->language = language;
|
||||
}
|
||||
};
|
||||
}; // namespace PICA::ShaderGen
|
9
include/PICA/shader_gen_types.hpp
Normal file
9
include/PICA/shader_gen_types.hpp
Normal file
|
@ -0,0 +1,9 @@
|
|||
#pragma once
|
||||
|
||||
namespace PICA::ShaderGen {
|
||||
// Graphics API this shader is targetting
|
||||
enum class API { GL, GLES, Vulkan };
|
||||
|
||||
// Shading language to use (Only GLSL for the time being)
|
||||
enum class Language { GLSL };
|
||||
} // namespace PICA::ShaderGen
|
|
@ -2,10 +2,9 @@
|
|||
#include "PICA/shader.hpp"
|
||||
|
||||
class ShaderUnit {
|
||||
|
||||
public:
|
||||
PICAShader vs; // Vertex shader
|
||||
PICAShader gs; // Geometry shader
|
||||
public:
|
||||
PICAShader vs; // Vertex shader
|
||||
PICAShader gs; // Geometry shader
|
||||
|
||||
ShaderUnit() : vs(ShaderType::Vertex), gs(ShaderType::Geometry) {}
|
||||
void reset();
|
||||
|
|
100
include/align.hpp
Normal file
100
include/align.hpp
Normal file
|
@ -0,0 +1,100 @@
|
|||
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "compiler_builtins.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
namespace Common {
|
||||
template <typename T>
|
||||
constexpr bool isAligned(T value, unsigned int alignment) {
|
||||
return (value % static_cast<T>(alignment)) == 0;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T alignUp(T value, unsigned int alignment) {
|
||||
return (value + static_cast<T>(alignment - 1)) / static_cast<T>(alignment) * static_cast<T>(alignment);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T alignDown(T value, unsigned int alignment) {
|
||||
return value / static_cast<T>(alignment) * static_cast<T>(alignment);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr bool isAlignedPow2(T value, unsigned int alignment) {
|
||||
return (value & static_cast<T>(alignment - 1)) == 0;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T alignUpPow2(T value, unsigned int alignment) {
|
||||
return (value + static_cast<T>(alignment - 1)) & static_cast<T>(~static_cast<T>(alignment - 1));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T alignDownPow2(T value, unsigned int alignment) {
|
||||
return value & static_cast<T>(~static_cast<T>(alignment - 1));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr bool isPow2(T value) {
|
||||
return (value & (value - 1)) == 0;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T previousPow2(T value) {
|
||||
if (value == static_cast<T>(0)) return 0;
|
||||
|
||||
value |= (value >> 1);
|
||||
value |= (value >> 2);
|
||||
value |= (value >> 4);
|
||||
if constexpr (sizeof(T) >= 16) value |= (value >> 8);
|
||||
if constexpr (sizeof(T) >= 32) value |= (value >> 16);
|
||||
if constexpr (sizeof(T) >= 64) value |= (value >> 32);
|
||||
return value - (value >> 1);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T nextPow2(T value) {
|
||||
// https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
|
||||
if (value == static_cast<T>(0)) return 0;
|
||||
|
||||
value--;
|
||||
value |= (value >> 1);
|
||||
value |= (value >> 2);
|
||||
value |= (value >> 4);
|
||||
if constexpr (sizeof(T) >= 16) value |= (value >> 8);
|
||||
if constexpr (sizeof(T) >= 32) value |= (value >> 16);
|
||||
if constexpr (sizeof(T) >= 64) value |= (value >> 32);
|
||||
value++;
|
||||
return value;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE static void* alignedMalloc(size_t size, size_t alignment) {
|
||||
#ifdef _WIN32
|
||||
return _aligned_malloc(size, alignment);
|
||||
#else
|
||||
// Unaligned sizes are slow on macOS.
|
||||
#ifdef __APPLE__
|
||||
if (isPow2(alignment)) size = (size + alignment - 1) & ~(alignment - 1);
|
||||
#endif
|
||||
void* ret = nullptr;
|
||||
return (posix_memalign(&ret, alignment, size) == 0) ? ret : nullptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
ALWAYS_INLINE static void alignedFree(void* ptr) {
|
||||
#ifdef _MSC_VER
|
||||
_aligned_free(ptr);
|
||||
#else
|
||||
free(ptr);
|
||||
#endif
|
||||
}
|
||||
} // namespace Common
|
|
@ -54,6 +54,15 @@ namespace Audio::AAC {
|
|||
u32_le sampleCount;
|
||||
};
|
||||
|
||||
struct DecodeRequest {
|
||||
u32_le address; // Address of input AAC stream
|
||||
u32_le size; // Size of input AAC stream
|
||||
u32_le destAddrLeft; // Output address for left channel samples
|
||||
u32_le destAddrRight; // Output address for right channel samples
|
||||
u32_le unknown1;
|
||||
u32_le unknown2;
|
||||
};
|
||||
|
||||
struct Message {
|
||||
u16_le mode = Mode::None; // Encode or decode AAC?
|
||||
u16_le command = Command::Init;
|
||||
|
@ -62,7 +71,9 @@ namespace Audio::AAC {
|
|||
// Info on the AAC request
|
||||
union {
|
||||
std::array<u8, 24> commandData{};
|
||||
|
||||
DecodeResponse decodeResponse;
|
||||
DecodeRequest decodeRequest;
|
||||
};
|
||||
};
|
||||
|
||||
|
|
25
include/audio/aac_decoder.hpp
Normal file
25
include/audio/aac_decoder.hpp
Normal file
|
@ -0,0 +1,25 @@
|
|||
#pragma once
|
||||
#include <functional>
|
||||
|
||||
#include "audio/aac.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
struct AAC_DECODER_INSTANCE;
|
||||
|
||||
namespace Audio::AAC {
|
||||
class Decoder {
|
||||
using DecoderHandle = AAC_DECODER_INSTANCE*;
|
||||
using PaddrCallback = std::function<u8*(u32)>;
|
||||
|
||||
DecoderHandle decoderHandle = nullptr;
|
||||
|
||||
bool isInitialized() { return decoderHandle != nullptr; }
|
||||
void initialize();
|
||||
|
||||
public:
|
||||
// Decode function. Takes in a reference to the AAC response & request, and a callback for paddr -> pointer conversions
|
||||
// We also allow for optionally muting the AAC output (setting all of it to 0) instead of properly decoding it, for debug/research purposes
|
||||
void decode(AAC::Message& response, const AAC::Message& request, PaddrCallback paddrCallback, bool enableAudio = true);
|
||||
~Decoder();
|
||||
};
|
||||
} // namespace Audio::AAC
|
58
include/audio/audio_interpolation.hpp
Normal file
58
include/audio/audio_interpolation.hpp
Normal file
|
@ -0,0 +1,58 @@
|
|||
// Copyright 2016 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <deque>
|
||||
|
||||
#include "audio/hle_mixer.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace Audio::Interpolation {
|
||||
// A variable length buffer of signed PCM16 stereo samples.
|
||||
using StereoBuffer16 = std::deque<std::array<s16, 2>>;
|
||||
using StereoFrame16 = Audio::DSPMixer::StereoFrame<s16>;
|
||||
|
||||
struct State {
|
||||
// Two historical samples.
|
||||
std::array<s16, 2> xn1 = {}; //< x[n-1]
|
||||
std::array<s16, 2> xn2 = {}; //< x[n-2]
|
||||
// Current fractional position.
|
||||
u64 fposition = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay.
|
||||
* @param state Interpolation state.
|
||||
* @param input Input buffer.
|
||||
* @param rate Stretch factor. Must be a positive non-zero value.
|
||||
* rate > 1.0 performs decimation and rate < 1.0 performs upsampling.
|
||||
* @param output The resampled audio buffer.
|
||||
* @param outputi The index of output to start writing to.
|
||||
*/
|
||||
void none(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi);
|
||||
|
||||
/**
|
||||
* Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay.
|
||||
* @param state Interpolation state.
|
||||
* @param input Input buffer.
|
||||
* @param rate Stretch factor. Must be a positive non-zero value.
|
||||
* rate > 1.0 performs decimation and rate < 1.0 performs upsampling.
|
||||
* @param output The resampled audio buffer.
|
||||
* @param outputi The index of output to start writing to.
|
||||
*/
|
||||
void linear(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi);
|
||||
|
||||
/**
|
||||
* Polyphase interpolation. This is currently stubbed to just perform linear interpolation
|
||||
* @param state Interpolation state.
|
||||
* @param input Input buffer.
|
||||
* @param rate Stretch factor. Must be a positive non-zero value.
|
||||
* rate > 1.0 performs decimation and rate < 1.0 performs upsampling.
|
||||
* @param output The resampled audio buffer.
|
||||
* @param outputi The index of output to start writing to.
|
||||
*/
|
||||
void polyphase(State& state, StereoBuffer16& input, float rate, StereoFrame16& output, usize& outputi);
|
||||
} // namespace Audio::Interpolation
|
|
@ -8,12 +8,13 @@
|
|||
|
||||
#include "helpers.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "scheduler.hpp"
|
||||
#include "ring_buffer.hpp"
|
||||
#include "scheduler.hpp"
|
||||
|
||||
// The DSP core must have access to the DSP service to be able to trigger interrupts properly
|
||||
class DSPService;
|
||||
class Memory;
|
||||
struct EmulatorConfig;
|
||||
|
||||
namespace Audio {
|
||||
// There are 160 stereo samples in 1 audio frame, so 320 samples total
|
||||
|
@ -24,12 +25,14 @@ namespace Audio {
|
|||
static constexpr u64 lleSlice = 16384;
|
||||
|
||||
class DSPCore {
|
||||
using Samples = Common::RingBuffer<s16, 1024>;
|
||||
// 0x2000 stereo (= 2 channel) samples
|
||||
using Samples = Common::RingBuffer<s16, 0x2000 * 2>;
|
||||
|
||||
protected:
|
||||
Memory& mem;
|
||||
Scheduler& scheduler;
|
||||
DSPService& dspService;
|
||||
EmulatorConfig& settings;
|
||||
|
||||
Samples sampleBuffer;
|
||||
bool audioEnabled = false;
|
||||
|
@ -38,12 +41,12 @@ namespace Audio {
|
|||
|
||||
public:
|
||||
enum class Type { Null, Teakra, HLE };
|
||||
DSPCore(Memory& mem, Scheduler& scheduler, DSPService& dspService)
|
||||
: mem(mem), scheduler(scheduler), dspService(dspService) {}
|
||||
DSPCore(Memory& mem, Scheduler& scheduler, DSPService& dspService, EmulatorConfig& settings)
|
||||
: mem(mem), scheduler(scheduler), dspService(dspService), settings(settings) {}
|
||||
virtual ~DSPCore() {}
|
||||
|
||||
virtual void reset() = 0;
|
||||
virtual void runAudioFrame() = 0;
|
||||
virtual void runAudioFrame(u64 eventTimestamp) = 0;
|
||||
virtual u8* getDspMemory() = 0;
|
||||
|
||||
virtual u16 recvData(u32 regId) = 0;
|
||||
|
@ -62,5 +65,5 @@ namespace Audio {
|
|||
virtual void setAudioEnabled(bool enable) { audioEnabled = enable; }
|
||||
};
|
||||
|
||||
std::unique_ptr<DSPCore> makeDSPCore(DSPCore::Type type, Memory& mem, Scheduler& scheduler, DSPService& dspService);
|
||||
std::unique_ptr<DSPCore> makeDSPCore(EmulatorConfig& config, Memory& mem, Scheduler& scheduler, DSPService& dspService);
|
||||
} // namespace Audio
|
|
@ -324,8 +324,8 @@ namespace Audio::HLE {
|
|||
BitField<15, 1, u32> outputBufferCountDirty;
|
||||
BitField<16, 1, u32> masterVolumeDirty;
|
||||
|
||||
BitField<24, 1, u32> auxReturnVolume0Dirty;
|
||||
BitField<25, 1, u32> auxReturnVolume1Dirty;
|
||||
BitField<24, 1, u32> auxVolume0Dirty;
|
||||
BitField<25, 1, u32> auxVolume1Dirty;
|
||||
BitField<26, 1, u32> outputFormatDirty;
|
||||
BitField<27, 1, u32> clippingModeDirty;
|
||||
BitField<28, 1, u32> headphonesConnectedDirty;
|
||||
|
@ -337,7 +337,7 @@ namespace Audio::HLE {
|
|||
/// The DSP has three intermediate audio mixers. This controls the volume level (0.0-1.0) for
|
||||
/// each at the final mixer.
|
||||
float_le masterVolume;
|
||||
std::array<float_le, 2> auxReturnVolume;
|
||||
std::array<float_le, 2> auxVolumes;
|
||||
|
||||
u16_le outputBufferCount;
|
||||
u16 pad1[2];
|
||||
|
@ -422,7 +422,7 @@ namespace Audio::HLE {
|
|||
|
||||
struct DspStatus {
|
||||
u16_le unknown;
|
||||
u16_le dropped_frames;
|
||||
u16_le droppedFrames;
|
||||
u16 pad0[0xE];
|
||||
};
|
||||
ASSERT_DSP_STRUCT(DspStatus, 32);
|
||||
|
|
78
include/audio/dsp_simd.hpp
Normal file
78
include/audio/dsp_simd.hpp
Normal file
|
@ -0,0 +1,78 @@
|
|||
#pragma once
|
||||
|
||||
#include "audio/hle_mixer.hpp"
|
||||
#include "compiler_builtins.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
#if defined(_M_AMD64) || defined(__x86_64__)
|
||||
#define DSP_SIMD_X64
|
||||
#include <immintrin.h>
|
||||
#elif defined(_M_ARM64) || defined(__aarch64__)
|
||||
#define DSP_SIMD_ARM64
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
// Optimized SIMD functions for mixing the stereo output of a DSP voice into a quadraphonic intermediate mix
|
||||
namespace DSP::MixIntoQuad {
|
||||
using IntermediateMix = Audio::DSPMixer::IntermediateMix;
|
||||
using StereoFrame16 = Audio::DSPMixer::StereoFrame<s16>;
|
||||
|
||||
// Non-SIMD, portable algorithm
|
||||
ALWAYS_INLINE static void mixPortable(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
|
||||
for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) {
|
||||
// Mono samples are in the format: (l, r)
|
||||
// When converting to quad, gain0 and gain2 are applied to the left sample, gain1 and gain3 to the right one
|
||||
mix[sampleIndex][0] += s32(frame[sampleIndex][0] * gains[0]);
|
||||
mix[sampleIndex][1] += s32(frame[sampleIndex][1] * gains[1]);
|
||||
mix[sampleIndex][2] += s32(frame[sampleIndex][0] * gains[2]);
|
||||
mix[sampleIndex][3] += s32(frame[sampleIndex][1] * gains[3]);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(DSP_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
|
||||
ALWAYS_INLINE static void mixSSE4_1(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
|
||||
__m128 gains_ = _mm_load_ps(gains);
|
||||
|
||||
for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) {
|
||||
// The stereo samples, repeated every 4 bytes inside the vector register
|
||||
__m128i stereoSamples = _mm_castps_si128(_mm_load1_ps((float*)&frame[sampleIndex][0]));
|
||||
|
||||
__m128 currentFrame = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(stereoSamples));
|
||||
__m128i offset = _mm_cvttps_epi32(_mm_mul_ps(currentFrame, gains_));
|
||||
__m128i intermediateMixPrev = _mm_load_si128((__m128i*)&mix[sampleIndex][0]);
|
||||
__m128i result = _mm_add_epi32(intermediateMixPrev, offset);
|
||||
_mm_store_si128((__m128i*)&mix[sampleIndex][0], result);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DSP_SIMD_ARM64
|
||||
ALWAYS_INLINE static void mixNEON(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
|
||||
float32x4_t gains_ = vld1q_f32(gains);
|
||||
|
||||
for (usize sampleIndex = 0; sampleIndex < Audio::samplesInFrame; sampleIndex++) {
|
||||
// Load l and r samples and repeat them every 4 bytes
|
||||
int32x4_t stereoSamples = vld1q_dup_s32((s32*)&frame[sampleIndex][0]);
|
||||
// Expand the bottom 4 s16 samples into an int32x4 with sign extension, then convert them to float32x4
|
||||
float32x4_t currentFrame = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vreinterpretq_s16_s32(stereoSamples))));
|
||||
|
||||
// Multiply samples by their respective gains, truncate the result, and add it into the intermediate mix buffer
|
||||
int32x4_t offset = vcvtq_s32_f32(vmulq_f32(currentFrame, gains_));
|
||||
int32x4_t intermediateMixPrev = vld1q_s32((s32*)&mix[sampleIndex][0]);
|
||||
int32x4_t result = vaddq_s32(intermediateMixPrev, offset);
|
||||
vst1q_s32((s32*)&mix[sampleIndex][0], result);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Mixes the stereo output of a DSP voice into a quadraphonic intermediate mix
|
||||
static void mix(IntermediateMix& mix, StereoFrame16& frame, const float* gains) {
|
||||
#if defined(DSP_SIMD_ARM64)
|
||||
return mixNEON(mix, frame, gains);
|
||||
#elif defined(DSP_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
|
||||
return mixSSE4_1(mix, frame, gains);
|
||||
#else
|
||||
return mixPortable(mix, frame, gains);
|
||||
#endif
|
||||
}
|
||||
} // namespace DSP::MixIntoQuad
|
|
@ -2,18 +2,19 @@
|
|||
#include <array>
|
||||
#include <cassert>
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
|
||||
#include "audio/aac.hpp"
|
||||
#include "audio/aac_decoder.hpp"
|
||||
#include "audio/audio_interpolation.hpp"
|
||||
#include "audio/dsp_core.hpp"
|
||||
#include "audio/dsp_shared_mem.hpp"
|
||||
#include "audio/hle_mixer.hpp"
|
||||
#include "memory.hpp"
|
||||
|
||||
namespace Audio {
|
||||
using SampleFormat = HLE::SourceConfiguration::Configuration::Format;
|
||||
using SourceType = HLE::SourceConfiguration::Configuration::MonoOrStereo;
|
||||
|
||||
struct DSPSource {
|
||||
// Audio buffer information
|
||||
// https://www.3dbrew.org/wiki/DSP_Memory_Region
|
||||
|
@ -33,8 +34,8 @@ namespace Audio {
|
|||
SampleFormat format;
|
||||
SourceType sourceType;
|
||||
|
||||
bool fromQueue = false; // Is this buffer from the buffer queue or an embedded buffer?
|
||||
bool hasPlayedOnce = false; // Has the buffer been played at least once before?
|
||||
bool fromQueue = false; // Is this buffer from the buffer queue or an embedded buffer?
|
||||
bool hasPlayedOnce = false; // Has the buffer been played at least once before?
|
||||
|
||||
bool operator<(const Buffer& other) const {
|
||||
// Lower ID = Higher priority
|
||||
|
@ -42,17 +43,34 @@ namespace Audio {
|
|||
return this->bufferID > other.bufferID;
|
||||
}
|
||||
};
|
||||
|
||||
// Buffer of decoded PCM16 samples. TODO: Are there better alternatives to use over deque?
|
||||
using SampleBuffer = std::deque<std::array<s16, 2>>;
|
||||
|
||||
using BufferQueue = std::priority_queue<Buffer>;
|
||||
using InterpolationMode = HLE::SourceConfiguration::Configuration::InterpolationMode;
|
||||
using InterpolationState = Audio::Interpolation::State;
|
||||
|
||||
// The samples this voice output for this audio frame.
|
||||
// Aligned to 4 for SIMD purposes.
|
||||
alignas(4) DSPMixer::StereoFrame<s16> currentFrame;
|
||||
BufferQueue buffers;
|
||||
|
||||
SampleFormat sampleFormat = SampleFormat::ADPCM;
|
||||
SourceType sourceType = SourceType::Stereo;
|
||||
InterpolationMode interpolationMode = InterpolationMode::Linear;
|
||||
InterpolationState interpolationState;
|
||||
|
||||
// There's one gain configuration for each of the 3 intermediate mixing stages
|
||||
// And each gain configuration is composed of 4 gain values, one for each sample in a quad-channel sample
|
||||
// Aligned to 16 for SIMD purposes
|
||||
alignas(16) std::array<std::array<float, 4>, 3> gains;
|
||||
// Of the 3 intermediate mix stages, typically only the first one is actually enabled and the other ones do nothing
|
||||
// Ie their gain is vec4(0.0). We track which stages are disabled (have a gain of all 0s) using this bitfield and skip them
|
||||
// In order to save up on CPU time.
|
||||
uint enabledMixStages = 0;
|
||||
|
||||
std::array<float, 3> gain0, gain1, gain2;
|
||||
u32 samplePosition; // Sample number into the current audio buffer
|
||||
float rateMultiplier;
|
||||
u16 syncCount;
|
||||
u16 currentBufferID;
|
||||
u16 previousBufferID;
|
||||
|
@ -95,22 +113,23 @@ namespace Audio {
|
|||
// The audio frame types are public in case we want to use them for unit tests
|
||||
public:
|
||||
template <typename T, usize channelCount = 1>
|
||||
using Sample = std::array<T, channelCount>;
|
||||
using Sample = DSPMixer::Sample<T, channelCount>;
|
||||
|
||||
template <typename T, usize channelCount>
|
||||
using Frame = std::array<Sample<T, channelCount>, 160>;
|
||||
using Frame = DSPMixer::Frame<T, channelCount>;
|
||||
|
||||
template <typename T>
|
||||
using MonoFrame = Frame<T, 1>;
|
||||
using MonoFrame = DSPMixer::MonoFrame<T>;
|
||||
|
||||
template <typename T>
|
||||
using StereoFrame = Frame<T, 2>;
|
||||
using StereoFrame = DSPMixer::StereoFrame<T>;
|
||||
|
||||
template <typename T>
|
||||
using QuadFrame = Frame<T, 4>;
|
||||
using QuadFrame = DSPMixer::QuadFrame<T>;
|
||||
|
||||
using Source = Audio::DSPSource;
|
||||
using SampleBuffer = Source::SampleBuffer;
|
||||
using IntermediateMix = DSPMixer::IntermediateMix;
|
||||
|
||||
private:
|
||||
enum class DSPState : u32 {
|
||||
|
@ -127,6 +146,9 @@ namespace Audio {
|
|||
std::array<Source, Audio::HLE::sourceCount> sources; // DSP voices
|
||||
Audio::HLE::DspMemory dspRam;
|
||||
|
||||
Audio::DSPMixer mixer;
|
||||
std::unique_ptr<Audio::AAC::Decoder> aacDecoder;
|
||||
|
||||
void resetAudioPipe();
|
||||
bool loaded = false; // Have we loaded a component?
|
||||
|
||||
|
@ -142,7 +164,7 @@ namespace Audio {
|
|||
} else if (counter1 == 0xffff && counter0 != 0xfffe) {
|
||||
return 0;
|
||||
} else {
|
||||
return counter0 > counter1 ? 0 : 0;
|
||||
return (counter0 > counter1) ? 0 : 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -169,9 +191,12 @@ namespace Audio {
|
|||
|
||||
void handleAACRequest(const AAC::Message& request);
|
||||
void updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients);
|
||||
void updateMixerConfig(HLE::SharedMemory& sharedMem);
|
||||
void generateFrame(StereoFrame<s16>& frame);
|
||||
void generateFrame(DSPSource& source);
|
||||
void outputFrame();
|
||||
// Perform the final mix, mixing the quadraphonic samples from all voices into the output audio frame
|
||||
void performMix(Audio::HLE::SharedMemory& readRegion, Audio::HLE::SharedMemory& writeRegion);
|
||||
|
||||
// Decode an entire buffer worth of audio
|
||||
void decodeBuffer(DSPSource& source);
|
||||
|
@ -181,11 +206,11 @@ namespace Audio {
|
|||
SampleBuffer decodeADPCM(const u8* data, usize sampleCount, Source& source);
|
||||
|
||||
public:
|
||||
HLE_DSP(Memory& mem, Scheduler& scheduler, DSPService& dspService);
|
||||
HLE_DSP(Memory& mem, Scheduler& scheduler, DSPService& dspService, EmulatorConfig& config);
|
||||
~HLE_DSP() override {}
|
||||
|
||||
void reset() override;
|
||||
void runAudioFrame() override;
|
||||
void runAudioFrame(u64 eventTimestamp) override;
|
||||
|
||||
u8* getDspMemory() override { return dspRam.rawMemory.data(); }
|
||||
|
||||
|
@ -199,5 +224,4 @@ namespace Audio {
|
|||
void setSemaphore(u16 value) override {}
|
||||
void setSemaphoreMask(u16 value) override {}
|
||||
};
|
||||
|
||||
} // namespace Audio
|
||||
|
|
50
include/audio/hle_mixer.hpp
Normal file
50
include/audio/hle_mixer.hpp
Normal file
|
@ -0,0 +1,50 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
|
||||
#include "audio/dsp_shared_mem.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace Audio {
|
||||
using SampleFormat = HLE::SourceConfiguration::Configuration::Format;
|
||||
using SourceType = HLE::SourceConfiguration::Configuration::MonoOrStereo;
|
||||
|
||||
class DSPMixer {
|
||||
public:
|
||||
template <typename T, usize channelCount = 1>
|
||||
using Sample = std::array<T, channelCount>;
|
||||
|
||||
template <typename T, usize channelCount>
|
||||
using Frame = std::array<Sample<T, channelCount>, 160>;
|
||||
|
||||
template <typename T>
|
||||
using MonoFrame = Frame<T, 1>;
|
||||
|
||||
template <typename T>
|
||||
using StereoFrame = Frame<T, 2>;
|
||||
|
||||
template <typename T>
|
||||
using QuadFrame = Frame<T, 4>;
|
||||
|
||||
// Internally the DSP uses four channels when mixing.
|
||||
// Neatly, QuadFrame<s32> means that every sample is a uint32x4 value, which is particularly nice for SIMD mixing
|
||||
using IntermediateMix = QuadFrame<s32>;
|
||||
|
||||
private:
|
||||
using ChannelFormat = HLE::DspConfiguration::OutputFormat;
|
||||
// The audio from each DSP voice is converted to quadraphonic and then fed into 3 intermediate mixing stages
|
||||
// Two of these intermediate mixers (second and third) are used for effects, including custom effects done on the CPU
|
||||
static constexpr usize mixerStageCount = 3;
|
||||
|
||||
public:
|
||||
ChannelFormat channelFormat = ChannelFormat::Stereo;
|
||||
std::array<float, mixerStageCount> volumes;
|
||||
std::array<bool, 2> enableAuxStages;
|
||||
|
||||
void reset() {
|
||||
channelFormat = ChannelFormat::Stereo;
|
||||
|
||||
volumes.fill(0.0);
|
||||
enableAuxStages.fill(false);
|
||||
}
|
||||
};
|
||||
} // namespace Audio
|
|
@ -3,29 +3,39 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "config.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "miniaudio.h"
|
||||
#include "ring_buffer.hpp"
|
||||
|
||||
class MiniAudioDevice {
|
||||
using Samples = Common::RingBuffer<ma_int16, 1024>;
|
||||
using Samples = Common::RingBuffer<ma_int16, 0x2000 * 2>;
|
||||
static constexpr ma_uint32 sampleRate = 32768; // 3DS sample rate
|
||||
static constexpr ma_uint32 channelCount = 2; // Audio output is stereo
|
||||
|
||||
ma_device device;
|
||||
ma_context context;
|
||||
ma_device_config deviceConfig;
|
||||
ma_device device;
|
||||
ma_resampler resampler;
|
||||
Samples* samples = nullptr;
|
||||
|
||||
const AudioDeviceConfig& audioSettings;
|
||||
|
||||
bool initialized = false;
|
||||
bool running = false;
|
||||
|
||||
// Store the last stereo sample we output. We play this when underruning to avoid pops.
|
||||
std::array<s16, 2> lastStereoSample;
|
||||
std::vector<std::string> audioDevices;
|
||||
|
||||
public:
|
||||
MiniAudioDevice();
|
||||
MiniAudioDevice(const AudioDeviceConfig& audioSettings);
|
||||
|
||||
// If safe is on, we create a null audio device
|
||||
void init(Samples& samples, bool safe = false);
|
||||
void close();
|
||||
|
||||
void start();
|
||||
void stop();
|
||||
|
||||
bool isInitialized() const { return initialized; }
|
||||
};
|
|
@ -20,14 +20,14 @@ namespace Audio {
|
|||
std::array<u8, Memory::DSP_RAM_SIZE> dspRam;
|
||||
|
||||
void resetAudioPipe();
|
||||
bool loaded = false; // Have we loaded a component?
|
||||
bool loaded = false; // Have we loaded a component?
|
||||
|
||||
public:
|
||||
NullDSP(Memory& mem, Scheduler& scheduler, DSPService& dspService) : DSPCore(mem, scheduler, dspService) {}
|
||||
NullDSP(Memory& mem, Scheduler& scheduler, DSPService& dspService, EmulatorConfig& config) : DSPCore(mem, scheduler, dspService, config) {}
|
||||
~NullDSP() override {}
|
||||
|
||||
void reset() override;
|
||||
void runAudioFrame() override;
|
||||
void runAudioFrame(u64 eventTimestamp) override;
|
||||
|
||||
u8* getDspMemory() override { return dspRam.data(); }
|
||||
|
||||
|
|
|
@ -77,13 +77,13 @@ namespace Audio {
|
|||
}
|
||||
|
||||
public:
|
||||
TeakraDSP(Memory& mem, Scheduler& scheduler, DSPService& dspService);
|
||||
TeakraDSP(Memory& mem, Scheduler& scheduler, DSPService& dspService, EmulatorConfig& config);
|
||||
~TeakraDSP() override {}
|
||||
|
||||
void reset() override;
|
||||
|
||||
// Run 1 slice of DSP instructions and schedule the next audio frame
|
||||
void runAudioFrame() override {
|
||||
void runAudioFrame(u64 eventTimestamp) override {
|
||||
runSlice();
|
||||
scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::lleSlice * 2);
|
||||
}
|
||||
|
|
|
@ -1,8 +1,35 @@
|
|||
#pragma once
|
||||
#include <filesystem>
|
||||
#include <string>
|
||||
|
||||
#include "audio/dsp_core.hpp"
|
||||
#include "frontend_settings.hpp"
|
||||
#include "renderer.hpp"
|
||||
#include "services/region_codes.hpp"
|
||||
|
||||
struct AudioDeviceConfig {
|
||||
// Audio curve to use for volumes between 0-100
|
||||
enum class VolumeCurve : int {
|
||||
Cubic = 0, // Samples are scaled by volume ^ 3
|
||||
Linear = 1, // Samples are scaled by volume
|
||||
};
|
||||
|
||||
float volumeRaw = 1.0f;
|
||||
VolumeCurve volumeCurve = VolumeCurve::Cubic;
|
||||
|
||||
bool muteAudio = false;
|
||||
|
||||
float getVolume() const {
|
||||
if (muteAudio) {
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
return volumeRaw;
|
||||
}
|
||||
|
||||
static VolumeCurve volumeCurveFromString(std::string inString);
|
||||
static const char* volumeCurveToString(VolumeCurve curve);
|
||||
};
|
||||
|
||||
// Remember to initialize every field here to its default value otherwise bad things will happen
|
||||
struct EmulatorConfig {
|
||||
|
@ -13,27 +40,80 @@ struct EmulatorConfig {
|
|||
static constexpr bool shaderJitDefault = false;
|
||||
#endif
|
||||
|
||||
// For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are
|
||||
// horrible. On other platforms we default to ubershader + shadergen fallback for lights
|
||||
#if defined(__ANDROID__) || defined(__APPLE__)
|
||||
static constexpr bool ubershaderDefault = false;
|
||||
#else
|
||||
static constexpr bool ubershaderDefault = true;
|
||||
#endif
|
||||
static constexpr bool accelerateShadersDefault = true;
|
||||
|
||||
#if defined(__LIBRETRO__)
|
||||
static constexpr bool audioEnabledDefault = true;
|
||||
#else
|
||||
static constexpr bool audioEnabledDefault = false;
|
||||
#endif
|
||||
|
||||
bool shaderJitEnabled = shaderJitDefault;
|
||||
bool useUbershaders = ubershaderDefault;
|
||||
bool accelerateShaders = accelerateShadersDefault;
|
||||
bool accurateShaderMul = false;
|
||||
bool discordRpcEnabled = false;
|
||||
|
||||
// Toggles whether to force shadergen when there's more than N lights active and we're using the ubershader, for better performance
|
||||
bool forceShadergenForLights = true;
|
||||
int lightShadergenThreshold = 1;
|
||||
|
||||
RendererType rendererType = RendererType::OpenGL;
|
||||
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null;
|
||||
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::HLE;
|
||||
|
||||
bool sdCardInserted = true;
|
||||
bool sdWriteProtected = false;
|
||||
bool usePortableBuild = false;
|
||||
|
||||
bool audioEnabled = false;
|
||||
bool audioEnabled = audioEnabledDefault;
|
||||
bool vsyncEnabled = true;
|
||||
bool aacEnabled = true; // Enable AAC audio?
|
||||
|
||||
bool enableRenderdoc = false;
|
||||
bool printAppVersion = true;
|
||||
bool printDSPFirmware = false;
|
||||
|
||||
bool chargerPlugged = true;
|
||||
// Default to 3% battery to make users suffer
|
||||
int batteryPercentage = 3;
|
||||
|
||||
LanguageCodes systemLanguage = LanguageCodes::English;
|
||||
|
||||
// Default ROM path to open in Qt and misc frontends
|
||||
std::filesystem::path defaultRomPath = "";
|
||||
std::filesystem::path filePath;
|
||||
|
||||
// Frontend window settings
|
||||
struct WindowSettings {
|
||||
static constexpr int defaultX = 200;
|
||||
static constexpr int defaultY = 200;
|
||||
static constexpr int defaultWidth = 800;
|
||||
static constexpr int defaultHeight = 240 * 2;
|
||||
|
||||
bool rememberPosition = false; // Remember window position & size
|
||||
bool showAppVersion = false;
|
||||
|
||||
int x = defaultX;
|
||||
int y = defaultY;
|
||||
int width = defaultHeight;
|
||||
int height = defaultHeight;
|
||||
};
|
||||
|
||||
WindowSettings windowSettings;
|
||||
AudioDeviceConfig audioDeviceConfig;
|
||||
FrontendSettings frontendSettings;
|
||||
|
||||
EmulatorConfig(const std::filesystem::path& path);
|
||||
void load();
|
||||
void save();
|
||||
|
||||
static LanguageCodes languageCodeFromString(std::string inString);
|
||||
static const char* languageCodeToString(LanguageCodes code);
|
||||
};
|
|
@ -1,20 +1,29 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <cstdint>
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <filesystem>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "io_file.hpp"
|
||||
#include "swap.hpp"
|
||||
|
||||
namespace Crypto {
|
||||
constexpr std::size_t AesKeySize = 0x10;
|
||||
constexpr usize AesKeySize = 0x10;
|
||||
using AESKey = std::array<u8, AesKeySize>;
|
||||
|
||||
template <std::size_t N>
|
||||
static std::array<u8, N> rolArray(const std::array<u8, N>& value, std::size_t bits) {
|
||||
struct Seed {
|
||||
u64_le titleID;
|
||||
AESKey seed;
|
||||
std::array<u8, 8> pad;
|
||||
};
|
||||
|
||||
template <usize N>
|
||||
static std::array<u8, N> rolArray(const std::array<u8, N>& value, usize bits) {
|
||||
const auto bitWidth = N * CHAR_BIT;
|
||||
|
||||
bits %= bitWidth;
|
||||
|
@ -24,18 +33,18 @@ namespace Crypto {
|
|||
|
||||
std::array<u8, N> result;
|
||||
|
||||
for (std::size_t i = 0; i < N; i++) {
|
||||
for (usize i = 0; i < N; i++) {
|
||||
result[i] = ((value[(i + byteShift) % N] << bitShift) | (value[(i + byteShift + 1) % N] >> (CHAR_BIT - bitShift))) & UINT8_MAX;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <std::size_t N>
|
||||
template <usize N>
|
||||
static std::array<u8, N> addArray(const std::array<u8, N>& a, const std::array<u8, N>& b) {
|
||||
std::array<u8, N> result;
|
||||
std::size_t sum = 0;
|
||||
std::size_t carry = 0;
|
||||
usize sum = 0;
|
||||
usize carry = 0;
|
||||
|
||||
for (std::int64_t i = N - 1; i >= 0; i--) {
|
||||
sum = a[i] + b[i] + carry;
|
||||
|
@ -46,11 +55,11 @@ namespace Crypto {
|
|||
return result;
|
||||
}
|
||||
|
||||
template <std::size_t N>
|
||||
template <usize N>
|
||||
static std::array<u8, N> xorArray(const std::array<u8, N>& a, const std::array<u8, N>& b) {
|
||||
std::array<u8, N> result;
|
||||
|
||||
for (std::size_t i = 0; i < N; i++) {
|
||||
for (usize i = 0; i < N; i++) {
|
||||
result[i] = a[i] ^ b[i];
|
||||
}
|
||||
|
||||
|
@ -63,7 +72,7 @@ namespace Crypto {
|
|||
}
|
||||
|
||||
AESKey rawKey;
|
||||
for (std::size_t i = 0; i < rawKey.size(); i++) {
|
||||
for (usize i = 0; i < rawKey.size(); i++) {
|
||||
rawKey[i] = static_cast<u8>(std::stoi(hex.substr(i * 2, 2), 0, 16));
|
||||
}
|
||||
|
||||
|
@ -76,7 +85,7 @@ namespace Crypto {
|
|||
std::optional<AESKey> normalKey = std::nullopt;
|
||||
};
|
||||
|
||||
enum KeySlotId : std::size_t {
|
||||
enum KeySlotId : usize {
|
||||
NCCHKey0 = 0x2C,
|
||||
NCCHKey1 = 0x25,
|
||||
NCCHKey2 = 0x18,
|
||||
|
@ -84,14 +93,17 @@ namespace Crypto {
|
|||
};
|
||||
|
||||
class AESEngine {
|
||||
private:
|
||||
constexpr static std::size_t AesKeySlotCount = 0x40;
|
||||
private:
|
||||
constexpr static usize AesKeySlotCount = 0x40;
|
||||
|
||||
std::optional<AESKey> m_generator = std::nullopt;
|
||||
std::array<AESKeySlot, AesKeySlotCount> m_slots;
|
||||
bool keysLoaded = false;
|
||||
|
||||
constexpr void updateNormalKey(std::size_t slotId) {
|
||||
std::vector<Seed> seeds;
|
||||
IOFile seedDatabase;
|
||||
|
||||
constexpr void updateNormalKey(usize slotId) {
|
||||
if (m_generator.has_value() && hasKeyX(slotId) && hasKeyY(slotId)) {
|
||||
auto& keySlot = m_slots.at(slotId);
|
||||
AESKey keyX = keySlot.keyX.value();
|
||||
|
@ -101,13 +113,17 @@ namespace Crypto {
|
|||
}
|
||||
}
|
||||
|
||||
public:
|
||||
public:
|
||||
AESEngine() {}
|
||||
void loadKeys(const std::filesystem::path& path);
|
||||
void setSeedPath(const std::filesystem::path& path);
|
||||
// Returns true on success, false on failure
|
||||
bool loadSeeds();
|
||||
|
||||
bool haveKeys() { return keysLoaded; }
|
||||
bool haveGenerator() { return m_generator.has_value(); }
|
||||
|
||||
constexpr bool hasKeyX(std::size_t slotId) {
|
||||
constexpr bool hasKeyX(usize slotId) {
|
||||
if (slotId >= AesKeySlotCount) {
|
||||
return false;
|
||||
}
|
||||
|
@ -115,18 +131,16 @@ namespace Crypto {
|
|||
return m_slots.at(slotId).keyX.has_value();
|
||||
}
|
||||
|
||||
constexpr AESKey getKeyX(std::size_t slotId) {
|
||||
return m_slots.at(slotId).keyX.value_or(AESKey{});
|
||||
}
|
||||
constexpr AESKey getKeyX(usize slotId) { return m_slots.at(slotId).keyX.value_or(AESKey{}); }
|
||||
|
||||
constexpr void setKeyX(std::size_t slotId, const AESKey &key) {
|
||||
constexpr void setKeyX(usize slotId, const AESKey& key) {
|
||||
if (slotId < AesKeySlotCount) {
|
||||
m_slots.at(slotId).keyX = key;
|
||||
updateNormalKey(slotId);
|
||||
}
|
||||
}
|
||||
|
||||
constexpr bool hasKeyY(std::size_t slotId) {
|
||||
constexpr bool hasKeyY(usize slotId) {
|
||||
if (slotId >= AesKeySlotCount) {
|
||||
return false;
|
||||
}
|
||||
|
@ -134,18 +148,16 @@ namespace Crypto {
|
|||
return m_slots.at(slotId).keyY.has_value();
|
||||
}
|
||||
|
||||
constexpr AESKey getKeyY(std::size_t slotId) {
|
||||
return m_slots.at(slotId).keyY.value_or(AESKey{});
|
||||
}
|
||||
constexpr AESKey getKeyY(usize slotId) { return m_slots.at(slotId).keyY.value_or(AESKey{}); }
|
||||
|
||||
constexpr void setKeyY(std::size_t slotId, const AESKey &key) {
|
||||
constexpr void setKeyY(usize slotId, const AESKey& key) {
|
||||
if (slotId < AesKeySlotCount) {
|
||||
m_slots.at(slotId).keyY = key;
|
||||
updateNormalKey(slotId);
|
||||
}
|
||||
}
|
||||
|
||||
constexpr bool hasNormalKey(std::size_t slotId) {
|
||||
constexpr bool hasNormalKey(usize slotId) {
|
||||
if (slotId >= AesKeySlotCount) {
|
||||
return false;
|
||||
}
|
||||
|
@ -153,14 +165,14 @@ namespace Crypto {
|
|||
return m_slots.at(slotId).normalKey.has_value();
|
||||
}
|
||||
|
||||
constexpr AESKey getNormalKey(std::size_t slotId) {
|
||||
return m_slots.at(slotId).normalKey.value_or(AESKey{});
|
||||
}
|
||||
constexpr AESKey getNormalKey(usize slotId) { return m_slots.at(slotId).normalKey.value_or(AESKey{}); }
|
||||
|
||||
constexpr void setNormalKey(std::size_t slotId, const AESKey &key) {
|
||||
constexpr void setNormalKey(usize slotId, const AESKey& key) {
|
||||
if (slotId < AesKeySlotCount) {
|
||||
m_slots.at(slotId).normalKey = key;
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<AESKey> getSeedFromDB(u64 titleID);
|
||||
};
|
||||
}
|
||||
} // namespace Crypto
|
||||
|
|
|
@ -17,6 +17,8 @@ namespace Discord {
|
|||
void init();
|
||||
void update(RPCStatus status, const std::string& title);
|
||||
void stop();
|
||||
|
||||
bool running() const { return enabled; }
|
||||
};
|
||||
} // namespace Discord
|
||||
|
||||
|
|
|
@ -66,7 +66,6 @@ class Emulator {
|
|||
#ifdef PANDA3DS_ENABLE_DISCORD_RPC
|
||||
Discord::RPC discordRpc;
|
||||
#endif
|
||||
void setAudioEnabled(bool enable);
|
||||
void updateDiscord();
|
||||
|
||||
// Keep the handle for the ROM here to reload when necessary and to prevent deleting it
|
||||
|
@ -90,7 +89,6 @@ class Emulator {
|
|||
~Emulator();
|
||||
|
||||
void step();
|
||||
void render();
|
||||
void reset(ReloadOption reload);
|
||||
void runFrame();
|
||||
// Poll the scheduler for events
|
||||
|
@ -99,6 +97,7 @@ class Emulator {
|
|||
void resume(); // Resume the emulator
|
||||
void pause(); // Pause the emulator
|
||||
void togglePause();
|
||||
void setAudioEnabled(bool enable);
|
||||
|
||||
bool loadAmiibo(const std::filesystem::path& path);
|
||||
bool loadROM(const std::filesystem::path& path);
|
||||
|
@ -118,6 +117,9 @@ class Emulator {
|
|||
void setOutputSize(u32 width, u32 height) { gpu.setOutputSize(width, height); }
|
||||
void deinitGraphicsContext() { gpu.deinitGraphicsContext(); }
|
||||
|
||||
// Reloads some settings that require special handling, such as audio enable
|
||||
void reloadSettings();
|
||||
|
||||
EmulatorConfig& getConfig() { return config; }
|
||||
Cheats& getCheats() { return cheats; }
|
||||
ServiceManager& getServiceManager() { return kernel.getServiceManager(); }
|
||||
|
@ -135,4 +137,7 @@ class Emulator {
|
|||
std::filesystem::path getAppDataRoot();
|
||||
|
||||
std::span<u8> getSMDH();
|
||||
|
||||
private:
|
||||
void loadRenderdoc();
|
||||
};
|
||||
|
|
34
include/frontend_settings.hpp
Normal file
34
include/frontend_settings.hpp
Normal file
|
@ -0,0 +1,34 @@
|
|||
#pragma once
|
||||
#include <string>
|
||||
|
||||
// Some UI settings that aren't fully frontend-dependent. Note: Not all frontends will support the same settings.
|
||||
// Note: Any enums should ideally be ordered in the same order we want to show them in UI dropdown menus, so that we can cast indices to enums
|
||||
// directly.
|
||||
struct FrontendSettings {
|
||||
enum class Theme : int {
|
||||
System = 0,
|
||||
Light = 1,
|
||||
Dark = 2,
|
||||
GreetingsCat = 3,
|
||||
Cream = 4,
|
||||
};
|
||||
|
||||
// Different panda-themed window icons
|
||||
enum class WindowIcon : int {
|
||||
Rpog = 0,
|
||||
Rsyn = 1,
|
||||
Rnap = 2,
|
||||
Rcow = 3,
|
||||
SkyEmu = 4,
|
||||
};
|
||||
|
||||
Theme theme = Theme::Dark;
|
||||
WindowIcon icon = WindowIcon::Rpog;
|
||||
std::string language = "en";
|
||||
|
||||
static Theme themeFromString(std::string inString);
|
||||
static const char* themeToString(Theme theme);
|
||||
|
||||
static WindowIcon iconFromString(std::string inString);
|
||||
static const char* iconToString(WindowIcon icon);
|
||||
};
|
|
@ -7,6 +7,7 @@
|
|||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "memory.hpp"
|
||||
#include "result.hpp"
|
||||
|
@ -15,13 +16,13 @@
|
|||
using Result::HorizonResult;
|
||||
|
||||
namespace PathType {
|
||||
enum : u32 {
|
||||
Invalid = 0,
|
||||
Empty = 1,
|
||||
Binary = 2,
|
||||
ASCII = 3,
|
||||
UTF16 = 4,
|
||||
};
|
||||
enum : u32 {
|
||||
Invalid = 0,
|
||||
Empty = 1,
|
||||
Binary = 2,
|
||||
ASCII = 3,
|
||||
UTF16 = 4,
|
||||
};
|
||||
}
|
||||
|
||||
namespace ArchiveID {
|
||||
|
@ -34,91 +35,103 @@ namespace ArchiveID {
|
|||
SDMC = 9,
|
||||
SDMCWriteOnly = 0xA,
|
||||
|
||||
CardSPI = 0x12345679,
|
||||
SavedataAndNcch = 0x2345678A,
|
||||
// 3DBrew: This is the same as the regular SaveData archive, except with this the savedata ID and mediatype is loaded from the input archive
|
||||
// lowpath.
|
||||
UserSaveData1 = 0x567890B2,
|
||||
// 3DBrew: Similar to 0x567890B2 but can only access Accessible Save specified in exheader?
|
||||
UserSaveData2 = 0x567890B4,
|
||||
|
||||
TwlPhoto = 0x567890AC,
|
||||
TwlSound = 0x567890AD,
|
||||
};
|
||||
|
||||
static std::string toString(u32 id) {
|
||||
switch (id) {
|
||||
case SelfNCCH: return "SelfNCCH";
|
||||
case SaveData: return "SaveData";
|
||||
case ExtSaveData: return "ExtSaveData";
|
||||
case SharedExtSaveData: return "SharedExtSaveData";
|
||||
case SystemSaveData: return "SystemSaveData";
|
||||
case SDMC: return "SDMC";
|
||||
case SDMCWriteOnly: return "SDMC (Write-only)";
|
||||
case SavedataAndNcch: return "Savedata & NCCH (archive 0x2345678A)";
|
||||
default: return "Unknown archive";
|
||||
}
|
||||
}
|
||||
}
|
||||
static std::string toString(u32 id) {
|
||||
switch (id) {
|
||||
case SelfNCCH: return "SelfNCCH";
|
||||
case SaveData: return "SaveData";
|
||||
case ExtSaveData: return "ExtSaveData";
|
||||
case SharedExtSaveData: return "SharedExtSaveData";
|
||||
case SystemSaveData: return "SystemSaveData";
|
||||
case SDMC: return "SDMC";
|
||||
case SDMCWriteOnly: return "SDMC (Write-only)";
|
||||
case SavedataAndNcch: return "Savedata & NCCH (archive 0x2345678A)";
|
||||
case TwlPhoto: return "TWL_PHOTO";
|
||||
case TwlSound: return "TWL_SOUND";
|
||||
default: return "Unknown archive";
|
||||
}
|
||||
}
|
||||
} // namespace ArchiveID
|
||||
|
||||
struct FSPath {
|
||||
u32 type = PathType::Invalid;
|
||||
u32 type = PathType::Invalid;
|
||||
|
||||
std::vector<u8> binary; // Path data for binary paths
|
||||
std::string string; // Path data for ASCII paths
|
||||
std::u16string utf16_string;
|
||||
std::vector<u8> binary; // Path data for binary paths
|
||||
std::string string; // Path data for ASCII paths
|
||||
std::u16string utf16_string;
|
||||
|
||||
FSPath() {}
|
||||
FSPath() {}
|
||||
|
||||
FSPath(u32 type, const std::vector<u8>& vec) : type(type) {
|
||||
switch (type) {
|
||||
case PathType::Binary:
|
||||
binary = std::move(vec);
|
||||
break;
|
||||
FSPath(u32 type, const std::vector<u8>& vec) : type(type) {
|
||||
switch (type) {
|
||||
case PathType::Binary: binary = std::move(vec); break;
|
||||
|
||||
case PathType::ASCII:
|
||||
string.resize(vec.size() - 1); // -1 because of the null terminator
|
||||
std::memcpy(string.data(), vec.data(), vec.size() - 1); // Copy string data
|
||||
break;
|
||||
case PathType::ASCII:
|
||||
string.resize(vec.size() - 1); // -1 because of the null terminator
|
||||
std::memcpy(string.data(), vec.data(), vec.size() - 1); // Copy string data
|
||||
break;
|
||||
|
||||
case PathType::UTF16: {
|
||||
const size_t size = vec.size() / sizeof(u16) - 1; // Character count. -1 because null terminator here too
|
||||
utf16_string.resize(size);
|
||||
std::memcpy(utf16_string.data(), vec.data(), size * sizeof(u16));
|
||||
break;
|
||||
}
|
||||
; }
|
||||
}
|
||||
case PathType::UTF16: {
|
||||
const size_t size = vec.size() / sizeof(u16) - 1; // Character count. -1 because null terminator here too
|
||||
utf16_string.resize(size);
|
||||
std::memcpy(utf16_string.data(), vec.data(), size * sizeof(u16));
|
||||
break;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
bool isUTF16() const { return type == PathType::UTF16; }
|
||||
bool isASCII() const { return type == PathType::ASCII; }
|
||||
bool isBinary() const { return type == PathType::Binary; }
|
||||
// This is not called "isEmpty()" to make obvious that we're talking about an empty-type path, NOT an empty text path
|
||||
bool isEmptyType() const { return type == PathType::Empty; }
|
||||
|
||||
bool isTextPath() const { return isUTF16() || isASCII(); }
|
||||
};
|
||||
|
||||
struct FilePerms {
|
||||
u32 raw;
|
||||
u32 raw;
|
||||
|
||||
FilePerms(u32 val) : raw(val) {}
|
||||
bool read() const { return (raw & 1) != 0; }
|
||||
bool write() const { return (raw & 2) != 0; }
|
||||
bool create() const { return (raw & 4) != 0; }
|
||||
FilePerms(u32 val) : raw(val) {}
|
||||
bool read() const { return (raw & 1) != 0; }
|
||||
bool write() const { return (raw & 2) != 0; }
|
||||
bool create() const { return (raw & 4) != 0; }
|
||||
};
|
||||
|
||||
class ArchiveBase;
|
||||
struct FileSession {
|
||||
ArchiveBase* archive = nullptr;
|
||||
FILE* fd = nullptr; // File descriptor for file sessions that require them.
|
||||
FSPath path;
|
||||
FSPath archivePath;
|
||||
u32 priority = 0; // TODO: What does this even do
|
||||
bool isOpen;
|
||||
ArchiveBase* archive = nullptr;
|
||||
FILE* fd = nullptr; // File descriptor for file sessions that require them.
|
||||
FSPath path;
|
||||
FSPath archivePath;
|
||||
u32 priority = 0; // TODO: What does this even do
|
||||
bool isOpen;
|
||||
|
||||
FileSession(ArchiveBase* archive, const FSPath& filePath, const FSPath& archivePath, FILE* fd, bool isOpen = true) :
|
||||
archive(archive), path(filePath), archivePath(archivePath), fd(fd), isOpen(isOpen), priority(0) {}
|
||||
FileSession(ArchiveBase* archive, const FSPath& filePath, const FSPath& archivePath, FILE* fd, bool isOpen = true)
|
||||
: archive(archive), path(filePath), archivePath(archivePath), fd(fd), isOpen(isOpen), priority(0) {}
|
||||
|
||||
// For cloning a file session
|
||||
FileSession(const FileSession& other) : archive(other.archive), path(other.path),
|
||||
archivePath(other.archivePath), fd(other.fd), isOpen(other.isOpen), priority(other.priority) {}
|
||||
// For cloning a file session
|
||||
FileSession(const FileSession& other)
|
||||
: archive(other.archive), path(other.path), archivePath(other.archivePath), fd(other.fd), isOpen(other.isOpen), priority(other.priority) {}
|
||||
};
|
||||
|
||||
struct ArchiveSession {
|
||||
ArchiveBase* archive = nullptr;
|
||||
FSPath path;
|
||||
bool isOpen;
|
||||
ArchiveBase* archive = nullptr;
|
||||
FSPath path;
|
||||
bool isOpen;
|
||||
|
||||
ArchiveSession(ArchiveBase* archive, const FSPath& filePath, bool isOpen = true) : archive(archive), path(filePath), isOpen(isOpen) {}
|
||||
ArchiveSession(ArchiveBase* archive, const FSPath& filePath, bool isOpen = true) : archive(archive), path(filePath), isOpen(isOpen) {}
|
||||
};
|
||||
|
||||
struct DirectoryEntry {
|
||||
|
@ -156,106 +169,125 @@ struct DirectorySession {
|
|||
using FileDescriptor = std::optional<FILE*>;
|
||||
|
||||
class ArchiveBase {
|
||||
public:
|
||||
struct FormatInfo {
|
||||
u32 size; // Archive size
|
||||
u32 numOfDirectories; // Number of directories
|
||||
u32 numOfFiles; // Number of files
|
||||
bool duplicateData; // Whether to duplicate data or not
|
||||
};
|
||||
public:
|
||||
struct FormatInfo {
|
||||
u32 size; // Archive size
|
||||
u32 numOfDirectories; // Number of directories
|
||||
u32 numOfFiles; // Number of files
|
||||
bool duplicateData; // Whether to duplicate data or not
|
||||
};
|
||||
|
||||
protected:
|
||||
using Handle = u32;
|
||||
protected:
|
||||
using Handle = u32;
|
||||
|
||||
static constexpr FileDescriptor NoFile = nullptr;
|
||||
static constexpr FileDescriptor FileError = std::nullopt;
|
||||
Memory& mem;
|
||||
static constexpr FileDescriptor NoFile = nullptr;
|
||||
static constexpr FileDescriptor FileError = std::nullopt;
|
||||
Memory& mem;
|
||||
|
||||
// Returns if a specified 3DS path in UTF16 or ASCII format is safe or not
|
||||
// A 3DS path is considered safe if its first character is '/' which means we're not trying to access anything outside the root of the fs
|
||||
// And if it doesn't contain enough instances of ".." (Indicating "climb up a folder" in filesystems) to let the software climb up the directory tree
|
||||
// And access files outside of the emulator's app data folder
|
||||
template <u32 format>
|
||||
bool isPathSafe(const FSPath& path) {
|
||||
static_assert(format == PathType::ASCII || format == PathType::UTF16);
|
||||
using String = typename std::conditional<format == PathType::UTF16, std::u16string, std::string>::type; // String type for the path
|
||||
using Char = typename String::value_type; // Char type for the path
|
||||
// Returns if a specified 3DS path in UTF16 or ASCII format is safe or not
|
||||
// A 3DS path is considered safe if its first character is '/' which means we're not trying to access anything outside the root of the fs
|
||||
// And if it doesn't contain enough instances of ".." (Indicating "climb up a folder" in filesystems) to let the software climb up the directory
|
||||
// tree And access files outside of the emulator's app data folder
|
||||
template <u32 format>
|
||||
bool isPathSafe(const FSPath& path) {
|
||||
static_assert(format == PathType::ASCII || format == PathType::UTF16);
|
||||
using String = typename std::conditional<format == PathType::UTF16, std::u16string, std::string>::type; // String type for the path
|
||||
using Char = typename String::value_type; // Char type for the path
|
||||
|
||||
String pathString, dots;
|
||||
if constexpr (std::is_same<String, std::u16string>()) {
|
||||
pathString = path.utf16_string;
|
||||
dots = u"..";
|
||||
} else {
|
||||
pathString = path.string;
|
||||
dots = "..";
|
||||
}
|
||||
String pathString, dots;
|
||||
if constexpr (std::is_same<String, std::u16string>()) {
|
||||
pathString = path.utf16_string;
|
||||
dots = u"..";
|
||||
} else {
|
||||
pathString = path.string;
|
||||
dots = "..";
|
||||
}
|
||||
|
||||
// If the path string doesn't begin with / then that means it's accessing outside the FS root, which is invalid & unsafe
|
||||
if (pathString[0] != Char('/')) return false;
|
||||
// If the path string doesn't begin with / then that means it's accessing outside the FS root, which is invalid & unsafe
|
||||
if (pathString[0] != Char('/')) return false;
|
||||
|
||||
// Counts how many folders sans the root our file is nested under.
|
||||
// If it's < 0 at any point of parsing, then the path is unsafe and tries to crawl outside our file sandbox.
|
||||
// If it's 0 then this is the FS root.
|
||||
// If it's > 0 then we're in a subdirectory of the root.
|
||||
int level = 0;
|
||||
// Counts how many folders sans the root our file is nested under.
|
||||
// If it's < 0 at any point of parsing, then the path is unsafe and tries to crawl outside our file sandbox.
|
||||
// If it's 0 then this is the FS root.
|
||||
// If it's > 0 then we're in a subdirectory of the root.
|
||||
int level = 0;
|
||||
|
||||
// Split the string on / characters and see how many of the substrings are ".."
|
||||
size_t pos = 0;
|
||||
while ((pos = pathString.find(Char('/'))) != String::npos) {
|
||||
String token = pathString.substr(0, pos);
|
||||
pathString.erase(0, pos + 1);
|
||||
// Split the string on / characters and see how many of the substrings are ".."
|
||||
size_t pos = 0;
|
||||
while ((pos = pathString.find(Char('/'))) != String::npos) {
|
||||
String token = pathString.substr(0, pos);
|
||||
pathString.erase(0, pos + 1);
|
||||
|
||||
if (token == dots) {
|
||||
level--;
|
||||
if (level < 0) return false;
|
||||
} else {
|
||||
level++;
|
||||
}
|
||||
}
|
||||
if (token == dots) {
|
||||
level--;
|
||||
if (level < 0) return false;
|
||||
} else {
|
||||
level++;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
virtual std::string name() = 0;
|
||||
virtual u64 getFreeBytes() = 0;
|
||||
virtual HorizonResult createFile(const FSPath& path, u64 size) = 0;
|
||||
virtual HorizonResult deleteFile(const FSPath& path) = 0;
|
||||
public:
|
||||
virtual std::string name() = 0;
|
||||
virtual u64 getFreeBytes() = 0;
|
||||
virtual HorizonResult createFile(const FSPath& path, u64 size) = 0;
|
||||
virtual HorizonResult deleteFile(const FSPath& path) = 0;
|
||||
|
||||
virtual Rust::Result<FormatInfo, HorizonResult> getFormatInfo(const FSPath& path) {
|
||||
Helpers::panic("Unimplemented GetFormatInfo for %s archive", name().c_str());
|
||||
// Return a dummy struct just to avoid the UB of not returning anything, even if we panic
|
||||
return Ok(FormatInfo{ .size = 0, .numOfDirectories = 0, .numOfFiles = 0, .duplicateData = false });
|
||||
}
|
||||
virtual Rust::Result<FormatInfo, HorizonResult> getFormatInfo(const FSPath& path) {
|
||||
Helpers::panic("Unimplemented GetFormatInfo for %s archive", name().c_str());
|
||||
// Return a dummy struct just to avoid the UB of not returning anything, even if we panic
|
||||
return Ok(FormatInfo{.size = 0, .numOfDirectories = 0, .numOfFiles = 0, .duplicateData = false});
|
||||
}
|
||||
|
||||
virtual HorizonResult createDirectory(const FSPath& path) {
|
||||
Helpers::panic("Unimplemented CreateDirectory for %s archive", name().c_str());
|
||||
return Result::FS::AlreadyExists;
|
||||
}
|
||||
virtual HorizonResult createDirectory(const FSPath& path) {
|
||||
Helpers::panic("Unimplemented CreateDirectory for %s archive", name().c_str());
|
||||
return Result::FS::AlreadyExists;
|
||||
}
|
||||
|
||||
// Returns nullopt if opening the file failed, otherwise returns a file descriptor to it (nullptr if none is needed)
|
||||
virtual FileDescriptor openFile(const FSPath& path, const FilePerms& perms) = 0;
|
||||
virtual Rust::Result<ArchiveBase*, HorizonResult> openArchive(const FSPath& path) = 0;
|
||||
// Returns nullopt if opening the file failed, otherwise returns a file descriptor to it (nullptr if none is needed)
|
||||
virtual FileDescriptor openFile(const FSPath& path, const FilePerms& perms) = 0;
|
||||
virtual Rust::Result<ArchiveBase*, HorizonResult> openArchive(const FSPath& path) = 0;
|
||||
|
||||
virtual Rust::Result<DirectorySession, HorizonResult> openDirectory(const FSPath& path) {
|
||||
Helpers::panic("Unimplemented OpenDirectory for %s archive", name().c_str());
|
||||
return Err(Result::FS::FileNotFoundAlt);
|
||||
}
|
||||
virtual Rust::Result<DirectorySession, HorizonResult> openDirectory(const FSPath& path) {
|
||||
Helpers::panic("Unimplemented OpenDirectory for %s archive", name().c_str());
|
||||
return Err(Result::FS::FileNotFoundAlt);
|
||||
}
|
||||
|
||||
virtual void format(const FSPath& path, const FormatInfo& info) {
|
||||
Helpers::panic("Unimplemented Format for %s archive", name().c_str());
|
||||
}
|
||||
virtual void format(const FSPath& path, const FormatInfo& info) { Helpers::panic("Unimplemented Format for %s archive", name().c_str()); }
|
||||
|
||||
virtual HorizonResult renameFile(const FSPath& oldPath, const FSPath& newPath) {
|
||||
virtual HorizonResult renameFile(const FSPath& oldPath, const FSPath& newPath) {
|
||||
Helpers::panic("Unimplemented RenameFile for %s archive", name().c_str());
|
||||
return Result::Success;
|
||||
}
|
||||
}
|
||||
|
||||
// Read size bytes from a file starting at offset "offset" into a certain buffer in memory
|
||||
// Returns the number of bytes read, or nullopt if the read failed
|
||||
virtual std::optional<u32> readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) = 0;
|
||||
// Read size bytes from a file starting at offset "offset" into a certain buffer in memory
|
||||
// Returns the number of bytes read, or nullopt if the read failed
|
||||
virtual std::optional<u32> readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) = 0;
|
||||
|
||||
ArchiveBase(Memory& mem) : mem(mem) {}
|
||||
ArchiveBase(Memory& mem) : mem(mem) {}
|
||||
|
||||
bool isSafeTextPath(const FSPath& path) {
|
||||
if (path.type == PathType::UTF16) {
|
||||
return isPathSafe<PathType::UTF16>(path);
|
||||
} else if (path.type == PathType::ASCII){
|
||||
return isPathSafe<PathType::ASCII>(path);
|
||||
}
|
||||
|
||||
Helpers::panic("ArchiveBase::IsSafeTextPath: Invalid path");
|
||||
}
|
||||
|
||||
// Appends a 3DS path to an std::filesystem::path
|
||||
void appendPath(std::filesystem::path& diskPath, const FSPath& guestPath) {
|
||||
if (guestPath.type == PathType::UTF16) {
|
||||
diskPath += std::filesystem::path(guestPath.utf16_string).make_preferred();
|
||||
} else if (guestPath.type == PathType::ASCII) {
|
||||
diskPath += std::filesystem::path(guestPath.string).make_preferred();
|
||||
} else [[unlikely]] {
|
||||
Helpers::panic("ArchiveBase::AppendPath: Invalid 3DS path");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct ArchiveResource {
|
||||
|
|
30
include/fs/archive_card_spi.hpp
Normal file
30
include/fs/archive_card_spi.hpp
Normal file
|
@ -0,0 +1,30 @@
|
|||
#pragma once
|
||||
#include "archive_base.hpp"
|
||||
#include "result/result.hpp"
|
||||
|
||||
using Result::HorizonResult;
|
||||
|
||||
class CardSPIArchive : public ArchiveBase {
|
||||
public:
|
||||
CardSPIArchive(Memory& mem) : ArchiveBase(mem) {}
|
||||
std::string name() override { return "Card SPI"; }
|
||||
|
||||
u64 getFreeBytes() override {
|
||||
Helpers::warn("Unimplemented GetFreeBytes for Card SPI archive");
|
||||
return 0_MB;
|
||||
}
|
||||
|
||||
HorizonResult createDirectory(const FSPath& path) override;
|
||||
HorizonResult createFile(const FSPath& path, u64 size) override;
|
||||
HorizonResult deleteFile(const FSPath& path) override;
|
||||
|
||||
Rust::Result<ArchiveBase*, HorizonResult> openArchive(const FSPath& path) override;
|
||||
Rust::Result<DirectorySession, HorizonResult> openDirectory(const FSPath& path) override;
|
||||
|
||||
FileDescriptor openFile(const FSPath& path, const FilePerms& perms) override;
|
||||
|
||||
std::optional<u32> readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) override {
|
||||
Helpers::panic("Unimplemented ReadFile for Card SPI archive");
|
||||
return {};
|
||||
};
|
||||
};
|
30
include/fs/archive_twl_photo.hpp
Normal file
30
include/fs/archive_twl_photo.hpp
Normal file
|
@ -0,0 +1,30 @@
|
|||
#pragma once
|
||||
#include "archive_base.hpp"
|
||||
#include "result/result.hpp"
|
||||
|
||||
using Result::HorizonResult;
|
||||
|
||||
class TWLPhotoArchive : public ArchiveBase {
|
||||
public:
|
||||
TWLPhotoArchive(Memory& mem) : ArchiveBase(mem) {}
|
||||
std::string name() override { return "TWL_PHOTO"; }
|
||||
|
||||
u64 getFreeBytes() override {
|
||||
Helpers::warn("Unimplemented GetFreeBytes for TWLPhoto archive");
|
||||
return 32_MB;
|
||||
}
|
||||
|
||||
HorizonResult createDirectory(const FSPath& path) override;
|
||||
HorizonResult createFile(const FSPath& path, u64 size) override;
|
||||
HorizonResult deleteFile(const FSPath& path) override;
|
||||
|
||||
Rust::Result<ArchiveBase*, HorizonResult> openArchive(const FSPath& path) override;
|
||||
Rust::Result<DirectorySession, HorizonResult> openDirectory(const FSPath& path) override;
|
||||
|
||||
FileDescriptor openFile(const FSPath& path, const FilePerms& perms) override;
|
||||
|
||||
std::optional<u32> readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) override {
|
||||
Helpers::panic("Unimplemented ReadFile for TWL_PHOTO archive");
|
||||
return {};
|
||||
};
|
||||
};
|
30
include/fs/archive_twl_sound.hpp
Normal file
30
include/fs/archive_twl_sound.hpp
Normal file
|
@ -0,0 +1,30 @@
|
|||
#pragma once
|
||||
#include "archive_base.hpp"
|
||||
#include "result/result.hpp"
|
||||
|
||||
using Result::HorizonResult;
|
||||
|
||||
class TWLSoundArchive : public ArchiveBase {
|
||||
public:
|
||||
TWLSoundArchive(Memory& mem) : ArchiveBase(mem) {}
|
||||
std::string name() override { return "TWL_SOUND"; }
|
||||
|
||||
u64 getFreeBytes() override {
|
||||
Helpers::warn("Unimplemented GetFreeBytes for TWLSound archive");
|
||||
return 32_MB;
|
||||
}
|
||||
|
||||
HorizonResult createDirectory(const FSPath& path) override;
|
||||
HorizonResult createFile(const FSPath& path, u64 size) override;
|
||||
HorizonResult deleteFile(const FSPath& path) override;
|
||||
|
||||
Rust::Result<ArchiveBase*, HorizonResult> openArchive(const FSPath& path) override;
|
||||
Rust::Result<DirectorySession, HorizonResult> openDirectory(const FSPath& path) override;
|
||||
|
||||
FileDescriptor openFile(const FSPath& path, const FilePerms& perms) override;
|
||||
|
||||
std::optional<u32> readFile(FileSession* file, u64 offset, u32 size, u32 dataPointer) override {
|
||||
Helpers::panic("Unimplemented ReadFile for TWL_SOUND archive");
|
||||
return {};
|
||||
};
|
||||
};
|
|
@ -4,7 +4,6 @@
|
|||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
@ -162,19 +161,6 @@ namespace Helpers {
|
|||
return std::bit_cast<To, From>(from);
|
||||
}
|
||||
#endif
|
||||
|
||||
static std::vector<std::string> split(const std::string& s, const char c) {
|
||||
std::istringstream tmp(s);
|
||||
std::vector<std::string> result(1);
|
||||
|
||||
while (std::getline(tmp, *result.rbegin(), c)) {
|
||||
result.emplace_back();
|
||||
}
|
||||
|
||||
// Remove temporary slot
|
||||
result.pop_back();
|
||||
return result;
|
||||
}
|
||||
}; // namespace Helpers
|
||||
|
||||
// UDLs for memory size values
|
||||
|
|
|
@ -2,8 +2,19 @@
|
|||
#include <cstdint>
|
||||
|
||||
namespace IPC {
|
||||
namespace BufferType {
|
||||
enum : std::uint32_t {
|
||||
Send = 1,
|
||||
Receive = 2,
|
||||
};
|
||||
}
|
||||
|
||||
constexpr std::uint32_t responseHeader(std::uint32_t commandID, std::uint32_t normalResponses, std::uint32_t translateResponses) {
|
||||
// TODO: Maybe validate the response count stuff fits in 6 bits
|
||||
return (commandID << 16) | (normalResponses << 6) | translateResponses;
|
||||
}
|
||||
}
|
||||
|
||||
constexpr std::uint32_t pointerHeader(std::uint32_t index, std::uint32_t size, std::uint32_t type) {
|
||||
return (size << 14) | (index << 10) | (type << 1);
|
||||
}
|
||||
} // namespace IPC
|
|
@ -8,6 +8,7 @@ namespace ConfigMem {
|
|||
KernelVersionMajor = 0x1FF80003,
|
||||
SyscoreVer = 0x1FF80010,
|
||||
EnvInfo = 0x1FF80014,
|
||||
PrevFirm = 0x1FF80016,
|
||||
AppMemAlloc = 0x1FF80040,
|
||||
FirmUnknown = 0x1FF80060,
|
||||
FirmRevision = 0x1FF80061,
|
||||
|
@ -30,6 +31,11 @@ namespace ConfigMem {
|
|||
|
||||
// Shows what type of hardware we're running on
|
||||
namespace HardwareCodes {
|
||||
enum : u8 { Product = 1, Devboard = 2, Debugger = 3, Capture = 4 };
|
||||
enum : u8 {
|
||||
Product = 1,
|
||||
Devboard = 2,
|
||||
Debugger = 3,
|
||||
Capture = 4,
|
||||
};
|
||||
}
|
||||
} // namespace ConfigMem
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#pragma once
|
||||
#include "helpers.hpp"
|
||||
|
||||
using Handle = u32;
|
||||
using HorizonHandle = u32;
|
||||
|
||||
namespace KernelHandles {
|
||||
enum : u32 {
|
||||
|
@ -20,6 +20,7 @@ namespace KernelHandles {
|
|||
CFG_U, // CFG service (Console & region info)
|
||||
CFG_I,
|
||||
CFG_S, // Used by most system apps in lieu of cfg:u
|
||||
CFG_NOR, // Used by system settings app
|
||||
CSND, // Plays audio directly from PCM samples
|
||||
DLP_SRVR, // Download Play: Server. Used for network play.
|
||||
DSP, // DSP service (Used for audio decoding and output)
|
||||
|
@ -38,11 +39,14 @@ namespace KernelHandles {
|
|||
NIM, // Updates, DLC, etc
|
||||
NDM, // ?????
|
||||
NS_S, // Nintendo Shell service
|
||||
NWM_EXT, // ?????
|
||||
NWM_UDS, // Local multiplayer
|
||||
NEWS_U, // This service literally has 1 command (AddNotification) and I don't even understand what it does
|
||||
NEWS_S, // news:u on steroids
|
||||
NEWS_U, // This service literally has 1 command (AddNotification)
|
||||
PTM_U, // PTM service (Used for accessing various console info, such as battery, shell and pedometer state)
|
||||
PTM_SYSM, // PTM system service
|
||||
PTM_PLAY, // PTM Play service, used for retrieving play history
|
||||
PTM_GETS, // PTM RTC service (GetSystemTime)
|
||||
SOC, // Socket service
|
||||
SSL, // SSL service (Totally didn't expect that)
|
||||
Y2R, // Also does camera stuff
|
||||
|
@ -61,17 +65,17 @@ namespace KernelHandles {
|
|||
};
|
||||
|
||||
// Returns whether "handle" belongs to one of the OS services
|
||||
static constexpr bool isServiceHandle(Handle handle) {
|
||||
static constexpr bool isServiceHandle(HorizonHandle handle) {
|
||||
return handle >= MinServiceHandle && handle <= MaxServiceHandle;
|
||||
}
|
||||
|
||||
// Returns whether "handle" belongs to one of the OS services' shared memory areas
|
||||
static constexpr bool isSharedMemHandle(Handle handle) {
|
||||
static constexpr bool isSharedMemHandle(HorizonHandle handle) {
|
||||
return handle >= MinSharedMemHandle && handle <= MaxSharedMemHandle;
|
||||
}
|
||||
|
||||
// Returns the name of a handle as a string based on the given handle
|
||||
static const char* getServiceName(Handle handle) {
|
||||
static const char* getServiceName(HorizonHandle handle) {
|
||||
switch (handle) {
|
||||
case AC: return "AC";
|
||||
case ACT: return "ACT";
|
||||
|
@ -82,6 +86,8 @@ namespace KernelHandles {
|
|||
case CECD: return "CECD";
|
||||
case CFG_U: return "CFG:U";
|
||||
case CFG_I: return "CFG:I";
|
||||
case CFG_S: return "CFG:S";
|
||||
case CFG_NOR: return "CFG:NOR";
|
||||
case CSND: return "CSND";
|
||||
case DSP: return "DSP";
|
||||
case DLP_SRVR: return "DLP::SRVR";
|
||||
|
@ -97,13 +103,16 @@ namespace KernelHandles {
|
|||
case MCU_HWC: return "MCU::HWC";
|
||||
case MIC: return "MIC";
|
||||
case NDM: return "NDM";
|
||||
case NEWS_S: return "NEWS_S";
|
||||
case NEWS_U: return "NEWS_U";
|
||||
case NWM_EXT: return "nwm::EXT";
|
||||
case NWM_UDS: return "nwm::UDS";
|
||||
case NFC: return "NFC";
|
||||
case NIM: return "NIM";
|
||||
case PTM_U: return "PTM:U";
|
||||
case PTM_SYSM: return "PTM:SYSM";
|
||||
case PTM_PLAY: return "PTM:PLAY";
|
||||
case PTM_GETS: return "PTM:GETS";
|
||||
case SOC: return "SOC";
|
||||
case SSL: return "SSL";
|
||||
case Y2R: return "Y2R";
|
||||
|
|
|
@ -18,6 +18,8 @@ class CPU;
|
|||
struct Scheduler;
|
||||
|
||||
class Kernel {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
std::span<u32, 16> regs;
|
||||
CPU& cpu;
|
||||
Memory& mem;
|
||||
|
|
|
@ -47,7 +47,7 @@ enum class ProcessorID : s32 {
|
|||
struct AddressArbiter {};
|
||||
|
||||
struct ResourceLimits {
|
||||
Handle handle;
|
||||
HorizonHandle handle;
|
||||
|
||||
s32 currentCommit = 0;
|
||||
};
|
||||
|
@ -91,6 +91,8 @@ struct Port {
|
|||
};
|
||||
|
||||
struct Session {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle portHandle; // The port this session is subscribed to
|
||||
Session(Handle portHandle) : portHandle(portHandle) {}
|
||||
};
|
||||
|
@ -109,6 +111,8 @@ enum class ThreadStatus {
|
|||
};
|
||||
|
||||
struct Thread {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
u32 initialSP; // Initial r13 value
|
||||
u32 entrypoint; // Initial r15 value
|
||||
u32 priority;
|
||||
|
@ -161,6 +165,8 @@ static const char* kernelObjectTypeToString(KernelObjectType t) {
|
|||
}
|
||||
|
||||
struct Mutex {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
u64 waitlist; // Refer to the getWaitlist function below for documentation
|
||||
Handle ownerThread = 0; // Index of the thread that holds the mutex if it's locked
|
||||
Handle handle; // Handle of the mutex itself
|
||||
|
@ -203,6 +209,8 @@ struct MemoryBlock {
|
|||
|
||||
// Generic kernel object class
|
||||
struct KernelObject {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = 0; // A u32 the OS will use to identify objects
|
||||
void* data = nullptr;
|
||||
KernelObjectType type;
|
||||
|
|
|
@ -50,6 +50,7 @@ struct NCCH {
|
|||
|
||||
static constexpr u64 mediaUnit = 0x200;
|
||||
u64 size = 0; // Size of NCCH converted to bytes
|
||||
u64 saveDataSize = 0;
|
||||
u32 stackSize = 0;
|
||||
u32 bssSize = 0;
|
||||
u32 exheaderSize = 0;
|
||||
|
@ -64,8 +65,6 @@ struct NCCH {
|
|||
|
||||
// Contents of the .code file in the ExeFS
|
||||
std::vector<u8> codeFile;
|
||||
// Contains of the cart's save data
|
||||
std::vector<u8> saveData;
|
||||
// The cart region. Only the CXI's region matters to us. Necessary to get past region locking
|
||||
std::optional<Regions> region = std::nullopt;
|
||||
std::vector<u8> smdh;
|
||||
|
@ -78,7 +77,7 @@ struct NCCH {
|
|||
bool hasExeFS() { return exeFS.size != 0; }
|
||||
bool hasRomFS() { return romFS.size != 0; }
|
||||
bool hasCode() { return codeFile.size() != 0; }
|
||||
bool hasSaveData() { return saveData.size() != 0; }
|
||||
bool hasSaveData() { return saveDataSize != 0; }
|
||||
|
||||
// Parse SMDH for region info and such. Returns false on failure, true on success
|
||||
bool parseSMDH(const std::vector<u8> &smdh);
|
||||
|
|
|
@ -65,6 +65,7 @@ namespace Log {
|
|||
static Logger<false> nwmUdsLogger;
|
||||
static Logger<false> nimLogger;
|
||||
static Logger<false> ndmLogger;
|
||||
static Logger<false> nsLogger;
|
||||
static Logger<false> ptmLogger;
|
||||
static Logger<false> socLogger;
|
||||
static Logger<false> sslLogger;
|
||||
|
|
|
@ -102,6 +102,8 @@ namespace KernelMemoryTypes {
|
|||
}
|
||||
|
||||
class Memory {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
u8* fcram;
|
||||
u8* dspRam; // Provided to us by Audio
|
||||
u8* vram; // Provided to the memory class by the GPU class
|
||||
|
@ -213,8 +215,14 @@ private:
|
|||
}
|
||||
|
||||
enum class BatteryLevel {
|
||||
Empty = 0, AlmostEmpty, OneBar, TwoBars, ThreeBars, FourBars
|
||||
Empty = 0,
|
||||
AlmostEmpty,
|
||||
OneBar,
|
||||
TwoBars,
|
||||
ThreeBars,
|
||||
FourBars,
|
||||
};
|
||||
|
||||
u8 getBatteryState(bool adapterConnected, bool charging, BatteryLevel batteryLevel) {
|
||||
u8 value = static_cast<u8>(batteryLevel) << 2; // Bits 2:4 are the battery level from 0 to 5
|
||||
if (adapterConnected) value |= 1 << 0; // Bit 0 shows if the charger is connected
|
||||
|
@ -290,5 +298,5 @@ private:
|
|||
|
||||
bool allocateMainThreadStack(u32 size);
|
||||
Regions getConsoleRegion();
|
||||
void copySharedFont(u8* ptr);
|
||||
void copySharedFont(u8* ptr, u32 vaddr);
|
||||
};
|
||||
|
|
|
@ -1,6 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#include <QAction>
|
||||
#include <QCheckBox>
|
||||
#include <QDialog>
|
||||
#include <QLabel>
|
||||
#include <QLineEdit>
|
||||
#include <QListWidget>
|
||||
#include <QPushButton>
|
||||
#include <QTextEdit>
|
||||
#include <QWidget>
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
|
@ -24,3 +31,60 @@ class CheatsWindow final : public QWidget {
|
|||
std::filesystem::path cheatPath;
|
||||
Emulator* emu;
|
||||
};
|
||||
|
||||
struct CheatMetadata {
|
||||
u32 handle = Cheats::badCheatHandle;
|
||||
std::string name = "New cheat";
|
||||
std::string code;
|
||||
bool enabled = true;
|
||||
};
|
||||
|
||||
class CheatEntryWidget : public QWidget {
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
CheatEntryWidget(Emulator* emu, CheatMetadata metadata, QListWidget* parent);
|
||||
|
||||
void Update() {
|
||||
name->setText(metadata.name.c_str());
|
||||
enabled->setChecked(metadata.enabled);
|
||||
update();
|
||||
}
|
||||
|
||||
void Remove() {
|
||||
emu->getCheats().removeCheat(metadata.handle);
|
||||
cheatList->takeItem(cheatList->row(listItem));
|
||||
deleteLater();
|
||||
}
|
||||
|
||||
const CheatMetadata& getMetadata() { return metadata; }
|
||||
void setMetadata(const CheatMetadata& metadata) { this->metadata = metadata; }
|
||||
|
||||
private:
|
||||
void checkboxChanged(int state);
|
||||
void editClicked();
|
||||
|
||||
Emulator* emu;
|
||||
CheatMetadata metadata;
|
||||
u32 handle;
|
||||
QLabel* name;
|
||||
QCheckBox* enabled;
|
||||
QListWidget* cheatList;
|
||||
QListWidgetItem* listItem;
|
||||
};
|
||||
|
||||
class CheatEditDialog : public QDialog {
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
CheatEditDialog(Emulator* emu, CheatEntryWidget& cheatEntry);
|
||||
|
||||
void accepted();
|
||||
void rejected();
|
||||
|
||||
private:
|
||||
Emulator* emu;
|
||||
CheatEntryWidget& cheatEntry;
|
||||
QTextEdit* codeEdit;
|
||||
QLineEdit* nameEdit;
|
||||
};
|
|
@ -1,30 +1,58 @@
|
|||
#pragma once
|
||||
|
||||
#include <QApplication>
|
||||
#include <QCheckBox>
|
||||
#include <QComboBox>
|
||||
#include <QDialog>
|
||||
#include <QListWidget>
|
||||
#include <QPalette>
|
||||
#include <QStackedWidget>
|
||||
#include <QTextEdit>
|
||||
#include <QWidget>
|
||||
#include <QtWidgets>
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
|
||||
#include "emulator.hpp"
|
||||
#include "frontend_settings.hpp"
|
||||
|
||||
class ConfigWindow : public QDialog {
|
||||
Q_OBJECT
|
||||
|
||||
private:
|
||||
enum class Theme : int {
|
||||
System = 0,
|
||||
Light = 1,
|
||||
Dark = 2,
|
||||
GreetingsCat = 3,
|
||||
Cream = 4,
|
||||
};
|
||||
using ConfigCallback = std::function<void()>;
|
||||
using MainWindowCallback = std::function<QWidget*()>;
|
||||
|
||||
Theme currentTheme;
|
||||
QComboBox* themeSelect = nullptr;
|
||||
using Theme = FrontendSettings::Theme;
|
||||
using WindowIcon = FrontendSettings::WindowIcon;
|
||||
|
||||
void setTheme(Theme theme);
|
||||
QTextEdit* helpText = nullptr;
|
||||
QListWidget* widgetList = nullptr;
|
||||
QStackedWidget* widgetContainer = nullptr;
|
||||
|
||||
static constexpr size_t settingWidgetCount = 6;
|
||||
std::array<QString, settingWidgetCount> helpTexts;
|
||||
|
||||
// The config class holds a copy of the emulator config which it edits and sends
|
||||
// over to the emulator in a thread-safe manner
|
||||
EmulatorConfig config;
|
||||
|
||||
ConfigCallback updateConfig;
|
||||
MainWindowCallback getMainWindow;
|
||||
|
||||
void addWidget(QWidget* widget, QString title, QString icon, QString helpText);
|
||||
void setTheme(FrontendSettings::Theme theme);
|
||||
void setIcon(FrontendSettings::WindowIcon icon);
|
||||
|
||||
QComboBox* createLanguageSelect();
|
||||
|
||||
public:
|
||||
ConfigWindow(QWidget* parent = nullptr);
|
||||
ConfigWindow(ConfigCallback configCallback, MainWindowCallback windowCallback, const EmulatorConfig& config, QWidget* parent = nullptr);
|
||||
~ConfigWindow();
|
||||
|
||||
EmulatorConfig& getConfig() { return config; }
|
||||
|
||||
private:
|
||||
Emulator* emu;
|
||||
};
|
||||
|
|
|
@ -50,6 +50,8 @@ class MainWindow : public QMainWindow {
|
|||
PressTouchscreen,
|
||||
ReleaseTouchscreen,
|
||||
ReloadUbershader,
|
||||
SetScreenSize,
|
||||
UpdateConfig,
|
||||
};
|
||||
|
||||
// Tagged union representing our message queue messages
|
||||
|
@ -81,6 +83,11 @@ class MainWindow : public QMainWindow {
|
|||
u16 x;
|
||||
u16 y;
|
||||
} touchscreen;
|
||||
|
||||
struct {
|
||||
u32 width;
|
||||
u32 height;
|
||||
} screenSize;
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -95,7 +102,7 @@ class MainWindow : public QMainWindow {
|
|||
|
||||
QMenuBar* menuBar = nullptr;
|
||||
InputMappings keyboardMappings;
|
||||
ScreenWidget screen;
|
||||
ScreenWidget* screen;
|
||||
AboutWindow* aboutWindow;
|
||||
ConfigWindow* configWindow;
|
||||
CheatsWindow* cheatsEditor;
|
||||
|
@ -116,12 +123,15 @@ class MainWindow : public QMainWindow {
|
|||
void showAboutMenu();
|
||||
void initControllers();
|
||||
void pollControllers();
|
||||
void setupControllerSensors(SDL_GameController* controller);
|
||||
void sendMessage(const EmulatorMessage& message);
|
||||
void dispatchMessage(const EmulatorMessage& message);
|
||||
void loadTranslation();
|
||||
|
||||
// Tracks whether we are using an OpenGL-backed renderer or a Vulkan-backed renderer
|
||||
bool usingGL = false;
|
||||
bool usingVk = false;
|
||||
bool usingMtl = false;
|
||||
|
||||
// Variables to keep track of whether the user is controlling the 3DS analog stick with their keyboard
|
||||
// This is done so when a gamepad is connected, we won't automatically override the 3DS analog stick settings with the gamepad's state
|
||||
|
@ -133,12 +143,18 @@ class MainWindow : public QMainWindow {
|
|||
MainWindow(QApplication* app, QWidget* parent = nullptr);
|
||||
~MainWindow();
|
||||
|
||||
void closeEvent(QCloseEvent* event) override;
|
||||
void keyPressEvent(QKeyEvent* event) override;
|
||||
void keyReleaseEvent(QKeyEvent* event) override;
|
||||
|
||||
void mousePressEvent(QMouseEvent* event) override;
|
||||
void mouseReleaseEvent(QMouseEvent* event) override;
|
||||
void mouseMoveEvent(QMouseEvent* event) override;
|
||||
|
||||
void loadLuaScript(const std::string& code);
|
||||
void reloadShader(const std::string& shader);
|
||||
void editCheat(u32 handle, const std::vector<uint8_t>& cheat, const std::function<void(u32)>& callback);
|
||||
|
||||
void handleScreenResize(u32 width, u32 height);
|
||||
void handleTouchscreenPress(QMouseEvent* event);
|
||||
};
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#pragma once
|
||||
#include <QWidget>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
#include "gl/context.h"
|
||||
|
@ -10,15 +11,28 @@ class ScreenWidget : public QWidget {
|
|||
Q_OBJECT
|
||||
|
||||
public:
|
||||
ScreenWidget(QWidget* parent = nullptr);
|
||||
using ResizeCallback = std::function<void(u32, u32)>;
|
||||
|
||||
ScreenWidget(ResizeCallback resizeCallback, QWidget* parent = nullptr);
|
||||
void resizeEvent(QResizeEvent* event) override;
|
||||
// Called by the emulator thread for resizing the actual GL surface, since the emulator thread owns the GL context
|
||||
void resizeSurface(u32 width, u32 height);
|
||||
|
||||
GL::Context* getGLContext() { return glContext.get(); }
|
||||
|
||||
// Dimensions of our output surface
|
||||
u32 surfaceWidth = 0;
|
||||
u32 surfaceHeight = 0;
|
||||
WindowInfo windowInfo;
|
||||
|
||||
// Cached "previous" dimensions, used when resizing our window
|
||||
u32 previousWidth = 0;
|
||||
u32 previousHeight = 0;
|
||||
|
||||
private:
|
||||
std::unique_ptr<GL::Context> glContext = nullptr;
|
||||
ResizeCallback resizeCallback;
|
||||
|
||||
bool createGLContext();
|
||||
|
||||
qreal devicePixelRatioFromScreen() const;
|
||||
|
|
|
@ -23,6 +23,8 @@ class FrontendSDL {
|
|||
SDL_GameController* gameController = nullptr;
|
||||
InputMappings keyboardMappings;
|
||||
|
||||
u32 windowWidth = 400;
|
||||
u32 windowHeight = 480;
|
||||
int gameControllerID;
|
||||
bool programRunning = true;
|
||||
|
||||
|
@ -35,4 +37,6 @@ class FrontendSDL {
|
|||
// And so the user can still use the keyboard to control the analog
|
||||
bool keyboardAnalogX = false;
|
||||
bool keyboardAnalogY = false;
|
||||
|
||||
void setupControllerSensors(SDL_GameController* controller);
|
||||
};
|
73
include/renderdoc.hpp
Normal file
73
include/renderdoc.hpp
Normal file
|
@ -0,0 +1,73 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
#include <string>
|
||||
|
||||
#include "helpers.hpp"
|
||||
|
||||
#ifdef PANDA3DS_ENABLE_RENDERDOC
|
||||
namespace Renderdoc {
|
||||
// Loads renderdoc dynamic library module.
|
||||
void loadRenderdoc();
|
||||
|
||||
// Begins a capture if a renderdoc instance is attached.
|
||||
void startCapture();
|
||||
|
||||
// Ends current renderdoc capture.
|
||||
void endCapture();
|
||||
|
||||
// Triggers capturing process.
|
||||
void triggerCapture();
|
||||
|
||||
// Sets output directory for captures
|
||||
void setOutputDir(const std::string& path, const std::string& prefix);
|
||||
|
||||
// Returns whether Renderdoc has been loaded
|
||||
bool isLoaded();
|
||||
|
||||
// Returns whether we've compiled with Renderdoc support
|
||||
static constexpr bool isSupported() { return true; }
|
||||
} // namespace Renderdoc
|
||||
#else
|
||||
namespace Renderdoc {
|
||||
static void loadRenderdoc() {}
|
||||
static void startCapture() { Helpers::panic("Tried to start a Renderdoc capture while support for renderdoc is disabled"); }
|
||||
static void endCapture() { Helpers::panic("Tried to end a Renderdoc capture while support for renderdoc is disabled"); }
|
||||
static void triggerCapture() { Helpers::panic("Tried to trigger a Renderdoc capture while support for renderdoc is disabled"); }
|
||||
static void setOutputDir(const std::string& path, const std::string& prefix) {}
|
||||
static constexpr bool isSupported() { return false; }
|
||||
static constexpr bool isLoaded() { return false; }
|
||||
} // namespace Renderdoc
|
||||
#endif
|
||||
|
||||
namespace Renderdoc {
|
||||
// RAII scope class that encloses a Renderdoc capture, as long as it's triggered by triggerCapture
|
||||
struct Scope {
|
||||
Scope() { Renderdoc::startCapture(); }
|
||||
~Scope() { Renderdoc::endCapture(); }
|
||||
|
||||
Scope(const Scope&) = delete;
|
||||
Scope& operator=(const Scope&) = delete;
|
||||
|
||||
Scope(Scope&&) = delete;
|
||||
Scope& operator=(const Scope&&) = delete;
|
||||
};
|
||||
|
||||
// RAII scope class that encloses a Renderdoc capture. Unlike regular Scope it doesn't wait for a trigger, it will always issue the capture
|
||||
// trigger on its own and take a capture
|
||||
struct InstantScope {
|
||||
InstantScope() {
|
||||
Renderdoc::triggerCapture();
|
||||
Renderdoc::startCapture();
|
||||
}
|
||||
|
||||
~InstantScope() { Renderdoc::endCapture(); }
|
||||
|
||||
InstantScope(const InstantScope&) = delete;
|
||||
InstantScope& operator=(const InstantScope&) = delete;
|
||||
|
||||
InstantScope(InstantScope&&) = delete;
|
||||
InstantScope& operator=(const InstantScope&&) = delete;
|
||||
};
|
||||
} // namespace Renderdoc
|
|
@ -1,9 +1,10 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <optional>
|
||||
|
||||
#include "PICA/draw_acceleration.hpp"
|
||||
#include "PICA/pica_vertex.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
@ -17,12 +18,16 @@ enum class RendererType : s8 {
|
|||
Null = 0,
|
||||
OpenGL = 1,
|
||||
Vulkan = 2,
|
||||
Software = 3,
|
||||
Metal = 3,
|
||||
Software = 4,
|
||||
};
|
||||
|
||||
class GPU;
|
||||
struct EmulatorConfig;
|
||||
struct SDL_Window;
|
||||
|
||||
class GPU;
|
||||
class ShaderUnit;
|
||||
|
||||
class Renderer {
|
||||
protected:
|
||||
GPU& gpu;
|
||||
|
@ -46,6 +51,8 @@ class Renderer {
|
|||
u32 outputWindowWidth = 400;
|
||||
u32 outputWindowHeight = 240 * 2;
|
||||
|
||||
EmulatorConfig* emulatorConfig = nullptr;
|
||||
|
||||
public:
|
||||
Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
|
||||
virtual ~Renderer();
|
||||
|
@ -74,6 +81,16 @@ class Renderer {
|
|||
virtual std::string getUbershader() { return ""; }
|
||||
virtual void setUbershader(const std::string& shader) {}
|
||||
|
||||
// Only relevant for OpenGL renderer and other OpenGL-based backends (eg software)
|
||||
// Called to notify the core to use OpenGL ES and not desktop GL
|
||||
virtual void setupGLES() {}
|
||||
|
||||
// This function is called on every draw call before parsing vertex data.
|
||||
// It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between
|
||||
// ubershaders and shadergen, and so on.
|
||||
// Returns whether this draw is eligible for using hardware-accelerated shaders or if shaders should run on the CPU
|
||||
virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) { return false; }
|
||||
|
||||
// Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window
|
||||
#ifdef PANDA3DS_FRONTEND_QT
|
||||
virtual void initGraphicsContext(GL::Context* context) { Helpers::panic("Tried to initialize incompatible renderer with GL context"); }
|
||||
|
@ -99,4 +116,6 @@ class Renderer {
|
|||
outputWindowWidth = width;
|
||||
outputWindowHeight = height;
|
||||
}
|
||||
|
||||
void setConfig(EmulatorConfig* config) { emulatorConfig = config; }
|
||||
};
|
||||
|
|
13
include/renderer_gl/gl_driver.hpp
Normal file
13
include/renderer_gl/gl_driver.hpp
Normal file
|
@ -0,0 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
// Information about our OpenGL/OpenGL ES driver that we should keep track of
|
||||
// Stuff like whether specific extensions are supported, and potentially things like OpenGL context information
|
||||
namespace OpenGL {
|
||||
struct Driver {
|
||||
bool usingGLES = false;
|
||||
bool supportsExtFbFetch = false;
|
||||
bool supportsArmFbFetch = false;
|
||||
|
||||
bool supportFbFetch() const { return supportsExtFbFetch || supportsArmFbFetch; }
|
||||
};
|
||||
} // namespace OpenGL
|
|
@ -38,11 +38,14 @@ struct GLStateManager {
|
|||
|
||||
GLuint stencilMask;
|
||||
GLuint boundVAO;
|
||||
GLuint boundVBO;
|
||||
GLuint currentProgram;
|
||||
GLuint boundUBO;
|
||||
|
||||
GLenum depthFunc;
|
||||
GLenum logicOp;
|
||||
GLenum blendEquationRGB, blendEquationAlpha;
|
||||
GLenum blendFuncSourceRGB, blendFuncSourceAlpha;
|
||||
GLenum blendFuncDestRGB, blendFuncDestAlpha;
|
||||
|
||||
void reset();
|
||||
void resetBlend();
|
||||
|
@ -51,7 +54,7 @@ struct GLStateManager {
|
|||
void resetColourMask();
|
||||
void resetDepth();
|
||||
void resetVAO();
|
||||
void resetVBO();
|
||||
void resetBuffers();
|
||||
void resetProgram();
|
||||
void resetScissor();
|
||||
void resetStencil();
|
||||
|
@ -169,13 +172,6 @@ struct GLStateManager {
|
|||
}
|
||||
}
|
||||
|
||||
void bindVBO(GLuint handle) {
|
||||
if (boundVBO != handle) {
|
||||
boundVBO = handle;
|
||||
glBindBuffer(GL_ARRAY_BUFFER, handle);
|
||||
}
|
||||
}
|
||||
|
||||
void useProgram(GLuint handle) {
|
||||
if (currentProgram != handle) {
|
||||
currentProgram = handle;
|
||||
|
@ -183,8 +179,14 @@ struct GLStateManager {
|
|||
}
|
||||
}
|
||||
|
||||
void bindUBO(GLuint handle) {
|
||||
if (boundUBO != handle) {
|
||||
boundUBO = handle;
|
||||
glBindBuffer(GL_UNIFORM_BUFFER, boundUBO);
|
||||
}
|
||||
}
|
||||
|
||||
void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); }
|
||||
void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); }
|
||||
void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); }
|
||||
|
||||
void setColourMask(bool r, bool g, bool b, bool a) {
|
||||
|
@ -224,6 +226,41 @@ struct GLStateManager {
|
|||
}
|
||||
|
||||
void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast<GLenum>(func)); }
|
||||
|
||||
// Counterpart to glBlendEquationSeparate
|
||||
void setBlendEquation(GLenum modeRGB, GLenum modeAlpha) {
|
||||
if (blendEquationRGB != modeRGB || blendEquationAlpha != modeAlpha) {
|
||||
blendEquationRGB = modeRGB;
|
||||
blendEquationAlpha = modeAlpha;
|
||||
|
||||
glBlendEquationSeparate(modeRGB, modeAlpha);
|
||||
}
|
||||
}
|
||||
|
||||
// Counterpart to glBlendFuncSeparate
|
||||
void setBlendFunc(GLenum sourceRGB, GLenum destRGB, GLenum sourceAlpha, GLenum destAlpha) {
|
||||
if (blendFuncSourceRGB != sourceRGB || blendFuncDestRGB != destRGB || blendFuncSourceAlpha != sourceAlpha ||
|
||||
blendFuncDestAlpha != destAlpha) {
|
||||
|
||||
blendFuncSourceRGB = sourceRGB;
|
||||
blendFuncDestRGB = destRGB;
|
||||
blendFuncSourceAlpha = sourceAlpha;
|
||||
blendFuncDestAlpha = destAlpha;
|
||||
|
||||
glBlendFuncSeparate(sourceRGB, destRGB,sourceAlpha, destAlpha);
|
||||
}
|
||||
}
|
||||
|
||||
// Counterpart to regular glBlendEquation
|
||||
void setBlendEquation(GLenum mode) { setBlendEquation(mode, mode); }
|
||||
|
||||
void setBlendEquation(OpenGL::BlendEquation modeRGB, OpenGL::BlendEquation modeAlpha) {
|
||||
setBlendEquation(static_cast<GLenum>(modeRGB), static_cast<GLenum>(modeAlpha));
|
||||
}
|
||||
|
||||
void setBlendEquation(OpenGL::BlendEquation mode) {
|
||||
setBlendEquation(static_cast<GLenum>(mode));
|
||||
}
|
||||
};
|
||||
|
||||
static_assert(std::is_trivially_constructible<GLStateManager>(), "OpenGL State Manager class is not trivially constructible!");
|
||||
|
|
|
@ -1,11 +1,23 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/pica_frag_config.hpp"
|
||||
#include "PICA/pica_hash.hpp"
|
||||
#include "PICA/pica_vert_config.hpp"
|
||||
#include "PICA/pica_vertex.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "PICA/shader_gen.hpp"
|
||||
#include "gl/stream_buffer.h"
|
||||
#include "gl_driver.hpp"
|
||||
#include "gl_state.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "logger.hpp"
|
||||
|
@ -22,27 +34,48 @@ class RendererGL final : public Renderer {
|
|||
OpenGL::Program triangleProgram;
|
||||
OpenGL::Program displayProgram;
|
||||
|
||||
OpenGL::VertexArray vao;
|
||||
// VAO for when not using accelerated vertex shaders. Contains attribute declarations matching to the PICA fixed function fragment attributes
|
||||
OpenGL::VertexArray defaultVAO;
|
||||
// VAO for when using accelerated vertex shaders. The PICA vertex shader inputs are passed as attributes without CPU processing.
|
||||
OpenGL::VertexArray hwShaderVAO;
|
||||
OpenGL::VertexBuffer vbo;
|
||||
|
||||
// TEV configuration uniform locations
|
||||
GLint textureEnvSourceLoc = -1;
|
||||
GLint textureEnvOperandLoc = -1;
|
||||
GLint textureEnvCombinerLoc = -1;
|
||||
GLint textureEnvColorLoc = -1;
|
||||
GLint textureEnvScaleLoc = -1;
|
||||
// Data
|
||||
struct {
|
||||
// TEV configuration uniform locations
|
||||
GLint textureEnvSourceLoc = -1;
|
||||
GLint textureEnvOperandLoc = -1;
|
||||
GLint textureEnvCombinerLoc = -1;
|
||||
GLint textureEnvColorLoc = -1;
|
||||
GLint textureEnvScaleLoc = -1;
|
||||
|
||||
// Uniform of PICA registers
|
||||
GLint picaRegLoc = -1;
|
||||
// Uniform of PICA registers
|
||||
GLint picaRegLoc = -1;
|
||||
|
||||
// Depth configuration uniform locations
|
||||
GLint depthOffsetLoc = -1;
|
||||
GLint depthScaleLoc = -1;
|
||||
GLint depthmapEnableLoc = -1;
|
||||
// Depth configuration uniform locations
|
||||
GLint depthOffsetLoc = -1;
|
||||
GLint depthScaleLoc = -1;
|
||||
GLint depthmapEnableLoc = -1;
|
||||
} ubershaderData;
|
||||
|
||||
float oldDepthScale = -1.0;
|
||||
float oldDepthOffset = 0.0;
|
||||
bool oldDepthmapEnable = false;
|
||||
// Set by prepareForDraw, tells us whether the current draw is using hw-accelerated shader
|
||||
bool usingAcceleratedShader = false;
|
||||
bool performIndexedRender = false;
|
||||
bool usingShortIndices = false;
|
||||
|
||||
// Set by prepareForDraw, metadata for indexed renders
|
||||
GLuint minimumIndex = 0;
|
||||
GLuint maximumIndex = 0;
|
||||
void* hwIndexBufferOffset = nullptr;
|
||||
|
||||
// When doing hw shaders, we cache which attributes are enabled in our VAO to avoid having to enable/disable all attributes on each draw
|
||||
u32 previousAttributeMask = 0;
|
||||
|
||||
// Cached pointer to the current vertex shader when using HW accelerated shaders
|
||||
OpenGL::Shader* generatedVertexShader = nullptr;
|
||||
|
||||
SurfaceCache<DepthBuffer, 16, true> depthBufferCache;
|
||||
SurfaceCache<ColourBuffer, 16, true> colourBufferCache;
|
||||
|
@ -53,25 +86,82 @@ class RendererGL final : public Renderer {
|
|||
OpenGL::VertexBuffer dummyVBO;
|
||||
|
||||
OpenGL::Texture screenTexture;
|
||||
GLuint lightLUTTextureArray;
|
||||
OpenGL::Texture LUTTexture;
|
||||
OpenGL::Framebuffer screenFramebuffer;
|
||||
OpenGL::Texture blankTexture;
|
||||
// The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation
|
||||
// We can compile this once and then link it with all other generated fragment shaders
|
||||
OpenGL::Shader defaultShadergenVs;
|
||||
GLuint shadergenFragmentUBO;
|
||||
// UBO for uploading the PICA uniforms when using hw shaders
|
||||
GLuint hwShaderUniformUBO;
|
||||
|
||||
using StreamBuffer = OpenGLStreamBuffer;
|
||||
std::unique_ptr<StreamBuffer> hwVertexBuffer;
|
||||
std::unique_ptr<StreamBuffer> hwIndexBuffer;
|
||||
|
||||
// Cache of fixed attribute values so that we don't do any duplicate updates
|
||||
std::array<std::array<float, 4>, 16> fixedAttrValues;
|
||||
|
||||
// Cached recompiled fragment shader
|
||||
struct CachedProgram {
|
||||
OpenGL::Program program;
|
||||
};
|
||||
|
||||
struct ShaderCache {
|
||||
std::unordered_map<PICA::VertConfig, std::optional<OpenGL::Shader>> vertexShaderCache;
|
||||
std::unordered_map<PICA::FragmentConfig, OpenGL::Shader> fragmentShaderCache;
|
||||
|
||||
// Program cache indexed by GLuints for the vertex and fragment shader to use
|
||||
// Top 32 bits are the vertex shader GLuint, bottom 32 bits are the fs GLuint
|
||||
std::unordered_map<u64, CachedProgram> programCache;
|
||||
|
||||
void clear() {
|
||||
for (auto& it : programCache) {
|
||||
CachedProgram& cachedProgram = it.second;
|
||||
cachedProgram.program.free();
|
||||
}
|
||||
|
||||
for (auto& it : vertexShaderCache) {
|
||||
if (it.second.has_value()) {
|
||||
it.second->free();
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& it : fragmentShaderCache) {
|
||||
it.second.free();
|
||||
}
|
||||
|
||||
programCache.clear();
|
||||
vertexShaderCache.clear();
|
||||
fragmentShaderCache.clear();
|
||||
}
|
||||
};
|
||||
ShaderCache shaderCache;
|
||||
|
||||
OpenGL::Framebuffer getColourFBO();
|
||||
OpenGL::Texture getTexture(Texture& tex);
|
||||
OpenGL::Program& getSpecializedShader();
|
||||
|
||||
PICA::ShaderGen::FragmentGenerator fragShaderGen;
|
||||
OpenGL::Driver driverInfo;
|
||||
|
||||
MAKE_LOG_FUNCTION(log, rendererLogger)
|
||||
void setupBlending();
|
||||
void setupStencilTest(bool stencilEnable);
|
||||
void bindDepthBuffer();
|
||||
void setupTextureEnvState();
|
||||
void setupUbershaderTexEnv();
|
||||
void bindTexturesToSlots();
|
||||
void updateLightingLUT();
|
||||
void updateFogLUT();
|
||||
void initGraphicsContextInternal();
|
||||
|
||||
void accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel);
|
||||
void compileDisplayShader();
|
||||
|
||||
public:
|
||||
RendererGL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs)
|
||||
: Renderer(gpu, internalRegs, externalRegs) {}
|
||||
: Renderer(gpu, internalRegs, externalRegs), fragShaderGen(PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL) {}
|
||||
~RendererGL() override;
|
||||
|
||||
void reset() override;
|
||||
|
@ -80,12 +170,14 @@ class RendererGL final : public Renderer {
|
|||
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; // Clear a GPU buffer in VRAM
|
||||
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; // Perform display transfer
|
||||
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
|
||||
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override; // Draw the given vertices
|
||||
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override; // Draw the given vertices
|
||||
void deinitGraphicsContext() override;
|
||||
|
||||
virtual bool supportsShaderReload() override { return true; }
|
||||
virtual std::string getUbershader() override;
|
||||
virtual void setUbershader(const std::string& shader) override;
|
||||
virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) override;
|
||||
virtual void setupGLES() override;
|
||||
|
||||
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);
|
||||
|
||||
|
@ -100,4 +192,4 @@ class RendererGL final : public Renderer {
|
|||
|
||||
// Take a screenshot of the screen and store it in a file
|
||||
void screenshot(const std::string& name) override;
|
||||
};
|
||||
};
|
|
@ -19,8 +19,6 @@ template <typename SurfaceType, size_t capacity, bool evictOnOverflow = false>
|
|||
class SurfaceCache {
|
||||
// Vanilla std::optional can't hold actual references
|
||||
using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>;
|
||||
static_assert(std::is_same<SurfaceType, ColourBuffer>() || std::is_same<SurfaceType, DepthBuffer>() ||
|
||||
std::is_same<SurfaceType, Texture>(), "Invalid surface type");
|
||||
|
||||
size_t size;
|
||||
size_t evictionIndex;
|
||||
|
|
74
include/renderer_mtl/mtl_blit_pipeline_cache.hpp
Normal file
74
include/renderer_mtl/mtl_blit_pipeline_cache.hpp
Normal file
|
@ -0,0 +1,74 @@
|
|||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "objc_helper.hpp"
|
||||
#include "pica_to_mtl.hpp"
|
||||
|
||||
using namespace PICA;
|
||||
|
||||
namespace Metal {
|
||||
struct BlitPipelineHash {
|
||||
// Formats
|
||||
ColorFmt colorFmt;
|
||||
DepthFmt depthFmt;
|
||||
};
|
||||
|
||||
// This pipeline only caches the pipeline with all of its color and depth attachment variations
|
||||
class BlitPipelineCache {
|
||||
public:
|
||||
BlitPipelineCache() = default;
|
||||
|
||||
~BlitPipelineCache() {
|
||||
reset();
|
||||
vertexFunction->release();
|
||||
fragmentFunction->release();
|
||||
}
|
||||
|
||||
void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) {
|
||||
device = dev;
|
||||
vertexFunction = vert;
|
||||
fragmentFunction = frag;
|
||||
}
|
||||
|
||||
MTL::RenderPipelineState* get(BlitPipelineHash hash) {
|
||||
u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt;
|
||||
auto& pipeline = pipelineCache[intHash];
|
||||
if (!pipeline) {
|
||||
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
|
||||
desc->setVertexFunction(vertexFunction);
|
||||
desc->setFragmentFunction(fragmentFunction);
|
||||
|
||||
auto colorAttachment = desc->colorAttachments()->object(0);
|
||||
colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));
|
||||
|
||||
desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt));
|
||||
|
||||
NS::Error* error = nullptr;
|
||||
desc->setLabel(toNSString("Blit pipeline"));
|
||||
pipeline = device->newRenderPipelineState(desc, &error);
|
||||
if (error) {
|
||||
Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
|
||||
}
|
||||
|
||||
desc->release();
|
||||
}
|
||||
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& pair : pipelineCache) {
|
||||
pair.second->release();
|
||||
}
|
||||
pipelineCache.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<u8, MTL::RenderPipelineState*> pipelineCache;
|
||||
|
||||
MTL::Device* device;
|
||||
MTL::Function* vertexFunction;
|
||||
MTL::Function* fragmentFunction;
|
||||
};
|
||||
} // namespace Metal
|
56
include/renderer_mtl/mtl_command_encoder.hpp
Normal file
56
include/renderer_mtl/mtl_command_encoder.hpp
Normal file
|
@ -0,0 +1,56 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
|
||||
namespace Metal {
|
||||
struct RenderState {
|
||||
MTL::RenderPipelineState* renderPipelineState = nullptr;
|
||||
MTL::DepthStencilState* depthStencilState = nullptr;
|
||||
MTL::Texture* textures[3] = {nullptr};
|
||||
MTL::SamplerState* samplerStates[3] = {nullptr};
|
||||
};
|
||||
|
||||
class CommandEncoder {
|
||||
public:
|
||||
void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) {
|
||||
renderCommandEncoder = rce;
|
||||
|
||||
// Reset the render state
|
||||
renderState = RenderState{};
|
||||
}
|
||||
|
||||
// Resource binding
|
||||
void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) {
|
||||
if (renderPipelineState != renderState.renderPipelineState) {
|
||||
renderCommandEncoder->setRenderPipelineState(renderPipelineState);
|
||||
renderState.renderPipelineState = renderPipelineState;
|
||||
}
|
||||
}
|
||||
|
||||
void setDepthStencilState(MTL::DepthStencilState* depthStencilState) {
|
||||
if (depthStencilState != renderState.depthStencilState) {
|
||||
renderCommandEncoder->setDepthStencilState(depthStencilState);
|
||||
renderState.depthStencilState = depthStencilState;
|
||||
}
|
||||
}
|
||||
|
||||
void setFragmentTexture(MTL::Texture* texture, u32 index) {
|
||||
if (texture != renderState.textures[index]) {
|
||||
renderCommandEncoder->setFragmentTexture(texture, index);
|
||||
renderState.textures[index] = texture;
|
||||
}
|
||||
}
|
||||
|
||||
void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) {
|
||||
if (samplerState != renderState.samplerStates[index]) {
|
||||
renderCommandEncoder->setFragmentSamplerState(samplerState, index);
|
||||
renderState.samplerStates[index] = samplerState;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
MTL::RenderCommandEncoder* renderCommandEncoder = nullptr;
|
||||
|
||||
RenderState renderState;
|
||||
};
|
||||
} // namespace Metal
|
6
include/renderer_mtl/mtl_common.hpp
Normal file
6
include/renderer_mtl/mtl_common.hpp
Normal file
|
@ -0,0 +1,6 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
|
||||
#define GET_HELPER_TEXTURE_BINDING(binding) (30 - binding)
|
||||
#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - binding)
|
80
include/renderer_mtl/mtl_depth_stencil_cache.hpp
Normal file
80
include/renderer_mtl/mtl_depth_stencil_cache.hpp
Normal file
|
@ -0,0 +1,80 @@
|
|||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "pica_to_mtl.hpp"
|
||||
|
||||
using namespace PICA;
|
||||
|
||||
namespace Metal {
|
||||
struct DepthStencilHash {
|
||||
u32 stencilConfig;
|
||||
u16 stencilOpConfig;
|
||||
bool depthStencilWrite;
|
||||
u8 depthFunc;
|
||||
};
|
||||
|
||||
class DepthStencilCache {
|
||||
public:
|
||||
DepthStencilCache() = default;
|
||||
|
||||
~DepthStencilCache() { reset(); }
|
||||
|
||||
void set(MTL::Device* dev) { device = dev; }
|
||||
|
||||
MTL::DepthStencilState* get(DepthStencilHash hash) {
|
||||
u64 intHash =
|
||||
((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig;
|
||||
auto& depthStencilState = depthStencilCache[intHash];
|
||||
if (!depthStencilState) {
|
||||
MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init();
|
||||
desc->setDepthWriteEnabled(hash.depthStencilWrite);
|
||||
desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc));
|
||||
|
||||
const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig);
|
||||
MTL::StencilDescriptor* stencilDesc = nullptr;
|
||||
if (stencilEnable) {
|
||||
const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig);
|
||||
const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig);
|
||||
|
||||
const u32 stencilBufferMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0;
|
||||
|
||||
const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig);
|
||||
const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig);
|
||||
const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig);
|
||||
|
||||
stencilDesc = MTL::StencilDescriptor::alloc()->init();
|
||||
stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp));
|
||||
stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp));
|
||||
stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp));
|
||||
stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc));
|
||||
stencilDesc->setReadMask(stencilRefMask);
|
||||
stencilDesc->setWriteMask(stencilBufferMask);
|
||||
|
||||
desc->setFrontFaceStencil(stencilDesc);
|
||||
desc->setBackFaceStencil(stencilDesc);
|
||||
}
|
||||
|
||||
depthStencilState = device->newDepthStencilState(desc);
|
||||
|
||||
desc->release();
|
||||
if (stencilDesc) {
|
||||
stencilDesc->release();
|
||||
}
|
||||
}
|
||||
|
||||
return depthStencilState;
|
||||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& pair : depthStencilCache) {
|
||||
pair.second->release();
|
||||
}
|
||||
depthStencilCache.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<u64, MTL::DepthStencilState*> depthStencilCache;
|
||||
MTL::Device* device;
|
||||
};
|
||||
} // namespace Metal
|
162
include/renderer_mtl/mtl_draw_pipeline_cache.hpp
Normal file
162
include/renderer_mtl/mtl_draw_pipeline_cache.hpp
Normal file
|
@ -0,0 +1,162 @@
|
|||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "objc_helper.hpp"
|
||||
#include "pica_to_mtl.hpp"
|
||||
|
||||
using namespace PICA;
|
||||
|
||||
namespace Metal {
|
||||
struct DrawFragmentFunctionHash {
|
||||
u32 lightingConfig1; // 32 bits (TODO: check this)
|
||||
bool lightingEnabled; // 1 bit
|
||||
u8 lightingNumLights; // 3 bits
|
||||
// | ref | func | on |
|
||||
u16 alphaControl; // 12 bits (mask: 11111111 0111 0001)
|
||||
};
|
||||
|
||||
inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) {
|
||||
if (!l.lightingEnabled && r.lightingEnabled) return true;
|
||||
if (l.lightingNumLights < r.lightingNumLights) return true;
|
||||
if (l.lightingConfig1 < r.lightingConfig1) return true;
|
||||
if (l.alphaControl < r.alphaControl) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
struct DrawPipelineHash { // 56 bits
|
||||
// Formats
|
||||
ColorFmt colorFmt; // 3 bits
|
||||
DepthFmt depthFmt; // 3 bits
|
||||
|
||||
// Blending
|
||||
bool blendEnabled; // 1 bit
|
||||
// | functions | aeq | ceq |
|
||||
u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111)
|
||||
u8 colorWriteMask; // 4 bits
|
||||
|
||||
DrawFragmentFunctionHash fragHash;
|
||||
};
|
||||
|
||||
inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) {
|
||||
if ((u32)l.colorFmt < (u32)r.colorFmt) return true;
|
||||
if ((u32)l.depthFmt < (u32)r.depthFmt) return true;
|
||||
if (!l.blendEnabled && r.blendEnabled) return true;
|
||||
if (l.blendControl < r.blendControl) return true;
|
||||
if (l.colorWriteMask < r.colorWriteMask) return true;
|
||||
if (l.fragHash < r.fragHash) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// This pipeline only caches the pipeline with all of its color and depth attachment variations
|
||||
class DrawPipelineCache {
|
||||
public:
|
||||
DrawPipelineCache() = default;
|
||||
|
||||
~DrawPipelineCache() {
|
||||
reset();
|
||||
vertexDescriptor->release();
|
||||
vertexFunction->release();
|
||||
}
|
||||
|
||||
void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) {
|
||||
device = dev;
|
||||
library = lib;
|
||||
vertexFunction = vert;
|
||||
vertexDescriptor = vertDesc;
|
||||
}
|
||||
|
||||
MTL::RenderPipelineState* get(DrawPipelineHash hash) {
|
||||
auto& pipeline = pipelineCache[hash];
|
||||
|
||||
if (!pipeline) {
|
||||
auto& fragmentFunction = fragmentFunctionCache[hash.fragHash];
|
||||
if (!fragmentFunction) {
|
||||
MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init();
|
||||
constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0));
|
||||
constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1));
|
||||
constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2));
|
||||
constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3));
|
||||
|
||||
NS::Error* error = nullptr;
|
||||
fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error);
|
||||
if (error) {
|
||||
Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding));
|
||||
}
|
||||
constants->release();
|
||||
}
|
||||
|
||||
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
|
||||
desc->setVertexFunction(vertexFunction);
|
||||
desc->setFragmentFunction(fragmentFunction);
|
||||
desc->setVertexDescriptor(vertexDescriptor);
|
||||
|
||||
auto colorAttachment = desc->colorAttachments()->object(0);
|
||||
colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));
|
||||
MTL::ColorWriteMask writeMask = 0;
|
||||
if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed;
|
||||
if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen;
|
||||
if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue;
|
||||
if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha;
|
||||
colorAttachment->setWriteMask(writeMask);
|
||||
if (hash.blendEnabled) {
|
||||
const u8 rgbEquation = hash.blendControl & 0x7;
|
||||
const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl);
|
||||
|
||||
// Get blending functions
|
||||
const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl);
|
||||
const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl);
|
||||
const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl);
|
||||
const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl);
|
||||
|
||||
colorAttachment->setBlendingEnabled(true);
|
||||
colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation));
|
||||
colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation));
|
||||
colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc));
|
||||
colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc));
|
||||
colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc));
|
||||
colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc));
|
||||
}
|
||||
|
||||
MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt);
|
||||
desc->setDepthAttachmentPixelFormat(depthFormat);
|
||||
if (hash.depthFmt == DepthFmt::Depth24Stencil8) desc->setStencilAttachmentPixelFormat(depthFormat);
|
||||
|
||||
NS::Error* error = nullptr;
|
||||
desc->setLabel(toNSString("Draw pipeline"));
|
||||
pipeline = device->newRenderPipelineState(desc, &error);
|
||||
if (error) {
|
||||
Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
|
||||
}
|
||||
|
||||
desc->release();
|
||||
}
|
||||
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& pair : pipelineCache) {
|
||||
pair.second->release();
|
||||
}
|
||||
pipelineCache.clear();
|
||||
|
||||
for (auto& pair : fragmentFunctionCache) {
|
||||
pair.second->release();
|
||||
}
|
||||
fragmentFunctionCache.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<DrawPipelineHash, MTL::RenderPipelineState*> pipelineCache;
|
||||
std::map<DrawFragmentFunctionHash, MTL::Function*> fragmentFunctionCache;
|
||||
|
||||
MTL::Device* device;
|
||||
MTL::Library* library;
|
||||
MTL::Function* vertexFunction;
|
||||
MTL::VertexDescriptor* vertexDescriptor;
|
||||
};
|
||||
|
||||
} // namespace Metal
|
20
include/renderer_mtl/mtl_lut_texture.hpp
Normal file
20
include/renderer_mtl/mtl_lut_texture.hpp
Normal file
|
@ -0,0 +1,20 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
|
||||
namespace Metal {
|
||||
|
||||
class LutTexture {
|
||||
public:
|
||||
LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name);
|
||||
~LutTexture();
|
||||
u32 getNextIndex();
|
||||
|
||||
MTL::Texture* getTexture() { return texture; }
|
||||
u32 getCurrentIndex() { return currentIndex; }
|
||||
private:
|
||||
MTL::Texture* texture;
|
||||
u32 currentIndex = 0;
|
||||
};
|
||||
|
||||
} // namespace Metal
|
91
include/renderer_mtl/mtl_render_target.hpp
Normal file
91
include/renderer_mtl/mtl_render_target.hpp
Normal file
|
@ -0,0 +1,91 @@
|
|||
#pragma once
|
||||
#include <Metal/Metal.hpp>
|
||||
#include <array>
|
||||
#include <string>
|
||||
|
||||
#include "boost/icl/interval.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "math_util.hpp"
|
||||
#include "objc_helper.hpp"
|
||||
#include "opengl.hpp"
|
||||
#include "pica_to_mtl.hpp"
|
||||
|
||||
template <typename T>
|
||||
using Interval = boost::icl::right_open_interval<T>;
|
||||
|
||||
namespace Metal {
|
||||
template <typename Format_t>
|
||||
struct RenderTarget {
|
||||
MTL::Device* device;
|
||||
|
||||
u32 location;
|
||||
Format_t format;
|
||||
OpenGL::uvec2 size;
|
||||
bool valid;
|
||||
|
||||
// Range of VRAM taken up by buffer
|
||||
Interval<u32> range;
|
||||
|
||||
MTL::Texture* texture = nullptr;
|
||||
|
||||
RenderTarget() : valid(false) {}
|
||||
|
||||
RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true)
|
||||
: device(dev), location(loc), format(format), size({x, y}), valid(valid) {
|
||||
u64 endLoc = (u64)loc + sizeInBytes();
|
||||
// Check if start and end are valid here
|
||||
range = Interval<u32>(loc, (u32)endLoc);
|
||||
}
|
||||
|
||||
Math::Rect<u32> getSubRect(u32 inputAddress, u32 width, u32 height) {
|
||||
const u32 startOffset = (inputAddress - location) / sizePerPixel(format);
|
||||
const u32 x0 = (startOffset % (size.x() * 8)) / 8;
|
||||
const u32 y0 = (startOffset / (size.x() * 8)) * 8;
|
||||
return Math::Rect<u32>{x0, size.y() - y0, x0 + width, size.y() - height - y0};
|
||||
}
|
||||
|
||||
// For 2 textures to "match" we only care about their locations, formats, and dimensions to match
|
||||
// For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture
|
||||
bool matches(RenderTarget& other) {
|
||||
return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y();
|
||||
}
|
||||
|
||||
void allocate() {
|
||||
MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid;
|
||||
if (std::is_same<Format_t, PICA::ColorFmt>::value) {
|
||||
pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format);
|
||||
} else if (std::is_same<Format_t, PICA::DepthFmt>::value) {
|
||||
pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format);
|
||||
} else {
|
||||
panic("Invalid format type");
|
||||
}
|
||||
|
||||
MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();
|
||||
descriptor->setTextureType(MTL::TextureType2D);
|
||||
descriptor->setPixelFormat(pixelFormat);
|
||||
descriptor->setWidth(size.u());
|
||||
descriptor->setHeight(size.v());
|
||||
descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead);
|
||||
descriptor->setStorageMode(MTL::StorageModePrivate);
|
||||
texture = device->newTexture(descriptor);
|
||||
texture->setLabel(toNSString(
|
||||
std::string(std::is_same<Format_t, PICA::ColorFmt>::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" +
|
||||
std::to_string(size.v())
|
||||
));
|
||||
descriptor->release();
|
||||
}
|
||||
|
||||
void free() {
|
||||
valid = false;
|
||||
|
||||
if (texture) {
|
||||
texture->release();
|
||||
}
|
||||
}
|
||||
|
||||
u64 sizeInBytes() { return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); }
|
||||
};
|
||||
|
||||
using ColorRenderTarget = RenderTarget<PICA::ColorFmt>;
|
||||
using DepthStencilRenderTarget = RenderTarget<PICA::DepthFmt>;
|
||||
} // namespace Metal
|
73
include/renderer_mtl/mtl_texture.hpp
Normal file
73
include/renderer_mtl/mtl_texture.hpp
Normal file
|
@ -0,0 +1,73 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
#include <array>
|
||||
#include <string>
|
||||
|
||||
#include "PICA/regs.hpp"
|
||||
#include "boost/icl/interval.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "math_util.hpp"
|
||||
#include "opengl.hpp"
|
||||
#include "renderer_mtl/pica_to_mtl.hpp"
|
||||
|
||||
template <typename T>
|
||||
using Interval = boost::icl::right_open_interval<T>;
|
||||
|
||||
namespace Metal {
|
||||
struct Texture {
|
||||
MTL::Device* device;
|
||||
|
||||
u32 location;
|
||||
u32 config; // Magnification/minification filter, wrapping configs, etc
|
||||
PICA::TextureFmt format;
|
||||
OpenGL::uvec2 size;
|
||||
bool valid;
|
||||
|
||||
// Range of VRAM taken up by buffer
|
||||
Interval<u32> range;
|
||||
|
||||
PICA::PixelFormatInfo formatInfo;
|
||||
MTL::Texture* texture = nullptr;
|
||||
MTL::SamplerState* sampler = nullptr;
|
||||
|
||||
Texture() : valid(false) {}
|
||||
|
||||
Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true)
|
||||
: device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) {
|
||||
u64 endLoc = (u64)loc + sizeInBytes();
|
||||
// Check if start and end are valid here
|
||||
range = Interval<u32>(loc, (u32)endLoc);
|
||||
}
|
||||
|
||||
// For 2 textures to "match" we only care about their locations, formats, and dimensions to match
|
||||
// For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture
|
||||
bool matches(Texture& other) {
|
||||
return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y();
|
||||
}
|
||||
|
||||
void allocate();
|
||||
void setNewConfig(u32 newConfig);
|
||||
void decodeTexture(std::span<const u8> data);
|
||||
void free();
|
||||
u64 sizeInBytes();
|
||||
|
||||
u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
|
||||
u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
|
||||
u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
|
||||
|
||||
// Get the morton interleave offset of a texel based on its U and V values
|
||||
static u32 mortonInterleave(u32 u, u32 v);
|
||||
// Get the byte offset of texel (u, v) in the texture
|
||||
static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel);
|
||||
static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width);
|
||||
|
||||
// Returns the format of this texture as a string
|
||||
std::string_view formatToString() { return PICA::textureFormatToString(format); }
|
||||
|
||||
// Returns the texel at coordinates (u, v) of an ETC1(A4) texture
|
||||
// TODO: Make hasAlpha a template parameter
|
||||
u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data);
|
||||
u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
|
||||
};
|
||||
} // namespace Metal
|
83
include/renderer_mtl/mtl_vertex_buffer_cache.hpp
Normal file
83
include/renderer_mtl/mtl_vertex_buffer_cache.hpp
Normal file
|
@ -0,0 +1,83 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "pica_to_mtl.hpp"
|
||||
|
||||
|
||||
using namespace PICA;
|
||||
|
||||
namespace Metal {
|
||||
struct BufferHandle {
|
||||
MTL::Buffer* buffer;
|
||||
usize offset;
|
||||
};
|
||||
|
||||
class VertexBufferCache {
|
||||
// 128MB buffer for caching vertex data
|
||||
static constexpr usize CACHE_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||
|
||||
public:
|
||||
VertexBufferCache() = default;
|
||||
|
||||
~VertexBufferCache() {
|
||||
endFrame();
|
||||
buffer->release();
|
||||
}
|
||||
|
||||
void set(MTL::Device* dev) {
|
||||
device = dev;
|
||||
create();
|
||||
}
|
||||
|
||||
void endFrame() {
|
||||
ptr = 0;
|
||||
for (auto buffer : additionalAllocations) {
|
||||
buffer->release();
|
||||
}
|
||||
additionalAllocations.clear();
|
||||
}
|
||||
|
||||
BufferHandle get(const void* data, usize size) {
|
||||
// If the vertex buffer is too large, just create a new one
|
||||
if (ptr + size > CACHE_BUFFER_SIZE) {
|
||||
MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared);
|
||||
newBuffer->setLabel(toNSString("Additional vertex buffer"));
|
||||
additionalAllocations.push_back(newBuffer);
|
||||
Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer");
|
||||
|
||||
return BufferHandle{newBuffer, 0};
|
||||
}
|
||||
|
||||
// Copy the data into the buffer
|
||||
std::memcpy((char*)buffer->contents() + ptr, data, size);
|
||||
|
||||
auto oldPtr = ptr;
|
||||
ptr += size;
|
||||
|
||||
return BufferHandle{buffer, oldPtr};
|
||||
}
|
||||
|
||||
void reset() {
|
||||
endFrame();
|
||||
|
||||
if (buffer) {
|
||||
buffer->release();
|
||||
create();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
MTL::Buffer* buffer = nullptr;
|
||||
usize ptr = 0;
|
||||
std::vector<MTL::Buffer*> additionalAllocations;
|
||||
|
||||
MTL::Device* device;
|
||||
|
||||
void create() {
|
||||
buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared);
|
||||
buffer->setLabel(toNSString("Shared vertex buffer"));
|
||||
}
|
||||
};
|
||||
} // namespace Metal
|
12
include/renderer_mtl/objc_helper.hpp
Normal file
12
include/renderer_mtl/objc_helper.hpp
Normal file
|
@ -0,0 +1,12 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "mtl_common.hpp"
|
||||
|
||||
namespace Metal {
|
||||
dispatch_data_t createDispatchData(const void* data, size_t size);
|
||||
} // namespace Metal
|
||||
|
||||
// Cast from std::string to NS::String*
|
||||
inline NS::String* toNSString(const std::string& str) { return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); }
|
152
include/renderer_mtl/pica_to_mtl.hpp
Normal file
152
include/renderer_mtl/pica_to_mtl.hpp
Normal file
|
@ -0,0 +1,152 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
|
||||
#include "PICA/regs.hpp"
|
||||
|
||||
namespace PICA {
|
||||
struct PixelFormatInfo {
|
||||
MTL::PixelFormat pixelFormat;
|
||||
size_t bytesPerTexel;
|
||||
};
|
||||
|
||||
constexpr PixelFormatInfo pixelFormatInfos[14] = {
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // RGB8
|
||||
{MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551
|
||||
{MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565
|
||||
{MTL::PixelFormatABGR4Unorm, 2}, // RGBA4
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // IA8
|
||||
{MTL::PixelFormatRG8Unorm, 2}, // RG8
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // I8
|
||||
{MTL::PixelFormatA8Unorm, 1}, // A8
|
||||
{MTL::PixelFormatABGR4Unorm, 2}, // IA4
|
||||
{MTL::PixelFormatABGR4Unorm, 2}, // I4
|
||||
{MTL::PixelFormatA8Unorm, 1}, // A4
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // ETC1
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4
|
||||
};
|
||||
|
||||
inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast<int>(format)]; }
|
||||
|
||||
inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) {
|
||||
switch (format) {
|
||||
case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm;
|
||||
case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm;
|
||||
case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm?
|
||||
case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm?
|
||||
case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm;
|
||||
}
|
||||
}
|
||||
|
||||
inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) {
|
||||
switch (format) {
|
||||
case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm;
|
||||
case DepthFmt::Unknown1: return MTL::PixelFormatInvalid;
|
||||
case DepthFmt::Depth24:
|
||||
return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats
|
||||
// Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead
|
||||
case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8;
|
||||
}
|
||||
}
|
||||
|
||||
inline MTL::CompareFunction toMTLCompareFunc(u8 func) {
|
||||
switch (func) {
|
||||
case 0: return MTL::CompareFunctionNever;
|
||||
case 1: return MTL::CompareFunctionAlways;
|
||||
case 2: return MTL::CompareFunctionEqual;
|
||||
case 3: return MTL::CompareFunctionNotEqual;
|
||||
case 4: return MTL::CompareFunctionLess;
|
||||
case 5: return MTL::CompareFunctionLessEqual;
|
||||
case 6: return MTL::CompareFunctionGreater;
|
||||
case 7: return MTL::CompareFunctionGreaterEqual;
|
||||
default: Helpers::panic("Unknown compare function %u", func);
|
||||
}
|
||||
|
||||
return MTL::CompareFunctionAlways;
|
||||
}
|
||||
|
||||
inline MTL::BlendOperation toMTLBlendOperation(u8 op) {
|
||||
switch (op) {
|
||||
case 0: return MTL::BlendOperationAdd;
|
||||
case 1: return MTL::BlendOperationSubtract;
|
||||
case 2: return MTL::BlendOperationReverseSubtract;
|
||||
case 3: return MTL::BlendOperationMin;
|
||||
case 4: return MTL::BlendOperationMax;
|
||||
case 5: return MTL::BlendOperationAdd; // Unused (same as 0)
|
||||
case 6: return MTL::BlendOperationAdd; // Unused (same as 0)
|
||||
case 7: return MTL::BlendOperationAdd; // Unused (same as 0)
|
||||
default: Helpers::panic("Unknown blend operation %u", op);
|
||||
}
|
||||
|
||||
return MTL::BlendOperationAdd;
|
||||
}
|
||||
|
||||
inline MTL::BlendFactor toMTLBlendFactor(u8 factor) {
|
||||
switch (factor) {
|
||||
case 0: return MTL::BlendFactorZero;
|
||||
case 1: return MTL::BlendFactorOne;
|
||||
case 2: return MTL::BlendFactorSourceColor;
|
||||
case 3: return MTL::BlendFactorOneMinusSourceColor;
|
||||
case 4: return MTL::BlendFactorDestinationColor;
|
||||
case 5: return MTL::BlendFactorOneMinusDestinationColor;
|
||||
case 6: return MTL::BlendFactorSourceAlpha;
|
||||
case 7: return MTL::BlendFactorOneMinusSourceAlpha;
|
||||
case 8: return MTL::BlendFactorDestinationAlpha;
|
||||
case 9: return MTL::BlendFactorOneMinusDestinationAlpha;
|
||||
case 10: return MTL::BlendFactorBlendColor;
|
||||
case 11: return MTL::BlendFactorOneMinusBlendColor;
|
||||
case 12: return MTL::BlendFactorBlendAlpha;
|
||||
case 13: return MTL::BlendFactorOneMinusBlendAlpha;
|
||||
case 14: return MTL::BlendFactorSourceAlphaSaturated;
|
||||
case 15: return MTL::BlendFactorOne; // Undocumented
|
||||
default: Helpers::panic("Unknown blend factor %u", factor);
|
||||
}
|
||||
|
||||
return MTL::BlendFactorOne;
|
||||
}
|
||||
|
||||
inline MTL::StencilOperation toMTLStencilOperation(u8 op) {
|
||||
switch (op) {
|
||||
case 0: return MTL::StencilOperationKeep;
|
||||
case 1: return MTL::StencilOperationZero;
|
||||
case 2: return MTL::StencilOperationReplace;
|
||||
case 3: return MTL::StencilOperationIncrementClamp;
|
||||
case 4: return MTL::StencilOperationDecrementClamp;
|
||||
case 5: return MTL::StencilOperationInvert;
|
||||
case 6: return MTL::StencilOperationIncrementWrap;
|
||||
case 7: return MTL::StencilOperationDecrementWrap;
|
||||
default: Helpers::panic("Unknown stencil operation %u", op);
|
||||
}
|
||||
|
||||
return MTL::StencilOperationKeep;
|
||||
}
|
||||
|
||||
inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) {
|
||||
switch (primType) {
|
||||
case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle;
|
||||
case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip;
|
||||
case PrimType::TriangleFan:
|
||||
Helpers::warn("Triangle fans are not supported on Metal, using triangles instead");
|
||||
return MTL::PrimitiveTypeTriangle;
|
||||
case PrimType::GeometryPrimitive:
|
||||
return MTL::PrimitiveTypeTriangle;
|
||||
}
|
||||
}
|
||||
|
||||
inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) {
|
||||
switch (addrMode) {
|
||||
case 0: return MTL::SamplerAddressModeClampToEdge;
|
||||
case 1: return MTL::SamplerAddressModeClampToBorderColor;
|
||||
case 2: return MTL::SamplerAddressModeRepeat;
|
||||
case 3: return MTL::SamplerAddressModeMirrorRepeat;
|
||||
case 4: return MTL::SamplerAddressModeClampToEdge;
|
||||
case 5: return MTL::SamplerAddressModeClampToBorderColor;
|
||||
case 6: return MTL::SamplerAddressModeRepeat;
|
||||
case 7: return MTL::SamplerAddressModeRepeat;
|
||||
default: Helpers::panic("Unknown sampler address mode %u", addrMode);
|
||||
}
|
||||
|
||||
return MTL::SamplerAddressModeClampToEdge;
|
||||
}
|
||||
} // namespace PICA
|
207
include/renderer_mtl/renderer_mtl.hpp
Normal file
207
include/renderer_mtl/renderer_mtl.hpp
Normal file
|
@ -0,0 +1,207 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
#include <QuartzCore/QuartzCore.hpp>
|
||||
|
||||
#include "mtl_blit_pipeline_cache.hpp"
|
||||
#include "mtl_command_encoder.hpp"
|
||||
#include "mtl_depth_stencil_cache.hpp"
|
||||
#include "mtl_draw_pipeline_cache.hpp"
|
||||
#include "mtl_lut_texture.hpp"
|
||||
#include "mtl_render_target.hpp"
|
||||
#include "mtl_texture.hpp"
|
||||
#include "mtl_vertex_buffer_cache.hpp"
|
||||
#include "renderer.hpp"
|
||||
|
||||
|
||||
// HACK: use the OpenGL cache
|
||||
#include "../renderer_gl/surface_cache.hpp"
|
||||
|
||||
class GPU;
|
||||
|
||||
struct Color4 {
|
||||
float r, g, b, a;
|
||||
};
|
||||
|
||||
class RendererMTL final : public Renderer {
|
||||
public:
|
||||
RendererMTL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
|
||||
~RendererMTL() override;
|
||||
|
||||
void reset() override;
|
||||
void display() override;
|
||||
void initGraphicsContext(SDL_Window* window) override;
|
||||
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override;
|
||||
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override;
|
||||
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
|
||||
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override;
|
||||
void screenshot(const std::string& name) override;
|
||||
void deinitGraphicsContext() override;
|
||||
|
||||
#ifdef PANDA3DS_FRONTEND_QT
|
||||
virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {}
|
||||
#endif
|
||||
|
||||
private:
|
||||
CA::MetalLayer* metalLayer;
|
||||
|
||||
MTL::Device* device;
|
||||
MTL::CommandQueue* commandQueue;
|
||||
|
||||
Metal::CommandEncoder commandEncoder;
|
||||
|
||||
// Libraries
|
||||
MTL::Library* library;
|
||||
|
||||
// Caches
|
||||
SurfaceCache<Metal::ColorRenderTarget, 16, true> colorRenderTargetCache;
|
||||
SurfaceCache<Metal::DepthStencilRenderTarget, 16, true> depthStencilRenderTargetCache;
|
||||
SurfaceCache<Metal::Texture, 256, true> textureCache;
|
||||
Metal::BlitPipelineCache blitPipelineCache;
|
||||
Metal::DrawPipelineCache drawPipelineCache;
|
||||
Metal::DepthStencilCache depthStencilCache;
|
||||
Metal::VertexBufferCache vertexBufferCache;
|
||||
|
||||
// Resources
|
||||
MTL::SamplerState* nearestSampler;
|
||||
MTL::SamplerState* linearSampler;
|
||||
MTL::Texture* nullTexture;
|
||||
MTL::DepthStencilState* defaultDepthStencilState;
|
||||
|
||||
Metal::LutTexture* lutLightingTexture;
|
||||
Metal::LutTexture* lutFogTexture;
|
||||
|
||||
// Pipelines
|
||||
MTL::RenderPipelineState* displayPipeline;
|
||||
// MTL::RenderPipelineState* copyToLutTexturePipeline;
|
||||
|
||||
// Clears
|
||||
std::map<MTL::Texture*, Color4> colorClearOps;
|
||||
std::map<MTL::Texture*, float> depthClearOps;
|
||||
std::map<MTL::Texture*, u8> stencilClearOps;
|
||||
|
||||
// Active state
|
||||
MTL::CommandBuffer* commandBuffer = nullptr;
|
||||
MTL::RenderCommandEncoder* renderCommandEncoder = nullptr;
|
||||
MTL::Texture* lastColorTexture = nullptr;
|
||||
MTL::Texture* lastDepthTexture = nullptr;
|
||||
|
||||
// Debug
|
||||
std::string nextRenderPassName;
|
||||
|
||||
void createCommandBufferIfNeeded() {
|
||||
if (!commandBuffer) {
|
||||
commandBuffer = commandQueue->commandBuffer();
|
||||
}
|
||||
}
|
||||
|
||||
void endRenderPass() {
|
||||
if (renderCommandEncoder) {
|
||||
renderCommandEncoder->endEncoding();
|
||||
renderCommandEncoder = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void beginRenderPassIfNeeded(
|
||||
MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr
|
||||
);
|
||||
|
||||
void commitCommandBuffer() {
|
||||
if (renderCommandEncoder) {
|
||||
renderCommandEncoder->endEncoding();
|
||||
renderCommandEncoder->release();
|
||||
renderCommandEncoder = nullptr;
|
||||
}
|
||||
if (commandBuffer) {
|
||||
commandBuffer->commit();
|
||||
// HACK
|
||||
commandBuffer->waitUntilCompleted();
|
||||
commandBuffer->release();
|
||||
commandBuffer = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename AttachmentT, typename ClearDataT, typename GetAttachmentT, typename SetClearDataT>
|
||||
inline void clearAttachment(
|
||||
MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment,
|
||||
SetClearDataT setClearData
|
||||
) {
|
||||
bool beginRenderPass = (renderPassDescriptor == nullptr);
|
||||
if (!renderPassDescriptor) {
|
||||
renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
|
||||
}
|
||||
|
||||
AttachmentT* attachment = getAttachment(renderPassDescriptor);
|
||||
attachment->setTexture(texture);
|
||||
setClearData(attachment, clearData);
|
||||
attachment->setLoadAction(MTL::LoadActionClear);
|
||||
attachment->setStoreAction(MTL::StoreActionStore);
|
||||
|
||||
if (beginRenderPass) {
|
||||
if (std::is_same<AttachmentT, MTL::RenderPassColorAttachmentDescriptor>::value)
|
||||
beginRenderPassIfNeeded(renderPassDescriptor, true, texture);
|
||||
else
|
||||
beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename AttachmentT, typename ClearDataT, typename GetAttachmentT, typename SetClearDataT>
|
||||
inline bool clearAttachment(
|
||||
MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map<MTL::Texture*, ClearDataT>& clearOps,
|
||||
GetAttachmentT getAttachment, SetClearDataT setClearData
|
||||
) {
|
||||
auto it = clearOps.find(texture);
|
||||
if (it != clearOps.end()) {
|
||||
clearAttachment<AttachmentT>(renderPassDescriptor, texture, it->second, getAttachment, setClearData);
|
||||
clearOps.erase(it);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (renderPassDescriptor) {
|
||||
AttachmentT* attachment = getAttachment(renderPassDescriptor);
|
||||
attachment->setTexture(texture);
|
||||
attachment->setLoadAction(MTL::LoadActionLoad);
|
||||
attachment->setStoreAction(MTL::StoreActionStore);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
|
||||
return clearAttachment<MTL::RenderPassColorAttachmentDescriptor, Color4>(
|
||||
renderPassDescriptor, texture, colorClearOps,
|
||||
[](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); },
|
||||
[](auto attachment, auto& color) { attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); }
|
||||
);
|
||||
}
|
||||
|
||||
bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
|
||||
return clearAttachment<MTL::RenderPassDepthAttachmentDescriptor, float>(
|
||||
renderPassDescriptor, texture, depthClearOps,
|
||||
[](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); },
|
||||
[](auto attachment, auto& depth) { attachment->setClearDepth(depth); }
|
||||
);
|
||||
}
|
||||
|
||||
bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
|
||||
return clearAttachment<MTL::RenderPassStencilAttachmentDescriptor, u8>(
|
||||
renderPassDescriptor, texture, stencilClearOps,
|
||||
[](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); },
|
||||
[](auto attachment, auto& stencil) { attachment->setClearStencil(stencil); }
|
||||
);
|
||||
}
|
||||
|
||||
std::optional<Metal::ColorRenderTarget> getColorRenderTarget(
|
||||
u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true
|
||||
);
|
||||
Metal::DepthStencilRenderTarget& getDepthRenderTarget();
|
||||
Metal::Texture& getTexture(Metal::Texture& tex);
|
||||
void setupTextureEnvState(MTL::RenderCommandEncoder* encoder);
|
||||
void bindTexturesToSlots();
|
||||
void updateLightingLUT(MTL::RenderCommandEncoder* encoder);
|
||||
void updateFogLUT(MTL::RenderCommandEncoder* encoder);
|
||||
void textureCopyImpl(
|
||||
Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect,
|
||||
const Math::Rect<u32>& destRect
|
||||
);
|
||||
};
|
|
@ -17,6 +17,10 @@ class RendererNull final : public Renderer {
|
|||
void screenshot(const std::string& name) override;
|
||||
void deinitGraphicsContext() override;
|
||||
|
||||
// Tell the GPU core that we'll handle vertex fetch & shader execution in the renderer in order to speed up execution.
|
||||
// Of course, we don't do this and geometry is never actually processed, since this is the null renderer.
|
||||
virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) override { return true; };
|
||||
|
||||
#ifdef PANDA3DS_FRONTEND_QT
|
||||
virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {}
|
||||
#endif
|
||||
|
|
38
include/sdl_sensors.hpp
Normal file
38
include/sdl_sensors.hpp
Normal file
|
@ -0,0 +1,38 @@
|
|||
#pragma once
|
||||
|
||||
#include <cmath>
|
||||
#include <glm/glm.hpp>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "services/hid.hpp"
|
||||
|
||||
// Convert SDL sensor readings to 3DS format
|
||||
// We use the same code for Android as well, since the values we get from Android are in the same format as SDL (m/s^2 for acceleration, rad/s for
|
||||
// rotation)
|
||||
namespace Sensors::SDL {
|
||||
// Convert the rotation data we get from SDL sensor events to rotation data we can feed right to HID
|
||||
// Returns [pitch, roll, yaw]
|
||||
static glm::vec3 convertRotation(glm::vec3 rotation) {
|
||||
// Annoyingly, Android doesn't support the <numbers> header yet so we define pi ourselves
|
||||
static constexpr double pi = 3.141592653589793;
|
||||
// Convert the rotation from rad/s to deg/s and scale by the gyroscope coefficient in HID
|
||||
constexpr float scale = 180.f / pi * HIDService::gyroscopeCoeff;
|
||||
// The axes are also inverted, so invert scale before the multiplication.
|
||||
return rotation * -scale;
|
||||
}
|
||||
|
||||
static glm::vec3 convertAcceleration(float* data) {
|
||||
// Set our cap to ~9 m/s^2. The 3DS sensors cap at -930 and +930, so values above this value will get clamped to 930
|
||||
// At rest (3DS laid flat on table), hardware reads around ~0 for x and z axis, and around ~480 for y axis due to gravity.
|
||||
// This code tries to mimic this approximately, with offsets based on measurements from my DualShock 4.
|
||||
static constexpr float accelMax = 9.f;
|
||||
// We define standard gravity(g) ourself instead of using the SDL one in order for the code to work on Android too.
|
||||
static constexpr float standardGravity = 9.80665f;
|
||||
|
||||
s16 x = std::clamp<s16>(s16(data[0] / accelMax * 930.f), -930, +930);
|
||||
s16 y = std::clamp<s16>(s16(data[1] / (standardGravity * accelMax) * 930.f - 350.f), -930, +930);
|
||||
s16 z = std::clamp<s16>(s16((data[2] - 2.1f) / accelMax * 930.f), -930, +930);
|
||||
|
||||
return glm::vec3(x, y, z);
|
||||
}
|
||||
} // namespace Sensors::SDL
|
|
@ -8,6 +8,8 @@
|
|||
#include "result/result.hpp"
|
||||
|
||||
class ACService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::AC;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, acLogger)
|
||||
|
@ -17,6 +19,7 @@ class ACService {
|
|||
void closeAsync(u32 messagePointer);
|
||||
void createDefaultConfig(u32 messagePointer);
|
||||
void getConnectingInfraPriority(u32 messagePointer);
|
||||
void getNZoneBeaconNotFoundEvent(u32 messagePointer);
|
||||
void getStatus(u32 messagePointer);
|
||||
void getLastErrorCode(u32 messagePointer);
|
||||
void getWifiStatus(u32 messagePointer);
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#include "result/result.hpp"
|
||||
|
||||
class ACTService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::ACT;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, actLogger)
|
||||
|
@ -15,7 +17,7 @@ class ACTService {
|
|||
void generateUUID(u32 messagePointer);
|
||||
void getAccountDataBlock(u32 messagePointer);
|
||||
|
||||
public:
|
||||
public:
|
||||
ACTService(Memory& mem) : mem(mem) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#include "result/result.hpp"
|
||||
|
||||
class AMService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::AM;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, amLogger)
|
||||
|
@ -15,7 +17,7 @@ class AMService {
|
|||
void getPatchTitleInfo(u32 messagePointer);
|
||||
void listTitleInfo(u32 messagePointer);
|
||||
|
||||
public:
|
||||
public:
|
||||
AMService(Memory& mem) : mem(mem) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -12,7 +12,8 @@
|
|||
class Kernel;
|
||||
|
||||
enum class ConsoleModel : u32 {
|
||||
Old3DS, New3DS
|
||||
Old3DS,
|
||||
New3DS,
|
||||
};
|
||||
|
||||
// https://www.3dbrew.org/wiki/NS_and_APT_Services#Command
|
||||
|
@ -41,6 +42,8 @@ namespace APT::Transitions {
|
|||
}
|
||||
|
||||
class APTService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::APT;
|
||||
Memory& mem;
|
||||
Kernel& kernel;
|
||||
|
@ -99,7 +102,7 @@ class APTService {
|
|||
|
||||
u32 screencapPostPermission;
|
||||
|
||||
public:
|
||||
public:
|
||||
APTService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel), appletManager(mem) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -6,36 +6,46 @@
|
|||
#include "result/result.hpp"
|
||||
|
||||
class BOSSService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::BOSS;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, bossLogger)
|
||||
|
||||
// Service commands
|
||||
void cancelTask(u32 messagePointer);
|
||||
void deleteNsData(u32 messagePointer);
|
||||
void initializeSession(u32 messagePointer);
|
||||
void getAppNewFlag(u32 messagePointer);
|
||||
void getErrorCode(u32 messagePointer);
|
||||
void getNsDataHeaderInfo(u32 messagePointer);
|
||||
void getNewArrivalFlag(u32 messagePointer);
|
||||
void getNsDataIdList(u32 messagePointer, u32 commandWord);
|
||||
void getNsDataLastUpdated(u32 messagePointer);
|
||||
void getOptoutFlag(u32 messagePointer);
|
||||
void getStorageEntryInfo(u32 messagePointer); // Unknown what this is, name taken from Citra
|
||||
void getStorageEntryInfo(u32 messagePointer); // Unknown what this is, name taken from Citra
|
||||
void getTaskIdList(u32 messagePointer);
|
||||
void getTaskInfo(u32 messagePointer);
|
||||
void getTaskServiceStatus(u32 messagePointer);
|
||||
void getTaskState(u32 messagePointer);
|
||||
void getTaskStatus(u32 messagePointer);
|
||||
void getTaskStorageInfo(u32 messagePointer);
|
||||
void readNsData(u32 messagePointer);
|
||||
void receiveProperty(u32 messagePointer);
|
||||
void registerNewArrivalEvent(u32 messagePointer);
|
||||
void registerStorageEntry(u32 messagePointer);
|
||||
void registerTask(u32 messagePointer);
|
||||
void sendProperty(u32 messagePointer);
|
||||
void setAppNewFlag(u32 messagePointer);
|
||||
void setOptoutFlag(u32 messagePointer);
|
||||
void startBgImmediate(u32 messagePointer);
|
||||
void startTask(u32 messagePointer);
|
||||
void unregisterStorage(u32 messagePointer);
|
||||
void unregisterTask(u32 messagePointer);
|
||||
|
||||
s8 optoutFlag;
|
||||
public:
|
||||
|
||||
public:
|
||||
BOSSService(Memory& mem) : mem(mem) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
class Kernel;
|
||||
|
||||
class CAMService {
|
||||
using Handle = HorizonHandle;
|
||||
using Event = std::optional<Handle>;
|
||||
|
||||
struct Port {
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#pragma once
|
||||
#include <optional>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "kernel_types.hpp"
|
||||
#include "logger.hpp"
|
||||
|
@ -9,6 +10,8 @@
|
|||
class Kernel;
|
||||
|
||||
class CECDService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::CECD;
|
||||
Memory& mem;
|
||||
Kernel& kernel;
|
||||
|
@ -20,7 +23,7 @@ class CECDService {
|
|||
void getInfoEventHandle(u32 messagePointer);
|
||||
void openAndRead(u32 messagePointer);
|
||||
|
||||
public:
|
||||
public:
|
||||
CECDService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
#pragma once
|
||||
#include <cstring>
|
||||
|
||||
#include "config.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "memory.hpp"
|
||||
|
@ -7,15 +9,19 @@
|
|||
#include "result/result.hpp"
|
||||
|
||||
class CFGService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Memory& mem;
|
||||
CountryCodes country = CountryCodes::US; // Default to USA
|
||||
const EmulatorConfig& settings;
|
||||
|
||||
CountryCodes country = CountryCodes::US; // Default to USA
|
||||
MAKE_LOG_FUNCTION(log, cfgLogger)
|
||||
|
||||
void writeStringU16(u32 pointer, const std::u16string& string);
|
||||
|
||||
// Service functions
|
||||
void getConfigInfoBlk2(u32 messagePointer);
|
||||
void getConfigInfoBlk8(u32 messagePointer);
|
||||
void getConfigInfoBlk8(u32 messagePointer, u32 commandWord);
|
||||
void getCountryCodeID(u32 messagePointer);
|
||||
void getLocalFriendCodeSeed(u32 messagePointer);
|
||||
void getRegionCanadaUSA(u32 messagePointer);
|
||||
|
@ -23,19 +29,26 @@ class CFGService {
|
|||
void genUniqueConsoleHash(u32 messagePointer);
|
||||
void secureInfoGetByte101(u32 messagePointer);
|
||||
void secureInfoGetRegion(u32 messagePointer);
|
||||
void setConfigInfoBlk4(u32 messagePointer);
|
||||
void updateConfigNANDSavegame(u32 messagePointer);
|
||||
void translateCountryInfo(u32 messagePointer);
|
||||
void isFangateSupported(u32 messagePointer);
|
||||
|
||||
// cfg:nor functions
|
||||
void norInitialize(u32 messagePointer);
|
||||
void norReadData(u32 messagePointer);
|
||||
|
||||
void getConfigInfo(u32 output, u32 blockID, u32 size, u32 permissionMask);
|
||||
|
||||
public:
|
||||
public:
|
||||
enum class Type {
|
||||
U, // cfg:u
|
||||
I, // cfg:i
|
||||
S, // cfg:s
|
||||
NOR, // cfg:nor
|
||||
U, // cfg:u
|
||||
I, // cfg:i
|
||||
S, // cfg:s
|
||||
NOR, // cfg:nor
|
||||
};
|
||||
|
||||
CFGService(Memory& mem) : mem(mem) {}
|
||||
CFGService(Memory& mem, const EmulatorConfig& settings) : mem(mem), settings(settings) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer, Type type);
|
||||
};
|
|
@ -10,6 +10,8 @@
|
|||
class Kernel;
|
||||
|
||||
class CSNDService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::CSND;
|
||||
Memory& mem;
|
||||
Kernel& kernel;
|
||||
|
@ -30,7 +32,5 @@ class CSNDService {
|
|||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
||||
void setSharedMemory(u8* ptr) {
|
||||
sharedMemory = ptr;
|
||||
}
|
||||
void setSharedMemory(u8* ptr) { sharedMemory = ptr; }
|
||||
};
|
|
@ -8,6 +8,8 @@
|
|||
// Please forgive me for how everything in this file is named
|
||||
// "dlp:SRVR" is not a nice name to work with
|
||||
class DlpSrvrService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::DLP_SRVR;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, dlpSrvrLogger)
|
||||
|
@ -15,7 +17,7 @@ class DlpSrvrService {
|
|||
// Service commands
|
||||
void isChild(u32 messagePointer);
|
||||
|
||||
public:
|
||||
public:
|
||||
DlpSrvrService(Memory& mem) : mem(mem) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -10,14 +10,19 @@
|
|||
#include "memory.hpp"
|
||||
#include "result/result.hpp"
|
||||
|
||||
struct EmulatorConfig;
|
||||
// Circular dependencies!
|
||||
class Kernel;
|
||||
|
||||
class DSPService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::DSP;
|
||||
Memory& mem;
|
||||
Kernel& kernel;
|
||||
const EmulatorConfig& config;
|
||||
Audio::DSPCore* dsp = nullptr;
|
||||
|
||||
MAKE_LOG_FUNCTION(log, dspServiceLogger)
|
||||
|
||||
// Number of DSP pipes
|
||||
|
@ -39,9 +44,12 @@ class DSPService {
|
|||
size_t totalEventCount;
|
||||
std::vector<u8> loadedComponent;
|
||||
|
||||
bool headphonesInserted = true;
|
||||
|
||||
// Service functions
|
||||
void convertProcessAddressFromDspDram(u32 messagePointer); // Nice function name
|
||||
void flushDataCache(u32 messagePointer);
|
||||
void forceHeadphoneOut(u32 messagePointer);
|
||||
void getHeadphoneStatus(u32 messagePointer);
|
||||
void getSemaphoreEventHandle(u32 messagePointer);
|
||||
void invalidateDCache(u32 messagePointer);
|
||||
|
@ -56,7 +64,7 @@ class DSPService {
|
|||
void writeProcessPipe(u32 messagePointer);
|
||||
|
||||
public:
|
||||
DSPService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {}
|
||||
DSPService(Memory& mem, Kernel& kernel, const EmulatorConfig& config) : mem(mem), kernel(kernel), config(config) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
void setDSPCore(Audio::DSPCore* pointer) { dsp = pointer; }
|
||||
|
@ -82,4 +90,5 @@ class DSPService {
|
|||
void triggerInterrupt1();
|
||||
|
||||
ComponentDumpResult dumpComponent(const std::filesystem::path& path);
|
||||
void printFirmwareInfo();
|
||||
};
|
76
include/services/dsp_firmware_db.hpp
Normal file
76
include/services/dsp_firmware_db.hpp
Normal file
|
@ -0,0 +1,76 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace DSP {
|
||||
struct FirmwareInfo {
|
||||
using Hash = std::array<u8, 32>;
|
||||
|
||||
Hash hash; // Firmware hash (SHA-256)
|
||||
u32 size; // Firmware size in bytes
|
||||
|
||||
bool supportsAAC; // Does this firmware support AAC decoding?
|
||||
const char* notes; // Miscellaneous notes about the firmware
|
||||
|
||||
explicit constexpr FirmwareInfo(const Hash& hash, u32 size, bool supportsAAC, const char* notes)
|
||||
: hash(hash), size(size), supportsAAC(supportsAAC), notes(notes) {}
|
||||
};
|
||||
|
||||
static constexpr std::array<FirmwareInfo, 9> firmwareDB = {
|
||||
FirmwareInfo(
|
||||
{0x47, 0xD6, 0x6C, 0xD2, 0x13, 0x1, 0xFF, 0x62, 0xAD, 0x16, 0x98, 0x2, 0x46, 0x67, 0xF3, 0x9,
|
||||
0xDA, 0x7, 0x20, 0x9E, 0xFB, 0xB, 0x6A, 0x81, 0x98, 0xFF, 0x9B, 0xE0, 0x51, 0x67, 0xC9, 0xA6},
|
||||
48480, false, "Spotted in some versions of Activity Log potentially other apps"
|
||||
),
|
||||
|
||||
FirmwareInfo(
|
||||
{0xF5, 0xDA, 0x79, 0xE7, 0x24, 0x6C, 0x51, 0x9A, 0x28, 0x6C, 0x50, 0xC9, 0x9F, 0xA1, 0xE6, 0x4D,
|
||||
0xA5, 0x72, 0x96, 0x5F, 0xEA, 0x14, 0x20, 0xA7, 0x70, 0x90, 0x57, 0x42, 0x34, 0x6E, 0x18, 0xD1},
|
||||
49674, false, "One of the most common firmwares. Found in NSMB2 and others"
|
||||
),
|
||||
|
||||
FirmwareInfo(
|
||||
{0x94, 0x4B, 0x40, 0xB5, 0x46, 0x93, 0xF4, 0xB1, 0xD9, 0x52, 0xBE, 0x84, 0x87, 0xE9, 0xE9, 0x1F,
|
||||
0x66, 0x7F, 0xC4, 0x89, 0xF8, 0x15, 0x79, 0xF, 0x3D, 0x3E, 0x89, 0x26, 0x5F, 0xE0, 0x89, 0xC4},
|
||||
49800, false, "One of the most common firmwares. Found in Majora's Mask and others"
|
||||
),
|
||||
|
||||
FirmwareInfo(
|
||||
{0x8E, 0x21, 0x3F, 0x3E, 0x71, 0xD2, 0xE3, 0xE4, 0x5D, 0x11, 0x69, 0xBA, 0xC6, 0x46, 0x5A, 0x70,
|
||||
0xEA, 0xBE, 0xB2, 0x2B, 0x30, 0x3F, 0x1F, 0xA6, 0xD7, 0x67, 0x93, 0x70, 0xFF, 0xAD, 0xF, 0x54},
|
||||
49756, false, "Fairly common firmware. Found in PSMD and others"
|
||||
),
|
||||
|
||||
FirmwareInfo(
|
||||
{0xA2, 0x6C, 0x74, 0xD1, 0xEF, 0x7F, 0x4F, 0xA5, 0xFF, 0xFF, 0xFB, 0xEC, 0x75, 0x8A, 0x40, 0x8D,
|
||||
0x8F, 0x22, 0x87, 0x72, 0x78, 0x1B, 0x81, 0x88, 0x86, 0x5F, 0x83, 0x4D, 0x1D, 0x90, 0x6B, 0xAA},
|
||||
48804, false, "Spotted in MK7"
|
||||
),
|
||||
|
||||
FirmwareInfo(
|
||||
{0x75, 0x12, 0x70, 0xB2, 0x43, 0xB0, 0xCA, 0xFB, 0x51, 0x99, 0xF2, 0x98, 0x2, 0x2, 0xC9, 0xB4,
|
||||
0xC7, 0x7A, 0x67, 0x5E, 0xF0, 0x43, 0x8F, 0xD5, 0xA8, 0x9E, 0x83, 0xAA, 0xB9, 0xA8, 0x7, 0x9B},
|
||||
48652, false, "One of the most common firmwares. Found in OoT, Pokemon Rumble Blast, and others"
|
||||
),
|
||||
|
||||
FirmwareInfo(
|
||||
{0xF2, 0x96, 0xE2, 0xE5, 0xEC, 0x34, 0x9F, 0x6A, 0x6C, 0xF3, 0xE1, 0xC7, 0xC, 0xDD, 0x65, 0xC2,
|
||||
0x2, 0x72, 0xB6, 0xE7, 0xFF, 0xE5, 0x57, 0x92, 0x69, 0x4E, 0x83, 0xAE, 0x24, 0xF1, 0x68, 0xBF},
|
||||
217976, true, "Most common AAC-enabled firmware. Found in Rhythm Heaven, Fire Emblem Fates/Echoes, Pokemon X/Y, and others"
|
||||
),
|
||||
|
||||
FirmwareInfo(
|
||||
{0xF0, 0x6C, 0x1B, 0x59, 0x23, 0xE1, 0x71, 0x19, 0x5, 0x66, 0x59, 0xCB, 0x3D, 0x9B, 0xF0, 0x26,
|
||||
0x62, 0x84, 0xE9, 0xA6, 0xC0, 0x8, 0x23, 0x99, 0xD7, 0x45, 0x8D, 0x7C, 0x52, 0xAE, 0x32, 0x1C},
|
||||
48708, false, "Spotted in Super Mario 3D Land"
|
||||
),
|
||||
|
||||
FirmwareInfo(
|
||||
{0x7E, 0xA3, 0xC4, 0x4A, 0x1C, 0x57, 0x51, 0x4B, 0xEB, 0xBE, 0xBC, 0xE8, 0xA7, 0x99, 0x5F, 0x7F,
|
||||
0x3A, 0x29, 0x1, 0x70, 0xEA, 0x3B, 0x6C, 0x14, 0x57, 0x49, 0xAD, 0x93, 0x58, 0x67, 0x2C, 0x97},
|
||||
49716, false, "Spotted in PMD: GTI"
|
||||
),
|
||||
};
|
||||
} // namespace DSP
|
84
include/services/fonts.hpp
Normal file
84
include/services/fonts.hpp
Normal file
|
@ -0,0 +1,84 @@
|
|||
// Copyright 2016 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
// Adapted from https://github.com/PabloMK7/citra/blob/master/src/core/hle/service/apt/bcfnt/bcfnt.h
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "swap.hpp"
|
||||
|
||||
namespace HLE::Fonts {
|
||||
struct CFNT {
|
||||
u8 magic[4];
|
||||
u16_le endianness;
|
||||
u16_le headerSize;
|
||||
u32_le version;
|
||||
u32_le fileSize;
|
||||
u32_le numBlocks;
|
||||
};
|
||||
|
||||
struct SectionHeader {
|
||||
u8 magic[4];
|
||||
u32_le sectionSize;
|
||||
};
|
||||
|
||||
struct FINF {
|
||||
u8 magic[4];
|
||||
u32_le sectionSize;
|
||||
u8 fontType;
|
||||
u8 lineFeed;
|
||||
u16_le alterCharIndex;
|
||||
u8 default_width[3];
|
||||
u8 encoding;
|
||||
u32_le tglpOffset;
|
||||
u32_le cwdhOffset;
|
||||
u32_le cmapOffset;
|
||||
u8 height;
|
||||
u8 width;
|
||||
u8 ascent;
|
||||
u8 reserved;
|
||||
};
|
||||
|
||||
struct TGLP {
|
||||
u8 magic[4];
|
||||
u32_le sectionSize;
|
||||
u8 cellWidth;
|
||||
u8 cellHeight;
|
||||
u8 baselinePosition;
|
||||
u8 maxCharacterWidth;
|
||||
u32_le sheetSize;
|
||||
u16_le numSheets;
|
||||
u16_le sheetImageFormat;
|
||||
u16_le numColumns;
|
||||
u16_le numRows;
|
||||
u16_le sheetWidth;
|
||||
u16_le sheetHeight;
|
||||
u32_le sheetDataOffset;
|
||||
};
|
||||
|
||||
struct CMAP {
|
||||
u8 magic[4];
|
||||
u32_le sectionSize;
|
||||
u16_le codeBegin;
|
||||
u16_le codeEnd;
|
||||
u16_le mappingMethod;
|
||||
u16_le reserved;
|
||||
u32_le nextCmapOffset;
|
||||
};
|
||||
|
||||
struct CWDH {
|
||||
u8 magic[4];
|
||||
u32_le sectionSize;
|
||||
u16_le startIndex;
|
||||
u16_le endIndex;
|
||||
u32_le nextCwdhOffset;
|
||||
};
|
||||
|
||||
// Relocates the internal addresses of the BCFNT Shared Font to the new base. The current base will
|
||||
// be auto-detected based on the file headers.
|
||||
void relocateSharedFont(u8* sharedFont, u32 newAddress);
|
||||
} // namespace HLE::Fonts
|
|
@ -1,5 +1,6 @@
|
|||
#pragma once
|
||||
#include <cassert>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "kernel_types.hpp"
|
||||
#include "logger.hpp"
|
||||
|
@ -15,6 +16,8 @@ struct FriendKey {
|
|||
static_assert(sizeof(FriendKey) == 16);
|
||||
|
||||
class FRDService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, frdLogger)
|
||||
|
||||
|
@ -51,11 +54,11 @@ class FRDService {
|
|||
};
|
||||
static_assert(sizeof(Profile) == 8);
|
||||
|
||||
public:
|
||||
public:
|
||||
enum class Type {
|
||||
A, // frd:a
|
||||
N, // frd:n
|
||||
U, // frd:u
|
||||
A, // frd:a
|
||||
N, // frd:n
|
||||
U, // frd:u
|
||||
};
|
||||
|
||||
FRDService(Memory& mem) : mem(mem) {}
|
||||
|
|
|
@ -1,11 +1,14 @@
|
|||
#pragma once
|
||||
#include "config.hpp"
|
||||
#include "fs/archive_card_spi.hpp"
|
||||
#include "fs/archive_ext_save_data.hpp"
|
||||
#include "fs/archive_ncch.hpp"
|
||||
#include "fs/archive_save_data.hpp"
|
||||
#include "fs/archive_sdmc.hpp"
|
||||
#include "fs/archive_self_ncch.hpp"
|
||||
#include "fs/archive_system_save_data.hpp"
|
||||
#include "fs/archive_twl_photo.hpp"
|
||||
#include "fs/archive_twl_sound.hpp"
|
||||
#include "fs/archive_user_save_data.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "kernel_types.hpp"
|
||||
|
@ -16,6 +19,8 @@
|
|||
class Kernel;
|
||||
|
||||
class FSService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::FS;
|
||||
Memory& mem;
|
||||
Kernel& kernel;
|
||||
|
@ -37,6 +42,10 @@ class FSService {
|
|||
ExtSaveDataArchive sharedExtSaveData_nand;
|
||||
SystemSaveDataArchive systemSaveData;
|
||||
|
||||
TWLPhotoArchive twlPhoto;
|
||||
TWLSoundArchive twlSound;
|
||||
CardSPIArchive cardSpi;
|
||||
|
||||
ArchiveBase* getArchiveFromID(u32 id, const FSPath& archivePath);
|
||||
Rust::Result<Handle, HorizonResult> openArchiveHandle(u32 archiveID, const FSPath& path);
|
||||
Rust::Result<Handle, HorizonResult> openDirectoryHandle(ArchiveBase* archive, const FSPath& path);
|
||||
|
@ -81,11 +90,12 @@ class FSService {
|
|||
// Used for set/get priority: Not sure what sort of priority this is referring to
|
||||
u32 priority;
|
||||
|
||||
public:
|
||||
public:
|
||||
FSService(Memory& mem, Kernel& kernel, const EmulatorConfig& config)
|
||||
: mem(mem), saveData(mem), sharedExtSaveData_nand(mem, "../SharedFiles/NAND", true), extSaveData_sdmc(mem, "SDMC"), sdmc(mem),
|
||||
sdmcWriteOnly(mem, true), selfNcch(mem), ncch(mem), userSaveData1(mem, ArchiveID::UserSaveData1),
|
||||
userSaveData2(mem, ArchiveID::UserSaveData2), kernel(kernel), config(config), systemSaveData(mem) {}
|
||||
userSaveData2(mem, ArchiveID::UserSaveData2), systemSaveData(mem), twlPhoto(mem), twlSound(mem), cardSpi(mem), kernel(kernel),
|
||||
config(config) {}
|
||||
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
#include <cstring>
|
||||
#include <optional>
|
||||
|
||||
#include "PICA/gpu.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "kernel_types.hpp"
|
||||
|
@ -9,12 +10,12 @@
|
|||
#include "result/result.hpp"
|
||||
|
||||
enum class GPUInterrupt : u8 {
|
||||
PSC0 = 0, // Memory fill completed
|
||||
PSC1 = 1, // ?
|
||||
VBlank0 = 2, // ?
|
||||
VBlank1 = 3, // ?
|
||||
PPF = 4, // Display transfer finished
|
||||
P3D = 5, // Command list processing finished
|
||||
PSC0 = 0, // Memory fill completed
|
||||
PSC1 = 1, // ?
|
||||
VBlank0 = 2, // ?
|
||||
VBlank1 = 3, // ?
|
||||
PPF = 4, // Display transfer finished
|
||||
P3D = 5, // Command list processing finished
|
||||
DMA = 6
|
||||
};
|
||||
|
||||
|
@ -22,12 +23,14 @@ enum class GPUInterrupt : u8 {
|
|||
class Kernel;
|
||||
|
||||
class GPUService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::GPU;
|
||||
Memory& mem;
|
||||
GPU& gpu;
|
||||
Kernel& kernel;
|
||||
u32& currentPID; // Process ID of the current process
|
||||
u8* sharedMem; // Pointer to GSP shared memory
|
||||
u32& currentPID; // Process ID of the current process
|
||||
u8* sharedMem; // Pointer to GSP shared memory
|
||||
|
||||
// At any point in time only 1 process has privileges to use rendering functions
|
||||
// This is the PID of that process
|
||||
|
@ -62,8 +65,8 @@ class GPUService {
|
|||
|
||||
// Used for saving and restoring GPU state via ImportDisplayCaptureInfo
|
||||
struct CaptureInfo {
|
||||
u32 leftFramebuffer; // Left framebuffer VA
|
||||
u32 rightFramebuffer; // Right framebuffer VA (Top screen only)
|
||||
u32 leftFramebuffer; // Left framebuffer VA
|
||||
u32 rightFramebuffer; // Right framebuffer VA (Top screen only)
|
||||
u32 format;
|
||||
u32 stride;
|
||||
};
|
||||
|
@ -72,6 +75,7 @@ class GPUService {
|
|||
// Service commands
|
||||
void acquireRight(u32 messagePointer);
|
||||
void flushDataCache(u32 messagePointer);
|
||||
void invalidateDataCache(u32 messagePointer);
|
||||
void importDisplayCaptureInfo(u32 messagePointer);
|
||||
void readHwRegs(u32 messagePointer);
|
||||
void registerInterruptRelayQueue(u32 messagePointer);
|
||||
|
@ -106,16 +110,15 @@ class GPUService {
|
|||
FramebufferUpdate* getTopFramebufferInfo() { return getFramebufferInfo(0); }
|
||||
FramebufferUpdate* getBottomFramebufferInfo() { return getFramebufferInfo(1); }
|
||||
|
||||
public:
|
||||
GPUService(Memory& mem, GPU& gpu, Kernel& kernel, u32& currentPID) : mem(mem), gpu(gpu),
|
||||
kernel(kernel), currentPID(currentPID) {}
|
||||
public:
|
||||
GPUService(Memory& mem, GPU& gpu, Kernel& kernel, u32& currentPID) : mem(mem), gpu(gpu), kernel(kernel), currentPID(currentPID) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
void requestInterrupt(GPUInterrupt type);
|
||||
void setSharedMem(u8* ptr) {
|
||||
sharedMem = ptr;
|
||||
if (ptr != nullptr) { // Zero-fill shared memory in case the process tries to read stale service data or vice versa
|
||||
if (ptr != nullptr) { // Zero-fill shared memory in case the process tries to read stale service data or vice versa
|
||||
std::memset(ptr, 0, 0x1000);
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
|
@ -6,13 +6,13 @@
|
|||
#include "result/result.hpp"
|
||||
|
||||
class LCDService {
|
||||
Handle handle = KernelHandles::LCD;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, gspLCDLogger)
|
||||
|
||||
// Service commands
|
||||
void setLedForceOff(u32 messagePointer);
|
||||
|
||||
public:
|
||||
public:
|
||||
LCDService(Memory& mem) : mem(mem) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -38,6 +38,8 @@ namespace HID::Keys {
|
|||
class Kernel;
|
||||
|
||||
class HIDService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::HID;
|
||||
Memory& mem;
|
||||
Kernel& kernel;
|
||||
|
@ -54,6 +56,7 @@ class HIDService {
|
|||
s16 circlePadX, circlePadY; // Circlepad state
|
||||
s16 touchScreenX, touchScreenY; // Touchscreen state
|
||||
s16 roll, pitch, yaw; // Gyroscope state
|
||||
s16 accelX, accelY, accelZ; // Accelerometer state
|
||||
|
||||
bool accelerometerEnabled;
|
||||
bool eventsInitialized;
|
||||
|
@ -85,7 +88,14 @@ class HIDService {
|
|||
*(T*)&sharedMem[offset] = value;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T* getSharedMemPointer(size_t offset) {
|
||||
return (T*)&sharedMem[offset];
|
||||
}
|
||||
|
||||
public:
|
||||
static constexpr float gyroscopeCoeff = 14.375f; // Same as retail 3DS
|
||||
|
||||
HIDService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
@ -126,6 +136,12 @@ class HIDService {
|
|||
void setPitch(s16 value) { pitch = value; }
|
||||
void setYaw(s16 value) { yaw = value; }
|
||||
|
||||
void setAccel(s16 x, s16 y, s16 z) {
|
||||
accelX = x;
|
||||
accelY = y;
|
||||
accelZ = z;
|
||||
}
|
||||
|
||||
void updateInputs(u64 currentTimestamp);
|
||||
|
||||
void setSharedMem(u8* ptr) {
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
#include "memory.hpp"
|
||||
|
||||
class HTTPService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::HTTP;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, httpLogger)
|
||||
|
|
|
@ -11,6 +11,8 @@
|
|||
class Kernel;
|
||||
|
||||
class IRUserService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
enum class DeviceID : u8 {
|
||||
CirclePadPro = 1,
|
||||
};
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
class Kernel;
|
||||
|
||||
class LDRService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::LDR_RO;
|
||||
Memory& mem;
|
||||
Kernel& kernel;
|
||||
|
@ -22,7 +24,7 @@ class LDRService {
|
|||
void loadCRR(u32 messagePointer);
|
||||
void unloadCRO(u32 messagePointer);
|
||||
|
||||
public:
|
||||
public:
|
||||
LDRService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
|
||||
namespace MCU {
|
||||
class HWCService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::MCU_HWC;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, mcuLogger)
|
||||
|
@ -15,6 +17,7 @@ namespace MCU {
|
|||
|
||||
// Service commands
|
||||
void getBatteryLevel(u32 messagePointer);
|
||||
void setInfoLEDPattern(u32 messagePointer);
|
||||
|
||||
public:
|
||||
HWCService(Memory& mem, const EmulatorConfig& config) : mem(mem), config(config) {}
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
class Kernel;
|
||||
|
||||
class MICService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::MIC;
|
||||
Memory& mem;
|
||||
Kernel& kernel;
|
||||
|
@ -29,14 +31,14 @@ class MICService {
|
|||
void unmapSharedMem(u32 messagePointer);
|
||||
void theCaptainToadFunction(u32 messagePointer);
|
||||
|
||||
u8 gain = 0; // How loud our microphone input signal is
|
||||
u8 gain = 0; // How loud our microphone input signal is
|
||||
bool micEnabled = false;
|
||||
bool shouldClamp = false;
|
||||
bool currentlySampling = false;
|
||||
|
||||
std::optional<Handle> eventHandle;
|
||||
|
||||
public:
|
||||
public:
|
||||
MICService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -6,7 +6,14 @@
|
|||
#include "result/result.hpp"
|
||||
|
||||
class NDMService {
|
||||
enum class ExclusiveState : u32 { None = 0, Infrastructure = 1, LocalComms = 2, StreetPass = 3, StreetPassData = 4 };
|
||||
using Handle = HorizonHandle;
|
||||
enum class ExclusiveState : u32 {
|
||||
None = 0,
|
||||
Infrastructure = 1,
|
||||
LocalComms = 2,
|
||||
StreetPass = 3,
|
||||
StreetPassData = 4,
|
||||
};
|
||||
|
||||
Handle handle = KernelHandles::NDM;
|
||||
Memory& mem;
|
||||
|
@ -25,7 +32,7 @@ class NDMService {
|
|||
|
||||
ExclusiveState exclusiveState = ExclusiveState::None;
|
||||
|
||||
public:
|
||||
public:
|
||||
NDMService(Memory& mem) : mem(mem) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
#include "memory.hpp"
|
||||
|
||||
class NewsUService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::NEWS_U;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, newsLogger)
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
class Kernel;
|
||||
|
||||
class NFCService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::NFC;
|
||||
Memory& mem;
|
||||
Kernel& kernel;
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#include "result/result.hpp"
|
||||
|
||||
class NIMService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::NIM;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, nimLogger)
|
||||
|
@ -13,7 +15,7 @@ class NIMService {
|
|||
// Service commands
|
||||
void initialize(u32 messagePointer);
|
||||
|
||||
public:
|
||||
public:
|
||||
NIMService(Memory& mem) : mem(mem) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
25
include/services/ns.hpp
Normal file
25
include/services/ns.hpp
Normal file
|
@ -0,0 +1,25 @@
|
|||
#pragma once
|
||||
#include "helpers.hpp"
|
||||
#include "kernel_types.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "memory.hpp"
|
||||
#include "result/result.hpp"
|
||||
|
||||
class NSService {
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, nsLogger)
|
||||
|
||||
// Service commands
|
||||
void launchTitle(u32 messagePointer);
|
||||
|
||||
public:
|
||||
enum class Type {
|
||||
S, // ns:s
|
||||
P, // ns:p
|
||||
C, // ns:c
|
||||
};
|
||||
|
||||
NSService(Memory& mem) : mem(mem) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer, Type type);
|
||||
};
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue