From 6f3c7d358b777d04175835f1abc019c6b5a3a884 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jan 2024 00:27:17 +0200 Subject: [PATCH 001/251] Add GLSL shader gen files --- CMakeLists.txt | 4 ++-- include/PICA/shader_gen.hpp | 24 ++++++++++++++++++++++++ src/core/PICA/shader_gen_glsl.cpp | 30 ++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 include/PICA/shader_gen.hpp create mode 100644 src/core/PICA/shader_gen_glsl.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b4b1503a..2d5df370 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -179,7 +179,7 @@ set(SERVICE_SOURCE_FILES src/core/services/service_manager.cpp src/core/services set(PICA_SOURCE_FILES src/core/PICA/gpu.cpp src/core/PICA/regs.cpp src/core/PICA/shader_unit.cpp src/core/PICA/shader_interpreter.cpp src/core/PICA/dynapica/shader_rec.cpp src/core/PICA/dynapica/shader_rec_emitter_x64.cpp src/core/PICA/pica_hash.cpp - src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp + src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp src/core/PICA/shader_gen_glsl.cpp ) set(LOADER_SOURCE_FILES src/core/loader/elf.cpp src/core/loader/ncsd.cpp src/core/loader/ncch.cpp src/core/loader/3dsx.cpp src/core/loader/lz77.cpp) @@ -244,7 +244,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/services/news_u.hpp include/applets/software_keyboard.hpp include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp - include/PICA/dynapica/shader_rec_emitter_arm64.hpp + include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/PICA/shader_gen.hpp ) cmrc_add_resource_library( diff --git a/include/PICA/shader_gen.hpp 
b/include/PICA/shader_gen.hpp new file mode 100644 index 00000000..b52cd7ab --- /dev/null +++ b/include/PICA/shader_gen.hpp @@ -0,0 +1,24 @@ +#pragma once +#include + +#include "PICA/gpu.hpp" +#include "PICA/regs.hpp" +#include "helpers.hpp" + +namespace PICA::ShaderGen { + // Graphics API this shader is targetting + enum class API { GL, GLES, Vulkan }; + + // Shading language to use (Only GLSL for the time being) + enum class Language { GLSL }; + + class FragmentGenerator { + using PICARegs = std::array; + API api; + Language language; + + public: + FragmentGenerator(API api, Language language) : api(api), language(language) {} + std::string generate(const PICARegs& regs); + }; +}; // namespace PICA::ShaderGen \ No newline at end of file diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp new file mode 100644 index 00000000..661002ac --- /dev/null +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -0,0 +1,30 @@ +#include "PICA/shader_gen.hpp" +using namespace PICA::ShaderGen; + +std::string FragmentGenerator::generate(const PICARegs& regs) { + std::string ret = ""; + + switch (api) { + case API::GL: ret += "#version 410 core"; break; + case API::GLES: ret += "#version 300 es"; break; + default: break; + } + + // Input and output attributes + ret += R"( + in vec3 v_tangent; + in vec3 v_normal; + in vec3 v_bitangent; + in vec4 v_colour; + in vec3 v_texcoord0; + in vec2 v_texcoord1; + in vec3 v_view; + in vec2 v_texcoord2; + flat in vec4 v_textureEnvColor[6]; + flat in vec4 v_textureEnvBufferColor; + + out vec4 fragColour; + )"; + + return ret; +} \ No newline at end of file From ef2467bc6029b9e79bfba5651090f7767ea51863 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jan 2024 02:59:29 +0200 Subject: [PATCH 002/251] TEV definitions for shader generator --- include/PICA/regs.hpp | 104 +++++++++++++++++++++++++++ include/PICA/shader_gen.hpp | 2 + include/renderer_gl/renderer_gl.hpp | 5 
+- src/core/PICA/shader_gen_glsl.cpp | 41 +++++++++++ src/core/renderer_gl/renderer_gl.cpp | 1 + 5 files changed, 152 insertions(+), 1 deletion(-) diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 70cecf7b..100a0573 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -344,4 +344,108 @@ namespace PICA { GeometryPrimitive = 3, }; + struct TexEnvConfig { + enum class Source : u8 { + PrimaryColor = 0x0, + PrimaryFragmentColor = 0x1, + SecondaryFragmentColor = 0x2, + Texture0 = 0x3, + Texture1 = 0x4, + Texture2 = 0x5, + Texture3 = 0x6, + // TODO: Inbetween values are unknown + PreviousBuffer = 0xD, + Constant = 0xE, + Previous = 0xF, + }; + + enum class ColorOperand : u8 { + SourceColor = 0x0, + OneMinusSourceColor = 0x1, + SourceAlpha = 0x2, + OneMinusSourceAlpha = 0x3, + SourceRed = 0x4, + OneMinusSourceRed = 0x5, + // TODO: Inbetween values are unknown + SourceGreen = 0x8, + OneMinusSourceGreen = 0x9, + // Inbetween values are unknown + SourceBlue = 0xC, + OneMinusSourceBlue = 0xD, + }; + + enum class AlphaOperand : u8 { + SourceAlpha = 0x0, + OneMinusSourceAlpha = 0x1, + SourceRed = 0x2, + OneMinusSourceRed = 0x3, + SourceGreen = 0x4, + OneMinusSourceGreen = 0x5, + SourceBlue = 0x6, + OneMinusSourceBlue = 0x7, + }; + + enum class Operation : u8 { + Replace = 0, + Modulate = 1, + Add = 2, + AddSigned = 3, + Lerp = 4, + Subtract = 5, + Dot3RGB = 6, + Dot3RGBA = 7, + MultiplyAdd = 8, + AddMultiply = 9, + }; + + // RGB sources + Source colorSource1, colorSource2, colorSource3; + // Alpha sources + Source alphaSource1, alphaSource2, alphaSource3; + + // RGB operands + ColorOperand colorOperand1, colorOperand2, colorOperand3; + // Alpha operands + AlphaOperand alphaOperand1, alphaOperand2, alphaOperand3; + + // Texture environment operations for this stage + Operation colorOp, alphaOp; + + u32 constColor; + + private: + // These are the only private members since their value doesn't actually reflect the scale + // So we make them public so 
we'll always use the appropriate member functions instead + u8 colorScale; + u8 alphaScale; + + public: + // Create texture environment object from TEV registers + TexEnvConfig(u32 source, u32 operand, u32 combiner, u32 color, u32 scale) : constColor(color) { + colorSource1 = Helpers::getBits<0, 4, Source>(source); + colorSource2 = Helpers::getBits<4, 4, Source>(source); + colorSource3 = Helpers::getBits<8, 4, Source>(source); + + alphaSource1 = Helpers::getBits<16, 4, Source>(source); + alphaSource2 = Helpers::getBits<20, 4, Source>(source); + alphaSource3 = Helpers::getBits<24, 4, Source>(source); + + colorOperand1 = Helpers::getBits<0, 4, ColorOperand>(operand); + colorOperand2 = Helpers::getBits<4, 4, ColorOperand>(operand); + colorOperand3 = Helpers::getBits<8, 4, ColorOperand>(operand); + + alphaOperand1 = Helpers::getBits<12, 3, AlphaOperand>(operand); + alphaOperand2 = Helpers::getBits<16, 3, AlphaOperand>(operand); + alphaOperand3 = Helpers::getBits<20, 3, AlphaOperand>(operand); + + colorOp = Helpers::getBits<0, 4, Operation>(combiner); + alphaOp = Helpers::getBits<16, 4, Operation>(combiner); + + colorScale = Helpers::getBits<0, 2>(scale); + alphaScale = Helpers::getBits<16, 2>(scale); + } + + u32 getColorScale() { return (colorScale <= 2) ? (1 << colorScale) : 1; } + u32 getAlphaScale() { return (alphaScale <= 2) ? 
(1 << alphaScale) : 1; } + }; } // namespace PICA diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index b52cd7ab..3fa66871 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -17,6 +17,8 @@ namespace PICA::ShaderGen { API api; Language language; + void compileTEV(std::string& shader, int stage, const PICARegs& regs); + public: FragmentGenerator(API api, Language language) : api(api), language(language) {} std::string generate(const PICARegs& regs); diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 92f02662..b662023f 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -6,6 +6,7 @@ #include "PICA/float_types.hpp" #include "PICA/pica_vertex.hpp" #include "PICA/regs.hpp" +#include "PICA/shader_gen.hpp" #include "gl_state.hpp" #include "helpers.hpp" #include "logger.hpp" @@ -60,6 +61,8 @@ class RendererGL final : public Renderer { OpenGL::Framebuffer getColourFBO(); OpenGL::Texture getTexture(Texture& tex); + PICA::ShaderGen::FragmentGenerator fragShaderGen; + MAKE_LOG_FUNCTION(log, rendererLogger) void setupBlending(); void setupStencilTest(bool stencilEnable); @@ -71,7 +74,7 @@ class RendererGL final : public Renderer { public: RendererGL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) - : Renderer(gpu, internalRegs, externalRegs) {} + : Renderer(gpu, internalRegs, externalRegs), fragShaderGen(PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL) {} ~RendererGL() override; void reset() override; diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 661002ac..d423016d 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -1,4 +1,5 @@ #include "PICA/shader_gen.hpp" +using namespace PICA; using namespace PICA::ShaderGen; std::string FragmentGenerator::generate(const PICARegs& regs) { @@ -10,6 +11,8 @@ std::string 
FragmentGenerator::generate(const PICARegs& regs) { default: break; } + bool unimplementedFlag = false; + // Input and output attributes ret += R"( in vec3 v_tangent; @@ -24,7 +27,45 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { flat in vec4 v_textureEnvBufferColor; out vec4 fragColour; + uniform sampler2D u_tex0; + uniform sampler2D u_tex1; + uniform sampler2D u_tex2; + uniform sampler1DArray u_tex_lighting_lut; + + vec4 tevSources[16]; + vec4 tevNextPreviousBuffer; + + vec3 regToColor(uint reg) { + // Normalization scale to convert from [0...255] to [0.0...1.0] + const float scale = 1.0 / 255.0; + + return scale * vec3(float(bitfieldExtract(reg, 20, 8)), float(bitfieldExtract(reg, 10, 8)), float(bitfieldExtract(reg, 00, 8))); + } )"; + // Emit main function for fragment shader + // When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour + ret += R"( + void main() { + tevSources[0] = v_colour; + tevSources[13] = vec4(0.0); // Previous buffer colour + tevSources[15] = v_colour; // Previous combiner + )"; + + for (int i = 0; i < 6; i++) { + compileTEV(ret, i, regs); + } + return ret; +} + +void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICARegs& regs) { + // Base address for each TEV stage's configuration + static constexpr std::array ioBases = { + InternalRegs::TexEnv0Source, InternalRegs::TexEnv1Source, InternalRegs::TexEnv2Source, + InternalRegs::TexEnv3Source, InternalRegs::TexEnv4Source, InternalRegs::TexEnv5Source, + }; + + const u32 ioBase = ioBases[stage]; + TexEnvConfig tev(regs[ioBase], regs[ioBase + 1], regs[ioBase + 2], regs[ioBase + 3], regs[ioBase + 4]); } \ No newline at end of file diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index a11a6ffa..4828e4e6 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -388,6 +388,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v 
OpenGL::TriangleFan, OpenGL::Triangle, }; + std::cout << fragShaderGen.generate(regs); const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); From c13c8046d47cf6e7a1a0654406ccb7129356f697 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jan 2024 03:11:41 +0200 Subject: [PATCH 003/251] Detect passthrough TEV stages --- include/PICA/regs.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 100a0573..b807ae5c 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -447,5 +447,17 @@ namespace PICA { u32 getColorScale() { return (colorScale <= 2) ? (1 << colorScale) : 1; } u32 getAlphaScale() { return (alphaScale <= 2) ? (1 << alphaScale) : 1; } + + bool isPassthroughStage() { + // clang-format off + // Thank you to the Citra dev that wrote this out + return ( + colorOp == Operation::Replace && alphaOp == Operation::Replace && + colorSource1 == Source::Previous && alphaSource1 == Source::Previous && + colorOperand1 == ColorOperand::SourceColor && alphaOperand1 == AlphaOperand::SourceAlpha && + getColorScale() == 1 && getAlphaScale() == 1 + ); + // clang-format on + } }; } // namespace PICA From 45ae6bd3a8ff7b05c93a188c34fa9d61295f4116 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 2 Feb 2024 01:16:45 +0200 Subject: [PATCH 004/251] Getting TEV operations working --- include/PICA/shader_gen.hpp | 5 ++ src/core/PICA/shader_gen_glsl.cpp | 145 ++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+) diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index 3fa66871..aaef8b30 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -18,6 +18,11 @@ namespace PICA::ShaderGen { Language language; void compileTEV(std::string& shader, int stage, const PICARegs& regs); + void getSource(std::string& shader, 
PICA::TexEnvConfig::Source source, int index); + void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index); + void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index); + void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op); + void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op); public: FragmentGenerator(API api, Language language) : api(api), language(language) {} diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index d423016d..0c41b8d0 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -52,10 +52,23 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { tevSources[15] = v_colour; // Previous combiner )"; + ret += R"( + vec3 colorOperand1 = vec3(0.0); + vec3 colorOperand2 = vec3(0.0); + vec3 colorOperand3 = vec3(0.0); + + float alphaOperand1 = 0.0; + float alphaOperand2 = 0.0; + float alphaOperand3 = 0.0; + )"; + for (int i = 0; i < 6; i++) { compileTEV(ret, i, regs); } + ret += "}"; // End of main function + ret += "\n\n\n\n\n\n\n\n\n\n\n\n\n"; + return ret; } @@ -68,4 +81,136 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg const u32 ioBase = ioBases[stage]; TexEnvConfig tev(regs[ioBase], regs[ioBase + 1], regs[ioBase + 2], regs[ioBase + 3], regs[ioBase + 4]); + + if (!tev.isPassthroughStage()) { + // Get color operands + shader += "colorOp1 = "; + getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage); + + shader += ";\ncolorOp2 = "; + getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage); + + shader += ";\ncolorOp3 = "; + getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage); + + shader += ";\nvec3 outputColor" + std::to_string(stage) + " = vec3(1.0)"; + shader += ";\n"; + + if (tev.colorOp == 
TexEnvConfig::Operation::Dot3RGBA) { + // Dot3 RGBA also writes to the alpha component so we don't need to do anything more + shader += "float outputAlpha" + std::to_string(stage) + " = colorOutput" + std::to_string(stage) + ".x;\n"; + } else { + // Get alpha operands + shader += "alphaOp1 = "; + getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage); + + shader += ";\nalphaOp2 = "; + getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage); + + shader += ";\nalphaOp3 = "; + getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage); + + shader += ";\nvec3 outputAlpha" + std::to_string(stage) + " = 1.0"; + shader += ";\n"; + } + } +} + +void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index) { + using OperandType = TexEnvConfig::ColorOperand; + + // For inverting operands, add the 1.0 - x subtraction + if (color == OperandType::OneMinusSourceColor || color == OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen || + color == OperandType::OneMinusSourceBlue || color == OperandType::OneMinusSourceAlpha) { + shader += "vec3(1.0, 1.0, 1.0) - "; + } + + switch (color) { + case OperandType::SourceColor: + case OperandType::OneMinusSourceColor: + getSource(shader, source, index); + shader += ".rgb"; + break; + + case OperandType::SourceRed: + case OperandType::OneMinusSourceRed: + getSource(shader, source, index); + shader += ".rrr"; + break; + + case OperandType::SourceGreen: + case OperandType::OneMinusSourceGreen: + getSource(shader, source, index); + shader += ".ggg"; + break; + + case OperandType::SourceBlue: + case OperandType::OneMinusSourceBlue: + getSource(shader, source, index); + shader += ".bbb"; + break; + + case OperandType::SourceAlpha: + case OperandType::OneMinusSourceAlpha: + getSource(shader, source, index); + shader += ".aaa"; + break; + + default: + shader += "vec3(1.0, 1.0, 1.0)"; + Helpers::warn("FragmentGenerator: 
Invalid TEV color operand"); + break; + } +} + +void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index) { + using OperandType = TexEnvConfig::AlphaOperand; + + // For inverting operands, add the 1.0 - x subtraction + if (color == OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen || color == OperandType::OneMinusSourceBlue || + color == OperandType::OneMinusSourceAlpha) { + shader += "1.0 - "; + } + + switch (color) { + case OperandType::SourceRed: + case OperandType::OneMinusSourceRed: + getSource(shader, source, index); + shader += ".r"; + break; + + case OperandType::SourceGreen: + case OperandType::OneMinusSourceGreen: + getSource(shader, source, index); + shader += ".g"; + break; + + case OperandType::SourceBlue: + case OperandType::OneMinusSourceBlue: + getSource(shader, source, index); + shader += ".b"; + break; + + case OperandType::SourceAlpha: + case OperandType::OneMinusSourceAlpha: + getSource(shader, source, index); + shader += ".a"; + break; + + default: + shader += "1.0"; + Helpers::warn("FragmentGenerator: Invalid TEV color operand"); + break; + } +} + +void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index) { + switch (source) { + case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break; + + default: + Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); + shader += "vec4(1.0, 1.0, 1.0, 1.0)"; + break; + } } \ No newline at end of file From 7e77af0de88d5a6bdbbb35b4d7372aec23ce187b Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 2 Feb 2024 01:34:33 +0200 Subject: [PATCH 005/251] GLSL Fragment Generator: Working color operations --- src/core/PICA/shader_gen_glsl.cpp | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp 
index 0c41b8d0..829aec03 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -93,7 +93,8 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += ";\ncolorOp3 = "; getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage); - shader += ";\nvec3 outputColor" + std::to_string(stage) + " = vec3(1.0)"; + shader += ";\nvec3 outputColor" + std::to_string(stage) + " = "; + getColorOperation(shader, tev.colorOp); shader += ";\n"; if (tev.colorOp == TexEnvConfig::Operation::Dot3RGBA) { @@ -213,4 +214,24 @@ void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source sour shader += "vec4(1.0, 1.0, 1.0, 1.0)"; break; } +} + +void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Operation op) { + switch (op) { + case TexEnvConfig::Operation::Replace: shader += "colorOp1"; break; + case TexEnvConfig::Operation::Add: shader += "colorOp1 + colorOp2"; break; + case TexEnvConfig::Operation::AddSigned: shader += "clamp(colorOp1 + colorOp2 - 0.5, 0.0, 1.0);"; break; + case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break; + case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break; + case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec(1.0) - colorOp3)"; break; + + case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break; + case TexEnvConfig::Operation::MultiplyAdd: shader += "colorOp1 * colorOp2 + colorOp3"; break; + case TexEnvConfig::Operation::Dot3RGB: + case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(colorOp1 - 0.5, colorOp2 - 0.5))"; break; + default: + Helpers::warn("FragmentGenerator: Unimplemented color op"); + shader += "vec3(1.0)"; + break; + } } \ No newline at end of file From 0be8c45e377a47a6db0cb530c508063b0f129f6b Mon Sep 17 00:00:00 2001 From: wheremyfoodat 
<44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 2 Feb 2024 01:51:37 +0200 Subject: [PATCH 006/251] Fragment shader gen: Properly track TEV outputs --- src/core/PICA/shader_gen_glsl.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 829aec03..28dbc5ab 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -26,7 +26,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { flat in vec4 v_textureEnvColor[6]; flat in vec4 v_textureEnvBufferColor; - out vec4 fragColour; + out vec4 fragColor; uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; @@ -50,6 +50,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { tevSources[0] = v_colour; tevSources[13] = vec4(0.0); // Previous buffer colour tevSources[15] = v_colour; // Previous combiner + vec4 combinerOutput = v_colour; // Last TEV output )"; ret += R"( @@ -66,6 +67,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { compileTEV(ret, i, regs); } + ret += "fragColor = combinerOutput;\n"; ret += "}"; // End of main function ret += "\n\n\n\n\n\n\n\n\n\n\n\n\n"; @@ -114,6 +116,10 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += ";\nvec3 outputAlpha" + std::to_string(stage) + " = 1.0"; shader += ";\n"; } + + shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) + + ".0, vec3(0.0), vec3(1.0)), clamp(outputAlpha" + std::to_string(stage) + " * " + std::to_string(tev.getAlphaScale()) + + ".0, 0.0, 1.0));\n"; } } From 10654ce1ca94bf47f702fa36b30707c1de49434c Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 4 Feb 2024 18:54:29 +0200 Subject: [PATCH 007/251] GLSL generator: Add textures and alpha operations --- include/PICA/shader_gen.hpp | 2 ++ 
src/core/PICA/shader_gen_glsl.cpp | 40 +++++++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index aaef8b30..80c57d46 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -24,6 +24,8 @@ namespace PICA::ShaderGen { void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op); void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op); + u32 textureConfig = 0; + public: FragmentGenerator(API api, Language language) : api(api), language(language) {} std::string generate(const PICARegs& regs); diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 28dbc5ab..47cf2a7b 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -63,6 +63,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { float alphaOperand3 = 0.0; )"; + textureConfig = regs[InternalRegs::TexUnitCfg]; for (int i = 0; i < 6; i++) { compileTEV(ret, i, regs); } @@ -113,8 +114,10 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += ";\nalphaOp3 = "; getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage); - shader += ";\nvec3 outputAlpha" + std::to_string(stage) + " = 1.0"; - shader += ";\n"; + shader += ";\nvec3 outputAlpha" + std::to_string(stage) + " = "; + getAlphaOperation(shader, tev.alphaOp); + // Clamp the alpha value to [0.0, 1.0] + shader += ";\nclamp(outputAlpha" + std::to_string(stage) + ", 0.0, 1.0);\n"; } shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) + @@ -214,6 +217,19 @@ void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Sourc void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index) { switch (source) { case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; 
break; + case TexEnvConfig::Source::Texture0: shader += "texture(u_tex0, v_texcoord0.xy)"; break; + case TexEnvConfig::Source::Texture1: shader += "texture(u_tex1, v_texcoord1)"; break; + case TexEnvConfig::Source::Texture2: { + // If bit 13 in texture config is set then we use the texcoords for texture 1, otherwise for texture 2 + if (Helpers::getBit<13>(textureConfig)) { + shader += "texture(u_tex2, v_texcoord1)"; + } else { + shader += "texture(u_tex2, v_texcoord2)"; + } + break; + } + + case TexEnvConfig::Source::Previous: shader += "combinerOutput"; break; default: Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); @@ -240,4 +256,24 @@ void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Ope shader += "vec3(1.0)"; break; } +} + +void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Operation op) { + switch (op) { + case TexEnvConfig::Operation::Replace: shader += "alphaOp1"; break; + case TexEnvConfig::Operation::Add: shader += "alphaOp1 + alphaOp2"; break; + case TexEnvConfig::Operation::AddSigned: shader += "clamp(alphaOp1 + alphaOp2 - 0.5, 0.0, 1.0);"; break; + case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break; + case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break; + case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (vec(1.0) - alphaOp3)"; break; + + case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, vec3(1.0)) * alphaOp3"; break; + case TexEnvConfig::Operation::MultiplyAdd: shader += "alphaOp1 * alphaOp2 + alphaOp3"; break; + case TexEnvConfig::Operation::Dot3RGB: + case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(alphaOp1 - 0.5, alphaOp2 - 0.5))"; break; + default: + Helpers::warn("FragmentGenerator: Unimplemented alpha op"); + shader += "vec3(1.0)"; + break; + } } \ No newline at end of file From ddc14cea0967b231e45a378d6bb8797779177951 Mon Sep 17 00:00:00 2001 
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 29 Feb 2024 00:56:24 +0200 Subject: [PATCH 008/251] Fix shader compilation errors --- include/PICA/shader_gen.hpp | 1 + src/core/PICA/shader_gen_glsl.cpp | 77 +++++++++++++++++++++++++--- src/core/renderer_gl/renderer_gl.cpp | 10 +++- 3 files changed, 80 insertions(+), 8 deletions(-) diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index 80c57d46..e07575a5 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -29,5 +29,6 @@ namespace PICA::ShaderGen { public: FragmentGenerator(API api, Language language) : api(api), language(language) {} std::string generate(const PICARegs& regs); + std::string getVertexShader(const PICARegs& regs); }; }; // namespace PICA::ShaderGen \ No newline at end of file diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 47cf2a7b..3a7e9b74 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -2,6 +2,69 @@ using namespace PICA; using namespace PICA::ShaderGen; +std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { + std::string ret = ""; + + switch (api) { + case API::GL: ret += "#version 410 core"; break; + case API::GLES: ret += "#version 300 es"; break; + default: break; + } + + ret += R"( + layout(location = 0) in vec4 a_coords; + layout(location = 1) in vec4 a_quaternion; + layout(location = 2) in vec4 a_vertexColour; + layout(location = 3) in vec2 a_texcoord0; + layout(location = 4) in vec2 a_texcoord1; + layout(location = 5) in float a_texcoord0_w; + layout(location = 6) in vec3 a_view; + layout(location = 7) in vec2 a_texcoord2; + + out vec3 v_normal; + out vec3 v_tangent; + out vec3 v_bitangent; + out vec4 v_colour; + out vec3 v_texcoord0; + out vec2 v_texcoord1; + out vec3 v_view; + out vec2 v_texcoord2; + flat out vec4 v_textureEnvColor[6]; + flat out vec4 v_textureEnvBufferColor; + + //out float gl_ClipDistance[2]; 
+ + vec4 abgr8888ToVec4(uint abgr) { + const float scale = 1.0 / 255.0; + return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); + } + + vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + vec3 u = q.xyz; + float s = q.w; + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); + } + + void main() { + gl_Position = a_coords; + vec4 colourAbs = abs(a_vertexColour); + v_colour = min(colourAbs, vec4(1.f)); + + // Flip y axis of UVs because OpenGL uses an inverted y for texture sampling compared to the PICA + v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w); + v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); + v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); + v_view = a_view; + + v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); + v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); + v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); + } +)"; + + return ret; +} + std::string FragmentGenerator::generate(const PICARegs& regs) { std::string ret = ""; @@ -54,13 +117,13 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { )"; ret += R"( - vec3 colorOperand1 = vec3(0.0); - vec3 colorOperand2 = vec3(0.0); - vec3 colorOperand3 = vec3(0.0); + vec3 colorOp1 = vec3(0.0); + vec3 colorOp2 = vec3(0.0); + vec3 colorOp3 = vec3(0.0); - float alphaOperand1 = 0.0; - float alphaOperand2 = 0.0; - float alphaOperand3 = 0.0; + float alphaOp1 = 0.0; + float alphaOp2 = 0.0; + float alphaOp3 = 0.0; )"; textureConfig = regs[InternalRegs::TexUnitCfg]; @@ -114,7 +177,7 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += ";\nalphaOp3 = "; getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage); - shader += ";\nvec3 outputAlpha" + std::to_string(stage) + " = "; + shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = 
"; getAlphaOperation(shader, tev.alphaOp); // Clamp the alpha value to [0.0, 1.0] shader += ";\nclamp(outputAlpha" + std::to_string(stage) + ", 0.0, 1.0);\n"; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 4828e4e6..95175130 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -388,7 +388,15 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v OpenGL::TriangleFan, OpenGL::Triangle, }; - std::cout << fragShaderGen.generate(regs); + + std::string vs = fragShaderGen.getVertexShader(regs); + std::string fs = fragShaderGen.generate(regs); + std::cout << fs << "\n\n\n"; + + OpenGL::Program program; + OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); + OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); + program.create({vertShader, fragShader}); const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); From fdfb012aa149487c46732d6642ab417460a56d4b Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 29 Feb 2024 01:28:00 +0200 Subject: [PATCH 009/251] GL: Add RendererGL::getSpecializedShader --- include/renderer_gl/renderer_gl.hpp | 1 + src/core/renderer_gl/renderer_gl.cpp | 25 +++++++++++++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index b662023f..7bc1087a 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -60,6 +60,7 @@ class RendererGL final : public Renderer { OpenGL::Framebuffer getColourFBO(); OpenGL::Texture getTexture(Texture& tex); + OpenGL::Program getSpecializedShader(); PICA::ShaderGen::FragmentGenerator fragShaderGen; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 95175130..0bb592cf 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ 
b/src/core/renderer_gl/renderer_gl.cpp @@ -393,10 +393,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v std::string fs = fragShaderGen.generate(regs); std::cout << fs << "\n\n\n"; - OpenGL::Program program; - OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); - OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); - program.create({vertShader, fragShader}); + OpenGL::Program program = getSpecializedShader(); const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); @@ -787,6 +784,26 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt return colourBufferCache.add(sampleBuffer); } +OpenGL::Program RendererGL::getSpecializedShader() { + OpenGL::Program program; + + std::string vs = fragShaderGen.getVertexShader(regs); + std::string fs = fragShaderGen.generate(regs); + + OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); + OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); + program.create({vertShader, fragShader}); + program.use(); + + // Init sampler objects. 
Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 + glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); + glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); + glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + + return program; +} + void RendererGL::screenshot(const std::string& name) { constexpr uint width = 400; constexpr uint height = 2 * 240; From 67fe3214fe00ddff37dd0a133c0fd1c38859987f Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 2 Mar 2024 20:41:23 +0200 Subject: [PATCH 010/251] Add shader cache --- include/renderer_gl/renderer_gl.hpp | 64 ++++++++++++++----- src/core/PICA/shader_gen_glsl.cpp | 8 +-- src/core/renderer_gl/renderer_gl.cpp | 92 ++++++++++++++++------------ 3 files changed, 106 insertions(+), 58 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 7bc1087a..e8eaeacb 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -1,9 +1,13 @@ #pragma once #include +#include +#include #include +#include #include "PICA/float_types.hpp" +#include "PICA/pica_hash.hpp" #include "PICA/pica_vertex.hpp" #include "PICA/regs.hpp" #include "PICA/shader_gen.hpp" @@ -17,6 +21,32 @@ // More circular dependencies! 
class GPU; +namespace PICA { + struct FragmentConfig { + u32 texUnitConfig; + u32 texEnvUpdateBuffer; + + // TODO: This should probably be a uniform + u32 texEnvBufferColor; + + // There's 6 TEV stages, and each one is configured via 5 word-sized registers + std::array tevConfigs; + + // Hash function and equality operator required by std::unordered_map + bool operator==(const FragmentConfig& config) const { + return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0; + } + }; +} // namespace PICA + +// Override std::hash for our fragment config class +template <> +struct std::hash { + std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { + return PICAHash::computeHash((const char*)&config, sizeof(config)); + } +}; + class RendererGL final : public Renderer { GLStateManager gl = {}; @@ -26,20 +56,23 @@ class RendererGL final : public Renderer { OpenGL::VertexArray vao; OpenGL::VertexBuffer vbo; - // TEV configuration uniform locations - GLint textureEnvSourceLoc = -1; - GLint textureEnvOperandLoc = -1; - GLint textureEnvCombinerLoc = -1; - GLint textureEnvColorLoc = -1; - GLint textureEnvScaleLoc = -1; + // Data + struct { + // TEV configuration uniform locations + GLint textureEnvSourceLoc = -1; + GLint textureEnvOperandLoc = -1; + GLint textureEnvCombinerLoc = -1; + GLint textureEnvColorLoc = -1; + GLint textureEnvScaleLoc = -1; - // Uniform of PICA registers - GLint picaRegLoc = -1; + // Uniform of PICA registers + GLint picaRegLoc = -1; - // Depth configuration uniform locations - GLint depthOffsetLoc = -1; - GLint depthScaleLoc = -1; - GLint depthmapEnableLoc = -1; + // Depth configuration uniform locations + GLint depthOffsetLoc = -1; + GLint depthScaleLoc = -1; + GLint depthmapEnableLoc = -1; + } ubershaderData; float oldDepthScale = -1.0; float oldDepthOffset = 0.0; @@ -48,6 +81,7 @@ class RendererGL final : public Renderer { SurfaceCache depthBufferCache; SurfaceCache colourBufferCache; SurfaceCache textureCache; + bool 
usingUbershader = false; // Dummy VAO/VBO for blitting the final output OpenGL::VertexArray dummyVAO; @@ -58,9 +92,11 @@ class RendererGL final : public Renderer { OpenGL::Framebuffer screenFramebuffer; OpenGL::Texture blankTexture; + std::unordered_map shaderCache; + OpenGL::Framebuffer getColourFBO(); OpenGL::Texture getTexture(Texture& tex); - OpenGL::Program getSpecializedShader(); + OpenGL::Program& getSpecializedShader(); PICA::ShaderGen::FragmentGenerator fragShaderGen; @@ -99,4 +135,4 @@ class RendererGL final : public Renderer { // Take a screenshot of the screen and store it in a file void screenshot(const std::string& name) override; -}; +}; \ No newline at end of file diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 3a7e9b74..1bcae30c 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -308,7 +308,7 @@ void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Ope case TexEnvConfig::Operation::AddSigned: shader += "clamp(colorOp1 + colorOp2 - 0.5, 0.0, 1.0);"; break; case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break; case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break; - case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec(1.0) - colorOp3)"; break; + case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec3(1.0) - colorOp3)"; break; case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break; case TexEnvConfig::Operation::MultiplyAdd: shader += "colorOp1 * colorOp2 + colorOp3"; break; @@ -328,15 +328,15 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope case TexEnvConfig::Operation::AddSigned: shader += "clamp(alphaOp1 + alphaOp2 - 0.5, 0.0, 1.0);"; break; case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break; case 
TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break; - case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (vec(1.0) - alphaOp3)"; break; + case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (1.0 - alphaOp3)"; break; - case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, vec3(1.0)) * alphaOp3"; break; + case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break; case TexEnvConfig::Operation::MultiplyAdd: shader += "alphaOp1 * alphaOp2 + alphaOp3"; break; case TexEnvConfig::Operation::Dot3RGB: case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(alphaOp1 - 0.5, alphaOp2 - 0.5))"; break; default: Helpers::warn("FragmentGenerator: Unimplemented alpha op"); - shader += "vec3(1.0)"; + shader += "1.0"; break; } } \ No newline at end of file diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 0bb592cf..a0e09bba 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -38,9 +38,9 @@ void RendererGL::reset() { oldDepthOffset = 0.0; // Default depth offset to 0 oldDepthmapEnable = false; // Enable w buffering - glUniform1f(depthScaleLoc, oldDepthScale); - glUniform1f(depthOffsetLoc, oldDepthOffset); - glUniform1i(depthmapEnableLoc, oldDepthmapEnable); + glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale); + glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset); + glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable); gl.useProgram(oldProgram); // Switch to old GL program } @@ -59,16 +59,16 @@ void RendererGL::initGraphicsContextInternal() { triangleProgram.create({vert, frag}); gl.useProgram(triangleProgram); - textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); - textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); - textureEnvCombinerLoc = 
OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); - textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); - textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); + ubershaderData.textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); + ubershaderData.textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); + ubershaderData.textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); + ubershaderData.textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); + ubershaderData.textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); - depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); - depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); - depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable"); - picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs"); + ubershaderData.depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); + ubershaderData.depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); + ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable"); + ubershaderData.picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs"); // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0); @@ -289,7 +289,6 @@ void RendererGL::setupStencilTest(bool stencilEnable) { glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]); } - void RendererGL::setupTextureEnvState() { // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. 
@@ -314,11 +313,11 @@ void RendererGL::setupTextureEnvState() { textureEnvScaleRegs[i] = regs[ioBase + 4]; } - glUniform1uiv(textureEnvSourceLoc, 6, textureEnvSourceRegs); - glUniform1uiv(textureEnvOperandLoc, 6, textureEnvOperandRegs); - glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs); - glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs); - glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs); + glUniform1uiv(ubershaderData.textureEnvSourceLoc, 6, textureEnvSourceRegs); + glUniform1uiv(ubershaderData.textureEnvOperandLoc, 6, textureEnvOperandRegs); + glUniform1uiv(ubershaderData.textureEnvCombinerLoc, 6, textureEnvCombinerRegs); + glUniform1uiv(ubershaderData.textureEnvColorLoc, 6, textureEnvColourRegs); + glUniform1uiv(ubershaderData.textureEnvScaleLoc, 6, textureEnvScaleRegs); } void RendererGL::bindTexturesToSlots() { @@ -389,11 +388,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v OpenGL::Triangle, }; - std::string vs = fragShaderGen.getVertexShader(regs); - std::string fs = fragShaderGen.generate(regs); - std::cout << fs << "\n\n\n"; - - OpenGL::Program program = getSpecializedShader(); + OpenGL::Program& program = getSpecializedShader(); const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); @@ -427,17 +422,17 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v // Update depth uniforms if (oldDepthScale != depthScale) { oldDepthScale = depthScale; - glUniform1f(depthScaleLoc, depthScale); + glUniform1f(ubershaderData.depthScaleLoc, depthScale); } if (oldDepthOffset != depthOffset) { oldDepthOffset = depthOffset; - glUniform1f(depthOffsetLoc, depthOffset); + glUniform1f(ubershaderData.depthOffsetLoc, depthOffset); } if (oldDepthmapEnable != depthMapEnable) { oldDepthmapEnable = depthMapEnable; - glUniform1i(depthmapEnableLoc, depthMapEnable); + glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable); } setupTextureEnvState(); @@ -445,7 +440,7 @@ void 
RendererGL::drawVertices(PICA::PrimType primType, std::span v // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates - glUniform1uiv(picaRegLoc, 0x200 - 0x48, &regs[0x48]); + glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]); if (gpu.lightingLUTDirty) { updateLightingLUT(); @@ -784,22 +779,39 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt return colourBufferCache.add(sampleBuffer); } -OpenGL::Program RendererGL::getSpecializedShader() { - OpenGL::Program program; +OpenGL::Program& RendererGL::getSpecializedShader() { + PICA::FragmentConfig fsConfig; + fsConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; + fsConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + fsConfig.texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor]; - std::string vs = fragShaderGen.getVertexShader(regs); - std::string fs = fragShaderGen.generate(regs); + // Set up TEV stages + std::memcpy(&fsConfig.tevConfigs[0 * 5], &regs[InternalRegs::TexEnv0Source], 5 * sizeof(u32)); + std::memcpy(&fsConfig.tevConfigs[1 * 5], &regs[InternalRegs::TexEnv1Source], 5 * sizeof(u32)); + std::memcpy(&fsConfig.tevConfigs[2 * 5], &regs[InternalRegs::TexEnv2Source], 5 * sizeof(u32)); + std::memcpy(&fsConfig.tevConfigs[3 * 5], &regs[InternalRegs::TexEnv3Source], 5 * sizeof(u32)); + std::memcpy(&fsConfig.tevConfigs[4 * 5], &regs[InternalRegs::TexEnv4Source], 5 * sizeof(u32)); + std::memcpy(&fsConfig.tevConfigs[5 * 5], &regs[InternalRegs::TexEnv5Source], 5 * sizeof(u32)); - OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); - OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); - program.create({vertShader, fragShader}); - program.use(); + OpenGL::Program& program = shaderCache[fsConfig]; + if (!program.exists()) { + printf("Creating 
specialized shader\n"); - // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 - glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); - glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); - glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); - glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + std::string vs = fragShaderGen.getVertexShader(regs); + std::string fs = fragShaderGen.generate(regs); + std::cout << vs << "\n\n" << fs << "\n"; + + OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); + OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); + program.create({vertShader, fragShader}); + program.use(); + + // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 + glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); + glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); + glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + } return program; } From fc83d518e23796e8008cef7bbfa047dd14608ff1 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 2 Mar 2024 22:35:56 +0200 Subject: [PATCH 011/251] Hook up specialized shaders to GL renderer --- src/core/renderer_gl/renderer_gl.cpp | 43 +++++++++++++++++----------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index a0e09bba..4aa65586 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -291,6 +291,9 @@ void RendererGL::setupStencilTest(bool stencilEnable) { void RendererGL::setupTextureEnvState() { // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. 
+ if (!usingUbershader) { + return; + } static constexpr std::array ioBases = { PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, @@ -388,13 +391,17 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v OpenGL::Triangle, }; - OpenGL::Program& program = getSpecializedShader(); + if (usingUbershader) { + gl.useProgram(triangleProgram); + } else { + OpenGL::Program& program = getSpecializedShader(); + gl.useProgram(program); + } const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); gl.bindVBO(vbo); gl.bindVAO(vao); - gl.useProgram(triangleProgram); gl.enableClipPlane(0); // Clipping plane 0 is always enabled if (regs[PICA::InternalRegs::ClipEnable] & 1) { @@ -420,27 +427,31 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; // Update depth uniforms - if (oldDepthScale != depthScale) { - oldDepthScale = depthScale; - glUniform1f(ubershaderData.depthScaleLoc, depthScale); - } + if (usingUbershader) { + if (oldDepthScale != depthScale) { + oldDepthScale = depthScale; + glUniform1f(ubershaderData.depthScaleLoc, depthScale); + } - if (oldDepthOffset != depthOffset) { - oldDepthOffset = depthOffset; - glUniform1f(ubershaderData.depthOffsetLoc, depthOffset); - } + if (oldDepthOffset != depthOffset) { + oldDepthOffset = depthOffset; + glUniform1f(ubershaderData.depthOffsetLoc, depthOffset); + } - if (oldDepthmapEnable != depthMapEnable) { - oldDepthmapEnable = depthMapEnable; - glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable); + if (oldDepthmapEnable != depthMapEnable) { + oldDepthmapEnable = depthMapEnable; + glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable); + } } setupTextureEnvState(); bindTexturesToSlots(); - // Upload PICA Registers as a single uniform. 
The shader needs access to the rasterizer registers (for depth, starting from index 0x48) - // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates - glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]); + if (usingUbershader) { + // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) + // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates + glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]); + } if (gpu.lightingLUTDirty) { updateLightingLUT(); From e5c09a092d76fb7339bd4202ba67630e18c0d775 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 2 Mar 2024 23:29:22 +0200 Subject: [PATCH 012/251] Fix specialized shaders on Android --- include/PICA/shader_gen.hpp | 5 +++++ src/core/renderer_gl/renderer_gl.cpp | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index e07575a5..23a87120 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -30,5 +30,10 @@ namespace PICA::ShaderGen { FragmentGenerator(API api, Language language) : api(api), language(language) {} std::string generate(const PICARegs& regs); std::string getVertexShader(const PICARegs& regs); + + void setTarget(API api, Language language) { + this->api = api; + this->language = language; + } }; }; // namespace PICA::ShaderGen \ No newline at end of file diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 4aa65586..8119cff5 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -44,6 +44,10 @@ void RendererGL::reset() { gl.useProgram(oldProgram); // Switch to old GL program } + +#ifdef __ANDROID__ + 
fragShaderGen.setTarget(PICA::ShaderGen::API::GLES, PICA::ShaderGen::Language::GLSL); +#endif } void RendererGL::initGraphicsContextInternal() { From 4b07ebed863fa1f23d060f8cbb3b8bdc41683854 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 3 Mar 2024 01:51:45 +0200 Subject: [PATCH 013/251] Fix shader cache bypassing GL state manager --- src/core/renderer_gl/renderer_gl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 8119cff5..5d3ed1b1 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -819,7 +819,7 @@ OpenGL::Program& RendererGL::getSpecializedShader() { OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); program.create({vertShader, fragShader}); - program.use(); + gl.useProgram(program); // Init sampler objects. 
Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); From 5ba773a393a599897af821d8b1ef1fb4cfa85318 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 3 Mar 2024 02:43:41 +0200 Subject: [PATCH 014/251] Add GLES detection to fragment shader recompiler --- src/core/PICA/shader_gen_glsl.cpp | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 1bcae30c..c3056815 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -11,6 +11,15 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { default: break; } + if (api == API::GLES) { + ret += R"( + #define USING_GLES 1 + + precision mediump int; + precision mediump float; + )"; + } + ret += R"( layout(location = 0) in vec4 a_coords; layout(location = 1) in vec4 a_quaternion; @@ -75,6 +84,14 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { } bool unimplementedFlag = false; + if (api == API::GLES) { + ret += R"( + #define USING_GLES 1 + + precision mediump int; + precision mediump float; + )"; + } // Input and output attributes ret += R"( @@ -93,17 +110,13 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; + // GLES doesn't support sampler1DArray, as such we'll have to change how we handle lighting later +#ifndef USING_GLES uniform sampler1DArray u_tex_lighting_lut; +#endif vec4 tevSources[16]; vec4 tevNextPreviousBuffer; - - vec3 regToColor(uint reg) { - // Normalization scale to convert from [0...255] to [0.0...1.0] - const float scale = 1.0 / 255.0; - - return scale * vec3(float(bitfieldExtract(reg, 20, 8)), float(bitfieldExtract(reg, 10, 8)), float(bitfieldExtract(reg, 00, 8))); - } 
)"; // Emit main function for fragment shader @@ -339,4 +352,4 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope shader += "1.0"; break; } -} \ No newline at end of file +} From 2fc9c0a5737262fd78ebc29cb054ba98d1c35c6f Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 28 Apr 2024 16:58:42 +0300 Subject: [PATCH 015/251] DSP HLE: Broken PCM16 and handle DSP voice status better --- include/audio/dsp_shared_mem.hpp | 2 +- include/audio/hle_core.hpp | 25 ++++-- src/core/audio/hle_core.cpp | 136 +++++++++++++++++++++++++++---- 3 files changed, 138 insertions(+), 25 deletions(-) diff --git a/include/audio/dsp_shared_mem.hpp b/include/audio/dsp_shared_mem.hpp index 148986f9..25806ea1 100644 --- a/include/audio/dsp_shared_mem.hpp +++ b/include/audio/dsp_shared_mem.hpp @@ -297,7 +297,7 @@ namespace Audio::HLE { u8 isEnabled; ///< Is this channel enabled? (Doesn't have to be playing anything.) u8 currentBufferIDDirty; ///< Non-zero when current_buffer_id changes u16_le syncCount; ///< Is set by the DSP to the value of SourceConfiguration::sync_count - u32_dsp bufferPosition; ///< Number of samples into the current buffer + u32_dsp samplePosition; ///< Number of samples into the current buffer u16_le currentBufferID; ///< Updated when a buffer finishes playing u16_le lastBufferID; ///< Updated when all buffers in the queue finish playing }; diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index 257ab5ac..cee2b0c8 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -32,8 +32,8 @@ namespace Audio { SampleFormat format; SourceType sourceType; - bool fromQueue = false; // Is this buffer from the buffer queue or an embedded buffer? - bool hasPlayedOnce = false; // Has the buffer been played at least once before? + bool fromQueue = false; // Is this buffer from the buffer queue or an embedded buffer? 
+ bool hasPlayedOnce = false; // Has the buffer been played at least once before? bool operator<(const Buffer& other) const { // Lower ID = Higher priority @@ -47,9 +47,17 @@ namespace Audio { using BufferQueue = std::priority_queue; BufferQueue buffers; + SampleFormat sampleFormat = SampleFormat::ADPCM; + SourceType sourceType = SourceType::Stereo; + std::array gain0, gain1, gain2; + u32 samplePosition; // Sample number into the current audio buffer u16 syncCount; - bool enabled; // Is the source enabled? + u16 currentBufferID; + u16 previousBufferID; + + bool enabled; // Is the source enabled? + bool isBufferIDDirty = false; // Did we change buffers? // ADPCM decoding info: // An array of fixed point S5.11 coefficients. These provide "weights" for the history samples @@ -65,6 +73,10 @@ namespace Audio { int index = 0; // Index of the voice in [0, 23] for debugging void reset(); + + // Push a buffer to the buffer queue + void pushBuffer(const Buffer& buffer) { buffers.push(buffer); } + // Pop a buffer from the buffer queue and return it Buffer popBuffer() { assert(!buffers.empty()); @@ -114,9 +126,6 @@ namespace Audio { std::array sources; // DSP voices Audio::HLE::DspMemory dspRam; - SampleFormat sampleFormat = SampleFormat::ADPCM; - SourceType sourceType = SourceType::Stereo; - void resetAudioPipe(); bool loaded = false; // Have we loaded a component? 
@@ -159,9 +168,13 @@ namespace Audio { void updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients); void generateFrame(StereoFrame& frame); + void generateFrame(DSPSource& source); void outputFrame(); + // Decode an entire buffer worth of audio void decodeBuffer(DSPSource& source); + + SampleBuffer decodePCM16(const u8* data, usize sampleCount, Source& source); SampleBuffer decodeADPCM(const u8* data, usize sampleCount, Source& source); public: diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 4ee5a1dc..e92432b5 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -7,6 +7,8 @@ #include "services/dsp.hpp" +std::vector> samplezorz = {}; + namespace Audio { namespace DSPPipeType { enum : u32 { @@ -64,10 +66,6 @@ namespace Audio { dspState = DSPState::Off; loaded = false; - // Initialize these to some sane defaults - sampleFormat = SampleFormat::ADPCM; - sourceType = SourceType::Stereo; - for (auto& e : pipeData) { e.clear(); } @@ -212,12 +210,16 @@ namespace Audio { updateSourceConfig(source, config, read.adpcmCoefficients.coeff[i]); // Generate audio - if (source.enabled && !source.buffers.empty()) { - const auto& buffer = source.buffers.top(); - const u8* data = getPointerPhys(buffer.paddr); + if (source.enabled) { + generateFrame(source); - if (data != nullptr) { - // TODO + if (samplezorz.size() > 160 * 60 * 60 * 3) { + using namespace std; + ofstream fout("audio_data.bin", ios::out | ios::binary); + fout.write((char*)&samplezorz[0], samplezorz.size() * sizeof(Sample)); + fout.close(); + + Helpers::panic("Bwaa"); } } @@ -225,6 +227,13 @@ namespace Audio { auto& status = write.sourceStatuses.status[i]; status.isEnabled = source.enabled; status.syncCount = source.syncCount; + status.currentBufferIDDirty = source.isBufferIDDirty ? 
1 : 0; + status.currentBufferID = source.currentBufferID; + status.lastBufferID = source.previousBufferID; + // TODO: Properly update sample position + status.samplePosition = source.samplePosition; + + source.isBufferIDDirty = false; } } @@ -265,11 +274,11 @@ namespace Audio { // TODO: Should we check bufferQueueDirty here too? if (config.formatDirty || config.embeddedBufferDirty) { - sampleFormat = config.format; + source.sampleFormat = config.format; } if (config.monoOrStereoDirty || config.embeddedBufferDirty) { - sourceType = config.monoOrStereo; + source.sourceType = config.monoOrStereo; } if (config.embeddedBufferDirty) { @@ -285,8 +294,8 @@ namespace Audio { .looping = config.isLooping != 0, .bufferID = config.bufferID, .playPosition = config.playPosition, - .format = sampleFormat, - .sourceType = sourceType, + .format = source.sampleFormat, + .sourceType = source.sourceType, .fromQueue = false, .hasPlayedOnce = false, }; @@ -327,13 +336,95 @@ namespace Audio { return; } - switch (buffer.format) { - case SampleFormat::PCM8: - case SampleFormat::PCM16: Helpers::warn("Unimplemented sample format!"); break; + source.currentBufferID = buffer.bufferID; + source.previousBufferID = 0; + // For looping buffers, this is only set for the first time we play it. Loops do not set the dirty bit. + source.isBufferIDDirty = !buffer.hasPlayedOnce && buffer.fromQueue; - case SampleFormat::ADPCM: source.currentSamples = decodeADPCM(data, buffer.sampleCount, source); break; - default: Helpers::warn("Invalid DSP sample format"); break; + if (buffer.hasPlayedOnce) { + source.samplePosition = 0; + } else { + // Mark that the buffer has already been played once, needed for looping buffers + buffer.hasPlayedOnce = true; + // Play position is only used for the initial time the buffer is played. Loops will start from the beginning of the buffer. 
+ source.samplePosition = buffer.playPosition; } + + switch (buffer.format) { + case SampleFormat::PCM8: Helpers::warn("Unimplemented sample format!"); break; + case SampleFormat::PCM16: source.currentSamples = decodePCM16(data, buffer.sampleCount, source); break; + case SampleFormat::ADPCM: source.currentSamples = decodeADPCM(data, buffer.sampleCount, source); break; + + default: + Helpers::warn("Invalid DSP sample format"); + source.currentSamples = {}; + break; + } + + // If the buffer is a looping buffer, re-push it + if (buffer.looping) { + source.pushBuffer(buffer); + } + } + + void HLE_DSP::generateFrame(DSPSource& source) { + if (source.currentSamples.empty()) { + // There's no audio left to play, turn the voice off + if (source.buffers.empty()) { + source.enabled = false; + source.isBufferIDDirty = true; + source.previousBufferID = source.currentBufferID; + source.currentBufferID = 0; + + return; + } + + decodeBuffer(source); + } else { + constexpr uint maxSampleCount = Audio::samplesInFrame; + uint outputCount = 0; + + while (outputCount < maxSampleCount) { + if (source.currentSamples.empty()) { + if (source.buffers.empty()) { + break; + } else { + decodeBuffer(source); + } + } + + const uint sampleCount = std::min(maxSampleCount - outputCount, source.currentSamples.size()); + samplezorz.insert(samplezorz.end(), source.currentSamples.begin(), source.currentSamples.begin() + sampleCount); + source.currentSamples.erase(source.currentSamples.begin(), source.currentSamples.begin() + sampleCount); + + outputCount += sampleCount; + } + } + } + + HLE_DSP::SampleBuffer HLE_DSP::decodePCM16(const u8* data, usize sampleCount, Source& source) { + SampleBuffer decodedSamples(sampleCount); + const s16* data16 = reinterpret_cast(data); + + if (source.sourceType == SourceType::Stereo) { + for (usize i = 0; i < sampleCount; i++) { + s16 left = *data16++; + s16 right = *data16++; + + if (left != 0 || right != 0) { + Helpers::panic("panda..."); + } + + decodedSamples[i] = 
{left, right}; + } + } else { + // Mono + for (usize i = 0; i < sampleCount; i++) { + decodedSamples[i].fill(*data16++); + } + } + + return decodedSamples; } HLE_DSP::SampleBuffer HLE_DSP::decodeADPCM(const u8* data, usize sampleCount, Source& source) { @@ -413,6 +504,15 @@ namespace Audio { void DSPSource::reset() { enabled = false; + isBufferIDDirty = false; + + // Initialize these to some sane defaults + sampleFormat = SampleFormat::ADPCM; + sourceType = SourceType::Stereo; + + samplePosition = 0; + previousBufferID = 0; + currentBufferID = 0; syncCount = 0; buffers = {}; From fb8130a868897627f2d22ce270a454d434d67e8a Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 30 Apr 2024 21:56:39 +0300 Subject: [PATCH 016/251] HLE DSP: Remove debug artifacts --- src/core/audio/hle_core.cpp | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index e92432b5..d6ba21ec 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -7,8 +7,6 @@ #include "services/dsp.hpp" -std::vector> samplezorz = {}; - namespace Audio { namespace DSPPipeType { enum : u32 { @@ -102,6 +100,7 @@ namespace Audio { dspService.triggerPipeEvent(DSPPipeType::Audio); } + // TODO: Should this be called if dspState != DSPState::On? 
outputFrame(); scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::cyclesPerFrame); } @@ -212,15 +211,6 @@ namespace Audio { // Generate audio if (source.enabled) { generateFrame(source); - - if (samplezorz.size() > 160 * 60 * 60 * 3) { - using namespace std; - ofstream fout("audio_data.bin", ios::out | ios::binary); - fout.write((char*)&samplezorz[0], samplezorz.size() * sizeof(Sample)); - fout.close(); - - Helpers::panic("Bwaa"); - } } // Update write region of shared memory @@ -394,7 +384,7 @@ namespace Audio { } const uint sampleCount = std::min(maxSampleCount - outputCount, source.currentSamples.size()); - samplezorz.insert(samplezorz.end(), source.currentSamples.begin(), source.currentSamples.begin() + sampleCount); + // samples.insert(samples.end(), source.currentSamples.begin(), source.currentSamples.begin() + sampleCount); source.currentSamples.erase(source.currentSamples.begin(), source.currentSamples.begin() + sampleCount); outputCount += sampleCount; @@ -408,19 +398,15 @@ namespace Audio { if (source.sourceType == SourceType::Stereo) { for (usize i = 0; i < sampleCount; i++) { - s16 left = *data16++; - s16 right = *data16++; - - if (left != 0 || right != 0) { - Helpers::panic("panda..."); - } - + const s16 left = *data16++; + const s16 right = *data16++; decodedSamples[i] = {left, right}; } } else { // Mono for (usize i = 0; i < sampleCount; i++) { - decodedSamples[i].fill(*data16++); + const s16 sample = *data16++; + decodedSamples[i] = {sample, sample}; } } From 0490c6753fb4c56fc4b9835b504618e6d8101f18 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 1 May 2024 01:56:17 +0300 Subject: [PATCH 017/251] HLE DSP: Stub AAC --- CMakeLists.txt | 2 +- include/audio/aac.hpp | 71 +++++++++++++++++++++++++++++++++++++ include/audio/hle_core.hpp | 2 ++ src/core/audio/hle_core.cpp | 47 ++++++++++++++++++++++-- 4 files changed, 119 insertions(+), 3 deletions(-) create mode 
100644 include/audio/aac.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d3901b6..48a2a0db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -241,7 +241,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp - include/audio/hle_core.hpp include/capstone.hpp + include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp ) cmrc_add_resource_library( diff --git a/include/audio/aac.hpp b/include/audio/aac.hpp new file mode 100644 index 00000000..c780e6d2 --- /dev/null +++ b/include/audio/aac.hpp @@ -0,0 +1,71 @@ +#pragma once +#include +#include + +#include "helpers.hpp" +#include "swap.hpp" + +namespace Audio::AAC { + namespace ResultCode { + enum : u32 { + Success = 0, + }; + } + + // Enum values from Citra and struct definitions based off Citra + namespace Command { + enum : u16 { + Init = 0, // Initialize encoder/decoder + EncodeDecode = 1, // Encode/Decode AAC + Shutdown = 2, // Shutdown encoder/decoder + LoadState = 3, + SaveState = 4, + }; + } + + namespace SampleRate { + enum : u32 { + Rate48000 = 0, + Rate44100 = 1, + Rate32000 = 2, + Rate24000 = 3, + Rate22050 = 4, + Rate16000 = 5, + Rate12000 = 6, + Rate11025 = 7, + Rate8000 = 8, + }; + } + + namespace Mode { + enum : u16 { + None = 0, + Decode = 1, + Encode = 2, + }; + } + + struct DecodeResponse { + u32_le sampleRate = SampleRate::Rate48000; + u32_le channelCount = 0; + u32_le size = 0; + u32_le unknown1 = 0; + u32_le unknown2 = 0; + u32_le sampleCount = 0; + }; + + struct Message { + u16_le mode = Mode::None; // Encode or decode AAC? 
+ u16_le command = Command::Init; + u32_le resultCode = ResultCode::Success; + + // Info on the AAC request + union { + std::array commandData = {}; + DecodeResponse decodeResponse; + }; + }; + + static_assert(sizeof(Message) == 32); + static_assert(std::is_trivially_copyable()); +} // namespace Audio::AAC \ No newline at end of file diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index cee2b0c8..c57f221e 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -5,6 +5,7 @@ #include #include +#include "audio/aac.hpp" #include "audio/dsp_core.hpp" #include "audio/dsp_shared_mem.hpp" #include "memory.hpp" @@ -166,6 +167,7 @@ namespace Audio { } } + void handleAACRequest(const AAC::Message& request); void updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients); void generateFrame(StereoFrame& frame); void generateFrame(DSPSource& source); diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index d6ba21ec..98d07ce6 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -149,12 +149,24 @@ namespace Audio { break; } - case DSPPipeType::Binary: - Helpers::warn("Unimplemented write to binary pipe! Size: %d\n", size); + case DSPPipeType::Binary: { + log("Unimplemented write to binary pipe! 
Size: %d\n", size); + + AAC::Message request; + if (size == sizeof(request)) { + std::array raw; + for (uint i = 0; i < size; i++) { + raw[i] = mem.read32(buffer + i); + } + + std::memcpy(&request, raw.data(), sizeof(request)); + handleAACRequest(request); + } // This pipe and interrupt are normally used for requests like AAC decode dspService.triggerPipeEvent(DSPPipeType::Binary); break; + } default: log("Audio::HLE_DSP: Wrote to unimplemented pipe %d\n", channel); break; } @@ -488,6 +500,37 @@ namespace Audio { return decodedSamples; } + void HLE_DSP::handleAACRequest(const AAC::Message& request) { + AAC::Message response = {}; + + switch (request.command) { + case AAC::Command::EncodeDecode: + // Dummy response to stop games from hanging + // TODO: Fix this when implementing AAC + response.resultCode = AAC::ResultCode::Success; + response.decodeResponse.channelCount = 2; + response.decodeResponse.sampleCount = 1024; + response.decodeResponse.size = 0; + response.decodeResponse.sampleRate = AAC::SampleRate::Rate48000; + break; + + case AAC::Command::Init: + case AAC::Command::Shutdown: + case AAC::Command::LoadState: + case AAC::Command::SaveState: + response = request; + response.resultCode = AAC::ResultCode::Success; + break; + + default: Helpers::warn("Unknown AAC command type"); break; + } + + // Copy response data to the binary pipe + auto& pipe = pipeData[DSPPipeType::Binary]; + pipe.resize(sizeof(response)); + std::memcpy(&pipe[0], &response, sizeof(response)); + } + void DSPSource::reset() { enabled = false; isBufferIDDirty = false; From ad380b8c5ae9bb8e9a2ce5bdd470a31f04385f20 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 1 May 2024 01:59:32 +0300 Subject: [PATCH 018/251] Warn on invalid AAC request --- src/core/audio/hle_core.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 98d07ce6..e38d4821 100644 --- 
a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -161,6 +161,8 @@ namespace Audio { std::memcpy(&request, raw.data(), sizeof(request)); handleAACRequest(request); + } else { + Helpers::warn("Invalid size for AAC request"); } // This pipe and interrupt are normally used for requests like AAC decode From e4b81d61a46816116808d868eb3d39b96313acb4 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 1 May 2024 16:10:51 +0300 Subject: [PATCH 019/251] HLE DSP: Fix AAC response stub --- include/audio/dsp_shared_mem.hpp | 4 ++-- src/core/audio/hle_core.cpp | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/audio/dsp_shared_mem.hpp b/include/audio/dsp_shared_mem.hpp index 25806ea1..e776211d 100644 --- a/include/audio/dsp_shared_mem.hpp +++ b/include/audio/dsp_shared_mem.hpp @@ -294,12 +294,12 @@ namespace Audio::HLE { struct SourceStatus { struct Status { - u8 isEnabled; ///< Is this channel enabled? (Doesn't have to be playing anything.) + u8 enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.) 
u8 currentBufferIDDirty; ///< Non-zero when current_buffer_id changes u16_le syncCount; ///< Is set by the DSP to the value of SourceConfiguration::sync_count u32_dsp samplePosition; ///< Number of samples into the current buffer u16_le currentBufferID; ///< Updated when a buffer finishes playing - u16_le lastBufferID; ///< Updated when all buffers in the queue finish playing + u16_le previousBufferID; ///< Updated when all buffers in the queue finish playing }; Status status[sourceCount]; diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index e38d4821..146c7bdf 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -229,11 +229,11 @@ namespace Audio { // Update write region of shared memory auto& status = write.sourceStatuses.status[i]; - status.isEnabled = source.enabled; + status.enabled = source.enabled; status.syncCount = source.syncCount; status.currentBufferIDDirty = source.isBufferIDDirty ? 1 : 0; status.currentBufferID = source.currentBufferID; - status.lastBufferID = source.previousBufferID; + status.previousBufferID = source.previousBufferID; // TODO: Properly update sample position status.samplePosition = source.samplePosition; @@ -503,7 +503,7 @@ namespace Audio { } void HLE_DSP::handleAACRequest(const AAC::Message& request) { - AAC::Message response = {}; + AAC::Message response; switch (request.command) { case AAC::Command::EncodeDecode: @@ -514,6 +514,9 @@ namespace Audio { response.decodeResponse.sampleCount = 1024; response.decodeResponse.size = 0; response.decodeResponse.sampleRate = AAC::SampleRate::Rate48000; + + response.command = request.command; + response.mode = request.mode; break; case AAC::Command::Init: From 6a424a7a66bac534800746cd90b2d7d26786bf86 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 1 May 2024 16:20:24 +0300 Subject: [PATCH 020/251] Fix CI --- include/audio/aac.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 
deletions(-) diff --git a/include/audio/aac.hpp b/include/audio/aac.hpp index c780e6d2..afd2dbba 100644 --- a/include/audio/aac.hpp +++ b/include/audio/aac.hpp @@ -12,7 +12,7 @@ namespace Audio::AAC { }; } - // Enum values from Citra and struct definitions based off Citra + // Enum values and struct definitions based off Citra namespace Command { enum : u16 { Init = 0, // Initialize encoder/decoder @@ -46,12 +46,12 @@ namespace Audio::AAC { } struct DecodeResponse { - u32_le sampleRate = SampleRate::Rate48000; - u32_le channelCount = 0; - u32_le size = 0; - u32_le unknown1 = 0; - u32_le unknown2 = 0; - u32_le sampleCount = 0; + u32_le sampleRate; + u32_le channelCount; + u32_le size; + u32_le unknown1; + u32_le unknown2; + u32_le sampleCount; }; struct Message { @@ -61,11 +61,11 @@ namespace Audio::AAC { // Info on the AAC request union { - std::array commandData = {}; + std::array commandData{}; DecodeResponse decodeResponse; }; }; static_assert(sizeof(Message) == 32); static_assert(std::is_trivially_copyable()); -} // namespace Audio::AAC \ No newline at end of file +} // namespace Audio::AAC From 70f733ffb8437b51cfce6ad386a46d5bddfb6e7f Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 2 May 2024 00:22:13 +0300 Subject: [PATCH 021/251] GPU: Handle invalid floating point uniform writes --- include/PICA/shader.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index 5b05e0b7..a9216b17 100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -256,8 +256,10 @@ class PICAShader { void uploadFloatUniform(u32 word) { floatUniformBuffer[floatUniformWordCount++] = word; - if (floatUniformIndex >= 96) { - Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex); + // Check if the program tries to upload to a non-existent uniform, and empty the queue without writing in that case + if (floatUniformIndex >= 96) 
[[unlikely]] { + floatUniformWordCount = 0; + return; } if ((f32UniformTransfer && floatUniformWordCount >= 4) || (!f32UniformTransfer && floatUniformWordCount >= 3)) { From 81932421cfe08cf0994bfa3a5dccb909e562a4cd Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 2 May 2024 00:28:13 +0300 Subject: [PATCH 022/251] Optimize float uniform setting --- include/PICA/shader.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index a9216b17..10f6ec88 100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -256,16 +256,16 @@ class PICAShader { void uploadFloatUniform(u32 word) { floatUniformBuffer[floatUniformWordCount++] = word; - // Check if the program tries to upload to a non-existent uniform, and empty the queue without writing in that case - if (floatUniformIndex >= 96) [[unlikely]] { - floatUniformWordCount = 0; - return; - } if ((f32UniformTransfer && floatUniformWordCount >= 4) || (!f32UniformTransfer && floatUniformWordCount >= 3)) { - vec4f& uniform = floatUniforms[floatUniformIndex++]; floatUniformWordCount = 0; + // Check if the program tries to upload to a non-existent uniform, and empty the queue without writing in that case + if (floatUniformIndex >= 96) [[unlikely]] { + return; + } + vec4f& uniform = floatUniforms[floatUniformIndex++]; + if (f32UniformTransfer) { uniform[0] = f24::fromFloat32(*(float*)&floatUniformBuffer[3]); uniform[1] = f24::fromFloat32(*(float*)&floatUniformBuffer[2]); From 66bcf384f38d3c86052a45f824f9854359229c76 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 7 May 2024 23:08:24 +0300 Subject: [PATCH 023/251] Qt: Add file patcher --- .gitmodules | 3 + CMakeLists.txt | 5 +- include/panda_qt/ellided_label.hpp | 21 +++++ include/panda_qt/main_window.hpp | 5 +- include/panda_qt/patch_window.hpp | 21 +++++ 
src/panda_qt/ellided_label.cpp | 25 ++++++ src/panda_qt/main_window.cpp | 4 + src/panda_qt/patch_window.cpp | 123 +++++++++++++++++++++++++++++ third_party/hips | 1 + 9 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 include/panda_qt/ellided_label.hpp create mode 100644 include/panda_qt/patch_window.hpp create mode 100644 src/panda_qt/ellided_label.cpp create mode 100644 src/panda_qt/patch_window.cpp create mode 160000 third_party/hips diff --git a/.gitmodules b/.gitmodules index 1f1d11fc..5a136acb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -70,3 +70,6 @@ [submodule "third_party/capstone"] path = third_party/capstone url = https://github.com/capstone-engine/capstone +[submodule "third_party/hips"] + path = third_party/hips + url = https://github.com/wheremyfoodat/Hips diff --git a/CMakeLists.txt b/CMakeLists.txt index 48a2a0db..dca69d6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,7 @@ include_directories(${PROJECT_SOURCE_DIR}/include/kernel) include_directories(${FMT_INCLUDE_DIR}) include_directories(third_party/boost/) include_directories(third_party/elfio/) +include_directories(third_party/hips/include/) include_directories(third_party/imgui/) include_directories(third_party/dynarmic/src) include_directories(third_party/cryptopp/) @@ -448,9 +449,11 @@ if(NOT BUILD_HYDRA_CORE) set(FRONTEND_SOURCE_FILES src/panda_qt/main.cpp src/panda_qt/screen.cpp src/panda_qt/main_window.cpp src/panda_qt/about_window.cpp src/panda_qt/config_window.cpp src/panda_qt/zep.cpp src/panda_qt/text_editor.cpp src/panda_qt/cheats_window.cpp src/panda_qt/mappings.cpp - ) + src/panda_qt/patch_window.cpp src/panda_qt/ellided_label.cpp + ) set(FRONTEND_HEADER_FILES include/panda_qt/screen.hpp include/panda_qt/main_window.hpp include/panda_qt/about_window.hpp include/panda_qt/config_window.hpp include/panda_qt/text_editor.hpp include/panda_qt/cheats_window.hpp + include/panda_qt/patch_window.hpp include/panda_qt/ellided_label.hpp ) source_group("Source 
Files\\Qt" FILES ${FRONTEND_SOURCE_FILES}) diff --git a/include/panda_qt/ellided_label.hpp b/include/panda_qt/ellided_label.hpp new file mode 100644 index 00000000..19fd8c74 --- /dev/null +++ b/include/panda_qt/ellided_label.hpp @@ -0,0 +1,21 @@ +#pragma once +#include +#include +#include +#include + +class EllidedLabel : public QLabel { + Q_OBJECT + public: + explicit EllidedLabel(Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr); + explicit EllidedLabel(QString text, Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr); + void setText(QString text); + + protected: + void resizeEvent(QResizeEvent* event); + + private: + void updateText(); + QString m_text; + Qt::TextElideMode m_elideMode; +}; \ No newline at end of file diff --git a/include/panda_qt/main_window.hpp b/include/panda_qt/main_window.hpp index 7e93bdf6..72725257 100644 --- a/include/panda_qt/main_window.hpp +++ b/include/panda_qt/main_window.hpp @@ -17,6 +17,7 @@ #include "panda_qt/about_window.hpp" #include "panda_qt/cheats_window.hpp" #include "panda_qt/config_window.hpp" +#include "panda_qt/patch_window.hpp" #include "panda_qt/screen.hpp" #include "panda_qt/text_editor.hpp" #include "services/hid.hpp" @@ -90,13 +91,14 @@ class MainWindow : public QMainWindow { std::mutex messageQueueMutex; std::vector messageQueue; + QMenuBar* menuBar = nullptr; InputMappings keyboardMappings; ScreenWidget screen; AboutWindow* aboutWindow; ConfigWindow* configWindow; CheatsWindow* cheatsEditor; TextEditorWindow* luaEditor; - QMenuBar* menuBar = nullptr; + PatchWindow* patchWindow; // We use SDL's game controller API since it's the sanest API that supports as many controllers as possible SDL_GameController* gameController = nullptr; @@ -110,6 +112,7 @@ class MainWindow : public QMainWindow { void dumpRomFS(); void openLuaEditor(); void openCheatsEditor(); + void openPatchWindow(); void showAboutMenu(); void initControllers(); void pollControllers(); diff --git 
a/include/panda_qt/patch_window.hpp b/include/panda_qt/patch_window.hpp new file mode 100644 index 00000000..652c9a23 --- /dev/null +++ b/include/panda_qt/patch_window.hpp @@ -0,0 +1,21 @@ +#pragma once +#include +#include +#include + +#include "panda_qt/ellided_label.hpp" + +class PatchWindow final : public QWidget { + Q_OBJECT + + public: + PatchWindow(QWidget* parent = nullptr); + ~PatchWindow() = default; + + private: + std::filesystem::path inputPath = ""; + std::filesystem::path patchPath = ""; + + EllidedLabel* inputPathLabel = nullptr; + EllidedLabel* patchPathLabel = nullptr; +}; diff --git a/src/panda_qt/ellided_label.cpp b/src/panda_qt/ellided_label.cpp new file mode 100644 index 00000000..68c0da76 --- /dev/null +++ b/src/panda_qt/ellided_label.cpp @@ -0,0 +1,25 @@ +#include "panda_qt/ellided_label.hpp" + +// Based on https://stackoverflow.com/questions/7381100/text-overflow-for-a-qlabel-s-text-rendering-in-qt +EllidedLabel::EllidedLabel(Qt::TextElideMode elideMode, QWidget* parent) : EllidedLabel("", elideMode, parent) {} + +EllidedLabel::EllidedLabel(QString text, Qt::TextElideMode elideMode, QWidget* parent) : QLabel(parent) { + m_elideMode = elideMode; + setText(text); +} + +void EllidedLabel::setText(QString text) { + m_text = text; + updateText(); +} + +void EllidedLabel::resizeEvent(QResizeEvent* event) { + QLabel::resizeEvent(event); + updateText(); +} + +void EllidedLabel::updateText() { + QFontMetrics metrics(font()); + QString elided = metrics.elidedText(m_text, m_elideMode, width()); + QLabel::setText(elided); +} \ No newline at end of file diff --git a/src/panda_qt/main_window.cpp b/src/panda_qt/main_window.cpp index da9d2706..54e4fabe 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp @@ -54,11 +54,13 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) auto dumpRomFSAction = toolsMenu->addAction(tr("Dump RomFS")); auto luaEditorAction = toolsMenu->addAction(tr("Open Lua Editor")); 
auto cheatsEditorAction = toolsMenu->addAction(tr("Open Cheats Editor")); + auto patchWindowAction = toolsMenu->addAction(tr("Open Patch Window")); auto dumpDspFirmware = toolsMenu->addAction(tr("Dump loaded DSP firmware")); connect(dumpRomFSAction, &QAction::triggered, this, &MainWindow::dumpRomFS); connect(luaEditorAction, &QAction::triggered, this, &MainWindow::openLuaEditor); connect(cheatsEditorAction, &QAction::triggered, this, &MainWindow::openCheatsEditor); + connect(patchWindowAction, &QAction::triggered, this, &MainWindow::openPatchWindow); connect(dumpDspFirmware, &QAction::triggered, this, &MainWindow::dumpDspFirmware); auto aboutAction = aboutMenu->addAction(tr("About Panda3DS")); @@ -71,6 +73,7 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) aboutWindow = new AboutWindow(nullptr); configWindow = new ConfigWindow(this); cheatsEditor = new CheatsWindow(emu, {}, this); + patchWindow = new PatchWindow(this); luaEditor = new TextEditorWindow(this, "script.lua", ""); auto args = QCoreApplication::arguments(); @@ -293,6 +296,7 @@ void MainWindow::showAboutMenu() { void MainWindow::openLuaEditor() { luaEditor->show(); } void MainWindow::openCheatsEditor() { cheatsEditor->show(); } +void MainWindow::openPatchWindow() { patchWindow->show(); } void MainWindow::dispatchMessage(const EmulatorMessage& message) { switch (message.type) { diff --git a/src/panda_qt/patch_window.cpp b/src/panda_qt/patch_window.cpp new file mode 100644 index 00000000..98983865 --- /dev/null +++ b/src/panda_qt/patch_window.cpp @@ -0,0 +1,123 @@ +#include "panda_qt/patch_window.hpp" + +#include +#include +#include +#include +#include + +#include "hips.hpp" +#include "io_file.hpp" + +PatchWindow::PatchWindow(QWidget* parent) : QWidget(parent, Qt::Window) { + QVBoxLayout* layout = new QVBoxLayout; + layout->setContentsMargins(6, 6, 6, 6); + setLayout(layout); + + QWidget* inputBox = new QWidget; + QHBoxLayout* inputLayout = new QHBoxLayout; + QLabel* 
inputText = new QLabel(tr("Select input file")); + QPushButton* inputButton = new QPushButton(tr("Select")); + inputPathLabel = new EllidedLabel(""); + inputPathLabel->setFixedWidth(200); + + inputLayout->addWidget(inputText); + inputLayout->addWidget(inputButton); + inputLayout->addWidget(inputPathLabel); + inputBox->setLayout(inputLayout); + + QWidget* patchBox = new QWidget; + QHBoxLayout* patchLayout = new QHBoxLayout; + QLabel* patchText = new QLabel(tr("Select patch file")); + QPushButton* patchButton = new QPushButton(tr("Select")); + patchPathLabel = new EllidedLabel(""); + patchPathLabel->setFixedWidth(200); + + patchLayout->addWidget(patchText); + patchLayout->addWidget(patchButton); + patchLayout->addWidget(patchPathLabel); + patchBox->setLayout(patchLayout); + + QWidget* actionBox = new QWidget; + QHBoxLayout* actionLayout = new QHBoxLayout; + QPushButton* applyPatchButton = new QPushButton(tr("Apply patch")); + actionLayout->addWidget(applyPatchButton); + actionBox->setLayout(actionLayout); + + layout->addWidget(inputBox); + layout->addWidget(patchBox); + layout->addWidget(actionBox); + + connect(inputButton, &QPushButton::clicked, this, [this]() { + auto path = QFileDialog::getOpenFileName(this, tr("Select file to patch"), "", tr("All files (*.*)")); + inputPath = std::filesystem::path(path.toStdU16String()); + + inputPathLabel->setText(path); + }); + + connect(patchButton, &QPushButton::clicked, this, [this]() { + auto path = QFileDialog::getOpenFileName(this, tr("Select patch file"), "", tr("Patch files (*.ips *.ups *.bps)")); + patchPath = std::filesystem::path(path.toStdU16String()); + + patchPathLabel->setText(path); + }); + + connect(applyPatchButton, &QPushButton::clicked, this, [this]() { + if (inputPath.empty() || patchPath.empty()) { + printf("Pls set paths properly"); + return; + } + + auto path = QFileDialog::getSaveFileName(this, tr("Select file"), QString::fromStdU16String(inputPath.u16string()), tr("All files (*.*)")); + 
std::filesystem::path outputPath = std::filesystem::path(path.toStdU16String()); + + if (outputPath.empty()) { + printf("Pls set paths properly"); + return; + } + + Hips::PatchType patchType; + auto extension = patchPath.extension(); + + // Figure out what sort of patch we're dealing with + if (extension == ".ips") { + patchType = Hips::PatchType::IPS; + } else if (extension == ".ups") { + patchType = Hips::PatchType::UPS; + } else if (extension == ".bps") { + patchType = Hips::PatchType::BPS; + } else { + printf("Unknown patch format\n"); + return; + } + + // Read input and patch files into buffers + IOFile input(inputPath, "rb"); + IOFile patch(patchPath, "rb"); + + if (!input.isOpen() || !patch.isOpen()) { + printf("Failed to open input or patch file.\n"); + return; + } + + // Read the files into arrays + const auto inputSize = *input.size(); + const auto patchSize = *patch.size(); + + std::unique_ptr inputData(new uint8_t[inputSize]); + std::unique_ptr patchData(new uint8_t[patchSize]); + + input.rewind(); + patch.rewind(); + input.readBytes(inputData.get(), inputSize); + patch.readBytes(patchData.get(), patchSize); + + auto [bytes, result] = Hips::patch(inputData.get(), inputSize, patchData.get(), patchSize, patchType); + + // Write patched file + if (!bytes.empty()) { + IOFile output(outputPath, "wb"); + output.writeBytes(bytes.data(), bytes.size()); + } + }); +} \ No newline at end of file diff --git a/third_party/hips b/third_party/hips new file mode 160000 index 00000000..bbe8faf1 --- /dev/null +++ b/third_party/hips @@ -0,0 +1 @@ +Subproject commit bbe8faf149c4e10aaa45e2454fdb386e4cabf0cb From 332fbcfff184b98f8e6a521e7b8e834d89414177 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 7 May 2024 23:55:32 +0300 Subject: [PATCH 024/251] Qt: Add patching errors --- CMakeLists.txt | 2 +- docs/img/rpog_icon.png | Bin 0 -> 26925 bytes include/panda_qt/patch_window.hpp | 10 +++++++ 
src/panda_qt/patch_window.cpp | 45 ++++++++++++++++++++++++++---- 4 files changed, 51 insertions(+), 6 deletions(-) create mode 100644 docs/img/rpog_icon.png diff --git a/CMakeLists.txt b/CMakeLists.txt index dca69d6b..88ad6aeb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -486,7 +486,7 @@ if(NOT BUILD_HYDRA_CORE) qt_add_resources(AlberCore "app_images" PREFIX "/" FILES - docs/img/rsob_icon.png docs/img/rstarstruck_icon.png + docs/img/rsob_icon.png docs/img/rstarstruck_icon.png docs/img/rpog_icon.png ) else() set(FRONTEND_SOURCE_FILES src/panda_sdl/main.cpp src/panda_sdl/frontend_sdl.cpp src/panda_sdl/mappings.cpp) diff --git a/docs/img/rpog_icon.png b/docs/img/rpog_icon.png new file mode 100644 index 0000000000000000000000000000000000000000..b5426aed8a19dd97e2727087a0e44494574fab2f GIT binary patch literal 26925 zcmV*JKxV&*P)004R> z004l5008;`004mK004C`008P>0026e000+ooVrmw00006VoOIv0RI600RN!9r;`8x zXdp>MK~#9!?EQJLrs;Xs3I3k#UBC6LxmQ+|N+nfUk}b=!Y>br#L)#5DW=RF~h9{ba z0S1QdiCLOpP}a;uH#7|sm}xVZZW!oj1C?XkUa$chD}}ttTC9>)rP^~pdG>F8_h*?u z-cz!?TVMyv_=(7%Ok*7JShPAxOXj?5)_c*r&DDIBTT>XQX}; z*njKmw{sK_-g?b%t$+8HcNw}Z{(gknXpop^;ApAUJ^#5JJDRO7WOMJhp21 zHjDTPkNC;GIsfRwgr7b8ehP5^rr*?WtzZ9A9|=KUyL#0=T=Ico!uJj$dERy2hTwrf z2yZ>u0q+9N2d|Y>BO@NmHDA|h{?pUitAF@e@l#)U0Sky*pg(c*rg-+*XMZar{9C@n zM?xTfC z5O_ccAwb9glHh~z!H;9TySq94mV4{wcYgZzz40qQ^uvSVJ@0z+>KD}+zqS7DTjHl* zzx%`Qd-BRRbuM()2V*_7PSD!^3#1f;0G;cQ1cX*d30epeEl>oM03U*vLXc`HOWS5E3aA2=84crG5Y7m*OkucK`DK`2FAi?SJfBzt!(PzORt>83L!jD zHJ++&am^Z6udsDZX9G0~uQW<3ylv4ak}SptOB^L+S&lIgGSak(QmqauBDiy@`J;d2 z_kOGY>;Zq~-MbY0<^_Q``@^kUH{><00bi&?;OHpac=5$>=iYCQiwc43ipC z<%x`NLm^Zf_}u=o_%E-F^B<+ZdfI=5UsXWh=FOY(`t=tB+~C%&Tk_hqYo0F=h`M#_ zmcDlF)|dRce;=3lN8T`v3gZe{||q`qU&hi}RZoWKp%hYdfEOz}c6}=1 zc7Yym5f67*&mIw3%V|~9>3G$(e%6KVVes}x9vsiU2l(t4|BOHQ2mhG7b?b(_b`1|V zf)E1!Jr4@;v5$RrI=FP{2gk$7*Uo19t|@1#t83b}rdq986a__X>Aa`2micl?({woJ zsp7i#_7`6jEx>B^YIgOhFMrSd2ag^fXYrrRl5{H@j<%cia?{ppyL_gSTW`tz^9 
z`YJI0#UIe0`*SxGH*doA>u+U=f3KGK*vCH08}q&QKlRi*elpQUH045eRYlX3EEaQ) zPG%H!N!4_m9xbVx8i&9JPuZ0Ct|QBHHZPvz()lMjf9?X4Y(PHD8BM1MBS@4ciwuzr zK^q?uq2&DstFQZS|KgwdiC_E+ebcx9;j33R&;9vHtbgZdFn9tfP`)Gf0-2AnMHNmT z-4=tb9YoN!QHVGhiu&lj=xilIr%=L@3^%b&&0_C$TvZW|HiEXx&^F7UL>{bk+6*U% zD5bU?b=4lFS@!95Q-0Lr|JlyPORoW|FaF)0e)_kGTeohhqobq1zb7{u5ke4u_J_YC z`nIdz^yco$T^1~gIrI67gX2?*a?Q!lhPBcFtWO$y0Oyr6jSGB(d;$EU3Eh`GW_C|Cdj^_=&yUjqPuYN5kUk6HjE_ zdij6c9SyfbRbrhZ%SNuMR&>Q&_#nl2>w?&S@Me$_vcaa%NkUz$iG-zf4R!5MZ9!yG ztZP}$juF8T+8RIKLnu$ES{#JY&SSo7Y>+b%l~@ud~fwr_3j`QUBPP z?vs2`hWPR?|2A>!)-84G*1y}uq6#8>42`qcuBB)zlF^uRJ6A9=MoUSg6(M*`93izNc#vMAr9x>% z=Nwuo0I#$NTjPOx6RwkSJv^QHD}6-gQV5p`{@>K!S-CUEN_K z4Q-9o8et4dKx7mVHQ98N;q)A_j1j|4hP&tKq-3;n0TpG`T_>XPR0u@dv zC$;lIBLjmZF_-c|{sBSqpWC|lTBJ9&t5=8jUp~!L?Vow_ToB2o`qO`UN8Y-1^zTF; zL?2zd7Wm=M;;;{ETN8wo&IhE?=r|<_fnn1!9*=R(!dY)XNtk5k*x9&;cW|b zi;p78^_oa4q-kjzM<6FoGi0RjQsSkM1n^x5cvNdW1W#~*&IUXRw8kTGwI$X8F9asd z0SK)F(_mdqq!N-i$NQ2bi3ot}YLo~lEx-!!79j;F!FV)6DoGR>oG^&4V{>2fBm~TIdSUxZ%*+jZh(09=1sAE{kpgY;nuBN;@Y)q?ic^88#iwJ>fh+YAO5gB z{pt<>W$Ve6$FDx~+SX(@s>*e+T`QWVrYslC7E4Z6OBS;kw{Jh-@ZgxHtB7Spk`2l7 z0oF>gVaCSxgv*z&u(`R#Xgnkz49Euqf;8Y@duvJ-YhoFYBA|oB*ELnSLQ9Fa9-$Rg zRS_pCahl_7M`tZk8-zAUTY>XPsaX{#B+{cz1lEDA5k}%=pE>9%L|5Tji+2|9TLcE7 zG%W!Y4Jc>(7;R|0#&re3IZTou(*f2gY+IwT#FGtdSwXOvY~(`Mpp8;mN7#1Fe0jPm z%heA$+|38G`DcLJ0NcZd`WwFCxnHt5Z>{H_e_m~GZwszn>;K`_Ev{X=765L6xYi#l zfuC=z{c6XII(tpHapOj~apQ*n)q+5L_`@IO#*G`|Lm&E({PsWhU;X&`bC*8Q6qRr4 zQnqbDSu9zuRxDO)P8LfZ9Un3~m~nD4qggIlR~0fc2(8%K+2!2T$Jri_xp?t1TRS`K zY;BX|LzGVO&NCcl>zIV9V3E3G;6@pt>#!~k?9yhpe`3gNsfwAR>${nwZr6_ zELL-(G-k57O=~?}=h0fCr9{UO&UIidw&_r@LC0wjCh{7O4^C+lSQRTyPL8V8YWbZf zCx?F-IQo?~;QIAH`z0S0|GU5c_cy=d%buEE9-iBbW%~6Zv`>#WcRu=HvHB6dh_VOp zd#-LPz@Ne3zhvpN|4P8cS@P*9Pe0IE>xI`6FEL3>+BH;_z$l1gLpF%0RvMBRN5JDT zNyK23bK%@MVr?kuns~J)PjZrIfRue*V1ZTDk_r&bnxTihFA!sEs0G;=Q|=5Q&%gr2{;=ga)YZIgh^3oA|Y|EM#Tv}ctmPZB4AWTV?8P= z30;Tx3NK@V?g-N1y&z4;*w)c>1qy=?5uE^Ew+JDr>Y6M`h_V!+BN0MSLMV*MLf1B4 
zYa_Ncv-I3f_kWmKCa`4|Ad->fL(~ITN(NVa5{dxezr$BxRx=;Uuzx%<*pMKxF zf|47t);`-D?@2ETl_Wyx^d0qT_T?wX`fi zp)oTr_(6wwalKmI{`_q3WB=q1kABb9ZEoDSp#U!geE7pZ9}F7c=39BCYlb+2M52|X zkrIT2;KB9K?_@B7Adu3q|8O6#CDpp5^Nz!#LpG0(dEa}#28SgdOz4E5^{y}8YDelL zIvsHJ8p=5#ba*Eys|EJ-fVQ!Wwzg4u#$;oQ;61);2u(wvK{${IoNW*ayzH}Ml1%Zk zCP;yl0#85)gBRd}gmjCD?(BQDNDQv4uqs6lws7k;tyfIWUqmQNy(|!Mj8ulUb97aO zvn7LULYhwSI2sp-mBQHuA3C&mLYPS9=>+cxR#-oE>E&TQ_?yk`R~{|P+us-G@clpZ zci;N_UGwhWm44L+zxnT7ym0QZ02um?SsOqK$N@LnMO@s)oIR=b>`&_gDqux`fEBv~ zJx$<0^+1`AIX6v@jf(!#r{Fuj5<2MVeK&7Dt6qHZMfdX=;4A``M@L5&cdrT+8SnzF zG+t}sY)D*n6mCuAJZWO-oZ#Gf$$E9dBhe9?n9*d1M+!-crRzF`k#t>y7lAY$Aw)pL z4%@USqw&_#b|p#~q)d=XVr_|zW26(*tAZc}p#>l1m^7t4JfWi^&Bth?DHm(J)|enz zpPr!d9B`z`5R;DSx(=lju^Hgo3b$Gk4H5#9G@W2E2#qjEWj8bDikQdG`@i_;=ze+9 zME9Ti%CAp{dGxk%lI$uKUC*?5tP6AmL}#IK2m$y2r6#!GgmZ|13<5#G$si?4Ae^%r z1X^l@2%(kuNQ+>+7u#{P6~TY6EWiJO1Ybum5;5BWvM#qD*+9SccfK(A}(_p1Qn~c_4f>2b;B|dZ{ z#$c;8ZPk$srnL2ns#qiAh&Ue+f*_5D)Ww3jTa&~ouCAFK-XnA^IOZDL}og#%n&fdC;T!CP!o;axy! zgOm#40#ZrBS-V1Mg*K5e+9>A%DGKF7ZaxWVPLw93Dj|+FB2swQpq#`uEye1X zL@Q)~pmcxZNQN}!j36PX6qLs`9w9rd6C~*X6{pAmCdv`P)3gn7K1NE7w=KS0(lrGU z9k%v_bV53sAOmP^h{s!Wwxe3D$p%wq%VUgiC>2w;Ep^dwI$yARX_v^P6w4Xu7@~Ac zKoAJ15D-DoncVN3yCh7cBj3R3%(>25+IC6nT7^}8KwC%f7Vj|QrSbS(LOuJBKl-`b-~7QZ zd;CY=3IL9dj>307_ni0-zTvl@I=qmfZ?q_3(8|%t7O5mzn&OL+EFa;sF>TdS+5&B2 zcD62&ZA>sK#^d{nIaqYp(p58DwW2KMc}KF(?_&T#c(jjJ5UH( z+kkSYXowPmD9XWmT4!-xMO`&Gq0o^g(2*t)&MQRlIICFLCEJg`gVZRNWr<=73=>79Mlp_4 z#*=M)aQM*Ts~OH{9G)OG>1c}}4cTy)(K&;)4N@6`^cbzN!C{of>KN-RUDcw8Q)E0Q zERV51K<5e0(J9GfIO?x4GQwm@U!Z!>QsL`00f+aJ#q5xLx`A~8*|ijl6I7g`%><)T z>e(Tk>=12BVj`-hMGXdIQ9@`+Y*El6@d%VQL^|fu)&*9p8TTLTv%PyBs*+TP(}2&~r&A`>wk444jcqGV3qOo&Glvf&h`h43zz#(J;g=zSl5 zdGB|BApq#nB#kjEr2BvBSo|+S3m>ciqaY}#@8M$+jX?>V za3~v-8;LOn?fiwd+tm;Ju{^o>UkFf*Oq(ZIPxqO>ne}H4fK_Q1jq?U-_L!211vfY3E#R0&zTemznZit`$ z+$*0eidAQf(Y4hP7%)H zbxx8Fu+BlTBu*_!K0!zjLDIB62_+<&t_k=c5NKQon4r*6O5K*ES&EPeN>p^~70N{5 zE!IXLJT^E4F-p~lrA!ivN82~O=nx8 
z!IUJ5>DnF&cM_ZwM5BzZtO%{=;Ne|_?2vWfQ;N0U+Vw|9>8q%!@>q_Rw1?VB2P-OX-pdxI&rfWS_-Qk;plao`d zZNavz>xyboaCCga!$%KToSx!bAj)zGu$rCXgTuML^XZa+l`Y4|hm7-_HcdG=HK<6D zClO6kGRSg-)^t*mg#Ll`TFAEQYA5}F^NRz3?OV6RKfQHJe&8Fw@ndnG{~0YcB6x(1 z@dgwfI;cJfge1#G)NO@FBBdgZBD!V;U61AkXGzl$aXbbo2ug#BkeLP%5dzdzfeea{ z4nd%4i~dZHG)Rf-JOPIYiYST+)?u49VSR#(JY7|zO@{Z9bTFc8J;V`EV|{-#_-2i0 z8;WXmb}iIcWsvcZbx{+t2~Gq?`IvHcNJB}Q3{go&2%bo3L^h;#o?v@SLMp-J@)KNq z=G`a<>(d7)Cs{w-V-X+V<00wrBEqETILABBd{MAkt?;&`S}dsBir@qFx@10|v0N-U zJvpIiDqQe5>sg+jqNFCt1}GVD)=`xWUEQLTV0C;-xn7ZvMnrjvC!ixk91oBx?vdso zXuLxQ>GD*oX6-%>y!cB3;H^wxn?`CO2`ZwA1TuI`W3kk@uBNRDe1J&BMA3j`x{WXf zBNR#pa26XZD1~bqVv`aA*tSE2l1Sw^SyNPNXj`1`2*h{`UO2QeNHxG)iT44IB{4aK zfNNTs^)W0@@S#QNl-5d&Fxa}Kb}d?HT)9GpirRIUIH8@-n6FPU`4A~JGDrvpQcavh zxFC8MF5e;@ZeW_0x;UXOjtN~uKoBJv>3E7ON+Rnp=?K?09Nv2!->nJ35@%`O`P4Zf zSPX%>m~(P+%Cam7!BW*L>b4GPK3g$AU2t%EMBCIfUCXj8Xcr|2P{yFj5)<`! zM%&aVtuRU>BEiORm+^GU=Ef$m)VQ{z>k6t$p^}tXNP<^@2p(uU`@-+|if6?y41gOq z#L>;0;R7G|!10+_UKPY)Z zY;SLI>B4zNgB%kXw9z2@9*v4(1d1R8!3TOaMz=2^{NfBC05@-%@40m4$*<|8_Qpg~ zw=JD@NGZ`uAPGnzaNbdMEs?D8K1CWu7A4>7I=TBMc?#5Ps6rgj0Z65=E!GI8JflM3ewmi2AUAK`RM)RxcOuchcjMxf5BO~8@_3+(>#JyI8(eE? 
z%Qbbi?yp>NiXIHe2V-2k6dGE!LRn6r?7&KvP#ZZ&A|VLr1kZ zM4JR;LK_^B6!=IJ1axhnY&)`1hPB{az;=PIZV?J34PGWhM&mFbEyIoT6pIDxV%}fP zB*f{Ms%fda83D&2jw#B5rVFH#O@^aMf1?qSb=k0*FIg-OsJj+|C5|F=KH%y*pCu77 z^V5AE-n+;8bcu;#gp_#ik=A2_Cx`%EkfbSj9@BI!W!qBMEk+2miZD@*O8Ov(;)KrO zMcZf{m7;A=HLTA9ARw=E-G9#y{J^urjqQJ!Pd16-Txy|lwxx9rZ)>Et1Q!rO(P8@^ zGTuTDCqz+pqinvTtXm?^ zx^_xoG6)z#5Hyt~%7$N6xQ+q$(fl-kLNNGXB~*tTW0JZ5=(K--n% zI~N#lUc{thl4yvMAVk2T!MC*aoO)?dQGzf%pHE3gyPUB+ctF=#vdvvgHb4tav44-l z+b?ps_lR}VG2Ytc+}q#7Z0{jWbIQi%kY;sE&lw3IYWO_V7%CmUGj=-QeniEvSZvyz}BlZ^>g5z& z9I^lCA*Tm34o>c2UB_TNLJH6Nw4-Yri7}|ykY+jK;gHdAfRGMrYlL@XCSjP37z{_G zX-<+%iL)Uhii1fKSr@a1WpngxWg|XWHiD)RJuUp!ty|&rv_3Cm@!4!T*))@FhLauP zg2T;b*kVOB-)C|1h}Fq4b-PC88*E;FJJWNQiIW^1#RzSB`Tzl%7F$<%9Ku5A9Kl!A z5sh)n1781$ALFAx{S&-=_dc_t!3)K2`?}x8(^t>4dGS1Nd*@fuwl$N@ zO_FE~NPJM(W`RzJbnO}yCk!XM_^P5_%xUTs!gpBPV6vQMF=xJ*k&UJd&OJpm*kD~1 zI3daNA%iqWv^CmDqz2j=rDK}1rCA)))GL$*sRXO}lI47#x?0lNKI28&kQhTMYmqzy(gRQq6SlBnju6?(=;@7 zMPy>o2B{UY>v|%a)ClPap`&XHTwBnz6+T2nDMVJW_vxSJAN_xy=cix4gO`eYbAx8S z;0M0YCNbK9N?O zyZ8`pf{277Hms!4%c;fZ;>WKMJdjApaW4F&OLT6UnH7ZRBTAJBvO(z%}CP> zM2pb|qZF%ZkGft^E@#xM6&8m{hO}Kt0yOoS^@og!?9Zyh2@&{9&b=h&jag_hs}84-_m(RG1a zJfe8`dD^C7``o2oGfkzzRA01?RYAx^h6X+&fq z>ZYOf4lT}(NauS{tF^e=Qq1O9-=c(}Z92U7MA9L&LU={nw1^-`O^hDK43h+GL(q!R zXiSzQnCJ{=5E^X~bR4;CAk=aee&z!gCb!<=e!lhee@KWv1KhfGOZODB_U0%{zM?1> zwo{cP1bl0;r!&@%?lCJ0lF1hN)-G|J;<0G$kw*8}v~wsG;g!R6mb$eNXJ{dboWohy z_s6{;AB6+s3b=K_x<-?cB-uNtKy=L>uJCMn=#{*P^r$fbx zC^Jamky;Q1*&k+F_Z+~`Zx{i5GD6C%Z)RmsvX${A(b62%FVok0&7YmJbC zuI&&Af>*S)L#q@OX*%x-&O%T~Es3KTX9LsSU8>a)U0q@8rVkSD$)b#4G(HVTZTg!< zB#9G)YXr(D25H7%G{IzP&q6wDTL%OPLgQ!uGi-C~)-6fzr@)7L<#T@xymsxHyK&=& z{OYg%>i^;3;r;($I2?Yxttw}oRptDc;^YW#B#*uO0}Qve$n!Bq#&{u-&|ia+0g*BU z6$oA+RfkAoK=tHJBw8pEEs*Bz=AQ7y0*Ql36Me{j6^6;SHyHua`5@j@tIG(!0~*> zqF8h1;UkK&FZzUlpL_Ln?%aLI9}Y3^{~h0koNnN3N7JpTiX~NDQm^a&{0|9LZ4oL^ zw>7q@X{r+CJ%~W-J4_TIOinzQ5JeKS!G?e@*R0lay1Jw|KBlQokWtoa8t`ao2|*%_ 
z?6DHz2*M(TWVpRSY+@3X5Jw4dJVY2xP!jKbXxp{Ce`gU6=b!(<L9eO3a?YBm20uB|>+7o`kc!}{=uqel^}%h{MFk=z%X4;i zE|Qp(!FZcENpW~25T^<$d#Y7%0Rb2hFh(GiAjp9Ck~RoB57K#Lpl=fC7$4;nbIM#+fKmUz3OZkI$^O6LXy+t8J3 ztgzITCa9Ps8xoWt%QCbwm?$FF8m;<=K`DjQG0LQ@Y1s=CwX#K;QcnMRe=g1rG_YC z=lvh#;?qyzg(IMlA`pPK0wq0lfY1tz@;Ik3!lM*KMj%>HQeZoQlmLp_LFaRPG^VtU zvhI*7VvuLN?dji6xVq-GPyQ1INkrGw+0{5Mbwt|~bZtivf+&jEnoMbnQ;rXAqmm7@Nm#E6Y!^tQDNVPg zsY}v)OghX_N)nlfSfmt1K?st>MBprS*U@$j0Z$S|;2qvMG?Kw!16vh{u-hYl!6JZI z_EvlXNo3+)tP`h1Nd`)yjUkfp3DN0xxm0jZ$tdit~y;JrnuK46a)r;Il*kqm2QC-<>l zqC!Ja%yFHklbW4NS1|R8Cx)I;s?o6|9*r<&#Hw6#@AhYrA#nN0ccGIW@f0D!I)@LC zCJAXGsa%DtMbBkyTWs6nx*BU+in?LFSmNuN;4NKof~{7#09r@pxR@;qnzCkk z;S$?VJ;`W$gVDxCvdtaR(U>fqAe2F#*~oTHhZMa6*9!>VBZ9~Kt`CgP($*zyThi7G zx~f8HMVyXtl_D8!feWbNh|6F1d(p`*wrS8wLa?4y>*xY>rv;DhKji-XN36;f=^$s2 zmoLpq2`q<d|KDtL)E=Zz;Tq(SkNR>kHG*yFE5wX&! zsKbf|5yyzCWVLjC`Zj?ii72ZTbur_3?>_70f>?m?jwnjleC!D}heKl1!!}*rA>zO& zNyw5RIv%2pf#Ab@ewUN^UQt%3|NFfc_I~!pjT=G;;eNdV5T3t&U47_7&vpOZpZ_mk zMRf1Zw>E-E61py6gdx@mkrcSr(okb!-xEQkIqT%P9$69r?^}YmC?9%&CiKpuky40c zKoTc-Bq}IeV~O%@lFiHH7oQ{X<6{tC2ExJ)Cb2BiYd30fB*1d>=G#zVg5cVA~DPI=`c-^bM~quac|EBl8WE(@|a zXOL;0+SR=CsmB;!{xTXJq2icve2%hSbG*OLa{mz<=PokZdW^O#Q9{vmH5$kUBZNp; zt(M?CuCVGhB33g15lEt(OlQn&iwc(b@)MM+1*d!WnVrnYlaBS#5mAz$f<#Rt z@+2dTVw8@MQH~yOGDvfLXMN3Fl@Cupg5>YqxN+mx6rTQ)4DjMJ&j=tiUHvbE>;6y_ zd`N{r*p~U?7^@<72cxsAj3o5cRZsK`AhbjaNx*|NbWKm+vgHa}71T|I435?cf{4)w zoC=u1kR+dwPB%!Bgr@D8y+%pe))G6zzx>BP$j3kRaT)?aNQ4hWu|^T2w=b|*wmdvI zL>a@p>`>_@gIE)Up!GeyYLsizOrql*!aKf-_O%};i((?3@YT;==AE8(HlBMN>jxNzOJxsL$^4g zs#jPM<3voX6U=1FAlo1vZla{2u>tS<>x$44JGXq_-~D@h^rwFmnWktjP?^D2q2CZ2 z@H(YvTOhE#xy9ADpU0@4S?p|wbAi#sAOqk%!(qbJulO34$1l;%ACTu$Vq>@(1=h<2 z)oRJ$(o>|<^ThEGoeXd!nAqStOLMyBv8(T5XZvxAc1fHKFe*lxl*lA#0_(aY)Fr-Y zSQRT!n!$KVo{u?-Qr7EZ4)^b{_h=6p6cS6lJfSQrjFRM0gvxUU(@iF$5z#P1X$@ZE zLjY-r;sI%FdUm!Ja$~#`E?j>5*YCadGhYRK;tb@o0LlHtPp8vg;4Sb=0^r$~UV4kL z@&2Yu z&PDQkgI0NrG91473L9G+yzS{-A{p>qz`4M5Bzw0I7Z6e-wP$#4m&d>ATe$tgkB|+< 
z{VgR_Z0tM*A#wWcUL$aAI}7g!vhQ27JyzWxfj>Cj0`5P{SAl3+bs8$&LX4wHBN1o&9*_d?j^J8sTi~6= zmMgmDDXZg$tPWnMJh?~LDm)S6^H)iBchPCq!@3d#0`EevNcWQE-kKl($&Wyyv2}%Y z4XvwjWru4^PLJ+*?0@CU-un!fE=-xv8+I;aL?Y1G&}$F?;d<@f z&Xp&yw5>Bwi|F zonnJRxfX#ZM26rz2GNUBO-Ws@@JSBwm}oF%vU85v{zF!?89FjVQj(4m@+`xrDYX>6 zDoRGEG$#@Xk;)+`gb*kVDrnYKE$db3x3?cZ*FDwz)7RGrUvuNejhFcqj5*~mll}ek z&p)q#=S`gbwaK}Qax@&;jq!*i8=O`9Yiw1~)h)j3@39c@K485k2-iPBt0mp~m~!@r z^7Mdm?>@8p_wiAN%Z3D*61srvTDqpiHJ;WwDgt$FY3smG|L8yGjW=GRIz47}ddPZy za_0KAMu>okV(MZ+Q_gA2C3GzhZok53e&*9ev103dM5F^qh+=_>#1}^3NC`U1IRB2X zX6x~HQag|9EbF#qWkIM6=fMY0)fJ#YYDKIK$slKQXP1rXc2BnokYr=BEbGCqyqBI^ z@964|rmm@*ineNSWlQ8NvFI2K6UG}8EC#1t6aXRUtX5kSM($NMELETnF#7JX^k{I81gdiBiLzYh9@?PTIluNp1iBWa_?T(ERlu!?r= zNHdLdf!*N_*%iaRd!Grj)7jgg7ytgc1K=CKapt*+%hKWREQ^|Gm{Arpba6^#w@~qb zI8Km?J`y{JZ5u-9J6)o)sIow`1(AhfwPJSnbx!WQ-ggKuShB0n5(uw5ynnZd&5&SRuW%EQ-Q;`GrWMoV;T7;l_MGJ;ZrV66ZwQ5x(g&r|VWdizqVKs$vl3 zL|XGq_cT+ZIlA+(r@l88!nV}wf@-lKO%s~cf^;yZYf6+-_|ArTQ-~zeepXhls~770 z&J&_8$`|-m)-MTw8#j>8K8t(v>bbuhR;xdzH>ZBOdqt&#O;8Ez*(p*)$k4YUg;F>n zka2|TI@-FyH*3HnqX9l?ARiKsw@G(^?4OZ69d(zGaH1t5&pi)tg9GCC%y>>m3gtyopqjjc^Vf4Uus`dC>=w z6a?>az86r|s}-H?2wt$YeTDU6$!h+H@#Y3yc|$8N`tUzFG{txmzwL8(KyH1 zUhfx>47Rpuni|SLyEvwyBF=_r6`_-ymIfDkrxWiTr-yr-K72@9%$OY>5CK;%pQo%! 
z>bj#`S8Q}GO}(aRHz=wV(c%bc*Q^%@Axbi_ZkAYE$Z_B}Ul z+>jS8TyWFrln?!)e!wpXfajmTu3mf*_r|BTzulFK|I_~cJvX}ixYFqc6B#qz+#(qc zP?71;K6>S?k_r)eI8Y}VWkx-gU6iyX)#j^I6ATjO1a?HWV?DI_5{+PduP5C;fY%__#b7wMEnL~I#j@u= zItW2vl9ZqnNt7T4lHetobNJzybUdc6N`z~{H~7}k7HitpQ5F{8dTQ6w(xP-Am6pSU zhe!uPDl)H;#hmu^fNW!%YJNa^DDvEIHy0?FmvL7Mg96C-3-Vx_xOq>&3;9Vd12DW!D5@|(K&p3SZ z4Tcx4u+5M>5m@07O5%e={pC4Ju>@In)18$`(nTdi0xPEjEujxwxWgRq<)ALE5aJVo(yj|QQO0zfC%l-ezO54VAxIY-F!~f(PzYXA5 z-TYtX2)G77h}29pR@6lyUFGm^ejc-Pp7CUtI2$9QoZ$L`O-cd2$Np^y$j}QHob`0| zn$_}<#p^Hd=%p7pn%9KUgw37vC?Tnq6_4)UMEsJ256a?(_xj6%ke zG!k@HU`&jy8ro)!UC&v~4)8*g#F~PJZgGI!KO&sm!(Vv^(<{%Q!x)`RNIbpPA#y|r zFajb;UG>L{kP;stv>k}vswN+6)<+LHI=D-{TA+kyXF6iNXfVSG)yXNRO@r+O{de^w zslvpXVsXlJJx5nNSY1+929tI2^yv71NOrbA8OtF;mb|Vf8t~usUElSqZvHP<19*UL zWq-0-m90=BDyljRl2~Nk;#_YNBSJ3=3+k-u58y(eYifjTaborxmWIbO~oYAeWzWvL1$CrPA+pql` zCyyTBTwjIbJ;8fal%nI9!FbBs-t%6be&);Z)slN}zQXp_1@g?G)0{}g#73inC)j|u z9)EU&>AH><||`$#8?FCY&*m^}2=inU0dO=bxPORmN^IBCRTYsG)J@Cs=#c98h{S`aYl^BQLK11i zvT2b@kVH96RFkhyyeczUpNba_6aMy7qa{B&{YIWYo3Hch-TYrJ0|ai}(!6)4GQ<3D zw_W$&9^by}4_9+JzH*gFYla(JXP$cj>m5jmMAEy9gL0BCXd2Pcp^!2|4M&`N$9wS` zPf*k~sg7tLJm64jvgvucYE3rTVZ5_T2w+S?Z2D_Yxj01F8Ma(Ah%?4-e<#iSh&yk- z!edXrizrGsb)8jETwSn*fgnS0cNuhW4KBgm-3hM2W$@tc!7V@_!QCZza0?LJb#M_A331sO^^VxTbi`Oa$F4Dw zb}=3q^5q~bXCrP36_P>J=Z6c;DrRl&V#lTQg*=^h^TRcyrau!2#j5nQJvHFR3MOU^<0n?P~8e(QDa9z^m`0(YDfmD+Sk6H#U&%2&1!?zEz|` z^~^p!k9E2g`hWH6v`Rjt+r^Qe_kf0l3Hy1BNPM8V0D0c-kFJCq6f4Ae5P-=-WqH^)Q?_Ku`@g{^ z$rucI!8>R8ah90pJ&_rxX;*}9c7lPvnZ_*d&0v)%*W6U3%9~6OoJ<- z@lk#bnFCTxP4QjtHS?m4>m^A_|2=SU^T{ZfC4edSZ5;WRlCbbL?hHX3qOHPp{Qhw~ zwDSRcD;`7?{ULJ$O35zziU{r=GOts?b)n%3ezQ;BzJ*Js;Ox5! 
zA`@iz@p)PsAG6)-`HNpiA@>+=sT!G4gUjKboHV0a(x)-Fqkchyp3ew6$95e^6oI>C zLfNx8zgwpyxNS6atIm7Aa4hofr`i`>-}0#K#3FgSjhWwYGn371f0RdFjvf{YRY$ar zh8=2BFL71-{26+mf1u1D3|e(>@X0N^RW37XQ+K=gP6X(`9`xTBAHCvwZRQsc%=2*9 z#vMYg{(vA+N|68X`!6C@0AO+9W}Y^fZ|y|M?>cx0&i5cY_fvZp<_a{Qcl53JMzS^n zV1OIveIGLV70oG$JCZ>PtYGvtPs~q#p~}@n<+6Jr`-1euYsg(kQ?`Ic`}d9BXqlex z9bVypLWG zDY($)s-y?7ISacfI8P=uwG0)^7g9r~$cT6-AHkr8GFZZ;u_pe#aRC)FgaZ~Ew2IvA zbZNj%m9Y8?z9u1M6L=YvlXp9!>o-zr3e4tHOkU3sAlq_wDa7ewOz@Ca2QjJlW7EX& zMt;kfS^VoQbH-|TpxjDj>+M%K)W>CXSW{%x&mw-OF;?Oq&QKIen~0#ZYV<))5*ywP zza;&hdd=0+>fmPK0UFVJkF^NH-SqxL-po!8-mXcuzC(6|ey9})zsU_Y=rUH))^?|* zTzo`h04Ew1TxP4i;wyb$4qDA@^gl*5Hrg?cQpGdYnqNUCAm>hu=JRg9CD0RS3ydOq z+sD#SoEgnJ?MI^QzDn+_?HT+x!m*8xJ}nyQ`lZ46>hj!CqyviVV;UHtV{n@rya>k6 z)4=J7^!{Lb(VS@?mm{)iF0k^kttXn>E!56ZI$7R%MyMFH=xc%xFyUmkGkJ+ z(AS;Gl%zIVbui~I_Iu|X?MQmdQ@YbIwctS;)3ffk!0?9;iMTA}^_1pP0*!yhkB#M) zc}q)}dMb}{-Zx32RIXYcV5TSusQUMd59}NU68l+@WL5aT`MB@2| z-7OupbQ7SiLShek8xD6HUEmv~jTbXqIQZ~3@Vv|0Psl%CidUy@!?+r@Fo^oDXF~(}($y!hVQw6=qUaUa#^lS!5WzrsR4HogywqS$w1xrr6cIh~L#NFRrfj zz`l>z@M+yaqxcoa-*)$u<$ykGXMOVX+2%w!6#vuiS1=~knq^Pkm-NT2UQTWaYIQ}6 zCOSSG8Uav009GWmKoNU-Gp}bZgCd!RB~kZ!bH&dM`+Mf zS0lEB!?t=eB`?=JrlBm(FSY6L{ZZnN5`l;iSj5}@Qe5w!Ha>w*0&T%jXXP6s-P6H2 z!(IZ`0^R;m{~)!yU-wL?UKk9&ZngMz3n?a>w8cLpE)NLjOZTM~@NoKgeos+edyoPC~&rW-KLGqi&ftlR+qPKZ{#LammZ5iCmE{xW%h}Z5xlTE*q*UAOY`*d z{C6l|!j!nMK&K^Kg$N8NM`d%_4JJ@jffOdRlT@^VY2Zvg+6`C)!LsV=aw^y9v#I&R z9G6F^I3)T6#`{K@I^+)G`MP{42c z#2Y1-D}Hl9Y$_lvCXA1`ExYeTTBm3*Uj(vc%D^u$xai96_C?v*QT-6Xcdx;YQW zaT26cnsTt#DfThlrAS%8-y40=zbkxLxRe`2SM!r;5ltJLE)2&@OTCuJ?wfr@?D7?* zijHNIRgiDtOm&LN8HY4UZ0 z(!_{MG&r%tFD0tBTeg%fO+glwFkL zNgtRGlyrK091-uIzxc<@$KS%W>wGVEqm@@s1)N6RaCbk%;A??32Y%7~$?@DT4u$*O zOMe5OGYbnOyh$-fgCDJh!WTj3Wrn;Y9lIclC1#EjnEbjlwIjK%J))<+- zE*BOVl6zdO5u#vSTwdN3+BOG=LJadVe}E+4e*sc=$cEQ=_5b8O`SS+#SUz|=$lfec zup)gbX9_Ri50(M|3us>!dY=}K*QtX4X?EA7tVB1|vsRF1?^_W~&3~Y9>3IRC*Ip?o z3z1*-%elCJ$kDy$MHO!Hze$gkj>zIPvFc63i&$ zZoe4eS~lPPG%8z1OG!#p=3Qds2ro|gIUWvpnI!{iK>k5E>vM;_OUTlH?7`bgBWiCl 
zd=CiFALX)^JWijVd95Kv5xiVy-(LmFHuZ~l;@#idHaRFCNw=P~^5`F$WNd#v8D||s z_(pKkEjpZ=>x;18VD&J{C{GWFlF@Aes{veIJtkcM|Zj+2*H-@|9l?Ad4@+;K9k4|OsjnZ`uXqYb9-R&Xc|9y1w zVGOL>h-VsfW$Sxya$(uz5!1om20b|hDD)rfGiIM{0#(t|?9Hrl`oPG-hw2+eb6gwk zru}CW0<&teF&2mjMg;g?aq;+W8K0a7t_GgNy(QEOV1Ka4li~7cd?;K=rv6_C(!jOA zKe4|7$?)ZL5~%TWh!BlxTJ$}8akD#q8z@5YmQnOvBS$wKTyEL28J+@Z#nw>!NTHPo zvHQb5k)V+12}RotIujk1DY(7WHUTd@+0$fvdqXOt++#}AEn=41`og?6(`MHVAzqWX zWV9jc-xEFos%#n%(B}Y-or>B!PMpPnSn4Ku zNUq8hdifhfI~fTr#X$7Z`u25rk$zbE^kRK~pGbUQwEzlNnMUFa=(?J-mEXZh3y{x| zb8vG6vye41eC3VI9CS=aRXGIvkNG4}-(mzF?L3A@h`q37p${#nY?Qs7gt z0$djX;!DM4URd$s8Ffv5ZnV;sF>o=d;JK{!c zN~P0Wg5#j1jOB2v4Gx6A)WyDq8_1_4RlfdgGM~=kNuEMBAe(MJ!71S(ImRt)segeF{J>p6%wL?HbNc{A-`#)Na>?zI-^OMz`k#oIyz}KQahke$u=+q zg>HV*B$9#ggf1oCt#1~rq_HUA@_U|(;iHbWw3}px6@bb0SNoXKu^B%8+<*D%{v{Yy}MyJXpd10n*$0p;cHAeyUqN?GL=ewp@*pZ0b}N(5EogmIyQb7np`cGLhV;*%R%H4=6<03F;&z+dV*yWwB+DQ;`FiLBdMb6uVZ+?Rj`BAJrp&oZB1L$Eb&; zFM2;t&9Ul9Gk^2WSlG$xsDW+jy%?*D4cbI1(T|6gls&K9&RS_81qebPlB~UWCN&q% z;$3`&LP78svLB^I4MXodZ*$4uB%@uSPhNNYufv<7PxDK`)4@QYH_@AAk&i_;Ghn4s zg8uh8Gf5t9;f*w!x5nTlG#O)8z>->fMZS!Egrs>jEgxwK9S{^b0Kb|RzYY2%KG+~_lTdv z={)4v?mF16Dn#I>RA~>`X09w7{P1PH57T0uJJwyDL}s;qn3FtyhO+6aU3JV(o&|WK zq(si?+|=CCF=}~3Dl}7nTOqy7i}KhpzX+ce|xRA7B+}037AQmUQ0JHx1oO zN|ufsNVELh`^wznZ0F#A&x4d&uwU|35*LQN`N|br!q}L8DN30T>nsK)+7Kr|H0V+6HiyQ8j$F%6-GwZK79W7|~tkrG4%=%+PmCU?X8{FdE?>uyu7%9<3 z|I&2gBk^=Ggv(;A#1NFt=ZAPkrrb_pCy`e^Z+2s#8gJNWdK}BB!@bZLa(r{V)vz#C zF#oIX;_k^Dj&T%0?6xo7naD65-Os-9kRO>M0-}4Pwi|r{3V(>X$rJYCRB9Vu){} z7bXxUf%Hb@zY`kEMzI$4_`tVt-_5EXRb>?R2?yb?RBT=Y! zi)ijWysfS?!oqZu4Dw|UzdN8G57GCq9$3^Y%bZ0-utAkpigOrY!M$R^OOc#VR;3#W z@()QQ&?^Xt^szl0O<~QKk(Z(2g_Hp)Fr$m3KkFptWMMnTV|Tef-@wd>%zZiru3mx9 za!E59<^r4|0cUWBLdXV#ehaP9-4B0m1k={z@=iBp5J$VJeoTc^napC747sf~7+!pu z;L;!jdT~IW+U1_38?zP0Dr}h?b2#JW`zrpA^U@`CW)wY#lPC_U?j-q|( z5gBg`mmU38&!(@!+d(=ubot^P@KyrSHP8`4lxn@sWyJp1|7M=`ASpV^XfVw=RfHjt z?y%60UymwSe04eNX%CTvTozUG3|k^~P;i*cf`@w2qd}k~r;xi+&G)+BT9uN*-RAGI@OdC2$98DP!m$<75d3NjI?h#LS)(|U8dQ}yN7j~` z^rF0dg? 
z4i=LxW1Bh5h}C0WC4*7U7g*mzRMZAn?GL&5qO;VXza(wHxViy!TYMmn#%`i?IP6s? zJLW-dChX>O6C%DXK&n6qKpsmjv!CYh#6;Du>>rYd?r?YoUS1w3fgTfB-UXY$yWX63 zrD(oJFV6l@q_RDoeOOCOYXmXIcF?9xGgh~DUSk#3D|h&JabzsfFnqKjxPU@~E# zt+XSuf7RNX57WKx?4{EsW~q+s87N!fOa2-?<{A+B&c>Vy!xl23;XKuTF-zG$L$E`q z9!3gqaUlrfUZcL;gWx2>*sbXo6aeqWJZ^GGM&RsWnACO*ol}xr_n7|Og^COX|SHku!6CdtH;X zWOmYN)Q1T%L#YXT-hvnMmE!Y8+(v?Rpn*3B=c)La#szTymm_3L8J;nf3Sxl0!m|wy! zyMO8PrTII3dJ+izMS>v2ll0f~fmtY)rcSd#bb3uOyY7jFwhAkuLh=YQz^_`i8z0mAi zja(7eB95f?ig`WtdU>%YRk8g8;cp)6z(DzfVmx_Hca?8enK(IZb{<43y$;gYM#Yb1 zK%Ye2nyd(Op!{V*OTN{>?Bm>ibFLZE>)A5Q3cEh+1d68>g2$*@*VHr^gU8sfr6x+B z1bxbU^R%ghM0~c*#G4GYVZYy0Nh@VYPhm9UfljrIci1W zby)K{1tpP-VakNjGX5bs@N1h2QRe9|kj-#MDcu|*B4A;uo#D-m*R6HGdLkI^v9NhC zJF_V4gI^NG%I!h=)ury%Lyxw^uoRmYF z!H|zXa$*ww(%wR6=K*LhrfqMFDOXW3W`a0~`S&dSDC-_=z$E%xjbc)9hh5MyL_)8S<9gI5HS z_$SN6SnT3r!{{%-#^3)KV?=qE5wuIfepF}u`FSGXegS2j;BftQW+Q?8MxGMtqrwqI#bY7ifomLiR`o>pI-X|#8HogNQ&K5|C3`IiEtM7^zNke-zy%4O+#AI_n{K1T39Z}1NWAEDNf5~6#x-;6lUtH^! zt@-@-!29)xi!$IAjr?z;ID#}-O3*>S(=(#_j%`>*Mq}ZR7{a3dtz64j&kl{D2Z~@_ z#z*sd;zcBU865y3GnM`(^U=MZQ$nkH#1}^mJ`5mn_MQNSc7h9YV-@Nr?LQ0r>`n=9 z@9Vy|fdc)m+}=}(g&_GJ23A&9PEMSja>O0c_S86{Z6Kl5bCn8lA7-&q@eBw8#~aQ@ z5vA$5%?y>Xt7R=$y4j-$lQ{kNh!3ao=!BunH`RnSOXceqk|Ee4)j?5}x#k^tR{oa$RAkQ+9UT`37c4Zlg76%{r(%3}>YFx; zDZT}IIC+YiC=$}Uy2;_i9cPRJ8WkNjoLLe%A$H2m5G%oUL{c=dagWOGX}$^zu<=u^ z&TvezJz_G7Pu^AL{PPI?3swmng8V^8c}3Zy#jkVT%gCSbswRdCOsZQJMRx8lQuzGa z#Gft9)d22OjRgm_i>HLFP=;X4Gmif~fFD4FK&Ov9%-S+Mx zZ~5mBDyoK8Sm(r(+?NAM8joqV&zujWGtLiBZ%$$&l%cgU(h|J-tcC=0sMO(q=2!>N zLz5&6=}Ia*vY4UE280)H^sqgo5p-6+S=ylklZmK#+)Y3x*1CoQHY|*Fy6Ucl4s%1Z zmj#g+{9nHOeeZq4;L;7wSyC^(B{`YzU$f&YpK?==FCMoao4sbZr?yvvP^_5RX>=|U^0jl?gN>e*@WF+wopzQ3`IK;MqSMqgf{i3oPOP!(8$EMVdh4UConr>_0mvKbaWO8marC_qOa_mi#v2Y_} z_X-+cYh8@AWCHt$yrr2!kL$%7F0wzD8Ds=%G~KNd=8Rwy(ofa!^VlBHD(7IgIB$sZi24&@hgWYi=uH=V zx?5D?>|W-`J9OuiZ{e~8mdjrqX9Q(Z$mnM`RZ(mT=KO&#tXo9 zNQXzDZFllic-?jPfcCI#)rrfu$-W(&{vJ-bb+oP;9SY9mHzPWI(DuY; z^&LQrQ>kR6Mw5Q|wa-P`(%g 
zb{^`%^`nuvdM5P4<=*V``R*EIyb5FF0(E}__Gy3KUw$6j`i5oj4~g-KC^T>sjti&{ zU`hcUphhQqt?*iZ0n?828wU7_4i?c8u zG{@wsBuvGsvmCESoL7BJ=z6dZR$J9-GR;Eb&XjT7Cd-SBFfUV;k#Ii*Eaoi@_rM2` znU~Z=IOp8^VC<}^ZVyB~z;8elkFXG$X+1d%K|Kj4IurNNC;2qR^?~n@Ex;MswT|`X zG!+70&q9>AJDFo62CvV!Cj#gkx#OJea~Wh#i?MG+%4;g|{V=tp-9uHR(F*hWl5?uZ zynnF;nW8U@Vo1m^{>Ts_`tm8PQ1I}TwWyaW@44`kfg(x2RblY-(fS!-P{6R-f#wm) zUj)~wc8<#Nw;9#|`oDCI0yBTSL@<$WrhZ1t@}MCqe2lM%jaH23Ck7%ac%jd>E7=wN zm?1m05Fuw6ap~AJ@3UGxj%zJ_ELFJ=Y}8NY!>~Y1vYQY{G>Z7zKITDqdlB2=n|kZo zwX6SNx>PRlcsa`SYp>PYyY{pF;*$_f?+(86iJ$6BRD2y-9Mb_`-wq=s7&qR)NzooC zoqA+f9QHlyW)H9g!TK>K+2x{X66;(mmraQ>!THtmwa^KruXyMeNJjtND#C{PBzw&7 zMs+c>rLK-$aL{>dp>ZMMwpzHmaL|pj^MsGFe~8Q$^%PPU|6yK^!KO5&x;{EXr;Z>p z=|tB+);rA7vQy=O30Wrx>&SflR_wQ|3s4a0?CiaAWD_kCi zIR1)0)9JbK+YF6T%e-XlZ_DrP=AqgSRHK=(IkB91CsvtF$?t+i+h8_h8}hA!N2e28 zlDOienK>Lxx*XbpcKR++Mub5GBkw81vW24dt##=Q9|$ka1OER5e$aQ6 m!U915`vy;YXKN47PtLCYf6N +#include #include #include @@ -13,6 +14,15 @@ class PatchWindow final : public QWidget { ~PatchWindow() = default; private: + // Show a message box + // Title: Title of the message box to display + // Message: Message to display + // Icon: The type of icon (error, warning, information, etc) to display + // IconPath: If non-null, then a path to an icon in our assets to display on the OK button + void displayMessage( + const QString& title, const QString& message, QMessageBox::Icon icon = QMessageBox::Icon::Warning, const char* iconPath = nullptr + ); + std::filesystem::path inputPath = ""; std::filesystem::path patchPath = ""; diff --git a/src/panda_qt/patch_window.cpp b/src/panda_qt/patch_window.cpp index 98983865..de5cd277 100644 --- a/src/panda_qt/patch_window.cpp +++ b/src/panda_qt/patch_window.cpp @@ -1,7 +1,9 @@ #include "panda_qt/patch_window.hpp" +#include #include #include +#include #include #include #include @@ -64,15 +66,20 @@ PatchWindow::PatchWindow(QWidget* parent) : QWidget(parent, Qt::Window) { connect(applyPatchButton, &QPushButton::clicked, this, [this]() { if 
(inputPath.empty() || patchPath.empty()) { - printf("Pls set paths properly"); + displayMessage(tr("Paths not provided correctly"), tr("Please provide paths for both the input file and the patch file")); return; } - auto path = QFileDialog::getSaveFileName(this, tr("Select file"), QString::fromStdU16String(inputPath.u16string()), tr("All files (*.*)")); + // std::filesystem::path only has += and not + for reasons unknown to humanity + auto defaultPath = inputPath.parent_path() / inputPath.stem(); + defaultPath += "-patched"; + defaultPath += inputPath.extension(); + + auto path = QFileDialog::getSaveFileName(this, tr("Select file"), QString::fromStdU16String(defaultPath.u16string()), tr("All files (*.*)")); std::filesystem::path outputPath = std::filesystem::path(path.toStdU16String()); if (outputPath.empty()) { - printf("Pls set paths properly"); + displayMessage(tr("No output path"), tr("No path was provided for the output file, no patching was done")); return; } @@ -87,7 +94,7 @@ PatchWindow::PatchWindow(QWidget* parent) : QWidget(parent, Qt::Window) { } else if (extension == ".bps") { patchType = Hips::PatchType::BPS; } else { - printf("Unknown patch format\n"); + displayMessage(tr("Unknown patch format"), tr("Unknown format for patch file. 
Currently IPS, UPS and BPS are supported")); return; } @@ -96,7 +103,7 @@ PatchWindow::PatchWindow(QWidget* parent) : QWidget(parent, Qt::Window) { IOFile patch(patchPath, "rb"); if (!input.isOpen() || !patch.isOpen()) { - printf("Failed to open input or patch file.\n"); + displayMessage(tr("Failed to open input files"), tr("Make sure they're in a directory Panda3DS has access to")); return; } @@ -119,5 +126,33 @@ PatchWindow::PatchWindow(QWidget* parent) : QWidget(parent, Qt::Window) { IOFile output(outputPath, "wb"); output.writeBytes(bytes.data(), bytes.size()); } + + switch (result) { + case Hips::Result::Success: + displayMessage( + tr("Patching Success"), tr("Your file was patched successfully."), QMessageBox::Icon::Information, ":/docs/img/rpog_icon.png" + ); + break; + + case Hips::Result::ChecksumMismatch: + displayMessage( + tr("Checksum mismatch"), + tr("Patch was applied successfully but a checksum mismatch was detected. The input or output files might not be correct") + ); + break; + + default: displayMessage(tr("Patching error"), tr("An error occured while patching")); break; + } }); +} + +void PatchWindow::PatchWindow::displayMessage(const QString& title, const QString& message, QMessageBox::Icon icon, const char* iconPath) { + QMessageBox messageBox(icon, title, message); + QAbstractButton* button = messageBox.addButton(tr("OK"), QMessageBox::ButtonRole::YesRole); + + if (iconPath != nullptr) { + button->setIcon(QIcon(iconPath)); + } + + messageBox.exec(); } \ No newline at end of file From aa7a6bfe7a17219a42b0830c8c646484eafa7852 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 8 May 2024 00:20:39 +0000 Subject: [PATCH 025/251] s/ellided/elided (#510) * s/ellided/elided * Fix header name --- CMakeLists.txt | 4 +-- .../{ellided_label.hpp => elided_label.hpp} | 6 ++--- include/panda_qt/patch_window.hpp | 2 +- src/panda_qt/elided_label.cpp | 25 +++++++++++++++++++ src/panda_qt/ellided_label.cpp 
| 25 ------------------- src/panda_qt/patch_window.cpp | 4 +-- 6 files changed, 33 insertions(+), 33 deletions(-) rename include/panda_qt/{ellided_label.hpp => elided_label.hpp} (53%) create mode 100644 src/panda_qt/elided_label.cpp delete mode 100644 src/panda_qt/ellided_label.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 88ad6aeb..748c298b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -449,11 +449,11 @@ if(NOT BUILD_HYDRA_CORE) set(FRONTEND_SOURCE_FILES src/panda_qt/main.cpp src/panda_qt/screen.cpp src/panda_qt/main_window.cpp src/panda_qt/about_window.cpp src/panda_qt/config_window.cpp src/panda_qt/zep.cpp src/panda_qt/text_editor.cpp src/panda_qt/cheats_window.cpp src/panda_qt/mappings.cpp - src/panda_qt/patch_window.cpp src/panda_qt/ellided_label.cpp + src/panda_qt/patch_window.cpp src/panda_qt/elided_label.cpp ) set(FRONTEND_HEADER_FILES include/panda_qt/screen.hpp include/panda_qt/main_window.hpp include/panda_qt/about_window.hpp include/panda_qt/config_window.hpp include/panda_qt/text_editor.hpp include/panda_qt/cheats_window.hpp - include/panda_qt/patch_window.hpp include/panda_qt/ellided_label.hpp + include/panda_qt/patch_window.hpp include/panda_qt/elided_label.hpp ) source_group("Source Files\\Qt" FILES ${FRONTEND_SOURCE_FILES}) diff --git a/include/panda_qt/ellided_label.hpp b/include/panda_qt/elided_label.hpp similarity index 53% rename from include/panda_qt/ellided_label.hpp rename to include/panda_qt/elided_label.hpp index 19fd8c74..9d937f9b 100644 --- a/include/panda_qt/ellided_label.hpp +++ b/include/panda_qt/elided_label.hpp @@ -4,11 +4,11 @@ #include #include -class EllidedLabel : public QLabel { +class ElidedLabel : public QLabel { Q_OBJECT public: - explicit EllidedLabel(Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr); - explicit EllidedLabel(QString text, Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr); + explicit ElidedLabel(Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* 
parent = nullptr); + explicit ElidedLabel(QString text, Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr); void setText(QString text); protected: diff --git a/include/panda_qt/patch_window.hpp b/include/panda_qt/patch_window.hpp index 06676a66..ccffae4f 100644 --- a/include/panda_qt/patch_window.hpp +++ b/include/panda_qt/patch_window.hpp @@ -4,7 +4,7 @@ #include #include -#include "panda_qt/ellided_label.hpp" +#include "panda_qt/elided_label.hpp" class PatchWindow final : public QWidget { Q_OBJECT diff --git a/src/panda_qt/elided_label.cpp b/src/panda_qt/elided_label.cpp new file mode 100644 index 00000000..f15cf11d --- /dev/null +++ b/src/panda_qt/elided_label.cpp @@ -0,0 +1,25 @@ +#include "panda_qt/elided_label.hpp" + +// Based on https://stackoverflow.com/questions/7381100/text-overflow-for-a-qlabel-s-text-rendering-in-qt +ElidedLabel::ElidedLabel(Qt::TextElideMode elideMode, QWidget* parent) : ElidedLabel("", elideMode, parent) {} + +ElidedLabel::ElidedLabel(QString text, Qt::TextElideMode elideMode, QWidget* parent) : QLabel(parent) { + m_elideMode = elideMode; + setText(text); +} + +void ElidedLabel::setText(QString text) { + m_text = text; + updateText(); +} + +void ElidedLabel::resizeEvent(QResizeEvent* event) { + QLabel::resizeEvent(event); + updateText(); +} + +void ElidedLabel::updateText() { + QFontMetrics metrics(font()); + QString elided = metrics.elidedText(m_text, m_elideMode, width()); + QLabel::setText(elided); +} \ No newline at end of file diff --git a/src/panda_qt/ellided_label.cpp b/src/panda_qt/ellided_label.cpp deleted file mode 100644 index 68c0da76..00000000 --- a/src/panda_qt/ellided_label.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include "panda_qt/ellided_label.hpp" - -// Based on https://stackoverflow.com/questions/7381100/text-overflow-for-a-qlabel-s-text-rendering-in-qt -EllidedLabel::EllidedLabel(Qt::TextElideMode elideMode, QWidget* parent) : EllidedLabel("", elideMode, parent) {} - 
-EllidedLabel::EllidedLabel(QString text, Qt::TextElideMode elideMode, QWidget* parent) : QLabel(parent) { - m_elideMode = elideMode; - setText(text); -} - -void EllidedLabel::setText(QString text) { - m_text = text; - updateText(); -} - -void EllidedLabel::resizeEvent(QResizeEvent* event) { - QLabel::resizeEvent(event); - updateText(); -} - -void EllidedLabel::updateText() { - QFontMetrics metrics(font()); - QString elided = metrics.elidedText(m_text, m_elideMode, width()); - QLabel::setText(elided); -} \ No newline at end of file diff --git a/src/panda_qt/patch_window.cpp b/src/panda_qt/patch_window.cpp index de5cd277..189288eb 100644 --- a/src/panda_qt/patch_window.cpp +++ b/src/panda_qt/patch_window.cpp @@ -20,7 +20,7 @@ PatchWindow::PatchWindow(QWidget* parent) : QWidget(parent, Qt::Window) { QHBoxLayout* inputLayout = new QHBoxLayout; QLabel* inputText = new QLabel(tr("Select input file")); QPushButton* inputButton = new QPushButton(tr("Select")); - inputPathLabel = new EllidedLabel(""); + inputPathLabel = new ElidedLabel(""); inputPathLabel->setFixedWidth(200); inputLayout->addWidget(inputText); @@ -32,7 +32,7 @@ PatchWindow::PatchWindow(QWidget* parent) : QWidget(parent, Qt::Window) { QHBoxLayout* patchLayout = new QHBoxLayout; QLabel* patchText = new QLabel(tr("Select patch file")); QPushButton* patchButton = new QPushButton(tr("Select")); - patchPathLabel = new EllidedLabel(""); + patchPathLabel = new ElidedLabel(""); patchPathLabel->setFixedWidth(200); patchLayout->addWidget(patchText); From 9a50a57d327471a5a20a954285466dc00115d3ff Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 10 May 2024 02:13:58 +0300 Subject: [PATCH 026/251] Fix CI --- include/panda_qt/patch_window.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/panda_qt/patch_window.hpp b/include/panda_qt/patch_window.hpp index ccffae4f..a6e1a129 100644 --- a/include/panda_qt/patch_window.hpp +++ 
b/include/panda_qt/patch_window.hpp @@ -26,6 +26,6 @@ class PatchWindow final : public QWidget { std::filesystem::path inputPath = ""; std::filesystem::path patchPath = ""; - EllidedLabel* inputPathLabel = nullptr; - EllidedLabel* patchPathLabel = nullptr; + ElidedLabel* inputPathLabel = nullptr; + ElidedLabel* patchPathLabel = nullptr; }; From 2f9d5e30b409d0498c8a235b09b2a15181d43a75 Mon Sep 17 00:00:00 2001 From: NerduMiner Date: Sat, 11 May 2024 15:04:53 -0400 Subject: [PATCH 027/251] Index with iterator value in CAMService::startCapture rather than getSingleIndex() The port may have a value of 3 in this function, which will cause a panic. getPortIndices() handles this case for us already, so the iterator vale is safe to use --- src/core/services/cam.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/services/cam.cpp b/src/core/services/cam.cpp index b3dfd1dc..d9c005e7 100644 --- a/src/core/services/cam.cpp +++ b/src/core/services/cam.cpp @@ -343,7 +343,7 @@ void CAMService::startCapture(u32 messagePointer) { if (port.isValid()) { for (int i : port.getPortIndices()) { - auto& event = ports[port.getSingleIndex()].receiveEvent; + auto& event = ports[i].receiveEvent; // Until we properly implement cameras, immediately signal the receive event if (event.has_value()) { From 842943fa4cb674bc2b5a652d419f8e4acd889e90 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 13 May 2024 00:51:40 +0300 Subject: [PATCH 028/251] GLSL shader gen: Add alpha test (...half of it I guess) --- CMakeLists.txt | 2 +- include/PICA/pica_frag_config.hpp | 53 ++++++++++++++++++++++++++++ include/PICA/regs.hpp | 11 ++++++ include/PICA/shader_gen.hpp | 2 ++ include/renderer_gl/renderer_gl.hpp | 27 +------------- src/core/PICA/shader_gen_glsl.cpp | 24 +++++++++++++ src/core/renderer_gl/renderer_gl.cpp | 27 ++++++++------ 7 files changed, 108 insertions(+), 38 deletions(-) create mode 100644 
include/PICA/pica_frag_config.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 0bdb8abb..c6b12188 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -242,7 +242,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp include/PICA/shader_gen.hpp include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp - include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp + include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp ) cmrc_add_resource_library( diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp new file mode 100644 index 00000000..c4d46b11 --- /dev/null +++ b/include/PICA/pica_frag_config.hpp @@ -0,0 +1,53 @@ +#pragma once +#include +#include +#include +#include + +#include "PICA/pica_hash.hpp" +#include "PICA/regs.hpp" +#include "bitfield.hpp" +#include "helpers.hpp" + +namespace PICA { + struct OutputConfig { + union { + u32 raw; + // Merge the enable + compare function into 1 field to avoid duplicate shaders + // enable == off means a CompareFunction of Always + BitField<0, 3, CompareFunction> alphaTestFunction; + }; + }; + + struct TextureConfig { + u32 texUnitConfig; + u32 texEnvUpdateBuffer; + + // TODO: This should probably be a uniform + u32 texEnvBufferColor; + + // There's 6 TEV stages, and each one is configured via 5 word-sized registers + std::array tevConfigs; + }; + + struct FragmentConfig { + OutputConfig outConfig; + TextureConfig texConfig; + + bool operator==(const FragmentConfig& config) const { + // Hash function and equality operator required by std::unordered_map + return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0; + } + }; + + static_assert( + 
std::has_unique_object_representations() && std::has_unique_object_representations() && + std::has_unique_object_representations() + ); +} // namespace PICA + +// Override std::hash for our fragment config class +template <> +struct std::hash { + std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); } +}; \ No newline at end of file diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 5b9e1830..74f8c7d5 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -345,6 +345,17 @@ namespace PICA { GeometryPrimitive = 3, }; + enum class CompareFunction : u32 { + Never = 0, + Always = 1, + Equal = 2, + NotEqual = 3, + Less = 4, + LessOrEqual = 5, + Greater = 6, + GreaterOrEqual = 7, + }; + struct TexEnvConfig { enum class Source : u8 { PrimaryColor = 0x0, diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index 23a87120..e8e8ca20 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -24,6 +24,8 @@ namespace PICA::ShaderGen { void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op); void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op); + void applyAlphaTest(std::string& shader, const PICARegs& regs); + u32 textureConfig = 0; public: diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index e8eaeacb..53ca9975 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -7,6 +7,7 @@ #include #include "PICA/float_types.hpp" +#include "PICA/pica_frag_config.hpp" #include "PICA/pica_hash.hpp" #include "PICA/pica_vertex.hpp" #include "PICA/regs.hpp" @@ -21,32 +22,6 @@ // More circular dependencies! 
class GPU; -namespace PICA { - struct FragmentConfig { - u32 texUnitConfig; - u32 texEnvUpdateBuffer; - - // TODO: This should probably be a uniform - u32 texEnvBufferColor; - - // There's 6 TEV stages, and each one is configured via 5 word-sized registers - std::array tevConfigs; - - // Hash function and equality operator required by std::unordered_map - bool operator==(const FragmentConfig& config) const { - return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0; - } - }; -} // namespace PICA - -// Override std::hash for our fragment config class -template <> -struct std::hash { - std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { - return PICAHash::computeHash((const char*)&config, sizeof(config)); - } -}; - class RendererGL final : public Renderer { GLStateManager gl = {}; diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index c3056815..50be94f0 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -144,6 +144,8 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { compileTEV(ret, i, regs); } + applyAlphaTest(ret, regs); + ret += "fragColor = combinerOutput;\n"; ret += "}"; // End of main function ret += "\n\n\n\n\n\n\n\n\n\n\n\n\n"; @@ -353,3 +355,25 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope break; } } + +void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs) { + const u32 alphaConfig = regs[InternalRegs::AlphaTestConfig]; + // Alpha test disabled + if (Helpers::getBit<0>(alphaConfig) == 0) { + return; + } + + const auto function = static_cast(Helpers::getBits<4, 3>(alphaConfig)); + + shader += "if ("; + switch (function) { + case CompareFunction::Never: shader += "true"; break; + case CompareFunction::Always: shader += "false"; break; + default: + Helpers::warn("Unimplemented alpha test function"); + shader += "false"; + break; + } + + shader += ") { discard; }\n"; +} diff 
--git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 5d3ed1b1..cfd197f8 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -796,22 +796,27 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt OpenGL::Program& RendererGL::getSpecializedShader() { PICA::FragmentConfig fsConfig; - fsConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; - fsConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; - fsConfig.texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor]; + auto& outConfig = fsConfig.outConfig; + auto& texConfig = fsConfig.texConfig; + + auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig]; + auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig); + outConfig.alphaTestFunction = (alphaTestConfig & 1) ? static_cast(alphaTestFunction) : PICA::CompareFunction::Always; + + texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; + texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + texConfig.texEnvBufferColor = 0; // Set up TEV stages - std::memcpy(&fsConfig.tevConfigs[0 * 5], ®s[InternalRegs::TexEnv0Source], 5 * sizeof(u32)); - std::memcpy(&fsConfig.tevConfigs[1 * 5], ®s[InternalRegs::TexEnv1Source], 5 * sizeof(u32)); - std::memcpy(&fsConfig.tevConfigs[2 * 5], ®s[InternalRegs::TexEnv2Source], 5 * sizeof(u32)); - std::memcpy(&fsConfig.tevConfigs[3 * 5], ®s[InternalRegs::TexEnv3Source], 5 * sizeof(u32)); - std::memcpy(&fsConfig.tevConfigs[4 * 5], ®s[InternalRegs::TexEnv4Source], 5 * sizeof(u32)); - std::memcpy(&fsConfig.tevConfigs[5 * 5], ®s[InternalRegs::TexEnv5Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[0 * 5], ®s[InternalRegs::TexEnv0Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[1 * 5], ®s[InternalRegs::TexEnv1Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[2 * 5], ®s[InternalRegs::TexEnv2Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[3 * 5], 
®s[InternalRegs::TexEnv3Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[4 * 5], ®s[InternalRegs::TexEnv4Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[5 * 5], ®s[InternalRegs::TexEnv5Source], 5 * sizeof(u32)); OpenGL::Program& program = shaderCache[fsConfig]; if (!program.exists()) { - printf("Creating specialized shader\n"); - std::string vs = fragShaderGen.getVertexShader(regs); std::string fs = fragShaderGen.generate(regs); std::cout << vs << "\n\n" << fs << "\n"; From 85a17c3fcd507083192da82534b825bc90cbce44 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 13 May 2024 01:10:44 +0300 Subject: [PATCH 029/251] Add UBO support to opengl.hpp --- third_party/opengl/opengl.hpp | 94 +++++++++++++++++++++++++---------- 1 file changed, 68 insertions(+), 26 deletions(-) diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp index f368f573..9997e63b 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp @@ -430,36 +430,36 @@ namespace OpenGL { glDispatchCompute(groupsX, groupsY, groupsZ); } - struct VertexBuffer { - GLuint m_handle = 0; + struct VertexBuffer { + GLuint m_handle = 0; - void create() { - if (m_handle == 0) { - glGenBuffers(1, &m_handle); - } - } + void create() { + if (m_handle == 0) { + glGenBuffers(1, &m_handle); + } + } - void createFixedSize(GLsizei size, GLenum usage = GL_DYNAMIC_DRAW) { - create(); - bind(); - glBufferData(GL_ARRAY_BUFFER, size, nullptr, usage); - } + void createFixedSize(GLsizei size, GLenum usage = GL_DYNAMIC_DRAW) { + create(); + bind(); + glBufferData(GL_ARRAY_BUFFER, size, nullptr, usage); + } - VertexBuffer(bool shouldCreate = false) { - if (shouldCreate) { - create(); - } - } + VertexBuffer(bool shouldCreate = false) { + if (shouldCreate) { + create(); + } + } #ifdef OPENGL_DESTRUCTORS - ~VertexBuffer() { free(); } -#endif - GLuint handle() const { return m_handle; } - bool exists() const { return 
m_handle != 0; } - void bind() const { glBindBuffer(GL_ARRAY_BUFFER, m_handle); } - void free() { glDeleteBuffers(1, &m_handle); } + ~VertexBuffer() { free(); } +#endif + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind() const { glBindBuffer(GL_ARRAY_BUFFER, m_handle); } + void free() { glDeleteBuffers(1, &m_handle); } - // Reallocates the buffer on every call. Prefer the sub version if possible. + // Reallocates the buffer on every call. Prefer the sub version if possible. template void bufferVerts(VertType* vertices, int vertCount, GLenum usage = GL_DYNAMIC_DRAW) { glBufferData(GL_ARRAY_BUFFER, sizeof(VertType) * vertCount, vertices, usage); @@ -471,7 +471,7 @@ namespace OpenGL { glBufferSubData(GL_ARRAY_BUFFER, offset, sizeof(VertType) * vertCount, vertices); } - // If C++20 is available, add overloads that take std::span instead of raw pointers + // If C++20 is available, add overloads that take std::span instead of raw pointers #ifdef OPENGL_HAVE_CPP20 template void bufferVerts(std::span vertices, GLenum usage = GL_DYNAMIC_DRAW) { @@ -485,6 +485,48 @@ namespace OpenGL { #endif }; + struct UniformBuffer { + GLuint m_handle = 0; + + void create() { + if (m_handle == 0) { + glGenBuffers(1, &m_handle); + } + } + + void createFixedSize(GLsizei size, GLenum usage = GL_DYNAMIC_DRAW) { + create(); + bind(); + glBufferData(GL_UNIFORM_BUFFER, size, nullptr, usage); + } + + UniformBuffer(bool shouldCreate = false) { + if (shouldCreate) { + create(); + } + } + +#ifdef OPENGL_DESTRUCTORS + ~UniformBuffer() { free(); } +#endif + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind() const { glBindBuffer(GL_UNIFORM_BUFFER, m_handle); } + void free() { glDeleteBuffers(1, &m_handle); } + + // Reallocates the buffer on every call. Prefer the sub version if possible. 
+ template + void buffer(const UniformType& uniformData, GLenum usage = GL_DYNAMIC_DRAW) { + glBufferData(GL_UNIFORM_BUFFER, sizeof(uniformData), &uniformData, usage); + } + + // Only use if you used createFixedSize + template + void bufferSub(const UniformType& uniformData, int vertCount, GLintptr offset = 0) { + glBufferSubData(GL_UNIFORM_BUFFER, offset, sizeof(uniformData), &uniformData); + } + }; + enum DepthFunc { Never = GL_NEVER, // Depth test never passes Always = GL_ALWAYS, // Depth test always passes @@ -693,4 +735,4 @@ namespace OpenGL { using Rect = Rectangle; -} // end namespace OpenGL \ No newline at end of file +} // end namespace OpenGL From 12d25fe20d269c7afb12ebd80724ee1ade2c8b87 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 8 Jun 2024 15:04:36 +0000 Subject: [PATCH 030/251] CMake: Remove Vulkan version requirement --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 748c298b..3492bf59 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -338,7 +338,7 @@ endif() if(ENABLE_VULKAN) find_package( - Vulkan 1.3.206 REQUIRED + Vulkan REQUIRED COMPONENTS glslangValidator ) From 29d9ed7224024f3ace7bf3c3a12d79d467d54be8 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 25 Jun 2024 22:11:48 +0000 Subject: [PATCH 031/251] Try to fix Vulkan on Windows CI part 2 (#521) * Try to fix Vulkan SDK on Windows CI * Try to fix Vulkan SDK on Windows CI * Update CMakeLists.txt * Update CMakeLists.txt * Try to fix Vulkan SDK on Windows CI * Add trace to Windows build * Update Windows_Build.yml * Update Windows_Build.yml * Update CMakeLists.txt * Update CMakeLists.txt * Update CMakeLists.txt * Update CMakeLists.txt * Update CMakeLists.txt * Update Windows_Build.yml --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 
3492bf59..80114bfa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -339,7 +339,7 @@ endif() if(ENABLE_VULKAN) find_package( Vulkan REQUIRED - COMPONENTS glslangValidator + COMPONENTS glslang ) set(RENDERER_VK_INCLUDE_FILES include/renderer_vk/renderer_vk.hpp @@ -382,7 +382,7 @@ if(ENABLE_VULKAN) add_custom_command( OUTPUT ${HOST_SHADER_SPIRV} COMMAND ${CMAKE_COMMAND} -E make_directory "${PROJECT_BINARY_DIR}/host_shaders/" - COMMAND Vulkan::glslangValidator ${RENDERER_VK_HOST_SHADERS_FLAGS} -V "${PROJECT_SOURCE_DIR}/${HOST_SHADER_SOURCE}" -o ${HOST_SHADER_SPIRV} + COMMAND glslang ${RENDERER_VK_HOST_SHADERS_FLAGS} -V "${PROJECT_SOURCE_DIR}/${HOST_SHADER_SOURCE}" -o ${HOST_SHADER_SPIRV} DEPENDS ${HOST_SHADER_SOURCE} ) list( APPEND RENDERER_VK_HOST_SHADERS_SPIRV ${HOST_SHADER_SPIRV} ) From 1c9a3ac3d3d9414a7e6b270132fe7aacb786a651 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 2 Jul 2024 15:30:38 +0300 Subject: [PATCH 032/251] Add Y2R event delay --- include/kernel/kernel.hpp | 2 ++ include/scheduler.hpp | 3 ++- include/services/service_manager.hpp | 1 + include/services/y2r.hpp | 6 +++++- src/core/kernel/kernel.cpp | 2 ++ src/core/services/y2r.cpp | 32 ++++++++++++++++++++++------ src/emulator.cpp | 2 ++ 7 files changed, 40 insertions(+), 8 deletions(-) diff --git a/include/kernel/kernel.hpp b/include/kernel/kernel.hpp index fc7fe3f3..e0c0651b 100644 --- a/include/kernel/kernel.hpp +++ b/include/kernel/kernel.hpp @@ -15,6 +15,7 @@ #include "services/service_manager.hpp" class CPU; +struct Scheduler; class Kernel { std::span regs; @@ -243,6 +244,7 @@ public: } ServiceManager& getServiceManager() { return serviceManager; } + Scheduler& getScheduler(); void sendGPUInterrupt(GPUInterrupt type) { serviceManager.sendGPUInterrupt(type); } void clearInstructionCache(); diff --git a/include/scheduler.hpp b/include/scheduler.hpp index 97c50afc..cfc4d5e8 100644 --- a/include/scheduler.hpp +++ b/include/scheduler.hpp 
@@ -11,7 +11,8 @@ struct Scheduler { VBlank = 0, // End of frame event UpdateTimers = 1, // Update kernel timer objects RunDSP = 2, // Make the emulated DSP run for one audio frame - Panic = 3, // Dummy event that is always pending and should never be triggered (Timestamp = UINT64_MAX) + SignalY2R = 3, // Signal that a Y2R conversion has finished + Panic = 4, // Dummy event that is always pending and should never be triggered (Timestamp = UINT64_MAX) TotalNumberOfEvents // How many event types do we have in total? }; static constexpr usize totalNumberOfEvents = static_cast(EventType::TotalNumberOfEvents); diff --git a/include/services/service_manager.hpp b/include/services/service_manager.hpp index 8d1cf381..6679f98d 100644 --- a/include/services/service_manager.hpp +++ b/include/services/service_manager.hpp @@ -109,4 +109,5 @@ class ServiceManager { HIDService& getHID() { return hid; } NFCService& getNFC() { return nfc; } DSPService& getDSP() { return dsp; } + Y2RService& getY2R() { return y2r; } }; diff --git a/include/services/y2r.hpp b/include/services/y2r.hpp index 0cc1d587..4aa96d7b 100644 --- a/include/services/y2r.hpp +++ b/include/services/y2r.hpp @@ -113,8 +113,12 @@ class Y2RService { void startConversion(u32 messagePointer); void stopConversion(u32 messagePointer); -public: + bool isBusy; + + public: Y2RService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} void reset(); void handleSyncRequest(u32 messagePointer); + + void signalConversionDone(); }; \ No newline at end of file diff --git a/src/core/kernel/kernel.cpp b/src/core/kernel/kernel.cpp index 392b87fd..0d1efc15 100644 --- a/src/core/kernel/kernel.cpp +++ b/src/core/kernel/kernel.cpp @@ -399,3 +399,5 @@ std::string Kernel::getProcessName(u32 pid) { Helpers::panic("Attempted to name non-current process"); } } + +Scheduler& Kernel::getScheduler() { return cpu.getScheduler(); } diff --git a/src/core/services/y2r.cpp b/src/core/services/y2r.cpp index a796631c..ae0961cf 100644 --- 
a/src/core/services/y2r.cpp +++ b/src/core/services/y2r.cpp @@ -61,6 +61,7 @@ void Y2RService::reset() { inputLineWidth = 420; conversionCoefficients.fill(0); + isBusy = false; } void Y2RService::handleSyncRequest(u32 messagePointer) { @@ -156,6 +157,11 @@ void Y2RService::setTransferEndInterrupt(u32 messagePointer) { void Y2RService::stopConversion(u32 messagePointer) { log("Y2R::StopConversion\n"); + if (isBusy) { + isBusy = false; + kernel.getScheduler().removeEvent(Scheduler::EventType::SignalY2R); + } + mem.write32(messagePointer, IPC::responseHeader(0x27, 1, 0)); mem.write32(messagePointer + 4, Result::Success); } @@ -167,7 +173,7 @@ void Y2RService::isBusyConversion(u32 messagePointer) { mem.write32(messagePointer, IPC::responseHeader(0x28, 2, 0)); mem.write32(messagePointer + 4, Result::Success); - mem.write32(messagePointer + 8, static_cast(BusyStatus::NotBusy)); + mem.write32(messagePointer + 8, static_cast(isBusy ? BusyStatus::Busy : BusyStatus::NotBusy)); } void Y2RService::setBlockAlignment(u32 messagePointer) { @@ -434,11 +440,14 @@ void Y2RService::startConversion(u32 messagePointer) { mem.write32(messagePointer, IPC::responseHeader(0x26, 1, 0)); mem.write32(messagePointer + 4, Result::Success); - // Make Y2R conversion end instantly. - // Signal the transfer end event if it's been created. TODO: Is this affected by SetTransferEndInterrupt? - if (transferEndEvent.has_value()) { - kernel.signalEvent(transferEndEvent.value()); - } + // Schedule Y2R conversion end event. 
+ static constexpr u64 delayTicks = 60'000; + isBusy = true; + + // Remove any potential pending Y2R event and schedule a new one + Scheduler& scheduler = kernel.getScheduler(); + scheduler.removeEvent(Scheduler::EventType::SignalY2R); + scheduler.addEvent(Scheduler::EventType::SignalY2R, scheduler.currentTimestamp + delayTicks); } void Y2RService::isFinishedSendingYUV(u32 messagePointer) { @@ -484,4 +493,15 @@ void Y2RService::isFinishedReceiving(u32 messagePointer) { mem.write32(messagePointer, IPC::responseHeader(0x17, 2, 0)); mem.write32(messagePointer + 4, Result::Success); mem.write32(messagePointer + 8, finished ? 1 : 0); +} + +void Y2RService::signalConversionDone() { + if (isBusy) { + isBusy = false; + + // Signal the transfer end event if it's been created. TODO: Is this affected by SetTransferEndInterrupt? + if (transferEndEvent.has_value()) { + kernel.signalEvent(transferEndEvent.value()); + } + } } \ No newline at end of file diff --git a/src/emulator.cpp b/src/emulator.cpp index 16c3bffd..af156eeb 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -169,6 +169,8 @@ void Emulator::pollScheduler() { break; } + case Scheduler::EventType::SignalY2R: kernel.getServiceManager().getY2R().signalConversionDone(); break; + default: { Helpers::panic("Scheduler: Unimplemented event type received: %d\n", static_cast(eventType)); break; From d4cf54d56cafaf1ae06d26c48e9a4f0ca1596401 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 2 Jul 2024 15:57:39 +0300 Subject: [PATCH 033/251] Tweak Y2R timings --- src/core/services/y2r.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/services/y2r.cpp b/src/core/services/y2r.cpp index ae0961cf..1c7b33cd 100644 --- a/src/core/services/y2r.cpp +++ b/src/core/services/y2r.cpp @@ -441,7 +441,8 @@ void Y2RService::startConversion(u32 messagePointer) { mem.write32(messagePointer + 4, Result::Success); // Schedule Y2R conversion end event. 
- static constexpr u64 delayTicks = 60'000; + // The tick value is tweaked based on the minimum delay needed to get FIFA 15 to not hang due to a race condition on its title screen + static constexpr u64 delayTicks = 1'350'000; isBusy = true; // Remove any potential pending Y2R event and schedule a new one From 800c11ff62a4893dc07e6c0b3eb760394befa9b4 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:19:20 +0300 Subject: [PATCH 034/251] HLE DSP: Add PCM8 audio decoding --- include/audio/hle_core.hpp | 1 + src/core/audio/hle_core.cpp | 22 +++++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index c57f221e..b59dc811 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -176,6 +176,7 @@ namespace Audio { // Decode an entire buffer worth of audio void decodeBuffer(DSPSource& source); + SampleBuffer decodePCM8(const u8* data, usize sampleCount, Source& source); SampleBuffer decodePCM16(const u8* data, usize sampleCount, Source& source); SampleBuffer decodeADPCM(const u8* data, usize sampleCount, Source& source); diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 146c7bdf..12c8f4c8 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -355,7 +355,7 @@ namespace Audio { } switch (buffer.format) { - case SampleFormat::PCM8: Helpers::warn("Unimplemented sample format!"); break; + case SampleFormat::PCM8: source.currentSamples = decodePCM8(data, buffer.sampleCount, source); break; case SampleFormat::PCM16: source.currentSamples = decodePCM16(data, buffer.sampleCount, source); break; case SampleFormat::ADPCM: source.currentSamples = decodeADPCM(data, buffer.sampleCount, source); break; @@ -406,6 +406,26 @@ namespace Audio { } } + HLE_DSP::SampleBuffer HLE_DSP::decodePCM8(const u8* data, usize sampleCount, Source& source) { + SampleBuffer 
decodedSamples(sampleCount); + + if (source.sourceType == SourceType::Stereo) { + for (usize i = 0; i < sampleCount; i++) { + const s16 left = s16(u16(*data++) << 8); + const s16 right = s16(u16(*data++) << 8); + decodedSamples[i] = {left, right}; + } + } else { + // Mono + for (usize i = 0; i < sampleCount; i++) { + const s16 sample = s16(u16(*data++) << 8); + decodedSamples[i] = {sample, sample}; + } + } + + return decodedSamples; + } + HLE_DSP::SampleBuffer HLE_DSP::decodePCM16(const u8* data, usize sampleCount, Source& source) { SampleBuffer decodedSamples(sampleCount); const s16* data16 = reinterpret_cast(data); From de9375122b012ab357a0bf54064422f6e2025c0a Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:30:51 +0300 Subject: [PATCH 035/251] Add SDMC::DeleteFile --- src/core/fs/archive_sdmc.cpp | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/core/fs/archive_sdmc.cpp b/src/core/fs/archive_sdmc.cpp index 6c34de7a..8fda1320 100644 --- a/src/core/fs/archive_sdmc.cpp +++ b/src/core/fs/archive_sdmc.cpp @@ -39,7 +39,35 @@ HorizonResult SDMCArchive::createFile(const FSPath& path, u64 size) { } HorizonResult SDMCArchive::deleteFile(const FSPath& path) { - Helpers::panic("[SDMC] Unimplemented DeleteFile"); + if (path.type == PathType::UTF16) { + if (!isPathSafe(path)) { + Helpers::panic("Unsafe path in SDMC::DeleteFile"); + } + + fs::path p = IOFile::getAppData() / "SDMC"; + p += fs::path(path.utf16_string).make_preferred(); + + if (fs::is_directory(p)) { + Helpers::panic("SDMC::DeleteFile: Tried to delete directory"); + } + + if (!fs::is_regular_file(p)) { + return Result::FS::FileNotFoundAlt; + } + + std::error_code ec; + bool success = fs::remove(p, ec); + + // It might still be possible for fs::remove to fail, if there's eg an open handle to a file being deleted + // In this case, print a warning, but still return success for now + if 
(!success) { + Helpers::warn("SDMC::DeleteFile: fs::remove failed\n"); + } + + return Result::Success; + } + + Helpers::panic("SaveDataArchive::DeleteFile: Unknown path type"); return Result::Success; } From 0fe62f9b46153e0d6f72571650454814ae0e7cf1 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:32:17 +0300 Subject: [PATCH 036/251] Correct archive names --- src/core/fs/archive_sdmc.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/fs/archive_sdmc.cpp b/src/core/fs/archive_sdmc.cpp index 8fda1320..97b02b9e 100644 --- a/src/core/fs/archive_sdmc.cpp +++ b/src/core/fs/archive_sdmc.cpp @@ -67,7 +67,7 @@ HorizonResult SDMCArchive::deleteFile(const FSPath& path) { return Result::Success; } - Helpers::panic("SaveDataArchive::DeleteFile: Unknown path type"); + Helpers::panic("SDMCArchive::DeleteFile: Unknown path type"); return Result::Success; } @@ -173,7 +173,7 @@ Rust::Result SDMCArchive::openDirectory(const F if (path.type == PathType::UTF16) { if (!isPathSafe(path)) { - Helpers::panic("Unsafe path in SaveData::OpenDirectory"); + Helpers::panic("Unsafe path in SDMC::OpenDirectory"); } fs::path p = IOFile::getAppData() / "SDMC"; From 0e4079f30457a28f3ba5fe60fb775cd089e781cd Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 3 Jul 2024 16:23:43 +0300 Subject: [PATCH 037/251] a64 shader recompiler: Add DPH/DPHI --- .../dynapica/shader_rec_emitter_arm64.cpp | 37 ++++++++++++++++++- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp index d6358070..15200e76 100644 --- a/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp +++ b/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp @@ -144,8 +144,8 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) { case ShaderOpcodes::CMP2: 
recCMP(shaderUnit, instruction); break; case ShaderOpcodes::DP3: recDP3(shaderUnit, instruction); break; case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break; - // case ShaderOpcodes::DPH: - // case ShaderOpcodes::DPHI: recDPH(shaderUnit, instruction); break; + case ShaderOpcodes::DPH: + case ShaderOpcodes::DPHI: recDPH(shaderUnit, instruction); break; case ShaderOpcodes::END: recEND(shaderUnit, instruction); break; case ShaderOpcodes::EX2: recEX2(shaderUnit, instruction); break; case ShaderOpcodes::FLR: recFLR(shaderUnit, instruction); break; @@ -533,6 +533,39 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) { storeRegister(src1Vec, shader, dest, operandDescriptor); } +void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) { + const bool isDPHI = (instruction >> 26) == ShaderOpcodes::DPHI; + + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; + const u32 src1 = isDPHI ? getBits<14, 5>(instruction) : getBits<12, 7>(instruction); + const u32 src2 = isDPHI ? getBits<7, 7>(instruction) : getBits<7, 5>(instruction); + const u32 idx = getBits<19, 2>(instruction); + const u32 dest = getBits<21, 5>(instruction); + const u32 writeMask = getBits<0, 4>(operandDescriptor); + + // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) + loadRegister<1>(src1Vec, shader, src1, isDPHI ? 0 : idx, operandDescriptor); + loadRegister<2>(src2Vec, shader, src2, isDPHI ? 
idx : 0, operandDescriptor); + // // Attach 1.0 to the w component of src1 + MOV(src1Vec.Selem()[3], onesVector.Selem()[0]); + + // Now perform a DP4 + // Do a piecewise multiplication of the vectors first + if constexpr (useSafeMUL) { + emitSafeMUL(src1Vec, src2Vec, scratch1Vec); + } else { + FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4()); + } + FADDP(src1Vec.S4(), src1Vec.S4(), src1Vec.S4()); // Now add the adjacent components together + FADDP(src1Vec.toS(), src1Vec.toD().S2()); // Again for the bottom 2 lanes. Now the bottom lane contains the dot product + + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + DUP(src1Vec.S4(), src1Vec.Selem()[0]); // src1Vec = src1Vec.xxxx + } + + storeRegister(src1Vec, shader, dest, operandDescriptor); +} + oaknut::Label ShaderEmitter::emitLog2Func() { oaknut::Label funcStart; From 31902e92a98ce3b68dc2ac6a153d3d27f865cf3a Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 3 Jul 2024 16:27:17 +0300 Subject: [PATCH 038/251] Enable shader JIT by default on arm64 desktop + Android --- include/config.hpp | 2 +- .../java/com/panda3ds/pandroid/data/config/GlobalConfig.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/config.hpp b/include/config.hpp index 2333c682..339e651c 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -7,7 +7,7 @@ // Remember to initialize every field here to its default value otherwise bad things will happen struct EmulatorConfig { // Only enable the shader JIT by default on platforms where it's completely tested -#ifdef PANDA3DS_X64_HOST +#if defined(PANDA3DS_X64_HOST) || defined(PANDA3DS_ARM64_HOST) static constexpr bool shaderJitDefault = true; #else static constexpr bool shaderJitDefault = false; diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/data/config/GlobalConfig.java 
b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/data/config/GlobalConfig.java index 21645b7e..448d561a 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/data/config/GlobalConfig.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/data/config/GlobalConfig.java @@ -21,7 +21,7 @@ public class GlobalConfig { public static DataModel data; - public static final Key KEY_SHADER_JIT = new Key<>("emu.shader_jit", false); + public static final Key KEY_SHADER_JIT = new Key<>("emu.shader_jit", true); public static final Key KEY_PICTURE_IN_PICTURE = new Key<>("app.behavior.pictureInPicture", false); public static final Key KEY_SHOW_PERFORMANCE_OVERLAY = new Key<>("dev.performanceOverlay", false); public static final Key KEY_LOGGER_SERVICE = new Key<>("dev.loggerService", false); From d47e964c8022c4d21b11e2c81c9947fc3172137f Mon Sep 17 00:00:00 2001 From: Jonian Guveli Date: Thu, 20 Jun 2024 11:18:31 +0300 Subject: [PATCH 039/251] Libretro: Initial implementation --- CMakeLists.txt | 14 +- src/libretro_core.cpp | 359 ++ third_party/libretro/include/libretro.h | 4405 +++++++++++++++++++++++ 3 files changed, 4777 insertions(+), 1 deletion(-) create mode 100644 src/libretro_core.cpp create mode 100644 third_party/libretro/include/libretro.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 80114bfa..2897560b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,11 +40,19 @@ option(ENABLE_DISCORD_RPC "Compile with Discord RPC support (disabled by default option(ENABLE_LUAJIT "Enable scripting with the Lua programming language" ON) option(ENABLE_QT_GUI "Enable the Qt GUI. 
If not selected then the emulator uses a minimal SDL-based UI instead" OFF) option(BUILD_HYDRA_CORE "Build a Hydra core" OFF) +option(BUILD_LIBRETRO_CORE "Build a Libretro core" OFF) if(BUILD_HYDRA_CORE) set(CMAKE_POSITION_INDEPENDENT_CODE ON) endif() +if(BUILD_LIBRETRO_CORE) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + set(ENABLE_DISCORD_RPC OFF) + set(ENABLE_LUAJIT OFF) + add_definitions(-D__LIBRETRO__) +endif() + add_library(AlberCore STATIC) include_directories(${PROJECT_SOURCE_DIR}/include/) @@ -438,7 +446,7 @@ else() target_compile_definitions(AlberCore PUBLIC "PANDA3DS_FRONTEND_SDL=1") endif() -if(NOT BUILD_HYDRA_CORE) +if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) add_executable(Alber) if(ENABLE_QT_GUI) @@ -500,6 +508,10 @@ elseif(BUILD_HYDRA_CORE) include_directories(third_party/hydra_core/include) add_library(Alber SHARED src/hydra_core.cpp) target_link_libraries(Alber PUBLIC AlberCore) +elseif(BUILD_LIBRETRO_CORE) + include_directories(third_party/libretro/include) + add_library(panda3ds_libretro SHARED src/libretro_core.cpp) + target_link_libraries(panda3ds_libretro PUBLIC AlberCore) endif() if(ENABLE_LTO OR ENABLE_USER_BUILD) diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp new file mode 100644 index 00000000..ff57f0c8 --- /dev/null +++ b/src/libretro_core.cpp @@ -0,0 +1,359 @@ +#include +#include + +#include + +#include +#include + +static retro_environment_t environ_cb; +static retro_video_refresh_t video_cb; +static retro_audio_sample_batch_t audio_batch_cb; +static retro_input_poll_t input_poll_cb; +static retro_input_state_t input_state_cb; + +static struct retro_hw_render_callback hw_render; + +std::unique_ptr emulator; +RendererGL* renderer; + +static void* GetProcAddress(const char* name) { + return (void*)hw_render.get_proc_address(name); +} + +static void VideoResetContext(void) { +#ifdef USING_GLES + if (!gladLoadGLES2Loader(reinterpret_cast(GetProcAddress))) { + Helpers::panic("OpenGL ES init failed"); + } +#else + if 
(!gladLoadGLLoader(reinterpret_cast(GetProcAddress))) { + Helpers::panic("OpenGL init failed"); + } +#endif + + emulator->initGraphicsContext(nullptr); +} + +static void VideoDestroyContext(void) { + emulator->deinitGraphicsContext(); +} + +static bool SetHWRender(retro_hw_context_type type) { + hw_render.context_type = type; + hw_render.context_reset = VideoResetContext; + hw_render.context_destroy = VideoDestroyContext; + hw_render.bottom_left_origin = true; + + switch (type) { + case RETRO_HW_CONTEXT_OPENGL_CORE: + hw_render.version_major = 3; + hw_render.version_minor = 3; + + if (environ_cb(RETRO_ENVIRONMENT_SET_HW_RENDER, &hw_render)) { + return true; + } + break; + case RETRO_HW_CONTEXT_OPENGLES3: + case RETRO_HW_CONTEXT_OPENGL: + hw_render.version_major = 3; + hw_render.version_minor = 0; + + if (environ_cb(RETRO_ENVIRONMENT_SET_HW_RENDER, &hw_render)) { + return true; + } + break; + default: + break; + } + + return false; +} + +static void VideoInit(void) { + retro_hw_context_type preferred = RETRO_HW_CONTEXT_NONE; + environ_cb(RETRO_ENVIRONMENT_GET_PREFERRED_HW_RENDER, &preferred); + + if (preferred && SetHWRender(preferred)) + return; + if (SetHWRender(RETRO_HW_CONTEXT_OPENGL_CORE)) + return; + if (SetHWRender(RETRO_HW_CONTEXT_OPENGL)) + return; + if (SetHWRender(RETRO_HW_CONTEXT_OPENGLES3)) + return; + + hw_render.context_type = RETRO_HW_CONTEXT_NONE; +} + +static bool GetButtonState(unsigned id) { + return input_state_cb(0, RETRO_DEVICE_JOYPAD, 0, id); +} + +static float GetAxisState(unsigned index, unsigned id) { + return input_state_cb(0, RETRO_DEVICE_ANALOG, index, id); +} + +static void InputInit(void) { + static const struct retro_controller_description controllers[] = { + { "Nintendo 3DS", RETRO_DEVICE_JOYPAD }, + { NULL, 0 }, + }; + + static const struct retro_controller_info ports[] = { + { controllers, 1 }, + { NULL, 0 }, + }; + + environ_cb(RETRO_ENVIRONMENT_SET_CONTROLLER_INFO, (void*)ports); + + struct retro_input_descriptor desc[] = { + { 
0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "Left" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "Up" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "Down" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "Right" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "A" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "B" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT, "Select" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "R" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "L" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "X" }, + { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "Y" }, + { 0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Circle Pad X" }, + { 0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Circle Pad Y" }, + { 0 }, + }; + + environ_cb(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, &desc); +} + +static std::string FetchVariable(std::string key, std::string def) { + struct retro_variable var = { nullptr }; + var.key = key.c_str(); + + if (!environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value == nullptr) { + Helpers::warn("Fetching variable %s failed.", key); + return def; + } + + return std::string(var.value); +} + +static bool FetchVariableBool(std::string key, bool def) { + return FetchVariable(key, def ? 
"enabled" : "disabled") == "enabled"; +} + +static void ConfigInit() { + static const retro_variable values[] = { + { "panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled" }, + { "panda3ds_use_vsync", "Enable VSync; enabled|disabled" }, + { "panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE" }, + { "panda3ds_use_audio", "Enable audio; disabled|enabled" }, + { "panda3ds_use_virtual_sd", "Enable virtual SD card; enabled|disabled" }, + { "panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled" }, + { "panda3ds_battery_level", "Battery percentage; 5|10|20|30|50|70|90|100" }, + { "panda3ds_use_charger", "Charger plugged; enabled|disabled" }, + { nullptr, nullptr } + }; + + environ_cb(RETRO_ENVIRONMENT_SET_VARIABLES, (void*)values); +} + +static void ConfigUpdate() { + EmulatorConfig& config = emulator->getConfig(); + + config.rendererType = RendererType::OpenGL; + config.vsyncEnabled = FetchVariableBool("panda3ds_use_vsync", true); + config.shaderJitEnabled = FetchVariableBool("panda3ds_use_shader_jit", true); + config.chargerPlugged = FetchVariableBool("panda3ds_use_charger", true); + config.batteryPercentage = std::clamp(std::stoi(FetchVariable("panda3ds_battery_level", "5")), 0, 100); + config.dspType = Audio::DSPCore::typeFromString(FetchVariable("panda3ds_dsp_emulation", "null")); + config.audioEnabled = FetchVariableBool("panda3ds_use_audio", false); + config.sdCardInserted = FetchVariableBool("panda3ds_use_virtual_sd", true); + config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false); + config.discordRpcEnabled = false; +} + +void retro_get_system_info(retro_system_info* info) { + info->need_fullpath = true; + info->valid_extensions = "3ds|3dsx|elf|axf|cci|cxi|app"; + info->library_version = "0.8"; + info->library_name = "Panda3DS"; + info->block_extract = true; +} + +void retro_get_system_av_info(retro_system_av_info* info) { + info->geometry.base_width = emulator->width; + 
info->geometry.base_height = emulator->height; + + info->geometry.max_width = info->geometry.base_width; + info->geometry.max_height = info->geometry.base_height; + + info->geometry.aspect_ratio = 5.0 / 6.0; + info->timing.fps = 60.0; + info->timing.sample_rate = 32000; +} + +void retro_set_environment(retro_environment_t cb) { + environ_cb = cb; +} + +void retro_set_video_refresh(retro_video_refresh_t cb) { + video_cb = cb; +} + +void retro_set_audio_sample_batch(retro_audio_sample_batch_t cb) { + audio_batch_cb = cb; +} + +void retro_set_audio_sample(retro_audio_sample_t cb) { +} + +void retro_set_input_poll(retro_input_poll_t cb) { + input_poll_cb = cb; +} + +void retro_set_input_state(retro_input_state_t cb) { + input_state_cb = cb; +} + +void retro_init(void) { + enum retro_pixel_format xrgb888 = RETRO_PIXEL_FORMAT_XRGB8888; + environ_cb(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &xrgb888); + + emulator = std::make_unique(); +} + +void retro_deinit(void) { + emulator = nullptr; +} + +bool retro_load_game(const struct retro_game_info* game) { + ConfigInit(); + ConfigUpdate(); + + if (emulator->getRendererType() != RendererType::OpenGL) { + throw std::runtime_error("Libretro: Renderer is not OpenGL"); + } + + renderer = static_cast(emulator->getRenderer()); + emulator->setOutputSize(emulator->width, emulator->height); + + InputInit(); + VideoInit(); + + return emulator->loadROM(game->path); +} + +bool retro_load_game_special(unsigned type, const struct retro_game_info* info, size_t num) { + return false; +} + +void retro_unload_game(void) { + renderer->setFBO(0); + renderer = nullptr; +} + +void retro_reset(void) { + emulator->reset(Emulator::ReloadOption::Reload); +} + +void retro_run(void) { + renderer->setFBO(hw_render.get_current_framebuffer()); + renderer->resetStateManager(); + + input_poll_cb(); + + HIDService& hid = emulator->getServiceManager().getHID(); + + hid.setKey(HID::Keys::A, GetButtonState(RETRO_DEVICE_ID_JOYPAD_A)); + hid.setKey(HID::Keys::B, 
GetButtonState(RETRO_DEVICE_ID_JOYPAD_B)); + hid.setKey(HID::Keys::X, GetButtonState(RETRO_DEVICE_ID_JOYPAD_X)); + hid.setKey(HID::Keys::Y, GetButtonState(RETRO_DEVICE_ID_JOYPAD_Y)); + hid.setKey(HID::Keys::L, GetButtonState(RETRO_DEVICE_ID_JOYPAD_L)); + hid.setKey(HID::Keys::R, GetButtonState(RETRO_DEVICE_ID_JOYPAD_R)); + hid.setKey(HID::Keys::Start, GetButtonState(RETRO_DEVICE_ID_JOYPAD_START)); + hid.setKey(HID::Keys::Select, GetButtonState(RETRO_DEVICE_ID_JOYPAD_SELECT)); + hid.setKey(HID::Keys::Up, GetButtonState(RETRO_DEVICE_ID_JOYPAD_UP)); + hid.setKey(HID::Keys::Down, GetButtonState(RETRO_DEVICE_ID_JOYPAD_DOWN)); + hid.setKey(HID::Keys::Left, GetButtonState(RETRO_DEVICE_ID_JOYPAD_LEFT)); + hid.setKey(HID::Keys::Right, GetButtonState(RETRO_DEVICE_ID_JOYPAD_RIGHT)); + + float x_left = GetAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X); + float y_left = GetAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y); + + hid.setCirclepadX(x_left == 0 ? 0 : x_left < 0 ? -0x9C : 0x9C); + hid.setCirclepadY(y_left == 0 ? 0 : y_left > 0 ? 
-0x9C : 0x9C); + + bool touch = input_state_cb(0, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_LEFT); + auto pos_x = input_state_cb(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_X); + auto pos_y = input_state_cb(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_Y); + + auto new_x = static_cast((pos_x + 0x7fff) / (float)(0x7fff * 2) * emulator->width); + auto new_y = static_cast((pos_y + 0x7fff) / (float)(0x7fff * 2) * emulator->height); + + auto off_x = 40; + auto off_y = emulator->height / 2; + + bool scr_x = new_x >= off_x && new_x < emulator->width - off_x; + bool scr_y = new_y >= off_y && new_y <= emulator->height; + + if (touch && scr_y && scr_x) { + u16 x = static_cast(new_x - off_x); + u16 y = static_cast(new_y - off_y); + + hid.setTouchScreenPress(x, y); + } else { + hid.releaseTouchScreen(); + } + + hid.updateInputs(emulator->getTicks()); + + emulator->runFrame(); + video_cb(RETRO_HW_FRAME_BUFFER_VALID, emulator->width, emulator->height, 0); +} + +void retro_set_controller_port_device(unsigned port, unsigned device) { +} + +size_t retro_serialize_size(void) { + size_t size = 0; + return size; +} + +bool retro_serialize(void* data, size_t size) { + return false; +} + +bool retro_unserialize(const void* data, size_t size) { + return false; +} + +unsigned retro_get_region(void) { + return RETRO_REGION_NTSC; +} + +unsigned retro_api_version() { + return RETRO_API_VERSION; +} + +size_t retro_get_memory_size(unsigned id) { + if (id == RETRO_MEMORY_SYSTEM_RAM) { + return 0; + } + return 0; +} + +void* retro_get_memory_data(unsigned id) { + if (id == RETRO_MEMORY_SYSTEM_RAM) { + return 0; + } + return NULL; +} + +void retro_cheat_set(unsigned index, bool enabled, const char* code) { +} + +void retro_cheat_reset(void) { +} diff --git a/third_party/libretro/include/libretro.h b/third_party/libretro/include/libretro.h new file mode 100644 index 00000000..96d07df4 --- /dev/null +++ b/third_party/libretro/include/libretro.h @@ -0,0 +1,4405 @@ +/* Copyright (C) 
2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this libretro API header (libretro.h). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef LIBRETRO_H__ +#define LIBRETRO_H__ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef __cplusplus +#if defined(_MSC_VER) && _MSC_VER < 1800 && !defined(SN_TARGET_PS3) +/* Hack applied for MSVC when compiling in C89 mode + * as it isn't C99-compliant. 
*/ +#define bool unsigned char +#define true 1 +#define false 0 +#else +#include +#endif +#endif + +#ifndef RETRO_CALLCONV +# if defined(__GNUC__) && defined(__i386__) && !defined(__x86_64__) +# define RETRO_CALLCONV __attribute__((cdecl)) +# elif defined(_MSC_VER) && defined(_M_X86) && !defined(_M_X64) +# define RETRO_CALLCONV __cdecl +# else +# define RETRO_CALLCONV /* all other platforms only have one calling convention each */ +# endif +#endif + +#ifndef RETRO_API +# if defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) +# ifdef RETRO_IMPORT_SYMBOLS +# ifdef __GNUC__ +# define RETRO_API RETRO_CALLCONV __attribute__((__dllimport__)) +# else +# define RETRO_API RETRO_CALLCONV __declspec(dllimport) +# endif +# else +# ifdef __GNUC__ +# define RETRO_API RETRO_CALLCONV __attribute__((__dllexport__)) +# else +# define RETRO_API RETRO_CALLCONV __declspec(dllexport) +# endif +# endif +# else +# if defined(__GNUC__) && __GNUC__ >= 4 +# define RETRO_API RETRO_CALLCONV __attribute__((__visibility__("default"))) +# else +# define RETRO_API RETRO_CALLCONV +# endif +# endif +#endif + +/* Used for checking API/ABI mismatches that can break libretro + * implementations. + * It is not incremented for compatible changes to the API. + */ +#define RETRO_API_VERSION 1 + +/* + * Libretro's fundamental device abstractions. + * + * Libretro's input system consists of some standardized device types, + * such as a joypad (with/without analog), mouse, keyboard, lightgun + * and a pointer. + * + * The functionality of these devices are fixed, and individual cores + * map their own concept of a controller to libretro's abstractions. + * This makes it possible for frontends to map the abstract types to a + * real input device, and not having to worry about binding input + * correctly to arbitrary controller layouts. 
+ */ + +#define RETRO_DEVICE_TYPE_SHIFT 8 +#define RETRO_DEVICE_MASK ((1 << RETRO_DEVICE_TYPE_SHIFT) - 1) +#define RETRO_DEVICE_SUBCLASS(base, id) (((id + 1) << RETRO_DEVICE_TYPE_SHIFT) | base) + +/* Input disabled. */ +#define RETRO_DEVICE_NONE 0 + +/* The JOYPAD is called RetroPad. It is essentially a Super Nintendo + * controller, but with additional L2/R2/L3/R3 buttons, similar to a + * PS1 DualShock. */ +#define RETRO_DEVICE_JOYPAD 1 + +/* The mouse is a simple mouse, similar to Super Nintendo's mouse. + * X and Y coordinates are reported relatively to last poll (poll callback). + * It is up to the libretro implementation to keep track of where the mouse + * pointer is supposed to be on the screen. + * The frontend must make sure not to interfere with its own hardware + * mouse pointer. + */ +#define RETRO_DEVICE_MOUSE 2 + +/* KEYBOARD device lets one poll for raw key pressed. + * It is poll based, so input callback will return with the current + * pressed state. + * For event/text based keyboard input, see + * RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK. + */ +#define RETRO_DEVICE_KEYBOARD 3 + +/* LIGHTGUN device is similar to Guncon-2 for PlayStation 2. + * It reports X/Y coordinates in screen space (similar to the pointer) + * in the range [-0x8000, 0x7fff] in both axes, with zero being center and + * -0x8000 being out of bounds. + * As well as reporting on/off screen state. It features a trigger, + * start/select buttons, auxiliary action buttons and a + * directional pad. A forced off-screen shot can be requested for + * auto-reloading function in some games. + */ +#define RETRO_DEVICE_LIGHTGUN 4 + +/* The ANALOG device is an extension to JOYPAD (RetroPad). + * Similar to DualShock2 it adds two analog sticks and all buttons can + * be analog. This is treated as a separate device type as it returns + * axis values in the full analog range of [-0x7fff, 0x7fff], + * although some devices may return -0x8000. + * Positive X axis is right. Positive Y axis is down. 
+ * Buttons are returned in the range [0, 0x7fff]. + * Only use ANALOG type when polling for analog values. + */ +#define RETRO_DEVICE_ANALOG 5 + +/* Abstracts the concept of a pointing mechanism, e.g. touch. + * This allows libretro to query in absolute coordinates where on the + * screen a mouse (or something similar) is being placed. + * For a touch centric device, coordinates reported are the coordinates + * of the press. + * + * Coordinates in X and Y are reported as: + * [-0x7fff, 0x7fff]: -0x7fff corresponds to the far left/top of the screen, + * and 0x7fff corresponds to the far right/bottom of the screen. + * The "screen" is here defined as area that is passed to the frontend and + * later displayed on the monitor. + * + * The frontend is free to scale/resize this screen as it sees fit, however, + * (X, Y) = (-0x7fff, -0x7fff) will correspond to the top-left pixel of the + * game image, etc. + * + * To check if the pointer coordinates are valid (e.g. a touch display + * actually being touched), PRESSED returns 1 or 0. + * + * If using a mouse on a desktop, PRESSED will usually correspond to the + * left mouse button, but this is a frontend decision. + * PRESSED will only return 1 if the pointer is inside the game screen. + * + * For multi-touch, the index variable can be used to successively query + * more presses. + * If index = 0 returns true for _PRESSED, coordinates can be extracted + * with _X, _Y for index = 0. One can then query _PRESSED, _X, _Y with + * index = 1, and so on. + * Eventually _PRESSED will return false for an index. No further presses + * are registered at this point. */ +#define RETRO_DEVICE_POINTER 6 + +/* Buttons for the RetroPad (JOYPAD). + * The placement of these is equivalent to placements on the + * Super Nintendo controller. + * L2/R2/L3/R3 buttons correspond to the PS1 DualShock. 
+ * Also used as id values for RETRO_DEVICE_INDEX_ANALOG_BUTTON */ +#define RETRO_DEVICE_ID_JOYPAD_B 0 +#define RETRO_DEVICE_ID_JOYPAD_Y 1 +#define RETRO_DEVICE_ID_JOYPAD_SELECT 2 +#define RETRO_DEVICE_ID_JOYPAD_START 3 +#define RETRO_DEVICE_ID_JOYPAD_UP 4 +#define RETRO_DEVICE_ID_JOYPAD_DOWN 5 +#define RETRO_DEVICE_ID_JOYPAD_LEFT 6 +#define RETRO_DEVICE_ID_JOYPAD_RIGHT 7 +#define RETRO_DEVICE_ID_JOYPAD_A 8 +#define RETRO_DEVICE_ID_JOYPAD_X 9 +#define RETRO_DEVICE_ID_JOYPAD_L 10 +#define RETRO_DEVICE_ID_JOYPAD_R 11 +#define RETRO_DEVICE_ID_JOYPAD_L2 12 +#define RETRO_DEVICE_ID_JOYPAD_R2 13 +#define RETRO_DEVICE_ID_JOYPAD_L3 14 +#define RETRO_DEVICE_ID_JOYPAD_R3 15 + +#define RETRO_DEVICE_ID_JOYPAD_MASK 256 + +/* Index / Id values for ANALOG device. */ +#define RETRO_DEVICE_INDEX_ANALOG_LEFT 0 +#define RETRO_DEVICE_INDEX_ANALOG_RIGHT 1 +#define RETRO_DEVICE_INDEX_ANALOG_BUTTON 2 +#define RETRO_DEVICE_ID_ANALOG_X 0 +#define RETRO_DEVICE_ID_ANALOG_Y 1 + +/* Id values for MOUSE. */ +#define RETRO_DEVICE_ID_MOUSE_X 0 +#define RETRO_DEVICE_ID_MOUSE_Y 1 +#define RETRO_DEVICE_ID_MOUSE_LEFT 2 +#define RETRO_DEVICE_ID_MOUSE_RIGHT 3 +#define RETRO_DEVICE_ID_MOUSE_WHEELUP 4 +#define RETRO_DEVICE_ID_MOUSE_WHEELDOWN 5 +#define RETRO_DEVICE_ID_MOUSE_MIDDLE 6 +#define RETRO_DEVICE_ID_MOUSE_HORIZ_WHEELUP 7 +#define RETRO_DEVICE_ID_MOUSE_HORIZ_WHEELDOWN 8 +#define RETRO_DEVICE_ID_MOUSE_BUTTON_4 9 +#define RETRO_DEVICE_ID_MOUSE_BUTTON_5 10 + +/* Id values for LIGHTGUN. 
*/ +#define RETRO_DEVICE_ID_LIGHTGUN_SCREEN_X 13 /*Absolute Position*/ +#define RETRO_DEVICE_ID_LIGHTGUN_SCREEN_Y 14 /*Absolute*/ +#define RETRO_DEVICE_ID_LIGHTGUN_IS_OFFSCREEN 15 /*Status Check*/ +#define RETRO_DEVICE_ID_LIGHTGUN_TRIGGER 2 +#define RETRO_DEVICE_ID_LIGHTGUN_RELOAD 16 /*Forced off-screen shot*/ +#define RETRO_DEVICE_ID_LIGHTGUN_AUX_A 3 +#define RETRO_DEVICE_ID_LIGHTGUN_AUX_B 4 +#define RETRO_DEVICE_ID_LIGHTGUN_START 6 +#define RETRO_DEVICE_ID_LIGHTGUN_SELECT 7 +#define RETRO_DEVICE_ID_LIGHTGUN_AUX_C 8 +#define RETRO_DEVICE_ID_LIGHTGUN_DPAD_UP 9 +#define RETRO_DEVICE_ID_LIGHTGUN_DPAD_DOWN 10 +#define RETRO_DEVICE_ID_LIGHTGUN_DPAD_LEFT 11 +#define RETRO_DEVICE_ID_LIGHTGUN_DPAD_RIGHT 12 +/* deprecated */ +#define RETRO_DEVICE_ID_LIGHTGUN_X 0 /*Relative Position*/ +#define RETRO_DEVICE_ID_LIGHTGUN_Y 1 /*Relative*/ +#define RETRO_DEVICE_ID_LIGHTGUN_CURSOR 3 /*Use Aux:A*/ +#define RETRO_DEVICE_ID_LIGHTGUN_TURBO 4 /*Use Aux:B*/ +#define RETRO_DEVICE_ID_LIGHTGUN_PAUSE 5 /*Use Start*/ + +/* Id values for POINTER. */ +#define RETRO_DEVICE_ID_POINTER_X 0 +#define RETRO_DEVICE_ID_POINTER_Y 1 +#define RETRO_DEVICE_ID_POINTER_PRESSED 2 +#define RETRO_DEVICE_ID_POINTER_COUNT 3 + +/* Returned from retro_get_region(). 
*/ +#define RETRO_REGION_NTSC 0 +#define RETRO_REGION_PAL 1 + +/* Id values for LANGUAGE */ +enum retro_language +{ + RETRO_LANGUAGE_ENGLISH = 0, + RETRO_LANGUAGE_JAPANESE = 1, + RETRO_LANGUAGE_FRENCH = 2, + RETRO_LANGUAGE_SPANISH = 3, + RETRO_LANGUAGE_GERMAN = 4, + RETRO_LANGUAGE_ITALIAN = 5, + RETRO_LANGUAGE_DUTCH = 6, + RETRO_LANGUAGE_PORTUGUESE_BRAZIL = 7, + RETRO_LANGUAGE_PORTUGUESE_PORTUGAL = 8, + RETRO_LANGUAGE_RUSSIAN = 9, + RETRO_LANGUAGE_KOREAN = 10, + RETRO_LANGUAGE_CHINESE_TRADITIONAL = 11, + RETRO_LANGUAGE_CHINESE_SIMPLIFIED = 12, + RETRO_LANGUAGE_ESPERANTO = 13, + RETRO_LANGUAGE_POLISH = 14, + RETRO_LANGUAGE_VIETNAMESE = 15, + RETRO_LANGUAGE_ARABIC = 16, + RETRO_LANGUAGE_GREEK = 17, + RETRO_LANGUAGE_TURKISH = 18, + RETRO_LANGUAGE_SLOVAK = 19, + RETRO_LANGUAGE_PERSIAN = 20, + RETRO_LANGUAGE_HEBREW = 21, + RETRO_LANGUAGE_ASTURIAN = 22, + RETRO_LANGUAGE_FINNISH = 23, + RETRO_LANGUAGE_INDONESIAN = 24, + RETRO_LANGUAGE_SWEDISH = 25, + RETRO_LANGUAGE_UKRAINIAN = 26, + RETRO_LANGUAGE_CZECH = 27, + RETRO_LANGUAGE_CATALAN_VALENCIA = 28, + RETRO_LANGUAGE_CATALAN = 29, + RETRO_LANGUAGE_BRITISH_ENGLISH = 30, + RETRO_LANGUAGE_HUNGARIAN = 31, + RETRO_LANGUAGE_BELARUSIAN = 32, + RETRO_LANGUAGE_LAST, + + /* Ensure sizeof(enum) == sizeof(int) */ + RETRO_LANGUAGE_DUMMY = INT_MAX +}; + +/* Passed to retro_get_memory_data/size(). + * If the memory type doesn't apply to the + * implementation NULL/0 can be returned. + */ +#define RETRO_MEMORY_MASK 0xff + +/* Regular save RAM. This RAM is usually found on a game cartridge, + * backed up by a battery. + * If save game data is too complex for a single memory buffer, + * the SAVE_DIRECTORY (preferably) or SYSTEM_DIRECTORY environment + * callback can be used. */ +#define RETRO_MEMORY_SAVE_RAM 0 + +/* Some games have a built-in clock to keep track of time. + * This memory is usually just a couple of bytes to keep track of time. + */ +#define RETRO_MEMORY_RTC 1 + +/* System ram lets a frontend peek into a game systems main RAM. 
*/ +#define RETRO_MEMORY_SYSTEM_RAM 2 + +/* Video ram lets a frontend peek into a game systems video RAM (VRAM). */ +#define RETRO_MEMORY_VIDEO_RAM 3 + +/* Keysyms used for ID in input state callback when polling RETRO_KEYBOARD. */ +enum retro_key +{ + RETROK_UNKNOWN = 0, + RETROK_FIRST = 0, + RETROK_BACKSPACE = 8, + RETROK_TAB = 9, + RETROK_CLEAR = 12, + RETROK_RETURN = 13, + RETROK_PAUSE = 19, + RETROK_ESCAPE = 27, + RETROK_SPACE = 32, + RETROK_EXCLAIM = 33, + RETROK_QUOTEDBL = 34, + RETROK_HASH = 35, + RETROK_DOLLAR = 36, + RETROK_AMPERSAND = 38, + RETROK_QUOTE = 39, + RETROK_LEFTPAREN = 40, + RETROK_RIGHTPAREN = 41, + RETROK_ASTERISK = 42, + RETROK_PLUS = 43, + RETROK_COMMA = 44, + RETROK_MINUS = 45, + RETROK_PERIOD = 46, + RETROK_SLASH = 47, + RETROK_0 = 48, + RETROK_1 = 49, + RETROK_2 = 50, + RETROK_3 = 51, + RETROK_4 = 52, + RETROK_5 = 53, + RETROK_6 = 54, + RETROK_7 = 55, + RETROK_8 = 56, + RETROK_9 = 57, + RETROK_COLON = 58, + RETROK_SEMICOLON = 59, + RETROK_LESS = 60, + RETROK_EQUALS = 61, + RETROK_GREATER = 62, + RETROK_QUESTION = 63, + RETROK_AT = 64, + RETROK_LEFTBRACKET = 91, + RETROK_BACKSLASH = 92, + RETROK_RIGHTBRACKET = 93, + RETROK_CARET = 94, + RETROK_UNDERSCORE = 95, + RETROK_BACKQUOTE = 96, + RETROK_a = 97, + RETROK_b = 98, + RETROK_c = 99, + RETROK_d = 100, + RETROK_e = 101, + RETROK_f = 102, + RETROK_g = 103, + RETROK_h = 104, + RETROK_i = 105, + RETROK_j = 106, + RETROK_k = 107, + RETROK_l = 108, + RETROK_m = 109, + RETROK_n = 110, + RETROK_o = 111, + RETROK_p = 112, + RETROK_q = 113, + RETROK_r = 114, + RETROK_s = 115, + RETROK_t = 116, + RETROK_u = 117, + RETROK_v = 118, + RETROK_w = 119, + RETROK_x = 120, + RETROK_y = 121, + RETROK_z = 122, + RETROK_LEFTBRACE = 123, + RETROK_BAR = 124, + RETROK_RIGHTBRACE = 125, + RETROK_TILDE = 126, + RETROK_DELETE = 127, + + RETROK_KP0 = 256, + RETROK_KP1 = 257, + RETROK_KP2 = 258, + RETROK_KP3 = 259, + RETROK_KP4 = 260, + RETROK_KP5 = 261, + RETROK_KP6 = 262, + RETROK_KP7 = 263, + RETROK_KP8 = 264, + 
RETROK_KP9 = 265, + RETROK_KP_PERIOD = 266, + RETROK_KP_DIVIDE = 267, + RETROK_KP_MULTIPLY = 268, + RETROK_KP_MINUS = 269, + RETROK_KP_PLUS = 270, + RETROK_KP_ENTER = 271, + RETROK_KP_EQUALS = 272, + + RETROK_UP = 273, + RETROK_DOWN = 274, + RETROK_RIGHT = 275, + RETROK_LEFT = 276, + RETROK_INSERT = 277, + RETROK_HOME = 278, + RETROK_END = 279, + RETROK_PAGEUP = 280, + RETROK_PAGEDOWN = 281, + + RETROK_F1 = 282, + RETROK_F2 = 283, + RETROK_F3 = 284, + RETROK_F4 = 285, + RETROK_F5 = 286, + RETROK_F6 = 287, + RETROK_F7 = 288, + RETROK_F8 = 289, + RETROK_F9 = 290, + RETROK_F10 = 291, + RETROK_F11 = 292, + RETROK_F12 = 293, + RETROK_F13 = 294, + RETROK_F14 = 295, + RETROK_F15 = 296, + + RETROK_NUMLOCK = 300, + RETROK_CAPSLOCK = 301, + RETROK_SCROLLOCK = 302, + RETROK_RSHIFT = 303, + RETROK_LSHIFT = 304, + RETROK_RCTRL = 305, + RETROK_LCTRL = 306, + RETROK_RALT = 307, + RETROK_LALT = 308, + RETROK_RMETA = 309, + RETROK_LMETA = 310, + RETROK_LSUPER = 311, + RETROK_RSUPER = 312, + RETROK_MODE = 313, + RETROK_COMPOSE = 314, + + RETROK_HELP = 315, + RETROK_PRINT = 316, + RETROK_SYSREQ = 317, + RETROK_BREAK = 318, + RETROK_MENU = 319, + RETROK_POWER = 320, + RETROK_EURO = 321, + RETROK_UNDO = 322, + RETROK_OEM_102 = 323, + + RETROK_LAST, + + RETROK_DUMMY = INT_MAX /* Ensure sizeof(enum) == sizeof(int) */ +}; + +enum retro_mod +{ + RETROKMOD_NONE = 0x0000, + + RETROKMOD_SHIFT = 0x01, + RETROKMOD_CTRL = 0x02, + RETROKMOD_ALT = 0x04, + RETROKMOD_META = 0x08, + + RETROKMOD_NUMLOCK = 0x10, + RETROKMOD_CAPSLOCK = 0x20, + RETROKMOD_SCROLLOCK = 0x40, + + RETROKMOD_DUMMY = INT_MAX /* Ensure sizeof(enum) == sizeof(int) */ +}; + +/* If set, this call is not part of the public libretro API yet. It can + * change or be removed at any time. */ +#define RETRO_ENVIRONMENT_EXPERIMENTAL 0x10000 +/* Environment callback to be used internally in frontend. */ +#define RETRO_ENVIRONMENT_PRIVATE 0x20000 + +/* Environment commands. 
*/ +#define RETRO_ENVIRONMENT_SET_ROTATION 1 /* const unsigned * -- + * Sets screen rotation of graphics. + * Valid values are 0, 1, 2, 3, which rotates screen by 0, 90, 180, + * 270 degrees counter-clockwise respectively. + */ +#define RETRO_ENVIRONMENT_GET_OVERSCAN 2 /* bool * -- + * NOTE: As of 2019 this callback is considered deprecated in favor of + * using core options to manage overscan in a more nuanced, core-specific way. + * + * Boolean value whether or not the implementation should use overscan, + * or crop away overscan. + */ +#define RETRO_ENVIRONMENT_GET_CAN_DUPE 3 /* bool * -- + * Boolean value whether or not frontend supports frame duping, + * passing NULL to video frame callback. + */ + + /* Environ 4, 5 are no longer supported (GET_VARIABLE / SET_VARIABLES), + * and reserved to avoid possible ABI clash. + */ + +#define RETRO_ENVIRONMENT_SET_MESSAGE 6 /* const struct retro_message * -- + * Sets a message to be displayed in implementation-specific manner + * for a certain amount of 'frames'. + * Should not be used for trivial messages, which should simply be + * logged via RETRO_ENVIRONMENT_GET_LOG_INTERFACE (or as a + * fallback, stderr). + */ +#define RETRO_ENVIRONMENT_SHUTDOWN 7 /* N/A (NULL) -- + * Requests the frontend to shutdown. + * Should only be used if game has a specific + * way to shutdown the game from a menu item or similar. + */ +#define RETRO_ENVIRONMENT_SET_PERFORMANCE_LEVEL 8 + /* const unsigned * -- + * Gives a hint to the frontend how demanding this implementation + * is on a system. E.g. reporting a level of 2 means + * this implementation should run decently on all frontends + * of level 2 and up. + * + * It can be used by the frontend to potentially warn + * about too demanding implementations. + * + * The levels are "floating". + * + * This function can be called on a per-game basis, + * as certain games an implementation can play might be + * particularly demanding. + * If called, it should be called in retro_load_game(). 
+ */ +#define RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY 9 + /* const char ** -- + * Returns the "system" directory of the frontend. + * This directory can be used to store system specific + * content such as BIOSes, configuration data, etc. + * The returned value can be NULL. + * If so, no such directory is defined, + * and it's up to the implementation to find a suitable directory. + * + * NOTE: Some cores used this folder also for "save" data such as + * memory cards, etc, for lack of a better place to put it. + * This is now discouraged, and if possible, cores should try to + * use the new GET_SAVE_DIRECTORY. + */ +#define RETRO_ENVIRONMENT_SET_PIXEL_FORMAT 10 + /* const enum retro_pixel_format * -- + * Sets the internal pixel format used by the implementation. + * The default pixel format is RETRO_PIXEL_FORMAT_0RGB1555. + * This pixel format however, is deprecated (see enum retro_pixel_format). + * If the call returns false, the frontend does not support this pixel + * format. + * + * This function should be called inside retro_load_game() or + * retro_get_system_av_info(). + */ +#define RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS 11 + /* const struct retro_input_descriptor * -- + * Sets an array of retro_input_descriptors. + * It is up to the frontend to present this in a usable way. + * The array is terminated by retro_input_descriptor::description + * being set to NULL. + * This function can be called at any time, but it is recommended + * to call it as early as possible. + */ +#define RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK 12 + /* const struct retro_keyboard_callback * -- + * Sets a callback function used to notify core about keyboard events. + */ +#define RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE 13 + /* const struct retro_disk_control_callback * -- + * Sets an interface which frontend can use to eject and insert + * disk images. + * This is used for games which consist of multiple images and + * must be manually swapped out by the user (e.g. PSX). 
+ */ +#define RETRO_ENVIRONMENT_SET_HW_RENDER 14 + /* struct retro_hw_render_callback * -- + * Sets an interface to let a libretro core render with + * hardware acceleration. + * Should be called in retro_load_game(). + * If successful, libretro cores will be able to render to a + * frontend-provided framebuffer. + * The size of this framebuffer will be at least as large as + * max_width/max_height provided in get_av_info(). + * If HW rendering is used, pass only RETRO_HW_FRAME_BUFFER_VALID or + * NULL to retro_video_refresh_t. + */ +#define RETRO_ENVIRONMENT_GET_VARIABLE 15 + /* struct retro_variable * -- + * Interface to acquire user-defined information from environment + * that cannot feasibly be supported in a multi-system way. + * 'key' should be set to a key which has already been set by + * SET_VARIABLES. + * 'data' will be set to a value or NULL. + */ +#define RETRO_ENVIRONMENT_SET_VARIABLES 16 + /* const struct retro_variable * -- + * Allows an implementation to signal the environment + * which variables it might want to check for later using + * GET_VARIABLE. + * This allows the frontend to present these variables to + * a user dynamically. + * This should be called the first time as early as + * possible (ideally in retro_set_environment). + * Afterward it may be called again for the core to communicate + * updated options to the frontend, but the number of core + * options must not change from the number in the initial call. + * + * 'data' points to an array of retro_variable structs + * terminated by a { NULL, NULL } element. + * retro_variable::key should be namespaced to not collide + * with other implementations' keys. E.g. A core called + * 'foo' should use keys named as 'foo_option'. + * retro_variable::value should contain a human readable + * description of the key as well as a '|' delimited list + * of expected values. + * + * The number of possible options should be very limited, + * i.e. 
it should be feasible to cycle through options + * without a keyboard. + * + * First entry should be treated as a default. + * + * Example entry: + * { "foo_option", "Speed hack coprocessor X; false|true" } + * + * Text before first ';' is description. This ';' must be + * followed by a space, and followed by a list of possible + * values split up with '|'. + * + * Only strings are operated on. The possible values will + * generally be displayed and stored as-is by the frontend. + */ +#define RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE 17 + /* bool * -- + * Result is set to true if some variables are updated by + * frontend since last call to RETRO_ENVIRONMENT_GET_VARIABLE. + * Variables should be queried with GET_VARIABLE. + */ +#define RETRO_ENVIRONMENT_SET_SUPPORT_NO_GAME 18 + /* const bool * -- + * If true, the libretro implementation supports calls to + * retro_load_game() with NULL as argument. + * Used by cores which can run without particular game data. + * This should be called within retro_set_environment() only. + */ +#define RETRO_ENVIRONMENT_GET_LIBRETRO_PATH 19 + /* const char ** -- + * Retrieves the absolute path from where this libretro + * implementation was loaded. + * NULL is returned if the libretro was loaded statically + * (i.e. linked statically to frontend), or if the path cannot be + * determined. + * Mostly useful in cooperation with SET_SUPPORT_NO_GAME as assets can + * be loaded without ugly hacks. + */ + + /* Environment 20 was an obsolete version of SET_AUDIO_CALLBACK. + * It was not used by any known core at the time, + * and was removed from the API. */ +#define RETRO_ENVIRONMENT_SET_FRAME_TIME_CALLBACK 21 + /* const struct retro_frame_time_callback * -- + * Lets the core know how much time has passed since last + * invocation of retro_run(). + * The frontend can tamper with the timing to fake fast-forward, + * slow-motion, frame stepping, etc. + * In this case the delta time will use the reference value + * in frame_time_callback.. 
+ */ +#define RETRO_ENVIRONMENT_SET_AUDIO_CALLBACK 22 + /* const struct retro_audio_callback * -- + * Sets an interface which is used to notify a libretro core about audio + * being available for writing. + * The callback can be called from any thread, so a core using this must + * have a thread safe audio implementation. + * It is intended for games where audio and video are completely + * asynchronous and audio can be generated on the fly. + * This interface is not recommended for use with emulators which have + * highly synchronous audio. + * + * The callback only notifies about writability; the libretro core still + * has to call the normal audio callbacks + * to write audio. The audio callbacks must be called from within the + * notification callback. + * The amount of audio data to write is up to the implementation. + * Generally, the audio callback will be called continuously in a loop. + * + * Due to thread safety guarantees and lack of sync between audio and + * video, a frontend can selectively disallow this interface based on + * internal configuration. A core using this interface must also + * implement the "normal" audio interface. + * + * A libretro core using SET_AUDIO_CALLBACK should also make use of + * SET_FRAME_TIME_CALLBACK. + */ +#define RETRO_ENVIRONMENT_GET_RUMBLE_INTERFACE 23 + /* struct retro_rumble_interface * -- + * Gets an interface which is used by a libretro core to set + * state of rumble motors in controllers. + * A strong and weak motor is supported, and they can be + * controlled independently. + * Should be called from either retro_init() or retro_load_game(). + * Should not be called from retro_set_environment(). + * Returns false if rumble functionality is unavailable. + */ +#define RETRO_ENVIRONMENT_GET_INPUT_DEVICE_CAPABILITIES 24 + /* uint64_t * -- + * Gets a bitmask telling which device types are expected to be + * handled properly in a call to retro_input_state_t. 
+ * Devices which are not handled or recognized always return + * 0 in retro_input_state_t. + * Example bitmask: caps = (1 << RETRO_DEVICE_JOYPAD) | (1 << RETRO_DEVICE_ANALOG). + * Should only be called in retro_run(). + */ +#define RETRO_ENVIRONMENT_GET_SENSOR_INTERFACE (25 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_sensor_interface * -- + * Gets access to the sensor interface. + * The purpose of this interface is to allow + * setting state related to sensors such as polling rate, + * enabling/disable it entirely, etc. + * Reading sensor state is done via the normal + * input_state_callback API. + */ +#define RETRO_ENVIRONMENT_GET_CAMERA_INTERFACE (26 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_camera_callback * -- + * Gets an interface to a video camera driver. + * A libretro core can use this interface to get access to a + * video camera. + * New video frames are delivered in a callback in same + * thread as retro_run(). + * + * GET_CAMERA_INTERFACE should be called in retro_load_game(). + * + * Depending on the camera implementation used, camera frames + * will be delivered as a raw framebuffer, + * or as an OpenGL texture directly. + * + * The core has to tell the frontend here which types of + * buffers can be handled properly. + * An OpenGL texture can only be handled when using a + * libretro GL core (SET_HW_RENDER). + * It is recommended to use a libretro GL core when + * using camera interface. + * + * The camera is not started automatically. The retrieved start/stop + * functions must be used to explicitly + * start and stop the camera driver. + */ +#define RETRO_ENVIRONMENT_GET_LOG_INTERFACE 27 + /* struct retro_log_callback * -- + * Gets an interface for logging. This is useful for + * logging in a cross-platform way + * as certain platforms cannot use stderr for logging. + * It also allows the frontend to + * show logging information in a more suitable way. 
+ * If this interface is not used, libretro cores should + * log to stderr as desired. + */ +#define RETRO_ENVIRONMENT_GET_PERF_INTERFACE 28 + /* struct retro_perf_callback * -- + * Gets an interface for performance counters. This is useful + * for performance logging in a cross-platform way and for detecting + * architecture-specific features, such as SIMD support. + */ +#define RETRO_ENVIRONMENT_GET_LOCATION_INTERFACE 29 + /* struct retro_location_callback * -- + * Gets access to the location interface. + * The purpose of this interface is to be able to retrieve + * location-based information from the host device, + * such as current latitude / longitude. + */ +#define RETRO_ENVIRONMENT_GET_CONTENT_DIRECTORY 30 /* Old name, kept for compatibility. */ +#define RETRO_ENVIRONMENT_GET_CORE_ASSETS_DIRECTORY 30 + /* const char ** -- + * Returns the "core assets" directory of the frontend. + * This directory can be used to store specific assets that the + * core relies upon, such as art assets, + * input data, etc etc. + * The returned value can be NULL. + * If so, no such directory is defined, + * and it's up to the implementation to find a suitable directory. + */ +#define RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY 31 + /* const char ** -- + * Returns the "save" directory of the frontend, unless there is no + * save directory available. The save directory should be used to + * store SRAM, memory cards, high scores, etc, if the libretro core + * cannot use the regular memory interface (retro_get_memory_data()). + * + * If the frontend cannot designate a save directory, it will return + * NULL to indicate that the core should attempt to operate without a + * save directory set. + * + * NOTE: early libretro cores used the system directory for save + * files. Cores that need to be backwards-compatible can still check + * GET_SYSTEM_DIRECTORY. + */ +#define RETRO_ENVIRONMENT_SET_SYSTEM_AV_INFO 32 + /* const struct retro_system_av_info * -- + * Sets a new av_info structure. 
This can only be called from + * within retro_run(). + * This should *only* be used if the core is completely altering the + * internal resolutions, aspect ratios, timings, sampling rate, etc. + * Calling this can require a full reinitialization of video/audio + * drivers in the frontend, + * + * so it is important to call it very sparingly, and usually only with + * the users explicit consent. + * An eventual driver reinitialize will happen so that video and + * audio callbacks + * happening after this call within the same retro_run() call will + * target the newly initialized driver. + * + * This callback makes it possible to support configurable resolutions + * in games, which can be useful to + * avoid setting the "worst case" in max_width/max_height. + * + * ***HIGHLY RECOMMENDED*** Do not call this callback every time + * resolution changes in an emulator core if it's + * expected to be a temporary change, for the reasons of possible + * driver reinitialization. + * This call is not a free pass for not trying to provide + * correct values in retro_get_system_av_info(). If you need to change + * things like aspect ratio or nominal width/height, + * use RETRO_ENVIRONMENT_SET_GEOMETRY, which is a softer variant + * of SET_SYSTEM_AV_INFO. + * + * If this returns false, the frontend does not acknowledge a + * changed av_info struct. + */ +#define RETRO_ENVIRONMENT_SET_PROC_ADDRESS_CALLBACK 33 + /* const struct retro_get_proc_address_interface * -- + * Allows a libretro core to announce support for the + * get_proc_address() interface. + * This interface allows for a standard way to extend libretro where + * use of environment calls are too indirect, + * e.g. for cases where the frontend wants to call directly into the core. + * + * If a core wants to expose this interface, SET_PROC_ADDRESS_CALLBACK + * **MUST** be called from within retro_set_environment(). 
+ */ +#define RETRO_ENVIRONMENT_SET_SUBSYSTEM_INFO 34 + /* const struct retro_subsystem_info * -- + * This environment call introduces the concept of libretro "subsystems". + * A subsystem is a variant of a libretro core which supports + * different kinds of games. + * The purpose of this is to support e.g. emulators which might + * have special needs, e.g. Super Nintendo's Super GameBoy, Sufami Turbo. + * It can also be used to pick among subsystems in an explicit way + * if the libretro implementation is a multi-system emulator itself. + * + * Loading a game via a subsystem is done with retro_load_game_special(), + * and this environment call allows a libretro core to expose which + * subsystems are supported for use with retro_load_game_special(). + * A core passes an array of retro_game_special_info which is terminated + * with a zeroed out retro_game_special_info struct. + * + * If a core wants to use this functionality, SET_SUBSYSTEM_INFO + * **MUST** be called from within retro_set_environment(). + */ +#define RETRO_ENVIRONMENT_SET_CONTROLLER_INFO 35 + /* const struct retro_controller_info * -- + * This environment call lets a libretro core tell the frontend + * which controller subclasses are recognized in calls to + * retro_set_controller_port_device(). + * + * Some emulators such as Super Nintendo support multiple lightgun + * types which must be specifically selected from. It is therefore + * sometimes necessary for a frontend to be able to tell the core + * about a special kind of input device which is not specifically + * provided by the Libretro API. + * + * In order for a frontend to understand the workings of those devices, + * they must be defined as a specialized subclass of the generic device + * types already defined in the libretro API. + * + * The core must pass an array of const struct retro_controller_info which + * is terminated with a blanked out struct. 
Each element of the + * retro_controller_info struct corresponds to the ascending port index + * that is passed to retro_set_controller_port_device() when that function + * is called to indicate to the core that the frontend has changed the + * active device subclass. SEE ALSO: retro_set_controller_port_device() + * + * The ascending input port indexes provided by the core in the struct + * are generally presented by frontends as ascending User # or Player #, + * such as Player 1, Player 2, Player 3, etc. Which device subclasses are + * supported can vary per input port. + * + * The first inner element of each entry in the retro_controller_info array + * is a retro_controller_description struct that specifies the names and + * codes of all device subclasses that are available for the corresponding + * User or Player, beginning with the generic Libretro device that the + * subclasses are derived from. The second inner element of each entry is the + * total number of subclasses that are listed in the retro_controller_description. + * + * NOTE: Even if special device types are set in the libretro core, + * libretro should only poll input based on the base input device types. + */ +#define RETRO_ENVIRONMENT_SET_MEMORY_MAPS (36 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* const struct retro_memory_map * -- + * This environment call lets a libretro core tell the frontend + * about the memory maps this core emulates. + * This can be used to implement, for example, cheats in a core-agnostic way. + * + * Should only be used by emulators; it doesn't make much sense for + * anything else. + * It is recommended to expose all relevant pointers through + * retro_get_memory_* as well. + */ +#define RETRO_ENVIRONMENT_SET_GEOMETRY 37 + /* const struct retro_game_geometry * -- + * This environment call is similar to SET_SYSTEM_AV_INFO for changing + * video parameters, but provides a guarantee that drivers will not be + * reinitialized. + * This can only be called from within retro_run(). 
+ * + * The purpose of this call is to allow a core to alter nominal + * width/heights as well as aspect ratios on-the-fly, which can be + * useful for some emulators to change in run-time. + * + * max_width/max_height arguments are ignored and cannot be changed + * with this call as this could potentially require a reinitialization or a + * non-constant time operation. + * If max_width/max_height are to be changed, SET_SYSTEM_AV_INFO is required. + * + * A frontend must guarantee that this environment call completes in + * constant time. + */ +#define RETRO_ENVIRONMENT_GET_USERNAME 38 + /* const char ** + * Returns the specified username of the frontend, if specified by the user. + * This username can be used as a nickname for a core that has online facilities + * or any other mode where personalization of the user is desirable. + * The returned value can be NULL. + * If this environ callback is used by a core that requires a valid username, + * a default username should be specified by the core. + */ +#define RETRO_ENVIRONMENT_GET_LANGUAGE 39 + /* unsigned * -- + * Returns the specified language of the frontend, if specified by the user. + * It can be used by the core for localization purposes. + */ +#define RETRO_ENVIRONMENT_GET_CURRENT_SOFTWARE_FRAMEBUFFER (40 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_framebuffer * -- + * Returns a preallocated framebuffer which the core can use for rendering + * the frame into when not using SET_HW_RENDER. + * The framebuffer returned from this call must not be used + * after the current call to retro_run() returns. + * + * The goal of this call is to allow zero-copy behavior where a core + * can render directly into video memory, avoiding extra bandwidth cost by copying + * memory from core to video memory. + * + * If this call succeeds and the core renders into it, + * the framebuffer pointer and pitch can be passed to retro_video_refresh_t. 
+ * If the buffer from GET_CURRENT_SOFTWARE_FRAMEBUFFER is to be used, + * the core must pass the exact + * same pointer as returned by GET_CURRENT_SOFTWARE_FRAMEBUFFER; + * i.e. passing a pointer which is offset from the + * buffer is undefined. The width, height and pitch parameters + * must also match exactly to the values obtained from GET_CURRENT_SOFTWARE_FRAMEBUFFER. + * + * It is possible for a frontend to return a different pixel format + * than the one used in SET_PIXEL_FORMAT. This can happen if the frontend + * needs to perform conversion. + * + * It is still valid for a core to render to a different buffer + * even if GET_CURRENT_SOFTWARE_FRAMEBUFFER succeeds. + * + * A frontend must make sure that the pointer obtained from this function is + * writeable (and readable). + */ +#define RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE (41 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* const struct retro_hw_render_interface ** -- + * Returns an API specific rendering interface for accessing API specific data. + * Not all HW rendering APIs support or need this. + * The contents of the returned pointer is specific to the rendering API + * being used. See the various headers like libretro_vulkan.h, etc. + * + * GET_HW_RENDER_INTERFACE cannot be called before context_reset has been called. + * Similarly, after context_destroyed callback returns, + * the contents of the HW_RENDER_INTERFACE are invalidated. + */ +#define RETRO_ENVIRONMENT_SET_SUPPORT_ACHIEVEMENTS (42 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* const bool * -- + * If true, the libretro implementation supports achievements + * either via memory descriptors set with RETRO_ENVIRONMENT_SET_MEMORY_MAPS + * or via retro_get_memory_data/retro_get_memory_size. + * + * This must be called before the first call to retro_run. 
+ */ +#define RETRO_ENVIRONMENT_SET_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE (43 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* const struct retro_hw_render_context_negotiation_interface * -- + * Sets an interface which lets the libretro core negotiate with frontend how a context is created. + * The semantics of this interface depend on which API is used in SET_HW_RENDER earlier. + * This interface will be used when the frontend is trying to create a HW rendering context, + * so it will be used after SET_HW_RENDER, but before the context_reset callback. + */ +#define RETRO_ENVIRONMENT_SET_SERIALIZATION_QUIRKS 44 + /* uint64_t * -- + * Sets quirk flags associated with serialization. The frontend will zero any flags it doesn't + * recognize or support. Should be set in either retro_init or retro_load_game, but not both. + */ +#define RETRO_ENVIRONMENT_SET_HW_SHARED_CONTEXT (44 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* N/A (null) * -- + * The frontend will try to use a 'shared' hardware context (mostly applicable + * to OpenGL) when a hardware context is being set up. + * + * Returns true if the frontend supports shared hardware contexts and false + * if the frontend does not support shared hardware contexts. + * + * This will do nothing on its own until SET_HW_RENDER env callbacks are + * being used. + */ +#define RETRO_ENVIRONMENT_GET_VFS_INTERFACE (45 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_vfs_interface_info * -- + * Gets access to the VFS interface. + * VFS presence needs to be queried prior to load_game or any + * get_system/save/other_directory being called to let front end know + * core supports VFS before it starts handing out paths. + * It is recommended to do so in retro_set_environment + */ +#define RETRO_ENVIRONMENT_GET_LED_INTERFACE (46 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_led_interface * -- + * Gets an interface which is used by a libretro core to set + * state of LEDs. 
+ */ +#define RETRO_ENVIRONMENT_GET_AUDIO_VIDEO_ENABLE (47 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* int * -- + * Tells the core if the frontend wants audio or video. + * If disabled, the frontend will discard the audio or video, + * so the core may decide to skip generating a frame or generating audio. + * This is mainly used for increasing performance. + * Bit 0 (value 1): Enable Video + * Bit 1 (value 2): Enable Audio + * Bit 2 (value 4): Use Fast Savestates. + * Bit 3 (value 8): Hard Disable Audio + * Other bits are reserved for future use and will default to zero. + * If video is disabled: + * * The frontend wants the core to not generate any video, + * including presenting frames via hardware acceleration. + * * The frontend's video frame callback will do nothing. + * * After running the frame, the video output of the next frame should be + * no different than if video was enabled, and saving and loading state + * should have no issues. + * If audio is disabled: + * * The frontend wants the core to not generate any audio. + * * The frontend's audio callbacks will do nothing. + * * After running the frame, the audio output of the next frame should be + * no different than if audio was enabled, and saving and loading state + * should have no issues. + * Fast Savestates: + * * Guaranteed to be created by the same binary that will load them. + * * Will not be written to or read from the disk. + * * Suggest that the core assumes loading state will succeed. + * * Suggest that the core updates its memory buffers in-place if possible. + * * Suggest that the core skips clearing memory. + * * Suggest that the core skips resetting the system. + * * Suggest that the core may skip validation steps. + * Hard Disable Audio: + * * Used for a secondary core when running ahead. + * * Indicates that the frontend will never need audio from the core. + * * Suggests that the core may stop synthesizing audio, but this should not + * compromise emulation accuracy. 
+ * * Audio output for the next frame does not matter, and the frontend will + * never need an accurate audio state in the future. + * * State will never be saved when using Hard Disable Audio. + */ +#define RETRO_ENVIRONMENT_GET_MIDI_INTERFACE (48 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_midi_interface ** -- + * Returns a MIDI interface that can be used for raw data I/O. + */ + +#define RETRO_ENVIRONMENT_GET_FASTFORWARDING (49 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* bool * -- + * Boolean value that indicates whether or not the frontend is in + * fastforwarding mode. + */ + +#define RETRO_ENVIRONMENT_GET_TARGET_REFRESH_RATE (50 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* float * -- + * Float value that lets us know what target refresh rate + * is currently in use by the frontend. + * + * The core can use the returned value to set an ideal + * refresh rate/framerate. + */ + +#define RETRO_ENVIRONMENT_GET_INPUT_BITMASKS (51 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* bool * -- + * Boolean value that indicates whether or not the frontend supports + * input bitmasks being returned by retro_input_state_t. The advantage + * of this is that retro_input_state_t has to be only called once to + * grab all button states instead of multiple times. + * + * If it returns true, you can pass RETRO_DEVICE_ID_JOYPAD_MASK as 'id' + * to retro_input_state_t (make sure 'device' is set to RETRO_DEVICE_JOYPAD). + * It will return a bitmask of all the digital buttons. + */ + +#define RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION 52 + /* unsigned * -- + * Unsigned value is the API version number of the core options + * interface supported by the frontend. If callback returns false, + * API version is assumed to be 0. + * + * In legacy code, core options are set by passing an array of + * retro_variable structs to RETRO_ENVIRONMENT_SET_VARIABLES. + * This may still be done regardless of the core options + * interface version. 
+ * + * If version is >= 1 however, core options may instead be set by + * passing an array of retro_core_option_definition structs to + * RETRO_ENVIRONMENT_SET_CORE_OPTIONS, or a 2D array of + * retro_core_option_definition structs to RETRO_ENVIRONMENT_SET_CORE_OPTIONS_INTL. + * This allows the core to additionally set option sublabel information + * and/or provide localisation support. + * + * If version is >= 2, core options may instead be set by passing + * a retro_core_options_v2 struct to RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2, + * or an array of retro_core_options_v2 structs to + * RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2_INTL. This allows the core + * to additionally set optional core option category information + * for frontends with core option category support. + */ + +#define RETRO_ENVIRONMENT_SET_CORE_OPTIONS 53 + /* const struct retro_core_option_definition ** -- + * Allows an implementation to signal the environment + * which variables it might want to check for later using + * GET_VARIABLE. + * This allows the frontend to present these variables to + * a user dynamically. + * This should only be called if RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION + * returns an API version of >= 1. + * This should be called instead of RETRO_ENVIRONMENT_SET_VARIABLES. + * This should be called the first time as early as + * possible (ideally in retro_set_environment). + * Afterwards it may be called again for the core to communicate + * updated options to the frontend, but the number of core + * options must not change from the number in the initial call. + * + * 'data' points to an array of retro_core_option_definition structs + * terminated by a { NULL, NULL, NULL, {{0}}, NULL } element. + * retro_core_option_definition::key should be namespaced to not collide + * with other implementations' keys. e.g. A core called + * 'foo' should use keys named as 'foo_option'. + * retro_core_option_definition::desc should contain a human readable + * description of the key. 
+ * retro_core_option_definition::info should contain any additional human + * readable information text that a typical user may need to + * understand the functionality of the option. + * retro_core_option_definition::values is an array of retro_core_option_value + * structs terminated by a { NULL, NULL } element. + * > retro_core_option_definition::values[index].value is an expected option + * value. + * > retro_core_option_definition::values[index].label is a human readable + * label used when displaying the value on screen. If NULL, + * the value itself is used. + * retro_core_option_definition::default_value is the default core option + * setting. It must match one of the expected option values in the + * retro_core_option_definition::values array. If it does not, or the + * default value is NULL, the first entry in the + * retro_core_option_definition::values array is treated as the default. + * + * The number of possible option values should be very limited, + * and must be less than RETRO_NUM_CORE_OPTION_VALUES_MAX. + * i.e. it should be feasible to cycle through options + * without a keyboard. + * + * Example entry: + * { + * "foo_option", + * "Speed hack coprocessor X", + * "Provides increased performance at the expense of reduced accuracy", + * { + * { "false", NULL }, + * { "true", NULL }, + * { "unstable", "Turbo (Unstable)" }, + * { NULL, NULL }, + * }, + * "false" + * } + * + * Only strings are operated on. The possible values will + * generally be displayed and stored as-is by the frontend. + */ + +#define RETRO_ENVIRONMENT_SET_CORE_OPTIONS_INTL 54 + /* const struct retro_core_options_intl * -- + * Allows an implementation to signal the environment + * which variables it might want to check for later using + * GET_VARIABLE. + * This allows the frontend to present these variables to + * a user dynamically. + * This should only be called if RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION + * returns an API version of >= 1. 
+ * This should be called instead of RETRO_ENVIRONMENT_SET_VARIABLES. + * This should be called instead of RETRO_ENVIRONMENT_SET_CORE_OPTIONS. + * This should be called the first time as early as + * possible (ideally in retro_set_environment). + * Afterwards it may be called again for the core to communicate + * updated options to the frontend, but the number of core + * options must not change from the number in the initial call. + * + * This is fundamentally the same as RETRO_ENVIRONMENT_SET_CORE_OPTIONS, + * with the addition of localisation support. The description of the + * RETRO_ENVIRONMENT_SET_CORE_OPTIONS callback should be consulted + * for further details. + * + * 'data' points to a retro_core_options_intl struct. + * + * retro_core_options_intl::us is a pointer to an array of + * retro_core_option_definition structs defining the US English + * core options implementation. It must point to a valid array. + * + * retro_core_options_intl::local is a pointer to an array of + * retro_core_option_definition structs defining core options for + * the current frontend language. It may be NULL (in which case + * retro_core_options_intl::us is used by the frontend). Any items + * missing from this array will be read from retro_core_options_intl::us + * instead. + * + * NOTE: Default core option values are always taken from the + * retro_core_options_intl::us array. Any default values in + * retro_core_options_intl::local array will be ignored. + */ + +#define RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY 55 + /* struct retro_core_option_display * -- + * + * Allows an implementation to signal the environment to show + * or hide a variable when displaying core options. This is + * considered a *suggestion*. The frontend is free to ignore + * this callback, and its implementation not considered mandatory. 
+ * + * 'data' points to a retro_core_option_display struct + * + * retro_core_option_display::key is a variable identifier + * which has already been set by SET_VARIABLES/SET_CORE_OPTIONS. + * + * retro_core_option_display::visible is a boolean, specifying + * whether variable should be displayed + * + * Note that all core option variables will be set visible by + * default when calling SET_VARIABLES/SET_CORE_OPTIONS. + */ + +#define RETRO_ENVIRONMENT_GET_PREFERRED_HW_RENDER 56 + /* unsigned * -- + * + * Allows an implementation to ask frontend preferred hardware + * context to use. Core should use this information to deal + * with what specific context to request with SET_HW_RENDER. + * + * 'data' points to an unsigned variable + */ + +#define RETRO_ENVIRONMENT_GET_DISK_CONTROL_INTERFACE_VERSION 57 + /* unsigned * -- + * Unsigned value is the API version number of the disk control + * interface supported by the frontend. If callback return false, + * API version is assumed to be 0. + * + * In legacy code, the disk control interface is defined by passing + * a struct of type retro_disk_control_callback to + * RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE. + * This may be still be done regardless of the disk control + * interface version. + * + * If version is >= 1 however, the disk control interface may + * instead be defined by passing a struct of type + * retro_disk_control_ext_callback to + * RETRO_ENVIRONMENT_SET_DISK_CONTROL_EXT_INTERFACE. + * This allows the core to provide additional information about + * disk images to the frontend and/or enables extra + * disk control functionality by the frontend. + */ + +#define RETRO_ENVIRONMENT_SET_DISK_CONTROL_EXT_INTERFACE 58 + /* const struct retro_disk_control_ext_callback * -- + * Sets an interface which frontend can use to eject and insert + * disk images, and also obtain information about individual + * disk image files registered by the core. 
+ * This is used for games which consist of multiple images and + * must be manually swapped out by the user (e.g. PSX, floppy disk + * based systems). + */ + +#define RETRO_ENVIRONMENT_GET_MESSAGE_INTERFACE_VERSION 59 + /* unsigned * -- + * Unsigned value is the API version number of the message + * interface supported by the frontend. If callback returns + * false, API version is assumed to be 0. + * + * In legacy code, messages may be displayed in an + * implementation-specific manner by passing a struct + * of type retro_message to RETRO_ENVIRONMENT_SET_MESSAGE. + * This may be still be done regardless of the message + * interface version. + * + * If version is >= 1 however, messages may instead be + * displayed by passing a struct of type retro_message_ext + * to RETRO_ENVIRONMENT_SET_MESSAGE_EXT. This allows the + * core to specify message logging level, priority and + * destination (OSD, logging interface or both). + */ + +#define RETRO_ENVIRONMENT_SET_MESSAGE_EXT 60 + /* const struct retro_message_ext * -- + * Sets a message to be displayed in an implementation-specific + * manner for a certain amount of 'frames'. Additionally allows + * the core to specify message logging level, priority and + * destination (OSD, logging interface or both). + * Should not be used for trivial messages, which should simply be + * logged via RETRO_ENVIRONMENT_GET_LOG_INTERFACE (or as a + * fallback, stderr). + */ + +#define RETRO_ENVIRONMENT_GET_INPUT_MAX_USERS 61 + /* unsigned * -- + * Unsigned value is the number of active input devices + * provided by the frontend. This may change between + * frames, but will remain constant for the duration + * of each frame. + * If callback returns true, a core need not poll any + * input device with an index greater than or equal to + * the number of active devices. + * If callback returns false, the number of active input + * devices is unknown. In this case, all input devices + * should be considered active. 
+ */ + +#define RETRO_ENVIRONMENT_SET_AUDIO_BUFFER_STATUS_CALLBACK 62 + /* const struct retro_audio_buffer_status_callback * -- + * Lets the core know the occupancy level of the frontend + * audio buffer. Can be used by a core to attempt frame + * skipping in order to avoid buffer under-runs. + * A core may pass NULL to disable buffer status reporting + * in the frontend. + */ + +#define RETRO_ENVIRONMENT_SET_MINIMUM_AUDIO_LATENCY 63 + /* const unsigned * -- + * Sets minimum frontend audio latency in milliseconds. + * Resultant audio latency may be larger than set value, + * or smaller if a hardware limit is encountered. A frontend + * is expected to honour requests up to 512 ms. + * + * - If value is less than current frontend + * audio latency, callback has no effect + * - If value is zero, default frontend audio + * latency is set + * + * May be used by a core to increase audio latency and + * therefore decrease the probability of buffer under-runs + * (crackling) when performing 'intensive' operations. + * A core utilising RETRO_ENVIRONMENT_SET_AUDIO_BUFFER_STATUS_CALLBACK + * to implement audio-buffer-based frame skipping may achieve + * optimal results by setting the audio latency to a 'high' + * (typically 6x or 8x) integer multiple of the expected + * frame time. + * + * WARNING: This can only be called from within retro_run(). + * Calling this can require a full reinitialization of audio + * drivers in the frontend, so it is important to call it very + * sparingly, and usually only with the users explicit consent. + * An eventual driver reinitialize will happen so that audio + * callbacks happening after this call within the same retro_run() + * call will target the newly initialized driver. + */ + +#define RETRO_ENVIRONMENT_SET_FASTFORWARDING_OVERRIDE 64 + /* const struct retro_fastforwarding_override * -- + * Used by a libretro core to override the current + * fastforwarding mode of the frontend. 
+ * If NULL is passed to this function, the frontend + * will return true if fastforwarding override + * functionality is supported (no change in + * fastforwarding state will occur in this case). + */ + +#define RETRO_ENVIRONMENT_SET_CONTENT_INFO_OVERRIDE 65 + /* const struct retro_system_content_info_override * -- + * Allows an implementation to override 'global' content + * info parameters reported by retro_get_system_info(). + * Overrides also affect subsystem content info parameters + * set via RETRO_ENVIRONMENT_SET_SUBSYSTEM_INFO. + * This function must be called inside retro_set_environment(). + * If callback returns false, content info overrides + * are unsupported by the frontend, and will be ignored. + * If callback returns true, extended game info may be + * retrieved by calling RETRO_ENVIRONMENT_GET_GAME_INFO_EXT + * in retro_load_game() or retro_load_game_special(). + * + * 'data' points to an array of retro_system_content_info_override + * structs terminated by a { NULL, false, false } element. + * If 'data' is NULL, no changes will be made to the frontend; + * a core may therefore pass NULL in order to test whether + * the RETRO_ENVIRONMENT_SET_CONTENT_INFO_OVERRIDE and + * RETRO_ENVIRONMENT_GET_GAME_INFO_EXT callbacks are supported + * by the frontend. + * + * For struct member descriptions, see the definition of + * struct retro_system_content_info_override. 
+ * + * Example: + * + * - struct retro_system_info: + * { + * "My Core", // library_name + * "v1.0", // library_version + * "m3u|md|cue|iso|chd|sms|gg|sg", // valid_extensions + * true, // need_fullpath + * false // block_extract + * } + * + * - Array of struct retro_system_content_info_override: + * { + * { + * "md|sms|gg", // extensions + * false, // need_fullpath + * true // persistent_data + * }, + * { + * "sg", // extensions + * false, // need_fullpath + * false // persistent_data + * }, + * { NULL, false, false } + * } + * + * Result: + * - Files of type m3u, cue, iso, chd will not be + * loaded by the frontend. Frontend will pass a + * valid path to the core, and core will handle + * loading internally + * - Files of type md, sms, gg will be loaded by + * the frontend. A valid memory buffer will be + * passed to the core. This memory buffer will + * remain valid until retro_deinit() returns + * - Files of type sg will be loaded by the frontend. + * A valid memory buffer will be passed to the core. + * This memory buffer will remain valid until + * retro_load_game() (or retro_load_game_special()) + * returns + * + * NOTE: If an extension is listed multiple times in + * an array of retro_system_content_info_override + * structs, only the first instance will be registered + */ + +#define RETRO_ENVIRONMENT_GET_GAME_INFO_EXT 66 + /* const struct retro_game_info_ext ** -- + * Allows an implementation to fetch extended game + * information, providing additional content path + * and memory buffer status details. + * This function may only be called inside + * retro_load_game() or retro_load_game_special(). + * If callback returns false, extended game information + * is unsupported by the frontend. In this case, only + * regular retro_game_info will be available. + * RETRO_ENVIRONMENT_GET_GAME_INFO_EXT is guaranteed + * to return true if RETRO_ENVIRONMENT_SET_CONTENT_INFO_OVERRIDE + * returns true. + * + * 'data' points to an array of retro_game_info_ext structs. 
+ * + * For struct member descriptions, see the definition of + * struct retro_game_info_ext. + * + * - If function is called inside retro_load_game(), + * the retro_game_info_ext array is guaranteed to + * have a size of 1 - i.e. the returned pointer may + * be used to access directly the members of the + * first retro_game_info_ext struct, for example: + * + * struct retro_game_info_ext *game_info_ext; + * if (environ_cb(RETRO_ENVIRONMENT_GET_GAME_INFO_EXT, &game_info_ext)) + * printf("Content Directory: %s\n", game_info_ext->dir); + * + * - If the function is called inside retro_load_game_special(), + * the retro_game_info_ext array is guaranteed to have a + * size equal to the num_info argument passed to + * retro_load_game_special() + */ + +#define RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2 67 + /* const struct retro_core_options_v2 * -- + * Allows an implementation to signal the environment + * which variables it might want to check for later using + * GET_VARIABLE. + * This allows the frontend to present these variables to + * a user dynamically. + * This should only be called if RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION + * returns an API version of >= 2. + * This should be called instead of RETRO_ENVIRONMENT_SET_VARIABLES. + * This should be called instead of RETRO_ENVIRONMENT_SET_CORE_OPTIONS. + * This should be called the first time as early as + * possible (ideally in retro_set_environment). + * Afterwards it may be called again for the core to communicate + * updated options to the frontend, but the number of core + * options must not change from the number in the initial call. + * If RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION returns an API + * version of >= 2, this callback is guaranteed to succeed + * (i.e. callback return value does not indicate success) + * If callback returns true, frontend has core option category + * support. + * If callback returns false, frontend does not have core option + * category support. 
+ * + * 'data' points to a retro_core_options_v2 struct, containing + * of two pointers: + * - retro_core_options_v2::categories is an array of + * retro_core_option_v2_category structs terminated by a + * { NULL, NULL, NULL } element. If retro_core_options_v2::categories + * is NULL, all core options will have no category and will be shown + * at the top level of the frontend core option interface. If frontend + * does not have core option category support, categories array will + * be ignored. + * - retro_core_options_v2::definitions is an array of + * retro_core_option_v2_definition structs terminated by a + * { NULL, NULL, NULL, NULL, NULL, NULL, {{0}}, NULL } + * element. + * + * >> retro_core_option_v2_category notes: + * + * - retro_core_option_v2_category::key should contain string + * that uniquely identifies the core option category. Valid + * key characters are [a-z, A-Z, 0-9, _, -] + * Namespace collisions with other implementations' category + * keys are permitted. + * - retro_core_option_v2_category::desc should contain a human + * readable description of the category key. + * - retro_core_option_v2_category::info should contain any + * additional human readable information text that a typical + * user may need to understand the nature of the core option + * category. + * + * Example entry: + * { + * "advanced_settings", + * "Advanced", + * "Options affecting low-level emulation performance and accuracy." + * } + * + * >> retro_core_option_v2_definition notes: + * + * - retro_core_option_v2_definition::key should be namespaced to not + * collide with other implementations' keys. e.g. A core called + * 'foo' should use keys named as 'foo_option'. Valid key characters + * are [a-z, A-Z, 0-9, _, -]. + * - retro_core_option_v2_definition::desc should contain a human readable + * description of the key. Will be used when the frontend does not + * have core option category support. Examples: "Aspect Ratio" or + * "Video > Aspect Ratio". 
+ * - retro_core_option_v2_definition::desc_categorized should contain a + * human readable description of the key, which will be used when + * frontend has core option category support. Example: "Aspect Ratio", + * where associated retro_core_option_v2_category::desc is "Video". + * If empty or NULL, the string specified by + * retro_core_option_v2_definition::desc will be used instead. + * retro_core_option_v2_definition::desc_categorized will be ignored + * if retro_core_option_v2_definition::category_key is empty or NULL. + * - retro_core_option_v2_definition::info should contain any additional + * human readable information text that a typical user may need to + * understand the functionality of the option. + * - retro_core_option_v2_definition::info_categorized should contain + * any additional human readable information text that a typical user + * may need to understand the functionality of the option, and will be + * used when frontend has core option category support. This is provided + * to accommodate the case where info text references an option by + * name/desc, and the desc/desc_categorized text for that option differ. + * If empty or NULL, the string specified by + * retro_core_option_v2_definition::info will be used instead. + * retro_core_option_v2_definition::info_categorized will be ignored + * if retro_core_option_v2_definition::category_key is empty or NULL. + * - retro_core_option_v2_definition::category_key should contain a + * category identifier (e.g. "video" or "audio") that will be + * assigned to the core option if frontend has core option category + * support. A categorized option will be shown in a subsection/ + * submenu of the frontend core option interface. 
If key is empty + * or NULL, or if key does not match one of the + * retro_core_option_v2_category::key values in the associated + * retro_core_option_v2_category array, option will have no category + * and will be shown at the top level of the frontend core option + * interface. + * - retro_core_option_v2_definition::values is an array of + * retro_core_option_value structs terminated by a { NULL, NULL } + * element. + * --> retro_core_option_v2_definition::values[index].value is an + * expected option value. + * --> retro_core_option_v2_definition::values[index].label is a + * human readable label used when displaying the value on screen. + * If NULL, the value itself is used. + * - retro_core_option_v2_definition::default_value is the default + * core option setting. It must match one of the expected option + * values in the retro_core_option_v2_definition::values array. If + * it does not, or the default value is NULL, the first entry in the + * retro_core_option_v2_definition::values array is treated as the + * default. + * + * The number of possible option values should be very limited, + * and must be less than RETRO_NUM_CORE_OPTION_VALUES_MAX. + * i.e. it should be feasible to cycle through options + * without a keyboard. 
+ * + * Example entries: + * + * - Uncategorized: + * + * { + * "foo_option", + * "Speed hack coprocessor X", + * NULL, + * "Provides increased performance at the expense of reduced accuracy.", + * NULL, + * NULL, + * { + * { "false", NULL }, + * { "true", NULL }, + * { "unstable", "Turbo (Unstable)" }, + * { NULL, NULL }, + * }, + * "false" + * } + * + * - Categorized: + * + * { + * "foo_option", + * "Advanced > Speed hack coprocessor X", + * "Speed hack coprocessor X", + * "Setting 'Advanced > Speed hack coprocessor X' to 'true' or 'Turbo' provides increased performance at the expense of reduced accuracy", + * "Setting 'Speed hack coprocessor X' to 'true' or 'Turbo' provides increased performance at the expense of reduced accuracy", + * "advanced_settings", + * { + * { "false", NULL }, + * { "true", NULL }, + * { "unstable", "Turbo (Unstable)" }, + * { NULL, NULL }, + * }, + * "false" + * } + * + * Only strings are operated on. The possible values will + * generally be displayed and stored as-is by the frontend. + */ + +#define RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2_INTL 68 + /* const struct retro_core_options_v2_intl * -- + * Allows an implementation to signal the environment + * which variables it might want to check for later using + * GET_VARIABLE. + * This allows the frontend to present these variables to + * a user dynamically. + * This should only be called if RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION + * returns an API version of >= 2. + * This should be called instead of RETRO_ENVIRONMENT_SET_VARIABLES. + * This should be called instead of RETRO_ENVIRONMENT_SET_CORE_OPTIONS. + * This should be called instead of RETRO_ENVIRONMENT_SET_CORE_OPTIONS_INTL. + * This should be called instead of RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2. + * This should be called the first time as early as + * possible (ideally in retro_set_environment). 
+ * Afterwards it may be called again for the core to communicate + * updated options to the frontend, but the number of core + * options must not change from the number in the initial call. + * If RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION returns an API + * version of >= 2, this callback is guaranteed to succeed + * (i.e. callback return value does not indicate success) + * If callback returns true, frontend has core option category + * support. + * If callback returns false, frontend does not have core option + * category support. + * + * This is fundamentally the same as RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2, + * with the addition of localisation support. The description of the + * RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2 callback should be consulted + * for further details. + * + * 'data' points to a retro_core_options_v2_intl struct. + * + * - retro_core_options_v2_intl::us is a pointer to a + * retro_core_options_v2 struct defining the US English + * core options implementation. It must point to a valid struct. + * + * - retro_core_options_v2_intl::local is a pointer to a + * retro_core_options_v2 struct defining core options for + * the current frontend language. It may be NULL (in which case + * retro_core_options_v2_intl::us is used by the frontend). Any items + * missing from this struct will be read from + * retro_core_options_v2_intl::us instead. + * + * NOTE: Default core option values are always taken from the + * retro_core_options_v2_intl::us struct. Any default values in + * the retro_core_options_v2_intl::local struct will be ignored. + */ + +#define RETRO_ENVIRONMENT_SET_CORE_OPTIONS_UPDATE_DISPLAY_CALLBACK 69 + /* const struct retro_core_options_update_display_callback * -- + * Allows a frontend to signal that a core must update + * the visibility of any dynamically hidden core options, + * and enables the frontend to detect visibility changes. 
+ * Used by the frontend to update the menu display status + * of core options without requiring a call of retro_run(). + * Must be called in retro_set_environment(). + */ + +#define RETRO_ENVIRONMENT_SET_VARIABLE 70 + /* const struct retro_variable * -- + * Allows an implementation to notify the frontend + * that a core option value has changed. + * + * retro_variable::key and retro_variable::value + * must match strings that have been set previously + * via one of the following: + * + * - RETRO_ENVIRONMENT_SET_VARIABLES + * - RETRO_ENVIRONMENT_SET_CORE_OPTIONS + * - RETRO_ENVIRONMENT_SET_CORE_OPTIONS_INTL + * - RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2 + * - RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2_INTL + * + * After changing a core option value via this + * callback, RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE + * will return true. + * + * If data is NULL, no changes will be registered + * and the callback will return true; an + * implementation may therefore pass NULL in order + * to test whether the callback is supported. + */ + +#define RETRO_ENVIRONMENT_GET_THROTTLE_STATE (71 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_throttle_state * -- + * Allows an implementation to get details on the actual rate + * the frontend is attempting to call retro_run(). + */ + +#define RETRO_ENVIRONMENT_GET_SAVESTATE_CONTEXT (72 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* int * -- + * Tells the core about the context the frontend is asking for savestate. + * (see enum retro_savestate_context) + */ + +#define RETRO_ENVIRONMENT_GET_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_SUPPORT (73 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_hw_render_context_negotiation_interface * -- + * Before calling SET_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE, a core can query + * which version of the interface is supported. + * + * Frontend looks at interface_type and returns the maximum supported + * context negotiation interface version.
+ * If the interface_type is not supported or recognized by the frontend, a version of 0 + * must be returned in interface_version and true is returned by frontend. + * + * If this environment call returns true with interface_version greater than 0, + * a core can always use a negotiation interface version larger than what the frontend returns, but only + * earlier versions of the interface will be used by the frontend. + * A frontend must not reject a negotiation interface version that is larger than + * what the frontend supports. Instead, the frontend will use the older entry points that it recognizes. + * If this is incompatible with a particular core's requirements, it can error out early. + * + * Backwards compatibility note: + * This environment call was introduced after Vulkan v1 context negotiation. + * If this environment call is not supported by frontend - i.e. the environment call returns false - + * only Vulkan v1 context negotiation is supported (if Vulkan HW rendering is supported at all). + * If a core uses Vulkan negotiation interface with version > 1, negotiation may fail unexpectedly. + * All future updates to the context negotiation interface imply that frontend must support + * this environment call to query support. + */ + +#define RETRO_ENVIRONMENT_GET_JIT_CAPABLE 74 + /* bool * -- + * Result is set to true if the frontend has already verified JIT can be + * used, mainly for use on iOS/tvOS. On other platforms the result is true. + */ + +#define RETRO_ENVIRONMENT_GET_MICROPHONE_INTERFACE (75 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_microphone_interface * -- + * Returns an interface that can be used to receive input from the microphone driver. + * + * Returns true if microphone support is available, + * even if no microphones are plugged in. + * Returns false if mic support is disabled or unavailable. + * + * This callback can be invoked at any time, + * even before the microphone driver is ready.
+ */ + +#define RETRO_ENVIRONMENT_SET_NETPACKET_INTERFACE 76 + /* const struct retro_netpacket_callback * -- + * When set, a core gains control over network packets sent and + * received during a multiplayer session. This can be used to + * emulate multiplayer games that were originally played on two + * or more separate consoles or computers connected together. + * + * The frontend will take care of connecting players together, + * and the core only needs to send the actual data as needed for + * the emulation, while handshake and connection management happen + * in the background. + * + * When two or more players are connected and this interface has + * been set, time manipulation features (such as pausing, slow motion, + * fast forward, rewinding, save state loading, etc.) are disabled to + * avoid interrupting communication. + * + * Should be set in either retro_init or retro_load_game, but not both. + * + * When not set, a frontend may use state serialization-based + * multiplayer, where a deterministic core supporting multiple + * input devices does not need to take any action on its own. + */ + +#define RETRO_ENVIRONMENT_GET_DEVICE_POWER (77 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_device_power * -- + * Returns the device's current power state as reported by the frontend. + * This is useful for emulating the battery level in handheld consoles, + * or for reducing power consumption when on battery power. + * + * The return value indicates whether the frontend can provide this information, + * even if the parameter is NULL. + * + * If the frontend does not support this functionality, + * then the provided argument will remain unchanged. + * + * Note that this environment call describes the power state for the entire device, + * not for individual peripherals like controllers. 
+ */ + +/* VFS functionality */ + +/* File paths: + * File paths passed as parameters when using this API shall be well formed UNIX-style, + * using "/" (unquoted forward slash) as directory separator regardless of the platform's native separator. + * Paths shall also include at least one forward slash ("game.bin" is an invalid path, use "./game.bin" instead). + * Other than the directory separator, cores shall not make assumptions about path format: + * "C:/path/game.bin", "http://example.com/game.bin", "#game/game.bin", "./game.bin" (without quotes) are all valid paths. + * Cores may replace the basename or remove path components from the end, and/or add new components; + * however, cores shall not append "./", "../" or multiple consecutive forward slashes ("//") to paths they request to front end. + * The frontend is encouraged to make such paths work as well as it can, but is allowed to give up if the core alters paths too much. + * Frontends are encouraged, but not required, to support native file system paths (modulo replacing the directory separator, if applicable). + * Cores are allowed to try using them, but must remain functional if the front rejects such requests. + * Cores are encouraged to use the libretro-common filestream functions for file I/O, + * as they seamlessly integrate with VFS, deal with directory separator replacement as appropriate + * and provide platform-specific fallbacks in cases where front ends do not support VFS. 
*/ + +/* Opaque file handle + * Introduced in VFS API v1 */ +struct retro_vfs_file_handle; + +/* Opaque directory handle + * Introduced in VFS API v3 */ +struct retro_vfs_dir_handle; + +/* File open flags + * Introduced in VFS API v1 */ +#define RETRO_VFS_FILE_ACCESS_READ (1 << 0) /* Read only mode */ +#define RETRO_VFS_FILE_ACCESS_WRITE (1 << 1) /* Write only mode, discard contents and overwrites existing file unless RETRO_VFS_FILE_ACCESS_UPDATE_EXISTING is also specified */ +#define RETRO_VFS_FILE_ACCESS_READ_WRITE (RETRO_VFS_FILE_ACCESS_READ | RETRO_VFS_FILE_ACCESS_WRITE) /* Read-write mode, discard contents and overwrites existing file unless RETRO_VFS_FILE_ACCESS_UPDATE_EXISTING is also specified */ +#define RETRO_VFS_FILE_ACCESS_UPDATE_EXISTING (1 << 2) /* Prevents discarding content of existing files opened for writing */ + +/* These are only hints. The frontend may choose to ignore them. Other than RAM/CPU/etc use, + and how they react to unlikely external interference (for example someone else writing to that file, + or the file's server going down), behavior will not change. */ +#define RETRO_VFS_FILE_ACCESS_HINT_NONE (0) +/* Indicate that the file will be accessed many times. The frontend should aggressively cache everything. */ +#define RETRO_VFS_FILE_ACCESS_HINT_FREQUENT_ACCESS (1 << 0) + +/* Seek positions */ +#define RETRO_VFS_SEEK_POSITION_START 0 +#define RETRO_VFS_SEEK_POSITION_CURRENT 1 +#define RETRO_VFS_SEEK_POSITION_END 2 + +/* stat() result flags + * Introduced in VFS API v3 */ +#define RETRO_VFS_STAT_IS_VALID (1 << 0) +#define RETRO_VFS_STAT_IS_DIRECTORY (1 << 1) +#define RETRO_VFS_STAT_IS_CHARACTER_SPECIAL (1 << 2) + +/* Get path from opaque handle. Returns the exact same path passed to file_open when getting the handle + * Introduced in VFS API v1 */ +typedef const char *(RETRO_CALLCONV *retro_vfs_get_path_t)(struct retro_vfs_file_handle *stream); + +/* Open a file for reading or writing. If path points to a directory, this will + * fail.
Returns the opaque file handle, or NULL for error. + * Introduced in VFS API v1 */ +typedef struct retro_vfs_file_handle *(RETRO_CALLCONV *retro_vfs_open_t)(const char *path, unsigned mode, unsigned hints); + +/* Close the file and release its resources. Must be called if open_file returns non-NULL. Returns 0 on success, -1 on failure. + * Whether the call succeeds or not, the handle passed as parameter becomes invalid and should no longer be used. + * Introduced in VFS API v1 */ +typedef int (RETRO_CALLCONV *retro_vfs_close_t)(struct retro_vfs_file_handle *stream); + +/* Return the size of the file in bytes, or -1 for error. + * Introduced in VFS API v1 */ +typedef int64_t (RETRO_CALLCONV *retro_vfs_size_t)(struct retro_vfs_file_handle *stream); + +/* Truncate file to specified size. Returns 0 on success or -1 on error + * Introduced in VFS API v2 */ +typedef int64_t (RETRO_CALLCONV *retro_vfs_truncate_t)(struct retro_vfs_file_handle *stream, int64_t length); + +/* Get the current read / write position for the file. Returns -1 for error. + * Introduced in VFS API v1 */ +typedef int64_t (RETRO_CALLCONV *retro_vfs_tell_t)(struct retro_vfs_file_handle *stream); + +/* Set the current read/write position for the file. Returns the new position, -1 for error. + * Introduced in VFS API v1 */ +typedef int64_t (RETRO_CALLCONV *retro_vfs_seek_t)(struct retro_vfs_file_handle *stream, int64_t offset, int seek_position); + +/* Read data from a file. Returns the number of bytes read, or -1 for error. + * Introduced in VFS API v1 */ +typedef int64_t (RETRO_CALLCONV *retro_vfs_read_t)(struct retro_vfs_file_handle *stream, void *s, uint64_t len); + +/* Write data to a file. Returns the number of bytes written, or -1 for error. + * Introduced in VFS API v1 */ +typedef int64_t (RETRO_CALLCONV *retro_vfs_write_t)(struct retro_vfs_file_handle *stream, const void *s, uint64_t len); + +/* Flush pending writes to file, if using buffered IO. Returns 0 on success, or -1 on failure.
+ * Introduced in VFS API v1 */ +typedef int (RETRO_CALLCONV *retro_vfs_flush_t)(struct retro_vfs_file_handle *stream); + +/* Delete the specified file. Returns 0 on success, -1 on failure + * Introduced in VFS API v1 */ +typedef int (RETRO_CALLCONV *retro_vfs_remove_t)(const char *path); + +/* Rename the specified file. Returns 0 on success, -1 on failure + * Introduced in VFS API v1 */ +typedef int (RETRO_CALLCONV *retro_vfs_rename_t)(const char *old_path, const char *new_path); + +/* Stat the specified file. Returns a bitmask of RETRO_VFS_STAT_* flags, none are set if path was not valid. + * Additionally stores file size in given variable, unless NULL is given. + * Introduced in VFS API v3 */ +typedef int (RETRO_CALLCONV *retro_vfs_stat_t)(const char *path, int32_t *size); + +/* Create the specified directory. Returns 0 on success, -1 on unknown failure, -2 if already exists. + * Introduced in VFS API v3 */ +typedef int (RETRO_CALLCONV *retro_vfs_mkdir_t)(const char *dir); + +/* Open the specified directory for listing. Returns the opaque dir handle, or NULL for error. + * Support for the include_hidden argument may vary depending on the platform. + * Introduced in VFS API v3 */ +typedef struct retro_vfs_dir_handle *(RETRO_CALLCONV *retro_vfs_opendir_t)(const char *dir, bool include_hidden); + +/* Read the directory entry at the current position, and move the read pointer to the next position. + * Returns true on success, false if already on the last entry. + * Introduced in VFS API v3 */ +typedef bool (RETRO_CALLCONV *retro_vfs_readdir_t)(struct retro_vfs_dir_handle *dirstream); + +/* Get the name of the last entry read. Returns a string on success, or NULL for error. + * The returned string pointer is valid until the next call to readdir or closedir. + * Introduced in VFS API v3 */ +typedef const char *(RETRO_CALLCONV *retro_vfs_dirent_get_name_t)(struct retro_vfs_dir_handle *dirstream); + +/* Check if the last entry read was a directory.
Returns true if it was, false otherwise (or on error). + * Introduced in VFS API v3 */ +typedef bool (RETRO_CALLCONV *retro_vfs_dirent_is_dir_t)(struct retro_vfs_dir_handle *dirstream); + +/* Close the directory and release its resources. Must be called if opendir returns non-NULL. Returns 0 on success, -1 on failure. + * Whether the call succeeds or not, the handle passed as parameter becomes invalid and should no longer be used. + * Introduced in VFS API v3 */ +typedef int (RETRO_CALLCONV *retro_vfs_closedir_t)(struct retro_vfs_dir_handle *dirstream); + +struct retro_vfs_interface +{ + /* VFS API v1 */ + retro_vfs_get_path_t get_path; + retro_vfs_open_t open; + retro_vfs_close_t close; + retro_vfs_size_t size; + retro_vfs_tell_t tell; + retro_vfs_seek_t seek; + retro_vfs_read_t read; + retro_vfs_write_t write; + retro_vfs_flush_t flush; + retro_vfs_remove_t remove; + retro_vfs_rename_t rename; + /* VFS API v2 */ + retro_vfs_truncate_t truncate; + /* VFS API v3 */ + retro_vfs_stat_t stat; + retro_vfs_mkdir_t mkdir; + retro_vfs_opendir_t opendir; + retro_vfs_readdir_t readdir; + retro_vfs_dirent_get_name_t dirent_get_name; + retro_vfs_dirent_is_dir_t dirent_is_dir; + retro_vfs_closedir_t closedir; +}; + +struct retro_vfs_interface_info +{ + /* Set by core: should this be higher than the version the front end supports, + * front end will return false in the RETRO_ENVIRONMENT_GET_VFS_INTERFACE call + * Introduced in VFS API v1 */ + uint32_t required_interface_version; + + /* Frontend writes interface pointer here. The frontend also sets the actual + * version, must be at least required_interface_version.
+ * Introduced in VFS API v1 */ + struct retro_vfs_interface *iface; +}; + +enum retro_hw_render_interface_type +{ + RETRO_HW_RENDER_INTERFACE_VULKAN = 0, + RETRO_HW_RENDER_INTERFACE_D3D9 = 1, + RETRO_HW_RENDER_INTERFACE_D3D10 = 2, + RETRO_HW_RENDER_INTERFACE_D3D11 = 3, + RETRO_HW_RENDER_INTERFACE_D3D12 = 4, + RETRO_HW_RENDER_INTERFACE_GSKIT_PS2 = 5, + RETRO_HW_RENDER_INTERFACE_DUMMY = INT_MAX +}; + +/* Base struct. All retro_hw_render_interface_* types + * contain at least these fields. */ +struct retro_hw_render_interface +{ + enum retro_hw_render_interface_type interface_type; + unsigned interface_version; +}; + +typedef void (RETRO_CALLCONV *retro_set_led_state_t)(int led, int state); +struct retro_led_interface +{ + retro_set_led_state_t set_led_state; +}; + +/* Retrieves the current state of the MIDI input. + * Returns true if it's enabled, false otherwise. */ +typedef bool (RETRO_CALLCONV *retro_midi_input_enabled_t)(void); + +/* Retrieves the current state of the MIDI output. + * Returns true if it's enabled, false otherwise */ +typedef bool (RETRO_CALLCONV *retro_midi_output_enabled_t)(void); + +/* Reads next byte from the input stream. + * Returns true if byte is read, false otherwise. */ +typedef bool (RETRO_CALLCONV *retro_midi_read_t)(uint8_t *byte); + +/* Writes byte to the output stream. + * 'delta_time' is in microseconds and represent time elapsed since previous write. + * Returns true if byte is written, false otherwise. */ +typedef bool (RETRO_CALLCONV *retro_midi_write_t)(uint8_t byte, uint32_t delta_time); + +/* Flushes previously written data. + * Returns true if successful, false otherwise. 
*/ +typedef bool (RETRO_CALLCONV *retro_midi_flush_t)(void); + +struct retro_midi_interface +{ + retro_midi_input_enabled_t input_enabled; + retro_midi_output_enabled_t output_enabled; + retro_midi_read_t read; + retro_midi_write_t write; + retro_midi_flush_t flush; +}; + +enum retro_hw_render_context_negotiation_interface_type +{ + RETRO_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_VULKAN = 0, + RETRO_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_DUMMY = INT_MAX +}; + +/* Base struct. All retro_hw_render_context_negotiation_interface_* types + * contain at least these fields. */ +struct retro_hw_render_context_negotiation_interface +{ + enum retro_hw_render_context_negotiation_interface_type interface_type; + unsigned interface_version; +}; + +/* Serialized state is incomplete in some way. Set if serialization is + * usable in typical end-user cases but should not be relied upon to + * implement frame-sensitive frontend features such as netplay or + * rerecording. */ +#define RETRO_SERIALIZATION_QUIRK_INCOMPLETE (1 << 0) +/* The core must spend some time initializing before serialization is + * supported. retro_serialize() will initially fail; retro_unserialize() + * and retro_serialize_size() may or may not work correctly either. */ +#define RETRO_SERIALIZATION_QUIRK_MUST_INITIALIZE (1 << 1) +/* Serialization size may change within a session. */ +#define RETRO_SERIALIZATION_QUIRK_CORE_VARIABLE_SIZE (1 << 2) +/* Set by the frontend to acknowledge that it supports variable-sized + * states. */ +#define RETRO_SERIALIZATION_QUIRK_FRONT_VARIABLE_SIZE (1 << 3) +/* Serialized state can only be loaded during the same session. */ +#define RETRO_SERIALIZATION_QUIRK_SINGLE_SESSION (1 << 4) +/* Serialized state cannot be loaded on an architecture with a different + * endianness from the one it was saved on. 
*/ +#define RETRO_SERIALIZATION_QUIRK_ENDIAN_DEPENDENT (1 << 5) +/* Serialized state cannot be loaded on a different platform from the one it + * was saved on for reasons other than endianness, such as word size + * dependence */ +#define RETRO_SERIALIZATION_QUIRK_PLATFORM_DEPENDENT (1 << 6) + +#define RETRO_MEMDESC_CONST (1 << 0) /* The frontend will never change this memory area once retro_load_game has returned. */ +#define RETRO_MEMDESC_BIGENDIAN (1 << 1) /* The memory area contains big endian data. Default is little endian. */ +#define RETRO_MEMDESC_SYSTEM_RAM (1 << 2) /* The memory area is system RAM. This is main RAM of the gaming system. */ +#define RETRO_MEMDESC_SAVE_RAM (1 << 3) /* The memory area is save RAM. This RAM is usually found on a game cartridge, backed up by a battery. */ +#define RETRO_MEMDESC_VIDEO_RAM (1 << 4) /* The memory area is video RAM (VRAM) */ +#define RETRO_MEMDESC_ALIGN_2 (1 << 16) /* All memory access in this area is aligned to their own size, or 2, whichever is smaller. */ +#define RETRO_MEMDESC_ALIGN_4 (2 << 16) +#define RETRO_MEMDESC_ALIGN_8 (3 << 16) +#define RETRO_MEMDESC_MINSIZE_2 (1 << 24) /* All memory in this region is accessed at least 2 bytes at the time. */ +#define RETRO_MEMDESC_MINSIZE_4 (2 << 24) +#define RETRO_MEMDESC_MINSIZE_8 (3 << 24) +struct retro_memory_descriptor +{ + uint64_t flags; + + /* Pointer to the start of the relevant ROM or RAM chip. + * It's strongly recommended to use 'offset' if possible, rather than + * doing math on the pointer. + * + * If the same byte is mapped by multiple descriptors, their descriptors + * must have the same pointer. + * If 'start' does not point to the first byte in the pointer, put the + * difference in 'offset' instead. + * + * May be NULL if there's nothing usable here (e.g. hardware registers and + * open bus). No flags should be set if the pointer is NULL. + * It's recommended to minimize the number of descriptors if possible, + * but not mandatory.
*/ + void *ptr; + size_t offset; + + /* This is the location in the emulated address space + * where the mapping starts. */ + size_t start; + + /* Which bits must be same as in 'start' for this mapping to apply. + * The first memory descriptor to claim a certain byte is the one + * that applies. + * A bit which is set in 'start' must also be set in this. + * Can be zero, in which case each byte is assumed mapped exactly once. + * In this case, 'len' must be a power of two. */ + size_t select; + + /* If this is nonzero, the set bits are assumed not connected to the + * memory chip's address pins. */ + size_t disconnect; + + /* This one tells the size of the current memory area. + * If, after start+disconnect are applied, the address is higher than + * this, the highest bit of the address is cleared. + * + * If the address is still too high, the next highest bit is cleared. + * Can be zero, in which case it's assumed to be infinite (as limited + * by 'select' and 'disconnect'). */ + size_t len; + + /* To go from emulated address to physical address, the following + * order applies: + * Subtract 'start', pick off 'disconnect', apply 'len', add 'offset'. */ + + /* The address space name must consist of only a-zA-Z0-9_-, + * should be as short as feasible (maximum length is 8 plus the NUL), + * and may not be any other address space plus one or more 0-9A-F + * at the end. + * However, multiple memory descriptors for the same address space is + * allowed, and the address space name can be empty. NULL is treated + * as empty. + * + * Address space names are case sensitive, but avoid lowercase if possible. + * The same pointer may exist in multiple address spaces.
+ * + * Examples: + * blank+blank - valid (multiple things may be mapped in the same namespace) + * 'Sp'+'Sp' - valid (multiple things may be mapped in the same namespace) + * 'A'+'B' - valid (neither is a prefix of each other) + * 'S'+blank - valid ('S' is not in 0-9A-F) + * 'a'+blank - valid ('a' is not in 0-9A-F) + * 'a'+'A' - valid (neither is a prefix of each other) + * 'AR'+blank - valid ('R' is not in 0-9A-F) + * 'ARB'+blank - valid (the B can't be part of the address either, because + * there is no namespace 'AR') + * blank+'B' - not valid, because it's ambiguous which address space B1234 + * would refer to. + * The length can't be used for that purpose; the frontend may want + * to append arbitrary data to an address, without a separator. */ + const char *addrspace; + + /* TODO: When finalizing this one, add a description field, which should be + * "WRAM" or something roughly equally long. */ + + /* TODO: When finalizing this one, replace 'select' with 'limit', which tells + * which bits can vary and still refer to the same address (limit = ~select). + * TODO: limit? range? vary? something else? */ + + /* TODO: When finalizing this one, if 'len' is above what 'select' (or + * 'limit') allows, it's bankswitched. Bankswitched data must have both 'len' + * and 'select' != 0, and the mappings don't tell how the system switches the + * banks. */ + + /* TODO: When finalizing this one, fix the 'len' bit removal order. + * For len=0x1800, pointer 0x1C00 should go to 0x1400, not 0x0C00. + * Algorithm: Take bits highest to lowest, but if it goes above len, clear + * the most recent addition and continue on the next bit. + * TODO: Can the above be optimized? Is "remove the lowest bit set in both + * pointer and 'len'" equivalent? */ + + /* TODO: Some emulators (MAME?) emulate big endian systems by only accessing + * the emulated memory in 32-bit chunks, native endian.
But that's nothing + * compared to Darek Mihocka + * (section Emulation 103 - Nearly Free Byte Reversal) - he flips the ENTIRE + * RAM backwards! I'll want to represent both of those, via some flags. + * + * I suspect MAME either didn't think of that idea, or don't want the #ifdef. + * Not sure which, nor do I really care. */ + + /* TODO: Some of those flags are unused and/or don't really make sense. Clean + * them up. */ +}; + +/* The frontend may use the largest value of 'start'+'select' in a + * certain namespace to infer the size of the address space. + * + * If the address space is larger than that, a mapping with .ptr=NULL + * should be at the end of the array, with .select set to all ones for + * as long as the address space is big. + * + * Sample descriptors (minus .ptr, and RETRO_MEMFLAG_ on the flags): + * SNES WRAM: + * .start=0x7E0000, .len=0x20000 + * (Note that this must be mapped before the ROM in most cases; some of the + * ROM mappers + * try to claim $7E0000, or at least $7E8000.) + * SNES SPC700 RAM: + * .addrspace="S", .len=0x10000 + * SNES WRAM mirrors: + * .flags=MIRROR, .start=0x000000, .select=0xC0E000, .len=0x2000 + * .flags=MIRROR, .start=0x800000, .select=0xC0E000, .len=0x2000 + * SNES WRAM mirrors, alternate equivalent descriptor: + * .flags=MIRROR, .select=0x40E000, .disconnect=~0x1FFF + * (Various similar constructions can be created by combining parts of + * the above two.) 
+ * SNES LoROM (512KB, mirrored a couple of times): + * .flags=CONST, .start=0x008000, .select=0x408000, .disconnect=0x8000, .len=512*1024 + * .flags=CONST, .start=0x400000, .select=0x400000, .disconnect=0x8000, .len=512*1024 + * SNES HiROM (4MB): + * .flags=CONST, .start=0x400000, .select=0x400000, .len=4*1024*1024 + * .flags=CONST, .offset=0x8000, .start=0x008000, .select=0x408000, .len=4*1024*1024 + * SNES ExHiROM (8MB): + * .flags=CONST, .offset=0, .start=0xC00000, .select=0xC00000, .len=4*1024*1024 + * .flags=CONST, .offset=4*1024*1024, .start=0x400000, .select=0xC00000, .len=4*1024*1024 + * .flags=CONST, .offset=0x8000, .start=0x808000, .select=0xC08000, .len=4*1024*1024 + * .flags=CONST, .offset=4*1024*1024+0x8000, .start=0x008000, .select=0xC08000, .len=4*1024*1024 + * Clarify the size of the address space: + * .ptr=NULL, .select=0xFFFFFF + * .len can be implied by .select in many of them, but was included for clarity. + */ + +struct retro_memory_map +{ + const struct retro_memory_descriptor *descriptors; + unsigned num_descriptors; +}; + +struct retro_controller_description +{ + /* Human-readable description of the controller. Even if using a generic + * input device type, this can be set to the particular device type the + * core uses. */ + const char *desc; + + /* Device type passed to retro_set_controller_port_device(). If the device + * type is a sub-class of a generic input device type, use the + * RETRO_DEVICE_SUBCLASS macro to create an ID. + * + * E.g. RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_JOYPAD, 1). */ + unsigned id; +}; + +struct retro_controller_info +{ + const struct retro_controller_description *types; + unsigned num_types; +}; + +struct retro_subsystem_memory_info +{ + /* The extension associated with a memory type, e.g. "psram". */ + const char *extension; + + /* The memory type for retro_get_memory(). This should be at + * least 0x100 to avoid conflict with standardized + * libretro memory types. 
*/ + unsigned type; +}; + +struct retro_subsystem_rom_info +{ + /* Describes what the content is (SGB BIOS, GB ROM, etc). */ + const char *desc; + + /* Same definition as retro_get_system_info(). */ + const char *valid_extensions; + + /* Same definition as retro_get_system_info(). */ + bool need_fullpath; + + /* Same definition as retro_get_system_info(). */ + bool block_extract; + + /* This is set if the content is required to load a game. + * If this is set to false, a zeroed-out retro_game_info can be passed. */ + bool required; + + /* Content can have multiple associated persistent + * memory types (retro_get_memory()). */ + const struct retro_subsystem_memory_info *memory; + unsigned num_memory; +}; + +struct retro_subsystem_info +{ + /* Human-readable string of the subsystem type, e.g. "Super GameBoy" */ + const char *desc; + + /* A computer friendly short string identifier for the subsystem type. + * This name must be [a-z]. + * E.g. if desc is "Super GameBoy", this can be "sgb". + * This identifier can be used for command-line interfaces, etc. + */ + const char *ident; + + /* Infos for each content file. The first entry is assumed to be the + * "most significant" content for frontend purposes. + * E.g. with Super GameBoy, the first content should be the GameBoy ROM, + * as it is the most "significant" content to a user. + * If a frontend creates new file paths based on the content used + * (e.g. savestates), it should use the path for the first ROM to do so. */ + const struct retro_subsystem_rom_info *roms; + + /* Number of content files associated with a subsystem. */ + unsigned num_roms; + + /* The type passed to retro_load_game_special(). */ + unsigned id; +}; + +typedef void (RETRO_CALLCONV *retro_proc_address_t)(void); + +/* libretro API extension functions: + * (None here so far). + * + * Get a symbol from a libretro core. + * Cores should only return symbols which are actual + * extensions to the libretro API. 
+ * + * Frontends should not use this to obtain symbols to standard + * libretro entry points (static linking or dlsym). + * + * The symbol name must be equal to the function name, + * e.g. if void retro_foo(void); exists, the symbol must be called "retro_foo". + * The returned function pointer must be cast to the corresponding type. + */ +typedef retro_proc_address_t (RETRO_CALLCONV *retro_get_proc_address_t)(const char *sym); + +struct retro_get_proc_address_interface +{ + retro_get_proc_address_t get_proc_address; +}; + +enum retro_log_level +{ + RETRO_LOG_DEBUG = 0, + RETRO_LOG_INFO, + RETRO_LOG_WARN, + RETRO_LOG_ERROR, + + RETRO_LOG_DUMMY = INT_MAX +}; + +/* Logging function. Takes log level argument as well. */ +typedef void (RETRO_CALLCONV *retro_log_printf_t)(enum retro_log_level level, + const char *fmt, ...); + +struct retro_log_callback +{ + retro_log_printf_t log; +}; + +/* Performance related functions */ + +/* ID values for SIMD CPU features */ +#define RETRO_SIMD_SSE (1 << 0) +#define RETRO_SIMD_SSE2 (1 << 1) +#define RETRO_SIMD_VMX (1 << 2) +#define RETRO_SIMD_VMX128 (1 << 3) +#define RETRO_SIMD_AVX (1 << 4) +#define RETRO_SIMD_NEON (1 << 5) +#define RETRO_SIMD_SSE3 (1 << 6) +#define RETRO_SIMD_SSSE3 (1 << 7) +#define RETRO_SIMD_MMX (1 << 8) +#define RETRO_SIMD_MMXEXT (1 << 9) +#define RETRO_SIMD_SSE4 (1 << 10) +#define RETRO_SIMD_SSE42 (1 << 11) +#define RETRO_SIMD_AVX2 (1 << 12) +#define RETRO_SIMD_VFPU (1 << 13) +#define RETRO_SIMD_PS (1 << 14) +#define RETRO_SIMD_AES (1 << 15) +#define RETRO_SIMD_VFPV3 (1 << 16) +#define RETRO_SIMD_VFPV4 (1 << 17) +#define RETRO_SIMD_POPCNT (1 << 18) +#define RETRO_SIMD_MOVBE (1 << 19) +#define RETRO_SIMD_CMOV (1 << 20) +#define RETRO_SIMD_ASIMD (1 << 21) + +typedef uint64_t retro_perf_tick_t; +typedef int64_t retro_time_t; + +struct retro_perf_counter +{ + const char *ident; + retro_perf_tick_t start; + retro_perf_tick_t total; + retro_perf_tick_t call_cnt; + + bool registered; +}; + +/* Returns current time in 
microseconds. + * Tries to use the most accurate timer available. + */ +typedef retro_time_t (RETRO_CALLCONV *retro_perf_get_time_usec_t)(void); + +/* A simple counter. Usually nanoseconds, but can also be CPU cycles. + * Can be used directly if desired (when creating a more sophisticated + * performance counter system). + * */ +typedef retro_perf_tick_t (RETRO_CALLCONV *retro_perf_get_counter_t)(void); + +/* Returns a bit-mask of detected CPU features (RETRO_SIMD_*). */ +typedef uint64_t (RETRO_CALLCONV *retro_get_cpu_features_t)(void); + +/* Asks frontend to log and/or display the state of performance counters. + * Performance counters can always be poked into manually as well. + */ +typedef void (RETRO_CALLCONV *retro_perf_log_t)(void); + +/* Register a performance counter. + * ident field must be set with a discrete value and other values in + * retro_perf_counter must be 0. + * Registering can be called multiple times. To avoid calling to + * frontend redundantly, you can check registered field first. */ +typedef void (RETRO_CALLCONV *retro_perf_register_t)(struct retro_perf_counter *counter); + +/* Starts a registered counter. */ +typedef void (RETRO_CALLCONV *retro_perf_start_t)(struct retro_perf_counter *counter); + +/* Stops a registered counter. */ +typedef void (RETRO_CALLCONV *retro_perf_stop_t)(struct retro_perf_counter *counter); + +/* For convenience it can be useful to wrap register, start and stop in macros. + * E.g.: + * #ifdef LOG_PERFORMANCE + * #define RETRO_PERFORMANCE_INIT(perf_cb, name) static struct retro_perf_counter name = {#name}; if (!name.registered) perf_cb.perf_register(&(name)) + * #define RETRO_PERFORMANCE_START(perf_cb, name) perf_cb.perf_start(&(name)) + * #define RETRO_PERFORMANCE_STOP(perf_cb, name) perf_cb.perf_stop(&(name)) + * #else + * ... Blank macros ... + * #endif + * + * These can then be used mid-functions around code snippets. + * + * extern struct retro_perf_callback perf_cb; * Somewhere in the core.
+ * + * void do_some_heavy_work(void) + * { + * RETRO_PERFORMANCE_INIT(cb, work_1); + * RETRO_PERFORMANCE_START(cb, work_1); + * heavy_work_1(); + * RETRO_PERFORMANCE_STOP(cb, work_1); + * + * RETRO_PERFORMANCE_INIT(cb, work_2); + * RETRO_PERFORMANCE_START(cb, work_2); + * heavy_work_2(); + * RETRO_PERFORMANCE_STOP(cb, work_2); + * } + * + * void retro_deinit(void) + * { + * perf_cb.perf_log(); * Log all perf counters here for example. + * } + */ + +struct retro_perf_callback +{ + retro_perf_get_time_usec_t get_time_usec; + retro_get_cpu_features_t get_cpu_features; + + retro_perf_get_counter_t get_perf_counter; + retro_perf_register_t perf_register; + retro_perf_start_t perf_start; + retro_perf_stop_t perf_stop; + retro_perf_log_t perf_log; +}; + +/* FIXME: Document the sensor API and work out behavior. + * It will be marked as experimental until then. + */ +enum retro_sensor_action +{ + RETRO_SENSOR_ACCELEROMETER_ENABLE = 0, + RETRO_SENSOR_ACCELEROMETER_DISABLE, + RETRO_SENSOR_GYROSCOPE_ENABLE, + RETRO_SENSOR_GYROSCOPE_DISABLE, + RETRO_SENSOR_ILLUMINANCE_ENABLE, + RETRO_SENSOR_ILLUMINANCE_DISABLE, + + RETRO_SENSOR_DUMMY = INT_MAX +}; + +/* Id values for SENSOR types. */ +#define RETRO_SENSOR_ACCELEROMETER_X 0 +#define RETRO_SENSOR_ACCELEROMETER_Y 1 +#define RETRO_SENSOR_ACCELEROMETER_Z 2 +#define RETRO_SENSOR_GYROSCOPE_X 3 +#define RETRO_SENSOR_GYROSCOPE_Y 4 +#define RETRO_SENSOR_GYROSCOPE_Z 5 +#define RETRO_SENSOR_ILLUMINANCE 6 + +typedef bool (RETRO_CALLCONV *retro_set_sensor_state_t)(unsigned port, + enum retro_sensor_action action, unsigned rate); + +typedef float (RETRO_CALLCONV *retro_sensor_get_input_t)(unsigned port, unsigned id); + +struct retro_sensor_interface +{ + retro_set_sensor_state_t set_sensor_state; + retro_sensor_get_input_t get_sensor_input; +}; + +enum retro_camera_buffer +{ + RETRO_CAMERA_BUFFER_OPENGL_TEXTURE = 0, + RETRO_CAMERA_BUFFER_RAW_FRAMEBUFFER, + + RETRO_CAMERA_BUFFER_DUMMY = INT_MAX +}; + +/* Starts the camera driver.
Can only be called in retro_run(). */ +typedef bool (RETRO_CALLCONV *retro_camera_start_t)(void); + +/* Stops the camera driver. Can only be called in retro_run(). */ +typedef void (RETRO_CALLCONV *retro_camera_stop_t)(void); + +/* Callback which signals when the camera driver is initialized + * and/or deinitialized. + * retro_camera_start_t can be called in initialized callback. + */ +typedef void (RETRO_CALLCONV *retro_camera_lifetime_status_t)(void); + +/* A callback for raw framebuffer data. buffer points to an XRGB8888 buffer. + * Width, height and pitch are similar to retro_video_refresh_t. + * First pixel is top-left origin. + */ +typedef void (RETRO_CALLCONV *retro_camera_frame_raw_framebuffer_t)(const uint32_t *buffer, + unsigned width, unsigned height, size_t pitch); + +/* A callback for when OpenGL textures are used. + * + * texture_id is a texture owned by camera driver. + * Its state or content should be considered immutable, except for things like + * texture filtering and clamping. + * + * texture_target is the texture target for the GL texture. + * These can include e.g. GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE, and possibly + * more depending on extensions. + * + * affine points to a packed 3x3 column-major matrix used to apply an affine + * transform to texture coordinates. (affine_matrix * vec3(coord_x, coord_y, 1.0)) + * After transform, normalized texture coord (0, 0) should be bottom-left + * and (1, 1) should be top-right (or (width, height) for RECTANGLE). + * + * GL-specific typedefs are avoided here to avoid relying on gl.h in + * the API definition. + */ +typedef void (RETRO_CALLCONV *retro_camera_frame_opengl_texture_t)(unsigned texture_id, + unsigned texture_target, const float *affine); + +struct retro_camera_callback +{ + /* Set by libretro core. + * Example bitmask: caps = (1 << RETRO_CAMERA_BUFFER_OPENGL_TEXTURE) | (1 << RETRO_CAMERA_BUFFER_RAW_FRAMEBUFFER). + */ + uint64_t caps; + + /* Desired resolution for camera. 
Is only used as a hint. */ + unsigned width; + unsigned height; + + /* Set by frontend. */ + retro_camera_start_t start; + retro_camera_stop_t stop; + + /* Set by libretro core if raw framebuffer callbacks will be used. */ + retro_camera_frame_raw_framebuffer_t frame_raw_framebuffer; + + /* Set by libretro core if OpenGL texture callbacks will be used. */ + retro_camera_frame_opengl_texture_t frame_opengl_texture; + + /* Set by libretro core. Called after camera driver is initialized and + * ready to be started. + * Can be NULL, in which this callback is not called. + */ + retro_camera_lifetime_status_t initialized; + + /* Set by libretro core. Called right before camera driver is + * deinitialized. + * Can be NULL, in which this callback is not called. + */ + retro_camera_lifetime_status_t deinitialized; +}; + +/* Sets the interval of time and/or distance at which to update/poll + * location-based data. + * + * To ensure compatibility with all location-based implementations, + * values for both interval_ms and interval_distance should be provided. + * + * interval_ms is the interval expressed in milliseconds. + * interval_distance is the distance interval expressed in meters. + */ +typedef void (RETRO_CALLCONV *retro_location_set_interval_t)(unsigned interval_ms, + unsigned interval_distance); + +/* Start location services. The device will start listening for changes to the + * current location at regular intervals (which are defined with + * retro_location_set_interval_t). */ +typedef bool (RETRO_CALLCONV *retro_location_start_t)(void); + +/* Stop location services. The device will stop listening for changes + * to the current location. */ +typedef void (RETRO_CALLCONV *retro_location_stop_t)(void); + +/* Get the position of the current location. Will set parameters to + * 0 if no new location update has happened since the last time. 
*/ +typedef bool (RETRO_CALLCONV *retro_location_get_position_t)(double *lat, double *lon, + double *horiz_accuracy, double *vert_accuracy); + +/* Callback which signals when the location driver is initialized + * and/or deinitialized. + * retro_location_start_t can be called in initialized callback. + */ +typedef void (RETRO_CALLCONV *retro_location_lifetime_status_t)(void); + +struct retro_location_callback +{ + retro_location_start_t start; + retro_location_stop_t stop; + retro_location_get_position_t get_position; + retro_location_set_interval_t set_interval; + + retro_location_lifetime_status_t initialized; + retro_location_lifetime_status_t deinitialized; +}; + +enum retro_rumble_effect +{ + RETRO_RUMBLE_STRONG = 0, + RETRO_RUMBLE_WEAK = 1, + + RETRO_RUMBLE_DUMMY = INT_MAX +}; + +/* Sets rumble state for joypad plugged in port 'port'. + * Rumble effects are controlled independently, + * and setting e.g. strong rumble does not override weak rumble. + * Strength has a range of [0, 0xffff]. + * + * Returns true if rumble state request was honored. + * Calling this before first retro_run() is likely to return false. */ +typedef bool (RETRO_CALLCONV *retro_set_rumble_state_t)(unsigned port, + enum retro_rumble_effect effect, uint16_t strength); + +struct retro_rumble_interface +{ + retro_set_rumble_state_t set_rumble_state; +}; + +/* Notifies libretro that audio data should be written. */ +typedef void (RETRO_CALLCONV *retro_audio_callback_t)(void); + +/* True: Audio driver in frontend is active, and callback is + * expected to be called regularly. + * False: Audio driver in frontend is paused or inactive. + * Audio callback will not be called until set_state has been + * called with true. + * Initial state is false (inactive).
+ */ +typedef void (RETRO_CALLCONV *retro_audio_set_state_callback_t)(bool enabled); + +struct retro_audio_callback +{ + retro_audio_callback_t callback; + retro_audio_set_state_callback_t set_state; +}; + +/* Notifies a libretro core of time spent since last invocation + * of retro_run() in microseconds. + * + * It will be called right before retro_run() every frame. + * The frontend can tamper with timing to support cases like + * fast-forward, slow-motion and framestepping. + * + * In those scenarios the reference frame time value will be used. */ +typedef int64_t retro_usec_t; +typedef void (RETRO_CALLCONV *retro_frame_time_callback_t)(retro_usec_t usec); +struct retro_frame_time_callback +{ + retro_frame_time_callback_t callback; + /* Represents the time of one frame. It is computed as + * 1000000 / fps, but the implementation will resolve the + * rounding to ensure that framestepping, etc is exact. */ + retro_usec_t reference; +}; + +/* Notifies a libretro core of the current occupancy + * level of the frontend audio buffer. + * + * - active: 'true' if audio buffer is currently + * in use. Will be 'false' if audio is + * disabled in the frontend + * + * - occupancy: Given as a value in the range [0,100], + * corresponding to the occupancy percentage + * of the audio buffer + * + * - underrun_likely: 'true' if the frontend expects an + * audio buffer underrun during the + * next frame (indicates that a core + * should attempt frame skipping) + * + * It will be called right before retro_run() every frame. */ +typedef void (RETRO_CALLCONV *retro_audio_buffer_status_callback_t)( + bool active, unsigned occupancy, bool underrun_likely); +struct retro_audio_buffer_status_callback +{ + retro_audio_buffer_status_callback_t callback; +}; + +/* Pass this to retro_video_refresh_t if rendering to hardware. + * Passing NULL to retro_video_refresh_t is still a frame dupe as normal.
+ * */ +#define RETRO_HW_FRAME_BUFFER_VALID ((void*)-1) + +/* Invalidates the current HW context. + * Any GL state is lost, and must not be deinitialized explicitly. + * If explicit deinitialization is desired by the libretro core, + * it should implement context_destroy callback. + * If called, all GPU resources must be reinitialized. + * Usually called when frontend reinits video driver. + * Also called first time video driver is initialized, + * allowing libretro core to initialize resources. + */ +typedef void (RETRO_CALLCONV *retro_hw_context_reset_t)(void); + +/* Gets current framebuffer which is to be rendered to. + * Could change every frame potentially. + */ +typedef uintptr_t (RETRO_CALLCONV *retro_hw_get_current_framebuffer_t)(void); + +/* Get a symbol from HW context. */ +typedef retro_proc_address_t (RETRO_CALLCONV *retro_hw_get_proc_address_t)(const char *sym); + +enum retro_hw_context_type +{ + RETRO_HW_CONTEXT_NONE = 0, + /* OpenGL 2.x. Driver can choose to use latest compatibility context. */ + RETRO_HW_CONTEXT_OPENGL = 1, + /* OpenGL ES 2.0. */ + RETRO_HW_CONTEXT_OPENGLES2 = 2, + /* Modern desktop core GL context. Use version_major/ + * version_minor fields to set GL version. */ + RETRO_HW_CONTEXT_OPENGL_CORE = 3, + /* OpenGL ES 3.0 */ + RETRO_HW_CONTEXT_OPENGLES3 = 4, + /* OpenGL ES 3.1+. Set version_major/version_minor. For GLES2 and GLES3, + * use the corresponding enums directly. */ + RETRO_HW_CONTEXT_OPENGLES_VERSION = 5, + + /* Vulkan, see RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE. 
*/ + RETRO_HW_CONTEXT_VULKAN = 6, + + /* Direct3D11, see RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE */ + RETRO_HW_CONTEXT_D3D11 = 7, + + /* Direct3D10, see RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE */ + RETRO_HW_CONTEXT_D3D10 = 8, + + /* Direct3D12, see RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE */ + RETRO_HW_CONTEXT_D3D12 = 9, + + /* Direct3D9, see RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE */ + RETRO_HW_CONTEXT_D3D9 = 10, + + RETRO_HW_CONTEXT_DUMMY = INT_MAX +}; + +struct retro_hw_render_callback +{ + /* Which API to use. Set by libretro core. */ + enum retro_hw_context_type context_type; + + /* Called when a context has been created or when it has been reset. + * An OpenGL context is only valid after context_reset() has been called. + * + * When context_reset is called, OpenGL resources in the libretro + * implementation are guaranteed to be invalid. + * + * It is possible that context_reset is called multiple times during an + * application lifecycle. + * If context_reset is called without any notification (context_destroy), + * the OpenGL context was lost and resources should just be recreated + * without any attempt to "free" old resources. + */ + retro_hw_context_reset_t context_reset; + + /* Set by frontend. + * TODO: This is rather obsolete. The frontend should not + * be providing preallocated framebuffers. */ + retro_hw_get_current_framebuffer_t get_current_framebuffer; + + /* Set by frontend. + * Can return all relevant functions, including glClear on Windows. */ + retro_hw_get_proc_address_t get_proc_address; + + /* Set if render buffers should have depth component attached. + * TODO: Obsolete. */ + bool depth; + + /* Set if stencil buffers should be attached. + * TODO: Obsolete. */ + bool stencil; + + /* If depth and stencil are true, a packed 24/8 buffer will be added. + * Only attaching stencil is invalid and will be ignored. */ + + /* Use conventional bottom-left origin convention. If false, + * standard libretro top-left origin semantics are used. 
+ * TODO: Move to GL specific interface. */ + bool bottom_left_origin; + + /* Major version number for core GL context or GLES 3.1+. */ + unsigned version_major; + + /* Minor version number for core GL context or GLES 3.1+. */ + unsigned version_minor; + + /* If this is true, the frontend will go very far to avoid + * resetting context in scenarios like toggling fullscreen, etc. + * TODO: Obsolete? Maybe frontend should just always assume this ... + */ + bool cache_context; + + /* The reset callback might still be called in extreme situations + * such as if the context is lost beyond recovery. + * + * For optimal stability, set this to false, and allow context to be + * reset at any time. + */ + + /* A callback to be called before the context is destroyed in a + * controlled way by the frontend. */ + retro_hw_context_reset_t context_destroy; + + /* OpenGL resources can be deinitialized cleanly at this step. + * context_destroy can be set to NULL, in which resources will + * just be destroyed without any notification. + * + * Even when context_destroy is non-NULL, it is possible that + * context_reset is called without any destroy notification. + * This happens if context is lost by external factors (such as + * notified by GL_ARB_robustness). + * + * In this case, the context is assumed to be already dead, + * and the libretro implementation must not try to free any OpenGL + * resources in the subsequent context_reset. + */ + + /* Creates a debug context. */ + bool debug_context; +}; + +/* Callback type passed in RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK. + * Called by the frontend in response to keyboard events. + * down is set if the key is being pressed, or false if it is being released. + * keycode is the RETROK value of the char. + * character is the text character of the pressed key. (UTF-32). + * key_modifiers is a set of RETROKMOD values or'ed together. + * + * The pressed/keycode state can be independent of the character.
+ * It is also possible that multiple characters are generated from a + * single keypress. + * Keycode events should be treated separately from character events. + * However, when possible, the frontend should try to synchronize these. + * If only a character is posted, keycode should be RETROK_UNKNOWN. + * + * Similarly if only a keycode event is generated with no corresponding + * character, character should be 0. + */ +typedef void (RETRO_CALLCONV *retro_keyboard_event_t)(bool down, unsigned keycode, + uint32_t character, uint16_t key_modifiers); + +struct retro_keyboard_callback +{ + retro_keyboard_event_t callback; +}; + +/* Callbacks for RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE & + * RETRO_ENVIRONMENT_SET_DISK_CONTROL_EXT_INTERFACE. + * Should be set for implementations which can swap out multiple disk + * images in runtime. + * + * If the implementation can do this automatically, it should strive to do so. + * However, there are cases where the user must manually do so. + * + * Overview: To swap a disk image, eject the disk image with + * set_eject_state(true). + * Set the disk index with set_image_index(index). Insert the disk again + * with set_eject_state(false). + */ + +/* If ejected is true, "ejects" the virtual disk tray. + * When ejected, the disk image index can be set. + */ +typedef bool (RETRO_CALLCONV *retro_set_eject_state_t)(bool ejected); + +/* Gets current eject state. The initial state is 'not ejected'. */ +typedef bool (RETRO_CALLCONV *retro_get_eject_state_t)(void); + +/* Gets current disk index. First disk is index 0. + * If return value is >= get_num_images(), no disk is currently inserted. + */ +typedef unsigned (RETRO_CALLCONV *retro_get_image_index_t)(void); + +/* Sets image index. Can only be called when disk is ejected. + * The implementation supports setting "no disk" by using an + * index >= get_num_images().
+ */ +typedef bool (RETRO_CALLCONV *retro_set_image_index_t)(unsigned index); + +/* Gets total number of images which are available to use. */ +typedef unsigned (RETRO_CALLCONV *retro_get_num_images_t)(void); + +struct retro_game_info; + +/* Replaces the disk image associated with index. + * Arguments to pass in info have same requirements as retro_load_game(). + * Virtual disk tray must be ejected when calling this. + * + * Replacing a disk image with info = NULL will remove the disk image + * from the internal list. + * As a result, calls to get_image_index() can change. + * + * E.g. replace_image_index(1, NULL), and previous get_image_index() + * returned 4 before. + * Index 1 will be removed, and the new index is 3. + */ +typedef bool (RETRO_CALLCONV *retro_replace_image_index_t)(unsigned index, + const struct retro_game_info *info); + +/* Adds a new valid index (get_num_images()) to the internal disk list. + * This will increment subsequent return values from get_num_images() by 1. + * This image index cannot be used until a disk image has been set + * with replace_image_index. */ +typedef bool (RETRO_CALLCONV *retro_add_image_index_t)(void); + +/* Sets initial image to insert in drive when calling + * core_load_game(). + * Since we cannot pass the initial index when loading + * content (this would require a major API change), this + * is set by the frontend *before* calling the core's + * retro_load_game()/retro_load_game_special() implementation. + * A core should therefore cache the index/path values and handle + * them inside retro_load_game()/retro_load_game_special(). + * - If 'index' is invalid (index >= get_num_images()), the + * core should ignore the set value and instead use 0 + * - 'path' is used purely for error checking - i.e. when + * content is loaded, the core should verify that the + * disk specified by 'index' has the specified file path. 
+ * This is to guard against auto selecting the wrong image + * if (for example) the user should modify an existing M3U + * playlist. We have to let the core handle this because + * set_initial_image() must be called before loading content, + * i.e. the frontend cannot access image paths in advance + * and thus cannot perform the error check itself. + * If set path and content path do not match, the core should + * ignore the set 'index' value and instead use 0 + * Returns 'false' if index or 'path' are invalid, or core + * does not support this functionality + */ +typedef bool (RETRO_CALLCONV *retro_set_initial_image_t)(unsigned index, const char *path); + +/* Fetches the path of the specified disk image file. + * Returns 'false' if index is invalid (index >= get_num_images()) + * or path is otherwise unavailable. + */ +typedef bool (RETRO_CALLCONV *retro_get_image_path_t)(unsigned index, char *path, size_t len); + +/* Fetches a core-provided 'label' for the specified disk + * image file. In the simplest case this may be a file name + * (without extension), but for cores with more complex + * content requirements information may be provided to + * facilitate user disk swapping - for example, a core + * running floppy-disk-based content may uniquely label + * save disks, data disks, level disks, etc. with names + * corresponding to in-game disk change prompts (so the + * frontend can provide better user guidance than a 'dumb' + * disk index value). + * Returns 'false' if index is invalid (index >= get_num_images()) + * or label is otherwise unavailable. 
+ */ +typedef bool (RETRO_CALLCONV *retro_get_image_label_t)(unsigned index, char *label, size_t len); + +struct retro_disk_control_callback +{ + retro_set_eject_state_t set_eject_state; + retro_get_eject_state_t get_eject_state; + + retro_get_image_index_t get_image_index; + retro_set_image_index_t set_image_index; + retro_get_num_images_t get_num_images; + + retro_replace_image_index_t replace_image_index; + retro_add_image_index_t add_image_index; +}; + +struct retro_disk_control_ext_callback +{ + retro_set_eject_state_t set_eject_state; + retro_get_eject_state_t get_eject_state; + + retro_get_image_index_t get_image_index; + retro_set_image_index_t set_image_index; + retro_get_num_images_t get_num_images; + + retro_replace_image_index_t replace_image_index; + retro_add_image_index_t add_image_index; + + /* NOTE: Frontend will only attempt to record/restore + * last used disk index if both set_initial_image() + * and get_image_path() are implemented */ + retro_set_initial_image_t set_initial_image; /* Optional - may be NULL */ + + retro_get_image_path_t get_image_path; /* Optional - may be NULL */ + retro_get_image_label_t get_image_label; /* Optional - may be NULL */ +}; + +/* Definitions for RETRO_ENVIRONMENT_SET_NETPACKET_INTERFACE. + * A core can set it if sending and receiving custom network packets + * during a multiplayer session is desired. + */ + +/* Netpacket flags for retro_netpacket_send_t */ +#define RETRO_NETPACKET_UNRELIABLE 0 /* Packet to be sent unreliably, depending on network quality it might not arrive. */ +#define RETRO_NETPACKET_RELIABLE (1 << 0) /* Reliable packets are guaranteed to arrive at the target in the order they were sent. */ +#define RETRO_NETPACKET_UNSEQUENCED (1 << 1) /* Packet will not be sequenced with other packets and may arrive out of order. Cannot be set on reliable packets. */ + +/* Used by the core to send a packet to one or more connected players.
+ * A single packet sent via this interface can contain up to 64 KB of data. + * + * The broadcast flag can be set to true to send to multiple connected clients. + * In a broadcast, the client_id argument indicates 1 client NOT to send the + * packet to (pass 0xFFFF to send to everyone). Otherwise, the client_id + * argument indicates a single client to send the packet to. + * + * A frontend must support sending reliable packets (RETRO_NETPACKET_RELIABLE). + * Unreliable packets might not be supported by the frontend, but the flags can + * still be specified. Reliable transmission will be used instead. + * + * If this function is called passing NULL for buf, it will instead flush all + * previously buffered outgoing packets and instantly read any incoming packets. + * During such a call, retro_netpacket_receive_t and retro_netpacket_stop_t can + * be called. The core can perform this in a loop to do a blocking read, i.e., + * wait for incoming data, but needs to handle stop getting called and also + * give up after a short while to avoid freezing on a connection problem. + * + * This function is not guaranteed to be thread-safe and must be called during + * retro_run or any of the netpacket callbacks passed with this interface. + */ +typedef void (RETRO_CALLCONV *retro_netpacket_send_t)(int flags, const void* buf, size_t len, uint16_t client_id, bool broadcast); + +/* Called by the frontend to signify that a multiplayer session has started. + * If client_id is 0 the local player is the host of the session and at this + * point no other player has connected yet. + * + * If client_id is > 0 the local player is a client connected to a host and + * at this point is already fully connected to the host. + * + * The core must store the retro_netpacket_send_t function pointer provided + * here and use it whenever it wants to send a packet. This function pointer + * remains valid until the frontend calls retro_netpacket_stop_t. 
+ */ +typedef void (RETRO_CALLCONV *retro_netpacket_start_t)(uint16_t client_id, retro_netpacket_send_t send_fn); + +/* Called by the frontend when a new packet arrives which has been sent from + * another player with retro_netpacket_send_t. The client_id argument indicates + * who has sent the packet. + */ +typedef void (RETRO_CALLCONV *retro_netpacket_receive_t)(const void* buf, size_t len, uint16_t client_id); + +/* Called by the frontend when the multiplayer session has ended. + * Once this gets called the retro_netpacket_send_t function pointer passed + * to retro_netpacket_start_t will not be valid anymore. + */ +typedef void (RETRO_CALLCONV *retro_netpacket_stop_t)(void); + +/* Called by the frontend every frame (between calls to retro_run) while + * updating the state of the multiplayer session. + * This is a good place for the core to call retro_netpacket_send_t from. + */ +typedef void (RETRO_CALLCONV *retro_netpacket_poll_t)(void); + +/* Called by the frontend when a new player connects to the hosted session. + * This is only called on the host side, not for clients connected to the host. + * If this function returns false, the newly connected player gets dropped. + * This can be used for example to limit the number of players. + */ +typedef bool (RETRO_CALLCONV *retro_netpacket_connected_t)(uint16_t client_id); + +/* Called by the frontend when a player leaves or disconnects from the hosted session. + * This is only called on the host side, not for clients connected to the host. + */ +typedef void (RETRO_CALLCONV *retro_netpacket_disconnected_t)(uint16_t client_id); + +/** + * A callback interface for giving a core the ability to send and receive custom + * network packets during a multiplayer session between two or more instances + * of a libretro frontend.
+ * + * @see RETRO_ENVIRONMENT_SET_NETPACKET_INTERFACE + */ +struct retro_netpacket_callback +{ + retro_netpacket_start_t start; + retro_netpacket_receive_t receive; + retro_netpacket_stop_t stop; /* Optional - may be NULL */ + retro_netpacket_poll_t poll; /* Optional - may be NULL */ + retro_netpacket_connected_t connected; /* Optional - may be NULL */ + retro_netpacket_disconnected_t disconnected; /* Optional - may be NULL */ +}; + +enum retro_pixel_format +{ + /* 0RGB1555, native endian. + * 0 bit must be set to 0. + * This pixel format is default for compatibility concerns only. + * If a 15/16-bit pixel format is desired, consider using RGB565. */ + RETRO_PIXEL_FORMAT_0RGB1555 = 0, + + /* XRGB8888, native endian. + * X bits are ignored. */ + RETRO_PIXEL_FORMAT_XRGB8888 = 1, + + /* RGB565, native endian. + * This pixel format is the recommended format to use if a 15/16-bit + * format is desired as it is the pixel format that is typically + * available on a wide range of low-power devices. + * + * It is also natively supported in APIs like OpenGL ES. */ + RETRO_PIXEL_FORMAT_RGB565 = 2, + + /* Ensure sizeof() == sizeof(int). */ + RETRO_PIXEL_FORMAT_UNKNOWN = INT_MAX +}; + +enum retro_savestate_context +{ + /* Standard savestate written to disk. */ + RETRO_SAVESTATE_CONTEXT_NORMAL = 0, + + /* Savestate where you are guaranteed that the same instance will load the save state. + * You can store internal pointers to code or data. + * It's still a full serialization and deserialization, and could be loaded or saved at any time. + * It won't be written to disk or sent over the network. + */ + RETRO_SAVESTATE_CONTEXT_RUNAHEAD_SAME_INSTANCE = 1, + + /* Savestate where you are guaranteed that the same emulator binary will load that savestate. + * You can skip anything that would slow down saving or loading state but you can not store internal pointers. + * It won't be written to disk or sent over the network. 
+ * Example: "Second Instance" runahead + */ + RETRO_SAVESTATE_CONTEXT_RUNAHEAD_SAME_BINARY = 2, + + /* Savestate used within a rollback netplay feature. + * You should skip anything that would unnecessarily increase bandwidth usage. + * It won't be written to disk but it will be sent over the network. + */ + RETRO_SAVESTATE_CONTEXT_ROLLBACK_NETPLAY = 3, + + /* Ensure sizeof() == sizeof(int). */ + RETRO_SAVESTATE_CONTEXT_UNKNOWN = INT_MAX +}; + +struct retro_message +{ + const char *msg; /* Message to be displayed. */ + unsigned frames; /* Duration in frames of message. */ +}; + +enum retro_message_target +{ + RETRO_MESSAGE_TARGET_ALL = 0, + RETRO_MESSAGE_TARGET_OSD, + RETRO_MESSAGE_TARGET_LOG +}; + +enum retro_message_type +{ + RETRO_MESSAGE_TYPE_NOTIFICATION = 0, + RETRO_MESSAGE_TYPE_NOTIFICATION_ALT, + RETRO_MESSAGE_TYPE_STATUS, + RETRO_MESSAGE_TYPE_PROGRESS +}; + +struct retro_message_ext +{ + /* Message string to be displayed/logged */ + const char *msg; + /* Duration (in ms) of message when targeting the OSD */ + unsigned duration; + /* Message priority when targeting the OSD + * > When multiple concurrent messages are sent to + * the frontend and the frontend does not have the + * capacity to display them all, messages with the + * *highest* priority value should be shown + * > There is no upper limit to a message priority + * value (within the bounds of the unsigned data type) + * > In the reference frontend (RetroArch), the same + * priority values are used for frontend-generated + * notifications, which are typically assigned values + * between 0 and 3 depending upon importance */ + unsigned priority; + /* Message logging level (info, warn, error, etc.) 
*/ + enum retro_log_level level; + /* Message destination: OSD, logging interface or both */ + enum retro_message_target target; + /* Message 'type' when targeting the OSD + * > RETRO_MESSAGE_TYPE_NOTIFICATION: Specifies that a + * message should be handled in identical fashion to + * a standard frontend-generated notification + * > RETRO_MESSAGE_TYPE_NOTIFICATION_ALT: Specifies that + * message is a notification that requires user attention + * or action, but that it should be displayed in a manner + * that differs from standard frontend-generated notifications. + * This would typically correspond to messages that should be + * displayed immediately (independently from any internal + * frontend message queue), and/or which should be visually + * distinguishable from frontend-generated notifications. + * For example, a core may wish to inform the user of + * information related to a disk-change event. It is + * expected that the frontend itself may provide a + * notification in this case; if the core sends a + * message of type RETRO_MESSAGE_TYPE_NOTIFICATION, an + * uncomfortable 'double-notification' may occur. A message + * of RETRO_MESSAGE_TYPE_NOTIFICATION_ALT should therefore + * be presented such that visual conflict with regular + * notifications does not occur + * > RETRO_MESSAGE_TYPE_STATUS: Indicates that message + * is not a standard notification. This typically + * corresponds to 'status' indicators, such as a core's + * internal FPS, which are intended to be displayed + * either permanently while a core is running, or in + * a manner that does not suggest user attention or action + * is required. 'Status' type messages should therefore be + * displayed in a different on-screen location and in a manner + * easily distinguishable from both standard frontend-generated + * notifications and messages of type RETRO_MESSAGE_TYPE_NOTIFICATION_ALT + * > RETRO_MESSAGE_TYPE_PROGRESS: Indicates that message reports + * the progress of an internal core task. 
For example, in cases + * where a core itself handles the loading of content from a file, + * this may correspond to the percentage of the file that has been + * read. Alternatively, an audio/video playback core may use a + * message of type RETRO_MESSAGE_TYPE_PROGRESS to display the current + * playback position as a percentage of the runtime. 'Progress' type + * messages should therefore be displayed as a literal progress bar, + * where: + * - 'retro_message_ext.msg' is the progress bar title/label + * - 'retro_message_ext.progress' determines the length of + * the progress bar + * NOTE: Message type is a *hint*, and may be ignored + * by the frontend. If a frontend lacks support for + * displaying messages via alternate means than standard + * frontend-generated notifications, it will treat *all* + * messages as having the type RETRO_MESSAGE_TYPE_NOTIFICATION */ + enum retro_message_type type; + /* Task progress when targeting the OSD and message is + * of type RETRO_MESSAGE_TYPE_PROGRESS + * > -1: Unmetered/indeterminate + * > 0-100: Current progress percentage + * NOTE: Since message type is a hint, a frontend may ignore + * progress values. Where relevant, a core should therefore + * include progress percentage within the message string, + * such that the message intent remains clear when displayed + * as a standard frontend-generated notification */ + int8_t progress; +}; + +/* Describes how the libretro implementation maps a libretro input bind + * to its internal input system through a human readable string. + * This string can be used to better let a user configure input. */ +struct retro_input_descriptor +{ + /* Associates given parameters with a description. */ + unsigned port; + unsigned device; + unsigned index; + unsigned id; + + /* Human readable description for parameters. + * The pointer must remain valid until + * retro_unload_game() is called. 
*/ + const char *description; +}; + +struct retro_system_info +{ + /* All pointers are owned by libretro implementation, and pointers must + * remain valid until it is unloaded. */ + + const char *library_name; /* Descriptive name of library. Should not + * contain any version numbers, etc. */ + const char *library_version; /* Descriptive version of core. */ + + const char *valid_extensions; /* A string listing probable content + * extensions the core will be able to + * load, separated with pipe. + * E.g. "bin|rom|iso". + * Typically used for a GUI to filter + * out extensions. */ + + /* Libretro cores that need to have direct access to their content + * files, including cores which use the path of the content files to + * determine the paths of other files, should set need_fullpath to true. + * + * Cores should strive for setting need_fullpath to false, + * as it allows the frontend to perform patching, etc. + * + * If need_fullpath is true and retro_load_game() is called: + * - retro_game_info::path is guaranteed to have a valid path + * - retro_game_info::data and retro_game_info::size are invalid + * + * If need_fullpath is false and retro_load_game() is called: + * - retro_game_info::path may be NULL + * - retro_game_info::data and retro_game_info::size are guaranteed + * to be valid + * + * See also: + * - RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY + * - RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY + */ + bool need_fullpath; + + /* If true, the frontend is not allowed to extract any archives before + * loading the real content. + * Necessary for certain libretro implementations that load games + * from zipped archives. */ + bool block_extract; +}; + +/* Defines overrides which modify frontend handling of + * specific content file types.
+ * An array of retro_system_content_info_override is + * passed to RETRO_ENVIRONMENT_SET_CONTENT_INFO_OVERRIDE + * NOTE: In the following descriptions, references to + * retro_load_game() may be replaced with + * retro_load_game_special() */ +struct retro_system_content_info_override +{ + /* A list of file extensions for which the override + * should apply, delimited by a 'pipe' character + * (e.g. "md|sms|gg") + * Permitted file extensions are limited to those + * included in retro_system_info::valid_extensions + * and/or retro_subsystem_rom_info::valid_extensions */ + const char *extensions; + + /* Overrides the need_fullpath value set in + * retro_system_info and/or retro_subsystem_rom_info. + * To reiterate: + * + * If need_fullpath is true and retro_load_game() is called: + * - retro_game_info::path is guaranteed to contain a valid + * path to an existent file + * - retro_game_info::data and retro_game_info::size are invalid + * + * If need_fullpath is false and retro_load_game() is called: + * - retro_game_info::path may be NULL + * - retro_game_info::data and retro_game_info::size are guaranteed + * to be valid + * + * In addition: + * + * If need_fullpath is true and retro_load_game() is called: + * - retro_game_info_ext::full_path is guaranteed to contain a valid + * path to an existent file + * - retro_game_info_ext::archive_path may be NULL + * - retro_game_info_ext::archive_file may be NULL + * - retro_game_info_ext::dir is guaranteed to contain a valid path + * to the directory in which the content file exists + * - retro_game_info_ext::name is guaranteed to contain the + * basename of the content file, without extension + * - retro_game_info_ext::ext is guaranteed to contain the + * extension of the content file in lower case format + * - retro_game_info_ext::data and retro_game_info_ext::size + * are invalid + * + * If need_fullpath is false and retro_load_game() is called: + * - If retro_game_info_ext::file_in_archive is false: + * - 
retro_game_info_ext::full_path is guaranteed to contain + * a valid path to an existent file + * - retro_game_info_ext::archive_path may be NULL + * - retro_game_info_ext::archive_file may be NULL + * - retro_game_info_ext::dir is guaranteed to contain a + * valid path to the directory in which the content file exists + * - retro_game_info_ext::name is guaranteed to contain the + * basename of the content file, without extension + * - retro_game_info_ext::ext is guaranteed to contain the + * extension of the content file in lower case format + * - If retro_game_info_ext::file_in_archive is true: + * - retro_game_info_ext::full_path may be NULL + * - retro_game_info_ext::archive_path is guaranteed to + * contain a valid path to an existent compressed file + * inside which the content file is located + * - retro_game_info_ext::archive_file is guaranteed to + * contain a valid path to an existent content file + * inside the compressed file referred to by + * retro_game_info_ext::archive_path + * e.g. 
for a compressed file '/path/to/foo.zip' + * containing 'bar.sfc' + * > retro_game_info_ext::archive_path will be '/path/to/foo.zip' + * > retro_game_info_ext::archive_file will be 'bar.sfc' + * - retro_game_info_ext::dir is guaranteed to contain a + * valid path to the directory in which the compressed file + * (containing the content file) exists + * - retro_game_info_ext::name is guaranteed to contain + * EITHER + * 1) the basename of the compressed file (containing + * the content file), without extension + * OR + * 2) the basename of the content file inside the + * compressed file, without extension + * In either case, a core should consider 'name' to + * be the canonical name/ID of the content file + * - retro_game_info_ext::ext is guaranteed to contain the + * extension of the content file inside the compressed file, + * in lower case format + * - retro_game_info_ext::data and retro_game_info_ext::size are + * guaranteed to be valid */ + bool need_fullpath; + + /* If need_fullpath is false, specifies whether the content + * data buffer available in retro_load_game() is 'persistent' + * + * If persistent_data is false and retro_load_game() is called: + * - retro_game_info::data and retro_game_info::size + * are valid only until retro_load_game() returns + * - retro_game_info_ext::data and retro_game_info_ext::size + * are valid only until retro_load_game() returns + * + * If persistent_data is true and retro_load_game() is called: + * - retro_game_info::data and retro_game_info::size + * are valid until retro_deinit() returns + * - retro_game_info_ext::data and retro_game_info_ext::size + * are valid until retro_deinit() returns */ + bool persistent_data; +}; + +/* Similar to retro_game_info, but provides extended + * information about the source content file and + * game memory buffer status.
+ * An array of retro_game_info_ext is returned by + * RETRO_ENVIRONMENT_GET_GAME_INFO_EXT + * NOTE: In the following descriptions, references to + * retro_load_game() may be replaced with + * retro_load_game_special() */ +struct retro_game_info_ext +{ + /* - If file_in_archive is false, contains a valid + * path to an existent content file (UTF-8 encoded) + * - If file_in_archive is true, may be NULL */ + const char *full_path; + + /* - If file_in_archive is false, may be NULL + * - If file_in_archive is true, contains a valid path + * to an existent compressed file inside which the + * content file is located (UTF-8 encoded) */ + const char *archive_path; + + /* - If file_in_archive is false, may be NULL + * - If file_in_archive is true, contains a valid path + * to an existent content file inside the compressed + * file referred to by archive_path (UTF-8 encoded) + * e.g. for a compressed file '/path/to/foo.zip' + * containing 'bar.sfc' + * > archive_path will be '/path/to/foo.zip' + * > archive_file will be 'bar.sfc' */ + const char *archive_file; + + /* - If file_in_archive is false, contains a valid path + * to the directory in which the content file exists + * (UTF-8 encoded) + * - If file_in_archive is true, contains a valid path + * to the directory in which the compressed file + * (containing the content file) exists (UTF-8 encoded) */ + const char *dir; + + /* Contains the canonical name/ID of the content file + * (UTF-8 encoded). Intended for use when identifying + * 'complementary' content named after the loaded file - + * i.e. companion data of a different format (a CD image + * required by a ROM), texture packs, internally handled + * save files, etc. + * - If file_in_archive is false, contains the basename + * of the content file, without extension + * - If file_in_archive is true, then string is + * implementation specific.
A frontend may choose to + * set a name value of: + * EITHER + * 1) the basename of the compressed file (containing + * the content file), without extension + * OR + * 2) the basename of the content file inside the + * compressed file, without extension + * RetroArch sets the 'name' value according to (1). + * A frontend that supports routine loading of + * content from archives containing multiple unrelated + * content files may set the 'name' value according + * to (2). */ + const char *name; + + /* - If file_in_archive is false, contains the extension + * of the content file in lower case format + * - If file_in_archive is true, contains the extension + * of the content file inside the compressed file, + * in lower case format */ + const char *ext; + + /* String of implementation specific meta-data. */ + const char *meta; + + /* Memory buffer of loaded game content. Will be NULL: + * IF + * - retro_system_info::need_fullpath is true and + * retro_system_content_info_override::need_fullpath + * is unset + * OR + * - retro_system_content_info_override::need_fullpath + * is true */ + const void *data; + + /* Size of game content memory buffer, in bytes */ + size_t size; + + /* True if loaded content file is inside a compressed + * archive */ + bool file_in_archive; + + /* - If data is NULL, value is unset/ignored + * - If data is non-NULL: + * - If persistent_data is false, data and size are + * valid only until retro_load_game() returns + * - If persistent_data is true, data and size are + * valid until retro_deinit() returns */ + bool persistent_data; +}; + +struct retro_game_geometry +{ + unsigned base_width; /* Nominal video width of game. */ + unsigned base_height; /* Nominal video height of game. */ + unsigned max_width; /* Maximum possible width of game. */ + unsigned max_height; /* Maximum possible height of game. */ + + float aspect_ratio; /* Nominal aspect ratio of game.
If + * aspect_ratio is <= 0.0, an aspect ratio + * of base_width / base_height is assumed. + * A frontend could override this setting, + * if desired. */ +}; + +struct retro_system_timing +{ + double fps; /* FPS of video content. */ + double sample_rate; /* Sampling rate of audio. */ +}; + +struct retro_system_av_info +{ + struct retro_game_geometry geometry; + struct retro_system_timing timing; +}; + +struct retro_variable +{ + /* Variable to query in RETRO_ENVIRONMENT_GET_VARIABLE. + * If NULL, obtains the complete environment string if more + * complex parsing is necessary. + * The environment string is formatted as key-value pairs + * delimited by semicolons as so: + * "key1=value1;key2=value2;..." + */ + const char *key; + + /* Value to be obtained. If key does not exist, it is set to NULL. */ + const char *value; +}; + +struct retro_core_option_display +{ + /* Variable to configure in RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY */ + const char *key; + + /* Specifies whether variable should be displayed + * when presenting core options to the user */ + bool visible; +}; + +/* Maximum number of values permitted for a core option + * > Note: We have to set a maximum value due to the limitations + * of the C language - i.e. it is not possible to create an + * array of structs each containing a variable sized array, + * so the retro_core_option_definition values array must + * have a fixed size. The size limit of 128 is a balancing + * act - it needs to be large enough to support all 'sane' + * core options, but setting it too large may impact low memory + * platforms. In practice, if a core option has more than + * 128 values then the implementation is likely flawed. + * To quote the above API reference: + * "The number of possible options should be very limited + * i.e. it should be feasible to cycle through options + * without a keyboard."
+ */ +#define RETRO_NUM_CORE_OPTION_VALUES_MAX 128 + +struct retro_core_option_value +{ + /* Expected option value */ + const char *value; + + /* Human-readable value label. If NULL, value itself + * will be displayed by the frontend */ + const char *label; +}; + +struct retro_core_option_definition +{ + /* Variable to query in RETRO_ENVIRONMENT_GET_VARIABLE. */ + const char *key; + + /* Human-readable core option description (used as menu label) */ + const char *desc; + + /* Human-readable core option information (used as menu sublabel) */ + const char *info; + + /* Array of retro_core_option_value structs, terminated by NULL */ + struct retro_core_option_value values[RETRO_NUM_CORE_OPTION_VALUES_MAX]; + + /* Default core option value. Must match one of the values + * in the retro_core_option_value array, otherwise will be + * ignored */ + const char *default_value; +}; + +#ifdef __PS3__ +#undef local +#endif + +struct retro_core_options_intl +{ + /* Pointer to an array of retro_core_option_definition structs + * - US English implementation + * - Must point to a valid array */ + struct retro_core_option_definition *us; + + /* Pointer to an array of retro_core_option_definition structs + * - Implementation for current frontend language + * - May be NULL */ + struct retro_core_option_definition *local; +}; + +struct retro_core_option_v2_category +{ + /* Variable uniquely identifying the + * option category. Valid key characters + * are [a-z, A-Z, 0-9, _, -] */ + const char *key; + + /* Human-readable category description + * > Used as category menu label when + * frontend has core option category + * support */ + const char *desc; + + /* Human-readable category information + * > Used as category menu sublabel when + * frontend has core option category + * support + * > Optional (may be NULL or an empty + * string) */ + const char *info; +}; + +struct retro_core_option_v2_definition +{ + /* Variable to query in RETRO_ENVIRONMENT_GET_VARIABLE. 
+ * Valid key characters are [a-z, A-Z, 0-9, _, -] */ + const char *key; + + /* Human-readable core option description + * > Used as menu label when frontend does + * not have core option category support + * e.g. "Video > Aspect Ratio" */ + const char *desc; + + /* Human-readable core option description + * > Used as menu label when frontend has + * core option category support + * e.g. "Aspect Ratio", where associated + * retro_core_option_v2_category::desc + * is "Video" + * > If empty or NULL, the string specified by + * desc will be used as the menu label + * > Will be ignored (and may be set to NULL) + * if category_key is empty or NULL */ + const char *desc_categorized; + + /* Human-readable core option information + * > Used as menu sublabel */ + const char *info; + + /* Human-readable core option information + * > Used as menu sublabel when frontend + * has core option category support + * (e.g. may be required when info text + * references an option by name/desc, + * and the desc/desc_categorized text + * for that option differ) + * > If empty or NULL, the string specified by + * info will be used as the menu sublabel + * > Will be ignored (and may be set to NULL) + * if category_key is empty or NULL */ + const char *info_categorized; + + /* Variable specifying category (e.g. "video", + * "audio") that will be assigned to the option + * if frontend has core option category support. 
+ * > Categorized options will be displayed in a + * subsection/submenu of the frontend core + * option interface + * > Specified string must match one of the + * retro_core_option_v2_category::key values + * in the associated retro_core_option_v2_category + * array; if no match is found, specified + * string will be considered as NULL + * > If specified string is empty or NULL, option will + * have no category and will be shown at the top + * level of the frontend core option interface */ + const char *category_key; + + /* Array of retro_core_option_value structs, terminated by NULL */ + struct retro_core_option_value values[RETRO_NUM_CORE_OPTION_VALUES_MAX]; + + /* Default core option value. Must match one of the values + * in the retro_core_option_value array, otherwise will be + * ignored */ + const char *default_value; +}; + +struct retro_core_options_v2 +{ + /* Array of retro_core_option_v2_category structs, + * terminated by NULL + * > If NULL, all entries in definitions array + * will have no category and will be shown at + * the top level of the frontend core option + * interface + * > Will be ignored if frontend does not have + * core option category support */ + struct retro_core_option_v2_category *categories; + + /* Array of retro_core_option_v2_definition structs, + * terminated by NULL */ + struct retro_core_option_v2_definition *definitions; +}; + +struct retro_core_options_v2_intl +{ + /* Pointer to a retro_core_options_v2 struct + * > US English implementation + * > Must point to a valid struct */ + struct retro_core_options_v2 *us; + + /* Pointer to a retro_core_options_v2 struct + * - Implementation for current frontend language + * - May be NULL */ + struct retro_core_options_v2 *local; +}; + +/* Used by the frontend to monitor changes in core option + * visibility. May be called each time any core option + * value is set via the frontend.
+ * - On each invocation, the core must update the visibility + * of any dynamically hidden options using the + * RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY environment + * callback. + * - On the first invocation, returns 'true' if the visibility + * of any core option has changed since the last call of + * retro_load_game() or retro_load_game_special(). + * - On each subsequent invocation, returns 'true' if the + * visibility of any core option has changed since the last + * time the function was called. */ +typedef bool (RETRO_CALLCONV *retro_core_options_update_display_callback_t)(void); +struct retro_core_options_update_display_callback +{ + retro_core_options_update_display_callback_t callback; +}; + +struct retro_game_info +{ + const char *path; /* Path to game, UTF-8 encoded. + * Sometimes used as a reference for building other paths. + * May be NULL if game was loaded from stdin or similar, + * but in this case some cores will be unable to load `data`. + * So, it is preferable to fabricate something here instead + * of passing NULL, which will help more cores to succeed. + * retro_system_info::need_fullpath requires + * that this path is valid. */ + const void *data; /* Memory buffer of loaded game. Will be NULL + * if need_fullpath was set. */ + size_t size; /* Size of memory buffer. */ + const char *meta; /* String of implementation specific meta-data. */ +}; + +#define RETRO_MEMORY_ACCESS_WRITE (1 << 0) + /* The core will write to the buffer provided by retro_framebuffer::data. */ +#define RETRO_MEMORY_ACCESS_READ (1 << 1) + /* The core will read from retro_framebuffer::data. */ +#define RETRO_MEMORY_TYPE_CACHED (1 << 0) + /* The memory in data is cached. + * If not cached, random writes and/or reading from the buffer is expected to be very slow. */ +struct retro_framebuffer +{ + void *data; /* The framebuffer which the core can render into. + Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. + The initial contents of data are unspecified. 
*/ + unsigned width; /* The framebuffer width used by the core. Set by core. */ + unsigned height; /* The framebuffer height used by the core. Set by core. */ + size_t pitch; /* The number of bytes between the beginning of a scanline, + and beginning of the next scanline. + Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. */ + enum retro_pixel_format format; /* The pixel format the core must use to render into data. + This format could differ from the format used in + SET_PIXEL_FORMAT. + Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. */ + + unsigned access_flags; /* How the core will access the memory in the framebuffer. + RETRO_MEMORY_ACCESS_* flags. + Set by core. */ + unsigned memory_flags; /* Flags telling core how the memory has been mapped. + RETRO_MEMORY_TYPE_* flags. + Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. */ +}; + +/* Used by a libretro core to override the current + * fastforwarding mode of the frontend */ +struct retro_fastforwarding_override +{ + /* Specifies the runtime speed multiplier that + * will be applied when 'fastforward' is true. + * For example, a value of 5.0 when running 60 FPS + * content will cap the fast-forward rate at 300 FPS. + * Note that the target multiplier may not be achieved + * if the host hardware has insufficient processing + * power. + * Setting a value of 0.0 (or greater than 0.0 but + * less than 1.0) will result in an uncapped + * fast-forward rate (limited only by hardware + * capacity). + * If the value is negative, it will be ignored + * (i.e. the frontend will use a runtime speed + * multiplier of its own choosing) */ + float ratio; + + /* If true, fastforwarding mode will be enabled. + * If false, fastforwarding mode will be disabled. */ + bool fastforward; + + /* If true, and if supported by the frontend, an + * on-screen notification will be displayed while + * 'fastforward' is true. 
+ * If false, and if supported by the frontend, any + * on-screen fast-forward notifications will be + * suppressed */ + bool notification; + + /* If true, the core will have sole control over + * when fastforwarding mode is enabled/disabled; + * the frontend will not be able to change the + * state set by 'fastforward' until either + * 'inhibit_toggle' is set to false, or the core + * is unloaded */ + bool inhibit_toggle; +}; + +/* During normal operation. Rate will be equal to the core's internal FPS. */ +#define RETRO_THROTTLE_NONE 0 + +/* While paused or stepping single frames. Rate will be 0. */ +#define RETRO_THROTTLE_FRAME_STEPPING 1 + +/* During fast forwarding. + * Rate will be 0 if not specifically limited to a maximum speed. */ +#define RETRO_THROTTLE_FAST_FORWARD 2 + +/* During slow motion. Rate will be less than the core's internal FPS. */ +#define RETRO_THROTTLE_SLOW_MOTION 3 + +/* While rewinding recorded save states. Rate can vary depending on the rewind + * speed or be 0 if the frontend is not aiming for a specific rate. */ +#define RETRO_THROTTLE_REWINDING 4 + +/* While vsync is active in the video driver and the target refresh rate is + * lower than the core's internal FPS. Rate is the target refresh rate. */ +#define RETRO_THROTTLE_VSYNC 5 + +/* When the frontend does not throttle in any way. Rate will be 0. + * An example could be if no vsync or audio output is active. */ +#define RETRO_THROTTLE_UNBLOCKED 6 + +struct retro_throttle_state +{ + /* The current throttling mode. Should be one of the values above. */ + unsigned mode; + + /* How many times per second the frontend aims to call retro_run. + * Depending on the mode, it can be 0 if there is no known fixed rate. + * This won't be accurate if the total processing time of the core and + * the frontend is longer than what is available for one frame. */ + float rate; +}; + +/** + * Opaque handle to a microphone that's been opened for use. 
+ * The underlying object is accessed or created with \c retro_microphone_interface_t. + */ +typedef struct retro_microphone retro_microphone_t; + +/** + * Parameters for configuring a microphone. + * Some of these might not be honored, + * depending on the available hardware and driver configuration. + */ +typedef struct retro_microphone_params +{ + /** + * The desired sample rate of the microphone's input, in Hz. + * The microphone's input will be resampled, + * so cores can ask for whichever frequency they need. + * + * If zero, some reasonable default will be provided by the frontend + * (usually from its config file). + * + * @see retro_get_mic_rate_t + */ + unsigned rate; +} retro_microphone_params_t; + +/** + * @copydoc retro_microphone_interface::open_mic + */ +typedef retro_microphone_t *(RETRO_CALLCONV *retro_open_mic_t)(const retro_microphone_params_t *params); + +/** + * @copydoc retro_microphone_interface::close_mic + */ +typedef void (RETRO_CALLCONV *retro_close_mic_t)(retro_microphone_t *microphone); + +/** + * @copydoc retro_microphone_interface::get_params + */ +typedef bool (RETRO_CALLCONV *retro_get_mic_params_t)(const retro_microphone_t *microphone, retro_microphone_params_t *params); + +/** + * @copydoc retro_microphone_interface::set_mic_state + */ +typedef bool (RETRO_CALLCONV *retro_set_mic_state_t)(retro_microphone_t *microphone, bool state); + +/** + * @copydoc retro_microphone_interface::get_mic_state + */ +typedef bool (RETRO_CALLCONV *retro_get_mic_state_t)(const retro_microphone_t *microphone); + +/** + * @copydoc retro_microphone_interface::read_mic + */ +typedef int (RETRO_CALLCONV *retro_read_mic_t)(retro_microphone_t *microphone, int16_t* samples, size_t num_samples); + +/** + * The current version of the microphone interface. + * Will be incremented whenever \c retro_microphone_interface or \c retro_microphone_params_t + * receive new fields. 
+ * + * Frontends using cores built against older mic interface versions + * should not access fields introduced in newer versions. + */ +#define RETRO_MICROPHONE_INTERFACE_VERSION 1 + +/** + * An interface for querying the microphone and accessing data read from it. + * + * @see RETRO_ENVIRONMENT_GET_MICROPHONE_INTERFACE + */ +struct retro_microphone_interface +{ + /** + * The version of this microphone interface. + * Set by the core to request a particular version, + * and set by the frontend to indicate the returned version. + * 0 indicates that the interface is invalid or uninitialized. + */ + unsigned interface_version; + + /** + * Initializes a new microphone. + * Assuming that microphone support is enabled and provided by the frontend, + * cores may call this function whenever necessary. + * A microphone could be opened throughout a core's lifetime, + * or it could wait until a microphone is plugged in to the emulated device. + * + * The returned handle will be valid until it's freed, + * even if the audio driver is reinitialized. + * + * This function is not guaranteed to be thread-safe. + * + * @param args[in] Parameters used to create the microphone. + * May be \c NULL, in which case the default value of each parameter will be used. + * + * @returns Pointer to the newly-opened microphone, + * or \c NULL if one couldn't be opened. + * This likely means that no microphone is plugged in and recognized, + * or the maximum number of supported microphones has been reached. + * + * @note Microphones are \em inactive by default; + * to begin capturing audio, call \c set_mic_state. + * @see retro_microphone_params_t + */ + retro_open_mic_t open_mic; + + /** + * Closes a microphone that was initialized with \c open_mic. + * Calling this function will stop all microphone activity + * and free up the resources that it allocated. + * Afterwards, the handle is invalid and must not be used. 
+ * + * A frontend may close opened microphones when unloading content, + * but this behavior is not guaranteed. + * Cores should close their microphones when exiting, just to be safe. + * + * @param microphone Pointer to the microphone that was allocated by \c open_mic. + * If \c NULL, this function does nothing. + * + * @note The handle might be reused if another microphone is opened later. + */ + retro_close_mic_t close_mic; + + /** + * Returns the configured parameters of this microphone. + * These may differ from what was requested depending on + * the driver and device configuration. + * + * Cores should check these values before they start fetching samples. + * + * Will not change after the mic was opened. + * + * @param microphone[in] Opaque handle to the microphone + * whose parameters will be retrieved. + * @param params[out] The parameters object that the + * microphone's parameters will be copied to. + * + * @return \c true if the parameters were retrieved, + * \c false if there was an error. + */ + retro_get_mic_params_t get_params; + + /** + * Enables or disables the given microphone. + * Microphones are disabled by default + * and must be explicitly enabled before they can be used. + * Disabled microphones will not process incoming audio samples, + * and will therefore have minimal impact on overall performance. + * Cores may enable microphones throughout their lifetime, + * or only for periods where they're needed. + * + * Cores that accept microphone input should be able to operate without it; + * we suggest substituting silence in this case. + * + * @param microphone Opaque handle to the microphone + * whose state will be adjusted. + * This will have been provided by \c open_mic. + * @param state \c true if the microphone should receive audio input, + * \c false if it should be idle. + * @returns \c true if the microphone's state was successfully set, + * \c false if \c microphone is invalid + * or if there was an error. 
+ */ + retro_set_mic_state_t set_mic_state; + + /** + * Queries the active state of a microphone at the given index. + * Will return whether the microphone is enabled, + * even if the driver is paused. + * + * @param microphone Opaque handle to the microphone + * whose state will be queried. + * @return \c true if the provided \c microphone is valid and active, + * \c false if not or if there was an error. + */ + retro_get_mic_state_t get_mic_state; + + /** + * Retrieves the input processed by the microphone since the last call. + * \em Must be called every frame unless \c microphone is disabled, + * similar to how \c retro_audio_sample_batch_t works. + * + * @param[in] microphone Opaque handle to the microphone + * whose recent input will be retrieved. + * @param[out] samples The buffer that will be used to store the microphone's data. + * Microphone input is in mono (i.e. one number per sample). + * Should be large enough to accommodate the expected number of samples per frame; + * for example, a 44.1kHz sample rate at 60 FPS would require space for 735 samples. + * @param[in] num_samples The size of the data buffer in samples (\em not bytes). + * Microphone input is in mono, so a "frame" and a "sample" are equivalent in length here. + * + * @return The number of samples that were copied into \c samples. + * If \c microphone is pending driver initialization, + * this function will copy silence of the requested length into \c samples. + * + * Will return -1 if the microphone is disabled, + * the audio driver is paused, + * or there was an error. + */ + retro_read_mic_t read_mic; +}; + +/** + * Describes how a device is being powered. + * @see RETRO_ENVIRONMENT_GET_DEVICE_POWER + */ +enum retro_power_state +{ + /** + * Indicates that the frontend cannot report its power state at this time, + * most likely due to a lack of support. + * + * \c RETRO_ENVIRONMENT_GET_DEVICE_POWER will not return this value; + * instead, the environment callback will return \c false. 
+ */ + RETRO_POWERSTATE_UNKNOWN = 0, + + /** + * Indicates that the device is running on its battery. + * Usually applies to portable devices such as handhelds, laptops, and smartphones. + */ + RETRO_POWERSTATE_DISCHARGING, + + /** + * Indicates that the device's battery is currently charging. + */ + RETRO_POWERSTATE_CHARGING, + + /** + * Indicates that the device is connected to a power source + * and that its battery has finished charging. + */ + RETRO_POWERSTATE_CHARGED, + + /** + * Indicates that the device is connected to a power source + * and that it does not have a battery. + * This usually suggests a desktop computer or a non-portable game console. + */ + RETRO_POWERSTATE_PLUGGED_IN +}; + +/** + * Indicates that an estimate is not available for the battery level or time remaining, + * even if the actual power state is known. + */ +#define RETRO_POWERSTATE_NO_ESTIMATE (-1) + +/** + * Describes the power state of the device running the frontend. + * @see RETRO_ENVIRONMENT_GET_DEVICE_POWER + */ +struct retro_device_power +{ + /** + * The current state of the frontend's power usage. + */ + enum retro_power_state state; + + /** + * A rough estimate of the amount of time remaining (in seconds) + * before the device powers off. + * This value depends on a variety of factors, + * so it is not guaranteed to be accurate. + * + * Will be set to \c RETRO_POWERSTATE_NO_ESTIMATE if \c state does not equal \c RETRO_POWERSTATE_DISCHARGING. + * May still be set to \c RETRO_POWERSTATE_NO_ESTIMATE if the frontend is unable to provide an estimate. + */ + int seconds; + + /** + * The approximate percentage of battery charge, + * ranging from 0 to 100 (inclusive). + * The device may power off before this reaches 0. + * + * The user might have configured their device + * to stop charging before the battery is full, + * so do not assume that this will be 100 in the \c RETRO_POWERSTATE_CHARGED state. + */ + int8_t percent; +}; + +/* Callbacks */ + +/* Environment callback. 
Gives implementations a way of performing + * uncommon tasks. Extensible. */ +typedef bool (RETRO_CALLCONV *retro_environment_t)(unsigned cmd, void *data); + +/* Render a frame. Pixel format is 15-bit 0RGB1555 native endian + * unless changed (see RETRO_ENVIRONMENT_SET_PIXEL_FORMAT). + * + * Width and height specify dimensions of buffer. + * Pitch specifices length in bytes between two lines in buffer. + * + * For performance reasons, it is highly recommended to have a frame + * that is packed in memory, i.e. pitch == width * byte_per_pixel. + * Certain graphic APIs, such as OpenGL ES, do not like textures + * that are not packed in memory. + */ +typedef void (RETRO_CALLCONV *retro_video_refresh_t)(const void *data, unsigned width, + unsigned height, size_t pitch); + +/* Renders a single audio frame. Should only be used if implementation + * generates a single sample at a time. + * Format is signed 16-bit native endian. + */ +typedef void (RETRO_CALLCONV *retro_audio_sample_t)(int16_t left, int16_t right); + +/* Renders multiple audio frames in one go. + * + * One frame is defined as a sample of left and right channels, interleaved. + * I.e. int16_t buf[4] = { l, r, l, r }; would be 2 frames. + * Only one of the audio callbacks must ever be used. + */ +typedef size_t (RETRO_CALLCONV *retro_audio_sample_batch_t)(const int16_t *data, + size_t frames); + +/* Polls input. */ +typedef void (RETRO_CALLCONV *retro_input_poll_t)(void); + +/* Queries for input for player 'port'. device will be masked with + * RETRO_DEVICE_MASK. + * + * Specialization of devices such as RETRO_DEVICE_JOYPAD_MULTITAP that + * have been set with retro_set_controller_port_device() + * will still use the higher level RETRO_DEVICE_JOYPAD to request input. + */ +typedef int16_t (RETRO_CALLCONV *retro_input_state_t)(unsigned port, unsigned device, + unsigned index, unsigned id); + +/* Sets callbacks. retro_set_environment() is guaranteed to be called + * before retro_init(). 
+ * + * The rest of the set_* functions are guaranteed to have been called + * before the first call to retro_run() is made. */ +RETRO_API void retro_set_environment(retro_environment_t); +RETRO_API void retro_set_video_refresh(retro_video_refresh_t); +RETRO_API void retro_set_audio_sample(retro_audio_sample_t); +RETRO_API void retro_set_audio_sample_batch(retro_audio_sample_batch_t); +RETRO_API void retro_set_input_poll(retro_input_poll_t); +RETRO_API void retro_set_input_state(retro_input_state_t); + +/* Library global initialization/deinitialization. */ +RETRO_API void retro_init(void); +RETRO_API void retro_deinit(void); + +/* Must return RETRO_API_VERSION. Used to validate ABI compatibility + * when the API is revised. */ +RETRO_API unsigned retro_api_version(void); + +/* Gets statically known system info. Pointers provided in *info + * must be statically allocated. + * Can be called at any time, even before retro_init(). */ +RETRO_API void retro_get_system_info(struct retro_system_info *info); + +/* Gets information about system audio/video timings and geometry. + * Can be called only after retro_load_game() has successfully completed. + * NOTE: The implementation of this function might not initialize every + * variable if needed. + * E.g. geom.aspect_ratio might not be initialized if core doesn't + * desire a particular aspect ratio. */ +RETRO_API void retro_get_system_av_info(struct retro_system_av_info *info); + +/* Sets device to be used for player 'port'. + * By default, RETRO_DEVICE_JOYPAD is assumed to be plugged into all + * available ports. + * Setting a particular device type is not a guarantee that libretro cores + * will only poll input based on that particular device type. It is only a + * hint to the libretro core when a core cannot automatically detect the + * appropriate input device type on its own. It is also relevant when a + * core can change its behavior depending on device type. 
+ * + * As part of the core's implementation of retro_set_controller_port_device, + * the core should call RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS to notify the + * frontend if the descriptions for any controls have changed as a + * result of changing the device type. + */ +RETRO_API void retro_set_controller_port_device(unsigned port, unsigned device); + +/* Resets the current game. */ +RETRO_API void retro_reset(void); + +/* Runs the game for one video frame. + * During retro_run(), input_poll callback must be called at least once. + * + * If a frame is not rendered for reasons where a game "dropped" a frame, + * this still counts as a frame, and retro_run() should explicitly dupe + * a frame if GET_CAN_DUPE returns true. + * In this case, the video callback can take a NULL argument for data. + */ +RETRO_API void retro_run(void); + +/* Returns the amount of data the implementation requires to serialize + * internal state (save states). + * Between calls to retro_load_game() and retro_unload_game(), the + * returned size is never allowed to be larger than a previous returned + * value, to ensure that the frontend can allocate a save state buffer once. + */ +RETRO_API size_t retro_serialize_size(void); + +/* Serializes internal state. If failed, or size is lower than + * retro_serialize_size(), it should return false, true otherwise. */ +RETRO_API bool retro_serialize(void *data, size_t size); +RETRO_API bool retro_unserialize(const void *data, size_t size); + +RETRO_API void retro_cheat_reset(void); +RETRO_API void retro_cheat_set(unsigned index, bool enabled, const char *code); + +/* Loads a game. + * Return true to indicate successful loading and false to indicate load failure. + */ +RETRO_API bool retro_load_game(const struct retro_game_info *game); + +/* Loads a "special" kind of game. Should not be used, + * except in extreme cases. 
*/ +RETRO_API bool retro_load_game_special( + unsigned game_type, + const struct retro_game_info *info, size_t num_info +); + +/* Unloads the currently loaded game. Called before retro_deinit(void). */ +RETRO_API void retro_unload_game(void); + +/* Gets region of game. */ +RETRO_API unsigned retro_get_region(void); + +/* Gets region of memory. */ +RETRO_API void *retro_get_memory_data(unsigned id); +RETRO_API size_t retro_get_memory_size(unsigned id); + +#ifdef __cplusplus +} +#endif + +#endif From dc629e1b3f2ac7704fe7966d58fc6fa1ec679449 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 2 Jul 2024 18:57:46 +0300 Subject: [PATCH 040/251] Create panda3ds_libretro.info --- docs/libretro/panda3ds_libretro.info | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 docs/libretro/panda3ds_libretro.info diff --git a/docs/libretro/panda3ds_libretro.info b/docs/libretro/panda3ds_libretro.info new file mode 100644 index 00000000..40df7e22 --- /dev/null +++ b/docs/libretro/panda3ds_libretro.info @@ -0,0 +1,34 @@ +# Software Information +display_name = "Nintendo - 3DS (Panda3DS)" +authors = "Panda3DS Authors (tm)" +supported_extensions = "3ds|3dsx|elf|axf|cci|cxi|app" +corename = "Panda3DS" +categories = "Emulator" +license = "GPLv3" +permissions = "" +display_version = "Git" + +# Hardware Information +manufacturer = "Nintendo" +systemname = "3DS" +systemid = "3ds" + +# Libretro Information +database = "Nintendo - Nintendo 3DS" +supports_no_game = "false" +savestate = "true" +savestate_features = "basic" +cheats = "false" +input_descriptors = "true" +memory_descriptors = "false" +libretro_saves = "true" +core_options = "true" +core_options_version = "1.0" +load_subsystem = "false" +hw_render = "true" +required_hw_api = "OpenGL Core >= 4.1" +needs_fullpath = "true" +disk_control = "false" +is_experimental = "true" + +description = "Panda3DS !" 
From 173bd03a53a58cefe62ce33b7366ac0af24139a9 Mon Sep 17 00:00:00 2001 From: Jonian Guveli Date: Tue, 2 Jul 2024 19:07:30 +0300 Subject: [PATCH 041/251] Libretro: Fix lib output name --- CMakeLists.txt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2897560b..92a939fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -510,8 +510,13 @@ elseif(BUILD_HYDRA_CORE) target_link_libraries(Alber PUBLIC AlberCore) elseif(BUILD_LIBRETRO_CORE) include_directories(third_party/libretro/include) - add_library(panda3ds_libretro SHARED src/libretro_core.cpp) - target_link_libraries(panda3ds_libretro PUBLIC AlberCore) + add_library(Alber SHARED src/libretro_core.cpp) + target_link_libraries(Alber PUBLIC AlberCore) + + set_target_properties(Alber PROPERTIES + OUTPUT_NAME "panda3ds_libretro" + PREFIX "" + ) endif() if(ENABLE_LTO OR ENABLE_USER_BUILD) From 0a49dc0af70f073c9d15c2e1f473af430f89a8e3 Mon Sep 17 00:00:00 2001 From: Jonian Guveli Date: Tue, 9 Jul 2024 14:47:44 +0300 Subject: [PATCH 042/251] Libretro: Various fixes and optimizations --- src/libretro_core.cpp | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index ff57f0c8..8cb66c83 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -17,17 +17,17 @@ static struct retro_hw_render_callback hw_render; std::unique_ptr emulator; RendererGL* renderer; -static void* GetProcAddress(const char* name) { +static void* GetRenderProcAddress(const char* name) { return (void*)hw_render.get_proc_address(name); } static void VideoResetContext(void) { #ifdef USING_GLES - if (!gladLoadGLES2Loader(reinterpret_cast(GetProcAddress))) { + if (!gladLoadGLES2Loader(reinterpret_cast(GetRenderProcAddress))) { Helpers::panic("OpenGL ES init failed"); } #else - if (!gladLoadGLLoader(reinterpret_cast(GetProcAddress))) { + if (!gladLoadGLLoader(reinterpret_cast(GetRenderProcAddress))) 
{ Helpers::panic("OpenGL init failed"); } #endif @@ -47,8 +47,8 @@ static bool SetHWRender(retro_hw_context_type type) { switch (type) { case RETRO_HW_CONTEXT_OPENGL_CORE: - hw_render.version_major = 3; - hw_render.version_minor = 3; + hw_render.version_major = 4; + hw_render.version_minor = 1; if (environ_cb(RETRO_ENVIRONMENT_SET_HW_RENDER, &hw_render)) { return true; @@ -57,7 +57,7 @@ static bool SetHWRender(retro_hw_context_type type) { case RETRO_HW_CONTEXT_OPENGLES3: case RETRO_HW_CONTEXT_OPENGL: hw_render.version_major = 3; - hw_render.version_minor = 0; + hw_render.version_minor = 1; if (environ_cb(RETRO_ENVIRONMENT_SET_HW_RENDER, &hw_render)) { return true; @@ -173,6 +173,16 @@ static void ConfigUpdate() { config.sdCardInserted = FetchVariableBool("panda3ds_use_virtual_sd", true); config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false); config.discordRpcEnabled = false; + + config.save(); +} + +static void ConfigCheckVariables() { + bool updated = false; + environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &updated); + + if (updated) + ConfigUpdate(); } void retro_get_system_info(retro_system_info* info) { @@ -192,7 +202,7 @@ void retro_get_system_av_info(retro_system_av_info* info) { info->geometry.aspect_ratio = 5.0 / 6.0; info->timing.fps = 60.0; - info->timing.sample_rate = 32000; + info->timing.sample_rate = 32768; } void retro_set_environment(retro_environment_t cb) { @@ -260,6 +270,8 @@ void retro_reset(void) { } void retro_run(void) { + ConfigCheckVariables(); + renderer->setFBO(hw_render.get_current_framebuffer()); renderer->resetStateManager(); @@ -283,8 +295,8 @@ void retro_run(void) { float x_left = GetAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X); float y_left = GetAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y); - hid.setCirclepadX(x_left == 0 ? 0 : x_left < 0 ? -0x9C : 0x9C); - hid.setCirclepadY(y_left == 0 ? 0 : y_left > 0 ? 
-0x9C : 0x9C); + hid.setCirclepadX((x_left / +32767) * 0x9C); + hid.setCirclepadY((y_left / -32767) * 0x9C); bool touch = input_state_cb(0, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_LEFT); auto pos_x = input_state_cb(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_X); From ea03d135dab99da9d77aaa64c5a3dc41a6a034a9 Mon Sep 17 00:00:00 2001 From: Jonian Guveli Date: Tue, 9 Jul 2024 14:48:22 +0300 Subject: [PATCH 043/251] Allow overriding config/data paths in emulator --- include/emulator.hpp | 7 ++++--- src/emulator.cpp | 5 ++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/emulator.hpp b/include/emulator.hpp index de04648e..66aeb27e 100644 --- a/include/emulator.hpp +++ b/include/emulator.hpp @@ -87,6 +87,7 @@ class Emulator { bool frameDone = false; Emulator(); + Emulator(const std::filesystem::path& configPath); ~Emulator(); void step(); @@ -129,10 +130,10 @@ class Emulator { Renderer* getRenderer() { return gpu.getRenderer(); } u64 getTicks() { return cpu.getTicks(); } - std::filesystem::path getConfigPath(); - std::filesystem::path getAndroidAppPath(); + virtual std::filesystem::path getConfigPath(); + virtual std::filesystem::path getAndroidAppPath(); // Get the root path for the emulator's app data - std::filesystem::path getAppDataRoot(); + virtual std::filesystem::path getAppDataRoot(); std::span getSMDH(); }; diff --git a/src/emulator.cpp b/src/emulator.cpp index af156eeb..a7d859be 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -17,7 +17,10 @@ __declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 1; #endif Emulator::Emulator() - : config(getConfigPath()), kernel(cpu, memory, gpu, config), cpu(memory, kernel, *this), gpu(memory, config), memory(cpu.getTicksRef(), config), + : Emulator(getConfigPath()) {} + +Emulator::Emulator(const std::filesystem::path& configPath) + : config(configPath), kernel(cpu, memory, gpu, config), cpu(memory, kernel, *this), gpu(memory, config), memory(cpu.getTicksRef(), config), 
cheats(memory, kernel.getServiceManager().getHID()), lua(*this), running(false) #ifdef PANDA3DS_ENABLE_HTTP_SERVER , From c7e22c540d572687e5d85effceba482caf0809c6 Mon Sep 17 00:00:00 2001 From: Jonian Guveli Date: Tue, 9 Jul 2024 14:49:44 +0300 Subject: [PATCH 044/251] Libretro: Use libretro save dir for emulator files --- src/libretro_core.cpp | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index 8cb66c83..10934233 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -13,10 +13,26 @@ static retro_input_poll_t input_poll_cb; static retro_input_state_t input_state_cb; static struct retro_hw_render_callback hw_render; +static std::filesystem::path retro_save_dir; -std::unique_ptr emulator; +class EmulatorCore : public Emulator { + public: + EmulatorCore() : Emulator(getConfigPath()) {} + std::filesystem::path getConfigPath() override; + std::filesystem::path getAppDataRoot() override; +}; + +std::unique_ptr emulator; RendererGL* renderer; +std::filesystem::path EmulatorCore::getConfigPath() { + return std::filesystem::path(retro_save_dir / "config.toml"); +} + +std::filesystem::path EmulatorCore::getAppDataRoot() { + return std::filesystem::path(retro_save_dir / "Emulator Files"); +} + static void* GetRenderProcAddress(const char* name) { return (void*)hw_render.get_proc_address(name); } @@ -232,7 +248,16 @@ void retro_init(void) { enum retro_pixel_format xrgb888 = RETRO_PIXEL_FORMAT_XRGB8888; environ_cb(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &xrgb888); - emulator = std::make_unique(); + char* save_dir = nullptr; + + if (!environ_cb(RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY, &save_dir) || save_dir == nullptr) { + Helpers::warn("No save directory provided by LibRetro."); + retro_save_dir = std::filesystem::current_path(); + } else { + retro_save_dir = std::filesystem::path(save_dir); + } + + emulator = std::make_unique(); } void retro_deinit(void) { From 
623a9a64d6b2f313d015c40cc70511237c07fa43 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 9 Jul 2024 16:11:25 +0300 Subject: [PATCH 045/251] LR: Format/Cleanup --- include/emulator.hpp | 7 +- src/emulator.cpp | 9 +- src/libretro_core.cpp | 534 ++++++++++++++++++++---------------------- 3 files changed, 260 insertions(+), 290 deletions(-) diff --git a/include/emulator.hpp b/include/emulator.hpp index 66aeb27e..de04648e 100644 --- a/include/emulator.hpp +++ b/include/emulator.hpp @@ -87,7 +87,6 @@ class Emulator { bool frameDone = false; Emulator(); - Emulator(const std::filesystem::path& configPath); ~Emulator(); void step(); @@ -130,10 +129,10 @@ class Emulator { Renderer* getRenderer() { return gpu.getRenderer(); } u64 getTicks() { return cpu.getTicks(); } - virtual std::filesystem::path getConfigPath(); - virtual std::filesystem::path getAndroidAppPath(); + std::filesystem::path getConfigPath(); + std::filesystem::path getAndroidAppPath(); // Get the root path for the emulator's app data - virtual std::filesystem::path getAppDataRoot(); + std::filesystem::path getAppDataRoot(); std::span getSMDH(); }; diff --git a/src/emulator.cpp b/src/emulator.cpp index a7d859be..db6c2e1f 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -17,10 +17,7 @@ __declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 1; #endif Emulator::Emulator() - : Emulator(getConfigPath()) {} - -Emulator::Emulator(const std::filesystem::path& configPath) - : config(configPath), kernel(cpu, memory, gpu, config), cpu(memory, kernel, *this), gpu(memory, config), memory(cpu.getTicksRef(), config), + : config(getConfigPath()), kernel(cpu, memory, gpu, config), cpu(memory, kernel, *this), gpu(memory, config), memory(cpu.getTicksRef(), config), cheats(memory, kernel.getServiceManager().getHID()), lua(*this), running(false) #ifdef PANDA3DS_ENABLE_HTTP_SERVER , @@ -87,6 +84,7 @@ void Emulator::reset(ReloadOption reload) { } } +#ifndef 
__LIBRETRO__ std::filesystem::path Emulator::getAndroidAppPath() { // SDL_GetPrefPath fails to get the path due to no JNI environment std::ifstream cmdline("/proc/self/cmdline"); @@ -103,6 +101,7 @@ std::filesystem::path Emulator::getConfigPath() { return std::filesystem::current_path() / "config.toml"; } } +#endif void Emulator::step() {} void Emulator::render() {} @@ -182,6 +181,7 @@ void Emulator::pollScheduler() { } } +#ifndef __LIBRETRO__ // Get path for saving files (AppData on Windows, /home/user/.local/share/ApplicationName on Linux, etc) // Inside that path, we be use a game-specific folder as well. Eg if we were loading a ROM called PenguinDemo.3ds, the savedata would be in // %APPDATA%/Alber/PenguinDemo/SaveData on Windows, and so on. We do this because games save data in their own filesystem on the cart. @@ -205,6 +205,7 @@ std::filesystem::path Emulator::getAppDataRoot() { return appDataPath; } +#endif bool Emulator::loadROM(const std::filesystem::path& path) { // Reset the emulator if we've already loaded a ROM diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index 10934233..c329b881 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -6,391 +6,361 @@ #include #include -static retro_environment_t environ_cb; -static retro_video_refresh_t video_cb; -static retro_audio_sample_batch_t audio_batch_cb; -static retro_input_poll_t input_poll_cb; -static retro_input_state_t input_state_cb; +static retro_environment_t envCallbacks; +static retro_video_refresh_t videoCallbacks; +static retro_audio_sample_batch_t audioBatchCallback; +static retro_input_poll_t inputPollCallback; +static retro_input_state_t inputStateCallback; -static struct retro_hw_render_callback hw_render; -static std::filesystem::path retro_save_dir; +static retro_hw_render_callback hw_render; +static std::filesystem::path savePath; -class EmulatorCore : public Emulator { - public: - EmulatorCore() : Emulator(getConfigPath()) {} - std::filesystem::path getConfigPath() 
override; - std::filesystem::path getAppDataRoot() override; -}; - -std::unique_ptr emulator; +std::unique_ptr emulator; RendererGL* renderer; -std::filesystem::path EmulatorCore::getConfigPath() { - return std::filesystem::path(retro_save_dir / "config.toml"); +std::filesystem::path Emulator::getConfigPath() { + return std::filesystem::path(savePath / "config.toml"); } -std::filesystem::path EmulatorCore::getAppDataRoot() { - return std::filesystem::path(retro_save_dir / "Emulator Files"); +std::filesystem::path Emulator::getAppDataRoot() { + return std::filesystem::path(savePath / "Emulator Files"); } -static void* GetRenderProcAddress(const char* name) { - return (void*)hw_render.get_proc_address(name); +static void* GetGLProcAddress(const char* name) { + return (void*)hw_render.get_proc_address(name); } -static void VideoResetContext(void) { +static void VideoResetContext() { #ifdef USING_GLES - if (!gladLoadGLES2Loader(reinterpret_cast(GetRenderProcAddress))) { - Helpers::panic("OpenGL ES init failed"); - } + if (!gladLoadGLES2Loader(reinterpret_cast(GetGLProcAddress))) { + Helpers::panic("OpenGL ES init failed"); + } #else - if (!gladLoadGLLoader(reinterpret_cast(GetRenderProcAddress))) { - Helpers::panic("OpenGL init failed"); - } + if (!gladLoadGLLoader(reinterpret_cast(GetGLProcAddress))) { + Helpers::panic("OpenGL init failed"); + } #endif - emulator->initGraphicsContext(nullptr); + emulator->initGraphicsContext(nullptr); } -static void VideoDestroyContext(void) { +static void VideoDestroyContext() { emulator->deinitGraphicsContext(); } static bool SetHWRender(retro_hw_context_type type) { - hw_render.context_type = type; - hw_render.context_reset = VideoResetContext; - hw_render.context_destroy = VideoDestroyContext; - hw_render.bottom_left_origin = true; + hw_render.context_type = type; + hw_render.context_reset = VideoResetContext; + hw_render.context_destroy = VideoDestroyContext; + hw_render.bottom_left_origin = true; - switch (type) { - case 
RETRO_HW_CONTEXT_OPENGL_CORE: - hw_render.version_major = 4; - hw_render.version_minor = 1; + switch (type) { + case RETRO_HW_CONTEXT_OPENGL_CORE: + hw_render.version_major = 4; + hw_render.version_minor = 1; - if (environ_cb(RETRO_ENVIRONMENT_SET_HW_RENDER, &hw_render)) { - return true; - } - break; - case RETRO_HW_CONTEXT_OPENGLES3: - case RETRO_HW_CONTEXT_OPENGL: - hw_render.version_major = 3; - hw_render.version_minor = 1; + if (envCallbacks(RETRO_ENVIRONMENT_SET_HW_RENDER, &hw_render)) { + return true; + } + break; + case RETRO_HW_CONTEXT_OPENGLES3: + case RETRO_HW_CONTEXT_OPENGL: + hw_render.version_major = 3; + hw_render.version_minor = 1; - if (environ_cb(RETRO_ENVIRONMENT_SET_HW_RENDER, &hw_render)) { - return true; - } - break; - default: - break; - } + if (envCallbacks(RETRO_ENVIRONMENT_SET_HW_RENDER, &hw_render)) { + return true; + } + break; + default: break; + } - return false; + return false; } -static void VideoInit(void) { - retro_hw_context_type preferred = RETRO_HW_CONTEXT_NONE; - environ_cb(RETRO_ENVIRONMENT_GET_PREFERRED_HW_RENDER, &preferred); +static void videoInit() { + retro_hw_context_type preferred = RETRO_HW_CONTEXT_NONE; + envCallbacks(RETRO_ENVIRONMENT_GET_PREFERRED_HW_RENDER, &preferred); - if (preferred && SetHWRender(preferred)) - return; - if (SetHWRender(RETRO_HW_CONTEXT_OPENGL_CORE)) - return; - if (SetHWRender(RETRO_HW_CONTEXT_OPENGL)) - return; - if (SetHWRender(RETRO_HW_CONTEXT_OPENGLES3)) - return; + if (preferred && SetHWRender(preferred)) return; + if (SetHWRender(RETRO_HW_CONTEXT_OPENGL_CORE)) return; + if (SetHWRender(RETRO_HW_CONTEXT_OPENGL)) return; + if (SetHWRender(RETRO_HW_CONTEXT_OPENGLES3)) return; - hw_render.context_type = RETRO_HW_CONTEXT_NONE; + hw_render.context_type = RETRO_HW_CONTEXT_NONE; } -static bool GetButtonState(unsigned id) { - return input_state_cb(0, RETRO_DEVICE_JOYPAD, 0, id); -} +static bool GetButtonState(uint id) { return inputStateCallback(0, RETRO_DEVICE_JOYPAD, 0, id); } +static float 
GetAxisState(uint index, uint id) { return inputStateCallback(0, RETRO_DEVICE_ANALOG, index, id); } -static float GetAxisState(unsigned index, unsigned id) { - return input_state_cb(0, RETRO_DEVICE_ANALOG, index, id); -} +static void inputInit() { + static const retro_controller_description controllers[] = { + {"Nintendo 3DS", RETRO_DEVICE_JOYPAD}, + {NULL, 0}, + }; -static void InputInit(void) { - static const struct retro_controller_description controllers[] = { - { "Nintendo 3DS", RETRO_DEVICE_JOYPAD }, - { NULL, 0 }, - }; + static const retro_controller_info ports[] = { + {controllers, 1}, + {NULL, 0}, + }; - static const struct retro_controller_info ports[] = { - { controllers, 1 }, - { NULL, 0 }, - }; + envCallbacks(RETRO_ENVIRONMENT_SET_CONTROLLER_INFO, (void*)ports); - environ_cb(RETRO_ENVIRONMENT_SET_CONTROLLER_INFO, (void*)ports); + retro_input_descriptor desc[] = { + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "Left"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "Up"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "Down"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "Right"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "A"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "B"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT, "Select"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "R"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "L"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "X"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "Y"}, + {0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Circle Pad X"}, + {0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Circle Pad Y"}, + {0}, + }; - struct retro_input_descriptor desc[] = { - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "Left" 
}, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "Up" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "Down" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "Right" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "A" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "B" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT, "Select" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "R" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "L" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "X" }, - { 0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "Y" }, - { 0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Circle Pad X" }, - { 0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Circle Pad Y" }, - { 0 }, - }; - - environ_cb(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, &desc); + envCallbacks(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, &desc); } static std::string FetchVariable(std::string key, std::string def) { - struct retro_variable var = { nullptr }; - var.key = key.c_str(); + retro_variable var = {nullptr}; + var.key = key.c_str(); - if (!environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value == nullptr) { - Helpers::warn("Fetching variable %s failed.", key); - return def; - } + if (!envCallbacks(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value == nullptr) { + Helpers::warn("Fetching variable %s failed.", key); + return def; + } - return std::string(var.value); + return std::string(var.value); } static bool FetchVariableBool(std::string key, bool def) { - return FetchVariable(key, def ? "enabled" : "disabled") == "enabled"; + return FetchVariable(key, def ? 
"enabled" : "disabled") == "enabled"; } -static void ConfigInit() { - static const retro_variable values[] = { - { "panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled" }, - { "panda3ds_use_vsync", "Enable VSync; enabled|disabled" }, - { "panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE" }, - { "panda3ds_use_audio", "Enable audio; disabled|enabled" }, - { "panda3ds_use_virtual_sd", "Enable virtual SD card; enabled|disabled" }, - { "panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled" }, - { "panda3ds_battery_level", "Battery percentage; 5|10|20|30|50|70|90|100" }, - { "panda3ds_use_charger", "Charger plugged; enabled|disabled" }, - { nullptr, nullptr } - }; +static void configInit() { + static const retro_variable values[] = { + {"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"}, + {"panda3ds_use_vsync", "Enable VSync; enabled|disabled"}, + {"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"}, + {"panda3ds_use_audio", "Enable audio; disabled|enabled"}, + {"panda3ds_use_virtual_sd", "Enable virtual SD card; enabled|disabled"}, + {"panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled"}, + {"panda3ds_battery_level", "Battery percentage; 5|10|20|30|50|70|90|100"}, + {"panda3ds_use_charger", "Charger plugged; enabled|disabled"}, + {nullptr, nullptr} + }; - environ_cb(RETRO_ENVIRONMENT_SET_VARIABLES, (void*)values); + envCallbacks(RETRO_ENVIRONMENT_SET_VARIABLES, (void*)values); } -static void ConfigUpdate() { - EmulatorConfig& config = emulator->getConfig(); +static void configUpdate() { + EmulatorConfig& config = emulator->getConfig(); - config.rendererType = RendererType::OpenGL; - config.vsyncEnabled = FetchVariableBool("panda3ds_use_vsync", true); - config.shaderJitEnabled = FetchVariableBool("panda3ds_use_shader_jit", true); - config.chargerPlugged = FetchVariableBool("panda3ds_use_charger", true); - config.batteryPercentage = 
std::clamp(std::stoi(FetchVariable("panda3ds_battery_level", "5")), 0, 100); - config.dspType = Audio::DSPCore::typeFromString(FetchVariable("panda3ds_dsp_emulation", "null")); - config.audioEnabled = FetchVariableBool("panda3ds_use_audio", false); - config.sdCardInserted = FetchVariableBool("panda3ds_use_virtual_sd", true); - config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false); - config.discordRpcEnabled = false; + config.rendererType = RendererType::OpenGL; + config.vsyncEnabled = FetchVariableBool("panda3ds_use_vsync", true); + config.shaderJitEnabled = FetchVariableBool("panda3ds_use_shader_jit", true); + config.chargerPlugged = FetchVariableBool("panda3ds_use_charger", true); + config.batteryPercentage = std::clamp(std::stoi(FetchVariable("panda3ds_battery_level", "5")), 0, 100); + config.dspType = Audio::DSPCore::typeFromString(FetchVariable("panda3ds_dsp_emulation", "null")); + config.audioEnabled = FetchVariableBool("panda3ds_use_audio", false); + config.sdCardInserted = FetchVariableBool("panda3ds_use_virtual_sd", true); + config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false); + config.discordRpcEnabled = false; - config.save(); + config.save(); } static void ConfigCheckVariables() { - bool updated = false; - environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &updated); + bool updated = false; + envCallbacks(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &updated); - if (updated) - ConfigUpdate(); + if (updated) { + configUpdate(); + } } void retro_get_system_info(retro_system_info* info) { - info->need_fullpath = true; - info->valid_extensions = "3ds|3dsx|elf|axf|cci|cxi|app"; - info->library_version = "0.8"; - info->library_name = "Panda3DS"; - info->block_extract = true; + info->need_fullpath = true; + info->valid_extensions = "3ds|3dsx|elf|axf|cci|cxi|app"; + info->library_version = "0.8"; + info->library_name = "Panda3DS"; + info->block_extract = true; } void 
retro_get_system_av_info(retro_system_av_info* info) { - info->geometry.base_width = emulator->width; - info->geometry.base_height = emulator->height; + info->geometry.base_width = emulator->width; + info->geometry.base_height = emulator->height; - info->geometry.max_width = info->geometry.base_width; - info->geometry.max_height = info->geometry.base_height; + info->geometry.max_width = info->geometry.base_width; + info->geometry.max_height = info->geometry.base_height; - info->geometry.aspect_ratio = 5.0 / 6.0; - info->timing.fps = 60.0; - info->timing.sample_rate = 32768; + info->geometry.aspect_ratio = float(5.0 / 6.0); + info->timing.fps = 60.0; + info->timing.sample_rate = 32768; } void retro_set_environment(retro_environment_t cb) { - environ_cb = cb; + envCallbacks = cb; } void retro_set_video_refresh(retro_video_refresh_t cb) { - video_cb = cb; + videoCallbacks = cb; } void retro_set_audio_sample_batch(retro_audio_sample_batch_t cb) { - audio_batch_cb = cb; + audioBatchCallback = cb; } -void retro_set_audio_sample(retro_audio_sample_t cb) { -} +void retro_set_audio_sample(retro_audio_sample_t cb) {} void retro_set_input_poll(retro_input_poll_t cb) { - input_poll_cb = cb; + inputPollCallback = cb; } void retro_set_input_state(retro_input_state_t cb) { - input_state_cb = cb; + inputStateCallback = cb; } -void retro_init(void) { - enum retro_pixel_format xrgb888 = RETRO_PIXEL_FORMAT_XRGB8888; - environ_cb(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &xrgb888); +void retro_init() { + enum retro_pixel_format xrgb888 = RETRO_PIXEL_FORMAT_XRGB8888; + envCallbacks(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &xrgb888); - char* save_dir = nullptr; + char* save_dir = nullptr; - if (!environ_cb(RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY, &save_dir) || save_dir == nullptr) { - Helpers::warn("No save directory provided by LibRetro."); - retro_save_dir = std::filesystem::current_path(); - } else { - retro_save_dir = std::filesystem::path(save_dir); - } + if 
(!envCallbacks(RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY, &save_dir) || save_dir == nullptr) { + Helpers::warn("No save directory provided by LibRetro."); + savePath = std::filesystem::current_path(); + } else { + savePath = std::filesystem::path(save_dir); + } - emulator = std::make_unique(); + emulator = std::make_unique(); } -void retro_deinit(void) { - emulator = nullptr; +void retro_deinit() { + emulator = nullptr; } -bool retro_load_game(const struct retro_game_info* game) { - ConfigInit(); - ConfigUpdate(); +bool retro_load_game(const retro_game_info* game) { + configInit(); + configUpdate(); - if (emulator->getRendererType() != RendererType::OpenGL) { - throw std::runtime_error("Libretro: Renderer is not OpenGL"); - } + if (emulator->getRendererType() != RendererType::OpenGL) { + Helpers::panic("Libretro: Renderer is not OpenGL"); + } - renderer = static_cast(emulator->getRenderer()); - emulator->setOutputSize(emulator->width, emulator->height); + renderer = static_cast(emulator->getRenderer()); + emulator->setOutputSize(emulator->width, emulator->height); - InputInit(); - VideoInit(); + inputInit(); + videoInit(); - return emulator->loadROM(game->path); + return emulator->loadROM(game->path); } -bool retro_load_game_special(unsigned type, const struct retro_game_info* info, size_t num) { - return false; +bool retro_load_game_special(uint type, const retro_game_info* info, size_t num) { return false; } + +void retro_unload_game() { + renderer->setFBO(0); + renderer = nullptr; } -void retro_unload_game(void) { - renderer->setFBO(0); - renderer = nullptr; +void retro_reset() { + emulator->reset(Emulator::ReloadOption::Reload); } -void retro_reset(void) { - emulator->reset(Emulator::ReloadOption::Reload); +void retro_run() { + ConfigCheckVariables(); + + renderer->setFBO(hw_render.get_current_framebuffer()); + renderer->resetStateManager(); + + inputPollCallback(); + + HIDService& hid = emulator->getServiceManager().getHID(); + + hid.setKey(HID::Keys::A, 
GetButtonState(RETRO_DEVICE_ID_JOYPAD_A)); + hid.setKey(HID::Keys::B, GetButtonState(RETRO_DEVICE_ID_JOYPAD_B)); + hid.setKey(HID::Keys::X, GetButtonState(RETRO_DEVICE_ID_JOYPAD_X)); + hid.setKey(HID::Keys::Y, GetButtonState(RETRO_DEVICE_ID_JOYPAD_Y)); + hid.setKey(HID::Keys::L, GetButtonState(RETRO_DEVICE_ID_JOYPAD_L)); + hid.setKey(HID::Keys::R, GetButtonState(RETRO_DEVICE_ID_JOYPAD_R)); + hid.setKey(HID::Keys::Start, GetButtonState(RETRO_DEVICE_ID_JOYPAD_START)); + hid.setKey(HID::Keys::Select, GetButtonState(RETRO_DEVICE_ID_JOYPAD_SELECT)); + hid.setKey(HID::Keys::Up, GetButtonState(RETRO_DEVICE_ID_JOYPAD_UP)); + hid.setKey(HID::Keys::Down, GetButtonState(RETRO_DEVICE_ID_JOYPAD_DOWN)); + hid.setKey(HID::Keys::Left, GetButtonState(RETRO_DEVICE_ID_JOYPAD_LEFT)); + hid.setKey(HID::Keys::Right, GetButtonState(RETRO_DEVICE_ID_JOYPAD_RIGHT)); + + // Get analog values for the left analog stick (Right analog stick is N3DS-only and unimplemented) + float xLeft = GetAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X); + float yLeft = GetAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y); + + hid.setCirclepadX((xLeft / +32767) * 0x9C); + hid.setCirclepadY((yLeft / -32767) * 0x9C); + + bool touch = inputStateCallback(0, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_LEFT); + const int posX = inputStateCallback(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_X); + const int posY = inputStateCallback(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_Y); + + const int newX = static_cast((posX + 0x7fff) / (float)(0x7fff * 2) * emulator->width); + const int newY = static_cast((posY + 0x7fff) / (float)(0x7fff * 2) * emulator->height); + + const int offsetX = 40; + const int offsetY = emulator->height / 2; + + const bool inScreenX = newX >= offsetX && newX < emulator->width - offsetX; + const bool inScreenY = newY >= offsetY && newY <= emulator->height; + + if (touch && inScreenX && inScreenY) { + u16 x = static_cast(newX - offsetX); + u16 y = 
static_cast(newY - offsetY); + + hid.setTouchScreenPress(x, y); + } else { + hid.releaseTouchScreen(); + } + + hid.updateInputs(emulator->getTicks()); + emulator->runFrame(); + + videoCallbacks(RETRO_HW_FRAME_BUFFER_VALID, emulator->width, emulator->height, 0); } -void retro_run(void) { - ConfigCheckVariables(); +void retro_set_controller_port_device(uint port, uint device) {} - renderer->setFBO(hw_render.get_current_framebuffer()); - renderer->resetStateManager(); - - input_poll_cb(); - - HIDService& hid = emulator->getServiceManager().getHID(); - - hid.setKey(HID::Keys::A, GetButtonState(RETRO_DEVICE_ID_JOYPAD_A)); - hid.setKey(HID::Keys::B, GetButtonState(RETRO_DEVICE_ID_JOYPAD_B)); - hid.setKey(HID::Keys::X, GetButtonState(RETRO_DEVICE_ID_JOYPAD_X)); - hid.setKey(HID::Keys::Y, GetButtonState(RETRO_DEVICE_ID_JOYPAD_Y)); - hid.setKey(HID::Keys::L, GetButtonState(RETRO_DEVICE_ID_JOYPAD_L)); - hid.setKey(HID::Keys::R, GetButtonState(RETRO_DEVICE_ID_JOYPAD_R)); - hid.setKey(HID::Keys::Start, GetButtonState(RETRO_DEVICE_ID_JOYPAD_START)); - hid.setKey(HID::Keys::Select, GetButtonState(RETRO_DEVICE_ID_JOYPAD_SELECT)); - hid.setKey(HID::Keys::Up, GetButtonState(RETRO_DEVICE_ID_JOYPAD_UP)); - hid.setKey(HID::Keys::Down, GetButtonState(RETRO_DEVICE_ID_JOYPAD_DOWN)); - hid.setKey(HID::Keys::Left, GetButtonState(RETRO_DEVICE_ID_JOYPAD_LEFT)); - hid.setKey(HID::Keys::Right, GetButtonState(RETRO_DEVICE_ID_JOYPAD_RIGHT)); - - float x_left = GetAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X); - float y_left = GetAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y); - - hid.setCirclepadX((x_left / +32767) * 0x9C); - hid.setCirclepadY((y_left / -32767) * 0x9C); - - bool touch = input_state_cb(0, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_LEFT); - auto pos_x = input_state_cb(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_X); - auto pos_y = input_state_cb(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_Y); - - auto new_x = 
static_cast((pos_x + 0x7fff) / (float)(0x7fff * 2) * emulator->width); - auto new_y = static_cast((pos_y + 0x7fff) / (float)(0x7fff * 2) * emulator->height); - - auto off_x = 40; - auto off_y = emulator->height / 2; - - bool scr_x = new_x >= off_x && new_x < emulator->width - off_x; - bool scr_y = new_y >= off_y && new_y <= emulator->height; - - if (touch && scr_y && scr_x) { - u16 x = static_cast(new_x - off_x); - u16 y = static_cast(new_y - off_y); - - hid.setTouchScreenPress(x, y); - } else { - hid.releaseTouchScreen(); - } - - hid.updateInputs(emulator->getTicks()); - - emulator->runFrame(); - video_cb(RETRO_HW_FRAME_BUFFER_VALID, emulator->width, emulator->height, 0); +size_t retro_serialize_size() { + size_t size = 0; + return size; } -void retro_set_controller_port_device(unsigned port, unsigned device) { +bool retro_serialize(void* data, size_t size) { return false; } +bool retro_unserialize(const void* data, size_t size) { return false; } + +uint retro_get_region() { return RETRO_REGION_NTSC; } +uint retro_api_version() { return RETRO_API_VERSION; } + +size_t retro_get_memory_size(uint id) { + if (id == RETRO_MEMORY_SYSTEM_RAM) { + return 0; + } + + return 0; } -size_t retro_serialize_size(void) { - size_t size = 0; - return size; +void* retro_get_memory_data(uint id) { + if (id == RETRO_MEMORY_SYSTEM_RAM) { + return 0; + } + + return nullptr; } -bool retro_serialize(void* data, size_t size) { - return false; -} - -bool retro_unserialize(const void* data, size_t size) { - return false; -} - -unsigned retro_get_region(void) { - return RETRO_REGION_NTSC; -} - -unsigned retro_api_version() { - return RETRO_API_VERSION; -} - -size_t retro_get_memory_size(unsigned id) { - if (id == RETRO_MEMORY_SYSTEM_RAM) { - return 0; - } - return 0; -} - -void* retro_get_memory_data(unsigned id) { - if (id == RETRO_MEMORY_SYSTEM_RAM) { - return 0; - } - return NULL; -} - -void retro_cheat_set(unsigned index, bool enabled, const char* code) { -} - -void 
retro_cheat_reset(void) { -} +void retro_cheat_set(uint index, bool enabled, const char* code) {} +void retro_cheat_reset() {} From a12b721c957e1684d44213e176a7e5eb42889567 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 9 Jul 2024 16:52:09 +0300 Subject: [PATCH 046/251] More formatting --- CMakeLists.txt | 2 +- src/libretro_core.cpp | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 92a939fa..85a915e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,7 @@ if(BUILD_LIBRETRO_CORE) set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(ENABLE_DISCORD_RPC OFF) set(ENABLE_LUAJIT OFF) - add_definitions(-D__LIBRETRO__) + add_compile_definitions(__LIBRETRO__) endif() add_library(AlberCore STATIC) diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index c329b881..3bf0f95f 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -264,7 +264,7 @@ bool retro_load_game(const retro_game_info* game) { return emulator->loadROM(game->path); } -bool retro_load_game_special(uint type, const retro_game_info* info, size_t num) { return false; } +bool retro_load_game_special(uint type, const retro_game_info* info, usize num) { return false; } void retro_unload_game() { renderer->setFBO(0); @@ -335,18 +335,18 @@ void retro_run() { void retro_set_controller_port_device(uint port, uint device) {} -size_t retro_serialize_size() { - size_t size = 0; +usize retro_serialize_size() { + usize size = 0; return size; } -bool retro_serialize(void* data, size_t size) { return false; } -bool retro_unserialize(const void* data, size_t size) { return false; } +bool retro_serialize(void* data, usize size) { return false; } +bool retro_unserialize(const void* data, usize size) { return false; } uint retro_get_region() { return RETRO_REGION_NTSC; } uint retro_api_version() { return RETRO_API_VERSION; } -size_t retro_get_memory_size(uint id) { +usize 
retro_get_memory_size(uint id) { if (id == RETRO_MEMORY_SYSTEM_RAM) { return 0; } From a3886a948fd6d7b54f94f3896e3e7d1ef841b7d3 Mon Sep 17 00:00:00 2001 From: offtkp Date: Tue, 9 Jul 2024 20:51:09 +0300 Subject: [PATCH 047/251] Switch to GL_TEXTURE_2D for lighting LUT --- include/renderer_gl/renderer_gl.hpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 21 ++++++++++---------- src/host_shaders/opengl_fragment_shader.frag | 8 ++++---- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 92f02662..057f0d3b 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -53,7 +53,7 @@ class RendererGL final : public Renderer { OpenGL::VertexBuffer dummyVBO; OpenGL::Texture screenTexture; - GLuint lightLUTTextureArray; + OpenGL::Texture lightLUTTexture; OpenGL::Framebuffer screenFramebuffer; OpenGL::Texture blankTexture; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index a11a6ffa..9de9f8d8 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -124,7 +124,10 @@ void RendererGL::initGraphicsContextInternal() { const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall - glGenTextures(1, &lightLUTTextureArray); + lightLUTTexture.create(256, Lights::LUT_Count, GL_R32F); + lightLUTTexture.bind(); + lightLUTTexture.setMinFilter(OpenGL::Linear); + lightLUTTexture.setMagFilter(OpenGL::Linear); auto prevTexture = OpenGL::getTex2D(); @@ -357,26 +360,22 @@ void RendererGL::bindTexturesToSlots() { } glActiveTexture(GL_TEXTURE0 + 3); - glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); + lightLUTTexture.bind(); glActiveTexture(GL_TEXTURE0); } void RendererGL::updateLightingLUT() { gpu.lightingLUTDirty = false; - std::array u16_lightinglut; + std::array lightingLut; for 
(int i = 0; i < gpu.lightingLUT.size(); i++) { - uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); - u16_lightinglut[i] = value * 65535 / 4095; + uint64_t value = gpu.lightingLUT[i] & 0xFFF; + lightingLut[i] = (float)(value << 4) / 65535.0f; } glActiveTexture(GL_TEXTURE0 + 3); - glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); - glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + lightLUTTexture.bind(); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, Lights::LUT_Count, GL_RED, GL_FLOAT, lightingLut.data()); glActiveTexture(GL_TEXTURE0); } diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index f6fa6c55..6b728ace 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -27,7 +27,7 @@ uniform bool u_depthmapEnable; uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; -uniform sampler1DArray u_tex_lighting_lut; +uniform sampler2D u_tex_lighting_lut; uniform uint u_picaRegs[0x200 - 0x48]; @@ -145,9 +145,9 @@ vec4 tevCalculateCombiner(int tev_id) { #define RR_LUT 6u float lutLookup(uint lut, uint light, float value) { - if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; - if (lut == SP_LUT) lut = light + 8; - return texture(u_tex_lighting_lut, vec2(value, lut)).r; + if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1u; + if (lut == SP_LUT) lut = light + 8u; + return texelFetch(u_tex_lighting_lut, ivec2(int(value * 256.0), lut), 0).r; } vec3 regToColor(uint reg) { From 6f6167a20125f3259c2de97ffccbcd26947785c3 Mon Sep 17 00:00:00 2001 From: wheremyfoodat 
<44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 9 Jul 2024 20:56:47 +0300 Subject: [PATCH 048/251] Fix LR variable fetch error --- src/libretro_core.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index 3bf0f95f..f9772b37 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -132,7 +132,7 @@ static std::string FetchVariable(std::string key, std::string def) { var.key = key.c_str(); if (!envCallbacks(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value == nullptr) { - Helpers::warn("Fetching variable %s failed.", key); + Helpers::warn("Fetching variable %s failed.", key.c_str()); return def; } From fe566e960b17471fa7bcbd11f43c1ca22368d25c Mon Sep 17 00:00:00 2001 From: offtkp Date: Tue, 9 Jul 2024 20:57:56 +0300 Subject: [PATCH 049/251] Update GL ES patch to work with latest changes --- .github/gles.patch | 103 +++++++++------------------------------------ 1 file changed, 19 insertions(+), 84 deletions(-) diff --git a/.github/gles.patch b/.github/gles.patch index f1dc2c73..3d6c96fe 100644 --- a/.github/gles.patch +++ b/.github/gles.patch @@ -1,52 +1,3 @@ -diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp -index a11a6ffa..77486a09 100644 ---- a/src/core/renderer_gl/renderer_gl.cpp -+++ b/src/core/renderer_gl/renderer_gl.cpp -@@ -357,27 +357,27 @@ void RendererGL::bindTexturesToSlots() { - } - - glActiveTexture(GL_TEXTURE0 + 3); -- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); -+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); - glActiveTexture(GL_TEXTURE0); - } - - void RendererGL::updateLightingLUT() { -- gpu.lightingLUTDirty = false; -- std::array u16_lightinglut; -- -- for (int i = 0; i < gpu.lightingLUT.size(); i++) { -- uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); -- u16_lightinglut[i] = value * 65535 / 4095; -- } -- -- glActiveTexture(GL_TEXTURE0 + 3); -- glBindTexture(GL_TEXTURE_1D_ARRAY, 
lightLUTTextureArray); -- glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); -- glActiveTexture(GL_TEXTURE0); -+ // gpu.lightingLUTDirty = false; -+ // std::array u16_lightinglut; -+ -+ // for (int i = 0; i < gpu.lightingLUT.size(); i++) { -+ // uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); -+ // u16_lightinglut[i] = value * 65535 / 4095; -+ // } -+ -+ // glActiveTexture(GL_TEXTURE0 + 3); -+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); -+ // glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); -+ // glActiveTexture(GL_TEXTURE0); - } - - void RendererGL::drawVertices(PICA::PrimType primType, std::span vertices) { diff --git a/src/host_shaders/opengl_display.frag b/src/host_shaders/opengl_display.frag index 612671c8..1937f711 100644 --- a/src/host_shaders/opengl_display.frag @@ -70,7 +21,7 @@ index 990e2f80..2e7842ac 100644 void main() { diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag -index f6fa6c55..bb88e278 100644 +index 6b728ace..eaac1484 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,4 +1,5 @@ @@ -80,34 +31,16 @@ index f6fa6c55..bb88e278 100644 in vec3 
v_tangent; in vec3 v_normal; -@@ -27,7 +28,7 @@ uniform bool u_depthmapEnable; - uniform sampler2D u_tex0; - uniform sampler2D u_tex1; - uniform sampler2D u_tex2; --uniform sampler1DArray u_tex_lighting_lut; -+// uniform sampler1DArray u_tex_lighting_lut; +@@ -150,11 +151,17 @@ float lutLookup(uint lut, uint light, float value) { + return texelFetch(u_tex_lighting_lut, ivec2(int(value * 256.0), lut), 0).r; + } - uniform uint u_picaRegs[0x200 - 0x48]; - -@@ -145,16 +146,23 @@ vec4 tevCalculateCombiner(int tev_id) { - #define RR_LUT 6u - - float lutLookup(uint lut, uint light, float value) { -- if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; -- if (lut == SP_LUT) lut = light + 8; -- return texture(u_tex_lighting_lut, vec2(value, lut)).r; -+ // if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; -+ // if (lut == SP_LUT) lut = light + 8; -+ // return texture(u_tex_lighting_lut, vec2(value, lut)).r; -+ return 0.0; -+} -+ +// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead +uint bitfieldExtractCompat(uint val, int off, int size) { + uint mask = uint((1 << size) - 1); + return uint(val >> off) & mask; - } - ++} ++ vec3 regToColor(uint reg) { // Normalization scale to convert from [0...255] to [0.0...1.0] const float scale = 1.0 / 255.0; @@ -117,7 +50,7 @@ index f6fa6c55..bb88e278 100644 } // Convert an arbitrary-width floating point literal to an f32 -@@ -189,7 +197,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -189,7 +196,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { vec3 view = normalize(v_view); uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); @@ -126,7 +59,7 @@ index f6fa6c55..bb88e278 100644 primary_color = secondary_color = vec4(1.0); return; } -@@ -213,7 +221,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -213,7 +220,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { bool 
error_unimpl = false; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { @@ -135,7 +68,7 @@ index f6fa6c55..bb88e278 100644 uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); -@@ -224,14 +232,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -224,14 +231,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); vec3 light_vector = normalize(vec3( @@ -153,7 +86,7 @@ index f6fa6c55..bb88e278 100644 // error_unimpl = true; half_vector = normalize(normalize(light_vector + v_view) + view); } -@@ -242,12 +250,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -242,12 +249,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { } for (int c = 0; c < 7; c++) { @@ -169,7 +102,7 @@ index f6fa6c55..bb88e278 100644 if (input_id == 0u) d[c] = dot(normal, half_vector); else if (input_id == 1u) -@@ -260,9 +268,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -260,9 +267,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id); uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id); vec3 spot_light_vector = normalize(vec3( @@ -182,7 +115,7 @@ index f6fa6c55..bb88e278 100644 )); d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); } else if (input_id == 5u) { -@@ -273,13 +281,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -273,13 +280,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { } d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale; @@ -198,7 +131,7 @@ index f6fa6c55..bb88e278 100644 if (lookup_config == 0u) { d[D1_LUT] = 0.0; d[FR_LUT] = 0.0; -@@ -310,7 +318,7 @@ void 
calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -310,7 +317,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { float NdotL = dot(normal, light_vector); // Li dot N // Two sided diffuse @@ -207,7 +140,7 @@ index f6fa6c55..bb88e278 100644 NdotL = max(0.0, NdotL); else NdotL = abs(NdotL); -@@ -321,8 +329,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -321,8 +328,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); } @@ -249,14 +182,16 @@ index a25d7a6d..7cf40398 100644 + // gl_ClipDistance[1] = dot(clipData, a_coords); } diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp -index f368f573..5ead7f63 100644 +index 9997e63b..5d9d7804 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp -@@ -520,21 +520,21 @@ namespace OpenGL { +@@ -561,22 +561,22 @@ namespace OpenGL { + static void disableScissor() { glDisable(GL_SCISSOR_TEST); } static void enableBlend() { glEnable(GL_BLEND); } static void disableBlend() { glDisable(GL_BLEND); } - static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); } +- static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); } - static void disableLogicOp() { glDisable(GL_COLOR_LOGIC_OP); } ++ static void enableLogicOp() { /* glEnable(GL_COLOR_LOGIC_OP); */ } + static void disableLogicOp() { /* glDisable(GL_COLOR_LOGIC_OP); */ } static void enableDepth() { glEnable(GL_DEPTH_TEST); } static void disableDepth() { glDisable(GL_DEPTH_TEST); } From a1ff34d41759f95138c03f49bfc3f77aa05b7fa8 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 9 Jul 2024 20:58:07 +0000 Subject: [PATCH 050/251] Add LR core to CI (#530) * Add LR core to CI * Update Hydra_Build.yml * Update 
Hydra_Build.yml * Update Hydra_Build.yml * Update Hydra_Build.yml * Update Hydra_Build.yml * Update Hydra_Build.yml * Update Hydra_Build.yml * Update Hydra_Build.yml --- .github/workflows/Hydra_Build.yml | 65 ++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 9 deletions(-) diff --git a/.github/workflows/Hydra_Build.yml b/.github/workflows/Hydra_Build.yml index a19974fb..645f2f7a 100644 --- a/.github/workflows/Hydra_Build.yml +++ b/.github/workflows/Hydra_Build.yml @@ -32,12 +32,27 @@ jobs: - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - - name: Upload core - uses: actions/upload-artifact@v2 + - name: Upload Hydra core + uses: actions/upload-artifact@v4 with: - name: Windows core + name: Windows Hydra core path: '${{github.workspace}}/build/${{ env.BUILD_TYPE }}/Alber.dll' + - name: Configure CMake (Again) + run: | + rm -r -fo ${{github.workspace}}/build + cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON + + - name: Build (Again) + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + + - name: Upload Libretro core + uses: actions/upload-artifact@v4 + with: + name: Windows Libretro core + path: | + ${{github.workspace}}/build/panda3ds_libretro.dll + ${{github.workspace}}/docs/libretro/panda3ds_libretro.info MacOS: runs-on: macos-13 @@ -61,11 +76,27 @@ jobs: run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Upload core - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: MacOS core + name: MacOS Hydra core path: '${{github.workspace}}/build/libAlber.dylib' + - name: Configure CMake (Again) + run: | + rm -rf ${{github.workspace}}/build + cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON + + - name: Build (Again) + run: cmake --build ${{github.workspace}}/build --config 
${{env.BUILD_TYPE}} && ls -R ${{github.workspace}}/build + + - name: Upload Libretro core + uses: actions/upload-artifact@v4 + with: + name: MacOS Libretro core + path: | + ${{github.workspace}}/build/panda3ds_libretro.dylib + ${{github.workspace}}/docs/libretro/panda3ds_libretro.info + Linux: runs-on: ubuntu-latest @@ -98,11 +129,27 @@ jobs: run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Upload core - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: Linux core + name: Linux Hydra core path: '${{github.workspace}}/build/libAlber.so' + - name: Configure CMake (Again) + run: | + rm -rf ${{github.workspace}}/build + cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-17 -DCMAKE_CXX_COMPILER=clang++-17 -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON + + - name: Build (Again) + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + + - name: Upload Libretro core + uses: actions/upload-artifact@v4 + with: + name: Linux Libretro core + path: | + ${{github.workspace}}/build/panda3ds_libretro.so + ${{github.workspace}}/docs/libretro/panda3ds_libretro.info + Android-x64: runs-on: ubuntu-latest @@ -129,7 +176,7 @@ jobs: run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Upload core - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: Android core + name: Android Hydra core path: '${{github.workspace}}/build/libAlber.so' From 096d0a89ee4d6fc6163d5db103d15a27fa12689d Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 11 Jul 2024 22:22:33 +0300 Subject: [PATCH 051/251] Fix AES-CTR decryption for non-NCCHKey0 games --- include/loader/ncch.hpp | 2 ++ src/core/loader/ncch.cpp | 22 ++++++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/include/loader/ncch.hpp b/include/loader/ncch.hpp index 
42ce1590..8e35643b 100644 --- a/include/loader/ncch.hpp +++ b/include/loader/ncch.hpp @@ -60,6 +60,8 @@ struct NCCH { CodeSetInfo text, data, rodata; FSInfo partitionInfo; + std::optional primaryKey, secondaryKey; + // Contents of the .code file in the ExeFS std::vector codeFile; // Contains of the cart's save data diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 3bf73e5d..a8e50101 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -29,6 +29,9 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn smdh.clear(); partitionInfo = info; + primaryKey = {}; + secondaryKey = {}; + size = u64(*(u32*)&header[0x104]) * mediaUnit; // TODO: Maybe don't type pun because big endian will break exheaderSize = *(u32*)&header[0x180]; @@ -78,11 +81,11 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn if (!primaryResult.first || !secondaryResult.first) { gotCryptoKeys = false; } else { - Crypto::AESKey primaryKey = primaryResult.second; - Crypto::AESKey secondaryKey = secondaryResult.second; + primaryKey = primaryResult.second; + secondaryKey = secondaryResult.second; EncryptionInfo encryptionInfoTmp; - encryptionInfoTmp.normalKey = primaryKey; + encryptionInfoTmp.normalKey = *primaryKey; encryptionInfoTmp.initialCounter.fill(0); for (std::size_t i = 1; i <= sizeof(std::uint64_t) - 1; i++) { @@ -94,7 +97,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn encryptionInfoTmp.initialCounter[8] = 2; exeFS.encryptionInfo = encryptionInfoTmp; - encryptionInfoTmp.normalKey = secondaryKey; + encryptionInfoTmp.normalKey = *secondaryKey; encryptionInfoTmp.initialCounter[8] = 3; romFS.encryptionInfo = encryptionInfoTmp; } @@ -201,13 +204,20 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn Helpers::panic("Second code file in a single NCCH partition. 
What should this do?\n"); } + // All files in ExeFS use the same IV, though .code uses the secondary key for decryption + // whereas .icon/.banner use the primary key. + FSInfo info = exeFS; + if (secondaryKey.has_value() && info.encryptionInfo.has_value()) { + info.encryptionInfo->normalKey = secondaryKey.value(); + } + if (compressCode) { std::vector tmp; tmp.resize(fileSize); // A file offset of 0 means our file is located right after the ExeFS header // So in the ROM, files are located at (file offset + exeFS offset + exeFS header size) - readFromFile(file, exeFS, tmp.data(), fileOffset + exeFSHeaderSize, fileSize); + readFromFile(file, info, tmp.data(), fileOffset + exeFSHeaderSize, fileSize); // Decompress .code file from the tmp vector to the "code" vector if (!CartLZ77::decompress(codeFile, tmp)) { @@ -216,7 +226,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn } } else { codeFile.resize(fileSize); - readFromFile(file, exeFS, codeFile.data(), fileOffset + exeFSHeaderSize, fileSize); + readFromFile(file, info, codeFile.data(), fileOffset + exeFSHeaderSize, fileSize); } } else if (std::strcmp(name, "icon") == 0) { // Parse icon file to extract region info and more in the future (logo, etc) From e6084363152edff610951ec81fda0add720a47e1 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 11 Jul 2024 22:27:05 +0300 Subject: [PATCH 052/251] Sanity check: Assert .code is encrypted before setting normal key --- src/core/loader/ncch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index a8e50101..47d5a4c2 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -207,8 +207,8 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn // All files in ExeFS use the same IV, though .code uses the secondary key for decryption // whereas .icon/.banner use the primary 
key. FSInfo info = exeFS; - if (secondaryKey.has_value() && info.encryptionInfo.has_value()) { - info.encryptionInfo->normalKey = secondaryKey.value(); + if (encrypted && secondaryKey.has_value() && info.encryptionInfo.has_value()) { + info.encryptionInfo->normalKey = *secondaryKey; } if (compressCode) { From 276cf9e06f4fd2ef76b97ced83e53c22c914a698 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 12 Jul 2024 18:23:49 +0300 Subject: [PATCH 053/251] Build LuaJIT/Discord RPC even in LR core --- CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 85a915e2..1a876e58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,8 +48,6 @@ endif() if(BUILD_LIBRETRO_CORE) set(CMAKE_POSITION_INDEPENDENT_CODE ON) - set(ENABLE_DISCORD_RPC OFF) - set(ENABLE_LUAJIT OFF) add_compile_definitions(__LIBRETRO__) endif() From d87477832b82f3543a4766030cb0f706a2dfe6d0 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 14 Jul 2024 15:32:26 +0300 Subject: [PATCH 054/251] Qt: Initial shader editor support --- CMakeLists.txt | 4 +- include/panda_qt/main_window.hpp | 7 +-- include/panda_qt/shader_editor.hpp | 28 ++++++++++ include/renderer.hpp | 8 +++ include/renderer_gl/renderer_gl.hpp | 6 ++- src/core/renderer_gl/renderer_gl.cpp | 5 +- src/host_shaders/opengl_fragment_shader.frag | 20 ++++---- src/panda_qt/main_window.cpp | 22 +++++--- src/panda_qt/shader_editor.cpp | 54 ++++++++++++++++++++ 9 files changed, 130 insertions(+), 24 deletions(-) create mode 100644 include/panda_qt/shader_editor.hpp create mode 100644 src/panda_qt/shader_editor.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 85a915e2..23c591c3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -457,11 +457,11 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) set(FRONTEND_SOURCE_FILES src/panda_qt/main.cpp src/panda_qt/screen.cpp 
src/panda_qt/main_window.cpp src/panda_qt/about_window.cpp src/panda_qt/config_window.cpp src/panda_qt/zep.cpp src/panda_qt/text_editor.cpp src/panda_qt/cheats_window.cpp src/panda_qt/mappings.cpp - src/panda_qt/patch_window.cpp src/panda_qt/elided_label.cpp + src/panda_qt/patch_window.cpp src/panda_qt/elided_label.cpp src/panda_qt/shader_editor.cpp ) set(FRONTEND_HEADER_FILES include/panda_qt/screen.hpp include/panda_qt/main_window.hpp include/panda_qt/about_window.hpp include/panda_qt/config_window.hpp include/panda_qt/text_editor.hpp include/panda_qt/cheats_window.hpp - include/panda_qt/patch_window.hpp include/panda_qt/elided_label.hpp + include/panda_qt/patch_window.hpp include/panda_qt/elided_label.hpp include/panda_qt/shader_editor.hpp ) source_group("Source Files\\Qt" FILES ${FRONTEND_SOURCE_FILES}) diff --git a/include/panda_qt/main_window.hpp b/include/panda_qt/main_window.hpp index 72725257..831074a2 100644 --- a/include/panda_qt/main_window.hpp +++ b/include/panda_qt/main_window.hpp @@ -19,6 +19,7 @@ #include "panda_qt/config_window.hpp" #include "panda_qt/patch_window.hpp" #include "panda_qt/screen.hpp" +#include "panda_qt/shader_editor.hpp" #include "panda_qt/text_editor.hpp" #include "services/hid.hpp" @@ -48,6 +49,7 @@ class MainWindow : public QMainWindow { EditCheat, PressTouchscreen, ReleaseTouchscreen, + ReloadUbershader, }; // Tagged union representing our message queue messages @@ -99,6 +101,7 @@ class MainWindow : public QMainWindow { CheatsWindow* cheatsEditor; TextEditorWindow* luaEditor; PatchWindow* patchWindow; + ShaderEditorWindow* shaderEditor; // We use SDL's game controller API since it's the sanest API that supports as many controllers as possible SDL_GameController* gameController = nullptr; @@ -110,9 +113,6 @@ class MainWindow : public QMainWindow { void selectROM(); void dumpDspFirmware(); void dumpRomFS(); - void openLuaEditor(); - void openCheatsEditor(); - void openPatchWindow(); void showAboutMenu(); void initControllers(); 
void pollControllers(); @@ -139,5 +139,6 @@ class MainWindow : public QMainWindow { void mouseReleaseEvent(QMouseEvent* event) override; void loadLuaScript(const std::string& code); + void reloadShader(const std::string& shader); void editCheat(u32 handle, const std::vector& cheat, const std::function& callback); }; diff --git a/include/panda_qt/shader_editor.hpp b/include/panda_qt/shader_editor.hpp new file mode 100644 index 00000000..009381a0 --- /dev/null +++ b/include/panda_qt/shader_editor.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include +#include +#include +#include + +#include "zep.h" +#include "zep/mode_repl.h" +#include "zep/regress.h" + +class ShaderEditorWindow : public QDialog { + Q_OBJECT + + private: + Zep::ZepWidget_Qt zepWidget; + Zep::IZepReplProvider replProvider; + static constexpr float fontSize = 14.0f; + + // Whether this backend supports shader editor + bool shaderEditorSupported = true; + + public: + ShaderEditorWindow(QWidget* parent, const std::string& filename, const std::string& initialText); + void setText(const std::string& text) { zepWidget.GetEditor().GetMRUBuffer()->SetText(text); } + + void setEnable(bool enable); +}; \ No newline at end of file diff --git a/include/renderer.hpp b/include/renderer.hpp index 8888b41e..17812bcf 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include "PICA/pica_vertex.hpp" @@ -66,6 +67,13 @@ class Renderer { // This function does things like write back or cache necessary state before we delete our context virtual void deinitGraphicsContext() = 0; + // Functions for hooking up the renderer core to the frontend's shader editor for editing ubershaders in real time + // SupportsShaderReload: Indicates whether the backend offers ubershader reload support or not + // GetUbershader/SetUbershader: Gets or sets the renderer's current ubershader + virtual bool supportsShaderReload() { return false; } + virtual std::string 
getUbershader() { return ""; } + virtual void setUbershader(const std::string& shader) {} + // Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window #ifdef PANDA3DS_FRONTEND_QT virtual void initGraphicsContext(GL::Context* context) { Helpers::panic("Tried to initialize incompatible renderer with GL context"); } diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 92f02662..4c2d9e66 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -82,7 +82,11 @@ class RendererGL final : public Renderer { void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; void drawVertices(PICA::PrimType primType, std::span vertices) override; // Draw the given vertices void deinitGraphicsContext() override; - + + virtual bool supportsShaderReload() override { return true; } + virtual std::string getUbershader() override; + virtual void setUbershader(const std::string& shader) override; + std::optional getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); // Note: The caller is responsible for deleting the currently bound FBO before calling this diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index a11a6ffa..3c68b8f9 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -812,4 +812,7 @@ void RendererGL::deinitGraphicsContext() { // All other GL objects should be invalidated automatically and be recreated by the next call to initGraphicsContext // TODO: Make it so that depth and colour buffers get written back to 3DS memory printf("RendererGL::DeinitGraphicsContext called\n"); -} \ No newline at end of file +} + +std::string RendererGL::getUbershader() { return ""; } +void RendererGL::setUbershader(const std::string& shader) {} \ No newline at end of file diff 
--git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index f6fa6c55..303a27b6 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -279,26 +279,26 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { } } - uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4); + uint lookup_config = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4); if (lookup_config == 0u) { - d[D1_LUT] = 0.0; - d[FR_LUT] = 0.0; + d[D1_LUT] = 1.0; + d[FR_LUT] = 1.0; d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; } else if (lookup_config == 1u) { - d[D0_LUT] = 0.0; - d[D1_LUT] = 0.0; + d[D0_LUT] = 1.0; + d[D1_LUT] = 1.0; d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; } else if (lookup_config == 2u) { - d[FR_LUT] = 0.0; - d[SP_LUT] = 0.0; + d[FR_LUT] = 1.0; + d[SP_LUT] = 1.0; d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; } else if (lookup_config == 3u) { - d[SP_LUT] = 0.0; + d[SP_LUT] = 1.0; d[RG_LUT] = d[RB_LUT] = d[RR_LUT] = 1.0; } else if (lookup_config == 4u) { - d[FR_LUT] = 0.0; + d[FR_LUT] = 1.0; } else if (lookup_config == 5u) { - d[D1_LUT] = 0.0; + d[D1_LUT] = 1.0; } else if (lookup_config == 6u) { d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; } diff --git a/src/panda_qt/main_window.cpp b/src/panda_qt/main_window.cpp index 54e4fabe..d8aab126 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp @@ -55,12 +55,14 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) auto luaEditorAction = toolsMenu->addAction(tr("Open Lua Editor")); auto cheatsEditorAction = toolsMenu->addAction(tr("Open Cheats Editor")); auto patchWindowAction = toolsMenu->addAction(tr("Open Patch Window")); + auto shaderEditorAction = toolsMenu->addAction(tr("Open Shader Editor")); auto dumpDspFirmware = toolsMenu->addAction(tr("Dump loaded DSP firmware")); connect(dumpRomFSAction, &QAction::triggered, this, &MainWindow::dumpRomFS); - connect(luaEditorAction, &QAction::triggered, this, 
&MainWindow::openLuaEditor); - connect(cheatsEditorAction, &QAction::triggered, this, &MainWindow::openCheatsEditor); - connect(patchWindowAction, &QAction::triggered, this, &MainWindow::openPatchWindow); + connect(luaEditorAction, &QAction::triggered, this, [this]() { luaEditor->show(); }); + connect(shaderEditorAction, &QAction::triggered, this, [this]() { shaderEditor->show(); }); + connect(cheatsEditorAction, &QAction::triggered, this, [this]() { cheatsEditor->show(); }); + connect(patchWindowAction, &QAction::triggered, this, [this]() { patchWindow->show(); }); connect(dumpDspFirmware, &QAction::triggered, this, &MainWindow::dumpDspFirmware); auto aboutAction = aboutMenu->addAction(tr("About Panda3DS")); @@ -75,6 +77,8 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) cheatsEditor = new CheatsWindow(emu, {}, this); patchWindow = new PatchWindow(this); luaEditor = new TextEditorWindow(this, "script.lua", ""); + shaderEditor = new ShaderEditorWindow(this, "shader.glsl", ""); + shaderEditor->setEnable(emu->getRenderer()->supportsShaderReload()); auto args = QCoreApplication::arguments(); if (args.size() > 1) { @@ -294,10 +298,6 @@ void MainWindow::showAboutMenu() { about.exec(); } -void MainWindow::openLuaEditor() { luaEditor->show(); } -void MainWindow::openCheatsEditor() { cheatsEditor->show(); } -void MainWindow::openPatchWindow() { patchWindow->show(); } - void MainWindow::dispatchMessage(const EmulatorMessage& message) { switch (message.type) { case MessageType::LoadROM: @@ -453,6 +453,14 @@ void MainWindow::loadLuaScript(const std::string& code) { sendMessage(message); } +void MainWindow::reloadShader(const std::string& shader) { + EmulatorMessage message{.type = MessageType::ReloadUbershader}; + + // Make a copy of the code on the heap to send via the message queue + message.string.str = new std::string(shader); + sendMessage(message); +} + void MainWindow::editCheat(u32 handle, const std::vector& cheat, const 
std::function& callback) { EmulatorMessage message{.type = MessageType::EditCheat}; diff --git a/src/panda_qt/shader_editor.cpp b/src/panda_qt/shader_editor.cpp new file mode 100644 index 00000000..8a23c854 --- /dev/null +++ b/src/panda_qt/shader_editor.cpp @@ -0,0 +1,54 @@ +#include +#include + +#include "panda_qt/main_window.hpp" +#include "panda_qt/shader_editor.hpp" + +using namespace Zep; + +ShaderEditorWindow::ShaderEditorWindow(QWidget* parent, const std::string& filename, const std::string& initialText) + : QDialog(parent), zepWidget(this, qApp->applicationDirPath().toStdString(), fontSize) { + resize(600, 600); + + // Register our extensions + ZepRegressExCommand::Register(zepWidget.GetEditor()); + ZepReplExCommand::Register(zepWidget.GetEditor(), &replProvider); + + // Default to standard mode instead of vim mode, initialize text box + zepWidget.GetEditor().InitWithText(filename, initialText); + zepWidget.GetEditor().SetGlobalMode(Zep::ZepMode_Standard::StaticName()); + + // Layout for widgets + QVBoxLayout* mainLayout = new QVBoxLayout(); + setLayout(mainLayout); + + QPushButton* button = new QPushButton(tr("Reload shader"), this); + button->setFixedSize(100, 20); + + // When the Load Script button is pressed, send the current text to the MainWindow, which will upload it to the emulator's lua object + connect(button, &QPushButton::pressed, this, [this]() { + if (parentWidget()) { + auto buffer = zepWidget.GetEditor().GetMRUBuffer(); + const std::string text = buffer->GetBufferText(buffer->Begin(), buffer->End()); + + static_cast(parentWidget())->reloadShader(text); + } else { + // This should be unreachable, only here for safety purposes + printf("Text editor does not have any parent widget, click doesn't work :(\n"); + } + }); + + mainLayout->addWidget(button); + mainLayout->addWidget(&zepWidget); +} + +void ShaderEditorWindow::setEnable(bool enable) { + shaderEditorSupported = enable; + + if (enable) { + setDisabled(false); + } else { + 
setDisabled(true); + setText("Shader editor window is not available for this renderer backend"); + } +} From 186fd3b94b48cc70514553b94e115e27901e2925 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 14 Jul 2024 15:49:35 +0300 Subject: [PATCH 055/251] Qt: Shader editor now works --- include/panda_qt/shader_editor.hpp | 7 +-- include/renderer_gl/renderer_gl.hpp | 1 + src/core/renderer_gl/renderer_gl.cpp | 59 +++++++++++++------- src/host_shaders/opengl_fragment_shader.frag | 20 +++---- src/panda_qt/main_window.cpp | 9 +++ src/panda_qt/shader_editor.cpp | 2 +- 6 files changed, 63 insertions(+), 35 deletions(-) diff --git a/include/panda_qt/shader_editor.hpp b/include/panda_qt/shader_editor.hpp index 009381a0..86bc1149 100644 --- a/include/panda_qt/shader_editor.hpp +++ b/include/panda_qt/shader_editor.hpp @@ -17,12 +17,11 @@ class ShaderEditorWindow : public QDialog { Zep::IZepReplProvider replProvider; static constexpr float fontSize = 14.0f; - // Whether this backend supports shader editor - bool shaderEditorSupported = true; - public: + // Whether this backend supports shader editor + bool supported = true; + ShaderEditorWindow(QWidget* parent, const std::string& filename, const std::string& initialText); void setText(const std::string& text) { zepWidget.GetEditor().GetMRUBuffer()->SetText(text); } - void setEnable(bool enable); }; \ No newline at end of file diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 4c2d9e66..c947583e 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -92,6 +92,7 @@ class RendererGL final : public Renderer { // Note: The caller is responsible for deleting the currently bound FBO before calling this void setFBO(uint handle) { screenFramebuffer.m_handle = handle; } void resetStateManager() { gl.reset(); } + void initUbershader(OpenGL::Program& program); #ifdef PANDA3DS_FRONTEND_QT virtual void 
initGraphicsContext([[maybe_unused]] GL::Context* context) override { initGraphicsContextInternal(); } diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 3c68b8f9..cfa32319 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -57,24 +57,7 @@ void RendererGL::initGraphicsContextInternal() { OpenGL::Shader vert({vertexShaderSource.begin(), vertexShaderSource.size()}, OpenGL::Vertex); OpenGL::Shader frag({fragmentShaderSource.begin(), fragmentShaderSource.size()}, OpenGL::Fragment); triangleProgram.create({vert, frag}); - gl.useProgram(triangleProgram); - - textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); - textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); - textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); - textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); - textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); - - depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); - depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); - depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable"); - picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs"); - - // Init sampler objects. 
Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 - glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0); - glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex1"), 1); - glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2); - glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex_lighting_lut"), 3); + initUbershader(triangleProgram); auto displayVertexShaderSource = gl_resources.open("opengl_display.vert"); auto displayFragmentShaderSource = gl_resources.open("opengl_display.frag"); @@ -814,5 +797,41 @@ void RendererGL::deinitGraphicsContext() { printf("RendererGL::DeinitGraphicsContext called\n"); } -std::string RendererGL::getUbershader() { return ""; } -void RendererGL::setUbershader(const std::string& shader) {} \ No newline at end of file +std::string RendererGL::getUbershader() { + auto gl_resources = cmrc::RendererGL::get_filesystem(); + auto fragmentShader = gl_resources.open("opengl_fragment_shader.frag"); + + return std::string(fragmentShader.begin(), fragmentShader.end()); +} + +void RendererGL::setUbershader(const std::string& shader) { + auto gl_resources = cmrc::RendererGL::get_filesystem(); + auto vertexShaderSource = gl_resources.open("opengl_vertex_shader.vert"); + + OpenGL::Shader vert({vertexShaderSource.begin(), vertexShaderSource.size()}, OpenGL::Vertex); + OpenGL::Shader frag(shader, OpenGL::Fragment); + triangleProgram.create({vert, frag}); + + initUbershader(triangleProgram); +} + +void RendererGL::initUbershader(OpenGL::Program& program) { + gl.useProgram(program); + + textureEnvSourceLoc = OpenGL::uniformLocation(program, "u_textureEnvSource"); + textureEnvOperandLoc = OpenGL::uniformLocation(program, "u_textureEnvOperand"); + textureEnvCombinerLoc = OpenGL::uniformLocation(program, "u_textureEnvCombiner"); + textureEnvColorLoc = OpenGL::uniformLocation(program, "u_textureEnvColor"); + textureEnvScaleLoc = OpenGL::uniformLocation(program, 
"u_textureEnvScale"); + + depthScaleLoc = OpenGL::uniformLocation(program, "u_depthScale"); + depthOffsetLoc = OpenGL::uniformLocation(program, "u_depthOffset"); + depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable"); + picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs"); + + // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 + glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); + glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); + glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); +} \ No newline at end of file diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index 303a27b6..f6fa6c55 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -279,26 +279,26 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { } } - uint lookup_config = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4); + uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4); if (lookup_config == 0u) { - d[D1_LUT] = 1.0; - d[FR_LUT] = 1.0; + d[D1_LUT] = 0.0; + d[FR_LUT] = 0.0; d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; } else if (lookup_config == 1u) { - d[D0_LUT] = 1.0; - d[D1_LUT] = 1.0; + d[D0_LUT] = 0.0; + d[D1_LUT] = 0.0; d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; } else if (lookup_config == 2u) { - d[FR_LUT] = 1.0; - d[SP_LUT] = 1.0; + d[FR_LUT] = 0.0; + d[SP_LUT] = 0.0; d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; } else if (lookup_config == 3u) { - d[SP_LUT] = 1.0; + d[SP_LUT] = 0.0; d[RG_LUT] = d[RB_LUT] = d[RR_LUT] = 1.0; } else if (lookup_config == 4u) { - d[FR_LUT] = 1.0; + d[FR_LUT] = 0.0; } else if (lookup_config == 5u) { - d[D1_LUT] = 1.0; + d[D1_LUT] = 0.0; } else if (lookup_config == 6u) { d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; } diff --git a/src/panda_qt/main_window.cpp 
b/src/panda_qt/main_window.cpp index d8aab126..cfa45e85 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp @@ -78,7 +78,11 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) patchWindow = new PatchWindow(this); luaEditor = new TextEditorWindow(this, "script.lua", ""); shaderEditor = new ShaderEditorWindow(this, "shader.glsl", ""); + shaderEditor->setEnable(emu->getRenderer()->supportsShaderReload()); + if (shaderEditor->supported) { + shaderEditor->setText(emu->getRenderer()->getUbershader()); + } auto args = QCoreApplication::arguments(); if (args.size() > 1) { @@ -351,6 +355,11 @@ void MainWindow::dispatchMessage(const EmulatorMessage& message) { emu->getServiceManager().getHID().setTouchScreenPress(message.touchscreen.x, message.touchscreen.y); break; case MessageType::ReleaseTouchscreen: emu->getServiceManager().getHID().releaseTouchScreen(); break; + + case MessageType::ReloadUbershader: + emu->getRenderer()->setUbershader(*message.string.str); + delete message.string.str; + break; } } diff --git a/src/panda_qt/shader_editor.cpp b/src/panda_qt/shader_editor.cpp index 8a23c854..122d841f 100644 --- a/src/panda_qt/shader_editor.cpp +++ b/src/panda_qt/shader_editor.cpp @@ -43,7 +43,7 @@ ShaderEditorWindow::ShaderEditorWindow(QWidget* parent, const std::string& filen } void ShaderEditorWindow::setEnable(bool enable) { - shaderEditorSupported = enable; + supported = enable; if (enable) { setDisabled(false); From c4e45ee6b8749750cf398dfe3a7c82f958fc910a Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 14 Jul 2024 18:20:59 +0300 Subject: [PATCH 056/251] Renderer GL: Fix hotswapping shaders --- src/core/renderer_gl/renderer_gl.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index cfa32319..2d29e682 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ 
b/src/core/renderer_gl/renderer_gl.cpp @@ -813,6 +813,10 @@ void RendererGL::setUbershader(const std::string& shader) { triangleProgram.create({vert, frag}); initUbershader(triangleProgram); + + glUniform1f(depthScaleLoc, oldDepthScale); + glUniform1f(depthOffsetLoc, oldDepthOffset); + glUniform1i(depthmapEnableLoc, oldDepthmapEnable); } void RendererGL::initUbershader(OpenGL::Program& program) { @@ -834,4 +838,4 @@ void RendererGL::initUbershader(OpenGL::Program& program) { glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); -} \ No newline at end of file +} From bee414a4f81bc4dd3019754a808667d371787bd3 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 14 Jul 2024 23:05:49 +0300 Subject: [PATCH 057/251] Downgrade SetFileSize failure to warning --- src/core/kernel/file_operations.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/kernel/file_operations.cpp b/src/core/kernel/file_operations.cpp index 972190fa..2b2020d1 100644 --- a/src/core/kernel/file_operations.cpp +++ b/src/core/kernel/file_operations.cpp @@ -184,7 +184,8 @@ void Kernel::setFileSize(u32 messagePointer, Handle fileHandle) { if (success) { mem.write32(messagePointer + 4, Result::Success); } else { - Helpers::panic("FileOp::SetFileSize failed"); + Helpers::warn("FileOp::SetFileSize failed"); + mem.write32(messagePointer + 4, Result::FailurePlaceholder); } } else { Helpers::panic("Tried to set file size of file without file descriptor"); From b384cb8ad9601197ea4162d200c9fcdf2a6fbfa9 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 00:13:22 +0300 Subject: [PATCH 058/251] Fix build --- src/core/renderer_gl/renderer_gl.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git 
a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 706d52ac..0b26f004 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -872,24 +872,24 @@ void RendererGL::setUbershader(const std::string& shader) { initUbershader(triangleProgram); - glUniform1f(depthScaleLoc, oldDepthScale); - glUniform1f(depthOffsetLoc, oldDepthOffset); - glUniform1i(depthmapEnableLoc, oldDepthmapEnable); + glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale); + glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset); + glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable); } void RendererGL::initUbershader(OpenGL::Program& program) { gl.useProgram(program); - textureEnvSourceLoc = OpenGL::uniformLocation(program, "u_textureEnvSource"); - textureEnvOperandLoc = OpenGL::uniformLocation(program, "u_textureEnvOperand"); - textureEnvCombinerLoc = OpenGL::uniformLocation(program, "u_textureEnvCombiner"); - textureEnvColorLoc = OpenGL::uniformLocation(program, "u_textureEnvColor"); - textureEnvScaleLoc = OpenGL::uniformLocation(program, "u_textureEnvScale"); + ubershaderData.textureEnvSourceLoc = OpenGL::uniformLocation(program, "u_textureEnvSource"); + ubershaderData.textureEnvOperandLoc = OpenGL::uniformLocation(program, "u_textureEnvOperand"); + ubershaderData.textureEnvCombinerLoc = OpenGL::uniformLocation(program, "u_textureEnvCombiner"); + ubershaderData.textureEnvColorLoc = OpenGL::uniformLocation(program, "u_textureEnvColor"); + ubershaderData.textureEnvScaleLoc = OpenGL::uniformLocation(program, "u_textureEnvScale"); - depthScaleLoc = OpenGL::uniformLocation(program, "u_depthScale"); - depthOffsetLoc = OpenGL::uniformLocation(program, "u_depthOffset"); - depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable"); - picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs"); + ubershaderData.depthScaleLoc = OpenGL::uniformLocation(program, "u_depthScale"); + 
ubershaderData.depthOffsetLoc = OpenGL::uniformLocation(program, "u_depthOffset"); + ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable"); + ubershaderData.picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs"); // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); From ea59933b187732ec4dd2dffc52f9c3ab00c970d9 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 00:46:15 +0300 Subject: [PATCH 059/251] Simplify alpha test code --- src/core/PICA/shader_gen_glsl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 50be94f0..0e51ad93 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -358,13 +358,13 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs) { const u32 alphaConfig = regs[InternalRegs::AlphaTestConfig]; + const auto function = static_cast<CompareFunction>(Helpers::getBits<4, 3>(alphaConfig)); + // Alpha test disabled - if (Helpers::getBit<0>(alphaConfig) == 0) { + if (Helpers::getBit<0>(alphaConfig) == 0 || function == CompareFunction::Always) { return; } - const auto function = static_cast<CompareFunction>(Helpers::getBits<4, 3>(alphaConfig)); - shader += "if ("; switch (function) { case CompareFunction::Never: shader += "true"; break; From 133082c2322a97e3e96b6e1eb906ca01951f80b8 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 02:12:04 +0300 Subject: [PATCH 060/251] x64 shader rec: Add support for PICA non-IEEE multiplication --- .../PICA/dynapica/shader_rec_emitter_x64.hpp | 5 + .../PICA/dynapica/shader_rec_emitter_x64.cpp | 160
+++++++++++++++--- 2 files changed, 142 insertions(+), 23 deletions(-) diff --git a/include/PICA/dynapica/shader_rec_emitter_x64.hpp b/include/PICA/dynapica/shader_rec_emitter_x64.hpp index 0338911c..1052d6a0 100644 --- a/include/PICA/dynapica/shader_rec_emitter_x64.hpp +++ b/include/PICA/dynapica/shader_rec_emitter_x64.hpp @@ -32,6 +32,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator { Label negateVector; // Vector value of (1.0, 1.0, 1.0, 1.0) for SLT(i)/SGE(i) Label onesVector; + // Vector value of (0xFF, 0xFF, 0xFF, 0) for setting the w component to 0 in DP3 + Label dp3Vector; u32 recompilerPC = 0; // PC the recompiler is currently recompiling @ u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop) @@ -49,6 +51,9 @@ class ShaderEmitter : public Xbyak::CodeGenerator { Xbyak::Label emitExp2Func(); Xbyak::util::Cpu cpuCaps; + // Emit a PICA200-compliant multiplication that handles "0 * inf = 0" + void emitSafeMUL(Xbyak::Xmm src1, Xbyak::Xmm src2, Xbyak::Xmm scratch); + // Compile all instructions from [current recompiler PC, end) void compileUntil(const PICAShader& shaderUnit, u32 endPC); // Compile instruction "instr" diff --git a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp index c134b72f..e7bafe9f 100644 --- a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp +++ b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp @@ -12,6 +12,9 @@ using namespace Xbyak; using namespace Xbyak::util; using namespace Helpers; +// TODO: Expose safe/unsafe optimizations to the user +constexpr bool useSafeMUL = false; + // The shader recompiler uses quite an odd internal ABI // We make use of the fact that in regular conditions, we should pretty much never be calling C++ code from recompiled shader code // This allows us to establish an ABI that's optimized for this sort of workflow, statically allocating volatile host registers @@ -45,6 +48,16 @@ void ShaderEmitter::compile(const PICAShader& 
shaderUnit) { L(onesVector); dd(0x3f800000); dd(0x3f800000); dd(0x3f800000); dd(0x3f800000); // 1.0 4 times + if (useSafeMUL) { + // When doing safe mul, we need a vector to set only the w component to 0 for DP3 + L(dp3Vector); + + dd(0xFFFFFFFF); + dd(0xFFFFFFFF); + dd(0xFFFFFFFF); + dd(0); + } + // Emit prologue first align(16); prologueCb = getCurr(); @@ -523,24 +536,60 @@ void ShaderEmitter::recDP3(const PICAShader& shader, u32 instruction) { const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); - // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor); loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor); - dpps(src1_xmm, src2_xmm, 0b01111111); // 3-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + + if (!useSafeMUL) { + dpps(src1_xmm, src2_xmm, 0b01111111); + } else { + const u32 writeMask = operandDescriptor & 0xf; + + // Set w component to 0 and do a DP4 + andps(src1_xmm, xword[rip + dp3Vector]); + + // Set src1 to src1 * src2, then get the dot product by doing 2 horizontal adds + emitSafeMUL(src1_xmm, src2_xmm, scratch1); + haddps(src1_xmm, src1_xmm); + haddps(src1_xmm, src1_xmm); + + // If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx + // Otherwise we do + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx + } + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); } void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) { const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; const u32 src1 = getBits<12, 7>(instruction); - const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment + const u32 src2 = getBits<7, 5>(instruction); // src2 
coming first because PICA moment const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); - // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor); loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor); - dpps(src1_xmm, src2_xmm, 0b11111111); // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + + if (!useSafeMUL) { + // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + dpps(src1_xmm, src2_xmm, 0b11111111); + } else { + const u32 writeMask = operandDescriptor & 0xf; + + // Set src1 to src1 * src2, then get the dot product by doing 2 horizontal adds + emitSafeMUL(src1_xmm, src2_xmm, scratch1); + haddps(src1_xmm, src1_xmm); + haddps(src1_xmm, src1_xmm); + + // If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx + // Otherwise we do + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx + } + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); } @@ -553,7 +602,6 @@ void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) { const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); - // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) loadRegister<1>(src1_xmm, shader, src1, isDPHI ? 0 : idx, operandDescriptor); loadRegister<2>(src2_xmm, shader, src2, isDPHI ? 
idx : 0, operandDescriptor); @@ -566,7 +614,25 @@ void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) { unpcklpd(src1_xmm, scratch1); } - dpps(src1_xmm, src2_xmm, 0b11111111); // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + // Now perform a DP4 + if (!useSafeMUL) { + // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + dpps(src1_xmm, src2_xmm, 0b11111111); + } else { + const u32 writeMask = operandDescriptor & 0xf; + + // Set src1 to src1 * src2, then get the dot product by doing 2 horizontal adds + emitSafeMUL(src1_xmm, src2_xmm, scratch1); + haddps(src1_xmm, src1_xmm); + haddps(src1_xmm, src1_xmm); + + // If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx + // Otherwise we do + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx + } + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); } @@ -603,10 +669,15 @@ void ShaderEmitter::recMUL(const PICAShader& shader, u32 instruction) { const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); - // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor); loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor); - mulps(src1_xmm, src2_xmm); + + if (!useSafeMUL) { + mulps(src1_xmm, src2_xmm); + } else { + emitSafeMUL(src1_xmm, src2_xmm, scratch1); + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); } @@ -662,23 +733,31 @@ void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) { loadRegister<2>(src2_xmm, shader, src2, isMADI ? 0 : idx, operandDescriptor); loadRegister<3>(src3_xmm, shader, src3, isMADI ? 
idx : 0, operandDescriptor); - // TODO: Implement safe PICA mul // If we have FMA3, optimize MAD to use FMA - if (haveFMA3) { - vfmadd213ps(src1_xmm, src2_xmm, src3_xmm); - storeRegister(src1_xmm, shader, dest, operandDescriptor); - } - - // If we don't have FMA3, do a multiplication and addition - else { - // Multiply src1 * src2 - if (haveAVX) { - vmulps(scratch1, src1_xmm, src2_xmm); - } else { - movaps(scratch1, src1_xmm); - mulps(scratch1, src2_xmm); + if (!useSafeMUL) { + if (haveFMA3) { + vfmadd213ps(src1_xmm, src2_xmm, src3_xmm); + storeRegister(src1_xmm, shader, dest, operandDescriptor); } + // If we don't have FMA3, do a multiplication and addition + else { + // Multiply src1 * src2 + if (haveAVX) { + vmulps(scratch1, src1_xmm, src2_xmm); + } else { + movaps(scratch1, src1_xmm); + mulps(scratch1, src2_xmm); + } + + // Add src3 + addps(scratch1, src3_xmm); + storeRegister(scratch1, shader, dest, operandDescriptor); + } + } else { + movaps(scratch1, src1_xmm); + emitSafeMUL(scratch1, src2_xmm, src1_xmm); + // Add src3 addps(scratch1, src3_xmm); storeRegister(scratch1, shader, dest, operandDescriptor); @@ -1115,6 +1194,41 @@ Xbyak::Label ShaderEmitter::emitLog2Func() { return subroutine; } +void ShaderEmitter::emitSafeMUL(Xmm src1, Xmm src2, Xmm scratch) { + // 0 * inf and inf * 0 in the PICA should return 0 instead of NaN + // This can be done by checking for NaNs before and after a multiplication + // To do this we can create a mask of which components of src1/src2 are NOT NaN using cmpordsps (cmpps with imm = 7) + // Then we multiply src1 and src2 and reate a mask of which components of the result ARE NaN using cmpunordps + // If the NaNs didn't exist (ie they were created by 0 * inf) before then we set them to 0 by XORing the 2 masks and ANDing the multiplication + // result with the xor result + // Based on Citra implementation, particularly the AVX-512 version + + if (cpuCaps.has(Cpu::tAVX512F | Cpu::tAVX512VL)) { + const Xbyak::Opmask zeroMask = k1; + 
+ vmulps(scratch, src1, src2); + // Mask of any NaN values found in the result + vcmpunordps(zeroMask, scratch, scratch); + // Mask of any non-NaN inputs producing NaN results + vcmpordps(zeroMask | zeroMask, src1, src2); + + knotb(zeroMask, zeroMask); + vmovaps(src1 | zeroMask | T_z, scratch); + } else { + if (haveAVX) { + vcmpordps(scratch, src1, src2); + } else { + movaps(scratch, src1); + cmpordps(scratch, src2); + } + + mulps(src1, src2); + cmpunordps(src2, src1); + xorps(src2, scratch); + andps(src1, src2); + } +} + Xbyak::Label ShaderEmitter::emitExp2Func() { Xbyak::Label subroutine; From c8eb1c1128581d7409464e98c2a672a394737da9 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 04:10:47 +0300 Subject: [PATCH 061/251] Shader recompiler: Add UBO --- CMakeLists.txt | 1 + include/PICA/pica_frag_config.hpp | 4 +- include/PICA/pica_frag_uniforms.hpp | 18 +++++++++ include/renderer_gl/renderer_gl.hpp | 7 +++- src/core/PICA/shader_gen_glsl.cpp | 37 ++++++++++++----- src/core/renderer_gl/renderer_gl.cpp | 60 ++++++++++++++++++++++++---- 6 files changed, 105 insertions(+), 22 deletions(-) create mode 100644 include/PICA/pica_frag_uniforms.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 194200f0..c52ccd51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -249,6 +249,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp + include/PICA/pica_frag_uniforms.hpp ) cmrc_add_resource_library( diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index c4d46b11..8352cba2 100644 --- a/include/PICA/pica_frag_config.hpp +++ 
b/include/PICA/pica_frag_config.hpp @@ -23,13 +23,11 @@ namespace PICA { u32 texUnitConfig; u32 texEnvUpdateBuffer; - // TODO: This should probably be a uniform - u32 texEnvBufferColor; - // There's 6 TEV stages, and each one is configured via 5 word-sized registers std::array tevConfigs; }; + // Config used for identifying unique fragment pipeline configurations struct FragmentConfig { OutputConfig outConfig; TextureConfig texConfig; diff --git a/include/PICA/pica_frag_uniforms.hpp b/include/PICA/pica_frag_uniforms.hpp new file mode 100644 index 00000000..b151ed42 --- /dev/null +++ b/include/PICA/pica_frag_uniforms.hpp @@ -0,0 +1,18 @@ +#pragma once +#include +#include + +#include "helpers.hpp" + +namespace PICA { + struct FragmentUniforms { + using vec3 = std::array; + using vec4 = std::array; + static constexpr usize tevStageCount = 6; + + s32 alphaReference; + + alignas(16) vec4 constantColors[tevStageCount]; + alignas(16) vec4 tevBufferColor; + }; +} // namespace PICA \ No newline at end of file diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index b4cf9c6f..a028bdd3 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -67,7 +67,12 @@ class RendererGL final : public Renderer { OpenGL::Framebuffer screenFramebuffer; OpenGL::Texture blankTexture; - std::unordered_map shaderCache; + // Cached recompiled fragment shader + struct CachedProgram { + OpenGL::Program program; + uint uboBinding; + }; + std::unordered_map shaderCache; OpenGL::Framebuffer getColourFBO(); OpenGL::Texture getTexture(Texture& tex); diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 0e51ad93..50e9c3de 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -38,8 +38,6 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { out vec2 v_texcoord1; out vec3 v_view; out vec2 v_texcoord2; - flat out vec4 v_textureEnvColor[6]; - 
flat out vec4 v_textureEnvBufferColor; //out float gl_ClipDistance[2]; @@ -103,8 +101,6 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { in vec2 v_texcoord1; in vec3 v_view; in vec2 v_texcoord2; - flat in vec4 v_textureEnvColor[6]; - flat in vec4 v_textureEnvBufferColor; out vec4 fragColor; uniform sampler2D u_tex0; @@ -115,18 +111,21 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { uniform sampler1DArray u_tex_lighting_lut; #endif - vec4 tevSources[16]; - vec4 tevNextPreviousBuffer; + layout(std140) uniform FragmentUniforms { + int alphaReference; + + vec4 constantColors[6]; + vec4 tevBufferColor; + }; )"; // Emit main function for fragment shader // When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour ret += R"( void main() { - tevSources[0] = v_colour; - tevSources[13] = vec4(0.0); // Previous buffer colour - tevSources[15] = v_colour; // Previous combiner vec4 combinerOutput = v_colour; // Last TEV output + vec4 previousBuffer = vec4(0.0); // Previous buffer + vec4 tevNextPreviousBuffer = tevBufferColor; )"; ret += R"( @@ -148,7 +147,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { ret += "fragColor = combinerOutput;\n"; ret += "}"; // End of main function - ret += "\n\n\n\n\n\n\n\n\n\n\n\n\n"; + ret += "\n\n\n\n\n\n\n\n\n\n"; return ret; } @@ -201,6 +200,22 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) + ".0, vec3(0.0), vec3(1.0)), clamp(outputAlpha" + std::to_string(stage) + " * " + std::to_string(tev.getAlphaScale()) + ".0, 0.0, 1.0));\n"; + + shader += "previousBuffer = tevNextPreviousBuffer;\n"; + + // Update the "next previous buffer" if necessary + const u32 textureEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + if (stage < 4) { + // Check whether to update rgb + if ((textureEnvUpdateBuffer & 
(0x100 << stage))) { + shader += "tevNextPreviousBuffer.rgb = combinerOutput.rgb;\n"; + } + + // And whether to update alpha + if ((textureEnvUpdateBuffer & (0x1000u << stage))) { + shader += "tevNextPreviousBuffer.a = combinerOutput.a;\n"; + } + } } } @@ -308,6 +323,8 @@ void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source sour } case TexEnvConfig::Source::Previous: shader += "combinerOutput"; break; + case TexEnvConfig::Source::Constant: shader += "constantColors[" + std::to_string(index) + "]"; break; + case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break; default: Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 0b26f004..aa3bb61b 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -5,6 +5,7 @@ #include #include "PICA/float_types.hpp" +#include "PICA/pica_frag_uniforms.hpp" #include "PICA/gpu.hpp" #include "PICA/regs.hpp" #include "math_util.hpp" @@ -413,7 +414,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; - // Update depth uniforms + // Update ubershader uniforms if (usingUbershader) { if (oldDepthScale != depthScale) { oldDepthScale = depthScale; @@ -429,17 +430,15 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v oldDepthmapEnable = depthMapEnable; glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable); } - } - setupTextureEnvState(); - bindTexturesToSlots(); - - if (usingUbershader) { // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) // The texturing and the fragment lighting registers. 
Therefore we upload them all in one go to avoid multiple slow uniform updates glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]); } + setupTextureEnvState(); + bindTexturesToSlots(); + if (gpu.lightingLUTDirty) { updateLightingLUT(); } @@ -778,6 +777,8 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt } OpenGL::Program& RendererGL::getSpecializedShader() { + constexpr uint uboBlockBinding = 2; + PICA::FragmentConfig fsConfig; auto& outConfig = fsConfig.outConfig; auto& texConfig = fsConfig.texConfig; @@ -788,7 +789,6 @@ OpenGL::Program& RendererGL::getSpecializedShader() { texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; - texConfig.texEnvBufferColor = 0; // Set up TEV stages std::memcpy(&texConfig.tevConfigs[0 * 5], &regs[InternalRegs::TexEnv0Source], 5 * sizeof(u32)); @@ -798,7 +798,9 @@ OpenGL::Program& RendererGL::getSpecializedShader() { std::memcpy(&texConfig.tevConfigs[4 * 5], &regs[InternalRegs::TexEnv4Source], 5 * sizeof(u32)); std::memcpy(&texConfig.tevConfigs[5 * 5], &regs[InternalRegs::TexEnv5Source], 5 * sizeof(u32)); - OpenGL::Program& program = shaderCache[fsConfig]; + CachedProgram& programEntry = shaderCache[fsConfig]; + OpenGL::Program& program = programEntry.program; + if (!program.exists()) { std::string vs = fragShaderGen.getVertexShader(regs); std::string fs = fragShaderGen.generate(regs); @@ -814,8 +816,50 @@ OpenGL::Program& RendererGL::getSpecializedShader() { glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + + // Allocate memory for the program UBO + glGenBuffers(1, &programEntry.uboBinding); + glBindBuffer(GL_UNIFORM_BUFFER, programEntry.uboBinding); + glBufferData(GL_UNIFORM_BUFFER, sizeof(PICA::FragmentUniforms), nullptr, GL_DYNAMIC_DRAW); + + // Set up the binding for our UBO.
Sadly we can't specify it in the shader like normal people, + // As it's an OpenGL 4.2 feature that MacOS doesn't support... + uint uboIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms"); + glUniformBlockBinding(program.handle(), uboIndex, uboBlockBinding); + glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, programEntry.uboBinding); } + // Upload uniform data to our shader's UBO + PICA::FragmentUniforms uniforms; + uniforms.alphaReference = Helpers::getBits<8, 8>(regs[InternalRegs::AlphaTestConfig]); + + // Set up the texenv buffer color + const u32 texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor]; + uniforms.tevBufferColor[0] = float(texEnvBufferColor & 0xFF) / 255.0f; + uniforms.tevBufferColor[1] = float((texEnvBufferColor >> 8) & 0xFF) / 255.0f; + uniforms.tevBufferColor[2] = float((texEnvBufferColor >> 16) & 0xFF) / 255.0f; + uniforms.tevBufferColor[3] = float((texEnvBufferColor >> 24) & 0xFF) / 255.0f; + + // Set up the constant color for the 6 TEV stages + for (int i = 0; i < 6; i++) { + static constexpr std::array ioBases = { + PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, + PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, + }; + + auto& vec = uniforms.constantColors[i]; + u32 base = ioBases[i]; + u32 color = regs[base + 3]; + + vec[0] = float(color & 0xFF) / 255.0f; + vec[1] = float((color >> 8) & 0xFF) / 255.0f; + vec[2] = float((color >> 16) & 0xFF) / 255.0f; + vec[3] = float((color >> 24) & 0xFF) / 255.0f; + } + + glBindBuffer(GL_UNIFORM_BUFFER, programEntry.uboBinding); + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms); + return program; } From 0878474e01aa6d982575ff63394d375e4fa1b13b Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 04:30:04 +0300 Subject: [PATCH 062/251] Shader recompiler: Add depth mapping 
--- include/PICA/pica_frag_config.hpp | 1 + include/PICA/pica_frag_uniforms.hpp | 2 ++ src/core/PICA/shader_gen_glsl.cpp | 15 +++++++++++++++ src/core/renderer_gl/renderer_gl.cpp | 13 +++++++++---- 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index 8352cba2..59f13757 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -16,6 +16,7 @@ namespace PICA { // Merge the enable + compare function into 1 field to avoid duplicate shaders // enable == off means a CompareFunction of Always BitField<0, 3, CompareFunction> alphaTestFunction; + BitField<4, 1, u32> depthMapEnable; }; }; diff --git a/include/PICA/pica_frag_uniforms.hpp b/include/PICA/pica_frag_uniforms.hpp index b151ed42..616f1882 100644 --- a/include/PICA/pica_frag_uniforms.hpp +++ b/include/PICA/pica_frag_uniforms.hpp @@ -11,6 +11,8 @@ namespace PICA { static constexpr usize tevStageCount = 6; s32 alphaReference; + float depthScale; + float depthOffset; alignas(16) vec4 constantColors[tevStageCount]; alignas(16) vec4 tevBufferColor; diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 50e9c3de..f19c699d 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -113,6 +113,8 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { layout(std140) uniform FragmentUniforms { int alphaReference; + float depthScale; + float depthOffset; vec4 constantColors[6]; vec4 tevBufferColor; @@ -138,6 +140,19 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { float alphaOp3 = 0.0; )"; + ret += R"( + // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] + // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] + float z_over_w = gl_FragCoord.z * 2.0f - 1.0f; + float depth = z_over_w * depthScale + depthOffset; + )"; + + if ((regs[InternalRegs::DepthmapEnable] & 1) == 
0) { + ret += "depth /= gl_FragCoord.w;\n"; + } + + ret += "gl_FragDepth = depth;\n"; + textureConfig = regs[InternalRegs::TexUnitCfg]; for (int i = 0; i < 6; i++) { compileTEV(ret, i, regs); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index aa3bb61b..9c60ac5f 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -410,12 +410,12 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v static constexpr std::array depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL}; - const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); - const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); - const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; - // Update ubershader uniforms if (usingUbershader) { + const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; + if (oldDepthScale != depthScale) { oldDepthScale = depthScale; glUniform1f(ubershaderData.depthScaleLoc, depthScale); @@ -785,7 +785,9 @@ OpenGL::Program& RendererGL::getSpecializedShader() { auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig]; auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig); + outConfig.alphaTestFunction = (alphaTestConfig & 1) ? 
static_cast(alphaTestFunction) : PICA::CompareFunction::Always; + outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1; texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; @@ -840,6 +842,9 @@ OpenGL::Program& RendererGL::getSpecializedShader() { uniforms.tevBufferColor[2] = float((texEnvBufferColor >> 16) & 0xFF) / 255.0f; uniforms.tevBufferColor[3] = float((texEnvBufferColor >> 24) & 0xFF) / 255.0f; + uniforms.depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + uniforms.depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + // Set up the constant color for the 6 TEV stages for (int i = 0; i < 6; i++) { static constexpr std::array ioBases = { From fe53214c863cf5fb5219319c9b2a8335ebf2f9b1 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 04:51:08 +0300 Subject: [PATCH 063/251] Shader recompiler: Finish alpha test and stub lighting --- src/core/PICA/shader_gen_glsl.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index f19c699d..11030848 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -340,6 +340,10 @@ void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source sour case TexEnvConfig::Source::Previous: shader += "combinerOutput"; break; case TexEnvConfig::Source::Constant: shader += "constantColors[" + std::to_string(index) + "]"; break; case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break; + + // Lighting + case TexEnvConfig::Source::PrimaryFragmentColor: + case TexEnvConfig::Source::SecondaryFragmentColor: shader += "vec4(0.0, 0.0, 0.0, 1.0)"; break; default: Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); @@ -397,15 +401,23 
@@ void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs return; } - shader += "if ("; + shader += "float alphaReferenceFloat = float(alphaReference) / 255.0;\n"; + shader += "if (!("; switch (function) { - case CompareFunction::Never: shader += "true"; break; - case CompareFunction::Always: shader += "false"; break; + case CompareFunction::Never: shader += "false"; break; + case CompareFunction::Always: shader += "true"; break; + case CompareFunction::Equal: shader += "combinerOutput.a == alphaReferenceFloat"; break; + case CompareFunction::NotEqual: shader += "combinerOutput.a != alphaReferenceFloat"; break; + case CompareFunction::Less: shader += "combinerOutput.a < alphaReferenceFloat"; break; + case CompareFunction::LessOrEqual: shader += "combinerOutput.a <= alphaReferenceFloat"; break; + case CompareFunction::Greater: shader += "combinerOutput.a > alphaReferenceFloat"; break; + case CompareFunction::GreaterOrEqual: shader += "combinerOutput.a >= alphaReferenceFloat"; break; + default: Helpers::warn("Unimplemented alpha test function"); shader += "false"; break; } - shader += ") { discard; }\n"; + shader += ")) { discard; }\n"; } From 11c927932978ea157ed3b7e851908f918733b036 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 14:29:49 +0300 Subject: [PATCH 064/251] Properly flush shader cache --- src/core/PICA/shader_gen_glsl.cpp | 6 ++-- src/core/renderer_gl/renderer_gl.cpp | 10 ++++++ third_party/opengl/opengl.hpp | 51 ++++++++++++++++------------ 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 11030848..6e682354 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -140,9 +140,9 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { float alphaOp3 = 0.0; )"; + // Get original depth value by converting from [near, far] = [0, 1] 
to [-1, 1] + // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] ret += R"( - // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] - // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] float z_over_w = gl_FragCoord.z * 2.0f - 1.0f; float depth = z_over_w * depthScale + depthOffset; )"; @@ -343,7 +343,7 @@ void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source sour // Lighting case TexEnvConfig::Source::PrimaryFragmentColor: - case TexEnvConfig::Source::SecondaryFragmentColor: shader += "vec4(0.0, 0.0, 0.0, 1.0)"; break; + case TexEnvConfig::Source::SecondaryFragmentColor: shader += "vec4(1.0, 1.0, 1.0, 1.0)"; break; default: Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 9c60ac5f..d0e2bb31 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -23,6 +23,11 @@ void RendererGL::reset() { colourBufferCache.reset(); textureCache.reset(); + for (auto& shader : shaderCache) { + shader.second.program.free(); + } + shaderCache.clear(); + // Init the colour/depth buffer settings to some random defaults on reset colourBufferLoc = 0; colourBufferFormat = PICA::ColorFmt::RGBA8; @@ -899,6 +904,11 @@ void RendererGL::deinitGraphicsContext() { depthBufferCache.reset(); colourBufferCache.reset(); + for (auto& shader : shaderCache) { + shader.second.program.free(); + } + shaderCache.clear(); + // All other GL objects should be invalidated automatically and be recreated by the next call to initGraphicsContext // TODO: Make it so that depth and colour buffers get written back to 3DS memory printf("RendererGL::DeinitGraphicsContext called\n"); diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp index 9997e63b..828fb784 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp @@ -397,34 
+397,41 @@ namespace OpenGL { }; struct Program { - GLuint m_handle = 0; + GLuint m_handle = 0; - bool create(std::initializer_list> shaders) { - m_handle = glCreateProgram(); - for (const auto& shader : shaders) { - glAttachShader(m_handle, shader.get().handle()); - } + bool create(std::initializer_list> shaders) { + m_handle = glCreateProgram(); + for (const auto& shader : shaders) { + glAttachShader(m_handle, shader.get().handle()); + } - glLinkProgram(m_handle); - GLint success; - glGetProgramiv(m_handle, GL_LINK_STATUS, &success); + glLinkProgram(m_handle); + GLint success; + glGetProgramiv(m_handle, GL_LINK_STATUS, &success); - if (!success) { - char buf[4096]; - glGetProgramInfoLog(m_handle, 4096, nullptr, buf); - fprintf(stderr, "Failed to link program\nError: %s\n", buf); - glDeleteProgram(m_handle); + if (!success) { + char buf[4096]; + glGetProgramInfoLog(m_handle, 4096, nullptr, buf); + fprintf(stderr, "Failed to link program\nError: %s\n", buf); + glDeleteProgram(m_handle); - m_handle = 0; - } + m_handle = 0; + } - return m_handle != 0; - } + return m_handle != 0; + } - GLuint handle() const { return m_handle; } - bool exists() const { return m_handle != 0; } - void use() const { glUseProgram(m_handle); } - }; + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void use() const { glUseProgram(m_handle); } + + void free() { + if (exists()) { + glDeleteProgram(m_handle); + m_handle = 0; + } + } + }; static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) { glDispatchCompute(groupsX, groupsY, groupsZ); From c535ae43eed9898c065140cdc2bb00a9ad31ca32 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 14:34:59 +0300 Subject: [PATCH 065/251] Shader recompiler: Fix dot3 RGBA --- src/core/PICA/shader_gen_glsl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp 
b/src/core/PICA/shader_gen_glsl.cpp index 6e682354..56cdd936 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -194,7 +194,7 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg if (tev.colorOp == TexEnvConfig::Operation::Dot3RGBA) { // Dot3 RGBA also writes to the alpha component so we don't need to do anything more - shader += "float outputAlpha" + std::to_string(stage) + " = colorOutput" + std::to_string(stage) + ".x;\n"; + shader += "float outputAlpha" + std::to_string(stage) + " = outputColor" + std::to_string(stage) + ".x;\n"; } else { // Get alpha operands shader += "alphaOp1 = "; From 2cd50e7f376e2b7c0594dd382bedd271d1253bc8 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 15:11:23 +0300 Subject: [PATCH 066/251] Clean up ubershader code --- include/renderer_gl/renderer_gl.hpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 9 ++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index a028bdd3..55a730ec 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -84,7 +84,7 @@ class RendererGL final : public Renderer { void setupBlending(); void setupStencilTest(bool stencilEnable); void bindDepthBuffer(); - void setupTextureEnvState(); + void setupUbershaderTexEnv(); void bindTexturesToSlots(); void updateLightingLUT(); void initGraphicsContextInternal(); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index d0e2bb31..207bfbe4 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -282,12 +282,8 @@ void RendererGL::setupStencilTest(bool stencilEnable) { glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]); } -void RendererGL::setupTextureEnvState() { +void 
RendererGL::setupUbershaderTexEnv() { // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. - if (!usingUbershader) { - return; - } - static constexpr std::array ioBases = { PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, @@ -439,9 +435,9 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, ®s[0x48]); + setupUbershaderTexEnv(); } - setupTextureEnvState(); bindTexturesToSlots(); if (gpu.lightingLUTDirty) { @@ -811,7 +807,6 @@ OpenGL::Program& RendererGL::getSpecializedShader() { if (!program.exists()) { std::string vs = fragShaderGen.getVertexShader(regs); std::string fs = fragShaderGen.generate(regs); - std::cout << vs << "\n\n" << fs << "\n"; OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); From a2649ffb76879408a174ac817bf875463a8bbcc3 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 15:38:15 +0300 Subject: [PATCH 067/251] Simplify TEV code --- src/core/PICA/shader_gen_glsl.cpp | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 56cdd936..80dbf1ef 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -57,7 +57,6 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { vec4 colourAbs = abs(a_vertexColour); v_colour 
= min(colourAbs, vec4(1.f)); - // Flip y axis of UVs because OpenGL uses an inverted y for texture sampling compared to the PICA v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w); v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); @@ -125,8 +124,8 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { // When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour ret += R"( void main() { - vec4 combinerOutput = v_colour; // Last TEV output - vec4 previousBuffer = vec4(0.0); // Previous buffer + vec4 combinerOutput = v_colour; + vec4 previousBuffer = vec4(0.0); vec4 tevNextPreviousBuffer = tevBufferColor; )"; @@ -162,7 +161,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { ret += "fragColor = combinerOutput;\n"; ret += "}"; // End of main function - ret += "\n\n\n\n\n\n\n\n\n\n"; + ret += "\n\n\n\n\n\n\n"; return ret; } @@ -188,9 +187,9 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += ";\ncolorOp3 = "; getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage); - shader += ";\nvec3 outputColor" + std::to_string(stage) + " = "; + shader += ";\nvec3 outputColor" + std::to_string(stage) + " = clamp("; getColorOperation(shader, tev.colorOp); - shader += ";\n"; + shader += ", vec3(0.0), vec3(1.0));\n"; if (tev.colorOp == TexEnvConfig::Operation::Dot3RGBA) { // Dot3 RGBA also writes to the alpha component so we don't need to do anything more @@ -206,10 +205,10 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += ";\nalphaOp3 = "; getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage); - shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = "; + shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = clamp("; getAlphaOperation(shader, tev.alphaOp); // Clamp the alpha value to [0.0, 1.0] - shader += 
";\nclamp(outputAlpha" + std::to_string(stage) + ", 0.0, 1.0);\n"; + shader += ", 0.0, 1.0);\n"; } shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) + @@ -356,15 +355,15 @@ void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Ope switch (op) { case TexEnvConfig::Operation::Replace: shader += "colorOp1"; break; case TexEnvConfig::Operation::Add: shader += "colorOp1 + colorOp2"; break; - case TexEnvConfig::Operation::AddSigned: shader += "clamp(colorOp1 + colorOp2 - 0.5, 0.0, 1.0);"; break; + case TexEnvConfig::Operation::AddSigned: shader += "colorOp1 + colorOp2 - vec3(0.5)"; break; case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break; case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break; - case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec3(1.0) - colorOp3)"; break; + case TexEnvConfig::Operation::Lerp: shader += "mix(colorOp2, colorOp1, colorOp3)"; break; - case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break; - case TexEnvConfig::Operation::MultiplyAdd: shader += "colorOp1 * colorOp2 + colorOp3"; break; + case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2), vec3(1.0)) * colorOp3"; break; + case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(colorOp1, colorOp2, colorOp3)"; break; case TexEnvConfig::Operation::Dot3RGB: - case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(colorOp1 - 0.5, colorOp2 - 0.5))"; break; + case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(colorOp1 - vec3(0.5), colorOp2 - vec3(0.5)))"; break; default: Helpers::warn("FragmentGenerator: Unimplemented color op"); shader += "vec3(1.0)"; @@ -376,13 +375,13 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope switch (op) { case TexEnvConfig::Operation::Replace: shader += 
"alphaOp1"; break; case TexEnvConfig::Operation::Add: shader += "alphaOp1 + alphaOp2"; break; - case TexEnvConfig::Operation::AddSigned: shader += "clamp(alphaOp1 + alphaOp2 - 0.5, 0.0, 1.0);"; break; + case TexEnvConfig::Operation::AddSigned: shader += "alphaOp1 + alphaOp2 - 0.5"; break; case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break; case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break; - case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (1.0 - alphaOp3)"; break; + case TexEnvConfig::Operation::Lerp: shader += "mix(alphaOp2, alphaOp1, alphaOp3)"; break; case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break; - case TexEnvConfig::Operation::MultiplyAdd: shader += "alphaOp1 * alphaOp2 + alphaOp3"; break; + case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(alphaOp1, alphaOp2, alphaOp3)"; break; case TexEnvConfig::Operation::Dot3RGB: case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(alphaOp1 - 0.5, alphaOp2 - 0.5))"; break; default: From b8a186d5cd9a2a2db2f61c7ce38355cc22eb28ba Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 16:34:33 +0300 Subject: [PATCH 068/251] Shadergen: Fix add-multiply --- src/core/PICA/shader_gen_glsl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 80dbf1ef..86594023 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -360,7 +360,7 @@ void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Ope case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break; case TexEnvConfig::Operation::Lerp: shader += "mix(colorOp2, colorOp1, colorOp3)"; break; - case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2), vec3(1.0)) * colorOp3"; 
break; + case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break; case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(colorOp1, colorOp2, colorOp3)"; break; case TexEnvConfig::Operation::Dot3RGB: case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(colorOp1 - vec3(0.5), colorOp2 - vec3(0.5)))"; break; From db801312134cb9654e6afd932de1784af13d0081 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 18:27:22 +0300 Subject: [PATCH 069/251] Shadergen: Previous buffer should be able to be set even for passthrough TEV stages --- src/core/PICA/shader_gen_glsl.cpp | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 86594023..556c0794 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -214,21 +214,21 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) + ".0, vec3(0.0), vec3(1.0)), clamp(outputAlpha" + std::to_string(stage) + " * " + std::to_string(tev.getAlphaScale()) + ".0, 0.0, 1.0));\n"; + } - shader += "previousBuffer = tevNextPreviousBuffer;\n"; + shader += "previousBuffer = tevNextPreviousBuffer;\n\n"; - // Update the "next previous buffer" if necessary - const u32 textureEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; - if (stage < 4) { - // Check whether to update rgb - if ((textureEnvUpdateBuffer & (0x100 << stage))) { - shader += "tevNextPreviousBuffer.rgb = combinerOutput.rgb;\n"; - } + // Update the "next previous buffer" if necessary + const u32 textureEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + if (stage < 4) { + // Check whether to update rgb + if ((textureEnvUpdateBuffer & (0x100 << stage))) 
{ + shader += "tevNextPreviousBuffer.rgb = combinerOutput.rgb;\n"; + } - // And whether to update alpha - if ((textureEnvUpdateBuffer & (0x1000u << stage))) { - shader += "tevNextPreviousBuffer.a = combinerOutput.a;\n"; - } + // And whether to update alpha + if ((textureEnvUpdateBuffer & (0x1000u << stage))) { + shader += "tevNextPreviousBuffer.a = combinerOutput.a;\n"; } } } @@ -382,8 +382,6 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break; case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(alphaOp1, alphaOp2, alphaOp3)"; break; - case TexEnvConfig::Operation::Dot3RGB: - case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(alphaOp1 - 0.5, alphaOp2 - 0.5))"; break; default: Helpers::warn("FragmentGenerator: Unimplemented alpha op"); shader += "1.0"; From 9b4e5841e7154563a4dda0153c87a46250f46543 Mon Sep 17 00:00:00 2001 From: offtkp Date: Sun, 14 Jul 2024 00:56:55 +0300 Subject: [PATCH 070/251] Summary of the current state of lighting fragment_light.elf: works toon_shading.elf: works Cave story 3d: no longer too dark, but the intro has a bug Rabbids: positional lighting fixes, looks better Mario 3d land: ground is not too bright, mario is not yellow Kirby triple deluxe: Kirby is not shining like before Luigis mansion: better but luigi lighting is way off and spotlight sometimes turns off Captain Toad: bit better, still too bright Omega ruby: looks fine to me Pokemon Super Mystery Dungeon: looks fine to me Lego batman: didn't try but should work? 
--- src/host_shaders/opengl_fragment_shader.frag | 347 +++++++++++++------ 1 file changed, 244 insertions(+), 103 deletions(-) diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index 6b728ace..1b8e9751 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -38,6 +38,21 @@ vec4 tevSources[16]; vec4 tevNextPreviousBuffer; bool tevUnimplementedSourceFlag = false; +// Holds the enabled state of the lighting samples for various PICA configurations +// As explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0 +const bool samplerEnabled[9 * 7] = bool[9 * 7]( + // D0 D1 SP FR RB RG RR + true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR + false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR + true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR + true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR + true, true, true, false, true, true, true, // Configuration 4: All except for FR + true, false, true, true, true, true, true, // Configuration 5: All except for D1 + true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG + false, false, false, false, false, false, false, // Configuration 7: Unused + true, true, true, true, true, true, true // Configuration 8: All +); + // OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements): // https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml @@ -144,10 +159,16 @@ vec4 tevCalculateCombiner(int tev_id) { #define RG_LUT 5u #define RR_LUT 6u -float lutLookup(uint lut, uint light, float value) { - if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1u; - if (lut == SP_LUT) lut = light + 8u; - return texelFetch(u_tex_lighting_lut, ivec2(int(value * 256.0), lut), 0).r; +uint GPUREG_LIGHTi_CONFIG; +uint GPUREG_LIGHTING_CONFIG1; +uint 
GPUREG_LIGHTING_LUTINPUT_SELECT; +uint GPUREG_LIGHTING_LUTINPUT_SCALE; +uint GPUREG_LIGHTING_LUTINPUT_ABS; +bool error_unimpl; +vec4 unimpl_color; + +float lutLookup(uint lut, int index) { + return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; } vec3 regToColor(uint reg) { @@ -178,42 +199,155 @@ float decodeFP(uint hex, uint E, uint M) { return uintBitsToFloat(hex); } +bool isSamplerEnabled(uint environment_id, uint lut_id) { + return samplerEnabled[7 * environment_id + lut_id]; +} + +float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light_vector, vec3 half_vector) { + uint lut_index; + // lut_id is one of these values + // 0 D0 + // 1 D1 + // 2 SP + // 3 FR + // 4 RB + // 5 RG + // 6 RR + + // lut_index on the other hand represents the actual index of the LUT in the texture + // u_tex_lighting_lut has 24 LUTs and they are used like so: + // 0 D0 + // 1 D1 + // 2 is missing because SP uses LUTs 8-15 + // 3 FR + // 4 RB + // 5 RG + // 6 RR + // 8-15 SP0-7 + // 16-23 DA0-7, but this is not handled in this function as the lookup is a bit different + + int bit_in_config1; + if (lut_id == SP_LUT) { + // These are the spotlight attenuation LUTs + bit_in_config1 = 8 + int(light_id & 7u); + lut_index = 8u + light_id; + } else if (lut_id <= 6) { + bit_in_config1 = 16 + int(lut_id); + lut_index = lut_id; + } else { + error_unimpl = true; + } + + // The light environment configuration controls which LUTs are available for use + // If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1 + // If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue. 
+ bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + + if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + + uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) * 4, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; + uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) * 4, 3); + switch (input_id) { + case 0u: { + delta = dot(v_normal, normalize(half_vector)); + break; + } + case 1u: { + delta = dot(normalize(v_view), normalize(half_vector)); + break; + } + case 2u: { + delta = dot(v_normal, normalize(v_view)); + break; + } + case 3u: { + delta = dot(light_vector, v_normal); + break; + } + case 4u: { + // These are ints so that bitfieldExtract sign extends for us + int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + 0x10u * light_id)); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + 0x10u * light_id)); + + // These are fixed point 1.1.11 values, so we need to convert them to float + float x = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; + float y = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0; + float z = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0; + vec3 spotlight_vector = vec3(x, y, z); + delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector + break; + } + case 5u: { + delta = 1.0; // TODO: cos (aka CP); + error_unimpl = true; + break; + } + default: { + delta = 1.0; + error_unimpl = true; + break; + } + } + + // 0 = enabled + if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + 4 * int(lut_id), 1) == 0u) { + // Two sided diffuse + if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); + } + int index = int(clamp(floor(delta * 256.0), 0.f, 
255.f)); + return lutLookup(lut_index, index) * scale; + } else { + // Range is [-1, 1] so we need to map it to [0, 1] + int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); + if (index < 0) index += 256; + return lutLookup(lut_index, index) * scale; + } +} + // Implements the following algorthm: https://mathb.in/26766 void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - // Quaternions describe a transformation from surface-local space to eye space. - // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), - // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). - vec3 normal = normalize(v_normal); - vec3 tangent = normalize(v_tangent); - vec3 bitangent = normalize(v_bitangent); - vec3 view = normalize(v_view); + error_unimpl = false; + unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { - primary_color = secondary_color = vec4(1.0); + primary_color = secondary_color = vec4(0.0); return; } - uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u); uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2u) & 0x7u) + 1u; uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9u); primary_color = vec4(vec3(0.0), 1.0); secondary_color = vec4(vec3(0.0), 1.0); - primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT); - - uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); - uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); - uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u); - uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u); uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u); - float d[7]; + uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u); + GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u); + GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); + GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); - bool error_unimpl = false; + vec4 diffuse_sum = vec4(0.0, 
0.0, 0.0, 1.0); + vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); + + uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4); + bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; + + uint light_id; + vec3 light_vector; + vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { - uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); + light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); @@ -221,93 +355,29 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + 0x10u * light_id); uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + 0x10u * light_id); uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + 0x10u * light_id); - uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); + GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); - vec3 light_vector = normalize(vec3( + float light_distance; + vec3 light_position = vec3( decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) - )); - - vec3 half_vector; + ); // Positional Light if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { - // error_unimpl = true; - half_vector = normalize(normalize(light_vector + v_view) + view); + light_vector = light_position + v_view; } // Directional light else { - half_vector = normalize(normalize(light_vector) + view); + light_vector = light_position; } - for (int c = 0; c < 7; c++) { - if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { - uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); - float scale = float(1u << scale_id); - if (scale_id >= 6u) 
scale /= 256.0; + light_distance = length(light_vector); + light_vector = normalize(light_vector); + half_vector = light_vector + normalize(v_view); - uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); - if (input_id == 0u) - d[c] = dot(normal, half_vector); - else if (input_id == 1u) - d[c] = dot(view, half_vector); - else if (input_id == 2u) - d[c] = dot(normal, view); - else if (input_id == 3u) - d[c] = dot(light_vector, normal); - else if (input_id == 4u) { - uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id); - vec3 spot_light_vector = normalize(vec3( - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) - )); - d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); - } else if (input_id == 5u) { - d[c] = 1.0; // TODO: cos (aka CP); - error_unimpl = true; - } else { - d[c] = 1.0; - } - - d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale; - if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); - } else { - d[c] = 1.0; - } - } - - uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4); - if (lookup_config == 0u) { - d[D1_LUT] = 0.0; - d[FR_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 1u) { - d[D0_LUT] = 0.0; - d[D1_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 2u) { - d[FR_LUT] = 0.0; - d[SP_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 3u) { - d[SP_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT] = 1.0; - } else if (lookup_config == 4u) { - d[FR_LUT] = 0.0; - } else if (lookup_config == 5u) { - d[D1_LUT] = 0.0; - } else if (lookup_config == 6u) { - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } - - float 
distance_factor = 1.0; // a - float indirect_factor = 1.0; // fi - float shadow_factor = 1.0; // o - - float NdotL = dot(normal, light_vector); // Li dot N + float NdotL = dot(v_normal, light_vector); // N dot Li // Two sided diffuse if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) @@ -315,20 +385,91 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { else NdotL = abs(NdotL); - float light_factor = distance_factor * d[SP_LUT] * indirect_factor * shadow_factor; + float geometric_factor; + bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; + bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (use_geo_0 || use_geo_1) { + geometric_factor = dot(half_vector, half_vector); + geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0); + } - primary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); - secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + - regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); + // Distance attenuation is computed differently from the other factors, for example + // it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use + // GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the + // fragment and the distance attenuation scale and bias to calculate where in the LUT to look up. 
+ // See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE + float distance_attenuation = 1.0; + if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { + uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au), 0, 20); + uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu), 0, 20); + + float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); + + float delta = light_distance * distance_attenuation_scale + distance_attenuation_bias; + delta = clamp(delta, 0.0, 1.0); + int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); + distance_attenuation = lutLookup(16u + light_id, index); + } + + float spotlight_attenuation = lightLutLookup(environment_id, SP_LUT, light_id, light_vector, half_vector); + float specular0_distribution = lightLutLookup(environment_id, D0_LUT, light_id, light_vector, half_vector); + float specular1_distribution = lightLutLookup(environment_id, D1_LUT, light_id, light_vector, half_vector); + vec3 reflected_color; + reflected_color.r = lightLutLookup(environment_id, RR_LUT, light_id, light_vector, half_vector); + + if (isSamplerEnabled(environment_id, RG_LUT)) { + reflected_color.g = lightLutLookup(environment_id, RG_LUT, light_id, light_vector, half_vector); + } else { + reflected_color.g = reflected_color.r; + } + + if (isSamplerEnabled(environment_id, RB_LUT)) { + reflected_color.b = lightLutLookup(environment_id, RB_LUT, light_id, light_vector, half_vector); + } else { + reflected_color.b = reflected_color.r; + } + + vec3 specular0 = regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0_distribution; + vec3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1_distribution * reflected_color; + + specular0 *= use_geo_0 ? geometric_factor : 1.0; + specular1 *= use_geo_1 ? 
geometric_factor : 1.0; + + float clamp_factor = 1.0; + if (clamp_highlights && NdotL == 0.0) { + clamp_factor = 0.0; + } + + float light_factor = distance_attenuation * spotlight_attenuation; + diffuse_sum.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); + specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } + uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); + // Uses parameters from the last light as Fresnel is only applied to the last light + float fresnel_factor; + + if (fresnel_output1 == 1u || fresnel_output2 == 1u) { + fresnel_factor = lightLutLookup(environment_id, FR_LUT, light_id, light_vector, half_vector); + } + + if (fresnel_output1 == 1u) { + diffuse_sum.a = fresnel_factor; + } - if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; - if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; + if (fresnel_output2 == 1u) { + specular_sum.a = fresnel_factor; + } + + uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u); + vec4 global_ambient = vec4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0); + primary_color = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0)); + secondary_color = clamp(specular_sum, vec4(0.0), vec4(1.0)); if (error_unimpl) { - // secondary_color = primary_color = vec4(1.0, 0., 1.0, 1.0); + secondary_color = primary_color = unimpl_color; } } From f6ebf8398230928a95101de021989e6a64a36804 Mon Sep 17 00:00:00 2001 From: offtkp Date: Tue, 16 Jul 2024 00:18:53 +0300 Subject: [PATCH 071/251] Update gles.patch --- .github/gles.patch | 176 +++++++++++++++++++++++++++------------------ 1 file changed, 106 insertions(+), 70 deletions(-) diff --git a/.github/gles.patch b/.github/gles.patch index 3d6c96fe..f5270518 100644 --- a/.github/gles.patch +++ b/.github/gles.patch @@ -21,7 +21,7 @@ index 990e2f80..2e7842ac 100644 void main() { diff --git 
a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag -index 6b728ace..eaac1484 100644 +index 1b8e9751..96238000 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,4 +1,5 @@ @@ -31,8 +31,8 @@ index 6b728ace..eaac1484 100644 in vec3 v_tangent; in vec3 v_normal; -@@ -150,11 +151,17 @@ float lutLookup(uint lut, uint light, float value) { - return texelFetch(u_tex_lighting_lut, ivec2(int(value * 256.0), lut), 0).r; +@@ -171,11 +172,17 @@ float lutLookup(uint lut, int index) { + return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; } +// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead @@ -50,89 +50,103 @@ index 6b728ace..eaac1484 100644 } // Convert an arbitrary-width floating point literal to an f32 -@@ -189,7 +196,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - vec3 view = normalize(v_view); +@@ -243,16 +250,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + // If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue. 
+ bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + +- if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { ++ if (!current_sampler_enabled || (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + +- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) * 4, 3); ++ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) * 4, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; +- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) * 4, 3); ++ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) * 4, 3); + switch (input_id) { + case 0u: { + delta = dot(v_normal, normalize(half_vector)); +@@ -271,14 +278,14 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + break; + } + case 4u: { +- // These are ints so that bitfieldExtract sign extends for us ++ // These are ints so that bitfieldExtractCompat sign extends for us + int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + 0x10u * light_id)); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + 0x10u * light_id)); + + // These are fixed point 1.1.11 values, so we need to convert them to float +- float x = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; +- float y = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0; +- float z = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0; ++ float x = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; ++ float y = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0; ++ float z = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0; + vec3 spotlight_vector = vec3(x, y, z); + delta = dot(light_vector, spotlight_vector); // 
spotlight direction is negated so we don't negate light_vector + break; +@@ -296,9 +303,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + } + + // 0 = enabled +- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + 4 * int(lut_id), 1) == 0u) { ++ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + 4 * int(lut_id), 1) == 0u) { + // Two sided diffuse +- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { ++ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); +@@ -319,7 +326,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); - if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { - primary_color = secondary_color = vec4(1.0); + primary_color = secondary_color = vec4(0.0); return; } -@@ -213,7 +220,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - bool error_unimpl = false; +@@ -339,15 +346,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); + +- uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4); +- bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; ++ uint environment_id = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 4, 4); ++ bool clamp_highlights = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; + + uint light_id; + vec3 light_vector; + vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { -- uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); -+ uint light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); +- light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, 
int(i * 3u), 3); ++ light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); -@@ -224,14 +231,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); +@@ -359,12 +366,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - vec3 light_vector = normalize(vec3( + float light_distance; + vec3 light_position = vec3( - decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) + decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), + decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) - )); - - vec3 half_vector; + ); // Positional Light - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { - // error_unimpl = true; - half_vector = normalize(normalize(light_vector + v_view) + view); - } -@@ -242,12 +249,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + light_vector = light_position + v_view; } - for (int c = 0; c < 7; c++) { -- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { -- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); -+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { -+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); - float scale = float(1u << scale_id); - if (scale_id >= 6u) scale /= 256.0; - -- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); -+ uint input_id = 
bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); - if (input_id == 0u) - d[c] = dot(normal, half_vector); - else if (input_id == 1u) -@@ -260,9 +267,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id); - vec3 spot_light_vector = normalize(vec3( -- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), -- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), -- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) -+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), -+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), -+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) - )); - d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); - } else if (input_id == 5u) { -@@ -273,13 +280,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - } - - d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale; -- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); -+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); - } else { - d[c] = 1.0; - } - } - -- uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4); -+ uint lookup_config = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 4, 4); - if (lookup_config == 0u) { - d[D1_LUT] = 0.0; - d[FR_LUT] = 0.0; -@@ -310,7 +317,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - float NdotL = dot(normal, light_vector); // Li dot N +@@ -380,14 +387,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + float NdotL = dot(v_normal, light_vector); // N dot Li // Two sided diffuse - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) @@ -140,17 
+154,39 @@ index 6b728ace..eaac1484 100644 NdotL = max(0.0, NdotL); else NdotL = abs(NdotL); -@@ -321,8 +328,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + - regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); + + float geometric_factor; +- bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; +- bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; ++ bool use_geo_0 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; ++ bool use_geo_1 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (use_geo_0 || use_geo_1) { + geometric_factor = dot(half_vector, half_vector); + geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0); +@@ -399,9 +406,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + // fragment and the distance attenuation scale and bias to calculate where in the LUT to look up. 
+ // See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE + float distance_attenuation = 1.0; +- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { +- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au), 0, 20); +- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu), 0, 20); ++ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { ++ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au), 0, 20); ++ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu), 0, 20); + + float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); +@@ -446,8 +453,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } + - uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); - uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); + uint fresnel_output1 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 2, 1); + uint fresnel_output2 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 3, 1); - - if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; - if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; + // Uses parameters from the last light as Fresnel is only applied to the last light + float fresnel_factor; + diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert index a25d7a6d..7cf40398 100644 --- a/src/host_shaders/opengl_vertex_shader.vert From c02b3822623372d9598d38da4dd66f8f7e8a090c Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 16 Jul 2024 00:58:52 +0300 Subject: [PATCH 072/251] Perform alpha test with integers instead of floats --- 
src/core/PICA/shader_gen_glsl.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 556c0794..d4a4bf8e 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -398,17 +398,17 @@ void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs return; } - shader += "float alphaReferenceFloat = float(alphaReference) / 255.0;\n"; - shader += "if (!("; + shader += "int testingAlpha = int(combinerOutput.a * 255.0);\n"; + shader += "if ("; switch (function) { - case CompareFunction::Never: shader += "false"; break; - case CompareFunction::Always: shader += "true"; break; - case CompareFunction::Equal: shader += "combinerOutput.a == alphaReferenceFloat"; break; - case CompareFunction::NotEqual: shader += "combinerOutput.a != alphaReferenceFloat"; break; - case CompareFunction::Less: shader += "combinerOutput.a < alphaReferenceFloat"; break; - case CompareFunction::LessOrEqual: shader += "combinerOutput.a <= alphaReferenceFloat"; break; - case CompareFunction::Greater: shader += "combinerOutput.a > alphaReferenceFloat"; break; - case CompareFunction::GreaterOrEqual: shader += "combinerOutput.a >= alphaReferenceFloat"; break; + case CompareFunction::Never: shader += "true"; break; + case CompareFunction::Always: shader += "false"; break; + case CompareFunction::Equal: shader += "testingAlpha != alphaReference"; break; + case CompareFunction::NotEqual: shader += "testingAlpha == alphaReference"; break; + case CompareFunction::Less: shader += "testingAlpha >= alphaReference"; break; + case CompareFunction::LessOrEqual: shader += "testingAlpha > alphaReference"; break; + case CompareFunction::Greater: shader += "testingAlpha <= alphaReference"; break; + case CompareFunction::GreaterOrEqual: shader += "testingAlpha < alphaReference"; break; default: Helpers::warn("Unimplemented alpha test function"); @@ -416,5 
+416,5 @@ void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs break; } - shader += ")) { discard; }\n"; + shader += ") { discard; }\n"; } From 441aa2346c6ebd004865a324f4e683fa814949b1 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 16 Jul 2024 02:20:37 +0300 Subject: [PATCH 073/251] Shadergen: Add clipping --- include/PICA/pica_frag_uniforms.hpp | 1 + src/core/PICA/shader_gen_glsl.cpp | 34 ++++++++++++++++++++-------- src/core/renderer_gl/renderer_gl.cpp | 7 ++++++ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/include/PICA/pica_frag_uniforms.hpp b/include/PICA/pica_frag_uniforms.hpp index 616f1882..332acd4e 100644 --- a/include/PICA/pica_frag_uniforms.hpp +++ b/include/PICA/pica_frag_uniforms.hpp @@ -16,5 +16,6 @@ namespace PICA { alignas(16) vec4 constantColors[tevStageCount]; alignas(16) vec4 tevBufferColor; + alignas(16) vec4 clipCoords; }; } // namespace PICA \ No newline at end of file diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index d4a4bf8e..e135ac8e 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -2,6 +2,18 @@ using namespace PICA; using namespace PICA::ShaderGen; +static constexpr const char* uniformDefinition = R"( + layout(std140) uniform FragmentUniforms { + int alphaReference; + float depthScale; + float depthOffset; + + vec4 constantColors[6]; + vec4 tevBufferColor; + vec4 clipCoords; + }; +)"; + std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { std::string ret = ""; @@ -20,6 +32,8 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { )"; } + ret += uniformDefinition; + ret += R"( layout(location = 0) in vec4 a_coords; layout(location = 1) in vec4 a_quaternion; @@ -39,7 +53,9 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { out vec3 v_view; out vec2 v_texcoord2; - //out float gl_ClipDistance[2]; + 
#ifndef USING_GLES + out float gl_ClipDistance[2]; + #endif vec4 abgr8888ToVec4(uint abgr) { const float scale = 1.0 / 255.0; @@ -65,6 +81,11 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); + + #ifndef USING_GLES + gl_ClipDistance[0] = -a_coords.z; + gl_ClipDistance[1] = dot(clipCoords, a_coords); + #endif } )"; @@ -109,17 +130,10 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { #ifndef USING_GLES uniform sampler1DArray u_tex_lighting_lut; #endif - - layout(std140) uniform FragmentUniforms { - int alphaReference; - float depthScale; - float depthOffset; - - vec4 constantColors[6]; - vec4 tevBufferColor; - }; )"; + ret += uniformDefinition; + // Emit main function for fragment shader // When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour ret += R"( diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 207bfbe4..97b642c7 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -845,6 +845,13 @@ OpenGL::Program& RendererGL::getSpecializedShader() { uniforms.depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); uniforms.depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + if (regs[InternalRegs::ClipEnable] & 1) { + uniforms.clipCoords[0] = f24::fromRaw(regs[PICA::InternalRegs::ClipData0] & 0xffffff).toFloat32(); + uniforms.clipCoords[1] = f24::fromRaw(regs[PICA::InternalRegs::ClipData1] & 0xffffff).toFloat32(); + uniforms.clipCoords[2] = f24::fromRaw(regs[PICA::InternalRegs::ClipData2] & 0xffffff).toFloat32(); + uniforms.clipCoords[3] = f24::fromRaw(regs[PICA::InternalRegs::ClipData3] & 
0xffffff).toFloat32(); + } + // Set up the constant color for the 6 TEV stages for (int i = 0; i < 6; i++) { static constexpr std::array ioBases = { From e5bed23cee4d0223639167edeaa5e17058588ab2 Mon Sep 17 00:00:00 2001 From: offtkp Date: Tue, 16 Jul 2024 15:48:34 +0300 Subject: [PATCH 074/251] Fix Luigi's flashlight in Luigi's Mansion --- src/host_shaders/opengl_fragment_shader.frag | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index 1b8e9751..c3c7cf0b 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -347,7 +347,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { - light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); + light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i << 2u), 3); uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); From 139f35588d160928e37aa25b6de1c846d8543f59 Mon Sep 17 00:00:00 2001 From: offtkp Date: Tue, 16 Jul 2024 16:23:42 +0300 Subject: [PATCH 075/251] Switch to shifts in some places instead of multiplication --- src/host_shaders/opengl_fragment_shader.frag | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index c3c7cf0b..32f4c1ec 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -247,12 +247,12 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light return 1.0; } - uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) * 4, 3); + uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); 
float scale = float(1u << scale_id); if (scale_id >= 6u) scale /= 256.0; float delta = 1.0; - uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) * 4, 3); + uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); switch (input_id) { case 0u: { delta = dot(v_normal, normalize(half_vector)); @@ -296,7 +296,7 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light } // 0 = enabled - if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + 4 * int(lut_id), 1) == 0u) { + if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { // Two sided diffuse if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { delta = max(delta, 0.0); @@ -347,7 +347,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { - light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i << 2u), 3); + light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); From 8b4eacc7b6982c0f254baa223cf3291d50e55e49 Mon Sep 17 00:00:00 2001 From: offtkp Date: Tue, 16 Jul 2024 20:32:35 +0300 Subject: [PATCH 076/251] More luigi mansion fixes --- src/host_shaders/opengl_fragment_shader.frag | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index 32f4c1ec..ae43d993 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -272,8 +272,8 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light } case 4u: { // These are ints so that bitfieldExtract sign extends for us - int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + 0x10u * 
light_id)); - int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + 0x10u * light_id)); + int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u))); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u))); // These are fixed point 1.1.11 values, so we need to convert them to float float x = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; @@ -349,13 +349,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); - uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); - uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + 0x10u * light_id); - uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + 0x10u * light_id); - uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + 0x10u * light_id); - uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + 0x10u * light_id); - GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); + uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u)); + uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + (light_id << 4u)); + uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + (light_id << 4u)); + uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + (light_id << 4u)); + uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + (light_id << 4u)); + GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + (light_id << 4u)); float light_distance; vec3 light_position = vec3( @@ -400,8 +400,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { // See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE float distance_attenuation = 1.0; if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { - 
uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au), 0, 20); - uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu), 0, 20); + uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); + uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); From 0ecdf00e643fcc220224a30c492d67c2da4dc84a Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 16 Jul 2024 22:14:01 +0300 Subject: [PATCH 077/251] Add accurate shader multiplication option --- include/PICA/dynapica/shader_rec.hpp | 7 +++++-- include/PICA/dynapica/shader_rec_emitter_arm64.hpp | 4 +++- include/PICA/dynapica/shader_rec_emitter_x64.hpp | 4 +++- include/config.hpp | 1 + src/config.cpp | 2 ++ src/core/PICA/dynapica/shader_rec.cpp | 2 +- src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp | 3 --- src/core/PICA/dynapica/shader_rec_emitter_x64.cpp | 3 --- src/core/PICA/gpu.cpp | 2 ++ src/libretro_core.cpp | 4 +++- 10 files changed, 20 insertions(+), 12 deletions(-) diff --git a/include/PICA/dynapica/shader_rec.hpp b/include/PICA/dynapica/shader_rec.hpp index 2dabc128..a242d02f 100644 --- a/include/PICA/dynapica/shader_rec.hpp +++ b/include/PICA/dynapica/shader_rec.hpp @@ -22,8 +22,11 @@ class ShaderJIT { ShaderCache cache; #endif + bool accurateMul = false; public: + void setAccurateMul(bool value) { accurateMul = value; } + #ifdef PANDA3DS_SHADER_JIT_SUPPORTED // Call this before starting to process a batch of vertices // This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader @@ -36,11 +39,11 @@ class ShaderJIT { static constexpr bool isAvailable() { return true; } #else void 
prepare(PICAShader& shaderUnit) { - Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit"); + Helpers::panic("Shader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit"); } void run(PICAShader& shaderUnit) { - Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit"); + Helpers::panic("Shader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit"); } // Define dummy callback. This should never be called if the shader JIT is not supported diff --git a/include/PICA/dynapica/shader_rec_emitter_arm64.hpp b/include/PICA/dynapica/shader_rec_emitter_arm64.hpp index 7411c430..9351f383 100644 --- a/include/PICA/dynapica/shader_rec_emitter_arm64.hpp +++ b/include/PICA/dynapica/shader_rec_emitter_arm64.hpp @@ -37,6 +37,8 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator { // Shows whether the loaded shader has any log2 and exp2 instructions bool codeHasLog2 = false; bool codeHasExp2 = false; + // Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul + bool useSafeMUL = false; oaknut::Label log2Func, exp2Func; oaknut::Label emitLog2Func(); @@ -123,7 +125,7 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator { PrologueCallback prologueCb = nullptr; // Initialize our emitter with "allocSize" bytes of memory allocated for the code buffer - ShaderEmitter() : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {} + ShaderEmitter(bool useSafeMUL) : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()), useSafeMUL(useSafeMUL) {} // PC must be a valid entrypoint here. 
It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does InstructionCallback getInstructionCallback(u32 pc) { return getLabelPointer(instructionLabels.at(pc)); } diff --git a/include/PICA/dynapica/shader_rec_emitter_x64.hpp b/include/PICA/dynapica/shader_rec_emitter_x64.hpp index 1052d6a0..a43bd2dc 100644 --- a/include/PICA/dynapica/shader_rec_emitter_x64.hpp +++ b/include/PICA/dynapica/shader_rec_emitter_x64.hpp @@ -45,6 +45,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator { // Shows whether the loaded shader has any log2 and exp2 instructions bool codeHasLog2 = false; bool codeHasExp2 = false; + // Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul + bool useSafeMUL = false; Xbyak::Label log2Func, exp2Func; Xbyak::Label emitLog2Func(); @@ -130,7 +132,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator { PrologueCallback prologueCb = nullptr; // Initialize our emitter with "allocSize" bytes of RWX memory - ShaderEmitter() : Xbyak::CodeGenerator(allocSize) { + ShaderEmitter(bool useSafeMUL) : Xbyak::CodeGenerator(allocSize), useSafeMUL(useSafeMUL) { cpuCaps = Xbyak::util::Cpu(); haveSSE4_1 = cpuCaps.has(Xbyak::util::Cpu::tSSE41); diff --git a/include/config.hpp b/include/config.hpp index 339e651c..6dbae9e3 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -15,6 +15,7 @@ struct EmulatorConfig { bool shaderJitEnabled = shaderJitDefault; bool discordRpcEnabled = false; + bool accurateShaderMul = false; RendererType rendererType = RendererType::OpenGL; Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null; diff --git a/src/config.cpp b/src/config.cpp index 2f9b7e00..5af4d654 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -62,6 +62,7 @@ void EmulatorConfig::load() { shaderJitEnabled = toml::find_or(gpu, "EnableShaderJIT", shaderJitDefault); vsyncEnabled = toml::find_or(gpu, "EnableVSync", true); + accurateShaderMul = toml::find_or(gpu, 
"AccurateShaderMultiplication", false); } } @@ -125,6 +126,7 @@ void EmulatorConfig::save() { data["GPU"]["EnableShaderJIT"] = shaderJitEnabled; data["GPU"]["Renderer"] = std::string(Renderer::typeToString(rendererType)); data["GPU"]["EnableVSync"] = vsyncEnabled; + data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul; data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType)); data["Audio"]["EnableAudio"] = audioEnabled; diff --git a/src/core/PICA/dynapica/shader_rec.cpp b/src/core/PICA/dynapica/shader_rec.cpp index 20e171d7..e3c13c1e 100644 --- a/src/core/PICA/dynapica/shader_rec.cpp +++ b/src/core/PICA/dynapica/shader_rec.cpp @@ -16,7 +16,7 @@ void ShaderJIT::prepare(PICAShader& shaderUnit) { auto it = cache.find(hash); if (it == cache.end()) { // Block has not been compiled yet - auto emitter = std::make_unique(); + auto emitter = std::make_unique(accurateMul); emitter->compile(shaderUnit); // Get pointer to callbacks entrypointCallback = emitter->getInstructionCallback(shaderUnit.entrypoint); diff --git a/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp index 15200e76..8bc460fd 100644 --- a/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp +++ b/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp @@ -7,9 +7,6 @@ using namespace Helpers; using namespace oaknut; using namespace oaknut::util; -// TODO: Expose safe/unsafe optimizations to the user -constexpr bool useSafeMUL = true; - // Similar to the x64 recompiler, we use an odd internal ABI, which abuses the fact that we'll very rarely be calling C++ functions // So to avoid pushing and popping, we'll be making use of volatile registers as much as possible static constexpr QReg src1Vec = Q1; diff --git a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp index e7bafe9f..142ff8c8 100644 --- a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp +++ 
b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp @@ -12,9 +12,6 @@ using namespace Xbyak; using namespace Xbyak::util; using namespace Helpers; -// TODO: Expose safe/unsafe optimizations to the user -constexpr bool useSafeMUL = false; - // The shader recompiler uses quite an odd internal ABI // We make use of the fact that in regular conditions, we should pretty much never be calling C++ code from recompiled shader code // This allows us to establish an ABI that's optimized for this sort of workflow, statically allocating volatile host registers diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index a777d0a3..ed0e5420 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -64,6 +64,8 @@ void GPU::reset() { regs.fill(0); shaderUnit.reset(); shaderJIT.reset(); + shaderJIT.setAccurateMul(config.accurateShaderMul); + std::memset(vram, 0, vramSize); lightingLUT.fill(0); lightingLUTDirty = true; diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index f9772b37..3825d3ed 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -146,6 +146,7 @@ static bool FetchVariableBool(std::string key, bool def) { static void configInit() { static const retro_variable values[] = { {"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"}, + {"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"}, {"panda3ds_use_vsync", "Enable VSync; enabled|disabled"}, {"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"}, {"panda3ds_use_audio", "Enable audio; disabled|enabled"}, @@ -153,7 +154,7 @@ static void configInit() { {"panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled"}, {"panda3ds_battery_level", "Battery percentage; 5|10|20|30|50|70|90|100"}, {"panda3ds_use_charger", "Charger plugged; enabled|disabled"}, - {nullptr, nullptr} + {nullptr, nullptr}, }; envCallbacks(RETRO_ENVIRONMENT_SET_VARIABLES, (void*)values); @@ -171,6 +172,7 @@ static void configUpdate() { 
config.audioEnabled = FetchVariableBool("panda3ds_use_audio", false); config.sdCardInserted = FetchVariableBool("panda3ds_use_virtual_sd", true); config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false); + config.accurateShaderMul = FetchVariableBool("panda3ds_accurate_shader_mul", false); config.discordRpcEnabled = false; config.save(); From 967d9398ce6b38f19bc9471bba49c581d7ea8db7 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 16 Jul 2024 22:28:20 +0300 Subject: [PATCH 078/251] Fix arm64 build --- src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp index 8bc460fd..296ec932 100644 --- a/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp +++ b/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp @@ -488,7 +488,7 @@ void ShaderEmitter::recDP3(const PICAShader& shader, u32 instruction) { // Now do a full DP4 // Do a piecewise multiplication of the vectors first - if constexpr (useSafeMUL) { + if (useSafeMUL) { emitSafeMUL(src1Vec, src2Vec, scratch1Vec); } else { FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4()); @@ -515,7 +515,7 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) { loadRegister<2>(src2Vec, shader, src2, 0, operandDescriptor); // Do a piecewise multiplication of the vectors first - if constexpr (useSafeMUL) { + if (useSafeMUL) { emitSafeMUL(src1Vec, src2Vec, scratch1Vec); } else { FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4()); @@ -548,7 +548,7 @@ void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) { // Now perform a DP4 // Do a piecewise multiplication of the vectors first - if constexpr (useSafeMUL) { + if (useSafeMUL) { emitSafeMUL(src1Vec, src2Vec, scratch1Vec); } else { FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4()); @@ -831,7 +831,7 @@ void 
ShaderEmitter::recMUL(const PICAShader& shader, u32 instruction) { loadRegister<1>(src1Vec, shader, src1, idx, operandDescriptor); loadRegister<2>(src2Vec, shader, src2, 0, operandDescriptor); - if constexpr (useSafeMUL) { + if (useSafeMUL) { emitSafeMUL(src1Vec, src2Vec, scratch1Vec); } else { FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4()); @@ -904,7 +904,7 @@ void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) { loadRegister<2>(src2Vec, shader, src2, isMADI ? 0 : idx, operandDescriptor); loadRegister<3>(src3Vec, shader, src3, isMADI ? idx : 0, operandDescriptor); - if constexpr (useSafeMUL) { + if (useSafeMUL) { emitSafeMUL(src1Vec, src2Vec, scratch1Vec); FADD(src3Vec.S4(), src3Vec.S4(), src1Vec.S4()); } else { From 27ddb1272ae5508610269d7f58a0c5beab785a0f Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 16 Jul 2024 23:39:48 +0300 Subject: [PATCH 079/251] Fix CI artifacts --- .github/workflows/Hydra_Build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Hydra_Build.yml b/.github/workflows/Hydra_Build.yml index 645f2f7a..a269e839 100644 --- a/.github/workflows/Hydra_Build.yml +++ b/.github/workflows/Hydra_Build.yml @@ -51,7 +51,7 @@ jobs: with: name: Windows Libretro core path: | - ${{github.workspace}}/build/panda3ds_libretro.dll + ${{github.workspace}}/build/${{ env.BUILD_TYPE }}/panda3ds_libretro.dll ${{github.workspace}}/docs/libretro/panda3ds_libretro.info MacOS: From a4ec7705878c56dbc6f2aaf1a70a0dc384c04566 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 17 Jul 2024 01:32:55 +0300 Subject: [PATCH 080/251] Add UBO/BlendEquation/BlendFunc to GL state manager --- include/renderer_gl/gl_state.hpp | 48 +++++++++++++++++++++++++++- src/core/renderer_gl/gl_state.cpp | 18 +++++++++-- src/core/renderer_gl/renderer_gl.cpp | 8 ++--- 3 files changed, 67 insertions(+), 7 deletions(-) diff --git 
a/include/renderer_gl/gl_state.hpp b/include/renderer_gl/gl_state.hpp index 69960f1e..e5591ea0 100644 --- a/include/renderer_gl/gl_state.hpp +++ b/include/renderer_gl/gl_state.hpp @@ -40,9 +40,13 @@ struct GLStateManager { GLuint boundVAO; GLuint boundVBO; GLuint currentProgram; + GLuint boundUBO; GLenum depthFunc; GLenum logicOp; + GLenum blendEquationRGB, blendEquationAlpha; + GLenum blendFuncSourceRGB, blendFuncSourceAlpha; + GLenum blendFuncDestRGB, blendFuncDestAlpha; void reset(); void resetBlend(); @@ -51,7 +55,7 @@ struct GLStateManager { void resetColourMask(); void resetDepth(); void resetVAO(); - void resetVBO(); + void resetBuffers(); void resetProgram(); void resetScissor(); void resetStencil(); @@ -183,6 +187,13 @@ struct GLStateManager { } } + void bindUBO(GLuint handle) { + if (boundUBO != handle) { + boundUBO = handle; + glBindBuffer(GL_UNIFORM_BUFFER, boundUBO); + } + } + void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); } void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); } void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); } @@ -224,6 +235,41 @@ struct GLStateManager { } void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast(func)); } + + // Counterpart to glBlendEquationSeparate + void setBlendEquation(GLenum modeRGB, GLenum modeAlpha) { + if (blendEquationRGB != modeRGB || blendEquationAlpha != modeAlpha) { + blendEquationRGB = modeRGB; + blendEquationAlpha = modeAlpha; + + glBlendEquationSeparate(modeRGB, modeAlpha); + } + } + + // Counterpart to glBlendFuncSeparate + void setBlendFunc(GLenum sourceRGB, GLenum destRGB, GLenum sourceAlpha, GLenum destAlpha) { + if (blendFuncSourceRGB != sourceRGB || blendFuncDestRGB != destRGB || blendFuncSourceAlpha != sourceAlpha || + blendFuncDestAlpha != destAlpha) { + + blendFuncSourceRGB = sourceRGB; + blendFuncDestRGB = destRGB; + blendFuncSourceAlpha = sourceAlpha; + blendFuncDestAlpha = destAlpha; + + 
glBlendFuncSeparate(sourceRGB, destRGB,sourceAlpha, destAlpha); + } + } + + // Counterpart to regular glBlendEquation + void setBlendEquation(GLenum mode) { setBlendEquation(mode, mode); } + + void setBlendEquation(OpenGL::BlendEquation modeRGB, OpenGL::BlendEquation modeAlpha) { + setBlendEquation(static_cast(modeRGB), static_cast(modeAlpha)); + } + + void setBlendEquation(OpenGL::BlendEquation mode) { + setBlendEquation(static_cast(mode)); + } }; static_assert(std::is_trivially_constructible(), "OpenGL State Manager class is not trivially constructible!"); diff --git a/src/core/renderer_gl/gl_state.cpp b/src/core/renderer_gl/gl_state.cpp index d2eec0d5..3d1c0681 100644 --- a/src/core/renderer_gl/gl_state.cpp +++ b/src/core/renderer_gl/gl_state.cpp @@ -5,9 +5,20 @@ void GLStateManager::resetBlend() { logicOpEnabled = false; logicOp = GL_COPY; + blendEquationRGB = GL_FUNC_ADD; + blendEquationAlpha = GL_FUNC_ADD; + + blendFuncSourceRGB = GL_SRC_COLOR; + blendFuncDestRGB = GL_DST_COLOR; + blendFuncSourceAlpha = GL_SRC_ALPHA; + blendFuncDestAlpha = GL_DST_ALPHA; + OpenGL::disableBlend(); OpenGL::disableLogicOp(); OpenGL::setLogicOp(GL_COPY); + + glBlendEquationSeparate(blendEquationRGB, blendEquationAlpha); + glBlendFuncSeparate(blendFuncSourceRGB, blendFuncDestRGB, blendFuncSourceAlpha, blendFuncDestAlpha); } void GLStateManager::resetClearing() { @@ -61,9 +72,12 @@ void GLStateManager::resetVAO() { glBindVertexArray(0); } -void GLStateManager::resetVBO() { +void GLStateManager::resetBuffers() { boundVBO = 0; + boundUBO = 0; + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_UNIFORM_BUFFER, 0); } void GLStateManager::resetProgram() { @@ -79,7 +93,7 @@ void GLStateManager::reset() { resetDepth(); resetVAO(); - resetVBO(); + resetBuffers(); resetProgram(); resetScissor(); resetStencil(); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 97b642c7..b9a2c7ae 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ 
b/src/core/renderer_gl/renderer_gl.cpp @@ -229,8 +229,8 @@ void RendererGL::setupBlending() { OpenGL::setBlendColor(float(r) / 255.f, float(g) / 255.f, float(b) / 255.f, float(a) / 255.f); // Translate equations and funcs to their GL equivalents and set them - glBlendEquationSeparate(blendingEquations[rgbEquation], blendingEquations[alphaEquation]); - glBlendFuncSeparate(blendingFuncs[rgbSourceFunc], blendingFuncs[rgbDestFunc], blendingFuncs[alphaSourceFunc], blendingFuncs[alphaDestFunc]); + gl.setBlendEquation(blendingEquations[rgbEquation], blendingEquations[alphaEquation]); + gl.setBlendFunc(blendingFuncs[rgbSourceFunc], blendingFuncs[rgbDestFunc], blendingFuncs[alphaSourceFunc], blendingFuncs[alphaDestFunc]); } } @@ -821,7 +821,7 @@ OpenGL::Program& RendererGL::getSpecializedShader() { // Allocate memory for the program UBO glGenBuffers(1, &programEntry.uboBinding); - glBindBuffer(GL_UNIFORM_BUFFER, programEntry.uboBinding); + gl.bindUBO(programEntry.uboBinding); glBufferData(GL_UNIFORM_BUFFER, sizeof(PICA::FragmentUniforms), nullptr, GL_DYNAMIC_DRAW); // Set up the binding for our UBO. 
Sadly we can't specify it in the shader like normal people, @@ -869,7 +869,7 @@ OpenGL::Program& RendererGL::getSpecializedShader() { vec[3] = float((color >> 24) & 0xFF) / 255.0f; } - glBindBuffer(GL_UNIFORM_BUFFER, programEntry.uboBinding); + gl.bindUBO(programEntry.uboBinding); glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms); return program; From aad7bb817eaf92a2632315b580ac9508d105fac5 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 17 Jul 2024 02:25:38 +0300 Subject: [PATCH 081/251] Add setting for ubershaders --- include/config.hpp | 3 +++ include/renderer.hpp | 2 ++ include/renderer_gl/renderer_gl.hpp | 4 +++- src/config.cpp | 4 ++++ src/core/PICA/gpu.cpp | 1 + src/libretro_core.cpp | 2 ++ 6 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/config.hpp b/include/config.hpp index 6dbae9e3..8aa695aa 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -13,8 +13,11 @@ struct EmulatorConfig { static constexpr bool shaderJitDefault = false; #endif + static constexpr bool ubershaderDefault = true; + bool shaderJitEnabled = shaderJitDefault; bool discordRpcEnabled = false; + bool useUbershaders = ubershaderDefault; bool accurateShaderMul = false; RendererType rendererType = RendererType::OpenGL; Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null; diff --git a/include/renderer.hpp b/include/renderer.hpp index 17812bcf..e64d49e3 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -74,6 +74,8 @@ class Renderer { virtual std::string getUbershader() { return ""; } virtual void setUbershader(const std::string& shader) {} + virtual void setUbershaderSetting(bool value) {} + // Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window #ifdef PANDA3DS_FRONTEND_QT virtual void initGraphicsContext(GL::Context* context) { Helpers::panic("Tried to initialize incompatible renderer with 
GL context"); } diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 55a730ec..6414a7cf 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -30,6 +30,7 @@ class RendererGL final : public Renderer { OpenGL::VertexArray vao; OpenGL::VertexBuffer vbo; + bool usingUbershader = true; // Data struct { @@ -56,7 +57,6 @@ class RendererGL final : public Renderer { SurfaceCache depthBufferCache; SurfaceCache colourBufferCache; SurfaceCache textureCache; - bool usingUbershader = false; // Dummy VAO/VBO for blitting the final output OpenGL::VertexArray dummyVAO; @@ -107,6 +107,8 @@ class RendererGL final : public Renderer { virtual std::string getUbershader() override; virtual void setUbershader(const std::string& shader) override; + virtual void setUbershaderSetting(bool value) override { usingUbershader = value; } + std::optional getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); // Note: The caller is responsible for deleting the currently bound FBO before calling this diff --git a/src/config.cpp b/src/config.cpp index 5af4d654..cc34d148 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -62,6 +62,7 @@ void EmulatorConfig::load() { shaderJitEnabled = toml::find_or(gpu, "EnableShaderJIT", shaderJitDefault); vsyncEnabled = toml::find_or(gpu, "EnableVSync", true); + useUbershaders = toml::find_or(gpu, "UseUbershaders", ubershaderDefault); accurateShaderMul = toml::find_or(gpu, "AccurateShaderMultiplication", false); } } @@ -123,10 +124,13 @@ void EmulatorConfig::save() { data["General"]["EnableDiscordRPC"] = discordRpcEnabled; data["General"]["UsePortableBuild"] = usePortableBuild; data["General"]["DefaultRomPath"] = defaultRomPath.string(); + data["GPU"]["EnableShaderJIT"] = shaderJitEnabled; data["GPU"]["Renderer"] = std::string(Renderer::typeToString(rendererType)); data["GPU"]["EnableVSync"] = vsyncEnabled; 
data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul; + data["GPU"]["UseUbershaders"] = useUbershaders; + data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType)); data["Audio"]["EnableAudio"] = audioEnabled; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index ed0e5420..a54fe6eb 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -110,6 +110,7 @@ void GPU::reset() { externalRegs[Framebuffer1Config] = static_cast(PICA::ColorFmt::RGB8); externalRegs[Framebuffer1Select] = 0; + renderer->setUbershaderSetting(config.useUbershaders); renderer->reset(); } diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index 3825d3ed..a6a1ff00 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -147,6 +147,7 @@ static void configInit() { static const retro_variable values[] = { {"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"}, {"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"}, + {"panda3ds_use_ubershader", "Use ubershaders (No stutter, maybe slower); enabled|disabled"}, {"panda3ds_use_vsync", "Enable VSync; enabled|disabled"}, {"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"}, {"panda3ds_use_audio", "Enable audio; disabled|enabled"}, @@ -173,6 +174,7 @@ static void configUpdate() { config.sdCardInserted = FetchVariableBool("panda3ds_use_virtual_sd", true); config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false); config.accurateShaderMul = FetchVariableBool("panda3ds_accurate_shader_mul", false); + config.useUbershaders = FetchVariableBool("panda3ds_use_ubershader", true); config.discordRpcEnabled = false; config.save(); From cb0e69847cc8f3eccf38f56481ea2e96151911c3 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 17 Jul 2024 21:17:54 +0300 Subject: [PATCH 082/251] Hotfix UBO binding --- src/core/PICA/shader_gen_glsl.cpp | 2 +- 
src/core/renderer_gl/renderer_gl.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index e135ac8e..9b467e63 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -175,7 +175,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { ret += "fragColor = combinerOutput;\n"; ret += "}"; // End of main function - ret += "\n\n\n\n\n\n\n"; + ret += "\n\n\n"; return ret; } diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index b9a2c7ae..0c33b898 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -828,8 +828,8 @@ OpenGL::Program& RendererGL::getSpecializedShader() { // As it's an OpenGL 4.2 feature that MacOS doesn't support... uint uboIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms"); glUniformBlockBinding(program.handle(), uboIndex, uboBlockBinding); - glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, programEntry.uboBinding); } + glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, programEntry.uboBinding); // Upload uniform data to our shader's UBO PICA::FragmentUniforms uniforms; From d013582223d24619a302eca32cfa72f6284366ab Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 17 Jul 2024 21:53:19 +0300 Subject: [PATCH 083/251] Shadergen: Optimize caching --- include/PICA/pica_frag_config.hpp | 5 +++-- src/core/renderer_gl/renderer_gl.cpp | 20 +++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index 59f13757..9e13b3b5 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -24,8 +24,9 @@ namespace PICA { u32 texUnitConfig; u32 texEnvUpdateBuffer; - // There's 6 TEV stages, and each one is configured via 5 word-sized registers - std::array 
tevConfigs; + // There's 6 TEV stages, and each one is configured via 4 word-sized registers + // (+ the constant color register, which we don't include here, otherwise we'd generate too many shaders) + std::array tevConfigs; }; // Config used for identifying unique fragment pipeline configurations diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 0c33b898..249d8484 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -793,13 +793,19 @@ OpenGL::Program& RendererGL::getSpecializedShader() { texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; - // Set up TEV stages - std::memcpy(&texConfig.tevConfigs[0 * 5], &regs[InternalRegs::TexEnv0Source], 5 * sizeof(u32)); - std::memcpy(&texConfig.tevConfigs[1 * 5], &regs[InternalRegs::TexEnv1Source], 5 * sizeof(u32)); - std::memcpy(&texConfig.tevConfigs[2 * 5], &regs[InternalRegs::TexEnv2Source], 5 * sizeof(u32)); - std::memcpy(&texConfig.tevConfigs[3 * 5], &regs[InternalRegs::TexEnv3Source], 5 * sizeof(u32)); - std::memcpy(&texConfig.tevConfigs[4 * 5], &regs[InternalRegs::TexEnv4Source], 5 * sizeof(u32)); - std::memcpy(&texConfig.tevConfigs[5 * 5], &regs[InternalRegs::TexEnv5Source], 5 * sizeof(u32)); + // Set up TEV stages.
Annoyingly we can't just memcpy as the TEV registers are arranged like + // {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO +#define setupTevStage(stage) \ + std::memcpy(&texConfig.tevConfigs[stage * 4], ®s[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \ + texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 5]; + + setupTevStage(0); + setupTevStage(1); + setupTevStage(2); + setupTevStage(3); + setupTevStage(4); + setupTevStage(5); +#undef setupTevStage CachedProgram& programEntry = shaderCache[fsConfig]; OpenGL::Program& program = programEntry.program; From 0fc95ae8ef32aaad11e8d53bce392eb1f6aa531a Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 17 Jul 2024 21:54:56 +0300 Subject: [PATCH 084/251] Shadergen: Remove trailing newlines --- src/core/PICA/shader_gen_glsl.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 9b467e63..0877e5f2 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -173,9 +173,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { applyAlphaTest(ret, regs); - ret += "fragColor = combinerOutput;\n"; - ret += "}"; // End of main function - ret += "\n\n\n"; + ret += "fragColor = combinerOutput;\n}"; // End of main function return ret; } From 801d14e4635d35f27a60f390cc6542904b05668e Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 17 Jul 2024 18:56:57 +0000 Subject: [PATCH 085/251] Shadergen: Fix UBO uploads and optimize shader caching (#538) * Hotfix UBO binding * Shadergen: Optimize caching * Shadergen: Remove trailing newlines --- include/PICA/pica_frag_config.hpp | 5 +++-- src/core/PICA/shader_gen_glsl.cpp | 4 +--- src/core/renderer_gl/renderer_gl.cpp | 22 ++++++++++++++-------- 3 files 
changed, 18 insertions(+), 13 deletions(-) diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index 59f13757..9e13b3b5 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -24,8 +24,9 @@ namespace PICA { u32 texUnitConfig; u32 texEnvUpdateBuffer; - // There's 6 TEV stages, and each one is configured via 5 word-sized registers - std::array tevConfigs; + // There's 6 TEV stages, and each one is configured via 4 word-sized registers + // (+ the constant color register, which we don't include here, otherwise we'd generate too many shaders) + std::array tevConfigs; }; // Config used for identifying unique fragment pipeline configurations diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index e135ac8e..0877e5f2 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -173,9 +173,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { applyAlphaTest(ret, regs); - ret += "fragColor = combinerOutput;\n"; - ret += "}"; // End of main function - ret += "\n\n\n\n\n\n\n"; + ret += "fragColor = combinerOutput;\n}"; // End of main function return ret; } diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index b9a2c7ae..249d8484 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -793,13 +793,19 @@ OpenGL::Program& RendererGL::getSpecializedShader() { texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; - // Set up TEV stages - std::memcpy(&texConfig.tevConfigs[0 * 5], ®s[InternalRegs::TexEnv0Source], 5 * sizeof(u32)); - std::memcpy(&texConfig.tevConfigs[1 * 5], ®s[InternalRegs::TexEnv1Source], 5 * sizeof(u32)); - std::memcpy(&texConfig.tevConfigs[2 * 5], ®s[InternalRegs::TexEnv2Source], 5 * sizeof(u32)); - std::memcpy(&texConfig.tevConfigs[3 * 5], 
®s[InternalRegs::TexEnv3Source], 5 * sizeof(u32)); - std::memcpy(&texConfig.tevConfigs[4 * 5], ®s[InternalRegs::TexEnv4Source], 5 * sizeof(u32)); - std::memcpy(&texConfig.tevConfigs[5 * 5], ®s[InternalRegs::TexEnv5Source], 5 * sizeof(u32)); + // Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like + // {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO +#define setupTevStage(stage) \ + std::memcpy(&texConfig.tevConfigs[stage * 4], ®s[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \ + texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 5]; + + setupTevStage(0); + setupTevStage(1); + setupTevStage(2); + setupTevStage(3); + setupTevStage(4); + setupTevStage(5); +#undef setupTevStage CachedProgram& programEntry = shaderCache[fsConfig]; OpenGL::Program& program = programEntry.program; @@ -828,8 +834,8 @@ OpenGL::Program& RendererGL::getSpecializedShader() { // As it's an OpenGL 4.2 feature that MacOS doesn't support... 
uint uboIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms"); glUniformBlockBinding(program.handle(), uboIndex, uboBlockBinding); - glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, programEntry.uboBinding); } + glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, programEntry.uboBinding); // Upload uniform data to our shader's UBO PICA::FragmentUniforms uniforms; From 2ca886f64f56f47fffc9b4125458b728352c9e7e Mon Sep 17 00:00:00 2001 From: offtkp Date: Wed, 17 Jul 2024 22:08:48 +0300 Subject: [PATCH 086/251] Move normal calculation to the fragment shader --- src/host_shaders/opengl_fragment_shader.frag | 30 +++++++++++++++----- src/host_shaders/opengl_vertex_shader.vert | 16 ++--------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index ae43d993..582d6eef 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,8 +1,6 @@ #version 410 core -in vec3 v_tangent; -in vec3 v_normal; -in vec3 v_bitangent; +in vec4 v_quaternion; in vec4 v_colour; in vec3 v_texcoord0; in vec2 v_texcoord1; @@ -37,6 +35,7 @@ uint readPicaReg(uint reg_addr) { return u_picaRegs[reg_addr - 0x48u]; } vec4 tevSources[16]; vec4 tevNextPreviousBuffer; bool tevUnimplementedSourceFlag = false; +vec3 normal; // Holds the enabled state of the lighting samples for various PICA configurations // As explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0 @@ -255,7 +254,7 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); switch (input_id) { case 0u: { - delta = dot(v_normal, normalize(half_vector)); + delta = dot(normal, normalize(half_vector)); break; } case 1u: { @@ -263,11 +262,11 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light break; 
} case 2u: { - delta = dot(v_normal, normalize(v_view)); + delta = dot(normal, normalize(v_view)); break; } case 3u: { - delta = dot(light_vector, v_normal); + delta = dot(light_vector, normal); break; } case 4u: { @@ -313,6 +312,12 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light } } +vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + vec3 u = q.xyz; + float s = q.w; + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); +} + // Implements the following algorthm: https://mathb.in/26766 void calcLighting(out vec4 primary_color, out vec4 secondary_color) { error_unimpl = false; @@ -336,6 +341,17 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); + uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker + // Could be because the texture is not sampled correctly, may need the clamp/border color configurations + switch (bump_mode) { + default: { + normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion); + break; + } + } + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); @@ -377,7 +393,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { light_vector = normalize(light_vector); half_vector = light_vector + normalize(v_view); - float NdotL = dot(v_normal, light_vector); // N dot Li + float NdotL = dot(normal, light_vector); // N dot Li // Two sided diffuse if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert index a25d7a6d..057f9a88 100644 --- a/src/host_shaders/opengl_vertex_shader.vert +++ b/src/host_shaders/opengl_vertex_shader.vert @@ -9,9 +9,7 @@ layout(location = 5) in float a_texcoord0_w; layout(location = 
6) in vec3 a_view; layout(location = 7) in vec2 a_texcoord2; -out vec3 v_normal; -out vec3 v_tangent; -out vec3 v_bitangent; +out vec4 v_quaternion; out vec4 v_colour; out vec3 v_texcoord0; out vec2 v_texcoord1; @@ -35,12 +33,6 @@ vec4 abgr8888ToVec4(uint abgr) { return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); } -vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { - vec3 u = q.xyz; - float s = q.w; - return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); -} - // Convert an arbitrary-width floating point literal to an f32 float decodeFP(uint hex, uint E, uint M) { uint width = M + E + 1u; @@ -73,10 +65,6 @@ void main() { v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); v_view = a_view; - v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); - v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); - v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); - for (int i = 0; i < 6; i++) { v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); } @@ -95,4 +83,6 @@ void main() { // There's also another, always-on clipping plane based on vertex z gl_ClipDistance[0] = -a_coords.z; gl_ClipDistance[1] = dot(clipData, a_coords); + + v_quaternion = a_quaternion; } From ed00ddc8058aaeccd00b5cdb3f640759e13e9111 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jul 2024 00:55:57 +0300 Subject: [PATCH 087/251] Improve lighting register definitions --- include/PICA/pica_frag_config.hpp | 160 ++++++++++++++++++++++++++- include/PICA/regs.hpp | 12 +- include/PICA/shader_gen.hpp | 3 +- src/core/PICA/shader_gen_glsl.cpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 4 +- 5 files changed, 175 insertions(+), 6 deletions(-) diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index 9e13b3b5..cfb22e5c 100644 --- 
a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -29,20 +29,178 @@ namespace PICA { std::array tevConfigs; }; + struct Light { + union { + u16 raw; + BitField<0, 3, u16> num; + BitField<3, 1, u16> directional; + BitField<4, 1, u16> twoSidedDiffuse; + BitField<5, 1, u16> distanceAttenuationEnable; + BitField<6, 1, u16> spotAttenuationEnable; + BitField<7, 1, u16> geometricFactor0; + BitField<8, 1, u16> geometricFactor1; + BitField<9, 1, u16> shadowEnable; + }; + }; + + struct LightingLUTConfig { + union { + u32 raw; + BitField<0, 1, u32> enable; + BitField<1, 1, u32> absInput; + BitField<2, 3, u32> type; + }; + float scale; + }; + + struct LightingConfig { + union { + u32 raw{}; + BitField<0, 1, u32> enable; + BitField<1, 4, u32> lightNum; + BitField<5, 2, u32> bumpMode; + BitField<7, 2, u32> bumpSelector; + BitField<9, 1, u32> bumpRenorm; + BitField<10, 1, u32> clampHighlights; + BitField<11, 4, u32> config; + BitField<15, 1, u32> enablePrimaryAlpha; + BitField<16, 1, u32> enableSecondaryAlpha; + BitField<17, 1, u32> enableShadow; + BitField<18, 1, u32> shadowPrimary; + BitField<19, 1, u32> shadowSecondary; + BitField<20, 1, u32> shadowInvert; + BitField<21, 1, u32> shadowAlpha; + BitField<22, 2, u32> shadowSelector; + }; + + LightingLUTConfig d0{}; + LightingLUTConfig d1{}; + LightingLUTConfig sp{}; + LightingLUTConfig fr{}; + LightingLUTConfig rr{}; + LightingLUTConfig rg{}; + LightingLUTConfig rb{}; + + std::array lights{}; + + LightingConfig(const std::array& regs) { + // Ignore lighting registers if it's disabled + if ((regs[InternalRegs::LightingEnable] & 1) == 0) { + return; + } + + const u32 config0 = regs[InternalRegs::LightConfig0]; + const u32 config1 = regs[InternalRegs::LightConfig1]; + const u32 totalLightCount = Helpers::getBits<0, 3>(regs[InternalRegs::LightNumber]) + 1; + + enable = 1; + lightNum = totalLightCount; + + enableShadow = Helpers::getBit<0>(config0); + if (enableShadow) [[unlikely]] { + shadowPrimary = 
Helpers::getBit<16>(config0); + shadowSecondary = Helpers::getBit<17>(config0); + shadowInvert = Helpers::getBit<18>(config0); + shadowAlpha = Helpers::getBit<19>(config0); + shadowSelector = Helpers::getBits<24, 2>(config0); + } + + enablePrimaryAlpha = Helpers::getBit<2>(config0); + enableSecondaryAlpha = Helpers::getBit<3>(config0); + config = Helpers::getBits<4, 4>(config0); + + bumpSelector = Helpers::getBits<22, 2>(config0); + clampHighlights = Helpers::getBit<27>(config0); + bumpMode = Helpers::getBits<28, 2>(config0); + bumpRenorm = Helpers::getBit<30>(config0) ^ 1; // 0 = enable so flip it with xor + + for (int i = 0; i < totalLightCount; i++) { + auto& light = lights[i]; + const u32 lightConfig = 0x149 + 0x10 * i; + + light.num = (regs[InternalRegs::LightPermutation] >> (i * 4)) & 0x7; + light.directional = Helpers::getBit<0>(lightConfig); + light.twoSidedDiffuse = Helpers::getBit<1>(lightConfig); + light.geometricFactor0 = Helpers::getBit<2>(lightConfig); + light.geometricFactor1 = Helpers::getBit<3>(lightConfig); + + light.shadowEnable = ((config1 >> i) & 1) ^ 1; // This also does 0 = enabled + light.spotAttenuationEnable = ((config1 >> (8 + i)) & 1) ^ 1; // Same here + light.distanceAttenuationEnable = ((config1 >> (24 + i)) & 1) ^ 1; // Of course same here + } + + d0.enable = Helpers::getBit<16>(config1) == 0; + d1.enable = Helpers::getBit<17>(config1) == 0; + fr.enable = Helpers::getBit<19>(config1) == 0; + rb.enable = Helpers::getBit<20>(config1) == 0; + rg.enable = Helpers::getBit<21>(config1) == 0; + rr.enable = Helpers::getBit<22>(config1) == 0; + sp.enable = 1; + + const u32 lutAbs = regs[InternalRegs::LightLUTAbs]; + const u32 lutSelect = regs[InternalRegs::LightLUTSelect]; + const u32 lutScale = regs[InternalRegs::LightLUTScale]; + static constexpr float scales[] = {1.0f, 2.0f, 4.0f, 8.0f, 0.25f, 0.5f}; + + if (d0.enable) { + d0.absInput = Helpers::getBit<1>(lutAbs) == 0; + d0.type = Helpers::getBits<0, 3>(lutSelect); + d0.scale = 
scales[Helpers::getBits<0, 3>(lutScale)]; + } + + if (d1.enable) { + d1.absInput = Helpers::getBit<5>(lutAbs) == 0; + d1.type = Helpers::getBits<4, 3>(lutSelect); + d1.scale = scales[Helpers::getBits<4, 3>(lutScale)]; + } + + sp.absInput = Helpers::getBit<9>(lutAbs) == 0; + sp.type = Helpers::getBits<8, 3>(lutSelect); + sp.scale = scales[Helpers::getBits<8, 3>(lutScale)]; + + if (fr.enable) { + fr.absInput = Helpers::getBit<13>(lutAbs) == 0; + fr.type = Helpers::getBits<12, 3>(lutSelect); + fr.scale = scales[Helpers::getBits<12, 3>(lutScale)]; + } + + if (rb.enable) { + rb.absInput = Helpers::getBit<17>(lutAbs) == 0; + rb.type = Helpers::getBits<16, 3>(lutSelect); + rb.scale = scales[Helpers::getBits<16, 3>(lutScale)]; + } + + if (rg.enable) { + rg.absInput = Helpers::getBit<21>(lutAbs) == 0; + rg.type = Helpers::getBits<20, 3>(lutSelect); + rg.scale = scales[Helpers::getBits<20, 3>(lutScale)]; + } + + if (rr.enable) { + rr.absInput = Helpers::getBit<25>(lutAbs) == 0; + rr.type = Helpers::getBits<24, 3>(lutSelect); + rr.scale = scales[Helpers::getBits<24, 3>(lutScale)]; + } + } + }; + // Config used for identifying unique fragment pipeline configurations struct FragmentConfig { OutputConfig outConfig; TextureConfig texConfig; + LightingConfig lighting; bool operator==(const FragmentConfig& config) const { // Hash function and equality operator required by std::unordered_map return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0; } + + FragmentConfig(const std::array& regs) : lighting(regs) {} }; static_assert( std::has_unique_object_representations() && std::has_unique_object_representations() && - std::has_unique_object_representations() + std::has_unique_object_representations() ); } // namespace PICA diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 74f8c7d5..312ac78b 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -67,7 +67,17 @@ namespace PICA { ColourBufferLoc = 0x11D, FramebufferSize = 0x11E, - //LightingRegs + + // 
Lighting registers + LightingEnable = 0x8F, + LightNumber = 0x1C2, + LightConfig0 = 0x1C3, + LightConfig1 = 0x1C4, + LightPermutation = 0x1D9, + LightLUTAbs = 0x1D0, + LightLUTSelect = 0x1D1, + LightLUTScale = 0x1D2, + LightingLUTIndex = 0x01C5, LightingLUTData0 = 0x01C8, LightingLUTData1 = 0x01C9, diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index e8e8ca20..0a6bca8e 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -2,6 +2,7 @@ #include #include "PICA/gpu.hpp" +#include "PICA/pica_frag_config.hpp" #include "PICA/regs.hpp" #include "helpers.hpp" @@ -30,7 +31,7 @@ namespace PICA::ShaderGen { public: FragmentGenerator(API api, Language language) : api(api), language(language) {} - std::string generate(const PICARegs& regs); + std::string generate(const PICARegs& regs, const PICA::FragmentConfig& config); std::string getVertexShader(const PICARegs& regs); void setTarget(API api, Language language) { diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 0877e5f2..5dbc3b81 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -92,7 +92,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { return ret; } -std::string FragmentGenerator::generate(const PICARegs& regs) { +std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConfig& config) { std::string ret = ""; switch (api) { diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 249d8484..b85e7689 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -780,7 +780,7 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt OpenGL::Program& RendererGL::getSpecializedShader() { constexpr uint uboBlockBinding = 2; - PICA::FragmentConfig fsConfig; + PICA::FragmentConfig fsConfig(regs); auto& outConfig = fsConfig.outConfig; auto& texConfig = fsConfig.texConfig; @@ 
-812,7 +812,7 @@ OpenGL::Program& RendererGL::getSpecializedShader() { if (!program.exists()) { std::string vs = fragShaderGen.getVertexShader(regs); - std::string fs = fragShaderGen.generate(regs); + std::string fs = fragShaderGen.generate(regs, fsConfig); OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); From 7e7856fa4440b8584895a3ba4ce77e586d62f400 Mon Sep 17 00:00:00 2001 From: offtkp Date: Thu, 18 Jul 2024 02:51:08 +0300 Subject: [PATCH 088/251] Pack sampler configurations in bitfields instead of bool arrays --- src/host_shaders/opengl_fragment_shader.frag | 49 +++++++++++++------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index 582d6eef..23c5c4cb 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -39,18 +39,37 @@ vec3 normal; // Holds the enabled state of the lighting samples for various PICA configurations // As explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0 -const bool samplerEnabled[9 * 7] = bool[9 * 7]( - // D0 D1 SP FR RB RG RR - true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR - false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR - true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR - true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR - true, true, true, false, true, true, true, // Configuration 4: All except for FR - true, false, true, true, true, true, true, // Configuration 5: All except for D1 - true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG - false, false, false, false, false, false, false, // Configuration 7: Unused - true, true, true, true, true, true, true // Configuration 8: All -); +// const bool 
samplerEnabled[9 * 7] = bool[9 * 7]( +// // D0 D1 SP FR RB RG RR +// true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR +// false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR +// true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR +// true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR +// true, true, true, false, true, true, true, // Configuration 4: All except for FR +// true, false, true, true, true, true, true, // Configuration 5: All except for D1 +// true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG +// false, false, false, false, false, false, false, // Configuration 7: Unused +// true, true, true, true, true, true, true // Configuration 8: All +// ); + +// The above have been condensed to two uints to save space +// You can confirm they are the same by running the following: +// for (int i = 0; i < 9 * 7; i++) { +// unsigned arrayIndex = (i >> 5); +// bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u; +// if (samplerEnabled[i] == b) { +// printf("%d: happy\n", i); +// } else { +// printf("%d: unhappy\n", i); +// } +// } +const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu); + +bool isSamplerEnabled(uint environment_id, uint lut_id) { + uint index = 7 * environment_id + lut_id; + uint arrayIndex = (index >> 5); + return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31))) != 0u; +} // OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements): // https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml @@ -198,10 +217,6 @@ float decodeFP(uint hex, uint E, uint M) { return uintBitsToFloat(hex); } -bool isSamplerEnabled(uint environment_id, uint lut_id) { - return samplerEnabled[7 * environment_id + lut_id]; -} - float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light_vector, vec3 half_vector) { uint lut_index; // 
lut_id is one of these values @@ -485,7 +500,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { secondary_color = clamp(specular_sum, vec4(0.0), vec4(1.0)); if (error_unimpl) { - secondary_color = primary_color = unimpl_color; + // secondary_color = primary_color = unimpl_color; } } From b51e2fd25f4a983c93a8781fb4847f8b26409d2b Mon Sep 17 00:00:00 2001 From: offtkp Date: Thu, 18 Jul 2024 02:53:54 +0300 Subject: [PATCH 089/251] Update gles.patch --- .github/gles.patch | 81 ++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 35 deletions(-) diff --git a/.github/gles.patch b/.github/gles.patch index f5270518..99258011 100644 --- a/.github/gles.patch +++ b/.github/gles.patch @@ -21,7 +21,7 @@ index 990e2f80..2e7842ac 100644 void main() { diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag -index 1b8e9751..96238000 100644 +index 23c5c4cb..a9851a8b 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,4 +1,5 @@ @@ -29,13 +29,13 @@ index 1b8e9751..96238000 100644 +#version 300 es +precision mediump float; - in vec3 v_tangent; - in vec3 v_normal; -@@ -171,11 +172,17 @@ float lutLookup(uint lut, int index) { + in vec4 v_quaternion; + in vec4 v_colour; +@@ -189,11 +190,17 @@ float lutLookup(uint lut, int index) { return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; } -+// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead ++// some gles versions have bitfieldExtractCompat and complain if you redefine it, some don't and compile error, using this instead +uint bitfieldExtractCompat(uint val, int off, int size) { + uint mask = uint((1 << size) - 1); + return uint(val >> off) & mask; @@ -50,7 +50,7 @@ index 1b8e9751..96238000 100644 } // Convert an arbitrary-width floating point literal to an f32 -@@ -243,16 +250,16 @@ float 
lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light +@@ -257,16 +264,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light // If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue. bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment @@ -59,25 +59,25 @@ index 1b8e9751..96238000 100644 return 1.0; } -- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) * 4, 3); -+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) * 4, 3); +- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); ++ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); float scale = float(1u << scale_id); if (scale_id >= 6u) scale /= 256.0; float delta = 1.0; -- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) * 4, 3); -+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) * 4, 3); +- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); ++ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); switch (input_id) { case 0u: { - delta = dot(v_normal, normalize(half_vector)); -@@ -271,14 +278,14 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + delta = dot(normal, normalize(half_vector)); +@@ -285,14 +292,14 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light break; } case 4u: { - // These are ints so that bitfieldExtract sign extends for us + // These are ints so that bitfieldExtractCompat sign extends for us - int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + 0x10u * light_id)); - int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + 0x10u * light_id)); + int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + 
(light_id << 4u))); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u))); // These are fixed point 1.1.11 values, so we need to convert them to float - float x = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; @@ -89,19 +89,19 @@ index 1b8e9751..96238000 100644 vec3 spotlight_vector = vec3(x, y, z); delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector break; -@@ -296,9 +303,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light +@@ -310,9 +317,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light } // 0 = enabled -- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + 4 * int(lut_id), 1) == 0u) { -+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + 4 * int(lut_id), 1) == 0u) { +- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { ++ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { // Two sided diffuse - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { delta = max(delta, 0.0); } else { delta = abs(delta); -@@ -319,7 +326,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -339,7 +346,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); @@ -110,7 +110,16 @@ index 1b8e9751..96238000 100644 primary_color = secondary_color = vec4(0.0); return; } -@@ -339,15 +346,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -356,7 +363,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); + GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); + +- uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2); 
++ uint bump_mode = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker + // Could be because the texture is not sampled correctly, may need the clamp/border color configurations +@@ -370,15 +377,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); @@ -124,12 +133,12 @@ index 1b8e9751..96238000 100644 vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { -- light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); -+ light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); +- light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); ++ light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); - uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); -@@ -359,12 +366,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u)); +@@ -390,12 +397,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { float light_distance; vec3 light_position = vec3( @@ -145,8 +154,8 @@ index 1b8e9751..96238000 100644 light_vector = light_position + v_view; } -@@ -380,14 +387,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - float NdotL = dot(v_normal, light_vector); // N dot Li +@@ -411,14 +418,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + float NdotL = dot(normal, light_vector); // N dot Li // Two sided diffuse - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) @@ -163,20 +172,20 @@ index 1b8e9751..96238000 100644 if (use_geo_0 || 
use_geo_1) { geometric_factor = dot(half_vector, half_vector); geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0); -@@ -399,9 +406,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -430,9 +437,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { // fragment and the distance attenuation scale and bias to calculate where in the LUT to look up. // See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE float distance_attenuation = 1.0; - if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { -- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au), 0, 20); -- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu), 0, 20); +- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); +- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); + if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { -+ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au), 0, 20); -+ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu), 0, 20); ++ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); ++ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); -@@ -446,8 +453,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -477,8 +484,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } @@ -188,7 +197,7 @@ index 1b8e9751..96238000 
100644 float fresnel_factor; diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert -index a25d7a6d..7cf40398 100644 +index 057f9a88..dc735ced 100644 --- a/src/host_shaders/opengl_vertex_shader.vert +++ b/src/host_shaders/opengl_vertex_shader.vert @@ -1,4 +1,6 @@ @@ -199,7 +208,7 @@ index a25d7a6d..7cf40398 100644 layout(location = 0) in vec4 a_coords; layout(location = 1) in vec4 a_quaternion; -@@ -20,7 +22,7 @@ out vec2 v_texcoord2; +@@ -18,7 +20,7 @@ out vec2 v_texcoord2; flat out vec4 v_textureEnvColor[6]; flat out vec4 v_textureEnvBufferColor; @@ -208,7 +217,7 @@ index a25d7a6d..7cf40398 100644 // TEV uniforms uniform uint u_textureEnvColor[6]; -@@ -93,6 +95,6 @@ void main() { +@@ -81,8 +83,8 @@ void main() { ); // There's also another, always-on clipping plane based on vertex z @@ -216,6 +225,8 @@ index a25d7a6d..7cf40398 100644 - gl_ClipDistance[1] = dot(clipData, a_coords); + // gl_ClipDistance[0] = -a_coords.z; + // gl_ClipDistance[1] = dot(clipData, a_coords); + + v_quaternion = a_quaternion; } diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp index 9997e63b..5d9d7804 100644 From ccf9693877a649ccb9c2b891b37b36b54d04ffbb Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jul 2024 02:57:41 +0300 Subject: [PATCH 090/251] Shadergen: More lighting work --- include/PICA/pica_frag_uniforms.hpp | 23 ++++++++++++++++++- include/PICA/shader_gen.hpp | 1 + src/core/PICA/shader_gen_glsl.cpp | 34 ++++++++++++++++++++++++---- src/core/renderer_gl/renderer_gl.cpp | 4 ++++ 4 files changed, 57 insertions(+), 5 deletions(-) diff --git a/include/PICA/pica_frag_uniforms.hpp b/include/PICA/pica_frag_uniforms.hpp index 332acd4e..0122ae93 100644 --- a/include/PICA/pica_frag_uniforms.hpp +++ b/include/PICA/pica_frag_uniforms.hpp @@ -1,12 +1,27 @@ #pragma once #include +#include #include #include "helpers.hpp" namespace PICA { - struct FragmentUniforms { 
+ struct LightUniform { using vec3 = std::array; + + // std140 requires vec3s be aligned to 16 bytes + alignas(16) vec3 specular0; + alignas(16) vec3 specular1; + alignas(16) vec3 diffuse; + alignas(16) vec3 ambient; + alignas(16) vec3 position; + alignas(16) vec3 spotlightDirection; + + float distAttenuationBias; + float distanceAttenuationScale; + }; + + struct FragmentUniforms { using vec4 = std::array; static constexpr usize tevStageCount = 6; @@ -17,5 +32,11 @@ namespace PICA { alignas(16) vec4 constantColors[tevStageCount]; alignas(16) vec4 tevBufferColor; alignas(16) vec4 clipCoords; + + // NOTE: THIS MUST BE LAST so that if lighting is disabled we can potentially omit uploading it + LightUniform lightUniforms[8]; }; + + // Assert that lightUniforms is the last member of the structure + static_assert(offsetof(FragmentUniforms, lightUniforms) + 8 * sizeof(LightUniform) == sizeof(FragmentUniforms)); } // namespace PICA \ No newline at end of file diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index 0a6bca8e..21d85d98 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -26,6 +26,7 @@ namespace PICA::ShaderGen { void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op); void applyAlphaTest(std::string& shader, const PICARegs& regs); + void compileLights(std::string& shader, const PICA::FragmentConfig& config); u32 textureConfig = 0; diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 5dbc3b81..9c319780 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -3,6 +3,17 @@ using namespace PICA; using namespace PICA::ShaderGen; static constexpr const char* uniformDefinition = R"( + struct LightSource { + vec3 specular0; + vec3 specular1; + vec3 diffuse; + vec3 ambient; + vec3 position; + vec3 spotlightDirection; + float distanceAttenuationBias; + float distanceAttenuationScale; + }; + layout(std140) uniform FragmentUniforms { 
int alphaReference; float depthScale; @@ -11,6 +22,8 @@ static constexpr const char* uniformDefinition = R"( vec4 constantColors[6]; vec4 tevBufferColor; vec4 clipCoords; + + LightSource lightSources[8]; }; )"; @@ -128,7 +141,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConf uniform sampler2D u_tex2; // GLES doesn't support sampler1DArray, as such we'll have to change how we handle lighting later #ifndef USING_GLES - uniform sampler1DArray u_tex_lighting_lut; + uniform sampler2D u_tex_lighting_lut; #endif )"; @@ -140,9 +153,14 @@ std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConf void main() { vec4 combinerOutput = v_colour; vec4 previousBuffer = vec4(0.0); - vec4 tevNextPreviousBuffer = tevBufferColor; + vec4 tevNextPreviousBuffer = tevBufferColor; + + vec4 primaryColor = vec4(0.0); + vec4 secondaryColor = vec4(0.0); )"; + compileLights(ret, config); + ret += R"( vec3 colorOp1 = vec3(0.0); vec3 colorOp2 = vec3(0.0); @@ -353,8 +371,8 @@ void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source sour case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break; // Lighting - case TexEnvConfig::Source::PrimaryFragmentColor: - case TexEnvConfig::Source::SecondaryFragmentColor: shader += "vec4(1.0, 1.0, 1.0, 1.0)"; break; + case TexEnvConfig::Source::PrimaryFragmentColor: shader += "primaryColor"; break; + case TexEnvConfig::Source::SecondaryFragmentColor: shader += "secondaryColor"; break; default: Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); @@ -430,3 +448,11 @@ void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs shader += ") { discard; }\n"; } + +void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentConfig& config) { + if (!config.lighting.enable) { + return; + } + + +} \ No newline at end of file diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 
b85e7689..34ed0d22 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -875,6 +875,10 @@ OpenGL::Program& RendererGL::getSpecializedShader() { vec[3] = float((color >> 24) & 0xFF) / 255.0f; } + // Append lighting uniforms + if (fsConfig.lighting.enable) { + } + gl.bindUBO(programEntry.uboBinding); glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms); From 7e480e35ece212277f44844c26b5f628b156b514 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jul 2024 03:37:11 +0300 Subject: [PATCH 091/251] Shadergen: Upload light uniforms --- include/PICA/pica_frag_uniforms.hpp | 5 +++- include/PICA/regs.hpp | 13 +++++++++- src/core/PICA/shader_gen_glsl.cpp | 2 ++ src/core/renderer_gl/renderer_gl.cpp | 38 ++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 2 deletions(-) diff --git a/include/PICA/pica_frag_uniforms.hpp b/include/PICA/pica_frag_uniforms.hpp index 0122ae93..09722d61 100644 --- a/include/PICA/pica_frag_uniforms.hpp +++ b/include/PICA/pica_frag_uniforms.hpp @@ -17,11 +17,12 @@ namespace PICA { alignas(16) vec3 position; alignas(16) vec3 spotlightDirection; - float distAttenuationBias; + float distanceAttenuationBias; float distanceAttenuationScale; }; struct FragmentUniforms { + using vec3 = std::array; using vec4 = std::array; static constexpr usize tevStageCount = 6; @@ -33,6 +34,8 @@ namespace PICA { alignas(16) vec4 tevBufferColor; alignas(16) vec4 clipCoords; + // Note: We upload this as a u32 and decode on GPU + u32 globalAmbientLight; // NOTE: THIS MUST BE LAST so that if lighting is disabled we can potentially omit uploading it LightUniform lightUniforms[8]; }; diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 312ac78b..bd1f823e 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -67,9 +67,20 @@ namespace PICA { ColourBufferLoc = 0x11D, FramebufferSize = 0x11E, - // Lighting registers 
LightingEnable = 0x8F, + Light0Specular0 = 0x140, + Light0Specular1 = 0x141, + Light0Diffuse = 0x142, + Light0Ambient = 0x143, + Light0XY = 0x144, + Light0Z = 0x145, + Light0SpotlightXY = 0x146, + Light0SpotlightZ = 0x147, + Light0AttenuationBias = 0x14A, + Light0AttenuationScale = 0x14B, + + LightGlobalAmbient = 0x1C0, LightNumber = 0x1C2, LightConfig0 = 0x1C3, LightConfig1 = 0x1C4, diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 9c319780..98a10bca 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -23,6 +23,8 @@ static constexpr const char* uniformDefinition = R"( vec4 tevBufferColor; vec4 clipCoords; + // Note: We upload this as a u32 and decode on GPU + uint globalAmbientLight; LightSource lightSources[8]; }; )"; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 34ed0d22..5f599e9c 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -877,6 +877,44 @@ OpenGL::Program& RendererGL::getSpecializedShader() { // Append lighting uniforms if (fsConfig.lighting.enable) { + uniforms.globalAmbientLight = regs[InternalRegs::LightGlobalAmbient]; + for (int i = 0; i < 8; i++) { + auto& light = uniforms.lightUniforms[i]; + const u32 specular0 = regs[InternalRegs::Light0Specular0 + i * 0x10]; + const u32 specular1 = regs[InternalRegs::Light0Specular1 + i * 0x10]; + const u32 diffuse = regs[InternalRegs::Light0Diffuse + i * 0x10]; + const u32 ambient = regs[InternalRegs::Light0Ambient + i * 0x10]; + const u32 lightXY = regs[InternalRegs::Light0XY + i * 0x10]; + const u32 lightZ = regs[InternalRegs::Light0Z + i * 0x10]; + + const u32 spotlightXY = regs[InternalRegs::Light0SpotlightXY + i * 0x10]; + const u32 spotlightZ = regs[InternalRegs::Light0SpotlightZ + i * 0x10]; + const u32 attenuationBias = regs[InternalRegs::Light0AttenuationBias + i * 0x10]; + const u32 attenuationScale = 
regs[InternalRegs::Light0AttenuationScale + i * 0x10]; + +#define lightColorToVec3(value) \ + { \ + float(Helpers::getBits<20, 8>(value)) / 255.0f, \ + float(Helpers::getBits<10, 8>(value)) / 255.0f, \ + float(Helpers::getBits<0, 8>(value)) / 255.0f, \ + } + light.specular0 = lightColorToVec3(specular0); + light.specular1 = lightColorToVec3(specular1); + light.diffuse = lightColorToVec3(diffuse); + light.ambient = lightColorToVec3(ambient); + light.position[0] = Floats::f16::fromRaw(u16(lightXY)).toFloat32(); + light.position[1] = Floats::f16::fromRaw(u16(lightXY >> 16)).toFloat32(); + light.position[2] = Floats::f16::fromRaw(u16(lightZ)).toFloat32(); + + // Fixed point 1.1.11 to float, without negation + light.spotlightDirection[0] = float(s32(spotlightXY & 0x1FFF) << 19 >> 19) / 2047.0; + light.spotlightDirection[1] = float(s32((spotlightXY >> 16) & 0x1FFF) << 19 >> 19) / 2047.0; + light.spotlightDirection[2] = float(s32(spotlightZ & 0x1FFF) << 19 >> 19) / 2047.0; + + light.distanceAttenuationBias = Floats::f20::fromRaw(attenuationBias & 0xFFFFF).toFloat32(); + light.distanceAttenuationScale = Floats::f20::fromRaw(attenuationScale & 0xFFFFF).toFloat32(); +#undef lightColorToVec3 + } } gl.bindUBO(programEntry.uboBinding); From e1268f57b567b4cf7da5822298978a244056687b Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jul 2024 04:21:00 +0300 Subject: [PATCH 092/251] Shadergen: Fix attribute declarations --- src/core/PICA/shader_gen_glsl.cpp | 28 +++++++++----------- src/host_shaders/opengl_fragment_shader.frag | 2 +- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 98a10bca..fea9786e 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -59,9 +59,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { layout(location = 6) in vec3 a_view; layout(location = 7) in
vec2 a_texcoord2; - out vec3 v_normal; - out vec3 v_tangent; - out vec3 v_bitangent; + out vec4 v_quaternion; out vec4 v_colour; out vec3 v_texcoord0; out vec2 v_texcoord1; @@ -77,12 +75,6 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); } - vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { - vec3 u = q.xyz; - float s = q.w; - return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); - } - void main() { gl_Position = a_coords; vec4 colourAbs = abs(a_vertexColour); @@ -92,10 +84,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); v_view = a_view; - - v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); - v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); - v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); + v_quaternion = a_quaternion; #ifndef USING_GLES gl_ClipDistance[0] = -a_coords.z; @@ -128,9 +117,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConf // Input and output attributes ret += R"( - in vec3 v_tangent; - in vec3 v_normal; - in vec3 v_bitangent; + in vec4 v_quaternion; in vec4 v_colour; in vec3 v_texcoord0; in vec2 v_texcoord1; @@ -148,6 +135,15 @@ std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConf )"; ret += uniformDefinition; + if (config.lighting.enable) { + ret += R"( + vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + vec3 u = q.xyz; + float s = q.w; + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); + } + )"; + } // Emit main function for fragment shader // When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour diff --git 
a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index 23c5c4cb..e42d8e57 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -586,4 +586,4 @@ void main() { break; } } -} +} \ No newline at end of file From 6279ce3df28baeba4f65e0f93dec57a33853d179 Mon Sep 17 00:00:00 2001 From: Paris Oplopoios Date: Thu, 18 Jul 2024 04:54:28 +0300 Subject: [PATCH 093/251] Move comments to docs, sign extend stuff for Android (#539) * Move documentation, sign extend spot dir * Update gles.patch * Fix compilation errors * Update gles.patch --- .github/gles.patch | 66 ++++++++-------- docs/3ds/lighting.md | 79 +++++++++++++++++++ src/host_shaders/opengl_fragment_shader.frag | 83 ++++---------------- 3 files changed, 127 insertions(+), 101 deletions(-) create mode 100644 docs/3ds/lighting.md diff --git a/.github/gles.patch b/.github/gles.patch index 99258011..a27b3d00 100644 --- a/.github/gles.patch +++ b/.github/gles.patch @@ -21,7 +21,7 @@ index 990e2f80..2e7842ac 100644 void main() { diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag -index 23c5c4cb..a9851a8b 100644 +index b4ad7ecc..98b1bd80 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,4 +1,5 @@ @@ -31,7 +31,7 @@ index 23c5c4cb..a9851a8b 100644 in vec4 v_quaternion; in vec4 v_colour; -@@ -189,11 +190,17 @@ float lutLookup(uint lut, int index) { +@@ -164,11 +165,17 @@ float lutLookup(uint lut, int index) { return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; } @@ -50,8 +50,8 @@ index 23c5c4cb..a9851a8b 100644 } // Convert an arbitrary-width floating point literal to an f32 -@@ -257,16 +264,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light - // If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue. 
+@@ -208,16 +215,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment - if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { @@ -70,26 +70,23 @@ index 23c5c4cb..a9851a8b 100644 switch (input_id) { case 0u: { delta = dot(normal, normalize(half_vector)); -@@ -285,14 +292,14 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light - break; - } - case 4u: { -- // These are ints so that bitfieldExtract sign extends for us -+ // These are ints so that bitfieldExtractCompat sign extends for us +@@ -239,11 +246,11 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u))); int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u))); - // These are fixed point 1.1.11 values, so we need to convert them to float -- float x = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; -- float y = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0; -- float z = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0; -+ float x = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; -+ float y = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0; -+ float z = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0; - vec3 spotlight_vector = vec3(x, y, z); - delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector - break; -@@ -310,9 +317,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light +- // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions ++ // Sign extend them. 
Normally bitfieldExtractCompat would do that but it's missing on some versions + // of GLSL so we do it manually +- int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); +- int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); +- int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); ++ int se_x = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); ++ int se_y = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); ++ int se_z = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; +@@ -270,9 +277,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light } // 0 = enabled @@ -101,16 +98,16 @@ index 23c5c4cb..a9851a8b 100644 delta = max(delta, 0.0); } else { delta = abs(delta); -@@ -339,7 +346,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); - +@@ -296,7 +303,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + // Implements the following algorthm: https://mathb.in/26766 + void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); - if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { primary_color = secondary_color = vec4(0.0); return; } -@@ -356,7 +363,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -313,7 +320,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); @@ -118,8 +115,8 @@ index 23c5c4cb..a9851a8b 100644 + uint bump_mode = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 28, 2); // Bump mode is ignored for now because it breaks some games ie. 
Toad Treasure Tracker - // Could be because the texture is not sampled correctly, may need the clamp/border color configurations -@@ -370,15 +377,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + switch (bump_mode) { +@@ -326,15 +333,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); @@ -138,7 +135,7 @@ index 23c5c4cb..a9851a8b 100644 uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u)); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u)); -@@ -390,12 +397,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -346,12 +353,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { float light_distance; vec3 light_position = vec3( @@ -154,7 +151,7 @@ index 23c5c4cb..a9851a8b 100644 light_vector = light_position + v_view; } -@@ -411,14 +418,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -367,23 +374,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { float NdotL = dot(normal, light_vector); // N dot Li // Two sided diffuse @@ -172,9 +169,8 @@ index 23c5c4cb..a9851a8b 100644 if (use_geo_0 || use_geo_1) { geometric_factor = dot(half_vector, half_vector); geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0); -@@ -430,9 +437,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - // fragment and the distance attenuation scale and bias to calculate where in the LUT to look up. 
- // See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE + } + float distance_attenuation = 1.0; - if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { - uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); @@ -185,7 +181,7 @@ index 23c5c4cb..a9851a8b 100644 float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); -@@ -477,8 +484,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -428,8 +435,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } @@ -229,10 +225,10 @@ index 057f9a88..dc735ced 100644 v_quaternion = a_quaternion; } diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp -index 9997e63b..5d9d7804 100644 +index 828fb784..a1861b77 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp -@@ -561,22 +561,22 @@ namespace OpenGL { +@@ -568,22 +568,22 @@ namespace OpenGL { static void disableScissor() { glDisable(GL_SCISSOR_TEST); } static void enableBlend() { glEnable(GL_BLEND); } static void disableBlend() { glDisable(GL_BLEND); } diff --git a/docs/3ds/lighting.md b/docs/3ds/lighting.md new file mode 100644 index 00000000..9f4ff2f2 --- /dev/null +++ b/docs/3ds/lighting.md @@ -0,0 +1,79 @@ +## Info on the lighting implementation + +### Missing shadow attenuation +Shadow attenuation samples a texture unit, and that likely needs render to texture for most games so that they can construct +their shadow map. As such the colors are not multiplied by the shadow attenuation value, so there's no shadows. 
+ +### Missing bump mapping +Bump mapping also samples a texture unit, most likely doesn't need render to texture however may need better texture sampling +implementation (such as GPUREG_TEXUNITi_BORDER_COLOR, GPUREG_TEXUNITi_BORDER_PARAM). Bump mapping would work for some things, +namely the 3ds-examples bump mapping demo, but would break others such as Toad Treasure Tracker with a naive `texture` implementation. + +Also the CP configuration is missing, because it needs a tangent map implementation. It is currently marked with error_unimpl. + +### samplerEnabledBitfields +Holds the enabled state of the lighting samples for various PICA configurations +As explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0 + +```c +const bool samplerEnabled[9 * 7] = bool[9 * 7]( + // D0 D1 SP FR RB RG RR + true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR + false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR + true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR + true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR + true, true, true, false, true, true, true, // Configuration 4: All except for FR + true, false, true, true, true, true, true, // Configuration 5: All except for D1 + true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG + false, false, false, false, false, false, false, // Configuration 7: Unused + true, true, true, true, true, true, true // Configuration 8: All +); +``` + +The above has been condensed to two uints for performance reasons. 
+You can confirm they are the same by running the following: +```c +const uint samplerEnabledBitfields[2] = { 0x7170e645u, 0x7f013fefu }; +for (int i = 0; i < 9 * 7; i++) { + unsigned arrayIndex = (i >> 5); + bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u; + if (samplerEnabled[i] == b) { + printf("%d: happy\n", i); + } else { + printf("%d: unhappy\n", i); + } +} +``` + +### lightLutLookup +lut_id is one of these values +0 D0 +1 D1 +2 SP +3 FR +4 RB +5 RG +6 RR + +lut_index on the other hand represents the actual index of the LUT in the texture +u_tex_lighting_lut has 24 LUTs and they are used like so: +0 D0 +1 D1 +2 is missing because SP uses LUTs 8-15 +3 FR +4 RB +5 RG +6 RR +8-15 SP0-7 +16-23 DA0-7, but this is not handled in this function as the lookup is a bit different + +The light environment configuration controls which LUTs are available for use +If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1 +If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue. + +### Distance attenuation +Distance attenuation is computed differently from the other factors, for example +it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use +GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the +fragment and the distance attenuation scale and bias to calculate where in the LUT to look up. 
+See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE \ No newline at end of file diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index e42d8e57..6f30ebf0 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -37,38 +37,13 @@ vec4 tevNextPreviousBuffer; bool tevUnimplementedSourceFlag = false; vec3 normal; -// Holds the enabled state of the lighting samples for various PICA configurations -// As explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0 -// const bool samplerEnabled[9 * 7] = bool[9 * 7]( -// // D0 D1 SP FR RB RG RR -// true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR -// false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR -// true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR -// true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR -// true, true, true, false, true, true, true, // Configuration 4: All except for FR -// true, false, true, true, true, true, true, // Configuration 5: All except for D1 -// true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG -// false, false, false, false, false, false, false, // Configuration 7: Unused -// true, true, true, true, true, true, true // Configuration 8: All -// ); - -// The above have been condensed to two uints to save space -// You can confirm they are the same by running the following: -// for (int i = 0; i < 9 * 7; i++) { -// unsigned arrayIndex = (i >> 5); -// bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u; -// if (samplerEnabled[i] == b) { -// printf("%d: happy\n", i); -// } else { -// printf("%d: unhappy\n", i); -// } -// } +// See docs/3ds/lighting.md const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu); bool isSamplerEnabled(uint
environment_id, uint lut_id) { uint index = 7 * environment_id + lut_id; uint arrayIndex = (index >> 5); - return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31))) != 0u; + return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u; } // OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements): @@ -182,8 +157,8 @@ uint GPUREG_LIGHTING_CONFIG1; uint GPUREG_LIGHTING_LUTINPUT_SELECT; uint GPUREG_LIGHTING_LUTINPUT_SCALE; uint GPUREG_LIGHTING_LUTINPUT_ABS; -bool error_unimpl; -vec4 unimpl_color; +bool error_unimpl = false; +vec4 unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); float lutLookup(uint lut, int index) { return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; @@ -219,27 +194,6 @@ float decodeFP(uint hex, uint E, uint M) { float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light_vector, vec3 half_vector) { uint lut_index; - // lut_id is one of these values - // 0 D0 - // 1 D1 - // 2 SP - // 3 FR - // 4 RB - // 5 RG - // 6 RR - - // lut_index on the other hand represents the actual index of the LUT in the texture - // u_tex_lighting_lut has 24 LUTs and they are used like so: - // 0 D0 - // 1 D1 - // 2 is missing because SP uses LUTs 8-15 - // 3 FR - // 4 RB - // 5 RG - // 6 RR - // 8-15 SP0-7 - // 16-23 DA0-7, but this is not handled in this function as the lookup is a bit different - int bit_in_config1; if (lut_id == SP_LUT) { // These are the spotlight attenuation LUTs @@ -252,9 +206,6 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light error_unimpl = true; } - // The light environment configuration controls which LUTs are available for use - // If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1 - // If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue. 
bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { @@ -285,14 +236,23 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light break; } case 4u: { - // These are ints so that bitfieldExtract sign extends for us int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u))); int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u))); + // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions + // of GLSL so we do it manually + int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); + int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); + int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; + if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000; + // These are fixed point 1.1.11 values, so we need to convert them to float - float x = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; - float y = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0; - float z = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0; + float x = float(se_x) / 2047.0; + float y = float(se_y) / 2047.0; + float z = float(se_z) / 2047.0; vec3 spotlight_vector = vec3(x, y, z); delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector break; @@ -335,9 +295,6 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { // Implements the following algorthm: https://mathb.in/26766 void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - error_unimpl = false; - unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); - uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { primary_color = 
secondary_color = vec4(0.0); @@ -359,7 +316,6 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2); // Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker - // Could be because the texture is not sampled correctly, may need the clamp/border color configurations switch (bump_mode) { default: { normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion); @@ -424,11 +380,6 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0); } - // Distance attenuation is computed differently from the other factors, for example - // it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use - // GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the - // fragment and the distance attenuation scale and bias to calculate where in the LUT to look up. 
- // See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE float distance_attenuation = 1.0; if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); From 00037d8a5e0a99c20405e2d54628dd071628599d Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jul 2024 19:27:12 +0300 Subject: [PATCH 094/251] Shadergen: Start implementing lighting --- include/PICA/shader_gen.hpp | 1 + src/core/PICA/shader_gen_glsl.cpp | 95 +++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index 21d85d98..c74e6953 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -27,6 +27,7 @@ namespace PICA::ShaderGen { void applyAlphaTest(std::string& shader, const PICARegs& regs); void compileLights(std::string& shader, const PICA::FragmentConfig& config); + void compileLUTLookup(std::string& shader, u32 lightIndex, u32 lutIndex, bool abs); u32 textureConfig = 0; diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index fea9786e..96c44ec2 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -135,6 +135,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConf )"; ret += uniformDefinition; + if (config.lighting.enable) { ret += R"( vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { @@ -142,6 +143,10 @@ std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConf float s = q.w; return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); } + + float lutLookup(uint lut, int index) { + return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; + } )"; } @@ -452,5 +457,95 @@ void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentC return; 
} + // Currently ignore bump mode + shader += "vec3 normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion);\n"; + shader += R"( + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec3 light_position, light_vector, half_vector, specular0, specular1, light_factor; + float light_distance, NdotL, geometric_factor, distance_attenuation, distance_att_delta; + float spotlight_attenuation, specular0_dist, specular1_dist, reflected_color; + )"; + + for (int i = 0; i < config.lighting.lightNum; i++) { + const auto& lightConfig = config.lighting.lights[i]; + shader += "light_position = lightSources[" + std::to_string(i) + "].position;\n"; + + if (lightConfig.directional) { // Directional lighting + shader += "light_vector = light_position + v_view;\n"; + } else { // Positional lighting + shader += "light_vector = light_position;\n"; + } + + shader += R"( + light_distance = length(light_vector); + light_vector = normalize(light_vector); + half_vector = light_vector + normalize(v_view); + + distance_attenuation = 1.0; + NdotL = dot(normal, light_vector); + )"; + + shader += lightConfig.twoSidedDiffuse ? "NdotL = abs(NdotL);\n" : "NdotL = max(NdotL, 0.0);\n"; + + if (lightConfig.geometricFactor0 || lightConfig.geometricFactor1) { + shader += R"( + geometric_factor = dot(half_vector, half_vector); + geometric_factor = (geometric_factor == 0.0) ? 
0.0 : min(NdotL / geometric_factor, 1.0); + )"; + } + + if (lightConfig.distanceAttenuationEnable) { + shader += "distance_att_delta = clamp(light_distance * lightSources[" + std::to_string(i) + "].distanceAttenuationScale + lightSources[" + + std::to_string(i) + "].distanceAttenuationBias, 0.0, 1.0);\n"; + + shader += + "distance_attenuation = lutLookup(" + std::to_string(16 + i) + ", int(clamp(floor(distance_att_delta * 255.0), 0.0, 255.0)));\n"; + } + + // TODO: LightLutLookup stuff + shader += "spotlight_attenuation = 0.0; // Placeholder\n"; + shader += "specular0_dist = 0.0; // Placeholder\n"; + shader += "specular1_dist = 0.0; // Placeholder\n"; + shader += "reflected_color = vec3(0.0); // Placeholder\n"; + + shader += "specular0 = lightSources[" + std::to_string(i) + "].specular0;\n"; + shader += "specular0 = specular0 * specular0_dist"; + if (lightConfig.geometricFactor0) { + shader += " * geometric_factor;\n"; + } else { + shader += ";\n"; + } + + shader += "specular1 = lightSources[" + std::to_string(i) + "].specular1;\n"; + shader += "specular1 = specular1 * specular1_dist * reflected_color"; + if (lightConfig.geometricFactor1) { + shader += " * geometric_factor;\n"; + } else { + shader += ";\n"; + } + + shader += "light_factor = distance_attenuation * spotlight_attenuation;\n"; + + if (config.lighting.clampHighlights) { + shader += "specular_sum.rgb += light_factor * (NdotL == 0.0 ? 
0.0 : 1.0) * (specular0 + specular1);\n"; + } else { + shader += "specular_sum.rgb += light_factor * (specular0 + specular1);\n"; + } + + shader += "diffuse_sum.rgb += vec3(0.0); // Placeholder\n"; + } + + // TODO: Rest of the post-per-light stuff + shader += R"( + vec4 global_ambient = vec4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0); + + primaryColor = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0)); + secondaryColor = clamp(specular_sum, vec4(0.0), vec4(1.0)); + )"; +} + +void FragmentGenerator::compileLUTLookup(std::string& shader, u32 lightIndex, u32 lutIndex, bool abs) { + // TODO } \ No newline at end of file From b4ae32960c121b0125721388267d2e657aa61828 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jul 2024 20:10:20 +0300 Subject: [PATCH 095/251] Moar lighting --- include/PICA/regs.hpp | 3 +- include/PICA/shader_gen.hpp | 3 +- src/core/PICA/shader_gen_glsl.cpp | 47 ++++++++++++++++++++++++++++++- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index bd1f823e..2482c25b 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -252,7 +252,8 @@ namespace PICA { enum : u32 { LUT_D0 = 0, LUT_D1, - LUT_FR, + // LUT 2 is not used, the emulator internally uses it for referring to the current source's spotlight in shaders + LUT_FR = 0x3, LUT_RB, LUT_RG, LUT_RR, diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index c74e6953..1d4d07c5 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -27,7 +27,8 @@ namespace PICA::ShaderGen { void applyAlphaTest(std::string& shader, const PICARegs& regs); void compileLights(std::string& shader, const PICA::FragmentConfig& config); - void compileLUTLookup(std::string& shader, u32 lightIndex, u32 lutIndex, bool abs); + void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, const PICARegs& regs, u32 lightIndex, u32 lutID, bool 
abs); + bool isSamplerEnabled(u32 environmentID, u32 lutID); u32 textureConfig = 0; diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 96c44ec2..cc01fad1 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -29,6 +29,11 @@ static constexpr const char* uniformDefinition = R"( }; )"; +// There's actually 8 different LUTs (SP0-SP7), one for each light with different indices (8-15) +// We use an unused LUT value for "this light source's spotlight" instead and figure out which light source to use in compileLutLookup +// This is particularly intuitive in several places, such as checking if a LUT is enabled +static constexpr int spotlightLutIndex = 2; + std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { std::string ret = ""; @@ -546,6 +551,46 @@ void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentC )"; } -void FragmentGenerator::compileLUTLookup(std::string& shader, u32 lightIndex, u32 lutIndex, bool abs) { +bool FragmentGenerator::isSamplerEnabled(u32 environmentID, u32 lutID) { + static constexpr bool samplerEnabled[9 * 7] = { + // D0 D1 SP FR RB RG RR + true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR + false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR + true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR + true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR + true, true, true, false, true, true, true, // Configuration 4: All except for FR + true, false, true, true, true, true, true, // Configuration 5: All except for D1 + true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG + false, false, false, false, false, false, false, // Configuration 7: Unused + true, true, true, true, true, true, true, // Configuration 8: All + }; + + return samplerEnabled[environmentID * 7 + lutID]; +} + +void 
FragmentGenerator::compileLUTLookup( + std::string& shader, const PICA::FragmentConfig& config, const PICARegs& regs, u32 lightIndex, u32 lutID, bool abs +) { + uint lutIndex = 0; + int bitInConfig1 = 0; + + if (lutID == spotlightLutIndex) { + // These are the spotlight attenuation LUTs + bitInConfig1 = 8 + (lightIndex & 0x7); + lutIndex = 8u + lightIndex; + } else if (lutID <= 6) { + bitInConfig1 = 16 + lutID; + lutIndex = lutID; + } else { + Helpers::warn("Shadergen: Unimplemented LUT value"); + } + + const bool samplerEnabled = isSamplerEnabled(config.lighting.config, lutID); + const u32 config1 = regs[InternalRegs::LightConfig1]; + + if (!samplerEnabled || ((config1 >> bitInConfig1) != 0)) { + // 1.0 + } + // TODO } \ No newline at end of file From 2f50038db9967821ec9a2cf22f0b44ac293b26ae Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jul 2024 22:56:05 +0300 Subject: [PATCH 096/251] Shadergen: Lighting almost 100% working --- include/PICA/pica_frag_config.hpp | 2 +- include/PICA/shader_gen.hpp | 4 +- src/core/PICA/shader_gen_glsl.cpp | 146 ++++++++++++++++++++------- src/core/renderer_gl/renderer_gl.cpp | 2 +- 4 files changed, 111 insertions(+), 43 deletions(-) diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index cfb22e5c..cdb68854 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -140,7 +140,7 @@ namespace PICA { const u32 lutAbs = regs[InternalRegs::LightLUTAbs]; const u32 lutSelect = regs[InternalRegs::LightLUTSelect]; const u32 lutScale = regs[InternalRegs::LightLUTScale]; - static constexpr float scales[] = {1.0f, 2.0f, 4.0f, 8.0f, 0.25f, 0.5f}; + static constexpr float scales[] = {1.0f, 2.0f, 4.0f, 8.0f, 0.0f, 0.0f, 0.25f, 0.5f}; if (d0.enable) { d0.absInput = Helpers::getBit<1>(lutAbs) == 0; diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index 1d4d07c5..372e0550 100644 --- 
a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -26,8 +26,8 @@ namespace PICA::ShaderGen { void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op); void applyAlphaTest(std::string& shader, const PICARegs& regs); - void compileLights(std::string& shader, const PICA::FragmentConfig& config); - void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, const PICARegs& regs, u32 lightIndex, u32 lutID, bool abs); + void compileLights(std::string& shader, const PICA::FragmentConfig& config, const PICARegs& regs); + void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, const PICARegs& regs, u32 lightIndex, u32 lutID); bool isSamplerEnabled(u32 environmentID, u32 lutID); u32 textureConfig = 0; diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index cc01fad1..8d955d50 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -133,10 +133,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConf uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; - // GLES doesn't support sampler1DArray, as such we'll have to change how we handle lighting later -#ifndef USING_GLES uniform sampler2D u_tex_lighting_lut; -#endif )"; ret += uniformDefinition; @@ -152,6 +149,10 @@ std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConf float lutLookup(uint lut, int index) { return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; } + + vec3 regToColor(uint reg) { + return (1.0 / 255.0) * vec3(float((reg >> 20) & 0xFF), float((reg >> 10) & 0xFF), float(reg & 0xFF)); + } )"; } @@ -167,7 +168,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConf vec4 secondaryColor = vec4(0.0); )"; - compileLights(ret, config); + compileLights(ret, config, regs); ret += R"( vec3 colorOp1 = vec3(0.0); @@ -457,7 +458,7 @@ void 
FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs shader += ") { discard; }\n"; } -void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentConfig& config) { +void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentConfig& config, const PICARegs& regs) { if (!config.lighting.enable) { return; } @@ -467,15 +468,21 @@ void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentC shader += R"( vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); - vec3 light_position, light_vector, half_vector, specular0, specular1, light_factor; + vec3 light_position, light_vector, half_vector, specular0, specular1, reflected_color; - float light_distance, NdotL, geometric_factor, distance_attenuation, distance_att_delta; - float spotlight_attenuation, specular0_dist, specular1_dist, reflected_color; + float light_distance, NdotL, light_factor, geometric_factor, distance_attenuation, distance_att_delta; + float spotlight_attenuation, specular0_dist, specular1_dist; + float lut_lookup_result, lut_lookup_delta; + int lut_lookup_index; )"; + uint lightID = 0;; + for (int i = 0; i < config.lighting.lightNum; i++) { - const auto& lightConfig = config.lighting.lights[i]; - shader += "light_position = lightSources[" + std::to_string(i) + "].position;\n"; + lightID = config.lighting.lights[i].num; + + const auto& lightConfig = config.lighting.lights[lightID]; + shader += "light_position = lightSources[" + std::to_string(lightID) + "].position;\n"; if (lightConfig.directional) { // Directional lighting shader += "light_vector = light_position + v_view;\n"; @@ -502,49 +509,76 @@ void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentC } if (lightConfig.distanceAttenuationEnable) { - shader += "distance_att_delta = clamp(light_distance * lightSources[" + std::to_string(i) + "].distanceAttenuationScale + lightSources[" + - std::to_string(i) + 
"].distanceAttenuationBias, 0.0, 1.0);\n"; + shader += "distance_att_delta = clamp(light_distance * lightSources[" + std::to_string(lightID) + + "].distanceAttenuationScale + lightSources[" + std::to_string(lightID) + "].distanceAttenuationBias, 0.0, 1.0);\n"; - shader += - "distance_attenuation = lutLookup(" + std::to_string(16 + i) + ", int(clamp(floor(distance_att_delta * 255.0), 0.0, 255.0)));\n"; + shader += "distance_attenuation = lutLookup(" + std::to_string(16 + lightID) + + ", int(clamp(floor(distance_att_delta * 256.0), 0.0, 255.0)));\n"; } - // TODO: LightLutLookup stuff - shader += "spotlight_attenuation = 0.0; // Placeholder\n"; - shader += "specular0_dist = 0.0; // Placeholder\n"; - shader += "specular1_dist = 0.0; // Placeholder\n"; - shader += "reflected_color = vec3(0.0); // Placeholder\n"; + compileLUTLookup(shader, config, regs, lightID, spotlightLutIndex); + shader += "spotlight_attenuation = lut_lookup_result;\n"; - shader += "specular0 = lightSources[" + std::to_string(i) + "].specular0;\n"; - shader += "specular0 = specular0 * specular0_dist"; + compileLUTLookup(shader, config, regs, lightID, PICA::Lights::LUT_D0); + shader += "specular0_dist = lut_lookup_result;\n"; + + compileLUTLookup(shader, config, regs, lightID, PICA::Lights::LUT_D1); + shader += "specular1_dist = lut_lookup_result;\n"; + + compileLUTLookup(shader, config, regs, lightID, PICA::Lights::LUT_RR); + shader += "reflected_color.r = lut_lookup_result;\n"; + + if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RG)) { + compileLUTLookup(shader, config, regs, lightID, PICA::Lights::LUT_RG); + shader += "reflected_color.g = lut_lookup_result;\n"; + } else { + shader += "reflected_color.g = reflected_color.r;\n"; + } + + if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RB)) { + compileLUTLookup(shader, config, regs, lightID, PICA::Lights::LUT_RB); + shader += "reflected_color.b = lut_lookup_result;\n"; + } else { + shader += "reflected_color.b = 
reflected_color.r;\n"; + } + + shader += "specular0 = lightSources[" + std::to_string(lightID) + "].specular0 * specular0_dist;\n"; if (lightConfig.geometricFactor0) { - shader += " * geometric_factor;\n"; - } else { - shader += ";\n"; + shader += "specular0 *= geometric_factor;\n"; } - shader += "specular1 = lightSources[" + std::to_string(i) + "].specular1;\n"; - shader += "specular1 = specular1 * specular1_dist * reflected_color"; + shader += "specular1 = lightSources[" + std::to_string(lightID) + "].specular1 * specular1_dist * reflected_color;\n"; if (lightConfig.geometricFactor1) { - shader += " * geometric_factor;\n"; - } else { - shader += ";\n"; + shader += "specular1 *= geometric_factor;\n"; } shader += "light_factor = distance_attenuation * spotlight_attenuation;\n"; - + if (config.lighting.clampHighlights) { shader += "specular_sum.rgb += light_factor * (NdotL == 0.0 ? 0.0 : 1.0) * (specular0 + specular1);\n"; } else { shader += "specular_sum.rgb += light_factor * (specular0 + specular1);\n"; } - shader += "diffuse_sum.rgb += vec3(0.0); // Placeholder\n"; + shader += "diffuse_sum.rgb += light_factor * lightSources[" + std::to_string(lightID) + "].ambient + lightSources[" + + std::to_string(lightID) + "].diffuse * NdotL;\n"; + } + + if (config.lighting.enablePrimaryAlpha || config.lighting.enableSecondaryAlpha) { + compileLUTLookup(shader, config, regs, lightID, PICA::Lights::LUT_FR); + shader += "float fresnel_factor = lut_lookup_result;\n"; + } + + if (config.lighting.enablePrimaryAlpha) { + shader += "diffuse_sum.a = fresnel_factor;\n"; + } + + if (config.lighting.enableSecondaryAlpha) { + shader += "specular_sum.a = fresnel_factor;\n"; } - // TODO: Rest of the post-per-light stuff shader += R"( - vec4 global_ambient = vec4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0); + vec4 global_ambient = vec4(regToColor(globalAmbientLight), 1.0); primaryColor = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0)); secondaryColor = clamp(specular_sum, vec4(0.0), 
vec4(1.0)); @@ -568,9 +602,7 @@ bool FragmentGenerator::isSamplerEnabled(u32 environmentID, u32 lutID) { return samplerEnabled[environmentID * 7 + lutID]; } -void FragmentGenerator::compileLUTLookup( - std::string& shader, const PICA::FragmentConfig& config, const PICARegs& regs, u32 lightIndex, u32 lutID, bool abs -) { +void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, const PICARegs& regs, u32 lightIndex, u32 lutID) { uint lutIndex = 0; int bitInConfig1 = 0; @@ -588,9 +620,45 @@ void FragmentGenerator::compileLUTLookup( const bool samplerEnabled = isSamplerEnabled(config.lighting.config, lutID); const u32 config1 = regs[InternalRegs::LightConfig1]; - if (!samplerEnabled || ((config1 >> bitInConfig1) != 0)) { - // 1.0 + if (!samplerEnabled || ((config1 >> bitInConfig1) & 1)) { + shader += "lut_lookup_result = 1.0;\n"; + return; } - // TODO + static constexpr float scales[] = {1.0f, 2.0f, 4.0f, 8.0f, 0.0f, 0.0f, 0.25f, 0.5f}; + const u32 lutAbs = regs[InternalRegs::LightLUTAbs]; + const u32 lutSelect = regs[InternalRegs::LightLUTSelect]; + const u32 lutScale = regs[InternalRegs::LightLUTScale]; + + // The way these bitfields are encoded is so cursed + float scale = scales[(lutScale >> (4 * lutIndex)) & 0x7]; + uint inputID = (lutSelect >> (4 * lutIndex)) & 0x7; + bool absEnabled = ((lutAbs >> (4 * lutIndex + 1)) & 0x1) == 0; // 0 = enabled... 
+ + switch (inputID) { + case 0: shader += "lut_lookup_delta = dot(normal, normalize(half_vector));\n"; break; + case 1: shader += "lut_lookup_delta = dot(normalize(v_view), normalize(half_vector));\n"; break; + case 2: shader += "lut_lookup_delta = dot(normal, normalize(v_view));\n"; break; + case 3: shader += "lut_lookup_delta = dot(normal, light_vector);\n"; break; + + case 4: // Spotlight + default: + Helpers::warn("Shadergen: Unimplemented LUT select"); + shader += "lut_lookup_delta = 1.0;\n"; + break; + } + + if (absEnabled) { + bool twoSidedDiffuse = config.lighting.lights[lightIndex].twoSidedDiffuse; + shader += twoSidedDiffuse ? "lut_lookup_delta = abs(lut_lookup_delta);\n" : "lut_lookup_delta = max(lut_lookup_delta, 0.0);\n"; + shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", int(clamp(floor(lut_lookup_delta * 256.0), 0.0, 255.0)));\n"; + if (scale != 1.0) { + shader += "lut_lookup_result *= " + std::to_string(scale) + ";\n"; + } + } else { + // Range is [-1, 1] so we need to map it to [0, 1] + shader += "lut_lookup_index = int(clamp(floor(lut_lookup_delta * 128.0), -128.f, 127.f));\n"; + shader += "if (lut_lookup_index < 0) lut_lookup_index += 256;\n"; + shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", lut_lookup_index) *" + std::to_string(scale) + ";\n"; + } } \ No newline at end of file diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 38575e40..bcf33b57 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -903,7 +903,7 @@ OpenGL::Program& RendererGL::getSpecializedShader() { light.ambient = lightColorToVec3(ambient); light.position[0] = Floats::f16::fromRaw(u16(lightXY)).toFloat32(); light.position[1] = Floats::f16::fromRaw(u16(lightXY >> 16)).toFloat32(); - light.position[2] = Floats::f16::fromRaw(u16(lightXY)).toFloat32(); + light.position[2] = Floats::f16::fromRaw(u16(lightZ)).toFloat32(); // Fixed point 1.11.1 
to float, without negation light.spotlightDirection[0] = float(s32(spotlightXY & 0x1FFF) << 19 >> 19) / 2047.0; From bd38f9a8abcd1db3124a752ce49c1ee6fb299fde Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jul 2024 23:34:23 +0300 Subject: [PATCH 097/251] Shadergen: Add spotlight --- src/core/PICA/shader_gen_glsl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 8d955d50..b4525634 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -640,8 +640,8 @@ void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::Fragme case 1: shader += "lut_lookup_delta = dot(normalize(v_view), normalize(half_vector));\n"; break; case 2: shader += "lut_lookup_delta = dot(normal, normalize(v_view));\n"; break; case 3: shader += "lut_lookup_delta = dot(normal, light_vector);\n"; break; + case 4: shader += "lut_lookup_delta = dot(normal, lightSources[" + std ::to_string(lightIndex) + "].spotlightDirection);\n"; break; - case 4: // Spotlight default: Helpers::warn("Shadergen: Unimplemented LUT select"); shader += "lut_lookup_delta = 1.0;\n"; From 53c76ae0d425579754fdbe5439708bb531acff74 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jul 2024 23:57:43 +0300 Subject: [PATCH 098/251] Shadergen: Fix spotlight --- src/core/PICA/shader_gen_glsl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index b4525634..136647d7 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -640,7 +640,7 @@ void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::Fragme case 1: shader += "lut_lookup_delta = dot(normalize(v_view), normalize(half_vector));\n"; break; case 2: shader += "lut_lookup_delta = 
dot(normal, normalize(v_view));\n"; break; case 3: shader += "lut_lookup_delta = dot(normal, light_vector);\n"; break; - case 4: shader += "lut_lookup_delta = dot(normal, lightSources[" + std ::to_string(lightIndex) + "].spotlightDirection);\n"; break; + case 4: shader += "lut_lookup_delta = dot(light_vector, lightSources[" + std ::to_string(lightIndex) + "].spotlightDirection);\n"; break; default: Helpers::warn("Shadergen: Unimplemented LUT select"); From e36b6c77a7110df3652277e36bafa6fa96774413 Mon Sep 17 00:00:00 2001 From: offtkp Date: Fri, 19 Jul 2024 00:07:29 +0300 Subject: [PATCH 099/251] Fix lugi and toad treasure tracker in ubershader Co-authored-by: wheremyfoodat <4909372+wheremyfoodat@users.noreply.github.com> --- src/host_shaders/opengl_fragment_shader.frag | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index 6f30ebf0..6a2baa96 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -118,7 +118,7 @@ vec4 tevCalculateCombiner(int tev_id) { case 6u: result.rgb = vec3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB case 7u: result = vec4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add - case 9u: result.rgb = min((source0.rgb + source1.rgb) * source2.rgb, 1.0); break; // Add then multiply + case 9u: result.rgb = min(source0.rgb + source1.rgb, 1.0) * source2.rgb; break; // Add then multiply default: break; } @@ -133,7 +133,7 @@ vec4 tevCalculateCombiner(int tev_id) { case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract case 8u: result.a = min(1.0, source0.a * source1.a + source2.a); break; // Multiply then add - case 9u: result.a = min(1.0, 
(source0.a + source1.a) * source2.a); break; // Add then multiply + case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break; // Add then multiply default: break; } } @@ -277,7 +277,7 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light } else { delta = abs(delta); } - int index = int(clamp(floor(delta * 256.0), 0.f, 255.f)); + int index = int(clamp(floor(delta * 255.0), 0.f, 255.f)); return lutLookup(lut_index, index) * scale; } else { // Range is [-1, 1] so we need to map it to [0, 1] From 90abf8a3769d9e1bebc85cd12d39c20851a9af1c Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 19 Jul 2024 01:29:48 +0300 Subject: [PATCH 100/251] Fix signedness mess-ups in shaders --- src/core/PICA/shader_gen_glsl.cpp | 6 +++--- src/host_shaders/opengl_fragment_shader.frag | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 136647d7..61a2c57c 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -147,11 +147,11 @@ std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConf } float lutLookup(uint lut, int index) { - return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; + return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r; } vec3 regToColor(uint reg) { - return (1.0 / 255.0) * vec3(float((reg >> 20) & 0xFF), float((reg >> 10) & 0xFF), float(reg & 0xFF)); + return (1.0 / 255.0) * vec3(float((reg >> 20u) & 0xFFu), float((reg >> 10u) & 0xFFu), float(reg & 0xFFu)); } )"; } @@ -476,7 +476,7 @@ void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentC int lut_lookup_index; )"; - uint lightID = 0;; + uint lightID = 0; for (int i = 0; i < config.lighting.lightNum; i++) { lightID = config.lighting.lights[i].num; diff --git a/src/host_shaders/opengl_fragment_shader.frag 
b/src/host_shaders/opengl_fragment_shader.frag index 6a2baa96..9f369e39 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -161,7 +161,7 @@ bool error_unimpl = false; vec4 unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); float lutLookup(uint lut, int index) { - return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; + return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r; } vec3 regToColor(uint reg) { From 25098082c78e899cc2e387547358fea8f7ca1f28 Mon Sep 17 00:00:00 2001 From: offtkp Date: Fri, 19 Jul 2024 02:45:09 +0300 Subject: [PATCH 101/251] Use lutID instead of lutIndex --- src/core/PICA/shader_gen_glsl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 61a2c57c..21b55338 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -631,9 +631,9 @@ void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::Fragme const u32 lutScale = regs[InternalRegs::LightLUTScale]; // The way these bitfields are encoded is so cursed - float scale = scales[(lutScale >> (4 * lutIndex)) & 0x7]; - uint inputID = (lutSelect >> (4 * lutIndex)) & 0x7; - bool absEnabled = ((lutAbs >> (4 * lutIndex + 1)) & 0x1) == 0; // 0 = enabled... + float scale = scales[(lutScale >> (4 * lutID)) & 0x7]; + uint inputID = (lutSelect >> (4 * lutID)) & 0x7; + bool absEnabled = ((lutAbs >> (4 * lutID + 1)) & 0x1) == 0; // 0 = enabled... 
switch (inputID) { case 0: shader += "lut_lookup_delta = dot(normal, normalize(half_vector));\n"; break; From ac55c3e3249603e4a77dc2bd7dc97f372dad6f78 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 19 Jul 2024 03:01:12 +0300 Subject: [PATCH 102/251] Shadergen: Fix small register decoding oopsie --- include/PICA/pica_frag_config.hpp | 2 +- include/PICA/regs.hpp | 1 + src/core/PICA/shader_gen_glsl.cpp | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index cdb68854..5338f719 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -116,7 +116,7 @@ namespace PICA { for (int i = 0; i < totalLightCount; i++) { auto& light = lights[i]; - const u32 lightConfig = 0x149 + 0x10 * i; + const u32 lightConfig = regs[InternalRegs::Light0Config + 0x10 * i]; light.num = (regs[InternalRegs::LightPermutation] >> (i * 4)) & 0x7; light.directional = Helpers::getBit<0>(lightConfig); diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 2482c25b..4518e16a 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -77,6 +77,7 @@ namespace PICA { Light0Z = 0x145, Light0SpotlightXY = 0x146, Light0SpotlightZ = 0x147, + Light0Config = 0x149, Light0AttenuationBias = 0x14A, Light0AttenuationScale = 0x14B, diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 21b55338..a892e514 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -485,9 +485,9 @@ void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentC shader += "light_position = lightSources[" + std::to_string(lightID) + "].position;\n"; if (lightConfig.directional) { // Directional lighting - shader += "light_vector = light_position + v_view;\n"; - } else { // Positional lighting shader += "light_vector = light_position;\n"; + } else { // Positional 
lighting + shader += "light_vector = light_position + v_view;\n"; } shader += R"( From 5c1e2912a3c269e01eebefd23274001d1a08341a Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 19 Jul 2024 14:35:01 +0300 Subject: [PATCH 103/251] Shadergen: Minimize shader compilation time by caching the default VS --- include/PICA/shader_gen.hpp | 2 +- include/renderer_gl/renderer_gl.hpp | 3 ++ src/core/PICA/shader_gen_glsl.cpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 8 +-- third_party/opengl/opengl.hpp | 79 +++++++++++++++++----------- 5 files changed, 57 insertions(+), 37 deletions(-) diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index 372e0550..8cdaadfc 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -35,7 +35,7 @@ namespace PICA::ShaderGen { public: FragmentGenerator(API api, Language language) : api(api), language(language) {} std::string generate(const PICARegs& regs, const PICA::FragmentConfig& config); - std::string getVertexShader(const PICARegs& regs); + std::string getDefaultVertexShader(); void setTarget(API api, Language language) { this->api = api; diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 46d344b2..abde96bf 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -66,6 +66,9 @@ class RendererGL final : public Renderer { OpenGL::Texture lightLUTTexture; OpenGL::Framebuffer screenFramebuffer; OpenGL::Texture blankTexture; + // The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation + // We can compile this once and then link it with all other generated fragment shaders + OpenGL::Shader defaultShadergenVs; // Cached recompiled fragment shader struct CachedProgram { diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index a892e514..c7195b25 100644 --- 
a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -34,7 +34,7 @@ static constexpr const char* uniformDefinition = R"( // This is particularly intuitive in several places, such as checking if a LUT is enabled static constexpr int spotlightLutIndex = 2; -std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { +std::string FragmentGenerator::getDefaultVertexShader() { std::string ret = ""; switch (api) { diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index bcf33b57..7fca385d 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -162,6 +162,10 @@ void RendererGL::initGraphicsContextInternal() { OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]); reset(); + + // Initialize the default vertex shader used with shadergen + std::string defaultShadergenVSSource = fragShaderGen.getDefaultVertexShader(); + defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex); } // The OpenGL renderer doesn't need to do anything with the GL context (For Qt frontend) or the SDL window (For SDL frontend) @@ -810,12 +814,10 @@ OpenGL::Program& RendererGL::getSpecializedShader() { OpenGL::Program& program = programEntry.program; if (!program.exists()) { - std::string vs = fragShaderGen.getVertexShader(regs); std::string fs = fragShaderGen.generate(regs, fsConfig); - OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); - program.create({vertShader, fragShader}); + program.create({defaultShadergenVs, fragShader}); gl.useProgram(program); // Init sampler objects. 
Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp index 828fb784..4a08650a 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp @@ -355,46 +355,57 @@ namespace OpenGL { } }; - enum ShaderType { - Fragment = GL_FRAGMENT_SHADER, - Vertex = GL_VERTEX_SHADER, - Geometry = GL_GEOMETRY_SHADER, - Compute = GL_COMPUTE_SHADER, - TessControl = GL_TESS_CONTROL_SHADER, - TessEvaluation = GL_TESS_EVALUATION_SHADER - }; + enum ShaderType { + Fragment = GL_FRAGMENT_SHADER, + Vertex = GL_VERTEX_SHADER, + Geometry = GL_GEOMETRY_SHADER, + Compute = GL_COMPUTE_SHADER, + TessControl = GL_TESS_CONTROL_SHADER, + TessEvaluation = GL_TESS_EVALUATION_SHADER + }; - struct Shader { - GLuint m_handle = 0; + struct Shader { + GLuint m_handle = 0; - Shader() {} - Shader(const std::string_view source, ShaderType type) { create(source, static_cast(type)); } + Shader() {} + Shader(const std::string_view source, ShaderType type) { create(source, static_cast(type)); } - // Returns whether compilation failed or not - bool create(const std::string_view source, GLenum type) { - m_handle = glCreateShader(type); - const GLchar* const sources[1] = { source.data() }; + // Returns whether compilation failed or not + bool create(const std::string_view source, GLenum type) { + m_handle = glCreateShader(type); + const GLchar* const sources[1] = {source.data()}; - glShaderSource(m_handle, 1, sources, nullptr); - glCompileShader(m_handle); + glShaderSource(m_handle, 1, sources, nullptr); + glCompileShader(m_handle); - GLint success; - glGetShaderiv(m_handle, GL_COMPILE_STATUS, &success); - if (success == GL_FALSE) { - char buf[4096]; - glGetShaderInfoLog(m_handle, 4096, nullptr, buf); - fprintf(stderr, "Failed to compile shader\nError: %s\n", buf); - glDeleteShader(m_handle); + GLint success; + glGetShaderiv(m_handle, GL_COMPILE_STATUS, &success); + if (success 
== GL_FALSE) { + char buf[4096]; + glGetShaderInfoLog(m_handle, 4096, nullptr, buf); + fprintf(stderr, "Failed to compile shader\nError: %s\n", buf); + glDeleteShader(m_handle); - m_handle = 0; - } + m_handle = 0; + } - return m_handle != 0; - } + return m_handle != 0; + } - GLuint handle() const { return m_handle; } - bool exists() const { return m_handle != 0; } - }; + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + + void free() { + if (exists()) { + glDeleteShader(m_handle); + m_handle = 0; + } + } + +#ifdef OPENGL_DESTRUCTORS + ~Shader() { free(); } +#endif + }; struct Program { GLuint m_handle = 0; @@ -431,6 +442,10 @@ namespace OpenGL { m_handle = 0; } } + +#ifdef OPENGL_DESTRUCTORS + ~Program() { free(); } +#endif }; static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) { From e4550b3e4f14c487678a9d3a05d1e05bd7f23826 Mon Sep 17 00:00:00 2001 From: offtkp Date: Fri, 19 Jul 2024 15:55:02 +0300 Subject: [PATCH 104/251] Fix pokedex3d on specialized shaders --- src/core/PICA/shader_gen_glsl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index c7195b25..0a9c1a5a 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -560,8 +560,8 @@ void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentC shader += "specular_sum.rgb += light_factor * (specular0 + specular1);\n"; } - shader += "diffuse_sum.rgb += light_factor * lightSources[" + std::to_string(lightID) + "].ambient + lightSources[" + - std::to_string(lightID) + "].diffuse * NdotL;\n"; + shader += "diffuse_sum.rgb += light_factor * (lightSources[" + std::to_string(lightID) + "].ambient + lightSources[" + + std::to_string(lightID) + "].diffuse * NdotL);\n"; } if (config.lighting.enablePrimaryAlpha || config.lighting.enableSecondaryAlpha) { From 9415cee59a05f4531a2b89867b02266baff3d344 
Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 19 Jul 2024 18:02:42 +0300 Subject: [PATCH 105/251] Enable shadergen by default for now --- include/config.hpp | 3 ++- src/core/PICA/shader_gen_glsl.cpp | 2 +- src/libretro_core.cpp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/config.hpp b/include/config.hpp index 8aa695aa..ed2c270f 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -13,7 +13,8 @@ struct EmulatorConfig { static constexpr bool shaderJitDefault = false; #endif - static constexpr bool ubershaderDefault = true; + // For now, use specialized shaders by default + static constexpr bool ubershaderDefault = false; bool shaderJitEnabled = shaderJitDefault; bool discordRpcEnabled = false; diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 0a9c1a5a..95b042f1 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -165,7 +165,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConf vec4 tevNextPreviousBuffer = tevBufferColor; vec4 primaryColor = vec4(0.0); - vec4 secondaryColor = vec4(0.0); + vec4 secondaryColor = vec4(0.0); )"; compileLights(ret, config, regs); diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index a6a1ff00..b48e937a 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -147,7 +147,7 @@ static void configInit() { static const retro_variable values[] = { {"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"}, {"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"}, - {"panda3ds_use_ubershader", "Use ubershaders (No stutter, maybe slower); enabled|disabled"}, + {"panda3ds_use_ubershader", "Use ubershaders (No stutter, maybe slower); disabled|enabled"}, {"panda3ds_use_vsync", "Enable VSync; enabled|disabled"}, {"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"}, 
{"panda3ds_use_audio", "Enable audio; disabled|enabled"}, From 20335b7d2d875b12402a59d7f50961098290f7ec Mon Sep 17 00:00:00 2001 From: offtkp Date: Fri, 19 Jul 2024 18:05:43 +0300 Subject: [PATCH 106/251] Update gles.patch --- .github/gles.patch | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/gles.patch b/.github/gles.patch index a27b3d00..270e336e 100644 --- a/.github/gles.patch +++ b/.github/gles.patch @@ -21,7 +21,7 @@ index 990e2f80..2e7842ac 100644 void main() { diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag -index b4ad7ecc..98b1bd80 100644 +index 9f369e39..b4bb19d3 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,4 +1,5 @@ @@ -32,7 +32,7 @@ index b4ad7ecc..98b1bd80 100644 in vec4 v_quaternion; in vec4 v_colour; @@ -164,11 +165,17 @@ float lutLookup(uint lut, int index) { - return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; + return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r; } +// some gles versions have bitfieldExtractCompat and complain if you redefine it, some don't and compile error, using this instead @@ -225,10 +225,10 @@ index 057f9a88..dc735ced 100644 v_quaternion = a_quaternion; } diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp -index 828fb784..a1861b77 100644 +index 4a08650a..21af37e3 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp -@@ -568,22 +568,22 @@ namespace OpenGL { +@@ -583,22 +583,22 @@ namespace OpenGL { static void disableScissor() { glDisable(GL_SCISSOR_TEST); } static void enableBlend() { glEnable(GL_BLEND); } static void disableBlend() { glDisable(GL_BLEND); } From eb7e02fbc2ce9a285b3d523ef2c76c1e893e525a Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 19 Jul 2024 18:20:21 +0300 Subject: [PATCH 107/251] Shadergen: Remove redundant 
whitespace generation --- src/core/PICA/shader_gen_glsl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 95b042f1..e19c459e 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -162,7 +162,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConf void main() { vec4 combinerOutput = v_colour; vec4 previousBuffer = vec4(0.0); - vec4 tevNextPreviousBuffer = tevBufferColor; + vec4 tevNextPreviousBuffer = tevBufferColor; vec4 primaryColor = vec4(0.0); vec4 secondaryColor = vec4(0.0); @@ -494,7 +494,7 @@ void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentC light_distance = length(light_vector); light_vector = normalize(light_vector); half_vector = light_vector + normalize(v_view); - + distance_attenuation = 1.0; NdotL = dot(normal, light_vector); )"; From 270f4b00a91b55087012ac598cda0b985261f7d8 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 20 Jul 2024 01:01:15 +0300 Subject: [PATCH 108/251] AES: Fix fixed crypto key mode and CTR for versions 0 and 2 --- src/core/loader/ncch.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 47d5a4c2..98574289 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -88,8 +88,8 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn encryptionInfoTmp.normalKey = *primaryKey; encryptionInfoTmp.initialCounter.fill(0); - for (std::size_t i = 1; i <= sizeof(std::uint64_t) - 1; i++) { - encryptionInfoTmp.initialCounter[i] = header[0x108 + sizeof(std::uint64_t) - 1 - i]; + for (usize i = 0; i < 8; i++) { + encryptionInfoTmp.initialCounter[i] = header[0x108 + 7 - i]; } encryptionInfoTmp.initialCounter[8] = 1; exheaderInfo.encryptionInfo = encryptionInfoTmp; @@ -305,6 
+305,7 @@ std::pair NCCH::getPrimaryKey(Crypto::AESEngine &aesEngine if (encrypted) { if (fixedCryptoKey) { + result.fill(0); return {true, result}; } @@ -326,6 +327,7 @@ std::pair NCCH::getSecondaryKey(Crypto::AESEngine &aesEngi if (encrypted) { if (fixedCryptoKey) { + result.fill(0); return {true, result}; } From af552edd9d8456338446a5cc959aeedd2a028c4f Mon Sep 17 00:00:00 2001 From: Paris Oplopoios Date: Sat, 20 Jul 2024 02:37:49 +0300 Subject: [PATCH 109/251] Remove dependency of PICA regs in fragment config (#541) Remove dependency of PICA regs in fragment config Nyom Nyom part 2 Nyom 3: The final nyom Nyom 4: The nyomening Nyom 5: The final Nyom for real --- include/PICA/pica_frag_config.hpp | 54 +++++++++--- include/PICA/regs.hpp | 5 ++ include/PICA/shader_gen.hpp | 19 ++-- src/core/PICA/shader_gen_glsl.cpp | 124 ++++++++++++--------------- src/core/renderer_gl/renderer_gl.cpp | 27 +----- 5 files changed, 109 insertions(+), 120 deletions(-) diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index 5338f719..89dd3420 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -73,13 +73,7 @@ namespace PICA { BitField<22, 2, u32> shadowSelector; }; - LightingLUTConfig d0{}; - LightingLUTConfig d1{}; - LightingLUTConfig sp{}; - LightingLUTConfig fr{}; - LightingLUTConfig rr{}; - LightingLUTConfig rg{}; - LightingLUTConfig rb{}; + std::array luts{}; std::array lights{}; @@ -116,19 +110,27 @@ namespace PICA { for (int i = 0; i < totalLightCount; i++) { auto& light = lights[i]; - const u32 lightConfig = regs[InternalRegs::Light0Config + 0x10 * i]; - light.num = (regs[InternalRegs::LightPermutation] >> (i * 4)) & 0x7; + + const u32 lightConfig = regs[InternalRegs::Light0Config + 0x10 * light.num]; light.directional = Helpers::getBit<0>(lightConfig); light.twoSidedDiffuse = Helpers::getBit<1>(lightConfig); light.geometricFactor0 = Helpers::getBit<2>(lightConfig); light.geometricFactor1 = 
Helpers::getBit<3>(lightConfig); - light.shadowEnable = ((config1 >> i) & 1) ^ 1; // This also does 0 = enabled - light.spotAttenuationEnable = ((config1 >> (8 + i)) & 1) ^ 1; // Same here - light.distanceAttenuationEnable = ((config1 >> (24 + i)) & 1) ^ 1; // Of course same here + light.shadowEnable = ((config1 >> light.num) & 1) ^ 1; // This also does 0 = enabled + light.spotAttenuationEnable = ((config1 >> (8 + light.num)) & 1) ^ 1; // Same here + light.distanceAttenuationEnable = ((config1 >> (24 + light.num)) & 1) ^ 1; // Of course same here } + LightingLUTConfig& d0 = luts[Lights::LUT_D0]; + LightingLUTConfig& d1 = luts[Lights::LUT_D1]; + LightingLUTConfig& sp = luts[spotlightLutIndex]; + LightingLUTConfig& fr = luts[Lights::LUT_FR]; + LightingLUTConfig& rb = luts[Lights::LUT_RB]; + LightingLUTConfig& rg = luts[Lights::LUT_RG]; + LightingLUTConfig& rr = luts[Lights::LUT_RR]; + d0.enable = Helpers::getBit<16>(config1) == 0; d1.enable = Helpers::getBit<17>(config1) == 0; fr.enable = Helpers::getBit<19>(config1) == 0; @@ -144,7 +146,7 @@ namespace PICA { if (d0.enable) { d0.absInput = Helpers::getBit<1>(lutAbs) == 0; - d0.type = Helpers::getBits<0, 3>(lutSelect); + d0.type = Helpers::getBits<0, 3>(lutSelect); d0.scale = scales[Helpers::getBits<0, 3>(lutScale)]; } @@ -195,7 +197,31 @@ namespace PICA { return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0; } - FragmentConfig(const std::array& regs) : lighting(regs) {} + FragmentConfig(const std::array& regs) : lighting(regs) { + auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig]; + auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig); + + outConfig.alphaTestFunction = + (alphaTestConfig & 1) ? static_cast(alphaTestFunction) : PICA::CompareFunction::Always; + outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1; + + texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; + texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + + // Set up TEV stages. 
Annoyingly we can't just memcpy as the TEV registers are arranged like + // {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO +#define setupTevStage(stage) \ + std::memcpy(&texConfig.tevConfigs[stage * 4], ®s[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \ + texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 4]; + + setupTevStage(0); + setupTevStage(1); + setupTevStage(2); + setupTevStage(3); + setupTevStage(4); + setupTevStage(5); +#undef setupTevStage + } }; static_assert( diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 4518e16a..c4d6a5fb 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -278,6 +278,11 @@ namespace PICA { }; } + // There's actually 8 different LUTs (SP0-SP7), one for each light with different indices (8-15) + // We use an unused LUT value for "this light source's spotlight" instead and figure out which light source to use in compileLutLookup + // This is particularly intuitive in several places, such as checking if a LUT is enabled + static constexpr int spotlightLutIndex = 2; + enum class TextureFmt : u32 { RGBA8 = 0x0, RGB8 = 0x1, diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index 8cdaadfc..6cf810a0 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -14,27 +14,24 @@ namespace PICA::ShaderGen { enum class Language { GLSL }; class FragmentGenerator { - using PICARegs = std::array; API api; Language language; - void compileTEV(std::string& shader, int stage, const PICARegs& regs); - void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index); - void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index); - void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index); + void compileTEV(std::string& shader, int 
stage, const PICA::FragmentConfig& config); + void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config); + void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config); + void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index, const PICA::FragmentConfig& config); void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op); void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op); - void applyAlphaTest(std::string& shader, const PICARegs& regs); - void compileLights(std::string& shader, const PICA::FragmentConfig& config, const PICARegs& regs); - void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, const PICARegs& regs, u32 lightIndex, u32 lutID); + void applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config); + void compileLights(std::string& shader, const PICA::FragmentConfig& config); + void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID); bool isSamplerEnabled(u32 environmentID, u32 lutID); - u32 textureConfig = 0; - public: FragmentGenerator(API api, Language language) : api(api), language(language) {} - std::string generate(const PICARegs& regs, const PICA::FragmentConfig& config); + std::string generate(const PICA::FragmentConfig& config); std::string getDefaultVertexShader(); void setTarget(API api, Language language) { diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index e19c459e..47df58b8 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -1,3 +1,5 @@ +#include "PICA/pica_frag_config.hpp" +#include "PICA/regs.hpp" #include "PICA/shader_gen.hpp" using namespace PICA; using namespace PICA::ShaderGen; @@ -29,11 +31,6 @@ 
static constexpr const char* uniformDefinition = R"( }; )"; -// There's actually 8 different LUTs (SP0-SP7), one for each light with different indices (8-15) -// We use an unused LUT value for "this light source's spotlight" instead and figure out which light source to use in compileLutLookup -// This is particularly intuitive in several places, such as checking if a LUT is enabled -static constexpr int spotlightLutIndex = 2; - std::string FragmentGenerator::getDefaultVertexShader() { std::string ret = ""; @@ -101,7 +98,7 @@ std::string FragmentGenerator::getDefaultVertexShader() { return ret; } -std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConfig& config) { +std::string FragmentGenerator::generate(const FragmentConfig& config) { std::string ret = ""; switch (api) { @@ -168,7 +165,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConf vec4 secondaryColor = vec4(0.0); )"; - compileLights(ret, config, regs); + compileLights(ret, config); ret += R"( vec3 colorOp1 = vec3(0.0); @@ -187,44 +184,39 @@ std::string FragmentGenerator::generate(const PICARegs& regs, const FragmentConf float depth = z_over_w * depthScale + depthOffset; )"; - if ((regs[InternalRegs::DepthmapEnable] & 1) == 0) { + if (!config.outConfig.depthMapEnable) { ret += "depth /= gl_FragCoord.w;\n"; } ret += "gl_FragDepth = depth;\n"; - textureConfig = regs[InternalRegs::TexUnitCfg]; for (int i = 0; i < 6; i++) { - compileTEV(ret, i, regs); + compileTEV(ret, i, config); } - applyAlphaTest(ret, regs); + applyAlphaTest(ret, config); ret += "fragColor = combinerOutput;\n}"; // End of main function return ret; } -void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICARegs& regs) { - // Base address for each TEV stage's configuration - static constexpr std::array ioBases = { - InternalRegs::TexEnv0Source, InternalRegs::TexEnv1Source, InternalRegs::TexEnv2Source, - InternalRegs::TexEnv3Source, InternalRegs::TexEnv4Source, 
InternalRegs::TexEnv5Source, - }; +void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config) { + const u32* tevValues = config.texConfig.tevConfigs.data() + stage * 4; - const u32 ioBase = ioBases[stage]; - TexEnvConfig tev(regs[ioBase], regs[ioBase + 1], regs[ioBase + 2], regs[ioBase + 3], regs[ioBase + 4]); + // Pass a 0 to constColor here, as it doesn't matter for compilation + TexEnvConfig tev(tevValues[0], tevValues[1], tevValues[2], 0, tevValues[3]); if (!tev.isPassthroughStage()) { // Get color operands shader += "colorOp1 = "; - getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage); + getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage, config); shader += ";\ncolorOp2 = "; - getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage); + getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage, config); shader += ";\ncolorOp3 = "; - getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage); + getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage, config); shader += ";\nvec3 outputColor" + std::to_string(stage) + " = clamp("; getColorOperation(shader, tev.colorOp); @@ -236,13 +228,13 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg } else { // Get alpha operands shader += "alphaOp1 = "; - getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage); + getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage, config); shader += ";\nalphaOp2 = "; - getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage); + getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage, config); shader += ";\nalphaOp3 = "; - getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage); + getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage, config); shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = clamp("; getAlphaOperation(shader, tev.alphaOp); @@ -258,7 +250,7 @@ 
void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += "previousBuffer = tevNextPreviousBuffer;\n\n"; // Update the "next previous buffer" if necessary - const u32 textureEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + const u32 textureEnvUpdateBuffer = config.texConfig.texEnvUpdateBuffer; if (stage < 4) { // Check whether to update rgb if ((textureEnvUpdateBuffer & (0x100 << stage))) { @@ -272,7 +264,7 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg } } -void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index) { +void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config) { using OperandType = TexEnvConfig::ColorOperand; // For inverting operands, add the 1.0 - x subtraction @@ -284,31 +276,31 @@ void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Sourc switch (color) { case OperandType::SourceColor: case OperandType::OneMinusSourceColor: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".rgb"; break; case OperandType::SourceRed: case OperandType::OneMinusSourceRed: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".rrr"; break; case OperandType::SourceGreen: case OperandType::OneMinusSourceGreen: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".ggg"; break; case OperandType::SourceBlue: case OperandType::OneMinusSourceBlue: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".bbb"; break; case OperandType::SourceAlpha: case OperandType::OneMinusSourceAlpha: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".aaa"; break; @@ -319,7 +311,7 @@ void 
FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Sourc } } -void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index) { +void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index, const PICA::FragmentConfig& config) { using OperandType = TexEnvConfig::AlphaOperand; // For inverting operands, add the 1.0 - x subtraction @@ -331,25 +323,25 @@ void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Sourc switch (color) { case OperandType::SourceRed: case OperandType::OneMinusSourceRed: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".r"; break; case OperandType::SourceGreen: case OperandType::OneMinusSourceGreen: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".g"; break; case OperandType::SourceBlue: case OperandType::OneMinusSourceBlue: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".b"; break; case OperandType::SourceAlpha: case OperandType::OneMinusSourceAlpha: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".a"; break; @@ -360,14 +352,14 @@ void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Sourc } } -void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index) { +void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config) { switch (source) { case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break; case TexEnvConfig::Source::Texture0: shader += "texture(u_tex0, v_texcoord0.xy)"; break; case TexEnvConfig::Source::Texture1: shader += "texture(u_tex1, v_texcoord1)"; break; case TexEnvConfig::Source::Texture2: { // If bit 13 in texture config is set then we use the 
texcoords for texture 1, otherwise for texture 2 - if (Helpers::getBit<13>(textureConfig)) { + if (Helpers::getBit<13>(config.texConfig.texUnitConfig)) { shader += "texture(u_tex2, v_texcoord1)"; } else { shader += "texture(u_tex2, v_texcoord2)"; @@ -428,12 +420,11 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope } } -void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs) { - const u32 alphaConfig = regs[InternalRegs::AlphaTestConfig]; - const auto function = static_cast(Helpers::getBits<4, 3>(alphaConfig)); +void FragmentGenerator::applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config) { + const CompareFunction function = config.outConfig.alphaTestFunction; // Alpha test disabled - if (Helpers::getBit<0>(alphaConfig) == 0 || function == CompareFunction::Always) { + if (function == CompareFunction::Always) { return; } @@ -458,7 +449,7 @@ void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs shader += ") { discard; }\n"; } -void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentConfig& config, const PICARegs& regs) { +void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentConfig& config) { if (!config.lighting.enable) { return; } @@ -481,7 +472,7 @@ void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentC for (int i = 0; i < config.lighting.lightNum; i++) { lightID = config.lighting.lights[i].num; - const auto& lightConfig = config.lighting.lights[lightID]; + const auto& lightConfig = config.lighting.lights[i]; shader += "light_position = lightSources[" + std::to_string(lightID) + "].position;\n"; if (lightConfig.directional) { // Directional lighting @@ -516,27 +507,27 @@ void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentC ", int(clamp(floor(distance_att_delta * 256.0), 0.0, 255.0)));\n"; } - compileLUTLookup(shader, config, regs, lightID, spotlightLutIndex); + 
compileLUTLookup(shader, config, i, spotlightLutIndex); shader += "spotlight_attenuation = lut_lookup_result;\n"; - compileLUTLookup(shader, config, regs, lightID, PICA::Lights::LUT_D0); + compileLUTLookup(shader, config, i, PICA::Lights::LUT_D0); shader += "specular0_dist = lut_lookup_result;\n"; - compileLUTLookup(shader, config, regs, lightID, PICA::Lights::LUT_D1); + compileLUTLookup(shader, config, i, PICA::Lights::LUT_D1); shader += "specular1_dist = lut_lookup_result;\n"; - compileLUTLookup(shader, config, regs, lightID, PICA::Lights::LUT_RR); + compileLUTLookup(shader, config, i, PICA::Lights::LUT_RR); shader += "reflected_color.r = lut_lookup_result;\n"; if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RG)) { - compileLUTLookup(shader, config, regs, lightID, PICA::Lights::LUT_RG); + compileLUTLookup(shader, config, i, PICA::Lights::LUT_RG); shader += "reflected_color.g = lut_lookup_result;\n"; } else { shader += "reflected_color.g = reflected_color.r;\n"; } if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RB)) { - compileLUTLookup(shader, config, regs, lightID, PICA::Lights::LUT_RB); + compileLUTLookup(shader, config, i, PICA::Lights::LUT_RB); shader += "reflected_color.b = lut_lookup_result;\n"; } else { shader += "reflected_color.b = reflected_color.r;\n"; @@ -565,7 +556,7 @@ void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentC } if (config.lighting.enablePrimaryAlpha || config.lighting.enableSecondaryAlpha) { - compileLUTLookup(shader, config, regs, lightID, PICA::Lights::LUT_FR); + compileLUTLookup(shader, config, config.lighting.lightNum - 1, PICA::Lights::LUT_FR); shader += "float fresnel_factor = lut_lookup_result;\n"; } @@ -602,45 +593,40 @@ bool FragmentGenerator::isSamplerEnabled(u32 environmentID, u32 lutID) { return samplerEnabled[environmentID * 7 + lutID]; } -void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, const PICARegs& regs, u32 
lightIndex, u32 lutID) { +void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID) { + const LightingLUTConfig& lut = config.lighting.luts[lutID]; + uint lightID = config.lighting.lights[lightIndex].num; uint lutIndex = 0; - int bitInConfig1 = 0; + bool lutEnabled = false; if (lutID == spotlightLutIndex) { // These are the spotlight attenuation LUTs - bitInConfig1 = 8 + (lightIndex & 0x7); - lutIndex = 8u + lightIndex; + lutIndex = 8u + lightID; + lutEnabled = config.lighting.lights[lightIndex].spotAttenuationEnable; } else if (lutID <= 6) { - bitInConfig1 = 16 + lutID; lutIndex = lutID; + lutEnabled = lut.enable; } else { Helpers::warn("Shadergen: Unimplemented LUT value"); } const bool samplerEnabled = isSamplerEnabled(config.lighting.config, lutID); - const u32 config1 = regs[InternalRegs::LightConfig1]; - if (!samplerEnabled || ((config1 >> bitInConfig1) & 1)) { + if (!samplerEnabled || !lutEnabled) { shader += "lut_lookup_result = 1.0;\n"; return; } - static constexpr float scales[] = {1.0f, 2.0f, 4.0f, 8.0f, 0.0f, 0.0f, 0.25f, 0.5f}; - const u32 lutAbs = regs[InternalRegs::LightLUTAbs]; - const u32 lutSelect = regs[InternalRegs::LightLUTSelect]; - const u32 lutScale = regs[InternalRegs::LightLUTScale]; - - // The way these bitfields are encoded is so cursed - float scale = scales[(lutScale >> (4 * lutID)) & 0x7]; - uint inputID = (lutSelect >> (4 * lutID)) & 0x7; - bool absEnabled = ((lutAbs >> (4 * lutID + 1)) & 0x1) == 0; // 0 = enabled... 
+ float scale = lut.scale; + uint inputID = lut.type; + bool absEnabled = lut.absInput; switch (inputID) { case 0: shader += "lut_lookup_delta = dot(normal, normalize(half_vector));\n"; break; case 1: shader += "lut_lookup_delta = dot(normalize(v_view), normalize(half_vector));\n"; break; case 2: shader += "lut_lookup_delta = dot(normal, normalize(v_view));\n"; break; case 3: shader += "lut_lookup_delta = dot(normal, light_vector);\n"; break; - case 4: shader += "lut_lookup_delta = dot(light_vector, lightSources[" + std ::to_string(lightIndex) + "].spotlightDirection);\n"; break; + case 4: shader += "lut_lookup_delta = dot(light_vector, lightSources[" + std ::to_string(lightID) + "].spotlightDirection);\n"; break; default: Helpers::warn("Shadergen: Unimplemented LUT select"); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 7fca385d..2d39f65f 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -784,37 +784,12 @@ OpenGL::Program& RendererGL::getSpecializedShader() { constexpr uint uboBlockBinding = 2; PICA::FragmentConfig fsConfig(regs); - auto& outConfig = fsConfig.outConfig; - auto& texConfig = fsConfig.texConfig; - - auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig]; - auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig); - - outConfig.alphaTestFunction = (alphaTestConfig & 1) ? static_cast(alphaTestFunction) : PICA::CompareFunction::Always; - outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1; - - texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; - texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; - - // Set up TEV stages. 
Annoyingly we can't just memcpy as the TEV registers are arranged like - // {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO -#define setupTevStage(stage) \ - std::memcpy(&texConfig.tevConfigs[stage * 4], ®s[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \ - texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 5]; - - setupTevStage(0); - setupTevStage(1); - setupTevStage(2); - setupTevStage(3); - setupTevStage(4); - setupTevStage(5); -#undef setupTevStage CachedProgram& programEntry = shaderCache[fsConfig]; OpenGL::Program& program = programEntry.program; if (!program.exists()) { - std::string fs = fragShaderGen.generate(regs, fsConfig); + std::string fs = fragShaderGen.generate(fsConfig); OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); program.create({defaultShadergenVs, fragShader}); From 69c79a7f6c8eafe5f042b37de8d750129d7f6869 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 20 Jul 2024 03:40:50 +0300 Subject: [PATCH 110/251] Ubershader: Add lighting shadergen override --- include/config.hpp | 5 +++++ include/renderer.hpp | 5 +++++ include/renderer_gl/renderer_gl.hpp | 4 ++-- src/config.cpp | 5 +++++ src/core/PICA/gpu.cpp | 4 ++++ src/core/renderer_gl/renderer_gl.cpp | 13 +++++++++++++ 6 files changed, 34 insertions(+), 2 deletions(-) diff --git a/include/config.hpp b/include/config.hpp index ed2c270f..a3fc77e4 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -20,6 +20,11 @@ struct EmulatorConfig { bool discordRpcEnabled = false; bool useUbershaders = ubershaderDefault; bool accurateShaderMul = false; + + // Toggles whether to force shadergen when there's more than N lights active and we're using the ubershader, for better performance + bool forceShadergenForLights = true; + int lightShadergenThreshold = 1; + RendererType rendererType = RendererType::OpenGL; 
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null; diff --git a/include/renderer.hpp b/include/renderer.hpp index e64d49e3..569a730b 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -20,6 +20,7 @@ enum class RendererType : s8 { Software = 3, }; +struct EmulatorConfig; class GPU; struct SDL_Window; @@ -46,6 +47,8 @@ class Renderer { u32 outputWindowWidth = 400; u32 outputWindowHeight = 240 * 2; + EmulatorConfig* emulatorConfig = nullptr; + public: Renderer(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); virtual ~Renderer(); @@ -101,4 +104,6 @@ class Renderer { outputWindowWidth = width; outputWindowHeight = height; } + + void setConfig(EmulatorConfig* config) { emulatorConfig = config; } }; diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index abde96bf..bfa9922b 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -30,7 +30,7 @@ class RendererGL final : public Renderer { OpenGL::VertexArray vao; OpenGL::VertexBuffer vbo; - bool usingUbershader = true; + bool enableUbershader = true; // Data struct { @@ -110,7 +110,7 @@ class RendererGL final : public Renderer { virtual std::string getUbershader() override; virtual void setUbershader(const std::string& shader) override; - virtual void setUbershaderSetting(bool value) override { usingUbershader = value; } + virtual void setUbershaderSetting(bool value) override { enableUbershader = value; } std::optional getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); diff --git a/src/config.cpp b/src/config.cpp index cc34d148..dae5a0ab 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -64,6 +64,9 @@ void EmulatorConfig::load() { vsyncEnabled = toml::find_or(gpu, "EnableVSync", true); useUbershaders = toml::find_or(gpu, "UseUbershaders", ubershaderDefault); accurateShaderMul = toml::find_or(gpu, "AccurateShaderMultiplication", false); + + 
forceShadergenForLights = toml::find_or(gpu, "ForceShadergenForLighting", true); + lightShadergenThreshold = toml::find_or(gpu, "ShadergenLightThreshold", 1); } } @@ -130,6 +133,8 @@ void EmulatorConfig::save() { data["GPU"]["EnableVSync"] = vsyncEnabled; data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul; data["GPU"]["UseUbershaders"] = useUbershaders; + data["GPU"]["ForceShadergenForLighting"] = forceShadergenForLights; + data["GPU"]["ShadergenLightThreshold"] = lightShadergenThreshold; data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType)); data["Audio"]["EnableAudio"] = audioEnabled; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index a54fe6eb..ace49fea 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -58,6 +58,10 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) { break; } } + + if (renderer != nullptr) { + renderer->setConfig(&config); + } } void GPU::reset() { diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 2d39f65f..22750f7d 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -4,6 +4,7 @@ #include +#include "config.hpp" #include "PICA/float_types.hpp" #include "PICA/pica_frag_uniforms.hpp" #include "PICA/gpu.hpp" @@ -383,6 +384,18 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v OpenGL::Triangle, }; + bool usingUbershader = enableUbershader; + if (usingUbershader) { + const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0; + const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1; + + // Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using moret han N lights via shadergen + // This way we generate fewer shaders overall than with full shadergen, but don't tank performance + if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= 
emulatorConfig->lightShadergenThreshold) { + usingUbershader = false; + } + } + if (usingUbershader) { gl.useProgram(triangleProgram); } else { From 8091e44206615bb0d63000c3d27e8811f0b71f8f Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 20 Jul 2024 03:48:48 +0300 Subject: [PATCH 111/251] Add shadergen lighting override options to LR core --- src/libretro_core.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index b48e937a..fc3e53b3 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -155,6 +155,8 @@ static void configInit() { {"panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled"}, {"panda3ds_battery_level", "Battery percentage; 5|10|20|30|50|70|90|100"}, {"panda3ds_use_charger", "Charger plugged; enabled|disabled"}, + {"panda3ds_ubershader_lighting_override", "Force shadergen when rendering lights; enabled|disabled"}, + {"panda3ds_ubershader_lighting_override_threshold", "Light threshold for forcing shadergen; 1|2|3|4|5|6|7|8"}, {nullptr, nullptr}, }; @@ -175,6 +177,8 @@ static void configUpdate() { config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false); config.accurateShaderMul = FetchVariableBool("panda3ds_accurate_shader_mul", false); config.useUbershaders = FetchVariableBool("panda3ds_use_ubershader", true); + config.forceShadergenForLights = FetchVariableBool("panda3ds_ubershader_lighting_override", true); + config.lightShadergenThreshold = std::clamp(std::stoi(FetchVariable("panda3ds_ubershader_lighting_override_threshold", "1")), 1, 8); config.discordRpcEnabled = false; config.save(); From 4214d9bce4944ea7d004e38e271b2bf67b86228d Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 20 Jul 2024 17:45:14 +0300 Subject: [PATCH 112/251] Adjust ubershader defaults --- include/config.hpp | 7 ++++++- src/libretro_core.cpp 
| 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/config.hpp b/include/config.hpp index a3fc77e4..25f352e8 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -13,8 +13,13 @@ struct EmulatorConfig { static constexpr bool shaderJitDefault = false; #endif - // For now, use specialized shaders by default + // For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are + // horrible On other platforms we default to ubershader + shadergen fallback for lights +#if defined(__ANDROID__) || defined(__APPLE__) static constexpr bool ubershaderDefault = false; +#else + static constexpr bool ubershaderDefault = true; +#endif bool shaderJitEnabled = shaderJitDefault; bool discordRpcEnabled = false; diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index fc3e53b3..02bf3cd1 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -147,7 +147,8 @@ static void configInit() { static const retro_variable values[] = { {"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"}, {"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"}, - {"panda3ds_use_ubershader", "Use ubershaders (No stutter, maybe slower); disabled|enabled"}, + {"panda3ds_use_ubershader", EmulatorConfig::ubershaderDefault ? 
"Use ubershaders (No stutter, maybe slower); enabled|disabled" + : "Use ubershaders (No stutter, maybe slower); disabled|enabled"}, {"panda3ds_use_vsync", "Enable VSync; enabled|disabled"}, {"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"}, {"panda3ds_use_audio", "Enable audio; disabled|enabled"}, From 5c40fb0cbf683a586d92e43da5002a6c5b3e580a Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 20 Jul 2024 18:37:35 +0300 Subject: [PATCH 113/251] Fix oopsie --- include/config.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/config.hpp b/include/config.hpp index 25f352e8..52be1af7 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -14,7 +14,7 @@ struct EmulatorConfig { #endif // For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are - // horrible On other platforms we default to ubershader + shadergen fallback for lights + // horrible. 
On other platforms we default to ubershader + shadergen fallback for lights #if defined(__ANDROID__) || defined(__APPLE__) static constexpr bool ubershaderDefault = false; #else @@ -51,4 +51,4 @@ struct EmulatorConfig { EmulatorConfig(const std::filesystem::path& path); void load(); void save(); -}; \ No newline at end of file +}; From f219432c6ac3926a2eb2accea6ea2797940c3af8 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 20 Jul 2024 23:18:52 +0300 Subject: [PATCH 114/251] Renderer GL: Don't leak shader/UBO handles --- include/renderer_gl/renderer_gl.hpp | 1 + src/core/renderer_gl/renderer_gl.cpp | 23 ++++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index bfa9922b..d00445ac 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -117,6 +117,7 @@ class RendererGL final : public Renderer { // Note: The caller is responsible for deleting the currently bound FBO before calling this void setFBO(uint handle) { screenFramebuffer.m_handle = handle; } void resetStateManager() { gl.reset(); } + void clearShaderCache(); void initUbershader(OpenGL::Program& program); #ifdef PANDA3DS_FRONTEND_QT diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 22750f7d..36827027 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -24,10 +24,7 @@ void RendererGL::reset() { colourBufferCache.reset(); textureCache.reset(); - for (auto& shader : shaderCache) { - shader.second.program.free(); - } - shaderCache.clear(); + clearShaderCache(); // Init the colour/depth buffer settings to some random defaults on reset colourBufferLoc = 0; @@ -808,6 +805,8 @@ OpenGL::Program& RendererGL::getSpecializedShader() { program.create({defaultShadergenVs, fragShader}); gl.useProgram(program); + fragShader.free(); + // 
Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); @@ -937,16 +936,22 @@ void RendererGL::screenshot(const std::string& name) { stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0); } +void RendererGL::clearShaderCache() { + for (auto& shader : shaderCache) { + CachedProgram& cachedProgram = shader.second; + cachedProgram.program.free(); + glDeleteBuffers(1, &cachedProgram.uboBinding); + } + + shaderCache.clear(); +} + void RendererGL::deinitGraphicsContext() { // Invalidate all surface caches since they'll no longer be valid textureCache.reset(); depthBufferCache.reset(); colourBufferCache.reset(); - - for (auto& shader : shaderCache) { - shader.second.program.free(); - } - shaderCache.clear(); + clearShaderCache(); // All other GL objects should be invalidated automatically and be recreated by the next call to initGraphicsContext // TODO: Make it so that depth and colour buffers get written back to 3DS memory From 8611e98b92f8c60ae634d5bf890e8e2dc523f402 Mon Sep 17 00:00:00 2001 From: Jonian Guveli Date: Sat, 20 Jul 2024 23:21:00 +0300 Subject: [PATCH 115/251] Libretro: Add support for touch input --- src/libretro_core.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index 02bf3cd1..3e0436b8 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -15,6 +15,8 @@ static retro_input_state_t inputStateCallback; static retro_hw_render_callback hw_render; static std::filesystem::path savePath; +static bool screenTouched; + std::unique_ptr emulator; RendererGL* renderer; @@ -314,7 +316,8 @@ void retro_run() { hid.setCirclepadX((xLeft / +32767) * 0x9C); hid.setCirclepadY((yLeft / -32767) * 0x9C); - bool touch = inputStateCallback(0, RETRO_DEVICE_MOUSE, 0, 
RETRO_DEVICE_ID_MOUSE_LEFT); + bool touchScreen = false; + const int posX = inputStateCallback(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_X); const int posY = inputStateCallback(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_Y); @@ -324,16 +327,23 @@ void retro_run() { const int offsetX = 40; const int offsetY = emulator->height / 2; - const bool inScreenX = newX >= offsetX && newX < emulator->width - offsetX; + const bool inScreenX = newX >= offsetX && newX <= emulator->width - offsetX; const bool inScreenY = newY >= offsetY && newY <= emulator->height; - if (touch && inScreenX && inScreenY) { + if (inScreenX && inScreenY) { + touchScreen |= inputStateCallback(0, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_LEFT); + touchScreen |= inputStateCallback(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_PRESSED); + } + + if (touchScreen) { u16 x = static_cast(newX - offsetX); u16 y = static_cast(newY - offsetY); hid.setTouchScreenPress(x, y); - } else { + screenTouched = true; + } else if (screenTouched) { hid.releaseTouchScreen(); + screenTouched = false; } hid.updateInputs(emulator->getTicks()); From 8b26e1e3fcf9d7b42d8917b3a75acb121210709b Mon Sep 17 00:00:00 2001 From: offtkp Date: Sun, 21 Jul 2024 15:42:12 +0300 Subject: [PATCH 116/251] Fix shadowed variable in ubershader --- src/host_shaders/opengl_fragment_shader.frag | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index 9f369e39..48b55a4c 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -307,8 +307,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { primary_color = vec4(vec3(0.0), 1.0); secondary_color = vec4(vec3(0.0), 1.0); - uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u); uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u); + GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u); 
GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u); GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); From 2a6cd3c5ea29cd39ad1719e64fe65ea96a94ccbc Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 21 Jul 2024 16:02:22 +0300 Subject: [PATCH 117/251] Separate graphics API/Language types from the fragment recompiler --- CMakeLists.txt | 2 +- include/PICA/shader_gen.hpp | 7 +------ include/PICA/shader_gen_types.hpp | 9 +++++++++ 3 files changed, 11 insertions(+), 7 deletions(-) create mode 100644 include/PICA/shader_gen_types.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c52ccd51..fdfe8a4a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -249,7 +249,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp - include/PICA/pica_frag_uniforms.hpp + include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp ) cmrc_add_resource_library( diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index 6cf810a0..085d990a 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -4,15 +4,10 @@ #include "PICA/gpu.hpp" #include "PICA/pica_frag_config.hpp" #include "PICA/regs.hpp" +#include "PICA/shader_gen_types.hpp" #include "helpers.hpp" namespace PICA::ShaderGen { - // Graphics API this shader is targetting - enum class API { GL, GLES, Vulkan }; - - // Shading language to use (Only GLSL for the time being) - enum class Language { GLSL }; - class FragmentGenerator { API api; Language language; diff --git a/include/PICA/shader_gen_types.hpp b/include/PICA/shader_gen_types.hpp new file mode 
100644 index 00000000..1877227f --- /dev/null +++ b/include/PICA/shader_gen_types.hpp @@ -0,0 +1,9 @@ +#pragma once + +namespace PICA::ShaderGen { + // Graphics API this shader is targetting + enum class API { GL, GLES, Vulkan }; + + // Shading language to use (Only GLSL for the time being) + enum class Language { GLSL }; +} // namespace PICA::ShaderGen \ No newline at end of file From be1c801fc24467cfefdc9e8f371418bca8269adb Mon Sep 17 00:00:00 2001 From: offtkp Date: Sun, 21 Jul 2024 16:37:37 +0300 Subject: [PATCH 118/251] Fix hashing for FragmentConfig --- include/PICA/pica_frag_config.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index 89dd3420..ee18eee0 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -12,11 +12,11 @@ namespace PICA { struct OutputConfig { union { - u32 raw; + u32 raw{}; // Merge the enable + compare function into 1 field to avoid duplicate shaders // enable == off means a CompareFunction of Always BitField<0, 3, CompareFunction> alphaTestFunction; - BitField<4, 1, u32> depthMapEnable; + BitField<3, 1, u32> depthMapEnable; }; }; From b333bf8a0c66e80a7b0f622ad123eeb8b6f27428 Mon Sep 17 00:00:00 2001 From: offtkp Date: Sun, 21 Jul 2024 17:28:40 +0300 Subject: [PATCH 119/251] Use u32 for scale instead of float in FragmentConfig --- include/PICA/pica_frag_config.hpp | 17 ++++++++--------- src/core/PICA/shader_gen_glsl.cpp | 13 +++++++++---- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index ee18eee0..f4142ef1 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -49,8 +49,8 @@ namespace PICA { BitField<0, 1, u32> enable; BitField<1, 1, u32> absInput; BitField<2, 3, u32> type; + BitField<5, 3, u32> scale; }; - float scale; }; struct LightingConfig { @@ -142,46 +142,45 @@ namespace PICA { 
const u32 lutAbs = regs[InternalRegs::LightLUTAbs]; const u32 lutSelect = regs[InternalRegs::LightLUTSelect]; const u32 lutScale = regs[InternalRegs::LightLUTScale]; - static constexpr float scales[] = {1.0f, 2.0f, 4.0f, 8.0f, 0.0f, 0.0f, 0.25f, 0.5f}; if (d0.enable) { d0.absInput = Helpers::getBit<1>(lutAbs) == 0; d0.type = Helpers::getBits<0, 3>(lutSelect); - d0.scale = scales[Helpers::getBits<0, 3>(lutScale)]; + d0.scale = Helpers::getBits<0, 3>(lutScale); } if (d1.enable) { d1.absInput = Helpers::getBit<5>(lutAbs) == 0; d1.type = Helpers::getBits<4, 3>(lutSelect); - d1.scale = scales[Helpers::getBits<4, 3>(lutScale)]; + d1.scale = Helpers::getBits<4, 3>(lutScale); } sp.absInput = Helpers::getBit<9>(lutAbs) == 0; sp.type = Helpers::getBits<8, 3>(lutSelect); - sp.scale = scales[Helpers::getBits<8, 3>(lutScale)]; + sp.scale = Helpers::getBits<8, 3>(lutScale); if (fr.enable) { fr.absInput = Helpers::getBit<13>(lutAbs) == 0; fr.type = Helpers::getBits<12, 3>(lutSelect); - fr.scale = scales[Helpers::getBits<12, 3>(lutScale)]; + fr.scale = Helpers::getBits<12, 3>(lutScale); } if (rb.enable) { rb.absInput = Helpers::getBit<17>(lutAbs) == 0; rb.type = Helpers::getBits<16, 3>(lutSelect); - rb.scale = scales[Helpers::getBits<16, 3>(lutScale)]; + rb.scale = Helpers::getBits<16, 3>(lutScale); } if (rg.enable) { rg.absInput = Helpers::getBit<21>(lutAbs) == 0; rg.type = Helpers::getBits<20, 3>(lutSelect); - rg.scale = scales[Helpers::getBits<20, 3>(lutScale)]; + rg.scale = Helpers::getBits<20, 3>(lutScale); } if (rr.enable) { rr.absInput = Helpers::getBit<25>(lutAbs) == 0; rr.type = Helpers::getBits<24, 3>(lutSelect); - rr.scale = scales[Helpers::getBits<24, 3>(lutScale)]; + rr.scale = Helpers::getBits<24, 3>(lutScale); } } }; diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 47df58b8..3d688bd2 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -617,7 +617,7 @@ void 
FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::Fragme return; } - float scale = lut.scale; + uint scale = lut.scale; uint inputID = lut.type; bool absEnabled = lut.absInput; @@ -634,17 +634,22 @@ void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::Fragme break; } + static constexpr float scales[] = {1.0f, 2.0f, 4.0f, 8.0f, 0.0f, 0.0f, 0.25f, 0.5f}; + if (absEnabled) { bool twoSidedDiffuse = config.lighting.lights[lightIndex].twoSidedDiffuse; shader += twoSidedDiffuse ? "lut_lookup_delta = abs(lut_lookup_delta);\n" : "lut_lookup_delta = max(lut_lookup_delta, 0.0);\n"; shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", int(clamp(floor(lut_lookup_delta * 256.0), 0.0, 255.0)));\n"; - if (scale != 1.0) { - shader += "lut_lookup_result *= " + std::to_string(scale) + ";\n"; + if (scale != 0) { + shader += "lut_lookup_result *= " + std::to_string(scales[scale]) + ";\n"; } } else { // Range is [-1, 1] so we need to map it to [0, 1] shader += "lut_lookup_index = int(clamp(floor(lut_lookup_delta * 128.0), -128.f, 127.f));\n"; shader += "if (lut_lookup_index < 0) lut_lookup_index += 256;\n"; - shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", lut_lookup_index) *" + std::to_string(scale) + ";\n"; + shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", lut_lookup_index);\n"; + if (scale != 0) { + shader += "lut_lookup_result *= " + std::to_string(scales[scale]) + ";\n"; + } } } \ No newline at end of file From 4176a1925623252200804ff3ffe00f4dc8d3da09 Mon Sep 17 00:00:00 2001 From: offtkp Date: Sun, 21 Jul 2024 03:16:15 +0300 Subject: [PATCH 120/251] Fog in ubershader --- docs/3ds/lighting.md | 2 +- include/PICA/gpu.hpp | 3 + include/PICA/regs.hpp | 12 ++++ include/renderer_gl/renderer_gl.hpp | 3 +- src/core/PICA/gpu.cpp | 3 + src/core/PICA/regs.cpp | 15 +++++ src/core/PICA/shader_gen_glsl.cpp | 4 +- src/core/renderer_gl/renderer_gl.cpp | 58 +++++++++++++++----- 
src/host_shaders/opengl_fragment_shader.frag | 30 +++++++++- 9 files changed, 110 insertions(+), 20 deletions(-) diff --git a/docs/3ds/lighting.md b/docs/3ds/lighting.md index 9f4ff2f2..8b6b9885 100644 --- a/docs/3ds/lighting.md +++ b/docs/3ds/lighting.md @@ -56,7 +56,7 @@ lut_id is one of these values 6 RR lut_index on the other hand represents the actual index of the LUT in the texture -u_tex_lighting_lut has 24 LUTs and they are used like so: +u_tex_luts has 24 LUTs for lighting and they are used like so: 0 D0 1 D1 2 is missing because SP uses LUTs 8-15 diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 61020f76..1e37729b 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -92,6 +92,9 @@ class GPU { // Set to false by the renderer when the lighting_lut is uploaded ot the GPU bool lightingLUTDirty = false; + std::array fogLUT; + bool fogLUTDirty = false; + GPU(Memory& mem, EmulatorConfig& config); void display() { renderer->display(); } void screenshot(const std::string& name) { renderer->screenshot(name); } diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index c4d6a5fb..c66c90ca 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -51,6 +51,18 @@ namespace PICA { #undef defineTexEnv // clang-format on + // Fog registers + FogColor = 0xE1, + FogLUTIndex = 0xE6, + FogLUTData0 = 0xE8, + FogLUTData1 = 0xE9, + FogLUTData2 = 0xEA, + FogLUTData3 = 0xEB, + FogLUTData4 = 0xEC, + FogLUTData5 = 0xED, + FogLUTData6 = 0xEE, + FogLUTData7 = 0xEF, + // Framebuffer registers ColourOperation = 0x100, BlendFunc = 0x101, diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index d00445ac..f5a964a3 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -63,7 +63,7 @@ class RendererGL final : public Renderer { OpenGL::VertexBuffer dummyVBO; OpenGL::Texture screenTexture; - OpenGL::Texture lightLUTTexture; + OpenGL::Texture LUTTexture; OpenGL::Framebuffer 
screenFramebuffer; OpenGL::Texture blankTexture; // The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation @@ -90,6 +90,7 @@ class RendererGL final : public Renderer { void setupUbershaderTexEnv(); void bindTexturesToSlots(); void updateLightingLUT(); + void updateFogLUT(); void initGraphicsContextInternal(); public: diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index ace49fea..fe336edc 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -74,6 +74,9 @@ void GPU::reset() { lightingLUT.fill(0); lightingLUTDirty = true; + fogLUT.fill(0); + fogLUTDirty = true; + totalAttribCount = 0; fixedAttribMask = 0; fixedAttribIndex = 0; diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index baaa2256..45e624ec 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -135,6 +135,21 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { break; } + case FogLUTData0: + case FogLUTData1: + case FogLUTData2: + case FogLUTData3: + case FogLUTData4: + case FogLUTData5: + case FogLUTData6: + case FogLUTData7: { + const uint32_t index = regs[FogLUTIndex] & 127; + fogLUT[index] = value; + fogLUTDirty = true; + regs[FogLUTIndex] = (index + 1) & 127; + break; + } + case LightingLUTData0: case LightingLUTData1: case LightingLUTData2: diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 3d688bd2..01210587 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -130,7 +130,7 @@ std::string FragmentGenerator::generate(const FragmentConfig& config) { uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; - uniform sampler2D u_tex_lighting_lut; + uniform sampler2D u_tex_luts; )"; ret += uniformDefinition; @@ -144,7 +144,7 @@ std::string FragmentGenerator::generate(const FragmentConfig& config) { } float lutLookup(uint lut, int index) { - return texelFetch(u_tex_lighting_lut, ivec2(index, 
int(lut)), 0).r; + return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r; } vec3 regToColor(uint reg) { diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 36827027..b6c90374 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -115,10 +115,11 @@ void RendererGL::initGraphicsContextInternal() { const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall - lightLUTTexture.create(256, Lights::LUT_Count, GL_R32F); - lightLUTTexture.bind(); - lightLUTTexture.setMinFilter(OpenGL::Linear); - lightLUTTexture.setMagFilter(OpenGL::Linear); + // 24 rows for light, 1 for fog + LUTTexture.create(256, Lights::LUT_Count + 1, GL_RG32F); + LUTTexture.bind(); + LUTTexture.setMinFilter(OpenGL::Linear); + LUTTexture.setMagFilter(OpenGL::Linear); auto prevTexture = OpenGL::getTex2D(); @@ -353,22 +354,49 @@ void RendererGL::bindTexturesToSlots() { } glActiveTexture(GL_TEXTURE0 + 3); - lightLUTTexture.bind(); + LUTTexture.bind(); glActiveTexture(GL_TEXTURE0); } void RendererGL::updateLightingLUT() { gpu.lightingLUTDirty = false; - std::array lightingLut; + std::array lightingLut; - for (int i = 0; i < gpu.lightingLUT.size(); i++) { - uint64_t value = gpu.lightingLUT[i] & 0xFFF; + for (int i = 0; i < lightingLut.size(); i += 2) { + uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF; lightingLut[i] = (float)(value << 4) / 65535.0f; } glActiveTexture(GL_TEXTURE0 + 3); - lightLUTTexture.bind(); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, Lights::LUT_Count, GL_RED, GL_FLOAT, lightingLut.data()); + LUTTexture.bind(); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, Lights::LUT_Count, GL_RG, GL_FLOAT, lightingLut.data()); + glActiveTexture(GL_TEXTURE0); +} + +void RendererGL::updateFogLUT() { + gpu.fogLUTDirty = false; + + // Fog LUT elements are of this type: + // 0-12 fixed1.1.11, Difference from next 
element + // 13-23 fixed0.0.11, Value + // We will store them as a 128x1 RG texture with R being the value and G being the difference + std::array fogLut; + + for (int i = 0; i < fogLut.size(); i += 2) { + const uint32_t value = gpu.fogLUT[i >> 1]; + int32_t diff = value & 0x1fff; + diff = (diff << 19) >> 19; // Sign extend the 13-bit value to 32 bits + const float fogDifference = float(diff) / 2048.0f; + const float fogValue = float((value >> 13) & 0x7ff) / 2048.0f; + + fogLut[i] = fogValue; + fogLut[i + 1] = fogDifference; + } + + glActiveTexture(GL_TEXTURE0 + 3); + LUTTexture.bind(); + // The fog LUT exists at the end of the lighting LUT + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, Lights::LUT_Count, 128, 1, GL_RG, GL_FLOAT, fogLut.data()); glActiveTexture(GL_TEXTURE0); } @@ -453,6 +481,10 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v bindTexturesToSlots(); + if (gpu.fogLUTDirty) { + updateFogLUT(); + } + if (gpu.lightingLUTDirty) { updateLightingLUT(); } @@ -811,7 +843,7 @@ OpenGL::Program& RendererGL::getSpecializedShader() { glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); - glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3); // Allocate memory for the program UBO glGenBuffers(1, &programEntry.uboBinding); @@ -994,9 +1026,9 @@ void RendererGL::initUbershader(OpenGL::Program& program) { ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable"); ubershaderData.picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs"); - // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 + // Init sampler objects. 
Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, light maps go in TU 3, and the fog map goes in TU 4 glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); - glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3); } diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index 48b55a4c..b9f9fe4c 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -25,7 +25,7 @@ uniform bool u_depthmapEnable; uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; -uniform sampler2D u_tex_lighting_lut; +uniform sampler2D u_tex_luts; uniform uint u_picaRegs[0x200 - 0x48]; @@ -152,6 +152,8 @@ vec4 tevCalculateCombiner(int tev_id) { #define RG_LUT 5u #define RR_LUT 6u +#define FOG_INDEX 24 + uint GPUREG_LIGHTi_CONFIG; uint GPUREG_LIGHTING_CONFIG1; uint GPUREG_LIGHTING_LUTINPUT_SELECT; @@ -161,7 +163,7 @@ bool error_unimpl = false; vec4 unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); float lutLookup(uint lut, int index) { - return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r; + return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r; } vec3 regToColor(uint reg) { @@ -494,7 +496,7 @@ void main() { if (tevUnimplementedSourceFlag) { // fragColour = vec4(1.0, 0.0, 1.0, 1.0); } - // fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; + // fragColour.rg = texture(u_tex_luts,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] @@ -507,6 +509,28 @@ void main() { // Write final fragment depth gl_FragDepth = 
depth; + bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u; + + if (enable_fog) { + bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; + float fog_index = flip_depth ? 1.0 - depth : depth; + fog_index *= 128.0; + float clamped_index = clamp(floor(fog_index), 0.0, 127.0); + float delta = fog_index - clamped_index; + vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), FOG_INDEX), 0).rg; + float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0); + + uint GPUREG_FOG_COLOR = readPicaReg(0x00E1u); + + // Annoyingly color is not encoded in the same way as light color + float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0; + float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0; + float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0; + vec3 fog_color = vec3(r, g, b); + + fragColour.rgb = mix(fog_color, fragColour.rgb, fog_factor); + } + // Perform alpha test uint alphaControl = readPicaReg(0x104u); if ((alphaControl & 1u) != 0u) { // Check if alpha test is on From b90c15919b0334a34c80b7f52d29f060699a54a3 Mon Sep 17 00:00:00 2001 From: offtkp Date: Sun, 21 Jul 2024 16:32:45 +0300 Subject: [PATCH 121/251] Shadergen fog --- include/PICA/pica_frag_config.hpp | 21 ++++++++++++++++++++- include/PICA/regs.hpp | 6 ++++++ include/PICA/shader_gen.hpp | 2 ++ src/core/PICA/shader_gen_glsl.cpp | 25 +++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index f4142ef1..32fa7aa6 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -29,6 +29,18 @@ namespace PICA { std::array tevConfigs; }; + struct FogConfig { + union { + u32 raw{}; + + BitField<0, 3, FogMode> mode; + BitField<3, 1, u32> flipDepth; + BitField<8, 8, u32> fogColorR; + BitField<16, 8, u32> fogColorG; + BitField<24, 8, u32> fogColorB; + }; + }; + struct Light { union { u16 raw; @@ -189,6 +201,7 @@ namespace PICA { struct FragmentConfig { OutputConfig outConfig; 
TextureConfig texConfig; + FogConfig fogConfig; LightingConfig lighting; bool operator==(const FragmentConfig& config) const { @@ -220,12 +233,18 @@ namespace PICA { setupTevStage(4); setupTevStage(5); #undef setupTevStage + + fogConfig.mode = (FogMode)Helpers::getBits<0, 3>(regs[InternalRegs::TexEnvUpdateBuffer]); + fogConfig.flipDepth = Helpers::getBit<16>(regs[InternalRegs::TexEnvUpdateBuffer]); + fogConfig.fogColorR = Helpers::getBits<0, 8>(regs[InternalRegs::FogColor]); + fogConfig.fogColorG = Helpers::getBits<8, 8>(regs[InternalRegs::FogColor]); + fogConfig.fogColorB = Helpers::getBits<16, 8>(regs[InternalRegs::FogColor]); } }; static_assert( std::has_unique_object_representations() && std::has_unique_object_representations() && - std::has_unique_object_representations() + std::has_unique_object_representations() && std::has_unique_object_representations() ); } // namespace PICA diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index c66c90ca..636e8f7c 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -396,6 +396,12 @@ namespace PICA { GreaterOrEqual = 7, }; + enum class FogMode : u32 { + Disabled = 0, + Fog = 5, + Gas = 7, + }; + struct TexEnvConfig { enum class Source : u8 { PrimaryColor = 0x0, diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index 085d990a..215e5adb 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -24,6 +24,8 @@ namespace PICA::ShaderGen { void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID); bool isSamplerEnabled(u32 environmentID, u32 lutID); + void compileFog(std::string& shader, const PICA::FragmentConfig& config); + public: FragmentGenerator(API api, Language language) : api(api), language(language) {} std::string generate(const PICA::FragmentConfig& config); diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 01210587..9802be90 100644 --- 
a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -194,6 +194,8 @@ std::string FragmentGenerator::generate(const FragmentConfig& config) { compileTEV(ret, i, config); } + compileFog(ret, config); + applyAlphaTest(ret, config); ret += "fragColor = combinerOutput;\n}"; // End of main function @@ -652,4 +654,27 @@ void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::Fragme shader += "lut_lookup_result *= " + std::to_string(scales[scale]) + ";\n"; } } +} + +void FragmentGenerator::compileFog(std::string& shader, const PICA::FragmentConfig& config) { + if (config.fogConfig.mode != FogMode::Fog) { + return; + } + + float r = config.fogConfig.fogColorR / 255.0f; + float g = config.fogConfig.fogColorG / 255.0f; + float b = config.fogConfig.fogColorB / 255.0f; + + if (config.fogConfig.flipDepth) { + shader += "float fog_index = (1.0 - depth) * 128.0;\n"; + } else { + shader += "float fog_index = depth * 128.0;\n"; + } + + shader += "float clamped_index = clamp(floor(fog_index), 0.0, 127.0);"; + shader += "float delta = fog_index - clamped_index;"; + shader += "vec3 fog_color = vec3(" + std::to_string(r) + ", " + std::to_string(g) + ", " + std::to_string(b) + ");"; + shader += "vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), 24), 0).rg;"; // fog LUT is past the light LUTs + shader += "float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);"; + shader += "combinerOutput.rgb = mix(fog_color, combinerOutput.rgb, fog_factor);"; } \ No newline at end of file From 82df95cf88a066be296acd3e21f45e511c474f24 Mon Sep 17 00:00:00 2001 From: offtkp Date: Sun, 21 Jul 2024 17:40:43 +0300 Subject: [PATCH 122/251] Update gles.patch --- .github/gles.patch | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/gles.patch b/.github/gles.patch index 270e336e..5a922fcf 100644 --- a/.github/gles.patch +++ b/.github/gles.patch @@ -21,7 +21,7 @@ index 990e2f80..2e7842ac 100644 void 
main() { diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag -index 9f369e39..b4bb19d3 100644 +index b9f9fe4c..f1cf286f 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,4 +1,5 @@ @@ -31,8 +31,8 @@ index 9f369e39..b4bb19d3 100644 in vec4 v_quaternion; in vec4 v_colour; -@@ -164,11 +165,17 @@ float lutLookup(uint lut, int index) { - return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r; +@@ -166,11 +167,17 @@ float lutLookup(uint lut, int index) { + return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r; } +// some gles versions have bitfieldExtractCompat and complain if you redefine it, some don't and compile error, using this instead @@ -50,7 +50,7 @@ index 9f369e39..b4bb19d3 100644 } // Convert an arbitrary-width floating point literal to an f32 -@@ -208,16 +215,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light +@@ -210,16 +217,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment @@ -70,7 +70,7 @@ index 9f369e39..b4bb19d3 100644 switch (input_id) { case 0u: { delta = dot(normal, normalize(half_vector)); -@@ -239,11 +246,11 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light +@@ -241,11 +248,11 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u))); int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u))); @@ -86,7 +86,7 @@ index 9f369e39..b4bb19d3 100644 if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; -@@ -270,9 +277,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light +@@ -272,9 +279,9 @@ float lightLutLookup(uint 
environment_id, uint lut_id, uint light_id, vec3 light } // 0 = enabled @@ -98,7 +98,7 @@ index 9f369e39..b4bb19d3 100644 delta = max(delta, 0.0); } else { delta = abs(delta); -@@ -296,7 +303,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { +@@ -298,7 +305,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { // Implements the following algorthm: https://mathb.in/26766 void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); @@ -107,7 +107,7 @@ index 9f369e39..b4bb19d3 100644 primary_color = secondary_color = vec4(0.0); return; } -@@ -313,7 +320,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -315,7 +322,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); @@ -116,7 +116,7 @@ index 9f369e39..b4bb19d3 100644 // Bump mode is ignored for now because it breaks some games ie. 
Toad Treasure Tracker switch (bump_mode) { -@@ -326,15 +333,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -328,15 +335,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); @@ -135,7 +135,7 @@ index 9f369e39..b4bb19d3 100644 uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u)); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u)); -@@ -346,12 +353,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -348,12 +355,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { float light_distance; vec3 light_position = vec3( @@ -151,7 +151,7 @@ index 9f369e39..b4bb19d3 100644 light_vector = light_position + v_view; } -@@ -367,23 +374,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -369,23 +376,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { float NdotL = dot(normal, light_vector); // N dot Li // Two sided diffuse @@ -181,7 +181,7 @@ index 9f369e39..b4bb19d3 100644 float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); -@@ -428,8 +435,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -430,8 +437,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } From 8fc61cdb7b9f73e56470265a7ed8e6a477e6f04a Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 21 Jul 2024 17:52:06 +0300 Subject: [PATCH 123/251] Add shader decompiler files --- CMakeLists.txt | 3 ++- include/PICA/shader_decompiler.hpp | 9 +++++++++ src/core/PICA/shader_decompiler.cpp | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) create mode 
100644 include/PICA/shader_decompiler.hpp create mode 100644 src/core/PICA/shader_decompiler.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fdfe8a4a..9d7be502 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -199,6 +199,7 @@ set(PICA_SOURCE_FILES src/core/PICA/gpu.cpp src/core/PICA/regs.cpp src/core/PICA src/core/PICA/shader_interpreter.cpp src/core/PICA/dynapica/shader_rec.cpp src/core/PICA/dynapica/shader_rec_emitter_x64.cpp src/core/PICA/pica_hash.cpp src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp src/core/PICA/shader_gen_glsl.cpp + src/core/PICA/shader_decompiler.cpp ) set(LOADER_SOURCE_FILES src/core/loader/elf.cpp src/core/loader/ncsd.cpp src/core/loader/ncch.cpp src/core/loader/3dsx.cpp src/core/loader/lz77.cpp) @@ -249,7 +250,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp - include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp + include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp ) cmrc_add_resource_library( diff --git a/include/PICA/shader_decompiler.hpp b/include/PICA/shader_decompiler.hpp new file mode 100644 index 00000000..18c950e1 --- /dev/null +++ b/include/PICA/shader_decompiler.hpp @@ -0,0 +1,9 @@ +#pragma once +#include + +#include "PICA/shader.hpp" +#include "PICA/shader_gen_types.hpp" + +namespace PICA::ShaderGen { + std::string decompileShader(PICAShader& shaderUnit); +} \ No newline at end of file diff --git a/src/core/PICA/shader_decompiler.cpp b/src/core/PICA/shader_decompiler.cpp new file mode 100644 index 00000000..b4f8f155 --- /dev/null +++ b/src/core/PICA/shader_decompiler.cpp @@ -0,0 +1 @@ +#include 
"PICA/shader_decompiler.hpp" \ No newline at end of file From b8712b37c3df9f270942fde07e2cf3a3d8df3855 Mon Sep 17 00:00:00 2001 From: offtkp Date: Sun, 21 Jul 2024 18:25:51 +0300 Subject: [PATCH 124/251] A few kissable changes --- include/PICA/gpu.hpp | 2 +- include/PICA/pica_frag_config.hpp | 11 +++++++---- src/core/PICA/regs.cpp | 4 ++-- src/core/renderer_gl/renderer_gl.cpp | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 1e37729b..c4c8db5c 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -92,8 +92,8 @@ class GPU { // Set to false by the renderer when the lighting_lut is uploaded ot the GPU bool lightingLUTDirty = false; - std::array fogLUT; bool fogLUTDirty = false; + std::array fogLUT; GPU(Memory& mem, EmulatorConfig& config); void display() { renderer->display(); } diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index 32fa7aa6..337fd211 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -235,10 +235,13 @@ namespace PICA { #undef setupTevStage fogConfig.mode = (FogMode)Helpers::getBits<0, 3>(regs[InternalRegs::TexEnvUpdateBuffer]); - fogConfig.flipDepth = Helpers::getBit<16>(regs[InternalRegs::TexEnvUpdateBuffer]); - fogConfig.fogColorR = Helpers::getBits<0, 8>(regs[InternalRegs::FogColor]); - fogConfig.fogColorG = Helpers::getBits<8, 8>(regs[InternalRegs::FogColor]); - fogConfig.fogColorB = Helpers::getBits<16, 8>(regs[InternalRegs::FogColor]); + + if (fogConfig.mode == FogMode::Fog) { + fogConfig.flipDepth = Helpers::getBit<16>(regs[InternalRegs::TexEnvUpdateBuffer]); + fogConfig.fogColorR = Helpers::getBits<0, 8>(regs[InternalRegs::FogColor]); + fogConfig.fogColorG = Helpers::getBits<8, 8>(regs[InternalRegs::FogColor]); + fogConfig.fogColorB = Helpers::getBits<16, 8>(regs[InternalRegs::FogColor]); + } } }; diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index 45e624ec..99519272 100644 --- 
a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -143,10 +143,10 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { case FogLUTData5: case FogLUTData6: case FogLUTData7: { - const uint32_t index = regs[FogLUTIndex] & 127; + const uint32_t index = regs[FogLUTIndex] & 0x7F; fogLUT[index] = value; fogLUTDirty = true; - regs[FogLUTIndex] = (index + 1) & 127; + regs[FogLUTIndex] = (index + 1) & 0x7F; break; } diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index b6c90374..5e1462b9 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -1026,7 +1026,7 @@ void RendererGL::initUbershader(OpenGL::Program& program) { ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable"); ubershaderData.picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs"); - // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, light maps go in TU 3, and the fog map goes in TU 4 + // Init sampler objects. 
Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2 and the LUTs go in TU 3 glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); From 906abe0fb322e3692520d741e7a721c42a0157ff Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 21 Jul 2024 18:29:39 +0300 Subject: [PATCH 125/251] Add -Wno-interference-size flag for GNUC --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fdfe8a4a..1264ce89 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ if(APPLE) endif() if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security -Wno-interference-size") endif() option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" ON) From 04d6c52784894f994da84b0c66892d0eae7629e3 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 21 Jul 2024 15:34:31 +0000 Subject: [PATCH 126/251] NCCH: Remove unused saveData member --- include/loader/ncch.hpp | 2 -- src/core/loader/ncch.cpp | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/include/loader/ncch.hpp b/include/loader/ncch.hpp index 8e35643b..4aa2ede7 100644 --- a/include/loader/ncch.hpp +++ b/include/loader/ncch.hpp @@ -64,8 +64,6 @@ struct NCCH { // Contents of the .code file in the ExeFS std::vector codeFile; - // Contains of the cart's save data - std::vector saveData; // The cart region. Only the CXI's region matters to us. 
Necessary to get past region locking std::optional region = std::nullopt; std::vector smdh; diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 98574289..4be05549 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -155,8 +155,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn } } - const u64 saveDataSize = *(u64*)&exheader[0x1C0 + 0x0]; // Size of save data in bytes - saveData.resize(saveDataSize, 0xff); + [[maybe_unused]] const u64 saveDataSize = *(u64*)&exheader[0x1C0 + 0x0]; // Size of save data in bytes compressCode = (exheader[0xD] & 1) != 0; stackSize = *(u32*)&exheader[0x1C]; From 0a0f623c7c9ee44cccde032f8c99f1a6650264cf Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 21 Jul 2024 15:46:38 +0000 Subject: [PATCH 127/251] NCCH: Fix saveDataSize (Oops) --- include/loader/ncch.hpp | 3 ++- src/core/loader/ncch.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/loader/ncch.hpp b/include/loader/ncch.hpp index 4aa2ede7..92ad5040 100644 --- a/include/loader/ncch.hpp +++ b/include/loader/ncch.hpp @@ -50,6 +50,7 @@ struct NCCH { static constexpr u64 mediaUnit = 0x200; u64 size = 0; // Size of NCCH converted to bytes + u64 saveDataSize = 0; u32 stackSize = 0; u32 bssSize = 0; u32 exheaderSize = 0; @@ -76,7 +77,7 @@ struct NCCH { bool hasExeFS() { return exeFS.size != 0; } bool hasRomFS() { return romFS.size != 0; } bool hasCode() { return codeFile.size() != 0; } - bool hasSaveData() { return saveData.size() != 0; } + bool hasSaveData() { return saveDataSize != 0; } // Parse SMDH for region info and such. 
Returns false on failure, true on success bool parseSMDH(const std::vector &smdh); diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 4be05549..e363213c 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -155,7 +155,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn } } - [[maybe_unused]] const u64 saveDataSize = *(u64*)&exheader[0x1C0 + 0x0]; // Size of save data in bytes + saveDataSize = *(u64*)&exheader[0x1C0 + 0x0]; // Size of save data in bytes compressCode = (exheader[0xD] & 1) != 0; stackSize = *(u32*)&exheader[0x1C]; From 3d9a1a8b5d7a661f03d94a63d809b2c746e63228 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 21 Jul 2024 19:07:28 +0300 Subject: [PATCH 128/251] I should really squash this when I'm home --- src/core/loader/ncch.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index e363213c..a575d4f2 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -25,7 +25,6 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn } codeFile.clear(); - saveData.clear(); smdh.clear(); partitionInfo = info; From 9bd711958b8df7db2965b59cb97462ba61d8e054 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 21 Jul 2024 18:29:39 +0300 Subject: [PATCH 129/251] Add -Wno-interference-size flag for GNUC --- CMakeLists.txt | 4 ++++ include/loader/ncch.hpp | 5 ++--- src/core/loader/ncch.cpp | 4 +--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fdfe8a4a..df0e2bb8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,10 @@ if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security") endif() +if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} -Wno-interference-size") +endif() + option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" ON) option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF) option(ENABLE_OPENGL "Enable OpenGL rendering backend" ON) diff --git a/include/loader/ncch.hpp b/include/loader/ncch.hpp index 8e35643b..92ad5040 100644 --- a/include/loader/ncch.hpp +++ b/include/loader/ncch.hpp @@ -50,6 +50,7 @@ struct NCCH { static constexpr u64 mediaUnit = 0x200; u64 size = 0; // Size of NCCH converted to bytes + u64 saveDataSize = 0; u32 stackSize = 0; u32 bssSize = 0; u32 exheaderSize = 0; @@ -64,8 +65,6 @@ struct NCCH { // Contents of the .code file in the ExeFS std::vector codeFile; - // Contains of the cart's save data - std::vector saveData; // The cart region. Only the CXI's region matters to us. Necessary to get past region locking std::optional region = std::nullopt; std::vector smdh; @@ -78,7 +77,7 @@ struct NCCH { bool hasExeFS() { return exeFS.size != 0; } bool hasRomFS() { return romFS.size != 0; } bool hasCode() { return codeFile.size() != 0; } - bool hasSaveData() { return saveData.size() != 0; } + bool hasSaveData() { return saveDataSize != 0; } // Parse SMDH for region info and such. 
Returns false on failure, true on success bool parseSMDH(const std::vector &smdh); diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 98574289..a575d4f2 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -25,7 +25,6 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn } codeFile.clear(); - saveData.clear(); smdh.clear(); partitionInfo = info; @@ -155,8 +154,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn } } - const u64 saveDataSize = *(u64*)&exheader[0x1C0 + 0x0]; // Size of save data in bytes - saveData.resize(saveDataSize, 0xff); + saveDataSize = *(u64*)&exheader[0x1C0 + 0x0]; // Size of save data in bytes compressCode = (exheader[0xD] & 1) != 0; stackSize = *(u32*)&exheader[0x1C]; From a8c68baa6f20628968c55d2443ad158d49bb9191 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 21 Jul 2024 21:49:45 +0300 Subject: [PATCH 130/251] Don't use -Wno-interference-size on Clang --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 54c64fa3..df0e2bb8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ if(APPLE) endif() if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security -Wno-interference-size") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security") endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") From 6399cb55e222a767470cc64a1e7c334b84159a76 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 21 Jul 2024 23:04:44 +0300 Subject: [PATCH 131/251] GL: Remove duplicate scissor disable --- src/core/renderer_gl/renderer_gl.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 
5e1462b9..f26158ae 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -531,7 +531,6 @@ void RendererGL::display() { gl.disableScissor(); gl.disableBlend(); gl.disableDepth(); - gl.disableScissor(); // This will work fine whether or not logic ops are enabled. We set logic op to copy instead of disabling to avoid state changes gl.setLogicOp(GL_COPY); gl.setColourMask(true, true, true, true); From 2d72b660423ddac57c56e5cbefe0ecec2f5d7d5f Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 22 Jul 2024 01:47:34 +0300 Subject: [PATCH 132/251] Initial shader decompilation work --- include/PICA/shader_decompiler.hpp | 85 ++++++++++++++++++++++++++++- src/core/PICA/shader_decompiler.cpp | 54 +++++++++++++++++- 2 files changed, 136 insertions(+), 3 deletions(-) diff --git a/include/PICA/shader_decompiler.hpp b/include/PICA/shader_decompiler.hpp index 18c950e1..abfa910c 100644 --- a/include/PICA/shader_decompiler.hpp +++ b/include/PICA/shader_decompiler.hpp @@ -1,9 +1,90 @@ #pragma once +#include #include +#include +#include #include "PICA/shader.hpp" #include "PICA/shader_gen_types.hpp" +struct EmulatorConfig; + namespace PICA::ShaderGen { - std::string decompileShader(PICAShader& shaderUnit); -} \ No newline at end of file + // Control flow analysis is partially based on + // https://github.com/PabloMK7/citra/blob/d0179559466ff09731d74474322ee880fbb44b00/src/video_core/shader/generator/glsl_shader_decompiler.cpp#L33 + struct ControlFlow { + struct Function { + using Labels = std::set; + + enum class ExitMode { + Unknown, // Can't guarantee whether we'll exit properly, fall back to CPU shaders (can happen with jmp shenanigans) + AlwaysReturn, // All paths reach the return point. + Conditional, // One or more code paths reach the return point or an END instruction conditionally. + AlwaysEnd, // All paths reach an END instruction. 
+ }; + + u32 start; // Starting PC of the function + u32 end; // End PC of the function + Labels outLabels{}; // Labels this function can "goto" (jump) to + ExitMode exitMode = ExitMode::Unknown; + + explicit Function(u32 start, u32 end) : start(start), end(end) {} + // Use lexicographic comparison for functions in order to sort them in a set + bool operator<(const Function& other) const { return std::tie(start, end) < std::tie(other.start, other.end); } + }; + + std::set functions{}; + + // Tells us whether analysis of the shader we're trying to compile failed, in which case we'll need to fail back to shader emulation + // On the CPU + bool analysisFailed = false; + + void analyze(const PICAShader& shader, u32 entrypoint); + + // This will recursively add all functions called by the function too, as analyzeFunction will call addFunction on control flow instructions + const Function* addFunction(u32 start, u32 end) { + auto searchIterator = functions.find(Function(start, end)); + if (searchIterator != functions.end()) { + return &(*searchIterator); + } + + // Add this function and analyze it if it doesn't already exist + Function function(start, end); + function.exitMode = analyzeFunction(start, end, function.outLabels); + + // This function + if (function.exitMode == Function::ExitMode::Unknown) { + analysisFailed = true; + return nullptr; + } + + // Add function to our function list + auto [it, added] = functions.insert(std::move(function)); + return &(*it); + } + + Function::ExitMode analyzeFunction(u32 start, u32 end, Function::Labels& labels); + }; + + class ShaderDecompiler { + ControlFlow controlFlow{}; + + PICAShader& shader; + EmulatorConfig& config; + std::string decompiledShader; + + u32 entrypoint; + u32 currentPC; + + API api; + Language language; + + public: + ShaderDecompiler(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language) + : shader(shader), entrypoint(entrypoint), currentPC(entrypoint), config(config), 
api(api), language(language), decompiledShader("") {} + + std::string decompile(); + }; + + std::string decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language); +} // namespace PICA::ShaderGen \ No newline at end of file diff --git a/src/core/PICA/shader_decompiler.cpp b/src/core/PICA/shader_decompiler.cpp index b4f8f155..4dccfa7d 100644 --- a/src/core/PICA/shader_decompiler.cpp +++ b/src/core/PICA/shader_decompiler.cpp @@ -1 +1,53 @@ -#include "PICA/shader_decompiler.hpp" \ No newline at end of file +#include "PICA/shader_decompiler.hpp" + +#include "config.hpp" + +using namespace PICA; +using namespace PICA::ShaderGen; +using Function = ControlFlow::Function; +using ExitMode = Function::ExitMode; + +void ControlFlow::analyze(const PICAShader& shader, u32 entrypoint) { + analysisFailed = false; + + const Function* function = addFunction(entrypoint, PICAShader::maxInstructionCount); + if (function == nullptr) { + analysisFailed = true; + } +} + +ExitMode analyzeFunction(u32 start, u32 end, Function::Labels& labels) { return ExitMode::Unknown; } + +std::string ShaderDecompiler::decompile() { + controlFlow.analyze(shader, entrypoint); + + if (controlFlow.analysisFailed) { + return ""; + } + + decompiledShader = ""; + + switch (api) { + case API::GL: decompiledShader += "#version 410 core"; break; + case API::GLES: decompiledShader += "#version 300 es"; break; + default: break; + } + + if (config.accurateShaderMul) { + // Safe multiplication handler from Citra: Handles the PICA's 0 * inf = 0 edge case + decompiledShader += R"( + vec4 safe_mul(vec4 a, vec4 b) { + vec4 res = a * b; + return mix(res, mix(mix(vec4(0.0), res, isnan(rhs)), product, isnan(lhs)), isnan(res)); + } + )"; + } + + return decompiledShader; +} + +std::string PICA::ShaderGen::decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language) { + ShaderDecompiler decompiler(shader, config, entrypoint, api, language); 
+ + return decompiler.decompile(); +} \ No newline at end of file From 85af58f0a72df67237ac234fc476092edd3c5f5b Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 22 Jul 2024 02:06:24 +0300 Subject: [PATCH 133/251] Remove shader-related hallucinations --- include/PICA/shader.hpp | 6 +----- src/core/PICA/regs.cpp | 2 ++ src/core/PICA/shader_unit.cpp | 1 - 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index 10f6ec88..cc055257 100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -220,13 +220,9 @@ class PICAShader { public: static constexpr size_t maxInstructionCount = 4096; std::array loadedShader; // Currently loaded & active shader - std::array bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to PICAShader(ShaderType type) : type(type) {} - // Theese functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them - void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); } - void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; } void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; } @@ -235,7 +231,7 @@ class PICAShader { Helpers::panic("o no, shader upload overflew"); } - bufferedShader[bufferIndex++] = word; + loadedShader[bufferIndex++] = word; bufferIndex &= 0xfff; codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index 99519272..f805de60 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -329,9 +329,11 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { break; } + /* TODO: Find out if this actually does anything case VertexShaderTransferEnd: if (value != 0) shaderUnit.vs.finalize(); break; + */ case VertexShaderTransferIndex: 
shaderUnit.vs.setBufferIndex(value); break; diff --git a/src/core/PICA/shader_unit.cpp b/src/core/PICA/shader_unit.cpp index aa7b4c12..759849a8 100644 --- a/src/core/PICA/shader_unit.cpp +++ b/src/core/PICA/shader_unit.cpp @@ -9,7 +9,6 @@ void ShaderUnit::reset() { void PICAShader::reset() { loadedShader.fill(0); - bufferedShader.fill(0); operandDescriptors.fill(0); boolUniform = 0; From 0aa1ed21b2a1cf4e1fc0bd3e801bd4878d56fd4d Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 23 Jul 2024 01:22:26 +0300 Subject: [PATCH 134/251] More shader decompiler work --- include/PICA/shader.hpp | 16 +++- include/PICA/shader_decompiler.hpp | 42 ++++++++--- src/core/PICA/shader_decompiler.cpp | 110 ++++++++++++++++++++++++++-- 3 files changed, 150 insertions(+), 18 deletions(-) diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index cc055257..938a5408 100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -1,6 +1,8 @@ #pragma once #include #include +#include +#include #include #include "PICA/float_types.hpp" @@ -90,9 +92,12 @@ class PICAShader { public: // These are placed close to the temp registers and co because it helps the JIT generate better code u32 entrypoint = 0; // Initial shader PC - u32 boolUniform; - std::array, 4> intUniforms; + + // We want these registers in this order & with this alignment for uploading them directly to a UBO + // When emulating shaders on the GPU alignas(16) std::array floatUniforms; + alignas(16) std::array, 4> intUniforms; + u32 boolUniform; alignas(16) std::array fixedAttributes; // Fixed vertex attributes alignas(16) std::array inputs; // Attributes passed to the shader @@ -291,4 +296,9 @@ class PICAShader { Hash getCodeHash(); Hash getOpdescHash(); -}; \ No newline at end of file +}; + +static_assert( + offsetof(PICAShader, intUniforms) == offsetof(PICAShader, floatUniforms) + 96 * sizeof(float) * 4 && + offsetof(PICAShader, boolUniform) == 
offsetof(PICAShader, intUniforms) + 4 * sizeof(u8) * 4 +); \ No newline at end of file diff --git a/include/PICA/shader_decompiler.hpp b/include/PICA/shader_decompiler.hpp index abfa910c..cbc569ae 100644 --- a/include/PICA/shader_decompiler.hpp +++ b/include/PICA/shader_decompiler.hpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "PICA/shader.hpp" @@ -13,6 +14,15 @@ namespace PICA::ShaderGen { // Control flow analysis is partially based on // https://github.com/PabloMK7/citra/blob/d0179559466ff09731d74474322ee880fbb44b00/src/video_core/shader/generator/glsl_shader_decompiler.cpp#L33 struct ControlFlow { + // A continuous range of addresses + struct AddressRange { + u32 start, end; + AddressRange(u32 start, u32 end) : start(start), end(end) {} + + // Use lexicographic comparison for functions in order to sort them in a set + bool operator<(const AddressRange& other) const { return std::tie(start, end) < std::tie(other.start, other.end); } + }; + struct Function { using Labels = std::set; @@ -29,20 +39,22 @@ namespace PICA::ShaderGen { ExitMode exitMode = ExitMode::Unknown; explicit Function(u32 start, u32 end) : start(start), end(end) {} - // Use lexicographic comparison for functions in order to sort them in a set - bool operator<(const Function& other) const { return std::tie(start, end) < std::tie(other.start, other.end); } + bool operator<(const Function& other) const { return AddressRange(start, end) < AddressRange(other.start, other.end); } + + std::string getIdentifier() const { return "func_" + std::to_string(start) + "_to_" + std::to_string(end); } + std::string getForwardDecl() const { return "void " + getIdentifier() + "();\n"; } + std::string getCallStatement() const { return getIdentifier() + "()"; } }; std::set functions{}; + std::map exitMap{}; // Tells us whether analysis of the shader we're trying to compile failed, in which case we'll need to fail back to shader emulation // On the CPU bool analysisFailed = false; - void 
analyze(const PICAShader& shader, u32 entrypoint); - // This will recursively add all functions called by the function too, as analyzeFunction will call addFunction on control flow instructions - const Function* addFunction(u32 start, u32 end) { + const Function* addFunction(const PICAShader& shader, u32 start, u32 end) { auto searchIterator = functions.find(Function(start, end)); if (searchIterator != functions.end()) { return &(*searchIterator); @@ -50,9 +62,9 @@ namespace PICA::ShaderGen { // Add this function and analyze it if it doesn't already exist Function function(start, end); - function.exitMode = analyzeFunction(start, end, function.outLabels); + function.exitMode = analyzeFunction(shader, start, end, function.outLabels); - // This function + // This function could not be fully analyzed, report failure if (function.exitMode == Function::ExitMode::Unknown) { analysisFailed = true; return nullptr; @@ -63,10 +75,14 @@ namespace PICA::ShaderGen { return &(*it); } - Function::ExitMode analyzeFunction(u32 start, u32 end, Function::Labels& labels); + void analyze(const PICAShader& shader, u32 entrypoint); + Function::ExitMode analyzeFunction(const PICAShader& shader, u32 start, u32 end, Function::Labels& labels); }; class ShaderDecompiler { + using AddressRange = ControlFlow::AddressRange; + using Function = ControlFlow::Function; + ControlFlow controlFlow{}; PICAShader& shader; @@ -74,14 +90,20 @@ namespace PICA::ShaderGen { std::string decompiledShader; u32 entrypoint; - u32 currentPC; API api; Language language; + void compileInstruction(u32& pc, bool& finished); + void compileRange(const AddressRange& range); + void callFunction(const Function& function); + const Function* findFunction(const AddressRange& range); + + void writeAttributes(); + public: ShaderDecompiler(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language) - : shader(shader), entrypoint(entrypoint), currentPC(entrypoint), config(config), api(api), 
language(language), decompiledShader("") {} + : shader(shader), entrypoint(entrypoint), config(config), api(api), language(language), decompiledShader("") {} std::string decompile(); }; diff --git a/src/core/PICA/shader_decompiler.cpp b/src/core/PICA/shader_decompiler.cpp index 4dccfa7d..91b07574 100644 --- a/src/core/PICA/shader_decompiler.cpp +++ b/src/core/PICA/shader_decompiler.cpp @@ -10,13 +10,75 @@ using ExitMode = Function::ExitMode; void ControlFlow::analyze(const PICAShader& shader, u32 entrypoint) { analysisFailed = false; - const Function* function = addFunction(entrypoint, PICAShader::maxInstructionCount); + const Function* function = addFunction(shader, entrypoint, PICAShader::maxInstructionCount); if (function == nullptr) { analysisFailed = true; } } -ExitMode analyzeFunction(u32 start, u32 end, Function::Labels& labels) { return ExitMode::Unknown; } +ExitMode ControlFlow::analyzeFunction(const PICAShader& shader, u32 start, u32 end, Function::Labels& labels) { + // Initialize exit mode to unknown by default, in order to detect things like unending loops + auto [it, inserted] = exitMap.emplace(AddressRange(start, end), ExitMode::Unknown); + // Function has already been analyzed and is in the map so it wasn't added, don't analyze again + if (!inserted) { + return it->second; + } + + // Make sure not to go out of bounds on the shader + for (u32 pc = start; pc < PICAShader::maxInstructionCount && pc != end; pc++) { + const u32 instruction = shader.loadedShader[pc]; + const u32 opcode = instruction >> 26; + + switch (opcode) { + case ShaderOpcodes::JMPC: Helpers::panic("Unimplemented control flow operation (JMPC)"); + case ShaderOpcodes::JMPU: Helpers::panic("Unimplemented control flow operation (JMPU)"); + case ShaderOpcodes::IFU: Helpers::panic("Unimplemented control flow operation (IFU)"); + case ShaderOpcodes::IFC: Helpers::panic("Unimplemented control flow operation (IFC)"); + case ShaderOpcodes::CALL: Helpers::panic("Unimplemented control flow 
operation (CALL)"); + case ShaderOpcodes::CALLC: Helpers::panic("Unimplemented control flow operation (CALLC)"); + case ShaderOpcodes::CALLU: Helpers::panic("Unimplemented control flow operation (CALLU)"); + case ShaderOpcodes::LOOP: Helpers::panic("Unimplemented control flow operation (LOOP)"); + case ShaderOpcodes::END: it->second = ExitMode::AlwaysEnd; return it->second; + + default: break; + } + } + + // A function without control flow instructions will always reach its "return point" and return + return ExitMode::AlwaysReturn; +} + +void ShaderDecompiler::compileRange(const AddressRange& range) { + u32 pc = range.start; + const u32 end = range.end >= range.start ? range.end : PICAShader::maxInstructionCount; + bool finished = false; + + while (pc < end && !finished) { + compileInstruction(pc, finished); + } +} + +const Function* ShaderDecompiler::findFunction(const AddressRange& range) { + for (const Function& func : controlFlow.functions) { + if (range.start == func.start && range.end == func.end) { + return &func; + } + } + + return nullptr; +} + +void ShaderDecompiler::writeAttributes() { + decompiledShader += R"( + layout(std140) uniform PICAShaderUniforms { + vec4 uniform_float[96]; + uvec4 uniform_int; + uint uniform_bool; + }; +)"; + + decompiledShader += "\n"; +} std::string ShaderDecompiler::decompile() { controlFlow.analyze(shader, entrypoint); @@ -28,11 +90,13 @@ std::string ShaderDecompiler::decompile() { decompiledShader = ""; switch (api) { - case API::GL: decompiledShader += "#version 410 core"; break; - case API::GLES: decompiledShader += "#version 300 es"; break; + case API::GL: decompiledShader += "#version 410 core\n"; break; + case API::GLES: decompiledShader += "#version 300 es\n"; break; default: break; } + writeAttributes(); + if (config.accurateShaderMul) { // Safe multiplication handler from Citra: Handles the PICA's 0 * inf = 0 edge case decompiledShader += R"( @@ -43,10 +107,46 @@ std::string ShaderDecompiler::decompile() { )"; } + 
// Forward declare every generated function first so that we can easily call anything from anywhere. + for (auto& func : controlFlow.functions) { + decompiledShader += func.getForwardDecl(); + } + + decompiledShader += "void pica_shader_main() {\n"; + AddressRange mainFunctionRange(entrypoint, PICAShader::maxInstructionCount); + callFunction(*findFunction(mainFunctionRange)); + decompiledShader += "}\n"; + + for (auto& func : controlFlow.functions) { + if (func.outLabels.size() > 0) { + Helpers::panic("Function with out labels"); + } + + decompiledShader += "void " + func.getIdentifier() + "() {\n"; + compileRange(AddressRange(func.start, func.end)); + decompiledShader += "}\n"; + } + return decompiledShader; } -std::string PICA::ShaderGen::decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language) { +void ShaderDecompiler::compileInstruction(u32& pc, bool& finished) { + const u32 instruction = shader.loadedShader[pc]; + const u32 opcode = instruction >> 26; + + switch (opcode) { + case ShaderOpcodes::DP4: decompiledShader += "dp4\n"; break; + case ShaderOpcodes::MOV: decompiledShader += "mov\n"; break; + case ShaderOpcodes::END: finished = true; return; + default: Helpers::warn("GLSL recompiler: Unknown opcode: %X", opcode); break; + } + + pc++; +} + +void ShaderDecompiler::callFunction(const Function& function) { decompiledShader += function.getCallStatement() + ";\n"; } + +std::string ShaderGen::decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language) { ShaderDecompiler decompiler(shader, config, entrypoint, api, language); return decompiler.decompile(); From 850aadb0f6d9b2130733e1d19ec0f69a7cb86ba6 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 23 Jul 2024 02:25:40 +0300 Subject: [PATCH 135/251] Update Linux version on CI --- .github/workflows/Linux_AppImage_Build.yml | 12 ++++++------ 
.github/workflows/Qt_Build.yml | 12 ++++++------ include/PICA/shader.hpp | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/Linux_AppImage_Build.yml b/.github/workflows/Linux_AppImage_Build.yml index 507187a3..7d198b9c 100644 --- a/.github/workflows/Linux_AppImage_Build.yml +++ b/.github/workflows/Linux_AppImage_Build.yml @@ -16,7 +16,7 @@ jobs: # well on Windows or Mac. You can convert this to a matrix build if you need # cross-platform coverage. # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -33,11 +33,11 @@ jobs: sudo ./llvm.sh 17 - name: Setup Vulkan SDK - run: | - wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list - sudo apt update - sudo apt install vulkan-sdk + uses: humbletim/setup-vulkan-sdk@v1.2.0 + with: + vulkan-query-version: latest + vulkan-use-cache: true + vulkan-components: Vulkan-Headers, Vulkan-Loader, SPIRV-Tools, Glslang - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 
diff --git a/.github/workflows/Qt_Build.yml b/.github/workflows/Qt_Build.yml index 5e622c54..1f9db49e 100644 --- a/.github/workflows/Qt_Build.yml +++ b/.github/workflows/Qt_Build.yml @@ -96,7 +96,7 @@ jobs: path: 'Alber.zip' Linux: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -117,11 +117,11 @@ jobs: sudo ./llvm.sh 17 - name: Setup Vulkan SDK - run: | - wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list - sudo apt update - sudo apt install vulkan-sdk + uses: humbletim/setup-vulkan-sdk@v1.2.0 + with: + vulkan-query-version: latest + vulkan-use-cache: true + vulkan-components: Vulkan-Headers, Vulkan-Loader, SPIRV-Tools, Glslang - name: Configure CMake run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-17 -DCMAKE_CXX_COMPILER=clang++-17 -DENABLE_USER_BUILD=ON -DENABLE_QT_GUI=ON diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index 938a5408..44ca2a15 100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -94,7 +94,7 @@ class PICAShader { u32 entrypoint = 0; // Initial shader PC // We want these registers in this order & with this alignment for uploading them directly to a UBO - // When emulating shaders on the GPU + // When emulating shaders on the GPU. Plus this alignment for float uniforms is necessary for doing SIMD in the shader->CPU recompilers. 
alignas(16) std::array floatUniforms; alignas(16) std::array, 4> intUniforms; u32 boolUniform; From fc397b2b58a9c04c6c3616a41d8fbeb19801586c Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 23 Jul 2024 02:33:53 +0300 Subject: [PATCH 136/251] Fix Linux Qt packages --- .github/workflows/Qt_Build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Qt_Build.yml b/.github/workflows/Qt_Build.yml index 1f9db49e..4d5c8b57 100644 --- a/.github/workflows/Qt_Build.yml +++ b/.github/workflows/Qt_Build.yml @@ -105,7 +105,7 @@ jobs: - name: Install misc packages run: | - sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev + sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev libgl1-mesa-dev sudo add-apt-repository -y ppa:savoury1/qt-6-2 sudo apt update sudo apt install qt6-base-dev qt6-base-private-dev From e4d4a356744f29a3f9d650bb4e915435dc0d411a Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 23 Jul 2024 04:11:12 +0300 Subject: [PATCH 137/251] Renderer GL: Add UB checks --- include/PICA/gpu.hpp | 3 ++- src/core/renderer_gl/renderer_gl.cpp | 13 +++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index c4c8db5c..ac2a49e6 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -167,7 +167,8 @@ class GPU { u32 index = paddr - PhysicalAddrs::VRAM; return (T*)&vram[index]; } else [[unlikely]] { - Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr); + Helpers::warn("[GPU] Tried to access unknown physical address: %08X", paddr); + return nullptr; } } diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index f26158ae..8b614d2d 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ 
b/src/core/renderer_gl/renderer_gl.cpp @@ -659,7 +659,15 @@ OpenGL::Texture RendererGL::getTexture(Texture& tex) { if (buffer.has_value()) { return buffer.value().get().texture; } else { - const auto textureData = std::span{gpu.getPointerPhys(tex.location), tex.sizeInBytes()}; // Get pointer to the texture data in 3DS memory + const u8* startPointer = gpu.getPointerPhys(tex.location); + const usize sizeInBytes = tex.sizeInBytes(); + + if (startPointer == nullptr || (sizeInBytes > 0 && gpu.getPointerPhys(tex.location + sizeInBytes - 1) == nullptr)) [[unlikely]] { + Helpers::warn("Out-of-bounds texture fetch"); + return blankTexture; + } + + const auto textureData = std::span{startPointer, tex.sizeInBytes()}; // Get pointer to the texture data in 3DS memory Texture& newTex = textureCache.add(tex); newTex.decodeTexture(textureData); @@ -770,7 +778,8 @@ void RendererGL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 if (inputWidth != 0) [[likely]] { copyHeight = (copySize / inputWidth) * 8; } else { - copyHeight = 0; + Helpers::warn("Zero-width texture copy"); + return; } // Find the source surface. From 1fa9ce126b0f5b24707b4ca79111a964827cd787 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 23 Jul 2024 10:54:01 +0200 Subject: [PATCH 138/251] add: period at the end of a sentence --- src/miniaudio.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/miniaudio.cpp b/src/miniaudio.cpp index e42fea68..a61979e0 100644 --- a/src/miniaudio.cpp +++ b/src/miniaudio.cpp @@ -1,5 +1,5 @@ // We do not need the ability to be able to encode or decode audio files for the time being -// So we disable said functionality to make the executable smaller +// So we disable said functionality to make the executable smaller. 
#define MA_NO_DECODING #define MA_NO_ENCODING #define MINIAUDIO_IMPLEMENTATION From 855a374f6702a40a35fc78e67f2679ccaf13a85f Mon Sep 17 00:00:00 2001 From: SamoZ256 <96914946+SamoZ256@users.noreply.github.com> Date: Tue, 23 Jul 2024 10:57:13 +0200 Subject: [PATCH 139/251] add: period at the end of a sentence (#553) --- src/miniaudio.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/miniaudio.cpp b/src/miniaudio.cpp index e42fea68..a61979e0 100644 --- a/src/miniaudio.cpp +++ b/src/miniaudio.cpp @@ -1,5 +1,5 @@ // We do not need the ability to be able to encode or decode audio files for the time being -// So we disable said functionality to make the executable smaller +// So we disable said functionality to make the executable smaller. #define MA_NO_DECODING #define MA_NO_ENCODING #define MINIAUDIO_IMPLEMENTATION From 0f80d0af7a2e2c9c8cad52c0b5ddc620b748f4b3 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 23 Jul 2024 15:22:15 +0300 Subject: [PATCH 140/251] Rename Handle to HorizonHandle, add metal-cpp submodule, format --- .gitmodules | 3 +++ include/kernel/handles.hpp | 8 ++++---- include/kernel/kernel.hpp | 2 ++ include/kernel/kernel_types.hpp | 10 +++++++++- include/memory.hpp | 10 +++++++++- include/services/ac.hpp | 2 ++ include/services/act.hpp | 4 +++- include/services/am.hpp | 4 +++- include/services/apt.hpp | 7 +++++-- include/services/boss.hpp | 7 +++++-- include/services/cam.hpp | 1 + include/services/cecd.hpp | 5 ++++- include/services/cfg.hpp | 15 +++++++++------ include/services/csnd.hpp | 6 +++--- include/services/dlp_srvr.hpp | 4 +++- include/services/dsp.hpp | 2 ++ include/services/frd.hpp | 11 +++++++---- include/services/fs.hpp | 4 +++- include/services/gsp_gpu.hpp | 2 ++ include/services/gsp_lcd.hpp | 2 ++ include/services/hid.hpp | 2 ++ include/services/http.hpp | 2 ++ include/services/ir_user.hpp | 2 ++ include/services/ldr_ro.hpp | 4 +++- 
include/services/mcu/mcu_hwc.hpp | 2 ++ include/services/mic.hpp | 6 ++++-- include/services/ndm.hpp | 11 +++++++++-- include/services/news_u.hpp | 2 ++ include/services/nfc.hpp | 2 ++ include/services/nim.hpp | 4 +++- include/services/nwm_uds.hpp | 2 ++ include/services/ptm.hpp | 2 +- include/services/service_manager.hpp | 2 ++ include/services/soc.hpp | 4 +++- include/services/ssl.hpp | 8 +++++--- include/services/y2r.hpp | 15 +++++++++------ src/core/kernel/address_arbiter.cpp | 2 +- src/core/kernel/events.cpp | 2 +- src/core/kernel/kernel.cpp | 2 +- src/core/kernel/memory_management.cpp | 2 +- src/core/kernel/ports.cpp | 6 +++--- src/core/kernel/threads.cpp | 6 +++--- src/core/kernel/timers.cpp | 2 +- src/core/services/fs.cpp | 6 +++--- src/core/services/service_manager.cpp | 2 +- third_party/metal-cpp | 1 + 46 files changed, 150 insertions(+), 60 deletions(-) create mode 160000 third_party/metal-cpp diff --git a/.gitmodules b/.gitmodules index 5a136acb..656e1f41 100644 --- a/.gitmodules +++ b/.gitmodules @@ -73,3 +73,6 @@ [submodule "third_party/hips"] path = third_party/hips url = https://github.com/wheremyfoodat/Hips +[submodule "third_party/metal-cpp"] + path = third_party/metal-cpp + url = https://github.com/Panda3DS-emu/metal-cpp diff --git a/include/kernel/handles.hpp b/include/kernel/handles.hpp index fe746b65..45400837 100644 --- a/include/kernel/handles.hpp +++ b/include/kernel/handles.hpp @@ -1,7 +1,7 @@ #pragma once #include "helpers.hpp" -using Handle = u32; +using HorizonHandle = u32; namespace KernelHandles { enum : u32 { @@ -61,17 +61,17 @@ namespace KernelHandles { }; // Returns whether "handle" belongs to one of the OS services - static constexpr bool isServiceHandle(Handle handle) { + static constexpr bool isServiceHandle(HorizonHandle handle) { return handle >= MinServiceHandle && handle <= MaxServiceHandle; } // Returns whether "handle" belongs to one of the OS services' shared memory areas - static constexpr bool isSharedMemHandle(Handle 
handle) { + static constexpr bool isSharedMemHandle(HorizonHandle handle) { return handle >= MinSharedMemHandle && handle <= MaxSharedMemHandle; } // Returns the name of a handle as a string based on the given handle - static const char* getServiceName(Handle handle) { + static const char* getServiceName(HorizonHandle handle) { switch (handle) { case AC: return "AC"; case ACT: return "ACT"; diff --git a/include/kernel/kernel.hpp b/include/kernel/kernel.hpp index e0c0651b..abc508ac 100644 --- a/include/kernel/kernel.hpp +++ b/include/kernel/kernel.hpp @@ -18,6 +18,8 @@ class CPU; struct Scheduler; class Kernel { + using Handle = HorizonHandle; + std::span regs; CPU& cpu; Memory& mem; diff --git a/include/kernel/kernel_types.hpp b/include/kernel/kernel_types.hpp index a68ef8d5..a3a60c34 100644 --- a/include/kernel/kernel_types.hpp +++ b/include/kernel/kernel_types.hpp @@ -47,7 +47,7 @@ enum class ProcessorID : s32 { struct AddressArbiter {}; struct ResourceLimits { - Handle handle; + HorizonHandle handle; s32 currentCommit = 0; }; @@ -91,6 +91,8 @@ struct Port { }; struct Session { + using Handle = HorizonHandle; + Handle portHandle; // The port this session is subscribed to Session(Handle portHandle) : portHandle(portHandle) {} }; @@ -109,6 +111,8 @@ enum class ThreadStatus { }; struct Thread { + using Handle = HorizonHandle; + u32 initialSP; // Initial r13 value u32 entrypoint; // Initial r15 value u32 priority; @@ -161,6 +165,8 @@ static const char* kernelObjectTypeToString(KernelObjectType t) { } struct Mutex { + using Handle = HorizonHandle; + u64 waitlist; // Refer to the getWaitlist function below for documentation Handle ownerThread = 0; // Index of the thread that holds the mutex if it's locked Handle handle; // Handle of the mutex itself @@ -203,6 +209,8 @@ struct MemoryBlock { // Generic kernel object class struct KernelObject { + using Handle = HorizonHandle; + Handle handle = 0; // A u32 the OS will use to identify objects void* data = nullptr; 
KernelObjectType type; diff --git a/include/memory.hpp b/include/memory.hpp index 33ccbae5..2f01aa35 100644 --- a/include/memory.hpp +++ b/include/memory.hpp @@ -102,6 +102,8 @@ namespace KernelMemoryTypes { } class Memory { + using Handle = HorizonHandle; + u8* fcram; u8* dspRam; // Provided to us by Audio u8* vram; // Provided to the memory class by the GPU class @@ -213,8 +215,14 @@ private: } enum class BatteryLevel { - Empty = 0, AlmostEmpty, OneBar, TwoBars, ThreeBars, FourBars + Empty = 0, + AlmostEmpty, + OneBar, + TwoBars, + ThreeBars, + FourBars, }; + u8 getBatteryState(bool adapterConnected, bool charging, BatteryLevel batteryLevel) { u8 value = static_cast(batteryLevel) << 2; // Bits 2:4 are the battery level from 0 to 5 if (adapterConnected) value |= 1 << 0; // Bit 0 shows if the charger is connected diff --git a/include/services/ac.hpp b/include/services/ac.hpp index 4ba53033..56acd436 100644 --- a/include/services/ac.hpp +++ b/include/services/ac.hpp @@ -8,6 +8,8 @@ #include "result/result.hpp" class ACService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::AC; Memory& mem; MAKE_LOG_FUNCTION(log, acLogger) diff --git a/include/services/act.hpp b/include/services/act.hpp index 92c69c60..3fe68993 100644 --- a/include/services/act.hpp +++ b/include/services/act.hpp @@ -6,6 +6,8 @@ #include "result/result.hpp" class ACTService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::ACT; Memory& mem; MAKE_LOG_FUNCTION(log, actLogger) @@ -15,7 +17,7 @@ class ACTService { void generateUUID(u32 messagePointer); void getAccountDataBlock(u32 messagePointer); -public: + public: ACTService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/am.hpp b/include/services/am.hpp index 672909ff..f72a5efc 100644 --- a/include/services/am.hpp +++ b/include/services/am.hpp @@ -6,6 +6,8 @@ #include "result/result.hpp" class AMService { + using Handle = HorizonHandle; + Handle 
handle = KernelHandles::AM; Memory& mem; MAKE_LOG_FUNCTION(log, amLogger) @@ -15,7 +17,7 @@ class AMService { void getPatchTitleInfo(u32 messagePointer); void listTitleInfo(u32 messagePointer); -public: + public: AMService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/apt.hpp b/include/services/apt.hpp index 48a59c2d..624151c1 100644 --- a/include/services/apt.hpp +++ b/include/services/apt.hpp @@ -12,7 +12,8 @@ class Kernel; enum class ConsoleModel : u32 { - Old3DS, New3DS + Old3DS, + New3DS, }; // https://www.3dbrew.org/wiki/NS_and_APT_Services#Command @@ -41,6 +42,8 @@ namespace APT::Transitions { } class APTService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::APT; Memory& mem; Kernel& kernel; @@ -99,7 +102,7 @@ class APTService { u32 screencapPostPermission; -public: + public: APTService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel), appletManager(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/boss.hpp b/include/services/boss.hpp index 769184e5..edc50dee 100644 --- a/include/services/boss.hpp +++ b/include/services/boss.hpp @@ -6,6 +6,8 @@ #include "result/result.hpp" class BOSSService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::BOSS; Memory& mem; MAKE_LOG_FUNCTION(log, bossLogger) @@ -17,7 +19,7 @@ class BOSSService { void getNewArrivalFlag(u32 messagePointer); void getNsDataIdList(u32 messagePointer, u32 commandWord); void getOptoutFlag(u32 messagePointer); - void getStorageEntryInfo(u32 messagePointer); // Unknown what this is, name taken from Citra + void getStorageEntryInfo(u32 messagePointer); // Unknown what this is, name taken from Citra void getTaskIdList(u32 messagePointer); void getTaskInfo(u32 messagePointer); void getTaskServiceStatus(u32 messagePointer); @@ -35,7 +37,8 @@ class BOSSService { void unregisterTask(u32 messagePointer); s8 optoutFlag; -public: + + public: 
BOSSService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/cam.hpp b/include/services/cam.hpp index 60ede3b9..e5254997 100644 --- a/include/services/cam.hpp +++ b/include/services/cam.hpp @@ -12,6 +12,7 @@ class Kernel; class CAMService { + using Handle = HorizonHandle; using Event = std::optional; struct Port { diff --git a/include/services/cecd.hpp b/include/services/cecd.hpp index 656e38ad..4612c17b 100644 --- a/include/services/cecd.hpp +++ b/include/services/cecd.hpp @@ -1,5 +1,6 @@ #pragma once #include + #include "helpers.hpp" #include "kernel_types.hpp" #include "logger.hpp" @@ -9,6 +10,8 @@ class Kernel; class CECDService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::CECD; Memory& mem; Kernel& kernel; @@ -20,7 +23,7 @@ class CECDService { void getInfoEventHandle(u32 messagePointer); void openAndRead(u32 messagePointer); -public: + public: CECDService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/cfg.hpp b/include/services/cfg.hpp index 7241a409..e2ddffa8 100644 --- a/include/services/cfg.hpp +++ b/include/services/cfg.hpp @@ -1,5 +1,6 @@ #pragma once #include + #include "helpers.hpp" #include "logger.hpp" #include "memory.hpp" @@ -7,8 +8,10 @@ #include "result/result.hpp" class CFGService { + using Handle = HorizonHandle; + Memory& mem; - CountryCodes country = CountryCodes::US; // Default to USA + CountryCodes country = CountryCodes::US; // Default to USA MAKE_LOG_FUNCTION(log, cfgLogger) void writeStringU16(u32 pointer, const std::u16string& string); @@ -27,12 +30,12 @@ class CFGService { void getConfigInfo(u32 output, u32 blockID, u32 size, u32 permissionMask); -public: + public: enum class Type { - U, // cfg:u - I, // cfg:i - S, // cfg:s - NOR, // cfg:nor + U, // cfg:u + I, // cfg:i + S, // cfg:s + NOR, // cfg:nor }; CFGService(Memory& mem) : mem(mem) {} diff --git 
a/include/services/csnd.hpp b/include/services/csnd.hpp index 8f6d60f8..93fa941d 100644 --- a/include/services/csnd.hpp +++ b/include/services/csnd.hpp @@ -10,6 +10,8 @@ class Kernel; class CSNDService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::CSND; Memory& mem; Kernel& kernel; @@ -30,7 +32,5 @@ class CSNDService { void reset(); void handleSyncRequest(u32 messagePointer); - void setSharedMemory(u8* ptr) { - sharedMemory = ptr; - } + void setSharedMemory(u8* ptr) { sharedMemory = ptr; } }; \ No newline at end of file diff --git a/include/services/dlp_srvr.hpp b/include/services/dlp_srvr.hpp index 1e714283..ae9cc96f 100644 --- a/include/services/dlp_srvr.hpp +++ b/include/services/dlp_srvr.hpp @@ -8,6 +8,8 @@ // Please forgive me for how everything in this file is named // "dlp:SRVR" is not a nice name to work with class DlpSrvrService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::DLP_SRVR; Memory& mem; MAKE_LOG_FUNCTION(log, dlpSrvrLogger) @@ -15,7 +17,7 @@ class DlpSrvrService { // Service commands void isChild(u32 messagePointer); -public: + public: DlpSrvrService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/dsp.hpp b/include/services/dsp.hpp index 5cbd4fd5..bc1adbca 100644 --- a/include/services/dsp.hpp +++ b/include/services/dsp.hpp @@ -14,6 +14,8 @@ class Kernel; class DSPService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::DSP; Memory& mem; Kernel& kernel; diff --git a/include/services/frd.hpp b/include/services/frd.hpp index b9b3b0fe..914d9251 100644 --- a/include/services/frd.hpp +++ b/include/services/frd.hpp @@ -1,5 +1,6 @@ #pragma once #include + #include "helpers.hpp" #include "kernel_types.hpp" #include "logger.hpp" @@ -15,6 +16,8 @@ struct FriendKey { static_assert(sizeof(FriendKey) == 16); class FRDService { + using Handle = HorizonHandle; + Memory& mem; MAKE_LOG_FUNCTION(log, frdLogger) @@ -51,11 +54,11 @@ class 
FRDService { }; static_assert(sizeof(Profile) == 8); -public: + public: enum class Type { - A, // frd:a - N, // frd:n - U, // frd:u + A, // frd:a + N, // frd:n + U, // frd:u }; FRDService(Memory& mem) : mem(mem) {} diff --git a/include/services/fs.hpp b/include/services/fs.hpp index 4a613121..3b3b3d44 100644 --- a/include/services/fs.hpp +++ b/include/services/fs.hpp @@ -16,6 +16,8 @@ class Kernel; class FSService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::FS; Memory& mem; Kernel& kernel; @@ -81,7 +83,7 @@ class FSService { // Used for set/get priority: Not sure what sort of priority this is referring to u32 priority; -public: + public: FSService(Memory& mem, Kernel& kernel, const EmulatorConfig& config) : mem(mem), saveData(mem), sharedExtSaveData_nand(mem, "../SharedFiles/NAND", true), extSaveData_sdmc(mem, "SDMC"), sdmc(mem), sdmcWriteOnly(mem, true), selfNcch(mem), ncch(mem), userSaveData1(mem, ArchiveID::UserSaveData1), diff --git a/include/services/gsp_gpu.hpp b/include/services/gsp_gpu.hpp index 0da4fcd0..d7244609 100644 --- a/include/services/gsp_gpu.hpp +++ b/include/services/gsp_gpu.hpp @@ -22,6 +22,8 @@ enum class GPUInterrupt : u8 { class Kernel; class GPUService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::GPU; Memory& mem; GPU& gpu; diff --git a/include/services/gsp_lcd.hpp b/include/services/gsp_lcd.hpp index e7672d4f..f34f59ab 100644 --- a/include/services/gsp_lcd.hpp +++ b/include/services/gsp_lcd.hpp @@ -6,6 +6,8 @@ #include "result/result.hpp" class LCDService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::LCD; Memory& mem; MAKE_LOG_FUNCTION(log, gspLCDLogger) diff --git a/include/services/hid.hpp b/include/services/hid.hpp index d9018a4f..86a55479 100644 --- a/include/services/hid.hpp +++ b/include/services/hid.hpp @@ -38,6 +38,8 @@ namespace HID::Keys { class Kernel; class HIDService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::HID; Memory& mem; Kernel& 
kernel; diff --git a/include/services/http.hpp b/include/services/http.hpp index 1e7f30c3..8b23fb2d 100644 --- a/include/services/http.hpp +++ b/include/services/http.hpp @@ -5,6 +5,8 @@ #include "memory.hpp" class HTTPService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::HTTP; Memory& mem; MAKE_LOG_FUNCTION(log, httpLogger) diff --git a/include/services/ir_user.hpp b/include/services/ir_user.hpp index 186d9717..d475bdaa 100644 --- a/include/services/ir_user.hpp +++ b/include/services/ir_user.hpp @@ -11,6 +11,8 @@ class Kernel; class IRUserService { + using Handle = HorizonHandle; + enum class DeviceID : u8 { CirclePadPro = 1, }; diff --git a/include/services/ldr_ro.hpp b/include/services/ldr_ro.hpp index 71516547..cf60e036 100644 --- a/include/services/ldr_ro.hpp +++ b/include/services/ldr_ro.hpp @@ -8,6 +8,8 @@ class Kernel; class LDRService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::LDR_RO; Memory& mem; Kernel& kernel; @@ -22,7 +24,7 @@ class LDRService { void loadCRR(u32 messagePointer); void unloadCRO(u32 messagePointer); -public: + public: LDRService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/mcu/mcu_hwc.hpp b/include/services/mcu/mcu_hwc.hpp index 354a0c20..4c6a8830 100644 --- a/include/services/mcu/mcu_hwc.hpp +++ b/include/services/mcu/mcu_hwc.hpp @@ -7,6 +7,8 @@ namespace MCU { class HWCService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::MCU_HWC; Memory& mem; MAKE_LOG_FUNCTION(log, mcuLogger) diff --git a/include/services/mic.hpp b/include/services/mic.hpp index f709c27f..f166c5aa 100644 --- a/include/services/mic.hpp +++ b/include/services/mic.hpp @@ -9,6 +9,8 @@ class Kernel; class MICService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::MIC; Memory& mem; Kernel& kernel; @@ -29,14 +31,14 @@ class MICService { void unmapSharedMem(u32 messagePointer); void 
theCaptainToadFunction(u32 messagePointer); - u8 gain = 0; // How loud our microphone input signal is + u8 gain = 0; // How loud our microphone input signal is bool micEnabled = false; bool shouldClamp = false; bool currentlySampling = false; std::optional eventHandle; -public: + public: MICService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/ndm.hpp b/include/services/ndm.hpp index 6d4e5ad8..67679403 100644 --- a/include/services/ndm.hpp +++ b/include/services/ndm.hpp @@ -6,7 +6,14 @@ #include "result/result.hpp" class NDMService { - enum class ExclusiveState : u32 { None = 0, Infrastructure = 1, LocalComms = 2, StreetPass = 3, StreetPassData = 4 }; + using Handle = HorizonHandle; + enum class ExclusiveState : u32 { + None = 0, + Infrastructure = 1, + LocalComms = 2, + StreetPass = 3, + StreetPassData = 4, + }; Handle handle = KernelHandles::NDM; Memory& mem; @@ -25,7 +32,7 @@ class NDMService { ExclusiveState exclusiveState = ExclusiveState::None; -public: + public: NDMService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/news_u.hpp b/include/services/news_u.hpp index 61266e9a..15ae0b16 100644 --- a/include/services/news_u.hpp +++ b/include/services/news_u.hpp @@ -5,6 +5,8 @@ #include "memory.hpp" class NewsUService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::NEWS_U; Memory& mem; MAKE_LOG_FUNCTION(log, newsLogger) diff --git a/include/services/nfc.hpp b/include/services/nfc.hpp index 8eea8a41..e242a326 100644 --- a/include/services/nfc.hpp +++ b/include/services/nfc.hpp @@ -12,6 +12,8 @@ class Kernel; class NFCService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::NFC; Memory& mem; Kernel& kernel; diff --git a/include/services/nim.hpp b/include/services/nim.hpp index dfe13694..dbb3bb8b 100644 --- a/include/services/nim.hpp +++ b/include/services/nim.hpp 
@@ -6,6 +6,8 @@ #include "result/result.hpp" class NIMService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::NIM; Memory& mem; MAKE_LOG_FUNCTION(log, nimLogger) @@ -13,7 +15,7 @@ class NIMService { // Service commands void initialize(u32 messagePointer); -public: + public: NIMService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/nwm_uds.hpp b/include/services/nwm_uds.hpp index bf116bcf..a3b342b8 100644 --- a/include/services/nwm_uds.hpp +++ b/include/services/nwm_uds.hpp @@ -10,6 +10,8 @@ class Kernel; class NwmUdsService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::NWM_UDS; Memory& mem; Kernel& kernel; diff --git a/include/services/ptm.hpp b/include/services/ptm.hpp index f752839b..5b797a1d 100644 --- a/include/services/ptm.hpp +++ b/include/services/ptm.hpp @@ -22,7 +22,7 @@ class PTMService { void getStepHistoryAll(u32 messagePointer); void getTotalStepCount(u32 messagePointer); -public: + public: enum class Type { U, // ptm:u SYSM, // ptm:sysm diff --git a/include/services/service_manager.hpp b/include/services/service_manager.hpp index 6679f98d..4fa1e665 100644 --- a/include/services/service_manager.hpp +++ b/include/services/service_manager.hpp @@ -42,6 +42,8 @@ struct EmulatorConfig; class Kernel; class ServiceManager { + using Handle = HorizonHandle; + std::span regs; Memory& mem; Kernel& kernel; diff --git a/include/services/soc.hpp b/include/services/soc.hpp index 88f0b456..ff334a2c 100644 --- a/include/services/soc.hpp +++ b/include/services/soc.hpp @@ -5,6 +5,8 @@ #include "memory.hpp" class SOCService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::SOC; Memory& mem; MAKE_LOG_FUNCTION(log, socLogger) @@ -14,7 +16,7 @@ class SOCService { // Service commands void initializeSockets(u32 messagePointer); -public: + public: SOCService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git 
a/include/services/ssl.hpp b/include/services/ssl.hpp index 0282049a..4b45fc81 100644 --- a/include/services/ssl.hpp +++ b/include/services/ssl.hpp @@ -1,17 +1,19 @@ #pragma once +#include + #include "helpers.hpp" #include "kernel_types.hpp" #include "logger.hpp" #include "memory.hpp" -#include - class SSLService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::SSL; Memory& mem; MAKE_LOG_FUNCTION(log, sslLogger) - std::mt19937 rng; // Use a Mersenne Twister for RNG since this service is supposed to have better rng than just rand() + std::mt19937 rng; // Use a Mersenne Twister for RNG since this service is supposed to have better rng than just rand() bool initialized; // Service commands diff --git a/include/services/y2r.hpp b/include/services/y2r.hpp index 4aa96d7b..6afebdb8 100644 --- a/include/services/y2r.hpp +++ b/include/services/y2r.hpp @@ -1,6 +1,7 @@ #pragma once #include #include + #include "helpers.hpp" #include "kernel_types.hpp" #include "logger.hpp" @@ -10,6 +11,8 @@ class Kernel; class Y2RService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::Y2R; Memory& mem; Kernel& kernel; @@ -20,7 +23,7 @@ class Y2RService { enum class BusyStatus : u32 { NotBusy = 0, - Busy = 1 + Busy = 1, }; enum class InputFormat : u32 { @@ -35,7 +38,7 @@ class Y2RService { RGB32 = 0, RGB24 = 1, RGB15 = 2, - RGB565 = 3 + RGB565 = 3, }; // Clockwise rotation @@ -43,12 +46,12 @@ class Y2RService { None = 0, Rotate90 = 1, Rotate180 = 2, - Rotate270 = 3 + Rotate270 = 3, }; enum class BlockAlignment : u32 { - Line = 0, // Output buffer's pixels are arranged linearly. Used when outputting to the framebuffer. - Block8x8 = 1, // Output buffer's pixels are morton swizzled. Used when outputting to a GPU texture. + Line = 0, // Output buffer's pixels are arranged linearly. Used when outputting to the framebuffer. + Block8x8 = 1, // Output buffer's pixels are morton swizzled. Used when outputting to a GPU texture. 
}; // https://github.com/citra-emu/citra/blob/ac9d72a95ca9a60de8d39484a14aecf489d6d016/src/core/hle/service/cam/y2r_u.cpp#L33 @@ -60,7 +63,7 @@ class Y2RService { {{0x12A, 0x1CA, 0x88, 0x36, 0x21C, -0x1F04, 0x99C, -0x2421}}, // ITU_Rec709_Scaling }}; - CoefficientSet conversionCoefficients; // Current conversion coefficients + CoefficientSet conversionCoefficients; // Current conversion coefficients InputFormat inputFmt; OutputFormat outputFmt; diff --git a/src/core/kernel/address_arbiter.cpp b/src/core/kernel/address_arbiter.cpp index 8c07b423..d15c81b8 100644 --- a/src/core/kernel/address_arbiter.cpp +++ b/src/core/kernel/address_arbiter.cpp @@ -12,7 +12,7 @@ static const char* arbitrationTypeToString(u32 type) { } } -Handle Kernel::makeArbiter() { +HorizonHandle Kernel::makeArbiter() { if (arbiterCount >= appResourceLimits.maxAddressArbiters) { Helpers::panic("Overflowed the number of address arbiters"); } diff --git a/src/core/kernel/events.cpp b/src/core/kernel/events.cpp index 7c0d3047..6d3dfbd7 100644 --- a/src/core/kernel/events.cpp +++ b/src/core/kernel/events.cpp @@ -12,7 +12,7 @@ const char* Kernel::resetTypeToString(u32 type) { } } -Handle Kernel::makeEvent(ResetType resetType, Event::CallbackType callback) { +HorizonHandle Kernel::makeEvent(ResetType resetType, Event::CallbackType callback) { Handle ret = makeObject(KernelObjectType::Event); objects[ret].data = new Event(resetType, callback); return ret; diff --git a/src/core/kernel/kernel.cpp b/src/core/kernel/kernel.cpp index 0d1efc15..d4229b55 100644 --- a/src/core/kernel/kernel.cpp +++ b/src/core/kernel/kernel.cpp @@ -82,7 +82,7 @@ void Kernel::setVersion(u8 major, u8 minor) { mem.kernelVersion = descriptor; // The memory objects needs a copy because you can read the kernel ver from config mem } -Handle Kernel::makeProcess(u32 id) { +HorizonHandle Kernel::makeProcess(u32 id) { const Handle processHandle = makeObject(KernelObjectType::Process); const Handle resourceLimitHandle = 
makeObject(KernelObjectType::ResourceLimit); diff --git a/src/core/kernel/memory_management.cpp b/src/core/kernel/memory_management.cpp index 0d234be5..aeac6269 100644 --- a/src/core/kernel/memory_management.cpp +++ b/src/core/kernel/memory_management.cpp @@ -154,7 +154,7 @@ void Kernel::mapMemoryBlock() { regs[0] = Result::Success; } -Handle Kernel::makeMemoryBlock(u32 addr, u32 size, u32 myPermission, u32 otherPermission) { +HorizonHandle Kernel::makeMemoryBlock(u32 addr, u32 size, u32 myPermission, u32 otherPermission) { Handle ret = makeObject(KernelObjectType::MemoryBlock); objects[ret].data = new MemoryBlock(addr, size, myPermission, otherPermission); diff --git a/src/core/kernel/ports.cpp b/src/core/kernel/ports.cpp index 6038de44..61ab26e3 100644 --- a/src/core/kernel/ports.cpp +++ b/src/core/kernel/ports.cpp @@ -1,7 +1,7 @@ #include "kernel.hpp" #include -Handle Kernel::makePort(const char* name) { +HorizonHandle Kernel::makePort(const char* name) { Handle ret = makeObject(KernelObjectType::Port); portHandles.push_back(ret); // Push the port handle to our cache of port handles objects[ret].data = new Port(name); @@ -9,7 +9,7 @@ Handle Kernel::makePort(const char* name) { return ret; } -Handle Kernel::makeSession(Handle portHandle) { +HorizonHandle Kernel::makeSession(Handle portHandle) { const auto port = getObject(portHandle, KernelObjectType::Port); if (port == nullptr) [[unlikely]] { Helpers::panic("Trying to make session for non-existent port"); @@ -23,7 +23,7 @@ Handle Kernel::makeSession(Handle portHandle) { // Get the handle of a port based on its name // If there's no such port, return nullopt -std::optional Kernel::getPortHandle(const char* name) { +std::optional Kernel::getPortHandle(const char* name) { for (auto handle : portHandles) { const auto data = objects[handle].getData(); if (std::strncmp(name, data->name, Port::maxNameLen) == 0) { diff --git a/src/core/kernel/threads.cpp b/src/core/kernel/threads.cpp index 3a6201c1..9eb7a197 100644 --- 
a/src/core/kernel/threads.cpp +++ b/src/core/kernel/threads.cpp @@ -109,7 +109,7 @@ void Kernel::rescheduleThreads() { } // Internal OS function to spawn a thread -Handle Kernel::makeThread(u32 entrypoint, u32 initialSP, u32 priority, ProcessorID id, u32 arg, ThreadStatus status) { +HorizonHandle Kernel::makeThread(u32 entrypoint, u32 initialSP, u32 priority, ProcessorID id, u32 arg, ThreadStatus status) { int index; // Index of the created thread in the threads array if (threadCount < appResourceLimits.maxThreads) [[likely]] { // If we have not yet created over too many threads @@ -161,7 +161,7 @@ Handle Kernel::makeThread(u32 entrypoint, u32 initialSP, u32 priority, Processor return ret; } -Handle Kernel::makeMutex(bool locked) { +HorizonHandle Kernel::makeMutex(bool locked) { Handle ret = makeObject(KernelObjectType::Mutex); objects[ret].data = new Mutex(locked, ret); @@ -201,7 +201,7 @@ void Kernel::releaseMutex(Mutex* moo) { } } -Handle Kernel::makeSemaphore(u32 initialCount, u32 maximumCount) { +HorizonHandle Kernel::makeSemaphore(u32 initialCount, u32 maximumCount) { Handle ret = makeObject(KernelObjectType::Semaphore); objects[ret].data = new Semaphore(initialCount, maximumCount); diff --git a/src/core/kernel/timers.cpp b/src/core/kernel/timers.cpp index 35fc57a4..8cfa4773 100644 --- a/src/core/kernel/timers.cpp +++ b/src/core/kernel/timers.cpp @@ -4,7 +4,7 @@ #include "kernel.hpp" #include "scheduler.hpp" -Handle Kernel::makeTimer(ResetType type) { +HorizonHandle Kernel::makeTimer(ResetType type) { Handle ret = makeObject(KernelObjectType::Timer); objects[ret].data = new Timer(type); diff --git a/src/core/services/fs.cpp b/src/core/services/fs.cpp index 2e102958..e81db6cd 100644 --- a/src/core/services/fs.cpp +++ b/src/core/services/fs.cpp @@ -105,7 +105,7 @@ ArchiveBase* FSService::getArchiveFromID(u32 id, const FSPath& archivePath) { } } -std::optional FSService::openFileHandle(ArchiveBase* archive, const FSPath& path, const FSPath& archivePath, const 
FilePerms& perms) { +std::optional FSService::openFileHandle(ArchiveBase* archive, const FSPath& path, const FSPath& archivePath, const FilePerms& perms) { FileDescriptor opened = archive->openFile(path, perms); if (opened.has_value()) { // If opened doesn't have a value, we failed to open the file auto handle = kernel.makeObject(KernelObjectType::File); @@ -119,7 +119,7 @@ std::optional FSService::openFileHandle(ArchiveBase* archive, const FSPa } } -Rust::Result FSService::openDirectoryHandle(ArchiveBase* archive, const FSPath& path) { +Rust::Result FSService::openDirectoryHandle(ArchiveBase* archive, const FSPath& path) { Rust::Result opened = archive->openDirectory(path); if (opened.isOk()) { // If opened doesn't have a value, we failed to open the directory auto handle = kernel.makeObject(KernelObjectType::Directory); @@ -132,7 +132,7 @@ Rust::Result FSService::openDirectoryHandle(Archi } } -Rust::Result FSService::openArchiveHandle(u32 archiveID, const FSPath& path) { +Rust::Result FSService::openArchiveHandle(u32 archiveID, const FSPath& path) { ArchiveBase* archive = getArchiveFromID(archiveID, path); if (archive == nullptr) [[unlikely]] { diff --git a/src/core/services/service_manager.cpp b/src/core/services/service_manager.cpp index 2a95b5c9..31e3d702 100644 --- a/src/core/services/service_manager.cpp +++ b/src/core/services/service_manager.cpp @@ -93,7 +93,7 @@ void ServiceManager::registerClient(u32 messagePointer) { } // clang-format off -static std::map serviceMap = { +static std::map serviceMap = { { "ac:u", KernelHandles::AC }, { "act:a", KernelHandles::ACT }, { "act:u", KernelHandles::ACT }, diff --git a/third_party/metal-cpp b/third_party/metal-cpp new file mode 160000 index 00000000..a63bd172 --- /dev/null +++ b/third_party/metal-cpp @@ -0,0 +1 @@ +Subproject commit a63bd172ddcba73a3d87ca32032b66ad41ddb9a6 From c319e595457fb5e25a959c043953419bd26e1f58 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> 
Date: Tue, 23 Jul 2024 20:13:14 +0000 Subject: [PATCH 141/251] Resizing window on Qt (#556) * Qt: Add screen resize * Qt: Allocate screen on heap for setCentralWidget * Fix header inclusion order * Switch to std::function for resize callback * rdeepfried --- include/panda_qt/main_window.hpp | 10 +++++++- include/panda_qt/screen.hpp | 16 ++++++++++++- src/panda_qt/main_window.cpp | 41 ++++++++++++++++++++++++-------- src/panda_qt/screen.cpp | 30 +++++++++++++++++++++-- 4 files changed, 83 insertions(+), 14 deletions(-) diff --git a/include/panda_qt/main_window.hpp b/include/panda_qt/main_window.hpp index 831074a2..c99fb4c2 100644 --- a/include/panda_qt/main_window.hpp +++ b/include/panda_qt/main_window.hpp @@ -50,6 +50,7 @@ class MainWindow : public QMainWindow { PressTouchscreen, ReleaseTouchscreen, ReloadUbershader, + SetScreenSize, }; // Tagged union representing our message queue messages @@ -81,6 +82,11 @@ class MainWindow : public QMainWindow { u16 x; u16 y; } touchscreen; + + struct { + u32 width; + u32 height; + } screenSize; }; }; @@ -95,7 +101,7 @@ class MainWindow : public QMainWindow { QMenuBar* menuBar = nullptr; InputMappings keyboardMappings; - ScreenWidget screen; + ScreenWidget* screen; AboutWindow* aboutWindow; ConfigWindow* configWindow; CheatsWindow* cheatsEditor; @@ -141,4 +147,6 @@ class MainWindow : public QMainWindow { void loadLuaScript(const std::string& code); void reloadShader(const std::string& shader); void editCheat(u32 handle, const std::vector& cheat, const std::function& callback); + + void handleScreenResize(u32 width, u32 height); }; diff --git a/include/panda_qt/screen.hpp b/include/panda_qt/screen.hpp index dcff3e90..1ed4966b 100644 --- a/include/panda_qt/screen.hpp +++ b/include/panda_qt/screen.hpp @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include "gl/context.h" @@ -10,15 +11,28 @@ class ScreenWidget : public QWidget { Q_OBJECT public: - ScreenWidget(QWidget* parent = nullptr); + using ResizeCallback = 
std::function; + + ScreenWidget(ResizeCallback resizeCallback, QWidget* parent = nullptr); + void resizeEvent(QResizeEvent* event) override; + // Called by the emulator thread for resizing the actual GL surface, since the emulator thread owns the GL context + void resizeSurface(u32 width, u32 height); + GL::Context* getGLContext() { return glContext.get(); } // Dimensions of our output surface u32 surfaceWidth = 0; u32 surfaceHeight = 0; + WindowInfo windowInfo; + + // Cached "previous" dimensions, used when resizing our window + u32 previousWidth = 0; + u32 previousHeight = 0; private: std::unique_ptr glContext = nullptr; + ResizeCallback resizeCallback; + bool createGLContext(); qreal devicePixelRatioFromScreen() const; diff --git a/src/panda_qt/main_window.cpp b/src/panda_qt/main_window.cpp index cfa45e85..1f9b8123 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp @@ -11,13 +11,17 @@ #include "input_mappings.hpp" #include "services/dsp.hpp" -MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent), keyboardMappings(InputMappings::defaultKeyboardMappings()), screen(this) { +MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent), keyboardMappings(InputMappings::defaultKeyboardMappings()) { setWindowTitle("Alber"); // Enable drop events for loading ROMs setAcceptDrops(true); resize(800, 240 * 4); - screen.show(); + // We pass a callback to the screen widget that will be triggered every time we resize the screen + screen = new ScreenWidget([this](u32 width, u32 height) { handleScreenResize(width, height); }, this); + setCentralWidget(screen); + + screen->show(); appRunning = true; // Set our menu bar up @@ -69,7 +73,7 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) connect(aboutAction, &QAction::triggered, this, &MainWindow::showAboutMenu); emu = new Emulator(); - emu->setOutputSize(screen.surfaceWidth, screen.surfaceHeight); + 
emu->setOutputSize(screen->surfaceWidth, screen->surfaceHeight); // Set up misc objects aboutWindow = new AboutWindow(nullptr); @@ -101,7 +105,7 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) if (usingGL) { // Make GL context current for this thread, enable VSync - GL::Context* glContext = screen.getGLContext(); + GL::Context* glContext = screen->getGLContext(); glContext->MakeCurrent(); glContext->SetSwapInterval(emu->getConfig().vsyncEnabled ? 1 : 0); @@ -145,13 +149,13 @@ void MainWindow::emuThreadMainLoop() { // Unbind GL context if we're using GL, otherwise some setups seem to be unable to join this thread if (usingGL) { - screen.getGLContext()->DoneCurrent(); + screen->getGLContext()->DoneCurrent(); } } void MainWindow::swapEmuBuffer() { if (usingGL) { - screen.getGLContext()->SwapBuffers(); + screen->getGLContext()->SwapBuffers(); } else { Helpers::panic("[Qt] Don't know how to swap buffers for the current rendering backend :("); } @@ -360,6 +364,15 @@ void MainWindow::dispatchMessage(const EmulatorMessage& message) { emu->getRenderer()->setUbershader(*message.string.str); delete message.string.str; break; + + case MessageType::SetScreenSize: { + const u32 width = message.screenSize.width; + const u32 height = message.screenSize.height; + + emu->setOutputSize(width, height); + screen->resizeSurface(width, height); + break; + } } } @@ -423,13 +436,13 @@ void MainWindow::keyReleaseEvent(QKeyEvent* event) { void MainWindow::mousePressEvent(QMouseEvent* event) { if (event->button() == Qt::MouseButton::LeftButton) { const QPointF clickPos = event->globalPosition(); - const QPointF widgetPos = screen.mapFromGlobal(clickPos); + const QPointF widgetPos = screen->mapFromGlobal(clickPos); // Press is inside the screen area - if (widgetPos.x() >= 0 && widgetPos.x() < screen.width() && widgetPos.y() >= 0 && widgetPos.y() < screen.height()) { + if (widgetPos.x() >= 0 && widgetPos.x() < screen->width() && widgetPos.y() >= 0 && 
widgetPos.y() < screen->height()) { // Go from widget positions to [0, 400) for x and [0, 480) for y - uint x = (uint)std::round(widgetPos.x() / screen.width() * 400.f); - uint y = (uint)std::round(widgetPos.y() / screen.height() * 480.f); + uint x = (uint)std::round(widgetPos.x() / screen->width() * 400.f); + uint y = (uint)std::round(widgetPos.y() / screen->height() * 480.f); // Check if touch falls in the touch screen area if (y >= 240 && y <= 480 && x >= 40 && x < 40 + 320) { @@ -482,6 +495,14 @@ void MainWindow::editCheat(u32 handle, const std::vector& cheat, const sendMessage(message); } +void MainWindow::handleScreenResize(u32 width, u32 height) { + EmulatorMessage message{.type = MessageType::SetScreenSize}; + message.screenSize.width = width; + message.screenSize.height = height; + + sendMessage(message); +} + void MainWindow::initControllers() { // Make SDL use consistent positional button mapping SDL_SetHint(SDL_HINT_GAMECONTROLLER_USE_BUTTON_LABELS, "0"); diff --git a/src/panda_qt/screen.cpp b/src/panda_qt/screen.cpp index 5a254e79..25ff576c 100644 --- a/src/panda_qt/screen.cpp +++ b/src/panda_qt/screen.cpp @@ -18,7 +18,7 @@ // and https://github.com/melonDS-emu/melonDS/blob/master/src/frontend/qt_sdl/main.cpp #ifdef PANDA3DS_ENABLE_OPENGL -ScreenWidget::ScreenWidget(QWidget* parent) : QWidget(parent) { +ScreenWidget::ScreenWidget(ResizeCallback resizeCallback, QWidget* parent) : QWidget(parent), resizeCallback(resizeCallback) { // Create a native window for use with our graphics API of choice resize(800, 240 * 4); @@ -35,6 +35,30 @@ ScreenWidget::ScreenWidget(QWidget* parent) : QWidget(parent) { } } +void ScreenWidget::resizeEvent(QResizeEvent* event) { + previousWidth = surfaceWidth; + previousHeight = surfaceHeight; + QWidget::resizeEvent(event); + + // Update surfaceWidth/surfaceHeight following the resize + std::optional windowInfo = getWindowInfo(); + if (windowInfo) { + this->windowInfo = *windowInfo; + } + + // This will call take care of 
calling resizeSurface from the emulator thread + resizeCallback(surfaceWidth, surfaceHeight); +} + +// Note: This will run on the emulator thread, we don't want any Qt calls happening there. +void ScreenWidget::resizeSurface(u32 width, u32 height) { + if (previousWidth != width || previousHeight != height) { + if (glContext) { + glContext->ResizeSurface(width, height); + } + } +} + bool ScreenWidget::createGLContext() { // List of GL context versions we will try. Anything 4.1+ is good static constexpr std::array versionsToTry = { @@ -45,6 +69,8 @@ bool ScreenWidget::createGLContext() { std::optional windowInfo = getWindowInfo(); if (windowInfo.has_value()) { + this->windowInfo = *windowInfo; + glContext = GL::Context::Create(*getWindowInfo(), versionsToTry); glContext->DoneCurrent(); } @@ -110,4 +136,4 @@ std::optional ScreenWidget::getWindowInfo() { return wi; } -#endif \ No newline at end of file +#endif From be75fa43a3b440510292831865339a4c582d1418 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 24 Jul 2024 02:00:45 +0300 Subject: [PATCH 142/251] More shader->GLSL recompiler work --- include/PICA/shader.hpp | 5 + include/PICA/shader_decompiler.hpp | 10 ++ src/core/PICA/shader_decompiler.cpp | 167 +++++++++++++++++++++++++++- 3 files changed, 176 insertions(+), 6 deletions(-) diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index 44ca2a15..68b16de8 100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -58,6 +58,10 @@ namespace ShaderOpcodes { }; } +namespace PICA::ShaderGen { + class ShaderDecompiler; +}; + // Note: All PICA f24 vec4 registers must have the alignas(16) specifier to make them easier to access in SSE/NEON code in the JIT class PICAShader { using f24 = Floats::f24; @@ -135,6 +139,7 @@ class PICAShader { // Add these as friend classes for the JIT so it has access to all important state friend class ShaderJIT; friend class ShaderEmitter; + friend class 
PICA::ShaderGen::ShaderDecompiler; vec4f getSource(u32 source); vec4f& getDest(u32 dest); diff --git a/include/PICA/shader_decompiler.hpp b/include/PICA/shader_decompiler.hpp index cbc569ae..1253226f 100644 --- a/include/PICA/shader_decompiler.hpp +++ b/include/PICA/shader_decompiler.hpp @@ -101,6 +101,16 @@ namespace PICA::ShaderGen { void writeAttributes(); + std::string getSource(u32 source, u32 index) const; + std::string getDest(u32 dest) const; + std::string getSwizzlePattern(u32 swizzle) const; + std::string getDestSwizzle(u32 destinationMask) const; + + void setDest(u32 operandDescriptor, const std::string& dest, const std::string& value); + // Returns if the instruction uses the typical register encodings most instructions use + // With some exceptions like MAD/MADI, and the control flow instructions which are completely different + bool usesCommonEncoding(u32 instruction) const; + public: ShaderDecompiler(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language) : shader(shader), entrypoint(entrypoint), config(config), api(api), language(language), decompiledShader("") {} diff --git a/src/core/PICA/shader_decompiler.cpp b/src/core/PICA/shader_decompiler.cpp index 91b07574..bdbef8f3 100644 --- a/src/core/PICA/shader_decompiler.cpp +++ b/src/core/PICA/shader_decompiler.cpp @@ -4,6 +4,8 @@ using namespace PICA; using namespace PICA::ShaderGen; +using namespace Helpers; + using Function = ControlFlow::Function; using ExitMode = Function::ExitMode; @@ -70,11 +72,16 @@ const Function* ShaderDecompiler::findFunction(const AddressRange& range) { void ShaderDecompiler::writeAttributes() { decompiledShader += R"( + layout(location = 0) in vec4 inputs[8]; + layout(std140) uniform PICAShaderUniforms { vec4 uniform_float[96]; uvec4 uniform_int; uint uniform_bool; }; + + vec4 temp_registers[16]; + vec4 dummy_vec = vec4(0.0); )"; decompiledShader += "\n"; @@ -130,24 +137,172 @@ std::string ShaderDecompiler::decompile() { return 
decompiledShader; } +std::string ShaderDecompiler::getSource(u32 source, [[maybe_unused]] u32 index) const { + if (source < 0x10) { + return "inputs[" + std::to_string(source) + "]"; + } else if (source < 0x20) { + return "temp_registers[" + std::to_string(source - 0x10) + "]"; + } else { + const usize floatIndex = (source - 0x20) & 0x7f; + + if (floatIndex >= 96) [[unlikely]] { + return "dummy_vec"; + } + return "uniform_float[" + std::to_string(floatIndex) + "]"; + } +} + +std::string ShaderDecompiler::getDest(u32 dest) const { + if (dest < 0x10) { + return "output_registers[" + std::to_string(dest) + "]"; + } else if (dest < 0x20) { + return "temp_registers[" + std::to_string(dest - 0x10) + "]"; + } else { + return "dummy_vec"; + } +} + +std::string ShaderDecompiler::getSwizzlePattern(u32 swizzle) const { + static constexpr std::array names = {'x', 'y', 'z', 'w'}; + std::string ret(". "); + + for (int i = 0; i < 4; i++) { + ret[3 - i + 1] = names[swizzle & 0x3]; + swizzle >>= 2; + } + + return ret; +} + +std::string ShaderDecompiler::getDestSwizzle(u32 destinationMask) const { + std::string ret = "."; + + if (destinationMask & 0b1000) { + ret += "x"; + } + + if (destinationMask & 0b100) { + ret += "y"; + } + + if (destinationMask & 0b10) { + ret += "z"; + } + + if (destinationMask & 0b1) { + ret += "w"; + } + + return ret; +} + +void ShaderDecompiler::setDest(u32 operandDescriptor, const std::string& dest, const std::string& value) { + u32 destinationMask = operandDescriptor & 0xF; + + std::string destSwizzle = getDestSwizzle(destinationMask); + // We subtract 1 for the "." character of the swizzle descriptor + u32 writtenLaneCount = destSwizzle.size() - 1; + + // All lanes are masked out, so the operation is a nop. 
+ if (writtenLaneCount == 0) { + return; + } + + decompiledShader += dest + destSwizzle + " = "; + if (writtenLaneCount == 1) { + decompiledShader += "float(" + value + ");\n"; + } else { + decompiledShader += "vec" + std::to_string(writtenLaneCount) + "(" + value + ");\n"; + } +} + void ShaderDecompiler::compileInstruction(u32& pc, bool& finished) { const u32 instruction = shader.loadedShader[pc]; const u32 opcode = instruction >> 26; - switch (opcode) { - case ShaderOpcodes::DP4: decompiledShader += "dp4\n"; break; - case ShaderOpcodes::MOV: decompiledShader += "mov\n"; break; - case ShaderOpcodes::END: finished = true; return; - default: Helpers::warn("GLSL recompiler: Unknown opcode: %X", opcode); break; + if (usesCommonEncoding(instruction)) { + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; + const bool invertSources = (opcode == ShaderOpcodes::SLTI || opcode == ShaderOpcodes::SGEI || opcode == ShaderOpcodes::DPHI); + + // src1 and src2 indexes depend on whether this is one of the inverting instructions or not + const u32 src1Index = invertSources ? getBits<14, 5>(instruction) : getBits<12, 7>(instruction); + const u32 src2Index = invertSources ? getBits<7, 7>(instruction) : getBits<7, 5>(instruction); + + const u32 idx = getBits<19, 2>(instruction); + const u32 destIndex = getBits<21, 5>(instruction); + + const bool negate1 = (getBit<4>(operandDescriptor)) != 0; + const u32 swizzle1 = getBits<5, 8>(operandDescriptor); + const bool negate2 = (getBit<13>(operandDescriptor)) != 0; + const u32 swizzle2 = getBits<14, 8>(operandDescriptor); + + std::string src1 = negate1 ? "-" : ""; + src1 += getSource(src1Index, invertSources ? 0 : idx); + src1 += getSwizzlePattern(swizzle1); + + std::string src2 = negate2 ? "-" : ""; + src2 += getSource(src2Index, invertSources ? 
idx : 0); + src2 += getSwizzlePattern(swizzle2); + + std::string dest = getDest(destIndex); + + if (idx != 0) { + Helpers::panic("GLSL recompiler: Indexed instruction"); + } + + if (invertSources) { + Helpers::panic("GLSL recompiler: Inverted instruction"); + } + + switch (opcode) { + case ShaderOpcodes::DP4: setDest(operandDescriptor, dest, "vec4(dot(" + src1 + ", " + src2 + "))"); break; + case ShaderOpcodes::MOV: setDest(operandDescriptor, dest, src1); break; + default: Helpers::panic("GLSL recompiler: Unknown common opcode: %X", opcode); break; + } + } else { + switch (opcode) { + case ShaderOpcodes::END: finished = true; return; + default: Helpers::panic("GLSL recompiler: Unknown opcode: %X", opcode); break; + } } pc++; } + +bool ShaderDecompiler::usesCommonEncoding(u32 instruction) const { + const u32 opcode = instruction >> 26; + switch (opcode) { + case ShaderOpcodes::ADD: + case ShaderOpcodes::CMP1: + case ShaderOpcodes::CMP2: + case ShaderOpcodes::MUL: + case ShaderOpcodes::MIN: + case ShaderOpcodes::MAX: + case ShaderOpcodes::FLR: + case ShaderOpcodes::DP3: + case ShaderOpcodes::DP4: + case ShaderOpcodes::DPH: + case ShaderOpcodes::DPHI: + case ShaderOpcodes::LG2: + case ShaderOpcodes::EX2: + case ShaderOpcodes::RCP: + case ShaderOpcodes::RSQ: + case ShaderOpcodes::MOV: + case ShaderOpcodes::MOVA: + case ShaderOpcodes::SLT: + case ShaderOpcodes::SLTI: + case ShaderOpcodes::SGE: + case ShaderOpcodes::SGEI: return true; + + default: return false; + } +} + void ShaderDecompiler::callFunction(const Function& function) { decompiledShader += function.getCallStatement() + ";\n"; } std::string ShaderGen::decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language) { ShaderDecompiler decompiler(shader, config, entrypoint, api, language); return decompiler.decompile(); -} \ No newline at end of file +} From 156c3031a24d264f7d6d7653c986d5d80f452bf0 Mon Sep 17 00:00:00 2001 From: wheremyfoodat 
<44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 24 Jul 2024 16:47:46 +0300 Subject: [PATCH 143/251] More instructions in shader decompiler --- src/core/PICA/shader_decompiler.cpp | 48 ++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/src/core/PICA/shader_decompiler.cpp b/src/core/PICA/shader_decompiler.cpp index bdbef8f3..482aa36c 100644 --- a/src/core/PICA/shader_decompiler.cpp +++ b/src/core/PICA/shader_decompiler.cpp @@ -255,10 +255,56 @@ void ShaderDecompiler::compileInstruction(u32& pc, bool& finished) { } switch (opcode) { - case ShaderOpcodes::DP4: setDest(operandDescriptor, dest, "vec4(dot(" + src1 + ", " + src2 + "))"); break; case ShaderOpcodes::MOV: setDest(operandDescriptor, dest, src1); break; + case ShaderOpcodes::ADD: setDest(operandDescriptor, dest, src1 + " + " + src2); break; + case ShaderOpcodes::MUL: setDest(operandDescriptor, dest, src1 + " * " + src2); break; + case ShaderOpcodes::MAX: setDest(operandDescriptor, dest, "max(" + src1 + ", " + src2 + ")"); break; + case ShaderOpcodes::MIN: setDest(operandDescriptor, dest, "min(" + src1 + ", " + src2 + ")"); break; + + case ShaderOpcodes::DP3: setDest(operandDescriptor, dest, "vec4(dot(" + src1 + ".xyz, " + src2 + ".xyz))"); break; + case ShaderOpcodes::DP4: setDest(operandDescriptor, dest, "vec4(dot(" + src1 + ", " + src2 + "))"); break; + case ShaderOpcodes::RSQ: setDest(operandDescriptor, dest, "vec4(inversesqrt(" + src1 + ".x))"); break; + default: Helpers::panic("GLSL recompiler: Unknown common opcode: %X", opcode); break; } + } else if (opcode >= 0x30 && opcode <= 0x3F) { // MAD and MADI + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x1f]; + const bool isMADI = getBit<29>(instruction) == 0; // We detect MADI based on bit 29 of the instruction + + // src1 and src2 indexes depend on whether this is one of the inverting instructions or not + const u32 src1Index = getBits<17, 5>(instruction); + const u32 src2Index = 
isMADI ? getBits<12, 5>(instruction) : getBits<10, 7>(instruction); + const u32 src3Index = isMADI ? getBits<5, 7>(instruction) : getBits<5, 5>(instruction); + const u32 idx = getBits<22, 2>(instruction); + const u32 destIndex = getBits<24, 5>(instruction); + + const bool negate1 = (getBit<4>(operandDescriptor)) != 0; + const u32 swizzle1 = getBits<5, 8>(operandDescriptor); + const bool negate2 = (getBit<13>(operandDescriptor)) != 0; + const u32 swizzle2 = getBits<14, 8>(operandDescriptor); + + const bool negate3 = (getBit<22>(operandDescriptor)) != 0; + const u32 swizzle3 = getBits<23, 8>(operandDescriptor); + + std::string src1 = negate1 ? "-" : ""; + src1 += getSource(src1Index, 0); + src1 += getSwizzlePattern(swizzle1); + + std::string src2 = negate2 ? "-" : ""; + src2 += getSource(src2Index, isMADI ? 0 : idx); + src2 += getSwizzlePattern(swizzle2); + + std::string src3 = negate3 ? "-" : ""; + src3 += getSource(src3Index, isMADI ? idx : 0); + src3 += getSwizzlePattern(swizzle3); + + std::string dest = getDest(destIndex); + + if (idx != 0) { + Helpers::panic("GLSL recompiler: Indexed instruction"); + } + + setDest(operandDescriptor, dest, src1 + " * " + src2 + " + " + src3); } else { switch (opcode) { case ShaderOpcodes::END: finished = true; return; From df5d14e3d8e0b5f618a0c65eefbee7533ad0baf4 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 25 Jul 2024 03:59:01 +0300 Subject: [PATCH 144/251] Shadergen: Remove unused vertex shader code --- src/core/PICA/shader_gen_glsl.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 9802be90..1db239f9 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -72,11 +72,6 @@ std::string FragmentGenerator::getDefaultVertexShader() { out float gl_ClipDistance[2]; #endif - vec4 abgr8888ToVec4(uint abgr) { - const float scale = 1.0 / 255.0; - 
return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); - } - void main() { gl_Position = a_coords; vec4 colourAbs = abs(a_vertexColour); @@ -677,4 +672,4 @@ void FragmentGenerator::compileFog(std::string& shader, const PICA::FragmentConf shader += "vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), 24), 0).rg;"; // fog LUT is past the light LUTs shader += "float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);"; shader += "combinerOutput.rgb = mix(fog_color, combinerOutput.rgb, fog_factor);"; -} \ No newline at end of file +} From 19b69bbdc23a58a97faf88f0fff591b958dffef4 Mon Sep 17 00:00:00 2001 From: Jonian Guveli Date: Thu, 25 Jul 2024 11:04:57 +0300 Subject: [PATCH 145/251] Qt: Stop emuThread on closeEvent --- include/panda_qt/main_window.hpp | 1 + src/panda_qt/main_window.cpp | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/panda_qt/main_window.hpp b/include/panda_qt/main_window.hpp index c99fb4c2..ecdbc02e 100644 --- a/include/panda_qt/main_window.hpp +++ b/include/panda_qt/main_window.hpp @@ -139,6 +139,7 @@ class MainWindow : public QMainWindow { MainWindow(QApplication* app, QWidget* parent = nullptr); ~MainWindow(); + void closeEvent(QCloseEvent *event) override; void keyPressEvent(QKeyEvent* event) override; void keyReleaseEvent(QKeyEvent* event) override; void mousePressEvent(QMouseEvent* event) override; diff --git a/src/panda_qt/main_window.cpp b/src/panda_qt/main_window.cpp index 1f9b8123..284e88ea 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp @@ -204,14 +204,19 @@ void MainWindow::selectLuaFile() { } } -// Cleanup when the main window closes -MainWindow::~MainWindow() { +// Stop emulator thread when the main window closes +void MainWindow::closeEvent(QCloseEvent *event) { appRunning = false; // Set our running atomic to false in order to make the emulator thread stop, and join it if (emuThread.joinable()) 
{ emuThread.join(); } + SDL_Quit(); +} + +// Cleanup when the main window closes +MainWindow::~MainWindow() { delete emu; delete menuBar; delete aboutWindow; From da23ec1a0683e2317a247a2eb3e5577a1d10c0b5 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 25 Jul 2024 13:40:01 +0300 Subject: [PATCH 146/251] Don't deinit SDL from non-SDL thread --- src/panda_qt/main_window.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/panda_qt/main_window.cpp b/src/panda_qt/main_window.cpp index 284e88ea..65769116 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp @@ -211,8 +211,6 @@ void MainWindow::closeEvent(QCloseEvent *event) { if (emuThread.joinable()) { emuThread.join(); } - - SDL_Quit(); } // Cleanup when the main window closes @@ -602,4 +600,4 @@ void MainWindow::pollControllers() { } } } -} \ No newline at end of file +} From a0e506affc563ee54fa3c5658e8ccf30389aa574 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 25 Jul 2024 19:51:29 +0300 Subject: [PATCH 147/251] Share fragment UBO between shadergen programs --- include/renderer_gl/renderer_gl.hpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index f5a964a3..42b8bba1 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -69,11 +69,11 @@ class RendererGL final : public Renderer { // The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation // We can compile this once and then link it with all other generated fragment shaders OpenGL::Shader defaultShadergenVs; + GLuint shadergenFragmentUBO; // Cached recompiled fragment shader struct CachedProgram { OpenGL::Program program; - uint uboBinding; }; 
std::unordered_map shaderCache; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 8b614d2d..c513a186 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -77,6 +77,11 @@ void RendererGL::initGraphicsContextInternal() { gl.useProgram(displayProgram); glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object + // Allocate memory for the shadergen fragment uniform UBO + glGenBuffers(1, &shadergenFragmentUBO); + gl.bindUBO(shadergenFragmentUBO); + glBufferData(GL_UNIFORM_BUFFER, sizeof(PICA::FragmentUniforms), nullptr, GL_DYNAMIC_DRAW); + vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize, GL_STREAM_DRAW); gl.bindVBO(vbo); vao.create(); @@ -853,17 +858,12 @@ OpenGL::Program& RendererGL::getSpecializedShader() { glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3); - // Allocate memory for the program UBO - glGenBuffers(1, &programEntry.uboBinding); - gl.bindUBO(programEntry.uboBinding); - glBufferData(GL_UNIFORM_BUFFER, sizeof(PICA::FragmentUniforms), nullptr, GL_DYNAMIC_DRAW); - // Set up the binding for our UBO. Sadly we can't specify it in the shader like normal people, // As it's an OpenGL 4.2 feature that MacOS doesn't support... 
uint uboIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms"); glUniformBlockBinding(program.handle(), uboIndex, uboBlockBinding); } - glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, programEntry.uboBinding); + glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, shadergenFragmentUBO); // Upload uniform data to our shader's UBO PICA::FragmentUniforms uniforms; @@ -945,7 +945,7 @@ OpenGL::Program& RendererGL::getSpecializedShader() { } } - gl.bindUBO(programEntry.uboBinding); + gl.bindUBO(shadergenFragmentUBO); glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms); return program; @@ -980,7 +980,6 @@ void RendererGL::clearShaderCache() { for (auto& shader : shaderCache) { CachedProgram& cachedProgram = shader.second; cachedProgram.program.free(); - glDeleteBuffers(1, &cachedProgram.uboBinding); } shaderCache.clear(); From 33cb3d9c9fdb29b28b639abe5badd64cd76c4b2e Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:05:23 +0300 Subject: [PATCH 148/251] SDL: Add window resizing --- src/panda_sdl/frontend_sdl.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 0c78eea1..3c7ccc1d 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -35,7 +35,7 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, config.rendererType == RendererType::Software ? 3 : 4); SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, config.rendererType == RendererType::Software ? 
3 : 1); - window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_OPENGL); + window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE); if (window == nullptr) { Helpers::panic("Window creation failed: %s", SDL_GetError()); @@ -55,7 +55,7 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp #ifdef PANDA3DS_ENABLE_VULKAN if (config.rendererType == RendererType::Vulkan) { - window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_VULKAN); + window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_VULKAN | SDL_WINDOW_RESIZABLE); if (window == nullptr) { Helpers::warn("Window creation failed: %s", SDL_GetError()); @@ -289,6 +289,15 @@ void FrontendSDL::run() { } break; } + + case SDL_WINDOWEVENT: { + auto type = event.window.event; + if (type == SDL_WINDOWEVENT_RESIZED) { + const u32 width = event.window.data1; + const u32 height = event.window.data2; + emu.setOutputSize(width, height); + } + } } } From 32ddc287891cc96355d2506b4455b267bccac3e7 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 25 Jul 2024 20:18:30 +0300 Subject: [PATCH 149/251] Shadergen: Move fog colour to uniform --- include/PICA/pica_frag_config.hpp | 6 ------ include/PICA/pica_frag_uniforms.hpp | 4 +++- src/core/PICA/shader_gen_glsl.cpp | 7 ++----- src/core/renderer_gl/renderer_gl.cpp | 2 ++ 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index 337fd211..5d5f8420 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -35,9 +35,6 @@ namespace PICA { BitField<0, 3, FogMode> mode; BitField<3, 1, u32> flipDepth; - BitField<8, 8, u32> fogColorR; - BitField<16, 8, u32> fogColorG; - 
BitField<24, 8, u32> fogColorB; }; }; @@ -238,9 +235,6 @@ namespace PICA { if (fogConfig.mode == FogMode::Fog) { fogConfig.flipDepth = Helpers::getBit<16>(regs[InternalRegs::TexEnvUpdateBuffer]); - fogConfig.fogColorR = Helpers::getBits<0, 8>(regs[InternalRegs::FogColor]); - fogConfig.fogColorG = Helpers::getBits<8, 8>(regs[InternalRegs::FogColor]); - fogConfig.fogColorB = Helpers::getBits<16, 8>(regs[InternalRegs::FogColor]); } } }; diff --git a/include/PICA/pica_frag_uniforms.hpp b/include/PICA/pica_frag_uniforms.hpp index 09722d61..781fdcd3 100644 --- a/include/PICA/pica_frag_uniforms.hpp +++ b/include/PICA/pica_frag_uniforms.hpp @@ -34,8 +34,10 @@ namespace PICA { alignas(16) vec4 tevBufferColor; alignas(16) vec4 clipCoords; - // Note: We upload this as a u32 and decode on GPU + // Note: We upload these as a u32 and decode on GPU. + // Particularly the fog colour since fog is really uncommon and it doesn't matter if we decode on GPU. u32 globalAmbientLight; + u32 fogColor; // NOTE: THIS MUST BE LAST so that if lighting is disabled we can potentially omit uploading it LightUniform lightUniforms[8]; }; diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 1db239f9..41b33d88 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -27,6 +27,7 @@ static constexpr const char* uniformDefinition = R"( // Note: We upload this as a u32 and decode on GPU uint globalAmbientLight; + uint inFogColor; LightSource lightSources[8]; }; )"; @@ -656,10 +657,6 @@ void FragmentGenerator::compileFog(std::string& shader, const PICA::FragmentConf return; } - float r = config.fogConfig.fogColorR / 255.0f; - float g = config.fogConfig.fogColorG / 255.0f; - float b = config.fogConfig.fogColorB / 255.0f; - if (config.fogConfig.flipDepth) { shader += "float fog_index = (1.0 - depth) * 128.0;\n"; } else { @@ -668,7 +665,7 @@ void FragmentGenerator::compileFog(std::string& shader, const PICA::FragmentConf shader += "float 
clamped_index = clamp(floor(fog_index), 0.0, 127.0);"; shader += "float delta = fog_index - clamped_index;"; - shader += "vec3 fog_color = vec3(" + std::to_string(r) + ", " + std::to_string(g) + ", " + std::to_string(b) + ");"; + shader += "vec3 fog_color = (1.0 / 255.0) * vec3(float(inFogColor & 0xffu), float((inFogColor >> 8u) & 0xffu), float((inFogColor >> 16u) & 0xffu));"; shader += "vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), 24), 0).rg;"; // fog LUT is past the light LUTs shader += "float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);"; shader += "combinerOutput.rgb = mix(fog_color, combinerOutput.rgb, fog_factor);"; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index c513a186..f8fc31e7 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -903,6 +903,8 @@ OpenGL::Program& RendererGL::getSpecializedShader() { vec[3] = float((color >> 24) & 0xFF) / 255.0f; } + uniforms.fogColor = regs[PICA::InternalRegs::FogColor]; + // Append lighting uniforms if (fsConfig.lighting.enable) { uniforms.globalAmbientLight = regs[InternalRegs::LightGlobalAmbient]; From 1413cdaebfb3b9be814b5b5b05616677e53bdef3 Mon Sep 17 00:00:00 2001 From: Jonian Guveli Date: Fri, 26 Jul 2024 09:41:48 +0300 Subject: [PATCH 150/251] SDL: Fix mouse coords --- src/panda_sdl/frontend_sdl.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 3c7ccc1d..b503dc42 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -162,8 +162,13 @@ void FrontendSDL::run() { if (emu.romType == ROMType::None) break; if (event.button.button == SDL_BUTTON_LEFT) { - const s32 x = event.button.x; - const s32 y = event.button.y; + // Get current window dimensions + int windowWidth, windowHeight; + SDL_GetWindowSize(window, &windowWidth, &windowHeight); + + // Go from window positions to [0, 
400) for x and [0, 480) for y + const s32 x = (s32)std::round(event.button.x * 400.f / windowWidth); + const s32 y = (s32)std::round(event.button.y * 480.f / windowHeight); // Check if touch falls in the touch screen area if (y >= 240 && y <= 480 && x >= 40 && x < 40 + 320) { From 11e7eb7fd6c93a3a57a022573926275a1bc1f3e7 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 26 Jul 2024 12:05:33 +0300 Subject: [PATCH 151/251] SDL: Fixup touchscreen code --- CMakeLists.txt | 2 +- include/panda_sdl/frontend_sdl.hpp | 2 ++ src/panda_sdl/frontend_sdl.cpp | 12 ++++++------ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a3fe41dd..448086ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -502,7 +502,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) ) else() set(FRONTEND_SOURCE_FILES src/panda_sdl/main.cpp src/panda_sdl/frontend_sdl.cpp src/panda_sdl/mappings.cpp) - set(FRONTEND_HEADER_FILES "") + set(FRONTEND_HEADER_FILES "include/panda_sdl/frontend_sdl.hpp") endif() target_link_libraries(Alber PRIVATE AlberCore) diff --git a/include/panda_sdl/frontend_sdl.hpp b/include/panda_sdl/frontend_sdl.hpp index dd6ab6c0..07038962 100644 --- a/include/panda_sdl/frontend_sdl.hpp +++ b/include/panda_sdl/frontend_sdl.hpp @@ -23,6 +23,8 @@ class FrontendSDL { SDL_GameController* gameController = nullptr; InputMappings keyboardMappings; + u32 windowWidth = 400; + u32 windowHeight = 480; int gameControllerID; bool programRunning = true; diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index b503dc42..b2dc27b7 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -162,9 +162,9 @@ void FrontendSDL::run() { if (emu.romType == ROMType::None) break; if (event.button.button == SDL_BUTTON_LEFT) { - // Get current window dimensions - int windowWidth, windowHeight; - SDL_GetWindowSize(window, &windowWidth, &windowHeight); + if 
(windowWidth == 0 || windowHeight == 0) [[unlikely]] { + break; + } // Go from window positions to [0, 400) for x and [0, 480) for y const s32 x = (s32)std::round(event.button.x * 400.f / windowWidth); @@ -298,9 +298,9 @@ void FrontendSDL::run() { case SDL_WINDOWEVENT: { auto type = event.window.event; if (type == SDL_WINDOWEVENT_RESIZED) { - const u32 width = event.window.data1; - const u32 height = event.window.data2; - emu.setOutputSize(width, height); + windowWidth = event.window.data1; + windowHeight = event.window.data2; + emu.setOutputSize(windowWidth, windowHeight); } } } From 13bff602571bc9c0876bf48b7b71b113de537bee Mon Sep 17 00:00:00 2001 From: Jonian Guveli Date: Fri, 26 Jul 2024 12:25:45 +0300 Subject: [PATCH 152/251] SDL: Fix mouse coords in gyroscope emulation --- src/panda_sdl/frontend_sdl.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index b2dc27b7..77b1f55f 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -247,8 +247,13 @@ void FrontendSDL::run() { // Handle "dragging" across the touchscreen if (hid.isTouchScreenPressed()) { - const s32 x = event.motion.x; - const s32 y = event.motion.y; + if (windowWidth == 0 || windowHeight == 0) [[unlikely]] { + break; + } + + // Go from window positions to [0, 400) for x and [0, 480) for y + const s32 x = (s32)std::round(event.motion.x * 400.f / windowWidth); + const s32 y = (s32)std::round(event.motion.y * 480.f / windowHeight); // Check if touch falls in the touch screen area and register the new touch screen position if (y >= 240 && y <= 480 && x >= 40 && x < 40 + 320) { From f095e6af0bd96ebfd540dd438bfdccc409dd79c5 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 26 Jul 2024 14:44:11 +0300 Subject: [PATCH 153/251] Shadergen: Move comments outside of emitted source code --- src/core/PICA/shader_gen_glsl.cpp | 4 ++-- 1 
file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 41b33d88..60887d56 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -4,6 +4,8 @@ using namespace PICA; using namespace PICA::ShaderGen; +// Note: We upload global ambient and fog colour as u32 and decode on the GPU +// This shouldn't matter much for GPU performance, especially fog since it's relatively rare static constexpr const char* uniformDefinition = R"( struct LightSource { vec3 specular0; @@ -24,8 +26,6 @@ static constexpr const char* uniformDefinition = R"( vec4 constantColors[6]; vec4 tevBufferColor; vec4 clipCoords; - - // Note: We upload this as a u32 and decode on GPU uint globalAmbientLight; uint inFogColor; LightSource lightSources[8]; From d0f13de4c5747c3082a81b9c6c01cd020886c8c1 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 26 Jul 2024 16:25:38 +0300 Subject: [PATCH 154/251] Fix swapping loaded ELF files --- src/emulator.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/emulator.cpp b/src/emulator.cpp index db6c2e1f..921af08f 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -299,6 +299,11 @@ bool Emulator::load3DSX(const std::filesystem::path& path) { } bool Emulator::loadELF(const std::filesystem::path& path) { + // We can't open a new file with this ifstream if it's associated with a file + if (loadedELF.is_open()) { + loadedELF.close(); + } + loadedELF.open(path, std::ios_base::binary); // Open ROM in binary mode romType = ROMType::ELF; From e557bd29764f914053d3535fafb803a2974497dd Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 28 Jul 2024 19:03:05 +0300 Subject: [PATCH 155/251] Fix HLE__DSP::RecvData --- src/core/audio/hle_core.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/audio/hle_core.cpp 
b/src/core/audio/hle_core.cpp index 12c8f4c8..1f77974f 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -110,7 +110,7 @@ namespace Audio { Helpers::panic("Audio: invalid register in HLE frontend"); } - return dspState == DSPState::On; + return dspState != DSPState::On; } void HLE_DSP::writeProcessPipe(u32 channel, u32 size, u32 buffer) { From 908222f26fbbcdda2ecfc79cdf4c2eee7a823b37 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 28 Jul 2024 19:05:50 +0300 Subject: [PATCH 156/251] HLE DSP: Don't printf on buffer queue dirty --- src/core/audio/hle_core.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 1f77974f..d39bdbbf 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -317,7 +317,7 @@ namespace Audio { if (config.bufferQueueDirty) { config.bufferQueueDirty = 0; - printf("Buffer queue dirty for voice %d\n", source.index); + // printf("Buffer queue dirty for voice %d\n", source.index); } config.dirtyRaw = 0; From c7db6fe5dc39b6188e1eb7fbb61f0a0834d410c8 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 29 Jul 2024 19:54:46 +0300 Subject: [PATCH 157/251] FIx DSP region calculation --- include/audio/hle_core.hpp | 2 +- src/core/audio/hle_core.cpp | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index b59dc811..117d9ecb 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -142,7 +142,7 @@ namespace Audio { } else if (counter1 == 0xffff && counter0 != 0xfffe) { return 0; } else { - return counter0 > counter1 ? 0 : 0; + return counter0 > counter1 ? 
0 : 1; } } diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index d39bdbbf..23a99786 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -216,6 +216,11 @@ namespace Audio { SharedMemory& read = readRegion(); SharedMemory& write = writeRegion(); + // TODO: Properly implement mixers + // The DSP checks the DSP configuration dirty bits on every frame, applies them, and clears them + read.dspConfiguration.dirtyRaw = 0; + // read.dspConfiguration.dirtyRaw2 = 0; + for (int i = 0; i < sourceCount; i++) { // Update source configuration from the read region of shared memory auto& config = read.sourceConfigurations.config[i]; @@ -401,6 +406,7 @@ namespace Audio { // samples.insert(samples.end(), source.currentSamples.begin(), source.currentSamples.begin() + sampleCount); source.currentSamples.erase(source.currentSamples.begin(), source.currentSamples.begin() + sampleCount); + source.samplePosition += sampleCount; outputCount += sampleCount; } } From 45dd69d62ae144fdac5048cdc118c0d2749c484f Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 29 Jul 2024 21:58:00 +0300 Subject: [PATCH 158/251] HLE DSP: Pop unused samples when loading new buffer --- src/core/audio/hle_core.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 23a99786..112db9d5 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -370,6 +371,13 @@ namespace Audio { break; } + // We're skipping the first samplePosition samples, so remove them from the buffer so as not to consume them later + if (source.samplePosition > 0) { + auto start = source.currentSamples.begin(); + auto end = std::next(start, source.samplePosition); + source.currentSamples.erase(start, end); + } + // If the buffer is a looping buffer, re-push it if (buffer.looping) { 
source.pushBuffer(buffer); From 57ecc18f325f09d629b5cf491b3f7844f927b4da Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 29 Jul 2024 23:03:17 +0300 Subject: [PATCH 159/251] HLE DSP: Implement buffer queue dirty bit --- src/core/audio/hle_core.cpp | 46 +++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 112db9d5..c01a8ccd 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -220,7 +220,7 @@ namespace Audio { // TODO: Properly implement mixers // The DSP checks the DSP configuration dirty bits on every frame, applies them, and clears them read.dspConfiguration.dirtyRaw = 0; - // read.dspConfiguration.dirtyRaw2 = 0; + read.dspConfiguration.dirtyRaw2 = 0; for (int i = 0; i < sourceCount; i++) { // Update source configuration from the read region of shared memory @@ -322,8 +322,40 @@ namespace Audio { } if (config.bufferQueueDirty) { - config.bufferQueueDirty = 0; // printf("Buffer queue dirty for voice %d\n", source.index); + + u16 dirtyBuffers = config.buffersDirty; + config.bufferQueueDirty = 0; + config.buffersDirty = 0; + + for (int i = 0; i < 4; i++) { + bool dirty = ((dirtyBuffers >> i) & 1) != 0; + if (dirty) { + const auto& buffer = config.buffers[i]; + + if (s32(buffer.length) >= 0) [[likely]] { + // TODO: Add sample format and channel count + Source::Buffer newBuffer{ + .paddr = buffer.physicalAddress, + .sampleCount = buffer.length, + .adpcmScale = u8(buffer.adpcmScale), + .previousSamples = {s16(buffer.adpcm_yn[0]), s16(buffer.adpcm_yn[1])}, + .adpcmDirty = buffer.adpcmDirty != 0, + .looping = buffer.isLooping != 0, + .bufferID = buffer.bufferID, + .playPosition = 0, + .format = source.sampleFormat, + .sourceType = source.sourceType, + .fromQueue = true, + .hasPlayedOnce = false, + }; + + source.buffers.emplace(std::move(newBuffer)); + } else { + printf("Buffer 
queue dirty: Invalid buffer size for DSP voice %d\n", source.index); + } + } + } } config.dirtyRaw = 0; @@ -371,17 +403,17 @@ namespace Audio { break; } + // If the buffer is a looping buffer, re-push it + if (buffer.looping) { + source.pushBuffer(buffer); + } + // We're skipping the first samplePosition samples, so remove them from the buffer so as not to consume them later if (source.samplePosition > 0) { auto start = source.currentSamples.begin(); auto end = std::next(start, source.samplePosition); source.currentSamples.erase(start, end); } - - // If the buffer is a looping buffer, re-push it - if (buffer.looping) { - source.pushBuffer(buffer); - } } void HLE_DSP::generateFrame(DSPSource& source) { From 6668ba3e37fe4b2dce6840c818d0ae78524f2242 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 29 Jul 2024 23:46:36 +0300 Subject: [PATCH 160/251] HLE DSP: Fix embedded buffer starting sample position --- src/core/audio/hle_core.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index c01a8ccd..f150482b 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -292,6 +292,9 @@ namespace Audio { } if (config.embeddedBufferDirty) { + // Annoyingly, and only for embedded buffer, whether we use config.playPosition depends on the relevant dirty bit + const u32 playPosition = config.playPositionDirty ? 
config.playPosition : 0; + config.embeddedBufferDirty = 0; if (s32(config.length) >= 0) [[likely]] { // TODO: Add sample format and channel count @@ -303,7 +306,7 @@ namespace Audio { .adpcmDirty = config.adpcmDirty != 0, .looping = config.isLooping != 0, .bufferID = config.bufferID, - .playPosition = config.playPosition, + .playPosition = playPosition, .format = source.sampleFormat, .sourceType = source.sourceType, .fromQueue = false, From e26f58595eb4667a07ef14fbeb41bc3a73590f58 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 30 Jul 2024 00:36:16 +0300 Subject: [PATCH 161/251] HLE DSP: Reset flags should take priority --- src/core/audio/hle_core.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index f150482b..84d62401 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -253,6 +253,17 @@ namespace Audio { return; } + // The reset flags take priority, as you can reset a source and set it up to be played again at the same time + if (config.resetFlag) { + config.resetFlag = 0; + source.reset(); + } + + if (config.partialResetFlag) { + config.partialResetFlag = 0; + source.buffers = {}; + } + if (config.enableDirty) { config.enableDirty = 0; source.enabled = config.enable != 0; @@ -272,16 +283,6 @@ namespace Audio { ); } - if (config.resetFlag) { - config.resetFlag = 0; - source.reset(); - } - - if (config.partialResetFlag) { - config.partialResetFlag = 0; - source.buffers = {}; - } - // TODO: Should we check bufferQueueDirty here too? 
if (config.formatDirty || config.embeddedBufferDirty) { source.sampleFormat = config.format; From f572373fc13468354c4d418faa759fdf711858dd Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 30 Jul 2024 14:29:18 +0300 Subject: [PATCH 162/251] AES: Implement seed crypto --- include/crypto/aes_engine.hpp | 79 +++++++++++++++++------------- src/core/crypto/aes_engine.cpp | 89 +++++++++++++++++++++++++++------- src/core/loader/ncch.cpp | 34 ++++++++++--- src/emulator.cpp | 6 +++ 4 files changed, 151 insertions(+), 57 deletions(-) diff --git a/include/crypto/aes_engine.hpp b/include/crypto/aes_engine.hpp index 324f4adf..f8a2d7e4 100644 --- a/include/crypto/aes_engine.hpp +++ b/include/crypto/aes_engine.hpp @@ -1,20 +1,29 @@ #pragma once #include -#include -#include #include +#include +#include #include #include +#include #include "helpers.hpp" +#include "io_file.hpp" +#include "swap.hpp" namespace Crypto { - constexpr std::size_t AesKeySize = 0x10; + constexpr usize AesKeySize = 0x10; using AESKey = std::array; - template - static std::array rolArray(const std::array& value, std::size_t bits) { + struct Seed { + u64_le titleID; + AESKey seed; + std::array pad; + }; + + template + static std::array rolArray(const std::array& value, usize bits) { const auto bitWidth = N * CHAR_BIT; bits %= bitWidth; @@ -24,18 +33,18 @@ namespace Crypto { std::array result; - for (std::size_t i = 0; i < N; i++) { + for (usize i = 0; i < N; i++) { result[i] = ((value[(i + byteShift) % N] << bitShift) | (value[(i + byteShift + 1) % N] >> (CHAR_BIT - bitShift))) & UINT8_MAX; } return result; } - template + template static std::array addArray(const std::array& a, const std::array& b) { std::array result; - std::size_t sum = 0; - std::size_t carry = 0; + usize sum = 0; + usize carry = 0; for (std::int64_t i = N - 1; i >= 0; i--) { sum = a[i] + b[i] + carry; @@ -46,11 +55,11 @@ namespace Crypto { return result; } - template + template static 
std::array xorArray(const std::array& a, const std::array& b) { std::array result; - for (std::size_t i = 0; i < N; i++) { + for (usize i = 0; i < N; i++) { result[i] = a[i] ^ b[i]; } @@ -63,7 +72,7 @@ namespace Crypto { } AESKey rawKey; - for (std::size_t i = 0; i < rawKey.size(); i++) { + for (usize i = 0; i < rawKey.size(); i++) { rawKey[i] = static_cast(std::stoi(hex.substr(i * 2, 2), 0, 16)); } @@ -76,7 +85,7 @@ namespace Crypto { std::optional normalKey = std::nullopt; }; - enum KeySlotId : std::size_t { + enum KeySlotId : usize { NCCHKey0 = 0x2C, NCCHKey1 = 0x25, NCCHKey2 = 0x18, @@ -84,14 +93,18 @@ namespace Crypto { }; class AESEngine { - private: - constexpr static std::size_t AesKeySlotCount = 0x40; + private: + constexpr static usize AesKeySlotCount = 0x40; std::optional m_generator = std::nullopt; std::array m_slots; bool keysLoaded = false; - constexpr void updateNormalKey(std::size_t slotId) { + std::vector seeds; + IOFile seedDatabase; + bool seedsLoaded = false; + + constexpr void updateNormalKey(usize slotId) { if (m_generator.has_value() && hasKeyX(slotId) && hasKeyY(slotId)) { auto& keySlot = m_slots.at(slotId); AESKey keyX = keySlot.keyX.value(); @@ -101,13 +114,17 @@ namespace Crypto { } } - public: + public: AESEngine() {} void loadKeys(const std::filesystem::path& path); + void setSeedPath(const std::filesystem::path& path); + // Returns true on success, false on failure + bool loadSeeds(); + bool haveKeys() { return keysLoaded; } bool haveGenerator() { return m_generator.has_value(); } - constexpr bool hasKeyX(std::size_t slotId) { + constexpr bool hasKeyX(usize slotId) { if (slotId >= AesKeySlotCount) { return false; } @@ -115,18 +132,16 @@ namespace Crypto { return m_slots.at(slotId).keyX.has_value(); } - constexpr AESKey getKeyX(std::size_t slotId) { - return m_slots.at(slotId).keyX.value_or(AESKey{}); - } + constexpr AESKey getKeyX(usize slotId) { return m_slots.at(slotId).keyX.value_or(AESKey{}); } - constexpr void setKeyX(std::size_t 
slotId, const AESKey &key) { + constexpr void setKeyX(usize slotId, const AESKey& key) { if (slotId < AesKeySlotCount) { m_slots.at(slotId).keyX = key; updateNormalKey(slotId); } } - constexpr bool hasKeyY(std::size_t slotId) { + constexpr bool hasKeyY(usize slotId) { if (slotId >= AesKeySlotCount) { return false; } @@ -134,18 +149,16 @@ namespace Crypto { return m_slots.at(slotId).keyY.has_value(); } - constexpr AESKey getKeyY(std::size_t slotId) { - return m_slots.at(slotId).keyY.value_or(AESKey{}); - } + constexpr AESKey getKeyY(usize slotId) { return m_slots.at(slotId).keyY.value_or(AESKey{}); } - constexpr void setKeyY(std::size_t slotId, const AESKey &key) { + constexpr void setKeyY(usize slotId, const AESKey& key) { if (slotId < AesKeySlotCount) { m_slots.at(slotId).keyY = key; updateNormalKey(slotId); } } - constexpr bool hasNormalKey(std::size_t slotId) { + constexpr bool hasNormalKey(usize slotId) { if (slotId >= AesKeySlotCount) { return false; } @@ -153,14 +166,14 @@ namespace Crypto { return m_slots.at(slotId).normalKey.has_value(); } - constexpr AESKey getNormalKey(std::size_t slotId) { - return m_slots.at(slotId).normalKey.value_or(AESKey{}); - } + constexpr AESKey getNormalKey(usize slotId) { return m_slots.at(slotId).normalKey.value_or(AESKey{}); } - constexpr void setNormalKey(std::size_t slotId, const AESKey &key) { + constexpr void setNormalKey(usize slotId, const AESKey& key) { if (slotId < AesKeySlotCount) { m_slots.at(slotId).normalKey = key; } } + + std::optional getSeedFromDB(u64 titleID); }; -} \ No newline at end of file +} // namespace Crypto \ No newline at end of file diff --git a/src/core/crypto/aes_engine.cpp b/src/core/crypto/aes_engine.cpp index f4bf3494..dc3ae060 100644 --- a/src/core/crypto/aes_engine.cpp +++ b/src/core/crypto/aes_engine.cpp @@ -1,13 +1,15 @@ -#include -#include - #include "crypto/aes_engine.hpp" + +#include +#include +#include + #include "helpers.hpp" namespace Crypto { void AESEngine::loadKeys(const 
std::filesystem::path& path) { std::ifstream file(path, std::ios::in); - + if (file.fail()) { Helpers::warn("Keys: Couldn't read key file: %s", path.c_str()); return; @@ -58,18 +60,10 @@ namespace Crypto { } switch (keyType) { - case 'X': - setKeyX(slotId, key.value()); - break; - case 'Y': - setKeyY(slotId, key.value()); - break; - case 'N': - setNormalKey(slotId, key.value()); - break; - default: - Helpers::warn("Keys: Invalid key type %c", keyType); - break; + case 'X': setKeyX(slotId, key.value()); break; + case 'Y': setKeyY(slotId, key.value()); break; + case 'N': setNormalKey(slotId, key.value()); break; + default: Helpers::warn("Keys: Invalid key type %c", keyType); break; } } @@ -80,4 +74,65 @@ namespace Crypto { keysLoaded = true; } -}; \ No newline at end of file + + void AESEngine::setSeedPath(const std::filesystem::path& path) { seedDatabase.open(path, "rb"); } + + // Loads seeds from a seed file, return true on success and false on failure + bool AESEngine::loadSeeds() { + if (!seedDatabase.isOpen()) { + return false; + } + + // The # of seeds is stored at offset 0 + u32_le seedCount = 0; + + if (!seedDatabase.rewind()) { + return false; + } + + auto [success, size] = seedDatabase.readBytes(&seedCount, sizeof(u32)); + if (!success || size != sizeof(u32)) { + return false; + } + + // Key data starts from offset 16 + if (!seedDatabase.seek(16)) { + return false; + } + + Crypto::Seed seed; + for (uint i = 0; i < seedCount; i++) { + std::tie(success, size) = seedDatabase.readBytes(&seed, sizeof(seed)); + if (!success || size != sizeof(seed)) { + return false; + } + + seeds.push_back(seed); + } + + return true; + } + + std::optional AESEngine::getSeedFromDB(u64 titleID) { + // We don't have a seed db nor any seeds loaded, return nullopt + if (!seedDatabase.isOpen() && seeds.empty()) { + return std::nullopt; + } + + // We have a seed DB but haven't loaded the seeds yet, so load them + if (seedDatabase.isOpen() && seeds.empty()) { + bool success = 
loadSeeds(); + if (!success) { + return std::nullopt; + } + } + + for (Crypto::Seed& seed : seeds) { + if (seed.titleID == titleID) { + return seed.seed; + } + } + + return std::nullopt; + } +}; // namespace Crypto \ No newline at end of file diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index a575d4f2..3a7cb1f6 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -1,12 +1,15 @@ +#include "loader/ncch.hpp" + #include #include -#include -#include -#include "loader/lz77.hpp" -#include "loader/ncch.hpp" -#include "memory.hpp" +#include +#include #include +#include + +#include "loader/lz77.hpp" +#include "memory.hpp" bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSInfo &info) { // 0x200 bytes for the NCCH header @@ -70,8 +73,25 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn if (!seedCrypto) { secondaryKeyY = primaryKeyY; } else { - Helpers::warn("Seed crypto is not supported"); - gotCryptoKeys = false; + // In seed crypto mode, the secondary key is computed through a SHA256 hash of the primary key and a title-specific seed, which we fetch + // from seeddb.bin + std::optional seedOptional = aesEngine.getSeedFromDB(programID); + if (seedOptional.has_value()) { + auto seed = *seedOptional; + + CryptoPP::SHA256 shaEngine; + std::array data; + std::array hash; + + std::memcpy(&data[0], primaryKeyY.data(), primaryKeyY.size()); + std::memcpy(&data[16], seed.data(), seed.size()); + shaEngine.CalculateDigest(hash.data(), data.data(), data.size()); + // Note that SHA256 will produce a 256-bit hash, while we only need 128 bits cause this is an AES key + // So the latter 16 bytes of the SHA256 are thrown out. + std::memcpy(secondaryKeyY.data(), hash.data(), secondaryKeyY.size()); + } else { + Helpers::warn("Couldn't find a seed value for this title. 
Make sure you have a seeddb.bin file alongside your aes_keys.txt"); + } } auto primaryResult = getPrimaryKey(aesEngine, primaryKeyY); diff --git a/src/emulator.cpp b/src/emulator.cpp index 921af08f..e4bfc4af 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -220,6 +220,8 @@ bool Emulator::loadROM(const std::filesystem::path& path) { const std::filesystem::path appDataPath = getAppDataRoot(); const std::filesystem::path dataPath = appDataPath / path.filename().stem(); const std::filesystem::path aesKeysPath = appDataPath / "sysdata" / "aes_keys.txt"; + const std::filesystem::path seedDBPath = appDataPath / "sysdata" / "seeddb.bin"; + IOFile::setAppDataDir(dataPath); // Open the text file containing our AES keys if it exists. We use the std::filesystem::exists overload that takes an error code param to @@ -229,6 +231,10 @@ bool Emulator::loadROM(const std::filesystem::path& path) { aesEngine.loadKeys(aesKeysPath); } + if (std::filesystem::exists(seedDBPath, ec) && !ec) { + aesEngine.setSeedPath(seedDBPath); + } + kernel.initializeFS(); auto extension = path.extension(); bool success; // Tracks if we loaded the ROM successfully From e6c97edb1c41a5bca22aeaa8befe226ba1c511bb Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 30 Jul 2024 14:32:42 +0300 Subject: [PATCH 163/251] AES: Remove unused seedsLoaded variable --- include/crypto/aes_engine.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/crypto/aes_engine.hpp b/include/crypto/aes_engine.hpp index f8a2d7e4..c96b36d3 100644 --- a/include/crypto/aes_engine.hpp +++ b/include/crypto/aes_engine.hpp @@ -102,7 +102,6 @@ namespace Crypto { std::vector seeds; IOFile seedDatabase; - bool seedsLoaded = false; constexpr void updateNormalKey(usize slotId) { if (m_generator.has_value() && hasKeyX(slotId) && hasKeyY(slotId)) { @@ -176,4 +175,4 @@ namespace Crypto { std::optional getSeedFromDB(u64 titleID); }; -} // namespace Crypto \ No 
newline at end of file +} // namespace Crypto From bec63c43a169204f2f022d535c8d36bdfb7c5565 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 30 Jul 2024 14:36:39 +0300 Subject: [PATCH 164/251] AES: Properly handle missing seeds --- src/core/loader/ncch.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 3a7cb1f6..96d13813 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -91,6 +91,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn std::memcpy(secondaryKeyY.data(), hash.data(), secondaryKeyY.size()); } else { Helpers::warn("Couldn't find a seed value for this title. Make sure you have a seeddb.bin file alongside your aes_keys.txt"); + gotCryptoKeys = false; } } From e666afd1a30d80f69df1ea015ed835ac86af1eef Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 31 Jul 2024 02:51:40 +0300 Subject: [PATCH 165/251] DSP HLE: Fix buffer queue metadata --- include/audio/hle_core.hpp | 2 +- src/core/audio/hle_core.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index 117d9ecb..35c1c1b8 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -142,7 +142,7 @@ namespace Audio { } else if (counter1 == 0xffff && counter0 != 0xfffe) { return 0; } else { - return counter0 > counter1 ? 0 : 1; + return (counter0 > counter1) ? 
0 : 1; } } diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 84d62401..ffab9301 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -342,7 +342,7 @@ namespace Audio { Source::Buffer newBuffer{ .paddr = buffer.physicalAddress, .sampleCount = buffer.length, - .adpcmScale = u8(buffer.adpcmScale), + .adpcmScale = u8(buffer.adpcm_ps), .previousSamples = {s16(buffer.adpcm_yn[0]), s16(buffer.adpcm_yn[1])}, .adpcmDirty = buffer.adpcmDirty != 0, .looping = buffer.isLooping != 0, From d1922798c5978cafb992f074f823a051ed0dc419 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 31 Jul 2024 18:22:14 +0300 Subject: [PATCH 166/251] CMake: Disable /GS when using MSVC for user builds --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 448086ba..b55e2390 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,6 +55,11 @@ if(BUILD_LIBRETRO_CORE) add_compile_definitions(__LIBRETRO__) endif() +if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND ENABLE_USER_BUILD) + # Disable stack buffer overflow checks in user builds + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS-") +endif() + add_library(AlberCore STATIC) include_directories(${PROJECT_SOURCE_DIR}/include/) From 195f3388e9a5f88e18ee2779c71069c60668f544 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 2 Aug 2024 22:53:51 +0300 Subject: [PATCH 167/251] PICA: Add LITP test + interpreter implementation --- include/PICA/shader.hpp | 3 +- src/core/PICA/shader_interpreter.cpp | 32 ++++ tests/PICA_LITP/Makefile | 255 ++++++++++++++++++++++++++ tests/PICA_LITP/source/main.c | 128 +++++++++++++ tests/PICA_LITP/source/vshader.v.pica | 73 ++++++++ 5 files changed, 490 insertions(+), 1 deletion(-) create mode 100644 tests/PICA_LITP/Makefile create mode 100644 tests/PICA_LITP/source/main.c create mode 100644 
tests/PICA_LITP/source/vshader.v.pica diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index 68b16de8..e5f57c72 100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -23,7 +23,7 @@ namespace ShaderOpcodes { DST = 0x04, EX2 = 0x05, LG2 = 0x06, - LIT = 0x07, + LITP = 0x07, MUL = 0x08, SGE = 0x09, SLT = 0x0A, @@ -161,6 +161,7 @@ class PICAShader { void jmpc(u32 instruction); void jmpu(u32 instruction); void lg2(u32 instruction); + void litp(u32 instruction); void loop(u32 instruction); void mad(u32 instruction); void madi(u32 instruction); diff --git a/src/core/PICA/shader_interpreter.cpp b/src/core/PICA/shader_interpreter.cpp index 003ef97a..a85c7464 100644 --- a/src/core/PICA/shader_interpreter.cpp +++ b/src/core/PICA/shader_interpreter.cpp @@ -74,6 +74,9 @@ void PICAShader::run() { break; } + // Undocumented, implementation based on 3DBrew and hw testing (see tests/PICA_LITP) + case ShaderOpcodes::LITP: [[unlikely]] litp(instruction); break; + default: Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode); } @@ -753,4 +756,33 @@ void PICAShader::jmpu(u32 instruction) { if (((boolUniform >> bit) & 1) == test) // Jump if the bool uniform is the value we want pc = dest; +} + +void PICAShader::litp(u32 instruction) { + const u32 operandDescriptor = operandDescriptors[instruction & 0x7f]; + u32 src = getBits<12, 7>(instruction); + const u32 idx = getBits<19, 2>(instruction); + const u32 dest = getBits<21, 5>(instruction); + + src = getIndexedSource(src, idx); + vec4f srcVec = getSourceSwizzled<1>(src, operandDescriptor); + vec4f& destVector = getDest(dest); + + // Compare registers are set based on whether src.x and src.w are >= 0.0 + cmpRegister[0] = (srcVec[0].toFloat32() >= 0.0f); + cmpRegister[1] = (srcVec[3].toFloat32() >= 0.0f); + + vec4f result; + // TODO: Does max here have the same non-IEEE NaN behavior as the max instruction? 
+ result[0] = f24::fromFloat32(std::max(srcVec[0].toFloat32(), 0.0f)); + result[1] = f24::fromFloat32(std::clamp(srcVec[1].toFloat32(), -127.9961f, 127.9961f)); + result[2] = f24::zero(); + result[3] = f24::fromFloat32(std::max(srcVec[3].toFloat32(), 0.0f)); + + u32 componentMask = operandDescriptor & 0xf; + for (int i = 0; i < 4; i++) { + if (componentMask & (1 << i)) { + destVector[3 - i] = result[3 - i]; + } + } } \ No newline at end of file diff --git a/tests/PICA_LITP/Makefile b/tests/PICA_LITP/Makefile new file mode 100644 index 00000000..46a94048 --- /dev/null +++ b/tests/PICA_LITP/Makefile @@ -0,0 +1,255 @@ +#--------------------------------------------------------------------------------- +.SUFFIXES: +#--------------------------------------------------------------------------------- + +ifeq ($(strip $(DEVKITARM)),) +$(error "Please set DEVKITARM in your environment. export DEVKITARM=devkitARM") +endif + +TOPDIR ?= $(CURDIR) +include $(DEVKITARM)/3ds_rules + +#--------------------------------------------------------------------------------- +# TARGET is the name of the output +# BUILD is the directory where object files & intermediate files will be placed +# SOURCES is a list of directories containing source code +# DATA is a list of directories containing data files +# INCLUDES is a list of directories containing header files +# GRAPHICS is a list of directories containing graphics files +# GFXBUILD is the directory where converted graphics files will be placed +# If set to $(BUILD), it will statically link in the converted +# files as if they were data files. +# +# NO_SMDH: if set to anything, no SMDH file is generated. 
+# ROMFS is the directory which contains the RomFS, relative to the Makefile (Optional) +# APP_TITLE is the name of the app stored in the SMDH file (Optional) +# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional) +# APP_AUTHOR is the author of the app stored in the SMDH file (Optional) +# ICON is the filename of the icon (.png), relative to the project folder. +# If not set, it attempts to use one of the following (in this order): +# - <Project name>.png +# - icon.png +# - <libctru folder>/default_icon.png +#--------------------------------------------------------------------------------- +TARGET := $(notdir $(CURDIR)) +BUILD := build +SOURCES := source +DATA := data +INCLUDES := include +GRAPHICS := gfx +GFXBUILD := $(BUILD) +#ROMFS := romfs +#GFXBUILD := $(ROMFS)/gfx + +#--------------------------------------------------------------------------------- +# options for code generation +#--------------------------------------------------------------------------------- +ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard -mtp=soft + +CFLAGS := -g -Wall -O2 -mword-relocations \ + -ffunction-sections \ + $(ARCH) + +CFLAGS += $(INCLUDE) -D__3DS__ + +CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11 + +ASFLAGS := -g $(ARCH) +LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map) + +LIBS := -lcitro3d -lctru -lm + +#--------------------------------------------------------------------------------- +# list of directories containing libraries, this must be the top level containing +# include and lib +#--------------------------------------------------------------------------------- +LIBDIRS := $(CTRULIB) + + +#--------------------------------------------------------------------------------- +# no real need to edit anything past this point unless you need to add additional +# rules for different file extensions +#--------------------------------------------------------------------------------- +ifneq ($(BUILD),$(notdir $(CURDIR)))
+#--------------------------------------------------------------------------------- + +export OUTPUT := $(CURDIR)/$(TARGET) +export TOPDIR := $(CURDIR) + +export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \ + $(foreach dir,$(GRAPHICS),$(CURDIR)/$(dir)) \ + $(foreach dir,$(DATA),$(CURDIR)/$(dir)) + +export DEPSDIR := $(CURDIR)/$(BUILD) + +CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c))) +CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp))) +SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s))) +PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.v.pica))) +SHLISTFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.shlist))) +GFXFILES := $(foreach dir,$(GRAPHICS),$(notdir $(wildcard $(dir)/*.t3s))) +BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*))) + +#--------------------------------------------------------------------------------- +# use CXX for linking C++ projects, CC for standard C +#--------------------------------------------------------------------------------- +ifeq ($(strip $(CPPFILES)),) +#--------------------------------------------------------------------------------- + export LD := $(CC) +#--------------------------------------------------------------------------------- +else +#--------------------------------------------------------------------------------- + export LD := $(CXX) +#--------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------- + +#--------------------------------------------------------------------------------- +ifeq ($(GFXBUILD),$(BUILD)) +#--------------------------------------------------------------------------------- +export T3XFILES := $(GFXFILES:.t3s=.t3x) +#--------------------------------------------------------------------------------- +else 
+#--------------------------------------------------------------------------------- +export ROMFS_T3XFILES := $(patsubst %.t3s, $(GFXBUILD)/%.t3x, $(GFXFILES)) +export T3XHFILES := $(patsubst %.t3s, $(BUILD)/%.h, $(GFXFILES)) +#--------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------- + +export OFILES_SOURCES := $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o) + +export OFILES_BIN := $(addsuffix .o,$(BINFILES)) \ + $(PICAFILES:.v.pica=.shbin.o) $(SHLISTFILES:.shlist=.shbin.o) \ + $(addsuffix .o,$(T3XFILES)) + +export OFILES := $(OFILES_BIN) $(OFILES_SOURCES) + +export HFILES := $(PICAFILES:.v.pica=_shbin.h) $(SHLISTFILES:.shlist=_shbin.h) \ + $(addsuffix .h,$(subst .,_,$(BINFILES))) \ + $(GFXFILES:.t3s=.h) + +export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \ + $(foreach dir,$(LIBDIRS),-I$(dir)/include) \ + -I$(CURDIR)/$(BUILD) + +export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib) + +export _3DSXDEPS := $(if $(NO_SMDH),,$(OUTPUT).smdh) + +ifeq ($(strip $(ICON)),) + icons := $(wildcard *.png) + ifneq (,$(findstring $(TARGET).png,$(icons))) + export APP_ICON := $(TOPDIR)/$(TARGET).png + else + ifneq (,$(findstring icon.png,$(icons))) + export APP_ICON := $(TOPDIR)/icon.png + endif + endif +else + export APP_ICON := $(TOPDIR)/$(ICON) +endif + +ifeq ($(strip $(NO_SMDH)),) + export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh +endif + +ifneq ($(ROMFS),) + export _3DSXFLAGS += --romfs=$(CURDIR)/$(ROMFS) +endif + +.PHONY: all clean + +#--------------------------------------------------------------------------------- +all: $(BUILD) $(GFXBUILD) $(DEPSDIR) $(ROMFS_T3XFILES) $(T3XHFILES) + @$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile + +$(BUILD): + @mkdir -p $@ + +ifneq ($(GFXBUILD),$(BUILD)) +$(GFXBUILD): + @mkdir -p $@ +endif + +ifneq ($(DEPSDIR),$(BUILD)) +$(DEPSDIR): + @mkdir -p $@ +endif + 
+#--------------------------------------------------------------------------------- +clean: + @echo clean ... + @rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf $(GFXBUILD) + +#--------------------------------------------------------------------------------- +$(GFXBUILD)/%.t3x $(BUILD)/%.h : %.t3s +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @tex3ds -i $< -H $(BUILD)/$*.h -d $(DEPSDIR)/$*.d -o $(GFXBUILD)/$*.t3x + +#--------------------------------------------------------------------------------- +else + +#--------------------------------------------------------------------------------- +# main targets +#--------------------------------------------------------------------------------- +$(OUTPUT).3dsx : $(OUTPUT).elf $(_3DSXDEPS) + +$(OFILES_SOURCES) : $(HFILES) + +$(OUTPUT).elf : $(OFILES) + +#--------------------------------------------------------------------------------- +# you need a rule like this for each extension you use as binary data +#--------------------------------------------------------------------------------- +%.bin.o %_bin.h : %.bin +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @$(bin2o) + +#--------------------------------------------------------------------------------- +.PRECIOUS : %.t3x +#--------------------------------------------------------------------------------- +%.t3x.o %_t3x.h : %.t3x +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @$(bin2o) + +#--------------------------------------------------------------------------------- +# rules for assembling GPU shaders +#--------------------------------------------------------------------------------- +define shader-as + $(eval CURBIN := $*.shbin) + $(eval DEPSFILE := $(DEPSDIR)/$*.shbin.d) + echo "$(CURBIN).o: $< $1" > $(DEPSFILE) + echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' 
| tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h + echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h + echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . _)`.h + picasso -o $(CURBIN) $1 + bin2s $(CURBIN) | $(AS) -o $*.shbin.o +endef + +%.shbin.o %_shbin.h : %.v.pica %.g.pica + @echo $(notdir $^) + @$(call shader-as,$^) + +%.shbin.o %_shbin.h : %.v.pica + @echo $(notdir $<) + @$(call shader-as,$<) + +%.shbin.o %_shbin.h : %.shlist + @echo $(notdir $<) + @$(call shader-as,$(foreach file,$(shell cat $<),$(dir $<)$(file))) + +#--------------------------------------------------------------------------------- +%.t3x %.h : %.t3s +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @tex3ds -i $< -H $*.h -d $*.d -o $*.t3x + +-include $(DEPSDIR)/*.d + +#--------------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------------- diff --git a/tests/PICA_LITP/source/main.c b/tests/PICA_LITP/source/main.c new file mode 100644 index 00000000..3c2a3f0c --- /dev/null +++ b/tests/PICA_LITP/source/main.c @@ -0,0 +1,128 @@ +#include <3ds.h> +#include +#include +#include "vshader_shbin.h" + +#define CLEAR_COLOR 0x68B0D8FF + +#define DISPLAY_TRANSFER_FLAGS \ + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ + GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ + GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) + +static DVLB_s* vshader_dvlb; +static shaderProgram_s program; +static int uLoc_projection; +static C3D_Mtx projection; + +static void sceneInit(void) +{ + // Load the vertex shader, create a shader program and bind it + vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); + 
shaderProgramInit(&program); + shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); + C3D_BindProgram(&program); + + // Get the location of the uniforms + uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); + + // Configure attributes for use with the vertex shader + // Attribute format and element count are ignored in immediate mode + C3D_AttrInfo* attrInfo = C3D_GetAttrInfo(); + AttrInfo_Init(attrInfo); + AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position + AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3); // v1=color + + // Compute the projection matrix + Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true); + + // Configure the first fragment shading substage to just pass through the vertex color + // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight + C3D_TexEnv* env = C3D_GetTexEnv(0); + C3D_TexEnvInit(env); + C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0); + C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE); +} + +static void sceneRender(void) +{ + // Update the uniforms + C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection); + + // Draw the triangle directly + C3D_ImmDrawBegin(GPU_TRIANGLES); + // Triangle 1 + // This vertex has r >= 0 and a >= 0 so the shader should output magenta (cmp.x = cmp.y = 1) + C3D_ImmSendAttrib(200.0f, 200.0f, 0.5f, 0.0f); // v0=position + C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); // v1=color + + // This vertex only has a >= 0, so the shader should output lime (cmp.x = 0, cmp.y = 1) + C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f); + C3D_ImmSendAttrib(-0.5f, 1.0f, 0.0f, 1.0f); + + // This vertex only has r >= 0, so the shader should output cyan (cmp.x = 1, cmp.y = 0) + C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f); + C3D_ImmSendAttrib(0.5f, 0.0f, 1.0f, -1.0f); + + // Triangle 2 + // The next 3 vertices have r < 0, a < 0, so the output of the shader should be the output of litp with alpha set to 1 (cmp.x = cmp.y = 0) + 
C3D_ImmSendAttrib(10.0f, 20.0f, 0.5f, 0.0f); + // Output g component should be 64 / 128 = 0.5 + C3D_ImmSendAttrib(-1.0f, 64.0f, 0.0f, -1.0f); + + C3D_ImmSendAttrib(90.0f, 20.0f, 0.5f, 0.0f); + // Output g component should be 128 / 128 = 1.0 + C3D_ImmSendAttrib(-1.0f, 256.0f, 1.0f, -1.0f); + + C3D_ImmSendAttrib(40.0f, 40.0f, 0.5f, 0.0f); + // Output g component should be 0 / 128 = 0 + C3D_ImmSendAttrib(-1.0f, 0.0f, 0.5f, -1.0f); + C3D_ImmDrawEnd(); +} + +static void sceneExit(void) +{ + // Free the shader program + shaderProgramFree(&program); + DVLB_Free(vshader_dvlb); +} + +int main() +{ + // Initialize graphics + gfxInitDefault(); + C3D_Init(C3D_DEFAULT_CMDBUF_SIZE); + + // Initialize the render target + C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); + C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS); + + // Initialize the scene + sceneInit(); + + // Main loop + while (aptMainLoop()) + { + hidScanInput(); + + // Respond to user input + u32 kDown = hidKeysDown(); + if (kDown & KEY_START) + break; // break in order to return to hbmenu + + // Render the scene + C3D_FrameBegin(C3D_FRAME_SYNCDRAW); + C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0); + C3D_FrameDrawOn(target); + sceneRender(); + C3D_FrameEnd(0); + } + + // Deinitialize the scene + sceneExit(); + + // Deinitialize graphics + C3D_Fini(); + gfxExit(); + return 0; +} \ No newline at end of file diff --git a/tests/PICA_LITP/source/vshader.v.pica b/tests/PICA_LITP/source/vshader.v.pica new file mode 100644 index 00000000..d745f939 --- /dev/null +++ b/tests/PICA_LITP/source/vshader.v.pica @@ -0,0 +1,73 @@ +; Example PICA200 vertex shader + +; Uniforms +.fvec projection[4] + +; Constants +.constf myconst(0.0, 1.0, -1.0, 0.1) +.constf myconst2(0.3, 0.0, 0.0, 0.0) +.alias zeros myconst.xxxx ; Vector full of zeros +.alias ones myconst.yyyy ; Vector full of ones + +.constf magenta(0.8, 0.192, 0.812, 1.0) +.constf 
cyan(0.137, 0.949, 0.906, 1.0) +.constf lime(0.286, 0.929, 0.412, 1.0) + +.constf normalize_y(1.0, 1.0/128.0, 1.0, 1.0) + +; Outputs +.out outpos position +.out outclr color + +; Inputs (defined as aliases for convenience) +.alias inpos v0 +.alias inclr v1 + +.bool test + +.proc main + ; Force the w component of inpos to be 1.0 + mov r0.xyz, inpos + mov r0.w, ones + + ; outpos = projectionMatrix * inpos + dp4 outpos.x, projection[0], r0 + dp4 outpos.y, projection[1], r0 + dp4 outpos.z, projection[2], r0 + dp4 outpos.w, projection[3], r0 + + ; Test litp via the output fragment colour + ; r1 = input colour + mov r1, inclr + + ; This should perform the following operation: + ; cmp = (x >= 0, w >= 0) + ; dest = ( max(x, 0), clamp(y, -128, +128 ), 0, max(w, 0) ); + litp r2, r1 + + ifc cmp.x + ifc cmp.y + ; cmp.x = 1, cmp.y = 1, write magenta + mov outclr, magenta + end + .else + ; cmp.x = 1, cmp.y = 0, write cyan + mov outclr, cyan + end + .end + .else + ifc cmp.y + ; cmp.x = 0, cmp.y = 1, write lime + mov outclr, lime + end + .end + .end + + ; cmp.x = 0, cmp.y = 0, write output of litp to out colour, with y normalized to [-1, 1] + mul r2.xyz, normalize_y, r2 + ; Set alpha to one + mov r2.a, ones.a + + mov outclr, r2 + end +.end \ No newline at end of file From 24c4e02143f2803cfbee7723bb6c480605911d4d Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 2 Aug 2024 22:59:33 +0300 Subject: [PATCH 168/251] Format litp test --- tests/PICA_LITP/source/main.c | 79 ++++++++++++++++------------------- 1 file changed, 37 insertions(+), 42 deletions(-) diff --git a/tests/PICA_LITP/source/main.c b/tests/PICA_LITP/source/main.c index 3c2a3f0c..9bcab5b9 100644 --- a/tests/PICA_LITP/source/main.c +++ b/tests/PICA_LITP/source/main.c @@ -1,22 +1,22 @@ #include <3ds.h> #include <citro3d.h> #include <string.h> + #include "vshader_shbin.h" + #define CLEAR_COLOR 0x68B0D8FF -#define DISPLAY_TRANSFER_FLAGS \ - (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) |
GX_TRANSFER_RAW_COPY(0) | \ - GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ - GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) +#define DISPLAY_TRANSFER_FLAGS \ + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | \ + GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) static DVLB_s* vshader_dvlb; static shaderProgram_s program; static int uLoc_projection; static C3D_Mtx projection; -static void sceneInit(void) -{ +static void sceneInit(void) { // Load the vertex shader, create a shader program and bind it vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); shaderProgramInit(&program); @@ -30,8 +30,8 @@ static void sceneInit(void) // Attribute format and element count are ignored in immediate mode C3D_AttrInfo* attrInfo = C3D_GetAttrInfo(); AttrInfo_Init(attrInfo); - AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position - AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3); // v1=color + AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position + AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3); // v1=color // Compute the projection matrix Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true); @@ -44,51 +44,48 @@ static void sceneInit(void) C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE); } -static void sceneRender(void) -{ +static void sceneRender(void) { // Update the uniforms C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection); // Draw the triangle directly C3D_ImmDrawBegin(GPU_TRIANGLES); - // Triangle 1 - // This vertex has r >= 0 and a >= 0 so the shader should output magenta (cmp.x = cmp.y = 1) - C3D_ImmSendAttrib(200.0f, 200.0f, 0.5f, 0.0f); // v0=position - C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); // v1=color + // Triangle 1 + // This vertex has r >= 0 and a >= 0 so the shader should output magenta (cmp.x = cmp.y = 1) + C3D_ImmSendAttrib(200.0f, 200.0f, 
0.5f, 0.0f); // v0=position + C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); // v1=color - // This vertex only has a >= 0, so the shader should output lime (cmp.x = 0, cmp.y = 1) - C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f); - C3D_ImmSendAttrib(-0.5f, 1.0f, 0.0f, 1.0f); + // This vertex only has a >= 0, so the shader should output lime (cmp.x = 0, cmp.y = 1) + C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f); + C3D_ImmSendAttrib(-0.5f, 1.0f, 0.0f, 1.0f); - // This vertex only has r >= 0, so the shader should output cyan (cmp.x = 1, cmp.y = 0) - C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f); - C3D_ImmSendAttrib(0.5f, 0.0f, 1.0f, -1.0f); + // This vertex only has r >= 0, so the shader should output cyan (cmp.x = 1, cmp.y = 0) + C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f); + C3D_ImmSendAttrib(0.5f, 0.0f, 1.0f, -1.0f); - // Triangle 2 - // The next 3 vertices have r < 0, a < 0, so the output of the shader should be the output of litp with alpha set to 1 (cmp.x = cmp.y = 0) - C3D_ImmSendAttrib(10.0f, 20.0f, 0.5f, 0.0f); - // Output g component should be 64 / 128 = 0.5 - C3D_ImmSendAttrib(-1.0f, 64.0f, 0.0f, -1.0f); + // Triangle 2 + // The next 3 vertices have r < 0, a < 0, so the output of the shader should be the output of litp with alpha set to 1 (cmp.x = cmp.y = 0) + C3D_ImmSendAttrib(10.0f, 20.0f, 0.5f, 0.0f); + // Output g component should be 64 / 128 = 0.5 + C3D_ImmSendAttrib(-1.0f, 64.0f, 0.0f, -1.0f); - C3D_ImmSendAttrib(90.0f, 20.0f, 0.5f, 0.0f); - // Output g component should be 128 / 128 = 1.0 - C3D_ImmSendAttrib(-1.0f, 256.0f, 1.0f, -1.0f); + C3D_ImmSendAttrib(90.0f, 20.0f, 0.5f, 0.0f); + // Output g component should be 128 / 128 = 1.0 + C3D_ImmSendAttrib(-1.0f, 256.0f, 1.0f, -1.0f); - C3D_ImmSendAttrib(40.0f, 40.0f, 0.5f, 0.0f); - // Output g component should be 0 / 128 = 0 - C3D_ImmSendAttrib(-1.0f, 0.0f, 0.5f, -1.0f); + C3D_ImmSendAttrib(40.0f, 40.0f, 0.5f, 0.0f); + // Output g component should be 0 / 128 = 0 + C3D_ImmSendAttrib(-1.0f, 0.0f, 0.5f, -1.0f); 
C3D_ImmDrawEnd(); } -static void sceneExit(void) -{ +static void sceneExit(void) { // Free the shader program shaderProgramFree(&program); DVLB_Free(vshader_dvlb); } -int main() -{ +int main() { // Initialize graphics gfxInitDefault(); C3D_Init(C3D_DEFAULT_CMDBUF_SIZE); @@ -101,20 +98,18 @@ int main() sceneInit(); // Main loop - while (aptMainLoop()) - { + while (aptMainLoop()) { hidScanInput(); // Respond to user input u32 kDown = hidKeysDown(); - if (kDown & KEY_START) - break; // break in order to return to hbmenu + if (kDown & KEY_START) break; // break in order to return to hbmenu // Render the scene C3D_FrameBegin(C3D_FRAME_SYNCDRAW); - C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0); - C3D_FrameDrawOn(target); - sceneRender(); + C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0); + C3D_FrameDrawOn(target); + sceneRender(); C3D_FrameEnd(0); } From 68a6d73a1851168211ee1dd2047bb3ce41497c3f Mon Sep 17 00:00:00 2001 From: Jonian Guveli Date: Sat, 3 Aug 2024 10:32:26 +0300 Subject: [PATCH 169/251] Libretro: Add support for cheats --- src/libretro_core.cpp | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index 3e0436b8..c91460b8 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -1,5 +1,6 @@ #include #include +#include #include @@ -381,5 +382,24 @@ void* retro_get_memory_data(uint id) { return nullptr; } -void retro_cheat_set(uint index, bool enabled, const char* code) {} -void retro_cheat_reset() {} +void retro_cheat_set(uint index, bool enabled, const char* code) { + std::string cheatCode = std::regex_replace(code, std::regex("[^0-9a-fA-F]"), ""); + std::vector bytes; + + for (size_t i = 0; i < cheatCode.size(); i += 2) { + std::string hex = cheatCode.substr(i, 2); + bytes.push_back((u8)std::stoul(hex, nullptr, 16)); + } + + u32 id = emulator->getCheats().addCheat(bytes.data(), bytes.size()); + + if (enabled) { + 
emulator->getCheats().enableCheat(id); + } else { + emulator->getCheats().disableCheat(id); + } +} + +void retro_cheat_reset() { + emulator->getCheats().reset(); +} From 6e65367e07456b0b71cb1bfe945fd6c9a0e2cc3b Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 3 Aug 2024 12:52:52 +0300 Subject: [PATCH 170/251] size_t -> usize --- src/libretro_core.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index c91460b8..b099067f 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -386,7 +386,7 @@ void retro_cheat_set(uint index, bool enabled, const char* code) { std::string cheatCode = std::regex_replace(code, std::regex("[^0-9a-fA-F]"), ""); std::vector bytes; - for (size_t i = 0; i < cheatCode.size(); i += 2) { + for (usize i = 0; i < cheatCode.size(); i += 2) { std::string hex = cheatCode.substr(i, 2); bytes.push_back((u8)std::stoul(hex, nullptr, 16)); } From 85bae2e94eadb187ddbfcf5fe2005ac7bcd3bd5e Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 4 Aug 2024 16:46:43 +0300 Subject: [PATCH 171/251] HLE DSP: Handle cycle drifting --- include/audio/dsp_core.hpp | 2 +- include/audio/hle_core.hpp | 4 +++- include/audio/null_core.hpp | 2 +- include/audio/teakra_core.hpp | 2 +- src/core/audio/hle_core.cpp | 21 ++++++++++++++------- src/core/audio/null_core.cpp | 2 +- src/emulator.cpp | 2 +- 7 files changed, 22 insertions(+), 13 deletions(-) diff --git a/include/audio/dsp_core.hpp b/include/audio/dsp_core.hpp index a4fb1ab1..5addfd19 100644 --- a/include/audio/dsp_core.hpp +++ b/include/audio/dsp_core.hpp @@ -43,7 +43,7 @@ namespace Audio { virtual ~DSPCore() {} virtual void reset() = 0; - virtual void runAudioFrame() = 0; + virtual void runAudioFrame(u64 eventTimestamp) = 0; virtual u8* getDspMemory() = 0; virtual u16 recvData(u32 regId) = 0; diff --git a/include/audio/hle_core.hpp 
b/include/audio/hle_core.hpp index 35c1c1b8..c0e0896f 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -42,6 +42,7 @@ namespace Audio { return this->bufferID > other.bufferID; } }; + // Buffer of decoded PCM16 samples. TODO: Are there better alternatives to use over deque? using SampleBuffer = std::deque>; @@ -53,6 +54,7 @@ namespace Audio { std::array gain0, gain1, gain2; u32 samplePosition; // Sample number into the current audio buffer + float rateMultiplier; u16 syncCount; u16 currentBufferID; u16 previousBufferID; @@ -185,7 +187,7 @@ namespace Audio { ~HLE_DSP() override {} void reset() override; - void runAudioFrame() override; + void runAudioFrame(u64 eventTimestamp) override; u8* getDspMemory() override { return dspRam.rawMemory.data(); } diff --git a/include/audio/null_core.hpp b/include/audio/null_core.hpp index 7d6f1c9e..bedec8d3 100644 --- a/include/audio/null_core.hpp +++ b/include/audio/null_core.hpp @@ -27,7 +27,7 @@ namespace Audio { ~NullDSP() override {} void reset() override; - void runAudioFrame() override; + void runAudioFrame(u64 eventTimestamp) override; u8* getDspMemory() override { return dspRam.data(); } diff --git a/include/audio/teakra_core.hpp b/include/audio/teakra_core.hpp index 6a011231..17104985 100644 --- a/include/audio/teakra_core.hpp +++ b/include/audio/teakra_core.hpp @@ -83,7 +83,7 @@ namespace Audio { void reset() override; // Run 1 slice of DSP instructions and schedule the next audio frame - void runAudioFrame() override { + void runAudioFrame(u64 eventTimestamp) override { runSlice(); scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::lleSlice * 2); } diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index ffab9301..d1297ad8 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -95,7 +95,7 @@ namespace Audio { scheduler.removeEvent(Scheduler::EventType::RunDSP); } - void HLE_DSP::runAudioFrame() { + void 
HLE_DSP::runAudioFrame(u64 eventTimestamp) { // Signal audio pipe when an audio frame is done if (dspState == DSPState::On) [[likely]] { dspService.triggerPipeEvent(DSPPipeType::Audio); @@ -103,7 +103,10 @@ namespace Audio { // TODO: Should this be called if dspState != DSPState::On? outputFrame(); - scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::cyclesPerFrame); + + // How many cycles we were late + const u64 cycleDrift = scheduler.currentTimestamp - eventTimestamp; + scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::cyclesPerFrame - cycleDrift); } u16 HLE_DSP::recvData(u32 regId) { @@ -237,10 +240,9 @@ namespace Audio { auto& status = write.sourceStatuses.status[i]; status.enabled = source.enabled; status.syncCount = source.syncCount; - status.currentBufferIDDirty = source.isBufferIDDirty ? 1 : 0; + status.currentBufferIDDirty = (source.isBufferIDDirty ? 1 : 0); status.currentBufferID = source.currentBufferID; status.previousBufferID = source.previousBufferID; - // TODO: Properly update sample position status.samplePosition = source.samplePosition; source.isBufferIDDirty = false; @@ -292,6 +294,10 @@ namespace Audio { source.sourceType = config.monoOrStereo; } + if (config.rateMultiplierDirty) { + source.rateMultiplier = (config.rateMultiplier > 0.f) ? config.rateMultiplier : 1.f; + } + if (config.embeddedBufferDirty) { // Annoyingly, and only for embedded buffer, whether we use config.playPosition depends on the relevant dirty bit const u32 playPosition = config.playPositionDirty ? 
config.playPosition : 0; @@ -434,7 +440,7 @@ namespace Audio { decodeBuffer(source); } else { - constexpr uint maxSampleCount = Audio::samplesInFrame; + uint maxSampleCount = uint(float(Audio::samplesInFrame) * source.rateMultiplier); uint outputCount = 0; while (outputCount < maxSampleCount) { @@ -447,9 +453,9 @@ namespace Audio { } const uint sampleCount = std::min(maxSampleCount - outputCount, source.currentSamples.size()); - // samples.insert(samples.end(), source.currentSamples.begin(), source.currentSamples.begin() + sampleCount); - source.currentSamples.erase(source.currentSamples.begin(), source.currentSamples.begin() + sampleCount); + // samples.insert(samples.end(), source.currentSamples.begin(), source.currentSamples.begin() + sampleCount); + source.currentSamples.erase(source.currentSamples.begin(), std::next(source.currentSamples.begin(), sampleCount)); source.samplePosition += sampleCount; outputCount += sampleCount; } @@ -618,6 +624,7 @@ namespace Audio { previousBufferID = 0; currentBufferID = 0; syncCount = 0; + rateMultiplier = 1.f; buffers = {}; } diff --git a/src/core/audio/null_core.cpp b/src/core/audio/null_core.cpp index ec073ae7..93c746cb 100644 --- a/src/core/audio/null_core.cpp +++ b/src/core/audio/null_core.cpp @@ -74,7 +74,7 @@ namespace Audio { scheduler.removeEvent(Scheduler::EventType::RunDSP); } - void NullDSP::runAudioFrame() { + void NullDSP::runAudioFrame(u64 eventTimestamp) { // Signal audio pipe when an audio frame is done if (dspState == DSPState::On) [[likely]] { dspService.triggerPipeEvent(DSPPipeType::Audio); diff --git a/src/emulator.cpp b/src/emulator.cpp index 921af08f..8ce71e43 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -167,7 +167,7 @@ void Emulator::pollScheduler() { case Scheduler::EventType::UpdateTimers: kernel.pollTimers(); break; case Scheduler::EventType::RunDSP: { - dsp->runAudioFrame(); + dsp->runAudioFrame(time); break; } From 860eacc7e6b94da8f2d977880d4e99cf5bd97d96 Mon Sep 17 00:00:00 2001 From: 
Paris Oplopoios Date: Thu, 8 Aug 2024 17:29:44 +0300 Subject: [PATCH 172/251] Add createFromBinary function (#573) * Add createFromBinary function * Indentation --------- Co-authored-by: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> --- third_party/opengl/opengl.hpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp index 4a08650a..607815fa 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp @@ -432,6 +432,25 @@ namespace OpenGL { return m_handle != 0; } + bool createFromBinary(const uint8_t* binary, size_t size, GLenum format) { + m_handle = glCreateProgram(); + glProgramBinary(m_handle, format, binary, size); + + GLint success; + glGetProgramiv(m_handle, GL_LINK_STATUS, &success); + + if (!success) { + char buf[4096]; + glGetProgramInfoLog(m_handle, 4096, nullptr, buf); + fprintf(stderr, "Failed to link program\nError: %s\n", buf); + glDeleteProgram(m_handle); + + m_handle = 0; + } + + return m_handle != 0; + } + GLuint handle() const { return m_handle; } bool exists() const { return m_handle != 0; } void use() const { glUseProgram(m_handle); } From 88e0782f71e7fbf5544dbf899e7b0315012a38df Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 14 Aug 2024 20:13:38 +0300 Subject: [PATCH 173/251] HLE DSP: Fix source resetting --- src/core/audio/hle_core.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index d1297ad8..83271a43 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -627,5 +627,6 @@ namespace Audio { rateMultiplier = 1.f; buffers = {}; + currentSamples.clear(); } } // namespace Audio From 17b9699c24d89acec9af6b33f11498d280d4f42e Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 14 Aug 2024 17:58:26 +0000 Subject: [PATCH 174/251] 
Workaround MacOS runner image breaking again --- .github/workflows/MacOS_Build.yml | 2 +- .github/workflows/Qt_Build.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/MacOS_Build.yml b/.github/workflows/MacOS_Build.yml index f6fafde9..912c8568 100644 --- a/.github/workflows/MacOS_Build.yml +++ b/.github/workflows/MacOS_Build.yml @@ -40,7 +40,7 @@ jobs: run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Install bundle dependencies - run: brew install dylibbundler imagemagick + run: brew install --overwrite python@3.12 && brew install dylibbundler imagemagick - name: Run bundle script run: ./.github/mac-bundle.sh diff --git a/.github/workflows/Qt_Build.yml b/.github/workflows/Qt_Build.yml index 4d5c8b57..40141fb1 100644 --- a/.github/workflows/Qt_Build.yml +++ b/.github/workflows/Qt_Build.yml @@ -67,7 +67,7 @@ jobs: - name: Install bundle dependencies run: | - brew install dylibbundler imagemagick + brew install --overwrite python@3.12 && brew install dylibbundler imagemagick - name: Install qt run: brew install qt && which macdeployqt From d208c24c0cf88ce6c1ffb4266edb365116a6cbf3 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 14 Aug 2024 22:35:02 +0300 Subject: [PATCH 175/251] Implement controller gyroscope in SDL --- CMakeLists.txt | 1 + include/panda_sdl/frontend_sdl.hpp | 2 ++ include/sdl_gyro.hpp | 20 ++++++++++++++++++++ include/services/hid.hpp | 2 ++ src/core/services/hid.cpp | 1 - src/panda_sdl/frontend_sdl.cpp | 29 +++++++++++++++++++++++++++++ 6 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 include/sdl_gyro.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b55e2390..2865a3f8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -260,6 +260,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp 
include/audio/dsp_shared_mem.hpp include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp + include/sdl_gyro.hpp ) cmrc_add_resource_library( diff --git a/include/panda_sdl/frontend_sdl.hpp b/include/panda_sdl/frontend_sdl.hpp index 07038962..cbd0b88e 100644 --- a/include/panda_sdl/frontend_sdl.hpp +++ b/include/panda_sdl/frontend_sdl.hpp @@ -37,4 +37,6 @@ class FrontendSDL { // And so the user can still use the keyboard to control the analog bool keyboardAnalogX = false; bool keyboardAnalogY = false; + + void setupControllerSensors(SDL_GameController* controller); }; \ No newline at end of file diff --git a/include/sdl_gyro.hpp b/include/sdl_gyro.hpp new file mode 100644 index 00000000..17faab94 --- /dev/null +++ b/include/sdl_gyro.hpp @@ -0,0 +1,20 @@ +#pragma once + +#include +#include + +#include "services/hid.hpp" + +namespace Gyro::SDL { + // Convert the rotation data we get from SDL sensor events to rotation data we can feed right to HID + // Returns [pitch, roll, yaw] + static glm::vec3 convertRotation(glm::vec3 rotation) { + // Flip axes + glm::vec3 ret = -rotation; + // Convert from radians/s to deg/s and scale by the gyroscope coefficient from the HID service + ret *= 180.f / std::numbers::pi; + ret *= HIDService::gyroscopeCoeff; + + return ret; + } +} // namespace Gyro::SDL \ No newline at end of file diff --git a/include/services/hid.hpp b/include/services/hid.hpp index 86a55479..bce2cc1b 100644 --- a/include/services/hid.hpp +++ b/include/services/hid.hpp @@ -88,6 +88,8 @@ class HIDService { } public: + static constexpr float gyroscopeCoeff = 14.375f; // Same as retail 3DS + HIDService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/src/core/services/hid.cpp b/src/core/services/hid.cpp index ef6cbb41..aa13096c 100644 --- 
a/src/core/services/hid.cpp +++ b/src/core/services/hid.cpp @@ -103,7 +103,6 @@ void HIDService::getGyroscopeLowCalibrateParam(u32 messagePointer) { void HIDService::getGyroscopeCoefficient(u32 messagePointer) { log("HID::GetGyroscopeLowRawToDpsCoefficient\n"); - constexpr float gyroscopeCoeff = 14.375f; // Same as retail 3DS mem.write32(messagePointer, IPC::responseHeader(0x15, 2, 0)); mem.write32(messagePointer + 4, Result::Success); mem.write32(messagePointer + 8, Helpers::bit_cast(gyroscopeCoeff)); diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 77b1f55f..703fb1c7 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -2,6 +2,8 @@ #include +#include "sdl_gyro.hpp" + FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMappings()) { if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS) < 0) { Helpers::panic("Failed to initialize SDL2"); @@ -20,6 +22,8 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp SDL_Joystick* stick = SDL_GameControllerGetJoystick(gameController); gameControllerID = SDL_JoystickInstanceID(stick); } + + setupControllerSensors(gameController); } const EmulatorConfig& config = emu.getConfig(); @@ -200,6 +204,8 @@ void FrontendSDL::run() { if (gameController == nullptr) { gameController = SDL_GameControllerOpen(event.cdevice.which); gameControllerID = event.cdevice.which; + + setupControllerSensors(gameController); } break; @@ -280,6 +286,21 @@ void FrontendSDL::run() { } break; } + + case SDL_CONTROLLERSENSORUPDATE: { + if (event.csensor.sensor == SDL_SENSOR_GYRO) { + glm::vec3 rotation = Gyro::SDL::convertRotation({ + event.csensor.data[0], + event.csensor.data[1], + event.csensor.data[2], + }); + + hid.setPitch(s16(rotation.x)); + hid.setRoll(s16(rotation.y)); + hid.setYaw(s16(rotation.z)); + } + break; + } case SDL_DROPFILE: { char* droppedDir = event.drop.file; @@ -342,3 +363,11 @@ void FrontendSDL::run() { 
SDL_GL_SwapWindow(window); } } + +void FrontendSDL::setupControllerSensors(SDL_GameController* controller) { + bool haveGyro = SDL_GameControllerHasSensor(controller, SDL_SENSOR_GYRO) == SDL_TRUE; + + if (haveGyro) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); + } +} \ No newline at end of file From 520e00c5531f92bd9dca36901835bc546fbe1dd9 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 14 Aug 2024 22:57:45 +0300 Subject: [PATCH 176/251] Qt: Add controller gyroscope --- include/panda_qt/main_window.hpp | 1 + src/panda_qt/main_window.cpp | 28 ++++++++++++++++++++++++++++ src/panda_sdl/frontend_sdl.cpp | 2 +- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/include/panda_qt/main_window.hpp b/include/panda_qt/main_window.hpp index ecdbc02e..3ff16a1d 100644 --- a/include/panda_qt/main_window.hpp +++ b/include/panda_qt/main_window.hpp @@ -122,6 +122,7 @@ class MainWindow : public QMainWindow { void showAboutMenu(); void initControllers(); void pollControllers(); + void setupControllerSensors(SDL_GameController* controller); void sendMessage(const EmulatorMessage& message); void dispatchMessage(const EmulatorMessage& message); diff --git a/src/panda_qt/main_window.cpp b/src/panda_qt/main_window.cpp index 65769116..f1949da7 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp @@ -9,6 +9,7 @@ #include "cheats.hpp" #include "input_mappings.hpp" +#include "sdl_gyro.hpp" #include "services/dsp.hpp" MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent), keyboardMappings(InputMappings::defaultKeyboardMappings()) { @@ -521,6 +522,8 @@ void MainWindow::initControllers() { SDL_Joystick* stick = SDL_GameControllerGetJoystick(gameController); gameControllerID = SDL_JoystickInstanceID(stick); } + + setupControllerSensors(gameController); } } @@ -558,6 +561,8 @@ void MainWindow::pollControllers() { if (gameController == nullptr) { 
gameController = SDL_GameControllerOpen(event.cdevice.which); gameControllerID = event.cdevice.which; + + setupControllerSensors(gameController); } break; @@ -598,6 +603,29 @@ void MainWindow::pollControllers() { } break; } + + case SDL_CONTROLLERSENSORUPDATE: { + if (event.csensor.sensor == SDL_SENSOR_GYRO) { + auto rotation = Gyro::SDL::convertRotation({ + event.csensor.data[0], + event.csensor.data[1], + event.csensor.data[2], + }); + + hid.setPitch(s16(rotation.x)); + hid.setRoll(s16(rotation.y)); + hid.setYaw(s16(rotation.z)); + } + break; + } } } } + +void MainWindow::setupControllerSensors(SDL_GameController* controller) { + bool haveGyro = SDL_GameControllerHasSensor(controller, SDL_SENSOR_GYRO) == SDL_TRUE; + + if (haveGyro) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); + } +} \ No newline at end of file diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 703fb1c7..8f9f4240 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -289,7 +289,7 @@ void FrontendSDL::run() { case SDL_CONTROLLERSENSORUPDATE: { if (event.csensor.sensor == SDL_SENSOR_GYRO) { - glm::vec3 rotation = Gyro::SDL::convertRotation({ + auto rotation = Gyro::SDL::convertRotation({ event.csensor.data[0], event.csensor.data[1], event.csensor.data[2], From ff7e0f9ca88e10900a3132c8e646aecaee688760 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 14 Aug 2024 23:41:48 +0300 Subject: [PATCH 177/251] Optimize gyro calculation --- include/sdl_gyro.hpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/include/sdl_gyro.hpp b/include/sdl_gyro.hpp index 17faab94..e2df18df 100644 --- a/include/sdl_gyro.hpp +++ b/include/sdl_gyro.hpp @@ -8,13 +8,10 @@ namespace Gyro::SDL { // Convert the rotation data we get from SDL sensor events to rotation data we can feed right to HID // Returns [pitch, roll, yaw] - static glm::vec3 
convertRotation(glm::vec3 rotation) { - // Flip axes - glm::vec3 ret = -rotation; - // Convert from radians/s to deg/s and scale by the gyroscope coefficient from the HID service - ret *= 180.f / std::numbers::pi; - ret *= HIDService::gyroscopeCoeff; - - return ret; + static glm::vec3 convertRotation(glm::vec3 rotation) { + // Convert the rotation from rad/s to deg/s and scale by the gyroscope coefficient in HID + constexpr float scale = 180.f / std::numbers::pi * HIDService::gyroscopeCoeff; + // The axes are also inverted, so invert scale before the multiplication. + return rotation * -scale; } } // namespace Gyro::SDL \ No newline at end of file From c772b1c7026ced40084e63c1fe5366f39e5b4bcd Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:31:55 +0300 Subject: [PATCH 178/251] Initial accelerometer support --- include/services/hid.hpp | 12 ++++++++++++ src/core/services/hid.cpp | 22 +++++++++++++++++++--- src/panda_qt/main_window.cpp | 7 +++++++ src/panda_sdl/frontend_sdl.cpp | 7 +++++++ 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/include/services/hid.hpp b/include/services/hid.hpp index bce2cc1b..a0eefb1c 100644 --- a/include/services/hid.hpp +++ b/include/services/hid.hpp @@ -56,6 +56,7 @@ class HIDService { s16 circlePadX, circlePadY; // Circlepad state s16 touchScreenX, touchScreenY; // Touchscreen state s16 roll, pitch, yaw; // Gyroscope state + s16 accelX, accelY, accelZ; // Accelerometer state bool accelerometerEnabled; bool eventsInitialized; @@ -87,6 +88,11 @@ class HIDService { *(T*)&sharedMem[offset] = value; } + template + T* getSharedMemPointer(size_t offset) { + return (T*)&sharedMem[offset]; + } + public: static constexpr float gyroscopeCoeff = 14.375f; // Same as retail 3DS @@ -130,6 +136,12 @@ class HIDService { void setPitch(s16 value) { pitch = value; } void setYaw(s16 value) { yaw = value; } + void setAccel(s16 x, s16 y, s16 z) { + accelX = x; + accelY = 
y; + accelZ = z; + } + void updateInputs(u64 currentTimestamp); void setSharedMem(u8* ptr) { diff --git a/src/core/services/hid.cpp b/src/core/services/hid.cpp index aa13096c..a7b9b13b 100644 --- a/src/core/services/hid.cpp +++ b/src/core/services/hid.cpp @@ -35,6 +35,7 @@ void HIDService::reset() { circlePadX = circlePadY = 0; touchScreenX = touchScreenY = 0; roll = pitch = yaw = 0; + accelX = accelY = accelZ = 0; } void HIDService::handleSyncRequest(u32 messagePointer) { @@ -189,6 +190,20 @@ void HIDService::updateInputs(u64 currentTick) { writeSharedMem(0x108, currentTick); // Write new tick count } writeSharedMem(0x118, nextAccelerometerIndex); // Index last updated by the HID module + const size_t accelEntryOffset = 0x128 + (nextAccelerometerIndex * 6); // Offset in the array of 8 accelerometer entries + + // Raw data of current accelerometer entry + // TODO: How is the "raw" data actually calculated? + s16* accelerometerDataRaw = getSharedMemPointer(0x120); + accelerometerDataRaw[0] = accelX; + accelerometerDataRaw[1] = accelY; + accelerometerDataRaw[2] = accelZ; + + // Accelerometer entry in entry table + s16* accelerometerData = getSharedMemPointer(accelEntryOffset); + accelerometerData[0] = accelX; + accelerometerData[1] = accelY; + accelerometerData[2] = accelZ; nextAccelerometerIndex = (nextAccelerometerIndex + 1) % 8; // Move to next entry // Next, update gyro state @@ -197,9 +212,10 @@ void HIDService::updateInputs(u64 currentTick) { writeSharedMem(0x158, currentTick); // Write new tick count } const size_t gyroEntryOffset = 0x178 + (nextGyroIndex * 6); // Offset in the array of 8 touchscreen entries - writeSharedMem(gyroEntryOffset, pitch); - writeSharedMem(gyroEntryOffset + 2, yaw); - writeSharedMem(gyroEntryOffset + 4, roll); + s16* gyroData = getSharedMemPointer(gyroEntryOffset); + gyroData[0] = pitch; + gyroData[1] = yaw; + gyroData[2] = roll; // Since gyroscope euler angles are relative, we zero them out here and the frontend will update them 
again when we receive a new rotation roll = pitch = yaw = 0; diff --git a/src/panda_qt/main_window.cpp b/src/panda_qt/main_window.cpp index f1949da7..fab77d2e 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp @@ -615,6 +615,8 @@ void MainWindow::pollControllers() { hid.setPitch(s16(rotation.x)); hid.setRoll(s16(rotation.y)); hid.setYaw(s16(rotation.z)); + } else if (event.csensor.sensor == SDL_SENSOR_ACCEL) { + hid.setAccel(s16(event.csensor.data[0]), s16(-event.csensor.data[1]), s16(event.csensor.data[2])); } break; } @@ -624,8 +626,13 @@ void MainWindow::pollControllers() { void MainWindow::setupControllerSensors(SDL_GameController* controller) { bool haveGyro = SDL_GameControllerHasSensor(controller, SDL_SENSOR_GYRO) == SDL_TRUE; + bool haveAccelerometer = SDL_GameControllerHasSensor(controller, SDL_SENSOR_ACCEL) == SDL_TRUE; if (haveGyro) { SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); } + + if (haveAccelerometer) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_ACCEL, SDL_TRUE); + } } \ No newline at end of file diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 8f9f4240..80014884 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -298,6 +298,8 @@ void FrontendSDL::run() { hid.setPitch(s16(rotation.x)); hid.setRoll(s16(rotation.y)); hid.setYaw(s16(rotation.z)); + } else if (event.csensor.sensor == SDL_SENSOR_ACCEL) { + hid.setAccel(s16(event.csensor.data[0]), s16(-event.csensor.data[1]), s16(event.csensor.data[2])); } break; } @@ -366,8 +368,13 @@ void FrontendSDL::run() { void FrontendSDL::setupControllerSensors(SDL_GameController* controller) { bool haveGyro = SDL_GameControllerHasSensor(controller, SDL_SENSOR_GYRO) == SDL_TRUE; + bool haveAccelerometer = SDL_GameControllerHasSensor(controller, SDL_SENSOR_ACCEL) == SDL_TRUE; if (haveGyro) { SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); } + + if 
(haveAccelerometer) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_ACCEL, SDL_TRUE); + } } \ No newline at end of file From 98b5d560215d5899d12bea2ff0f161263bc71d8b Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 16 Aug 2024 10:06:56 +0200 Subject: [PATCH 179/251] metal: add all the files --- .gitignore | 4 + CMakeLists.txt | 86 +- .../renderer_mtl/mtl_blit_pipeline_cache.hpp | 75 ++ .../renderer_mtl/mtl_depth_stencil_cache.hpp | 86 ++ .../renderer_mtl/mtl_draw_pipeline_cache.hpp | 174 ++++ include/renderer_mtl/mtl_render_target.hpp | 92 +++ include/renderer_mtl/mtl_texture.hpp | 77 ++ .../renderer_mtl/mtl_vertex_buffer_cache.hpp | 80 ++ include/renderer_mtl/objc_helper.hpp | 16 + include/renderer_mtl/pica_to_mtl.hpp | 155 ++++ include/renderer_mtl/renderer_mtl.hpp | 189 +++++ src/core/renderer_mtl/metal_cpp_impl.cpp | 6 + src/core/renderer_mtl/mtl_etc1.cpp | 124 +++ src/core/renderer_mtl/mtl_texture.cpp | 312 +++++++ src/core/renderer_mtl/objc_helper.mm | 12 + src/core/renderer_mtl/renderer_mtl.cpp | 774 +++++++++++++++++ .../metal_copy_to_lut_texture.metal | 9 + src/host_shaders/metal_shaders.metal | 782 ++++++++++++++++++ 18 files changed, 3041 insertions(+), 12 deletions(-) create mode 100644 include/renderer_mtl/mtl_blit_pipeline_cache.hpp create mode 100644 include/renderer_mtl/mtl_depth_stencil_cache.hpp create mode 100644 include/renderer_mtl/mtl_draw_pipeline_cache.hpp create mode 100644 include/renderer_mtl/mtl_render_target.hpp create mode 100644 include/renderer_mtl/mtl_texture.hpp create mode 100644 include/renderer_mtl/mtl_vertex_buffer_cache.hpp create mode 100644 include/renderer_mtl/objc_helper.hpp create mode 100644 include/renderer_mtl/pica_to_mtl.hpp create mode 100644 include/renderer_mtl/renderer_mtl.hpp create mode 100644 src/core/renderer_mtl/metal_cpp_impl.cpp create mode 100644 src/core/renderer_mtl/mtl_etc1.cpp create mode 100644 src/core/renderer_mtl/mtl_texture.cpp create mode 100644 
src/core/renderer_mtl/objc_helper.mm create mode 100644 src/core/renderer_mtl/renderer_mtl.cpp create mode 100644 src/host_shaders/metal_copy_to_lut_texture.metal create mode 100644 src/host_shaders/metal_shaders.metal diff --git a/.gitignore b/.gitignore index 528462ad..817786a3 100644 --- a/.gitignore +++ b/.gitignore @@ -64,5 +64,9 @@ fb.bat *.elf *.smdh +# Compiled Metal shader files +*.ir +*.metallib + config.toml CMakeSettings.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 2865a3f8..31fdd9f2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,16 +26,17 @@ endif() if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security") -endif() +endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-interference-size") -endif() +endif() option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" ON) option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF) option(ENABLE_OPENGL "Enable OpenGL rendering backend" ON) option(ENABLE_VULKAN "Enable Vulkan rendering backend" ON) +option(ENABLE_METAL "Enable Metal rendering backend (if available)" ON) option(ENABLE_LTO "Enable link-time optimization" OFF) option(ENABLE_TESTS "Compile unit-tests" OFF) option(ENABLE_USER_BUILD "Make a user-facing build. 
These builds have various assertions disabled, LTO, and more" OFF) @@ -55,11 +56,6 @@ if(BUILD_LIBRETRO_CORE) add_compile_definitions(__LIBRETRO__) endif() -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND ENABLE_USER_BUILD) - # Disable stack buffer overflow checks in user builds - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS-") -endif() - add_library(AlberCore STATIC) include_directories(${PROJECT_SOURCE_DIR}/include/) @@ -240,7 +236,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/services/mic.hpp include/services/cecd.hpp include/services/ac.hpp include/services/am.hpp include/services/boss.hpp include/services/frd.hpp include/services/nim.hpp include/fs/archive_ext_save_data.hpp include/fs/archive_ncch.hpp include/services/mcu/mcu_hwc.hpp - include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp + include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp include/services/ldr_ro.hpp include/ipc.hpp include/services/act.hpp include/services/nfc.hpp include/system_models.hpp include/services/dlp_srvr.hpp include/PICA/dynapica/pica_recs.hpp include/PICA/dynapica/x64_regs.hpp include/PICA/dynapica/vertex_loader_rec.hpp include/PICA/dynapica/shader_rec.hpp @@ -251,7 +247,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/config.hpp include/services/ir_user.hpp include/http_server.hpp include/cheats.hpp include/action_replay.hpp include/renderer_sw/renderer_sw.hpp include/compiler_builtins.hpp include/fs/romfs.hpp include/fs/ivfc.hpp include/discord_rpc.hpp include/services/http.hpp include/result/result_cfg.hpp - include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp + include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp include/services/news_u.hpp include/applets/software_keyboard.hpp 
include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp @@ -260,7 +256,6 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp - include/sdl_gyro.hpp ) cmrc_add_resource_library( @@ -418,8 +413,75 @@ if(ENABLE_VULKAN) target_link_libraries(AlberCore PRIVATE Vulkan::Vulkan resources_renderer_vk) endif() +if(ENABLE_METAL AND APPLE) + set(RENDERER_MTL_INCLUDE_FILES include/renderer_mtl/renderer_mtl.hpp + include/renderer_mtl/mtl_depth_stencil_cache.hpp + include/renderer_mtl/mtl_blit_pipeline_cache.hpp + include/renderer_mtl/mtl_draw_pipeline_cache.hpp + include/renderer_mtl/mtl_render_target.hpp + include/renderer_mtl/mtl_texture.hpp + include/renderer_mtl/mtl_vertex_buffer_cache.hpp + include/renderer_mtl/pica_to_mtl.hpp + include/renderer_mtl/objc_helper.hpp + ) + + set(RENDERER_MTL_SOURCE_FILES src/core/renderer_mtl/metal_cpp_impl.cpp + src/core/renderer_mtl/renderer_mtl.cpp + src/core/renderer_mtl/mtl_texture.cpp + src/core/renderer_mtl/mtl_etc1.cpp + src/core/renderer_mtl/objc_helper.mm + src/host_shaders/metal_shaders.metal + src/host_shaders/metal_copy_to_lut_texture.metal + ) + + set(HEADER_FILES ${HEADER_FILES} ${RENDERER_MTL_INCLUDE_FILES}) + source_group("Source Files\\Core\\Metal Renderer" FILES ${RENDERER_MTL_SOURCE_FILES}) + + set(RENDERER_MTL_HOST_SHADERS_SOURCES) + function (add_metal_shader SHADER) + set(SHADER_SOURCE 
"${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metal") + set(SHADER_IR "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.ir") + set(SHADER_METALLIB "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metallib") + # TODO: only include sources in debug builds + add_custom_command( + OUTPUT ${SHADER_IR} + COMMAND xcrun -sdk macosx metal -gline-tables-only -frecord-sources -o ${SHADER_IR} -c ${SHADER_SOURCE} + DEPENDS ${SHADER_SOURCE} + VERBATIM) + add_custom_command( + OUTPUT ${SHADER_METALLIB} + COMMAND xcrun -sdk macosx metallib -o ${SHADER_METALLIB} ${SHADER_IR} + DEPENDS ${SHADER_IR} + VERBATIM) + set(RENDERER_MTL_HOST_SHADERS_SOURCES ${RENDERER_MTL_HOST_SHADERS_SOURCES} ${SHADER_METALLIB}) + endfunction() + + add_metal_shader(metal_shaders) + add_metal_shader(metal_copy_to_lut_texture) + + add_custom_target( + compile_msl_shaders + DEPENDS ${RENDERER_MTL_HOST_SHADERS_SOURCES} + ) + + cmrc_add_resource_library( + resources_renderer_mtl + NAMESPACE RendererMTL + WHENCE "src/host_shaders/" + "src/host_shaders/metal_shaders.metallib" + "src/host_shaders/metal_copy_to_lut_texture.metallib" + ) + add_dependencies(resources_renderer_mtl compile_msl_shaders) + + target_sources(AlberCore PRIVATE ${RENDERER_MTL_SOURCE_FILES}) + target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_METAL=1") + target_include_directories(AlberCore PRIVATE third_party/metal-cpp) + # TODO: check if all of them are needed + target_link_libraries(AlberCore PRIVATE "-framework Metal" "-framework Foundation" "-framework QuartzCore" resources_renderer_mtl) +endif() + source_group("Header Files\\Core" FILES ${HEADER_FILES}) -set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} +set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} ${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES} ${APPLET_SOURCE_FILES} ${RENDERER_SW_SOURCE_FILES} ${PICA_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES} ${AUDIO_SOURCE_FILES} ${HEADER_FILES} 
${FRONTEND_HEADER_FILES}) target_sources(AlberCore PRIVATE ${ALL_SOURCES}) @@ -508,7 +570,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) ) else() set(FRONTEND_SOURCE_FILES src/panda_sdl/main.cpp src/panda_sdl/frontend_sdl.cpp src/panda_sdl/mappings.cpp) - set(FRONTEND_HEADER_FILES "include/panda_sdl/frontend_sdl.hpp") + set(FRONTEND_HEADER_FILES "") endif() target_link_libraries(Alber PRIVATE AlberCore) diff --git a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp new file mode 100644 index 00000000..26422635 --- /dev/null +++ b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp @@ -0,0 +1,75 @@ +#pragma once + +#include + +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + +struct BlitPipelineHash { + // Formats + ColorFmt colorFmt; + DepthFmt depthFmt; +}; + +// This pipeline only caches the pipeline with all of its color and depth attachment variations +class BlitPipelineCache { +public: + BlitPipelineCache() = default; + + ~BlitPipelineCache() { + reset(); + vertexFunction->release(); + fragmentFunction->release(); + } + + void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) { + device = dev; + vertexFunction = vert; + fragmentFunction = frag; + } + + MTL::RenderPipelineState* get(BlitPipelineHash hash) { + u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt; + auto& pipeline = pipelineCache[intHash]; + if (!pipeline) { + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + + desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + + NS::Error* error = nullptr; + desc->setLabel(toNSString("Blit pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { 
+ Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + desc->release(); + } + + return pipeline; + } + + void reset() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + pipelineCache.clear(); + } + +private: + std::map pipelineCache; + + MTL::Device* device; + MTL::Function* vertexFunction; + MTL::Function* fragmentFunction; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_depth_stencil_cache.hpp b/include/renderer_mtl/mtl_depth_stencil_cache.hpp new file mode 100644 index 00000000..90721b70 --- /dev/null +++ b/include/renderer_mtl/mtl_depth_stencil_cache.hpp @@ -0,0 +1,86 @@ +#pragma once + +#include + +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + +struct DepthStencilHash { + bool depthStencilWrite; + u8 depthFunc; + u32 stencilConfig; + u16 stencilOpConfig; +}; + +class DepthStencilCache { +public: + DepthStencilCache() = default; + + ~DepthStencilCache() { + reset(); + } + + void set(MTL::Device* dev) { + device = dev; + } + + MTL::DepthStencilState* get(DepthStencilHash hash) { + u64 intHash = ((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig; + auto& depthStencilState = depthStencilCache[intHash]; + if (!depthStencilState) { + MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init(); + desc->setDepthWriteEnabled(hash.depthStencilWrite); + desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc)); + + const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig); + MTL::StencilDescriptor* stencilDesc = nullptr; + if (stencilEnable) { + const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig); + const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig); + + const u32 stencilBufferMask = hash.depthStencilWrite ? 
Helpers::getBits<8, 8>(hash.stencilConfig) : 0; + + const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig); + const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig); + const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig); + + stencilDesc = MTL::StencilDescriptor::alloc()->init(); + stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp)); + stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp)); + stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp)); + stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc)); + stencilDesc->setReadMask(stencilRefMask); + stencilDesc->setWriteMask(stencilBufferMask); + + desc->setFrontFaceStencil(stencilDesc); + desc->setBackFaceStencil(stencilDesc); + } + + depthStencilState = device->newDepthStencilState(desc); + + desc->release(); + if (stencilDesc) { + stencilDesc->release(); + } + } + + return depthStencilState; + } + + void reset() { + for (auto& pair : depthStencilCache) { + pair.second->release(); + } + depthStencilCache.clear(); + } + +private: + std::map depthStencilCache; + + MTL::Device* device; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp new file mode 100644 index 00000000..8bfea636 --- /dev/null +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -0,0 +1,174 @@ +#pragma once + +#include + +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + +struct DrawFragmentFunctionHash { + bool lightingEnabled; // 1 bit + u8 lightingNumLights; // 3 bits + u32 lightingConfig1; // 32 bits (TODO: check this) + // | ref | func | on | + u16 alphaControl; // 12 bits (mask: 11111111 0111 0001) +}; + +//bool operator==(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { +// return ((l.lightingEnabled == r.lightingEnabled) && (l.lightingNumLights == r.lightingNumLights) && +// 
(l.lightingConfig1 == r.lightingConfig1) && (l.alphaControl == r.alphaControl)); +//} + +inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { + if (!l.lightingEnabled && r.lightingEnabled) return true; + if (l.lightingNumLights < r.lightingNumLights) return true; + if (l.lightingConfig1 < r.lightingConfig1) return true; + if (l.alphaControl < r.alphaControl) return true; + + return false; +} + +struct DrawPipelineHash { // 56 bits + // Formats + ColorFmt colorFmt; // 3 bits + DepthFmt depthFmt; // 3 bits + + // Blending + bool blendEnabled; // 1 bit + // | functions | aeq | ceq | + u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111) + u8 colorWriteMask; // 4 bits + + DrawFragmentFunctionHash fragHash; +}; + +//bool operator==(const DrawPipelineHash& l, const DrawPipelineHash& r) { +// return (((u32)l.colorFmt == (u32)r.colorFmt) && ((u32)l.depthFmt == (u32)r.depthFmt) && +// (l.blendEnabled == r.blendEnabled) && (l.blendControl == r.blendControl) && +// (l.colorWriteMask == r.colorWriteMask) && (l.fragHash == r.fragHash)); +//} + +inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) { + if ((u32)l.colorFmt < (u32)r.colorFmt) return true; + if ((u32)l.depthFmt < (u32)r.depthFmt) return true; + if (!l.blendEnabled && r.blendEnabled) return true; + if (l.blendControl < r.blendControl) return true; + if (l.colorWriteMask < r.colorWriteMask) return true; + if (l.fragHash < r.fragHash) return true; + + return false; +} + +// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices +#define VERTEX_BUFFER_BINDING_INDEX 30 + +// This pipeline only caches the pipeline with all of its color and depth attachment variations +class DrawPipelineCache { +public: + DrawPipelineCache() = default; + + ~DrawPipelineCache() { + reset(); + vertexDescriptor->release(); + vertexFunction->release(); + } + + void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, 
MTL::VertexDescriptor* vertDesc) { + device = dev; + library = lib; + vertexFunction = vert; + vertexDescriptor = vertDesc; + } + + MTL::RenderPipelineState* get(DrawPipelineHash hash) { + //u32 fragmentFunctionHash = ((u32)hash.lightingEnabled << 22) | ((u32)hash.lightingNumLights << 19) | ((u32)hash.lightingConfig1 << 12) | ((((u32)hash.alphaControl & 0b1111111100000000) >> 8) << 4) | ((((u32)hash.alphaControl & 0b01110000) >> 4) << 1) | ((u32)hash.alphaControl & 0b0001); + //u64 pipelineHash = ((u64)hash.colorFmt << 53) | ((u64)hash.depthFmt << 50) | ((u64)hash.blendEnabled << 49) | ((u64)hash.colorWriteMask << 45) | ((((u64)hash.blendControl & 0b11111111111111110000000000000000) >> 16) << 29) | ((((u64)hash.blendControl & 0b0000011100000000) >> 8) << 26) | (((u64)hash.blendControl & 0b00000111) << 23) | fragmentFunctionHash; + auto& pipeline = pipelineCache[hash]; + if (!pipeline) { + auto& fragmentFunction = fragmentFunctionCache[hash.fragHash]; + if (!fragmentFunction) { + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); + constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); + constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); + constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2)); + constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3)); + + NS::Error* error = nullptr; + fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); + } + + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + 
desc->setVertexDescriptor(vertexDescriptor); + + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + MTL::ColorWriteMask writeMask = 0; + if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed; + if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen; + if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue; + if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha; + colorAttachment->setWriteMask(writeMask); + if (hash.blendEnabled) { + const u8 rgbEquation = hash.blendControl & 0x7; + const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl); + + // Get blending functions + const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl); + const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl); + const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl); + const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl); + + colorAttachment->setBlendingEnabled(true); + colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation)); + colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation)); + colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc)); + colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc)); + colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc)); + colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); + } + + desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + + NS::Error* error = nullptr; + desc->setLabel(toNSString("Draw pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + desc->release(); + } + + return pipeline; + } + + void reset() { + for (auto& pair : pipelineCache) { + 
pair.second->release(); + } + pipelineCache.clear(); + for (auto& pair : fragmentFunctionCache) { + pair.second->release(); + } + fragmentFunctionCache.clear(); + } + +private: + std::map pipelineCache; + std::map fragmentFunctionCache; + + MTL::Device* device; + MTL::Library* library; + MTL::Function* vertexFunction; + MTL::VertexDescriptor* vertexDescriptor; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_render_target.hpp b/include/renderer_mtl/mtl_render_target.hpp new file mode 100644 index 00000000..73be45f4 --- /dev/null +++ b/include/renderer_mtl/mtl_render_target.hpp @@ -0,0 +1,92 @@ +#pragma once +#include +#include +#include +#include "boost/icl/interval.hpp" +#include "helpers.hpp" +#include "math_util.hpp" +#include "opengl.hpp" +#include "pica_to_mtl.hpp" +#include "objc_helper.hpp" + +template +using Interval = boost::icl::right_open_interval; + +namespace Metal { + +template +struct RenderTarget { + MTL::Device* device; + + u32 location; + Format_t format; + OpenGL::uvec2 size; + bool valid; + + // Range of VRAM taken up by buffer + Interval range; + + MTL::Texture* texture = nullptr; + + RenderTarget() : valid(false) {} + + RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } + + Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { + const u32 startOffset = (inputAddress - location) / sizePerPixel(format); + const u32 x0 = (startOffset % (size.x() * 8)) / 8; + const u32 y0 = (startOffset / (size.x() * 8)) * 8; + return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; + } + + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the 
cached texture + bool matches(RenderTarget& other) { + return location == other.location && format == other.format && + size.x() == other.size.x() && size.y() == other.size.y(); + } + + void allocate() { + MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid; + if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format); + } else if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format); + } else { + panic("Invalid format type"); + } + + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModePrivate); + texture = device->newTexture(descriptor); + texture->setLabel(toNSString(std::string(std::is_same::value ? 
"Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" + std::to_string(size.v()))); + descriptor->release(); + } + + void free() { + valid = false; + + if (texture) { + texture->release(); + } + } + + u64 sizeInBytes() { + return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); + } +}; + +typedef RenderTarget ColorRenderTarget; +typedef RenderTarget DepthStencilRenderTarget; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp new file mode 100644 index 00000000..590132bd --- /dev/null +++ b/include/renderer_mtl/mtl_texture.hpp @@ -0,0 +1,77 @@ +#pragma once +#include +#include +#include +#include "PICA/regs.hpp" +#include "boost/icl/interval.hpp" +#include "helpers.hpp" +#include "math_util.hpp" +#include "opengl.hpp" +#include "renderer_mtl/pica_to_mtl.hpp" + +template +using Interval = boost::icl::right_open_interval; + +namespace Metal { + +struct Texture { + MTL::Device* device; + + u32 location; + u32 config; // Magnification/minification filter, wrapping configs, etc + PICA::TextureFmt format; + OpenGL::uvec2 size; + bool valid; + + // Range of VRAM taken up by buffer + Interval range; + + PICA::PixelFormatInfo formatInfo; + MTL::Texture* texture = nullptr; + MTL::SamplerState* sampler = nullptr; + + Texture() : valid(false) {} + + Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) { + + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } + + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture + bool matches(Texture& other) { + return location == other.location && format == other.format && + size.x() 
== other.size.x() && size.y() == other.size.y(); + } + + void allocate(); + void setNewConfig(u32 newConfig); + void decodeTexture(std::span data); + void free(); + u64 sizeInBytes(); + + u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + + // Get the morton interleave offset of a texel based on its U and V values + static u32 mortonInterleave(u32 u, u32 v); + // Get the byte offset of texel (u, v) in the texture + static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel); + static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); + + // Returns the format of this texture as a string + std::string_view formatToString() { + return PICA::textureFormatToString(format); + } + + // Returns the texel at coordinates (u, v) of an ETC1(A4) texture + // TODO: Make hasAlpha a template parameter + u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data); + u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData); +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp new file mode 100644 index 00000000..1760cdfa --- /dev/null +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -0,0 +1,80 @@ +#pragma once + +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + +struct BufferHandle { + MTL::Buffer* buffer; + size_t offset; +}; + +// 64MB buffer for caching vertex data +#define CACHE_BUFFER_SIZE 64 * 1024 * 1024 + +class VertexBufferCache { +public: + VertexBufferCache() = default; + + ~VertexBufferCache() { + endFrame(); + buffer->release(); + } + + void set(MTL::Device* dev) { + device = dev; + create(); + } + + void endFrame() { + ptr = 0; + for (auto buffer : additionalAllocations) { + buffer->release(); + } + additionalAllocations.clear(); + } + + BufferHandle 
get(const void* data, size_t size) { + // If the vertex buffer is too large, just create a new one + if (ptr + size > CACHE_BUFFER_SIZE) { + MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared); + newBuffer->setLabel(toNSString("Additional vertex buffer")); + additionalAllocations.push_back(newBuffer); + Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer"); + + return BufferHandle{newBuffer, 0}; + } + + // Copy the data into the buffer + memcpy((char*)buffer->contents() + ptr, data, size); + + size_t oldPtr = ptr; + ptr += size; + + return BufferHandle{buffer, oldPtr}; + } + + void reset() { + endFrame(); + if (buffer) { + buffer->release(); + create(); + } + } + +private: + MTL::Buffer* buffer = nullptr; + size_t ptr = 0; + std::vector additionalAllocations; + + MTL::Device* device; + + void create() { + buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared); + buffer->setLabel(toNSString("Shared vertex buffer")); + } +}; + +} // namespace Metal diff --git a/include/renderer_mtl/objc_helper.hpp b/include/renderer_mtl/objc_helper.hpp new file mode 100644 index 00000000..91756d24 --- /dev/null +++ b/include/renderer_mtl/objc_helper.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include + +#include + +namespace Metal { + +dispatch_data_t createDispatchData(const void* data, size_t size); + +} // namespace Metal + +// Cast from std::string to NS::String* +inline NS::String* toNSString(const std::string& str) { + return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); +} diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp new file mode 100644 index 00000000..de76dc3b --- /dev/null +++ b/include/renderer_mtl/pica_to_mtl.hpp @@ -0,0 +1,155 @@ +#pragma once + +#include +#include "PICA/regs.hpp" + +namespace PICA { + +struct PixelFormatInfo { + MTL::PixelFormat pixelFormat; + size_t bytesPerTexel; +}; + +constexpr PixelFormatInfo pixelFormatInfos[14] = 
{ + {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8 + {MTL::PixelFormatRGBA8Unorm, 4}, // RGB8 + {MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551 + {MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565 + {MTL::PixelFormatABGR4Unorm, 2}, // RGBA4 + {MTL::PixelFormatRGBA8Unorm, 4}, // IA8 + {MTL::PixelFormatRG8Unorm, 2}, // RG8 + {MTL::PixelFormatRGBA8Unorm, 4}, // I8 + {MTL::PixelFormatA8Unorm, 1}, // A8 + {MTL::PixelFormatABGR4Unorm, 2}, // IA4 + {MTL::PixelFormatABGR4Unorm, 2}, // I4 + {MTL::PixelFormatA8Unorm, 1}, // A4 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4 +}; + +inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { + return pixelFormatInfos[static_cast(format)]; +} + +inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) { + switch (format) { + case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm? + case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm? 
+ case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm; + } +} + +inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) { + switch (format) { + case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm; + case DepthFmt::Unknown1: return MTL::PixelFormatInvalid; + case DepthFmt::Depth24: return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats + // Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead + case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8; + } +} + +inline MTL::CompareFunction toMTLCompareFunc(u8 func) { + switch (func) { + case 0: return MTL::CompareFunctionNever; + case 1: return MTL::CompareFunctionAlways; + case 2: return MTL::CompareFunctionEqual; + case 3: return MTL::CompareFunctionNotEqual; + case 4: return MTL::CompareFunctionLess; + case 5: return MTL::CompareFunctionLessEqual; + case 6: return MTL::CompareFunctionGreater; + case 7: return MTL::CompareFunctionGreaterEqual; + default: panic("Unknown compare function %u", func); + } + + return MTL::CompareFunctionAlways; +} + +inline MTL::BlendOperation toMTLBlendOperation(u8 op) { + switch (op) { + case 0: return MTL::BlendOperationAdd; + case 1: return MTL::BlendOperationSubtract; + case 2: return MTL::BlendOperationReverseSubtract; + case 3: return MTL::BlendOperationMin; + case 4: return MTL::BlendOperationMax; + case 5: return MTL::BlendOperationAdd; // Unused (same as 0) + case 6: return MTL::BlendOperationAdd; // Unused (same as 0) + case 7: return MTL::BlendOperationAdd; // Unused (same as 0) + default: panic("Unknown blend operation %u", op); + } + + return MTL::BlendOperationAdd; +} + +inline MTL::BlendFactor toMTLBlendFactor(u8 factor) { + switch (factor) { + case 0: return MTL::BlendFactorZero; + case 1: return MTL::BlendFactorOne; + case 2: return MTL::BlendFactorSourceColor; + case 3: return MTL::BlendFactorOneMinusSourceColor; + case 4: return MTL::BlendFactorDestinationColor; + case 5: 
return MTL::BlendFactorOneMinusDestinationColor; + case 6: return MTL::BlendFactorSourceAlpha; + case 7: return MTL::BlendFactorOneMinusSourceAlpha; + case 8: return MTL::BlendFactorDestinationAlpha; + case 9: return MTL::BlendFactorOneMinusDestinationAlpha; + case 10: return MTL::BlendFactorBlendColor; + case 11: return MTL::BlendFactorOneMinusBlendColor; + case 12: return MTL::BlendFactorBlendAlpha; + case 13: return MTL::BlendFactorOneMinusBlendAlpha; + case 14: return MTL::BlendFactorSourceAlphaSaturated; + case 15: return MTL::BlendFactorOne; // Undocumented + default: panic("Unknown blend factor %u", factor); + } + + return MTL::BlendFactorOne; +} + +inline MTL::StencilOperation toMTLStencilOperation(u8 op) { + switch (op) { + case 0: return MTL::StencilOperationKeep; + case 1: return MTL::StencilOperationZero; + case 2: return MTL::StencilOperationReplace; + case 3: return MTL::StencilOperationIncrementClamp; + case 4: return MTL::StencilOperationDecrementClamp; + case 5: return MTL::StencilOperationInvert; + case 6: return MTL::StencilOperationIncrementWrap; + case 7: return MTL::StencilOperationDecrementWrap; + default: panic("Unknown stencil operation %u", op); + } + + return MTL::StencilOperationKeep; +} + +inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) { + switch (primType) { + case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle; + case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip; + case PrimType::TriangleFan: + Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); + return MTL::PrimitiveTypeTriangle; + case PrimType::GeometryPrimitive: + //Helpers::warn("Geometry primitives are not yet, using triangles instead"); + return MTL::PrimitiveTypeTriangle; + } +} + +inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) { + switch (addrMode) { + case 0: return MTL::SamplerAddressModeClampToEdge; + case 1: return MTL::SamplerAddressModeClampToBorderColor; + case 2: 
return MTL::SamplerAddressModeRepeat; + case 3: return MTL::SamplerAddressModeMirrorRepeat; + case 4: return MTL::SamplerAddressModeClampToEdge; + case 5: return MTL::SamplerAddressModeClampToBorderColor; + case 6: return MTL::SamplerAddressModeRepeat; + case 7: return MTL::SamplerAddressModeRepeat; + default: panic("Unknown sampler address mode %u", addrMode); + } + + return MTL::SamplerAddressModeClampToEdge; +} + +} // namespace PICA diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp new file mode 100644 index 00000000..9ba0937a --- /dev/null +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -0,0 +1,189 @@ +#include +#include + +#include "renderer.hpp" +#include "mtl_texture.hpp" +#include "mtl_render_target.hpp" +#include "mtl_blit_pipeline_cache.hpp" +#include "mtl_draw_pipeline_cache.hpp" +#include "mtl_depth_stencil_cache.hpp" +#include "mtl_vertex_buffer_cache.hpp" +// HACK: use the OpenGL cache +#include "../renderer_gl/surface_cache.hpp" + +class GPU; + +struct Color4 { + float r, g, b, a; +}; + +class RendererMTL final : public Renderer { + public: + RendererMTL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); + ~RendererMTL() override; + + void reset() override; + void display() override; + void initGraphicsContext(SDL_Window* window) override; + void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; + void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; + void drawVertices(PICA::PrimType primType, std::span vertices) override; + void screenshot(const std::string& name) override; + void deinitGraphicsContext() override; + +#ifdef PANDA3DS_FRONTEND_QT + virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {} +#endif + + private: + CA::MetalLayer* metalLayer; + + 
MTL::Device* device; + MTL::CommandQueue* commandQueue; + + // Libraries + MTL::Library* library; + + // Caches + SurfaceCache colorRenderTargetCache; + SurfaceCache depthStencilRenderTargetCache; + SurfaceCache textureCache; + Metal::BlitPipelineCache blitPipelineCache; + Metal::DrawPipelineCache drawPipelineCache; + Metal::DepthStencilCache depthStencilCache; + Metal::VertexBufferCache vertexBufferCache; + + // Objects + MTL::SamplerState* nearestSampler; + MTL::SamplerState* linearSampler; + MTL::Texture* lutTexture; + MTL::DepthStencilState* defaultDepthStencilState; + + // Pipelines + MTL::RenderPipelineState* displayPipeline; + MTL::RenderPipelineState* copyToLutTexturePipeline; + + // Clears + std::map colorClearOps; + std::map depthClearOps; + std::map stencilClearOps; + + // Active state + MTL::CommandBuffer* commandBuffer = nullptr; + MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; + MTL::Texture* lastColorTexture = nullptr; + MTL::Texture* lastDepthTexture = nullptr; + + // Debug + std::string nextRenderPassName; + + void createCommandBufferIfNeeded() { + if (!commandBuffer) { + commandBuffer = commandQueue->commandBuffer(); + } + } + + void endRenderPass() { + if (renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder = nullptr; + } + } + + void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr) { + createCommandBufferIfNeeded(); + + if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { + endRenderPass(); + + renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); + renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); + + lastColorTexture = colorTexture; + lastDepthTexture = depthTexture; + } + + renderPassDescriptor->release(); + } + + void commitCommandBuffer() { + if 
(renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder->release(); + renderCommandEncoder = nullptr; + } + if (commandBuffer) { + commandBuffer->commit(); + commandBuffer->release(); + commandBuffer = nullptr; + } + } + + template + inline void clearAttachment(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment, SetClearDataT setClearData) { + bool beginRenderPass = (renderPassDescriptor == nullptr); + if (!renderPassDescriptor) { + renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + } + + AttachmentT* attachment = getAttachment(renderPassDescriptor); + attachment->setTexture(texture); + setClearData(attachment, clearData); + attachment->setLoadAction(MTL::LoadActionClear); + attachment->setStoreAction(MTL::StoreActionStore); + + if (beginRenderPass) { + if (std::is_same::value) + beginRenderPassIfNeeded(renderPassDescriptor, true, texture); + else + beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture); + } + } + + template + inline bool clearAttachment(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map& clearOps, GetAttachmentT getAttachment, SetClearDataT setClearData) { + auto it = clearOps.find(texture); + if (it != clearOps.end()) { + clearAttachment(renderPassDescriptor, texture, it->second, getAttachment, setClearData); + clearOps.erase(it); + return true; + } + + if (renderPassDescriptor) { + AttachmentT* attachment = getAttachment(renderPassDescriptor); + attachment->setTexture(texture); + attachment->setLoadAction(MTL::LoadActionLoad); + attachment->setStoreAction(MTL::StoreActionStore); + } + + return false; + } + + bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment(renderPassDescriptor, texture, colorClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); }, 
[](auto attachment, auto& color) { + attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); + }); + } + + bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment(renderPassDescriptor, texture, depthClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); }, [](auto attachment, auto& depth) { + attachment->setClearDepth(depth); + }); + } + + bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment(renderPassDescriptor, texture, stencilClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); }, [](auto attachment, auto& stencil) { + attachment->setClearStencil(stencil); + }); + } + + std::optional getColorRenderTarget(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); + Metal::DepthStencilRenderTarget& getDepthRenderTarget(); + Metal::Texture& getTexture(Metal::Texture& tex); + void setupTextureEnvState(MTL::RenderCommandEncoder* encoder); + void bindTexturesToSlots(MTL::RenderCommandEncoder* encoder); + void updateLightingLUT(MTL::RenderCommandEncoder* encoder); + void updateFogLUT(MTL::RenderCommandEncoder* encoder); + void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect); +}; diff --git a/src/core/renderer_mtl/metal_cpp_impl.cpp b/src/core/renderer_mtl/metal_cpp_impl.cpp new file mode 100644 index 00000000..7fa7137b --- /dev/null +++ b/src/core/renderer_mtl/metal_cpp_impl.cpp @@ -0,0 +1,6 @@ +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include +#include +#include diff --git a/src/core/renderer_mtl/mtl_etc1.cpp b/src/core/renderer_mtl/mtl_etc1.cpp new file mode 100644 index 00000000..a414df3c --- /dev/null +++ 
b/src/core/renderer_mtl/mtl_etc1.cpp @@ -0,0 +1,124 @@ +#include +#include "colour.hpp" +#include "renderer_mtl/renderer_mtl.hpp" +#include "renderer_mtl/mtl_texture.hpp" + +using namespace Helpers; + +namespace Metal { + +static constexpr u32 signExtend3To32(u32 val) { + return (u32)(s32(val) << 29 >> 29); +} + +u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { + // Pixel offset of the 8x8 tile based on u, v and the width of the texture + u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); + if (!hasAlpha) + offs >>= 1; + + // In-tile offsets for u/v + u &= 7; + v &= 7; + + // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles + // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes + const u32 subTileSize = hasAlpha ? 16 : 8; + const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in? + + // In-subtile offsets for u/v + u &= 3; + v &= 3; + offs += subTileSize * subTileIndex; + + u32 alpha; + const u64* ptr = reinterpret_cast(data.data() + offs); // Cast to u64* + + if (hasAlpha) { + // First 64 bits of the 4x4 subtile are alpha data + const u64 alphaData = *ptr++; + alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf); + } + else { + alpha = 0xff; // ETC1 without alpha uses ff for every pixel + } + + // Next 64 bits of the subtile are colour data + u64 colourData = *ptr; + return decodeETC(alpha, u, v, colourData); +} + +u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) { + static constexpr u32 modifiers[8][2] = { + { 2, 8 }, + { 5, 17 }, + { 9, 29 }, + { 13, 42 }, + { 18, 60 }, + { 24, 80 }, + { 33, 106 }, + { 47, 183 }, + }; + + // Parse colour data for 4x4 block + const u32 subindices = getBits<0, 16, u32>(colourData); + const u32 negationFlags = getBits<16, 16, u32>(colourData); + const bool flip = getBit<32>(colourData); + const bool diffMode = getBit<33>(colourData); + + // Note: index1 is indeed stored on the higher 
bits, with index2 in the lower bits + const u32 tableIndex1 = getBits<37, 3, u32>(colourData); + const u32 tableIndex2 = getBits<34, 3, u32>(colourData); + const u32 texelIndex = u * 4 + v; // Index of the texel in the block + + if (flip) + std::swap(u, v); + + s32 r, g, b; + if (diffMode) { + r = getBits<59, 5, s32>(colourData); + g = getBits<51, 5, s32>(colourData); + b = getBits<43, 5, s32>(colourData); + + if (u >= 2) { + r += signExtend3To32(getBits<56, 3, u32>(colourData)); + g += signExtend3To32(getBits<48, 3, u32>(colourData)); + b += signExtend3To32(getBits<40, 3, u32>(colourData)); + } + + // Expand from 5 to 8 bits per channel + r = Colour::convert5To8Bit(r); + g = Colour::convert5To8Bit(g); + b = Colour::convert5To8Bit(b); + } else { + if (u < 2) { + r = getBits<60, 4, s32>(colourData); + g = getBits<52, 4, s32>(colourData); + b = getBits<44, 4, s32>(colourData); + } else { + r = getBits<56, 4, s32>(colourData); + g = getBits<48, 4, s32>(colourData); + b = getBits<40, 4, s32>(colourData); + } + + // Expand from 4 to 8 bits per channel + r = Colour::convert4To8Bit(r); + g = Colour::convert4To8Bit(g); + b = Colour::convert4To8Bit(b); + } + + const u32 index = (u < 2) ? 
tableIndex1 : tableIndex2; + s32 modifier = modifiers[index][(subindices >> texelIndex) & 1]; + + if (((negationFlags >> texelIndex) & 1) != 0) { + modifier = -modifier; + } + + r = std::clamp(r + modifier, 0, 255); + g = std::clamp(g + modifier, 0, 255); + b = std::clamp(b + modifier, 0, 255); + + return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r); +} + +} // namespace Metal diff --git a/src/core/renderer_mtl/mtl_texture.cpp b/src/core/renderer_mtl/mtl_texture.cpp new file mode 100644 index 00000000..b61c5502 --- /dev/null +++ b/src/core/renderer_mtl/mtl_texture.cpp @@ -0,0 +1,312 @@ +#include "renderer_mtl/mtl_texture.hpp" +#include "renderer_mtl/objc_helper.hpp" +#include "colour.hpp" +#include + +using namespace Helpers; + +namespace Metal { + +void Texture::allocate() { + formatInfo = PICA::getPixelFormatInfo(format); + + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(formatInfo.pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers? + texture = device->newTexture(descriptor); + texture->setLabel(toNSString("Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v()))); + descriptor->release(); + + setNewConfig(config); +} + +// Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on +void Texture::setNewConfig(u32 cfg) { + config = cfg; + + if (sampler) { + sampler->release(); + } + + const auto magFilter = (cfg & 0x2) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto minFilter = (cfg & 0x4) != 0 ? 
MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto wrapT = PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg)); + const auto wrapS = PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg)); + + MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + samplerDescriptor->setMinFilter(minFilter); + samplerDescriptor->setMagFilter(magFilter); + samplerDescriptor->setSAddressMode(wrapS); + samplerDescriptor->setTAddressMode(wrapT); + + samplerDescriptor->setLabel(toNSString("Sampler")); + sampler = device->newSamplerState(samplerDescriptor); + samplerDescriptor->release(); +} + +void Texture::free() { + valid = false; + + if (texture) { + texture->release(); + } + if (sampler) { + sampler->release(); + } +} + +u64 Texture::sizeInBytes() { + u64 pixelCount = u64(size.x()) * u64(size.y()); + + switch (format) { + case PICA::TextureFmt::RGBA8: // 4 bytes per pixel + return pixelCount * 4; + + case PICA::TextureFmt::RGB8: // 3 bytes per pixel + return pixelCount * 3; + + case PICA::TextureFmt::RGBA5551: // 2 bytes per pixel + case PICA::TextureFmt::RGB565: + case PICA::TextureFmt::RGBA4: + case PICA::TextureFmt::RG8: + case PICA::TextureFmt::IA8: + return pixelCount * 2; + + case PICA::TextureFmt::A8: // 1 byte per pixel + case PICA::TextureFmt::I8: + case PICA::TextureFmt::IA4: + return pixelCount; + + case PICA::TextureFmt::I4: // 4 bits per pixel + case PICA::TextureFmt::A4: + return pixelCount / 2; + + case PICA::TextureFmt::ETC1: // Compressed formats + case PICA::TextureFmt::ETC1A4: { + // Number of 4x4 tiles + const u64 tileCount = pixelCount / 16; + // Tiles are 8 bytes each on ETC1 and 16 bytes each on ETC1A4 + const u64 tileSize = format == PICA::TextureFmt::ETC1 ? 
8 : 16; + return tileCount * tileSize; + } + + default: + Helpers::panic("[PICA] Attempted to get size of invalid texture type"); + } +} + +// u and v are the UVs of the relevant texel +// Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here +// https://en.wikipedia.org/wiki/Z-order_curve +// Textures are split into 8x8 tiles.This function returns the in - tile offset depending on the u & v of the texel +// The in - tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of y % 8 +// As documented in this picture https ://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg +u32 Texture::mortonInterleave(u32 u, u32 v) { + static constexpr u32 xOffsets[] = { 0, 1, 4, 5, 16, 17, 20, 21 }; + static constexpr u32 yOffsets[] = { 0, 2, 8, 10, 32, 34, 40, 42 }; + + return xOffsets[u & 7] + yOffsets[v & 7]; +} + +// Get the byte offset of texel (u, v) in the texture +u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + + return offset * bytesPerPixel; +} + +// Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte +u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + + return offset / 2; +} + +u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::A4: { + const u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 alpha = data[offset] >> ((u % 2) ? 
4 : 0); + alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha)); + + // A8 + return alpha; + } + + case PICA::TextureFmt::A8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 alpha = data[offset]; + + // A8 + return alpha; + } + + default: + Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } +} + +u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RG8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + constexpr u8 b = 0; + const u8 g = data[offset]; + const u8 r = data[offset + 1]; + + // RG8 + return (g << 8) | r; + } + + case PICA::TextureFmt::RGBA4: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + u8 alpha = getBits<0, 4, u8>(texel); + u8 b = getBits<4, 4, u8>(texel); + u8 g = getBits<8, 4, u8>(texel); + u8 r = getBits<12, 4, u8>(texel); + + // ABGR4 + return (r << 12) | (g << 8) | (b << 4) | alpha; + } + + case PICA::TextureFmt::RGBA5551: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + u8 alpha = getBit<0>(texel) ? 
0xff : 0; + u8 b = getBits<1, 5, u8>(texel); + u8 g = getBits<6, 5, u8>(texel); + u8 r = getBits<11, 5, u8>(texel); + + // BGR5A1 + return (alpha << 15) | (r << 10) | (g << 5) | b; + } + + case PICA::TextureFmt::RGB565: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + const u8 b = getBits<0, 5, u8>(texel); + const u8 g = getBits<5, 6, u8>(texel); + const u8 r = getBits<11, 5, u8>(texel); + + // B5G6R5 + return (r << 11) | (g << 5) | b; + } + + case PICA::TextureFmt::IA4: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 texel = data[offset]; + const u8 alpha = texel & 0xf; + const u8 intensity = texel >> 4; + + // ABGR4 + return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha; + } + + case PICA::TextureFmt::I4: { + u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 intensity = data[offset] >> ((u % 2) ? 
4 : 0);
+            intensity = getBits<0, 4>(intensity);
+
+            // ABGR4
+            // Alpha is a single 4-bit nibble here (fully opaque = 0xf), matching the RGBA4 and IA4 cases
+            // of this function. OR-ing in 0xff would also force bits 4-7, a colour nibble, to all ones.
+            return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xf;
+        }
+
+        default:
+            Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
+    }
+}
+
+// Decode the texel at (u, v) of a swizzled source texture into a 32-bit value.
+// Handles RGB8, RGBA8, I8, IA8 and the ETC1/ETC1A4 compressed formats; any other format panics.
+// The result is packed as (alpha << 24) | (b << 16) | (g << 8) | r; formats without an alpha
+// channel are given an alpha of 0xff.
+u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
+    switch (fmt) {
+        case PICA::TextureFmt::RGB8: {
+            const u32 offset = getSwizzledOffset(u, v, size.u(), 3);
+            const u8 b = data[offset];
+            const u8 g = data[offset + 1];
+            const u8 r = data[offset + 2];
+
+            // RGBA8
+            return (0xff << 24) | (b << 16) | (g << 8) | r;
+        }
+
+        case PICA::TextureFmt::RGBA8: {
+            const u32 offset = getSwizzledOffset(u, v, size.u(), 4);
+            const u8 alpha = data[offset];
+            const u8 b = data[offset + 1];
+            const u8 g = data[offset + 2];
+            const u8 r = data[offset + 3];
+
+            // RGBA8
+            return (alpha << 24) | (b << 16) | (g << 8) | r;
+        }
+
+        case PICA::TextureFmt::I8: {
+            u32 offset = getSwizzledOffset(u, v, size.u(), 1);
+            const u8 intensity = data[offset];
+
+            // RGBA8
+            return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity;
+        }
+
+        case PICA::TextureFmt::IA8: {
+            u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+
+            // Same as I8 except each pixel gets its own alpha value too
+            const u8 alpha = data[offset];
+            const u8 intensity = data[offset + 1];
+
+            // RGBA8
+            return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
+        }
+
+        case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data);
+        case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data);
+
+        default:
+            Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
+    }
+}
+
+// Decode every texel of the source data into a linear byte buffer (little-endian per texel,
+// formatInfo.bytesPerTexel bytes each) and upload the result to the Metal texture.
+void Texture::decodeTexture(std::span<const u8> data) {
+    std::vector<u8> decoded;
+    decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel);
+
+    // Decode texels line by line
+    for (u32 v = 0; v < size.v(); v++) {
+        for (u32 u = 0; u < size.u(); u++) {
+            if (formatInfo.bytesPerTexel == 1) {
+ u8 texel = decodeTexelU8(u, v, format, data); + decoded.push_back(texel); + } else if (formatInfo.bytesPerTexel == 2) { + u16 texel = decodeTexelU16(u, v, format, data); + decoded.push_back((texel & 0x00ff) >> 0); + decoded.push_back((texel & 0xff00) >> 8); + } else if (formatInfo.bytesPerTexel == 4) { + u32 texel = decodeTexelU32(u, v, format, data); + decoded.push_back((texel & 0x000000ff) >> 0); + decoded.push_back((texel & 0x0000ff00) >> 8); + decoded.push_back((texel & 0x00ff0000) >> 16); + decoded.push_back((texel & 0xff000000) >> 24); + } else { + Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel); + } + } + } + + texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0); +} + +} // namespace Metal diff --git a/src/core/renderer_mtl/objc_helper.mm b/src/core/renderer_mtl/objc_helper.mm new file mode 100644 index 00000000..eeea56a0 --- /dev/null +++ b/src/core/renderer_mtl/objc_helper.mm @@ -0,0 +1,12 @@ +#include "renderer_mtl/objc_helper.hpp" + +// TODO: change the include +#import + +namespace Metal { + +dispatch_data_t createDispatchData(const void* data, size_t size) { + return dispatch_data_create(data, size, dispatch_get_global_queue(0, 0), ^{}); +} + +} // namespace Metal diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp new file mode 100644 index 00000000..10bca5dd --- /dev/null +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -0,0 +1,774 @@ +#include "PICA/gpu.hpp" +#include "renderer_mtl/renderer_mtl.hpp" +#include "renderer_mtl/objc_helper.hpp" + +#include +#include + +#include "SDL_metal.h" + +using namespace PICA; + +CMRC_DECLARE(RendererMTL); + +const u16 LIGHT_LUT_TEXTURE_WIDTH = 256; + +// HACK: redefinition... 
+PICA::ColorFmt ToColorFormat(u32 format) { + switch (format) { + case 2: return PICA::ColorFmt::RGB565; + case 3: return PICA::ColorFmt::RGBA5551; + default: return static_cast(format); + } +} + +MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) { + //MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init(); + NS::Error* error = nullptr; + MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error); + //MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error); + if (error) { + Helpers::panic("Error loading shaders: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + return library; +} + +RendererMTL::RendererMTL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) + : Renderer(gpu, internalRegs, externalRegs) {} +RendererMTL::~RendererMTL() {} + +void RendererMTL::reset() { + vertexBufferCache.reset(); + depthStencilCache.reset(); + drawPipelineCache.reset(); + blitPipelineCache.reset(); + textureCache.reset(); + depthStencilRenderTargetCache.reset(); + colorRenderTargetCache.reset(); +} + +void RendererMTL::display() { + CA::MetalDrawable* drawable = metalLayer->nextDrawable(); + if (!drawable) { + return; + } + + using namespace PICA::ExternalRegs; + + // Top screen + const u32 topActiveFb = externalRegs[Framebuffer0Select] & 1; + const u32 topScreenAddr = externalRegs[topActiveFb == 0 ? Framebuffer0AFirstAddr : Framebuffer0ASecondAddr]; + auto topScreen = colorRenderTargetCache.findFromAddress(topScreenAddr); + + if (topScreen) { + clearColor(nullptr, topScreen->get().texture); + } + + // Bottom screen + const u32 bottomActiveFb = externalRegs[Framebuffer1Select] & 1; + const u32 bottomScreenAddr = externalRegs[bottomActiveFb == 0 ? 
Framebuffer1AFirstAddr : Framebuffer1ASecondAddr]; + auto bottomScreen = colorRenderTargetCache.findFromAddress(bottomScreenAddr); + + if (bottomScreen) { + clearColor(nullptr, bottomScreen->get().texture); + } + + // -------- Draw -------- + commandBuffer->pushDebugGroup(toNSString("Display")); + + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + MTL::RenderPassColorAttachmentDescriptor* colorAttachment = renderPassDescriptor->colorAttachments()->object(0); + colorAttachment->setTexture(drawable->texture()); + colorAttachment->setLoadAction(MTL::LoadActionClear); + colorAttachment->setClearColor(MTL::ClearColor{0.0f, 0.0f, 0.0f, 1.0f}); + colorAttachment->setStoreAction(MTL::StoreActionStore); + + nextRenderPassName = "Display"; + beginRenderPassIfNeeded(renderPassDescriptor, false, drawable->texture()); + renderCommandEncoder->setRenderPipelineState(displayPipeline); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0); + + // Top screen + if (topScreen) { + renderCommandEncoder->setViewport(MTL::Viewport{0, 0, 400, 240, 0.0f, 1.0f}); + renderCommandEncoder->setFragmentTexture(topScreen->get().texture, 0); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); + } + + // Bottom screen + if (bottomScreen) { + renderCommandEncoder->setViewport(MTL::Viewport{40, 240, 320, 240, 0.0f, 1.0f}); + renderCommandEncoder->setFragmentTexture(bottomScreen->get().texture, 0); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); + } + + endRenderPass(); + + commandBuffer->presentDrawable(drawable); + + commandBuffer->popDebugGroup(); + + commitCommandBuffer(); + + // Inform the vertex buffer cache that the frame ended + vertexBufferCache.endFrame(); + + // Release + drawable->release(); +} + +void RendererMTL::initGraphicsContext(SDL_Window* window) { + // TODO: what should be the type of the view? 
+ void* view = SDL_Metal_CreateView(window); + metalLayer = (CA::MetalLayer*)SDL_Metal_GetLayer(view); + device = MTL::CreateSystemDefaultDevice(); + metalLayer->setDevice(device); + commandQueue = device->newCommandQueue(); + + // -------- Objects -------- + + // Textures + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); + textureDescriptor->setTextureType(MTL::TextureType2D); + textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA32Float); + textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH); + textureDescriptor->setHeight(Lights::LUT_Count + 1); + textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); + textureDescriptor->setStorageMode(MTL::StorageModePrivate); + + lutTexture = device->newTexture(textureDescriptor); + lutTexture->setLabel(toNSString("LUT texture")); + textureDescriptor->release(); + + // Samplers + MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + samplerDescriptor->setLabel(toNSString("Sampler (nearest)")); + nearestSampler = device->newSamplerState(samplerDescriptor); + + samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear); + samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear); + samplerDescriptor->setLabel(toNSString("Sampler (linear)")); + linearSampler = device->newSamplerState(samplerDescriptor); + + samplerDescriptor->release(); + + // -------- Pipelines -------- + + // Load shaders + auto mtlResources = cmrc::RendererMTL::get_filesystem(); + library = loadLibrary(device, mtlResources.open("metal_shaders.metallib")); + MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); + + // Display + MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding)); + MTL::Function* fragmentDisplayFunction = library->newFunction(NS::String::string("fragmentDisplay", NS::ASCIIStringEncoding)); + + 
MTL::RenderPipelineDescriptor* displayPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + displayPipelineDescriptor->setVertexFunction(vertexDisplayFunction); + displayPipelineDescriptor->setFragmentFunction(fragmentDisplayFunction); + auto* displayColorAttachment = displayPipelineDescriptor->colorAttachments()->object(0); + displayColorAttachment->setPixelFormat(MTL::PixelFormat::PixelFormatBGRA8Unorm); + + NS::Error* error = nullptr; + displayPipelineDescriptor->setLabel(toNSString("Display pipeline")); + displayPipeline = device->newRenderPipelineState(displayPipelineDescriptor, &error); + if (error) { + Helpers::panic("Error creating display pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + displayPipelineDescriptor->release(); + vertexDisplayFunction->release(); + fragmentDisplayFunction->release(); + + // Blit + MTL::Function* vertexBlitFunction = library->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding)); + MTL::Function* fragmentBlitFunction = library->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding)); + + blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction); + + // Draw + MTL::Function* vertexDrawFunction = library->newFunction(NS::String::string("vertexDraw", NS::ASCIIStringEncoding)); + + // -------- Vertex descriptor -------- + MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); + + // Position + MTL::VertexAttributeDescriptor* positionAttribute = vertexDescriptor->attributes()->object(0); + positionAttribute->setFormat(MTL::VertexFormatFloat4); + positionAttribute->setOffset(offsetof(Vertex, s.positions)); + positionAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Quaternion + MTL::VertexAttributeDescriptor* quaternionAttribute = vertexDescriptor->attributes()->object(1); + quaternionAttribute->setFormat(MTL::VertexFormatFloat4); + quaternionAttribute->setOffset(offsetof(Vertex, s.quaternion)); 
+ quaternionAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Color + MTL::VertexAttributeDescriptor* colorAttribute = vertexDescriptor->attributes()->object(2); + colorAttribute->setFormat(MTL::VertexFormatFloat4); + colorAttribute->setOffset(offsetof(Vertex, s.colour)); + colorAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 0 + MTL::VertexAttributeDescriptor* texCoord0Attribute = vertexDescriptor->attributes()->object(3); + texCoord0Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord0Attribute->setOffset(offsetof(Vertex, s.texcoord0)); + texCoord0Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 1 + MTL::VertexAttributeDescriptor* texCoord1Attribute = vertexDescriptor->attributes()->object(4); + texCoord1Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord1Attribute->setOffset(offsetof(Vertex, s.texcoord1)); + texCoord1Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 0 W + MTL::VertexAttributeDescriptor* texCoord0WAttribute = vertexDescriptor->attributes()->object(5); + texCoord0WAttribute->setFormat(MTL::VertexFormatFloat); + texCoord0WAttribute->setOffset(offsetof(Vertex, s.texcoord0_w)); + texCoord0WAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // View + MTL::VertexAttributeDescriptor* viewAttribute = vertexDescriptor->attributes()->object(6); + viewAttribute->setFormat(MTL::VertexFormatFloat3); + viewAttribute->setOffset(offsetof(Vertex, s.view)); + viewAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 2 + MTL::VertexAttributeDescriptor* texCoord2Attribute = vertexDescriptor->attributes()->object(7); + texCoord2Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord2Attribute->setOffset(offsetof(Vertex, s.texcoord2)); + texCoord2Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + MTL::VertexBufferLayoutDescriptor* vertexBufferLayout = 
vertexDescriptor->layouts()->object(VERTEX_BUFFER_BINDING_INDEX); + vertexBufferLayout->setStride(sizeof(Vertex)); + vertexBufferLayout->setStepFunction(MTL::VertexStepFunctionPerVertex); + vertexBufferLayout->setStepRate(1); + + drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor); + + // Copy to LUT texture + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); + constants->setConstantValue(&LIGHT_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0)); + + error = nullptr; + MTL::Function* vertexCopyToLutTextureFunction = copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); + + MTL::RenderPipelineDescriptor* copyToLutTexturePipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + copyToLutTexturePipelineDescriptor->setVertexFunction(vertexCopyToLutTextureFunction); + // Disable rasterization + copyToLutTexturePipelineDescriptor->setRasterizationEnabled(false); + + error = nullptr; + copyToLutTexturePipelineDescriptor->setLabel(toNSString("Copy to LUT texture pipeline")); + copyToLutTexturePipeline = device->newRenderPipelineState(copyToLutTexturePipelineDescriptor, &error); + if (error) { + Helpers::panic("Error creating copy_to_lut_texture pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + copyToLutTexturePipelineDescriptor->release(); + vertexCopyToLutTextureFunction->release(); + + // Depth stencil cache + depthStencilCache.set(device); + + // Vertex buffer cache + vertexBufferCache.set(device); + + // -------- Depth stencil state -------- + MTL::DepthStencilDescriptor* depthStencilDescriptor = MTL::DepthStencilDescriptor::alloc()->init(); + depthStencilDescriptor->setLabel(toNSString("Default depth stencil 
state")); + defaultDepthStencilState = device->newDepthStencilState(depthStencilDescriptor); + depthStencilDescriptor->release(); + + // Release + copyToLutTextureLibrary->release(); +} + +void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { + const auto color = colorRenderTargetCache.findFromAddress(startAddress); + if (color) { + const float r = Helpers::getBits<24, 8>(value) / 255.0f; + const float g = Helpers::getBits<16, 8>(value) / 255.0f; + const float b = Helpers::getBits<8, 8>(value) / 255.0f; + const float a = (value & 0xff) / 255.0f; + + colorClearOps[color->get().texture] = {r, g, b, a}; + + return; + } + + const auto depth = depthStencilRenderTargetCache.findFromAddress(startAddress); + if (depth) { + float depthVal; + const auto format = depth->get().format; + if (format == DepthFmt::Depth16) { + depthVal = (value & 0xffff) / 65535.0f; + } else { + depthVal = (value & 0xffffff) / 16777215.0f; + } + + depthClearOps[depth->get().texture] = depthVal; + + if (format == DepthFmt::Depth24Stencil8) { + const u8 stencilVal = value >> 24; + stencilClearOps[depth->get().texture] = stencilVal; + } + + return; + } + + Helpers::warn("[RendererMTL::ClearBuffer] No buffer found!\n"); +} + +void RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { + const u32 inputWidth = inputSize & 0xffff; + const u32 inputHeight = inputSize >> 16; + const auto inputFormat = ToColorFormat(Helpers::getBits<8, 3>(flags)); + const auto outputFormat = ToColorFormat(Helpers::getBits<12, 3>(flags)); + const bool verticalFlip = flags & 1; + const PICA::Scaling scaling = static_cast(Helpers::getBits<24, 2>(flags)); + + u32 outputWidth = outputSize & 0xffff; + u32 outputHeight = outputSize >> 16; + + auto srcFramebuffer = getColorRenderTarget(inputAddr, inputFormat, inputWidth, outputHeight); + nextRenderPassName = "Clear before display transfer"; + clearColor(nullptr, srcFramebuffer->texture); + 
Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, outputWidth, outputHeight); + + if (verticalFlip) { + std::swap(srcRect.bottom, srcRect.top); + } + + // Apply scaling for the destination rectangle. + if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { + outputWidth >>= 1; + } + + if (scaling == PICA::Scaling::XY) { + outputHeight >>= 1; + } + + auto destFramebuffer = getColorRenderTarget(outputAddr, outputFormat, outputWidth, outputHeight); + // TODO: clear if not blitting to the whole framebuffer + Math::Rect destRect = destFramebuffer->getSubRect(outputAddr, outputWidth, outputHeight); + + if (inputWidth != outputWidth) { + // Helpers::warn("Strided display transfer is not handled correctly!\n"); + } + + textureCopyImpl(*srcFramebuffer, *destFramebuffer, srcRect, destRect); +} + +void RendererMTL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { + // Texture copy size is aligned to 16 byte units + const u32 copySize = totalBytes & ~0xf; + if (copySize == 0) { + Helpers::warn("TextureCopy total bytes less than 16!\n"); + return; + } + + // The width and gap are provided in 16-byte units. + const u32 inputWidth = (inputSize & 0xffff) << 4; + const u32 inputGap = (inputSize >> 16) << 4; + const u32 outputWidth = (outputSize & 0xffff) << 4; + const u32 outputGap = (outputSize >> 16) << 4; + + if (inputGap != 0 || outputGap != 0) { + // Helpers::warn("Strided texture copy\n"); + } + + if (inputWidth != outputWidth) { + Helpers::warn("Input width does not match output width, cannot accelerate texture copy!"); + return; + } + + // Texture copy is a raw data copy in PICA, which means no format or tiling information is provided to the engine. + // Depending if the target surface is linear or tiled, games set inputWidth to either the width of the texture or + // the width multiplied by eight (because tiles are stored linearly in memory). 
+ // To properly accelerate this we must examine each surface individually. For now we assume the most common case + // of tiled surface with RGBA8 format. If our assumption does not hold true, we abort the texture copy as inserting + // that surface is not correct. + + // We assume the source surface is tiled and RGBA8. inputWidth is in bytes so divide it + // by eight * sizePerPixel(RGBA8) to convert it to a useable width. + const u32 bpp = sizePerPixel(PICA::ColorFmt::RGBA8); + const u32 copyStride = (inputWidth + inputGap) / (8 * bpp); + const u32 copyWidth = inputWidth / (8 * bpp); + + // inputHeight/outputHeight are typically set to zero so they cannot be used to get the height of the copy region + // in contrast to display transfer. Compute height manually by dividing the copy size with the copy width. The result + // is the number of vertical tiles so multiply that by eight to get the actual copy height. + u32 copyHeight; + if (inputWidth != 0) [[likely]] { + copyHeight = (copySize / inputWidth) * 8; + } else { + copyHeight = 0; + } + + // Find the source surface. + auto srcFramebuffer = getColorRenderTarget(inputAddr, PICA::ColorFmt::RGBA8, copyStride, copyHeight, false); + if (!srcFramebuffer) { + Helpers::warn("RendererGL::TextureCopy failed to locate src framebuffer!\n"); + return; + } + nextRenderPassName = "Clear before texture copy"; + clearColor(nullptr, srcFramebuffer->texture); + + Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, copyWidth, copyHeight); + + // Assume the destination surface has the same format. Unless the surfaces have the same block width, + // texture copy does not make sense. 
+ auto destFramebuffer = getColorRenderTarget(outputAddr, srcFramebuffer->format, copyWidth, copyHeight); + // TODO: clear if not blitting to the whole framebuffer + Math::Rect destRect = destFramebuffer->getSubRect(outputAddr, copyWidth, copyHeight); + + textureCopyImpl(*srcFramebuffer, *destFramebuffer, srcRect, destRect); +} + +void RendererMTL::drawVertices(PICA::PrimType primType, std::span vertices) { + // Color + auto colorRenderTarget = getColorRenderTarget(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]); + + // Depth stencil + const u32 depthControl = regs[PICA::InternalRegs::DepthAndColorMask]; + const bool depthStencilWrite = regs[PICA::InternalRegs::DepthBufferWrite]; + const bool depthEnable = depthControl & 0x1; + const bool depthWriteEnable = Helpers::getBit<12>(depthControl); + const u8 depthFunc = Helpers::getBits<4, 3>(depthControl); + const u8 colorMask = Helpers::getBits<8, 4>(depthControl); + + Metal::DepthStencilHash depthStencilHash{false, 1}; + depthStencilHash.stencilConfig = regs[PICA::InternalRegs::StencilTest]; + depthStencilHash.stencilOpConfig = regs[PICA::InternalRegs::StencilOp]; + const bool stencilEnable = Helpers::getBit<0>(depthStencilHash.stencilConfig); + + std::optional depthStencilRenderTarget = std::nullopt; + if (depthEnable) { + depthStencilHash.depthStencilWrite = depthWriteEnable && depthStencilWrite; + depthStencilHash.depthFunc = depthFunc; + depthStencilRenderTarget = getDepthRenderTarget(); + } else { + if (depthWriteEnable) { + depthStencilHash.depthStencilWrite = true; + depthStencilRenderTarget = getDepthRenderTarget(); + } else if (stencilEnable) { + depthStencilRenderTarget = getDepthRenderTarget(); + } + } + + // Depth uniforms + struct { + float depthScale; + float depthOffset; + bool depthMapEnable; + } depthUniforms; + depthUniforms.depthScale = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + depthUniforms.depthOffset = 
Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + depthUniforms.depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; + + // -------- Pipeline -------- + Metal::DrawPipelineHash pipelineHash{colorRenderTarget->format, DepthFmt::Unknown1}; + if (depthStencilRenderTarget) { + pipelineHash.depthFmt = depthStencilRenderTarget->format; + } + pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1; + pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7; + pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u]; + pipelineHash.fragHash.alphaControl = regs[0x104]; + + // Blending and logic op + pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; + pipelineHash.colorWriteMask = colorMask; + + u8 logicOp = 3; // Copy, which doesn't do anything + if (pipelineHash.blendEnabled) { + pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc]; + } else { + logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]); + } + + MTL::RenderPipelineState* pipeline = drawPipelineCache.get(pipelineHash); + + // Depth stencil state + MTL::DepthStencilState* depthStencilState = depthStencilCache.get(depthStencilHash); + + // -------- Render -------- + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + bool doesClear = clearColor(renderPassDescriptor, colorRenderTarget->texture); + if (depthStencilRenderTarget) { + if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture)) + doesClear = true; + if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) { + if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture)) + doesClear = true; + } + } + + nextRenderPassName = "Draw vertices"; + beginRenderPassIfNeeded(renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? 
depthStencilRenderTarget->texture : nullptr)); + + // Update the LUT texture if necessary + if (gpu.lightingLUTDirty) { + updateLightingLUT(renderCommandEncoder); + } + if (gpu.fogLUTDirty) { + updateFogLUT(renderCommandEncoder); + } + + renderCommandEncoder->setRenderPipelineState(pipeline); + renderCommandEncoder->setDepthStencilState(depthStencilState); + // If size is < 4KB, use inline vertex data, otherwise use a buffer + if (vertices.size_bytes() < 4 * 1024) { + renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX); + } else { + Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes()); + renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX); + } + + // Viewport + const u32 viewportX = regs[PICA::InternalRegs::ViewportXY] & 0x3ff; + const u32 viewportY = (regs[PICA::InternalRegs::ViewportXY] >> 16) & 0x3ff; + const u32 viewportWidth = Floats::f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0f; + const u32 viewportHeight = Floats::f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 0xffffff).toFloat32() * 2.0f; + const auto rect = colorRenderTarget->getSubRect(colourBufferLoc, fbSize[0], fbSize[1]); + MTL::Viewport viewport{double(rect.left + viewportX), double(rect.bottom + viewportY), double(viewportWidth), double(viewportHeight), 0.0, 1.0}; + renderCommandEncoder->setViewport(viewport); + + // Blend color + if (pipelineHash.blendEnabled) { + u32 constantColor = regs[PICA::InternalRegs::BlendColour]; + const u8 r = constantColor & 0xff; + const u8 g = Helpers::getBits<8, 8>(constantColor); + const u8 b = Helpers::getBits<16, 8>(constantColor); + const u8 a = Helpers::getBits<24, 8>(constantColor); + + renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f); + } + + // Stencil reference + if (stencilEnable) { + const s8 reference = s8(Helpers::getBits<16, 
8>(depthStencilHash.stencilConfig));  // Signed reference value
+        renderCommandEncoder->setStencilReferenceValue(reference);
+    }
+
+    // Bind resources
+    setupTextureEnvState(renderCommandEncoder);
+    bindTexturesToSlots(renderCommandEncoder);
+    // Registers 0x48..0x1FF are passed as raw bytes to both shader stages at buffer index 0
+    renderCommandEncoder->setVertexBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0);
+    renderCommandEncoder->setFragmentBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0);
+    renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2);
+    renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2);
+
+    renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size()));
+}
+
+// Screenshots are not supported by this renderer yet; only emits a warning.
+void RendererMTL::screenshot(const std::string& name) {
+    // TODO: implement
+    Helpers::warn("RendererMTL::screenshot not implemented");
+}
+
+// Reset all caches, then release the Metal objects held directly by the renderer.
+void RendererMTL::deinitGraphicsContext() {
+    reset();
+
+    // Release
+    copyToLutTexturePipeline->release();
+    displayPipeline->release();
+    defaultDepthStencilState->release();
+    lutTexture->release();
+    linearSampler->release();
+    nearestSampler->release();
+    library->release();
+    commandQueue->release();
+    device->release();
+}
+
+// Look up the colour render target containing `addr`. If none is cached, either return
+// std::nullopt (createIfnotFound == false) or create and cache a new target with the given
+// format and dimensions.
+std::optional<Metal::ColorRenderTarget> RendererMTL::getColorRenderTarget(
+    u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound
+) {
+    // Try to find an already existing buffer that contains the provided address
+    // This is a more relaxed check compared to getColourFBO as display transfer/texcopy may refer to
+    // subrect of a surface and in case of texcopy we don't know the format of the surface.
+    auto buffer = colorRenderTargetCache.findFromAddress(addr);
+    if (buffer.has_value()) {
+        return buffer.value().get();
+    }
+
+    if (!createIfnotFound) {
+        return std::nullopt;
+    }
+
+    // Otherwise create and cache a new buffer.
+ Metal::ColorRenderTarget sampleBuffer(device, addr, format, width, height); + + return colorRenderTargetCache.add(sampleBuffer); +} + +Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { + Metal::DepthStencilRenderTarget sampleBuffer(device, depthBufferLoc, depthBufferFormat, fbSize[0], fbSize[1]); + auto buffer = depthStencilRenderTargetCache.find(sampleBuffer); + + if (buffer.has_value()) { + return buffer.value().get(); + } else { + return depthStencilRenderTargetCache.add(sampleBuffer); + } +} + +Metal::Texture& RendererMTL::getTexture(Metal::Texture& tex) { + auto buffer = textureCache.find(tex); + + if (buffer.has_value()) { + return buffer.value().get(); + } else { + const auto textureData = std::span{gpu.getPointerPhys(tex.location), tex.sizeInBytes()}; // Get pointer to the texture data in 3DS memory + Metal::Texture& newTex = textureCache.add(tex); + newTex.decodeTexture(textureData); + + return newTex; + } +} + +void RendererMTL::setupTextureEnvState(MTL::RenderCommandEncoder* encoder) { + static constexpr std::array ioBases = { + PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, + PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, + }; + + struct { + u32 textureEnvSourceRegs[6]; + u32 textureEnvOperandRegs[6]; + u32 textureEnvCombinerRegs[6]; + u32 textureEnvScaleRegs[6]; + } envState; + u32 textureEnvColourRegs[6]; + + for (int i = 0; i < 6; i++) { + const u32 ioBase = ioBases[i]; + + envState.textureEnvSourceRegs[i] = regs[ioBase]; + envState.textureEnvOperandRegs[i] = regs[ioBase + 1]; + envState.textureEnvCombinerRegs[i] = regs[ioBase + 2]; + textureEnvColourRegs[i] = regs[ioBase + 3]; + envState.textureEnvScaleRegs[i] = regs[ioBase + 4]; + } + + encoder->setVertexBytes(&textureEnvColourRegs, sizeof(textureEnvColourRegs), 1); + encoder->setFragmentBytes(&envState, sizeof(envState), 1); +} + +void 
RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { + static constexpr std::array ioBases = { + PICA::InternalRegs::Tex0BorderColor, + PICA::InternalRegs::Tex1BorderColor, + PICA::InternalRegs::Tex2BorderColor, + }; + + for (int i = 0; i < 3; i++) { + if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { + continue; + } + + const size_t ioBase = ioBases[i]; + + const u32 dim = regs[ioBase + 1]; + const u32 config = regs[ioBase + 2]; + const u32 height = dim & 0x7ff; + const u32 width = Helpers::getBits<16, 11>(dim); + const u32 addr = (regs[ioBase + 4] & 0x0FFFFFFF) << 3; + u32 format = regs[ioBase + (i == 0 ? 13 : 5)] & 0xF; + + if (addr != 0) [[likely]] { + Metal::Texture targetTex(device, addr, static_cast(format), width, height, config); + auto tex = getTexture(targetTex); + encoder->setFragmentTexture(tex.texture, i); + encoder->setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); + } else { + // TODO: bind a dummy texture? + } + } + + // LUT texture + encoder->setFragmentTexture(lutTexture, 3); + encoder->setFragmentSamplerState(linearSampler, 3); +} + +void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { + gpu.lightingLUTDirty = false; + std::array lightingLut = {0.0f}; + + for (int i = 0; i < gpu.lightingLUT.size(); i += 2) { + uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF; + lightingLut[i] = (float)(value << 4) / 65535.0f; + } + + //for (int i = 0; i < Lights::LUT_Count; i++) { + // lutTexture->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0); + //} + + renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); + renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); + renderCommandEncoder->setVertexTexture(lutTexture, 0); + Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); + renderCommandEncoder->setVertexBuffer(buffer.buffer, 
buffer.offset, 0); + u32 arrayOffset = 0; + renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), GPU::LightingLutSize); +} + +void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { + gpu.fogLUTDirty = false; + std::array fogLut = {0.0f}; + + for (int i = 0; i < fogLut.size(); i += 2) { + const uint32_t value = gpu.fogLUT[i >> 1]; + int32_t diff = value & 0x1fff; + diff = (diff << 19) >> 19; // Sign extend the 13-bit value to 32 bits + const float fogDifference = float(diff) / 2048.0f; + const float fogValue = float((value >> 13) & 0x7ff) / 2048.0f; + + fogLut[i] = fogValue; + fogLut[i + 1] = fogDifference; + } + + renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); + renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); + renderCommandEncoder->setVertexTexture(lutTexture, 0); + //Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); + //renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); + renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0); + u32 arrayOffset = (u32)Lights::LUT_Count; + renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(128)); +} + +void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect) { + nextRenderPassName = "Texture copy"; + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole texture + bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture); + beginRenderPassIfNeeded(renderPassDescriptor, 
doesClear, destFramebuffer.texture); + + // Pipeline + Metal::BlitPipelineHash hash{destFramebuffer.format, DepthFmt::Unknown1}; + auto blitPipeline = blitPipelineCache.get(hash); + + renderCommandEncoder->setRenderPipelineState(blitPipeline); + + // Viewport + renderCommandEncoder->setViewport(MTL::Viewport{double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0}); + float srcRectNDC[4] = {srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(), (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v()}; + + // Bind resources + renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0); + renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, 0); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0); + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); +} diff --git a/src/host_shaders/metal_copy_to_lut_texture.metal b/src/host_shaders/metal_copy_to_lut_texture.metal new file mode 100644 index 00000000..40a7f50d --- /dev/null +++ b/src/host_shaders/metal_copy_to_lut_texture.metal @@ -0,0 +1,9 @@ +#include +using namespace metal; + +constant ushort lutTextureWidth [[function_constant(0)]]; + +// The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass +vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d out [[texture(0)]], constant float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { + out.write(float4(data[vid], 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth)); +} diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal new file mode 100644 index 00000000..95f417c7 --- /dev/null +++ b/src/host_shaders/metal_shaders.metal 
@@ -0,0 +1,782 @@ +#include +using namespace metal; + +struct BasicVertexOut { + float4 position [[position]]; + float2 uv; +}; + +constant float4 displayPositions[4] = { + float4(-1.0, -1.0, 0.0, 1.0), + float4( 1.0, -1.0, 0.0, 1.0), + float4(-1.0, 1.0, 0.0, 1.0), + float4( 1.0, 1.0, 0.0, 1.0) +}; + +constant float2 displayTexCoord[4] = { + float2(0.0, 1.0), + float2(0.0, 0.0), + float2(1.0, 1.0), + float2(1.0, 0.0) +}; + +vertex BasicVertexOut vertexDisplay(uint vid [[vertex_id]]) { + BasicVertexOut out; + out.position = displayPositions[vid]; + out.uv = displayTexCoord[vid]; + + return out; +} + +fragment float4 fragmentDisplay(BasicVertexOut in [[stage_in]], texture2d tex [[texture(0)]], sampler samplr [[sampler(0)]]) { + return tex.sample(samplr, in.uv); +} + +struct NDCViewport { + float2 offset; + float2 scale; +}; + +vertex BasicVertexOut vertexBlit(uint vid [[vertex_id]], constant NDCViewport& viewport [[buffer(0)]]) { + BasicVertexOut out; + out.uv = float2((vid << 1) & 2, vid & 2); + out.position = float4(out.uv * 2.0 - 1.0, 0.0, 1.0); + out.position.y = -out.position.y; + out.uv = out.uv * viewport.scale + viewport.offset; + + return out; +} + +fragment float4 fragmentBlit(BasicVertexOut in [[stage_in]], texture2d tex [[texture(0)]], sampler samplr [[sampler(0)]]) { + return tex.sample(samplr, in.uv); +} + +struct PicaRegs { + uint regs[0x200 - 0x48]; + + uint read(uint reg) constant { + return regs[reg - 0x48]; + } +}; + +struct VertTEV { + uint textureEnvColor[6]; +}; + +float4 abgr8888ToFloat4(uint abgr) { + const float scale = 1.0 / 255.0; + + return scale * float4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); +} + +struct DrawVertexIn { + float4 position [[attribute(0)]]; + float4 quaternion [[attribute(1)]]; + float4 color [[attribute(2)]]; + float2 texCoord0 [[attribute(3)]]; + float2 texCoord1 [[attribute(4)]]; + float texCoord0W [[attribute(5)]]; + float3 view [[attribute(6)]]; + float2 
texCoord2 [[attribute(7)]]; +}; + +// Metal cannot return arrays from vertex functions, this is an ugly workaround +struct EnvColor { + float4 c0; + float4 c1; + float4 c2; + float4 c3; + float4 c4; + float4 c5; + + thread float4& operator[](int i) { + switch (i) { + case 0: return c0; + case 1: return c1; + case 2: return c2; + case 3: return c3; + case 4: return c4; + case 5: return c5; + default: return c0; + } + } +}; + +float3 rotateFloat3ByQuaternion(float3 v, float4 q) { + float3 u = q.xyz; + float s = q.w; + + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); +} + +// Convert an arbitrary-width floating point literal to an f32 +float decodeFP(uint hex, uint E, uint M) { + uint width = M + E + 1u; + uint bias = 128u - (1u << (E - 1u)); + uint exponent = (hex >> M) & ((1u << E) - 1u); + uint mantissa = hex & ((1u << M) - 1u); + uint sign = (hex >> (E + M)) << 31u; + + if ((hex & ((1u << (width - 1u)) - 1u)) != 0u) { + if (exponent == (1u << E) - 1u) + exponent = 255u; + else + exponent += bias; + hex = sign | (mantissa << (23u - M)) | (exponent << 23u); + } else { + hex = sign; + } + + return as_type(hex); +} + +struct DepthUniforms { + float depthScale; + float depthOffset; + bool depthMapEnable; +}; + +struct DrawVertexOut { + float4 position [[position]]; + float4 quaternion; + float4 color; + float3 texCoord0; + float2 texCoord1; + float2 texCoord2; + float3 view; + float3 normal; + float3 tangent; + float3 bitangent; + EnvColor textureEnvColor [[flat]]; + float4 textureEnvBufferColor [[flat]]; +}; + +struct DrawVertexOutWithClip { + DrawVertexOut out; + float clipDistance [[clip_distance]] [2]; +}; + +// TODO: check this +float transformZ(float z, float w, constant DepthUniforms& depthUniforms) { + z = z / w * depthUniforms.depthScale + depthUniforms.depthOffset; + if (!depthUniforms.depthMapEnable) { + z *= w; + } + + return z * w; +} + +vertex DrawVertexOutWithClip vertexDraw(DrawVertexIn in [[stage_in]], constant PicaRegs& 
picaRegs [[buffer(0)]], constant VertTEV& tev [[buffer(1)]], constant DepthUniforms& depthUniforms [[buffer(2)]]) { + DrawVertexOut out; + + // Position + out.position = in.position; + // Flip the y position + out.position.y = -out.position.y; + + // Apply depth uniforms + out.position.z = transformZ(out.position.z, out.position.w, depthUniforms); + + // Color + out.color = min(abs(in.color), 1.0); + + // Texture coordinates + out.texCoord0 = float3(in.texCoord0, in.texCoord0W); + out.texCoord0.y = 1.0 - out.texCoord0.y; + out.texCoord1 = in.texCoord1; + out.texCoord1.y = 1.0 - out.texCoord1.y; + out.texCoord2 = in.texCoord2; + out.texCoord2.y = 1.0 - out.texCoord2.y; + + // View + out.view = in.view; + + // TBN + out.normal = normalize(rotateFloat3ByQuaternion(float3(0.0, 0.0, 1.0), in.quaternion)); + out.tangent = normalize(rotateFloat3ByQuaternion(float3(1.0, 0.0, 0.0), in.quaternion)); + out.bitangent = normalize(rotateFloat3ByQuaternion(float3(0.0, 1.0, 0.0), in.quaternion)); + out.quaternion = in.quaternion; + + // Environment + for (int i = 0; i < 6; i++) { + out.textureEnvColor[i] = abgr8888ToFloat4(tev.textureEnvColor[i]); + } + + out.textureEnvBufferColor = abgr8888ToFloat4(picaRegs.read(0xFDu)); + + DrawVertexOutWithClip outWithClip; + outWithClip.out = out; + + // Parse clipping plane registers + float4 clipData = float4( + decodeFP(picaRegs.read(0x48u) & 0xffffffu, 7u, 16u), decodeFP(picaRegs.read(0x49u) & 0xffffffu, 7u, 16u), + decodeFP(picaRegs.read(0x4Au) & 0xffffffu, 7u, 16u), decodeFP(picaRegs.read(0x4Bu) & 0xffffffu, 7u, 16u) + ); + + // There's also another, always-on clipping plane based on vertex z + // TODO: transform + outWithClip.clipDistance[0] = -in.position.z; + outWithClip.clipDistance[1] = dot(clipData, in.position); + + return outWithClip; +} + +constant bool lightingEnabled [[function_constant(0)]]; +constant uint8_t lightingNumLights [[function_constant(1)]]; +constant uint32_t lightingConfig1 [[function_constant(2)]]; +constant 
uint16_t alphaControl [[function_constant(3)]]; + +struct Globals { + bool error_unimpl; + + float4 tevSources[16]; + float4 tevNextPreviousBuffer; + bool tevUnimplementedSourceFlag = false; + + uint GPUREG_LIGHTING_LUTINPUT_SCALE; + uint GPUREG_LIGHTING_LUTINPUT_ABS; + uint GPUREG_LIGHTING_LUTINPUT_SELECT; + uint GPUREG_LIGHTi_CONFIG; + + // HACK + //bool lightingEnabled; + //uint8_t lightingNumLights; + //uint32_t lightingConfig1; + //uint16_t alphaControl; + + float3 normal; +}; + +// See docs/lighting.md +constant uint samplerEnabledBitfields[2] = {0x7170e645u, 0x7f013fefu}; + +bool isSamplerEnabled(uint environment_id, uint lut_id) { + uint index = 7 * environment_id + lut_id; + uint arrayIndex = (index >> 5); + return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u; +} + +struct FragTEV { + uint textureEnvSource[6]; + uint textureEnvOperand[6]; + uint textureEnvCombiner[6]; + uint textureEnvScale[6]; + + float4 fetchSource(thread Globals& globals, uint src_id) constant { + if (src_id >= 6u && src_id < 13u) { + globals.tevUnimplementedSourceFlag = true; + } + + return globals.tevSources[src_id]; + } + + float4 getColorAndAlphaSource(thread Globals& globals, int tev_id, int src_id) constant { + float4 result; + + float4 colorSource = fetchSource(globals, (textureEnvSource[tev_id] >> (src_id * 4)) & 15u); + float4 alphaSource = fetchSource(globals, (textureEnvSource[tev_id] >> (src_id * 4 + 16)) & 15u); + + uint colorOperand = (textureEnvOperand[tev_id] >> (src_id * 4)) & 15u; + uint alphaOperand = (textureEnvOperand[tev_id] >> (12 + src_id * 4)) & 7u; + + // TODO: figure out what the undocumented values do + switch (colorOperand) { + case 0u: result.rgb = colorSource.rgb; break; // Source color + case 1u: result.rgb = 1.0 - colorSource.rgb; break; // One minus source color + case 2u: result.rgb = float3(colorSource.a); break; // Source alpha + case 3u: result.rgb = float3(1.0 - colorSource.a); break; // One minus source alpha + case 4u: 
result.rgb = float3(colorSource.r); break; // Source red + case 5u: result.rgb = float3(1.0 - colorSource.r); break; // One minus source red + case 8u: result.rgb = float3(colorSource.g); break; // Source green + case 9u: result.rgb = float3(1.0 - colorSource.g); break; // One minus source green + case 12u: result.rgb = float3(colorSource.b); break; // Source blue + case 13u: result.rgb = float3(1.0 - colorSource.b); break; // One minus source blue + default: break; + } + + // TODO: figure out what the undocumented values do + switch (alphaOperand) { + case 0u: result.a = alphaSource.a; break; // Source alpha + case 1u: result.a = 1.0 - alphaSource.a; break; // One minus source alpha + case 2u: result.a = alphaSource.r; break; // Source red + case 3u: result.a = 1.0 - alphaSource.r; break; // One minus source red + case 4u: result.a = alphaSource.g; break; // Source green + case 5u: result.a = 1.0 - alphaSource.g; break; // One minus source green + case 6u: result.a = alphaSource.b; break; // Source blue + case 7u: result.a = 1.0 - alphaSource.b; break; // One minus source blue + default: break; + } + + return result; + } + + float4 calculateCombiner(thread Globals& globals, int tev_id) constant { + float4 source0 = getColorAndAlphaSource(globals, tev_id, 0); + float4 source1 = getColorAndAlphaSource(globals, tev_id, 1); + float4 source2 = getColorAndAlphaSource(globals, tev_id, 2); + + uint colorCombine = textureEnvCombiner[tev_id] & 15u; + uint alphaCombine = (textureEnvCombiner[tev_id] >> 16) & 15u; + + float4 result = float4(1.0); + + // TODO: figure out what the undocumented values do + switch (colorCombine) { + case 0u: result.rgb = source0.rgb; break; // Replace + case 1u: result.rgb = source0.rgb * source1.rgb; break; // Modulate + case 2u: result.rgb = min(float3(1.0), source0.rgb + source1.rgb); break; // Add + case 3u: result.rgb = clamp(source0.rgb + source1.rgb - 0.5, 0.0, 1.0); break; // Add signed + case 4u: result.rgb = mix(source1.rgb, source0.rgb, 
source2.rgb); break; // Interpolate + case 5u: result.rgb = max(source0.rgb - source1.rgb, 0.0); break; // Subtract + case 6u: result.rgb = float3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB + case 7u: result = float4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA + case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add + case 9u: result.rgb = min((source0.rgb + source1.rgb), 1.0) * source2.rgb; break; // Add then multiply + default: break; + } + + if (colorCombine != 7u) { // The color combiner also writes the alpha channel in the "Dot3 RGBA" mode. + // TODO: figure out what the undocumented values do + // TODO: test if the alpha combiner supports all the same modes as the color combiner. + switch (alphaCombine) { + case 0u: result.a = source0.a; break; // Replace + case 1u: result.a = source0.a * source1.a; break; // Modulate + case 2u: result.a = min(1.0, source0.a + source1.a); break; // Add + case 3u: result.a = clamp(source0.a + source1.a - 0.5, 0.0, 1.0); break; // Add signed + case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate + case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract + case 8u: result.a = min(source0.a * source1.a + source2.a, 1.0); break; // Multiply then add + case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break; // Add then multiply + default: break; + } + } + + result.rgb *= float(1 << (textureEnvScale[tev_id] & 3u)); + result.a *= float(1 << ((textureEnvScale[tev_id] >> 16) & 3u)); + + return result; + } +}; + +enum class LogicOp : uint8_t { + Clear = 0, + And = 1, + AndReverse = 2, + Copy = 3, + Set = 4, + CopyInverted = 5, + NoOp = 6, + Invert = 7, + Nand = 8, + Or = 9, + Nor = 10, + Xor = 11, + Equiv = 12, + AndInverted = 13, + OrReverse = 14, + OrInverted = 15 +}; + +uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) { + switch (logicOp) { + case LogicOp::Clear: return 
as_type(float4(0.0)); + case LogicOp::And: return s & d; + case LogicOp::AndReverse: return s & ~d; + case LogicOp::Copy: return s; + case LogicOp::Set: return as_type(float4(1.0)); + case LogicOp::CopyInverted: return ~s; + case LogicOp::NoOp: return d; + case LogicOp::Invert: return ~d; + case LogicOp::Nand: return ~(s & d); + case LogicOp::Or: return s | d; + case LogicOp::Nor: return ~(s | d); + case LogicOp::Xor: return s ^ d; + case LogicOp::Equiv: return ~(s ^ d); + case LogicOp::AndInverted: return ~s & d; + case LogicOp::OrReverse: return s | ~d; + case LogicOp::OrInverted: return ~s | d; + } +} + +#define D0_LUT 0u +#define D1_LUT 1u +#define SP_LUT 2u +#define FR_LUT 3u +#define RB_LUT 4u +#define RG_LUT 5u +#define RR_LUT 6u + +#define FOG_INDEX 24 + +float lutLookup(texture2d texLut, uint lut, uint index) { + return texLut.read(uint2(index, lut)).r; +} + +float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d texLut, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) { + uint lut_index; + int bit_in_config1; + if (lut_id == SP_LUT) { + // These are the spotlight attenuation LUTs + bit_in_config1 = 8 + int(light_id & 7u); + lut_index = 8u + light_id; + } else if (lut_id <= 6) { + bit_in_config1 = 16 + int(lut_id); + lut_index = lut_id; + } else { + globals.error_unimpl = true; + } + + bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + + if (!current_sampler_enabled || (extract_bits(lightingConfig1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + + uint scale_id = extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; + uint input_id = extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); + switch (input_id) { + case 0u: { + delta = dot(globals.normal, 
normalize(half_vector)); + break; + } + case 1u: { + delta = dot(normalize(in.view), normalize(half_vector)); + break; + } + case 2u: { + delta = dot(globals.normal, normalize(in.view)); + break; + } + case 3u: { + delta = dot(light_vector, globals.normal); + break; + } + case 4u: { + int GPUREG_LIGHTi_SPOTDIR_LOW = int(picaRegs.read(0x0146u + (light_id << 4u))); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(picaRegs.read(0x0147u + (light_id << 4u))); + + // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions + // of GLSL so we do it manually + int se_x = extract_bits(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); + int se_y = extract_bits(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); + int se_z = extract_bits(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; + if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000; + + // These are fixed point 1.1.11 values, so we need to convert them to float + float x = float(se_x) / 2047.0; + float y = float(se_y) / 2047.0; + float z = float(se_z) / 2047.0; + float3 spotlight_vector = float3(x, y, z); + delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector + break; + } + case 5u: { + delta = 1.0; // TODO: cos (aka CP); + globals.error_unimpl = true; + break; + } + default: { + delta = 1.0; + globals.error_unimpl = true; + break; + } + } + + // 0 = enabled + if (extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { + // Two sided diffuse + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); + } + int index = int(clamp(floor(delta * 255.0), 0.f, 255.f)); + return lutLookup(texLut, lut_index, index) * scale; + } else { + // Range is [-1, 1] so we need to map it to [0, 1] + int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); + if (index < 0) index += 256; + return 
lutLookup(texLut, lut_index, index) * scale; + } +} + +float3 regToColor(uint reg) { + // Normalization scale to convert from [0...255] to [0.0...1.0] + const float scale = 1.0 / 255.0; + + return scale * float3(float(extract_bits(reg, 20, 8)), float(extract_bits(reg, 10, 8)), float(extract_bits(reg, 00, 8))); +} + +// Implements the following algorthm: https://mathb.in/26766 +void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d texLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { + // Quaternions describe a transformation from surface-local space to eye space. + // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), + // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). + //float3 normal = normalize(in.normal); + //float3 tangent = normalize(in.tangent); + //float3 bitangent = normalize(in.bitangent); + //float3 view = normalize(in.view); + + uint GPUREG_LIGHTING_LIGHT_PERMUTATION = picaRegs.read(0x01D9u); + + primaryColor = float4(0.0, 0.0, 0.0, 1.0); + secondaryColor = float4(0.0, 0.0, 0.0, 1.0); + + uint GPUREG_LIGHTING_CONFIG0 = picaRegs.read(0x01C3u); + globals.GPUREG_LIGHTING_LUTINPUT_SCALE = picaRegs.read(0x01D2u); + globals.GPUREG_LIGHTING_LUTINPUT_ABS = picaRegs.read(0x01D0u); + globals.GPUREG_LIGHTING_LUTINPUT_SELECT = picaRegs.read(0x01D1u); + + uint bumpMode = extract_bits(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games ie. 
Toad Treasure Tracker + switch (bumpMode) { + default: { + globals.normal = rotateFloat3ByQuaternion(float3(0.0, 0.0, 1.0), in.quaternion); + break; + } + } + + float4 diffuseSum = float4(0.0, 0.0, 0.0, 1.0); + float4 specularSum = float4(0.0, 0.0, 0.0, 1.0); + + uint environmentId = extract_bits(GPUREG_LIGHTING_CONFIG0, 4, 4); + bool clampHighlights = extract_bits(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; + + uint lightId; + float3 lightVector = float3(0.0); + float3 halfVector = float3(0.0); + + for (uint i = 0u; i < lightingNumLights + 1; i++) { + lightId = extract_bits(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); + + uint GPUREG_LIGHTi_SPECULAR0 = picaRegs.read(0x0140u + (lightId << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = picaRegs.read(0x0141u + (lightId << 4u)); + uint GPUREG_LIGHTi_DIFFUSE = picaRegs.read(0x0142u + (lightId << 4u)); + uint GPUREG_LIGHTi_AMBIENT = picaRegs.read(0x0143u + (lightId << 4u)); + uint GPUREG_LIGHTi_VECTOR_LOW = picaRegs.read(0x0144u + (lightId << 4u)); + uint GPUREG_LIGHTi_VECTOR_HIGH = picaRegs.read(0x0145u + (lightId << 4u)); + globals.GPUREG_LIGHTi_CONFIG = picaRegs.read(0x0149u + (lightId << 4u)); + + float lightDistance; + float3 lightPosition = normalize(float3( + decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), + decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) + )); + + // Positional Light + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { + // error_unimpl = true; + lightVector = lightPosition + in.view; + } + + // Directional light + else { + lightVector = lightPosition; + } + + lightDistance = length(lightVector); + lightVector = normalize(lightVector); + halfVector = lightVector + normalize(in.view); + + float NdotL = dot(globals.normal, lightVector); // N dot Li + + // Two sided diffuse + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) + NdotL = max(0.0, NdotL); + else + NdotL = abs(NdotL); + 
+ float geometricFactor; + bool useGeo0 = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; + bool useGeo1 = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (useGeo0 || useGeo1) { + geometricFactor = dot(halfVector, halfVector); + geometricFactor = geometricFactor == 0.0 ? 0.0 : min(NdotL / geometricFactor, 1.0); + } + + float distanceAttenuation = 1.0; + if (extract_bits(lightingConfig1, 24 + int(lightId), 1) == 0u) { + uint GPUREG_LIGHTi_ATTENUATION_BIAS = extract_bits(picaRegs.read(0x014Au + (lightId << 4u)), 0, 20); + uint GPUREG_LIGHTi_ATTENUATION_SCALE = extract_bits(picaRegs.read(0x014Bu + (lightId << 4u)), 0, 20); + + float distanceAttenuationBias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distanceAttenuationScale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); + + float delta = lightDistance * distanceAttenuationScale + distanceAttenuationBias; + delta = clamp(delta, 0.0, 1.0); + int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); + distanceAttenuation = lutLookup(texLut, 16u + lightId, index); + } + + float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, environmentId, SP_LUT, lightId, lightVector, halfVector); + float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D0_LUT, lightId, lightVector, halfVector); + float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D1_LUT, lightId, lightVector, halfVector); + float3 reflectedColor; + reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RR_LUT, lightId, lightVector, halfVector); + + if (isSamplerEnabled(environmentId, RG_LUT)) { + reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RG_LUT, lightId, lightVector, halfVector); + } else { + reflectedColor.g = reflectedColor.r; + } + + if (isSamplerEnabled(environmentId, RB_LUT)) { + reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, environmentId, 
RB_LUT, lightId, lightVector, halfVector); + } else { + reflectedColor.b = reflectedColor.r; + } + + float3 specular0 = regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0Distribution; + float3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1Distribution * reflectedColor; + + specular0 *= useGeo0 ? geometricFactor : 1.0; + specular1 *= useGeo1 ? geometricFactor : 1.0; + + float clampFactor = 1.0; + if (clampHighlights && NdotL == 0.0) { + clampFactor = 0.0; + } + + float lightFactor = distanceAttenuation * spotlightAttenuation; + diffuseSum.rgb += lightFactor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); + specularSum.rgb += lightFactor * clampFactor * (specular0 + specular1); + } + uint fresnelOutput1 = extract_bits(GPUREG_LIGHTING_CONFIG0, 2, 1); + uint fresnelOutput2 = extract_bits(GPUREG_LIGHTING_CONFIG0, 3, 1); + + float fresnelFactor; + + if (fresnelOutput1 == 1u || fresnelOutput2 == 1u) { + fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, environmentId, FR_LUT, lightId, lightVector, halfVector); + } + + if (fresnelOutput1 == 1u) { + diffuseSum.a = fresnelFactor; + } + + if (fresnelOutput2 == 1u) { + specularSum.a = fresnelFactor; + } + + uint GPUREG_LIGHTING_AMBIENT = picaRegs.read(0x01C0u); + float4 globalAmbient = float4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0); + primaryColor = clamp(globalAmbient + diffuseSum, 0.0, 1.0); + secondaryColor = clamp(specularSum, 0.0, 1.0); +} + +float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) { + return as_type(performLogicOpU(logicOp, as_type(s), as_type(d))); +} + +fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], + texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture2d texLut [[texture(3)]], + sampler samplr0 [[sampler(0)]], sampler samplr1 
[[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { + Globals globals; + + // HACK + //globals.lightingEnabled = picaRegs.read(0x008Fu) != 0u; + //globals.lightingNumLights = picaRegs.read(0x01C2u); + //globals.lightingConfig1 = picaRegs.read(0x01C4u); + //globals.alphaControl = picaRegs.read(0x104); + + globals.tevSources[0] = in.color; + if (lightingEnabled) { + calcLighting(globals, in, picaRegs, texLut, linearSampler, globals.tevSources[1], globals.tevSources[2]); + } else { + globals.tevSources[1] = float4(0.0); + globals.tevSources[2] = float4(0.0); + } + + uint textureConfig = picaRegs.read(0x80u); + float2 texCoord2 = (textureConfig & (1u << 13)) != 0u ? in.texCoord1 : in.texCoord2; + + if ((textureConfig & 1u) != 0u) globals.tevSources[3] = tex0.sample(samplr0, in.texCoord0.xy); + if ((textureConfig & 2u) != 0u) globals.tevSources[4] = tex1.sample(samplr1, in.texCoord1); + if ((textureConfig & 4u) != 0u) globals.tevSources[5] = tex2.sample(samplr2, texCoord2); + globals.tevSources[13] = float4(0.0); // Previous buffer + globals.tevSources[15] = in.color; // Previous combiner + + globals.tevNextPreviousBuffer = in.textureEnvBufferColor; + uint textureEnvUpdateBuffer = picaRegs.read(0xE0u); + + for (int i = 0; i < 6; i++) { + globals.tevSources[14] = in.textureEnvColor[i]; // Constant color + globals.tevSources[15] = tev.calculateCombiner(globals, i); + globals.tevSources[13] = globals.tevNextPreviousBuffer; + + if (i < 4) { + if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) { + globals.tevNextPreviousBuffer.rgb = globals.tevSources[15].rgb; + } + + if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) { + globals.tevNextPreviousBuffer.a = globals.tevSources[15].a; + } + } + } + + float4 color = globals.tevSources[15]; + + // Fog + bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u; + + if (enable_fog) { + bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; + float fog_index = flip_depth ? 
1.0 - in.position.z : in.position.z; + fog_index *= 128.0; + float clamped_index = clamp(floor(fog_index), 0.0, 127.0); + float delta = fog_index - clamped_index; + float2 value = texLut.read(uint2(clamped_index, FOG_INDEX)).rg; + float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0); + + uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u); + + // Annoyingly color is not encoded in the same way as light color + float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0; + float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0; + float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0; + float3 fog_color = float3(r, g, b); + + color.rgb = mix(fog_color, color.rgb, fog_factor); + } + + // Perform alpha test + if ((alphaControl & 1u) != 0u) { // Check if alpha test is on + uint func = (alphaControl >> 4u) & 7u; + float reference = float((alphaControl >> 8u) & 0xffu) / 255.0; + float alpha = color.a; + + switch (func) { + case 0u: discard_fragment(); // Never pass alpha test + case 1u: break; // Always pass alpha test + case 2u: // Pass if equal + if (alpha != reference) discard_fragment(); + break; + case 3u: // Pass if not equal + if (alpha == reference) discard_fragment(); + break; + case 4u: // Pass if less than + if (alpha >= reference) discard_fragment(); + break; + case 5u: // Pass if less than or equal + if (alpha > reference) discard_fragment(); + break; + case 6u: // Pass if greater than + if (alpha <= reference) discard_fragment(); + break; + case 7u: // Pass if greater than or equal + if (alpha < reference) discard_fragment(); + break; + } + } + + return performLogicOp(logicOp, color, prevColor); +} From 58e1a536996348d1ec4c8ab5a65b396fe28ccb3b Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 16 Aug 2024 11:06:23 +0200 Subject: [PATCH 180/251] metal: create renderer --- include/panda_qt/main_window.hpp | 1 + include/renderer.hpp | 3 ++- include/renderer_gl/surface_cache.hpp | 4 ++-- src/core/PICA/gpu.cpp | 11 ++++++++++- src/panda_qt/main_window.cpp | 5 ++++- 
src/panda_sdl/frontend_sdl.cpp | 14 ++++++++++++-- src/renderer.cpp | 4 +++- 7 files changed, 34 insertions(+), 8 deletions(-) diff --git a/include/panda_qt/main_window.hpp b/include/panda_qt/main_window.hpp index 3ff16a1d..fc756b9f 100644 --- a/include/panda_qt/main_window.hpp +++ b/include/panda_qt/main_window.hpp @@ -129,6 +129,7 @@ class MainWindow : public QMainWindow { // Tracks whether we are using an OpenGL-backed renderer or a Vulkan-backed renderer bool usingGL = false; bool usingVk = false; + bool usingMtl = false; // Variables to keep track of whether the user is controlling the 3DS analog stick with their keyboard // This is done so when a gamepad is connected, we won't automatically override the 3DS analog stick settings with the gamepad's state diff --git a/include/renderer.hpp b/include/renderer.hpp index 569a730b..4eacf0b1 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -17,7 +17,8 @@ enum class RendererType : s8 { Null = 0, OpenGL = 1, Vulkan = 2, - Software = 3, + Metal = 3, + Software = 4, }; struct EmulatorConfig; diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp index 5323741f..7346fd11 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -19,8 +19,8 @@ template class SurfaceCache { // Vanilla std::optional can't hold actual references using OptionalRef = std::optional>; - static_assert(std::is_same() || std::is_same() || - std::is_same(), "Invalid surface type"); + //static_assert(std::is_same() || std::is_same() || + // std::is_same(), "Invalid surface type"); size_t size; size_t evictionIndex; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index fe336edc..95001b33 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -15,6 +15,9 @@ #ifdef PANDA3DS_ENABLE_VULKAN #include "renderer_vk/renderer_vk.hpp" #endif +#ifdef PANDA3DS_ENABLE_METAL +#include "renderer_mtl/renderer_mtl.hpp" +#endif constexpr u32 topScreenWidth = 
240; constexpr u32 topScreenHeight = 400; @@ -52,6 +55,12 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) { renderer.reset(new RendererVK(*this, regs, externalRegs)); break; } +#endif +#ifdef PANDA3DS_ENABLE_METAL + case RendererType::Metal: { + renderer.reset(new RendererMTL(*this, regs, externalRegs)); + break; + } #endif default: { Helpers::panic("Rendering backend not supported: %s", Renderer::typeToString(config.rendererType)); @@ -365,7 +374,7 @@ PICA::Vertex GPU::getImmediateModeVertex() { // Run VS and return vertex data. TODO: Don't hardcode offsets for each attribute shaderUnit.vs.run(); - + // Map shader outputs to fixed function properties const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7; for (int i = 0; i < totalShaderOutputs; i++) { diff --git a/src/panda_qt/main_window.cpp b/src/panda_qt/main_window.cpp index f1949da7..4c187bc2 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp @@ -103,6 +103,7 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) const RendererType rendererType = emu->getConfig().rendererType; usingGL = (rendererType == RendererType::OpenGL || rendererType == RendererType::Software || rendererType == RendererType::Null); usingVk = (rendererType == RendererType::Vulkan); + usingMtl = (rendererType == RendererType::Metal); if (usingGL) { // Make GL context current for this thread, enable VSync @@ -113,6 +114,8 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) emu->initGraphicsContext(glContext); } else if (usingVk) { Helpers::panic("Vulkan on Qt is currently WIP, try the SDL frontend instead!"); + } else if (usingMtl) { + Helpers::panic("Metal on Qt currently doesn't work, try the SDL frontend instead!"); } else { Helpers::panic("Unsupported graphics backend for Qt frontend!"); } @@ -628,4 +631,4 @@ void MainWindow::setupControllerSensors(SDL_GameController* controller) { if (haveGyro) { 
SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); } -} \ No newline at end of file +} diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 8f9f4240..057a4858 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -67,6 +67,16 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp } #endif +#ifdef PANDA3DS_ENABLE_METAL + if (config.rendererType == RendererType::Metal) { + window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_METAL | SDL_WINDOW_RESIZABLE); + + if (window == nullptr) { + Helpers::warn("Window creation failed: %s", SDL_GetError()); + } + } +#endif + emu.initGraphicsContext(window); } @@ -286,7 +296,7 @@ void FrontendSDL::run() { } break; } - + case SDL_CONTROLLERSENSORUPDATE: { if (event.csensor.sensor == SDL_SENSOR_GYRO) { auto rotation = Gyro::SDL::convertRotation({ @@ -370,4 +380,4 @@ void FrontendSDL::setupControllerSensors(SDL_GameController* controller) { if (haveGyro) { SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); } -} \ No newline at end of file +} diff --git a/src/renderer.cpp b/src/renderer.cpp index 76c3e7a0..6a18df85 100644 --- a/src/renderer.cpp +++ b/src/renderer.cpp @@ -17,6 +17,7 @@ std::optional Renderer::typeFromString(std::string inString) { {"null", RendererType::Null}, {"nil", RendererType::Null}, {"none", RendererType::Null}, {"gl", RendererType::OpenGL}, {"ogl", RendererType::OpenGL}, {"opengl", RendererType::OpenGL}, {"vk", RendererType::Vulkan}, {"vulkan", RendererType::Vulkan}, {"vulcan", RendererType::Vulkan}, + {"mtl", RendererType::Metal}, {"metal", RendererType::Metal}, {"sw", RendererType::Software}, {"soft", RendererType::Software}, {"software", RendererType::Software}, {"softrast", RendererType::Software}, }; @@ -33,7 +34,8 @@ const char* Renderer::typeToString(RendererType rendererType) { case RendererType::Null: return "null"; 
case RendererType::OpenGL: return "opengl"; case RendererType::Vulkan: return "vulkan"; + case RendererType::Metal: return "metal"; case RendererType::Software: return "software"; default: return "Invalid"; } -} \ No newline at end of file +} From 45eda2f12048204315ce771ba84b95754bf6fdc6 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 16 Aug 2024 12:25:46 +0200 Subject: [PATCH 181/251] bring back cmake changes --- CMakeLists.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 31fdd9f2..4fd12174 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,6 +56,11 @@ if(BUILD_LIBRETRO_CORE) add_compile_definitions(__LIBRETRO__) endif() +if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND ENABLE_USER_BUILD) + # Disable stack buffer overflow checks in user builds + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS-") +endif() + add_library(AlberCore STATIC) include_directories(${PROJECT_SOURCE_DIR}/include/) @@ -256,6 +261,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp + include/sdl_gyro.hpp ) cmrc_add_resource_library( @@ -570,7 +576,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) ) else() set(FRONTEND_SOURCE_FILES src/panda_sdl/main.cpp src/panda_sdl/frontend_sdl.cpp src/panda_sdl/mappings.cpp) - set(FRONTEND_HEADER_FILES "") + set(FRONTEND_HEADER_FILES "include/panda_sdl/frontend_sdl.hpp") endif() target_link_libraries(Alber PRIVATE AlberCore) From dbdf21b1ab9d5636a55e266cac89b184e096227f Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 16 Aug 2024 17:54:15 +0300 Subject: [PATCH 182/251] Improve 
accelerometer algorithm --- CMakeLists.txt | 2 +- include/sdl_gyro.hpp | 17 ----------------- include/sdl_sensors.hpp | 30 ++++++++++++++++++++++++++++++ src/panda_qt/main_window.cpp | 7 ++++--- src/panda_sdl/frontend_sdl.cpp | 7 ++++--- 5 files changed, 39 insertions(+), 24 deletions(-) delete mode 100644 include/sdl_gyro.hpp create mode 100644 include/sdl_sensors.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2865a3f8..796217d1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -260,7 +260,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp - include/sdl_gyro.hpp + include/sdl_sensors.hpp ) cmrc_add_resource_library( diff --git a/include/sdl_gyro.hpp b/include/sdl_gyro.hpp deleted file mode 100644 index e2df18df..00000000 --- a/include/sdl_gyro.hpp +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include -#include - -#include "services/hid.hpp" - -namespace Gyro::SDL { - // Convert the rotation data we get from SDL sensor events to rotation data we can feed right to HID - // Returns [pitch, roll, yaw] - static glm::vec3 convertRotation(glm::vec3 rotation) { - // Convert the rotation from rad/s to deg/s and scale by the gyroscope coefficient in HID - constexpr float scale = 180.f / std::numbers::pi * HIDService::gyroscopeCoeff; - // The axes are also inverted, so invert scale before the multiplication. 
- return rotation * -scale; - } -} // namespace Gyro::SDL \ No newline at end of file diff --git a/include/sdl_sensors.hpp b/include/sdl_sensors.hpp new file mode 100644 index 00000000..cd452ce4 --- /dev/null +++ b/include/sdl_sensors.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + +#include "services/hid.hpp" + +namespace Sensors::SDL { + // Convert the rotation data we get from SDL sensor events to rotation data we can feed right to HID + // Returns [pitch, roll, yaw] + static glm::vec3 convertRotation(glm::vec3 rotation) { + // Convert the rotation from rad/s to deg/s and scale by the gyroscope coefficient in HID + constexpr float scale = 180.f / std::numbers::pi * HIDService::gyroscopeCoeff; + // The axes are also inverted, so invert scale before the multiplication. + return rotation * -scale; + } + + static glm::vec3 convertAcceleration(float* data) { + // Set our cap to ~9 m/s^2. The 3DS sensors cap at -930 and +930, so values above this value will get clamped to 930 + // At rest (3DS laid flat on table), hardware reads around ~0 for x and z axis, and around ~480 for y axis due to gravity. + // This code tries to mimic this approximately, with offsets based on measurements from my DualShock 4. 
+ static constexpr float accelMax = 9.f; + + s16 x = std::clamp(s16(data[0] / accelMax * 930.f), -930, +930); + s16 y = std::clamp(s16(data[1] / (SDL_STANDARD_GRAVITY * accelMax) * 930.f - 350.f), -930, +930); + s16 z = std::clamp(s16((data[2] - 2.1f) / accelMax * 930.f), -930, +930); + + return glm::vec3(x, y, z); + } +} // namespace Sensors::SDL \ No newline at end of file diff --git a/src/panda_qt/main_window.cpp b/src/panda_qt/main_window.cpp index fab77d2e..6bdffb7e 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp @@ -9,7 +9,7 @@ #include "cheats.hpp" #include "input_mappings.hpp" -#include "sdl_gyro.hpp" +#include "sdl_sensors.hpp" #include "services/dsp.hpp" MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent), keyboardMappings(InputMappings::defaultKeyboardMappings()) { @@ -606,7 +606,7 @@ void MainWindow::pollControllers() { case SDL_CONTROLLERSENSORUPDATE: { if (event.csensor.sensor == SDL_SENSOR_GYRO) { - auto rotation = Gyro::SDL::convertRotation({ + auto rotation = Sensors::SDL::convertRotation({ event.csensor.data[0], event.csensor.data[1], event.csensor.data[2], @@ -616,7 +616,8 @@ void MainWindow::pollControllers() { hid.setRoll(s16(rotation.y)); hid.setYaw(s16(rotation.z)); } else if (event.csensor.sensor == SDL_SENSOR_ACCEL) { - hid.setAccel(s16(event.csensor.data[0]), s16(-event.csensor.data[1]), s16(event.csensor.data[2])); + auto accel = Sensors::SDL::convertAcceleration(event.csensor.data); + hid.setAccel(accel.x, accel.y, accel.z); } break; } diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 80014884..90166899 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -2,7 +2,7 @@ #include -#include "sdl_gyro.hpp" +#include "sdl_sensors.hpp" FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMappings()) { if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS) < 0) { @@ -289,7 +289,7 @@ void FrontendSDL::run() { case 
SDL_CONTROLLERSENSORUPDATE: { if (event.csensor.sensor == SDL_SENSOR_GYRO) { - auto rotation = Gyro::SDL::convertRotation({ + auto rotation = Sensors::SDL::convertRotation({ event.csensor.data[0], event.csensor.data[1], event.csensor.data[2], @@ -299,7 +299,8 @@ void FrontendSDL::run() { hid.setRoll(s16(rotation.y)); hid.setYaw(s16(rotation.z)); } else if (event.csensor.sensor == SDL_SENSOR_ACCEL) { - hid.setAccel(s16(event.csensor.data[0]), s16(-event.csensor.data[1]), s16(event.csensor.data[2])); + auto accel = Sensors::SDL::convertAcceleration(event.csensor.data); + hid.setAccel(accel.x, accel.y, accel.z); } break; } From ac3840ddb0e2a8cdae12bda4f078baba0bb77f57 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 16 Aug 2024 17:58:53 +0300 Subject: [PATCH 183/251] Docs: Add accelerometer sample data --- .../accelerometer_readings/readings_flat_1.png | Bin 0 -> 151093 bytes .../accelerometer_readings/readings_flat_2.png | Bin 0 -> 55117 bytes .../readings_shaking_1.png | Bin 0 -> 217640 bytes .../readings_shaking_2.png | Bin 0 -> 66836 bytes 4 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 docs/3ds/accelerometer_readings/readings_flat_1.png create mode 100644 docs/3ds/accelerometer_readings/readings_flat_2.png create mode 100644 docs/3ds/accelerometer_readings/readings_shaking_1.png create mode 100644 docs/3ds/accelerometer_readings/readings_shaking_2.png diff --git a/docs/3ds/accelerometer_readings/readings_flat_1.png b/docs/3ds/accelerometer_readings/readings_flat_1.png new file mode 100644 index 0000000000000000000000000000000000000000..b7a425fc4cb953c1439f1146ef77b4f761990549 GIT binary patch literal 151093 zcmZU41y~(HvMx?=cR9Euz`=q`a0ngTYm5-*=_P&pU=5E{9J+}F&kA20I*lL?{2KQS=?&xH z2i&@%81!lb>EGei(JY=H&LVSVY9D3rkSz?@mQ(pzzn^y9bdKU=-3Kg4sYQWBi00`gOonD5~bz$U*jXeb@4Bgu$}u#L%VVTPLZu_%-q5uwDVG4hC9&?hZc5oUg%A0FbR+aR~JjQ&H}scaGGnhs#AxhQdkRtkyK$K!MJemh5E$=1vPwd%Rbm+IY{Fw 
z-jkJ!9~Hxtc|5d}`xF3!H?70bpMq$6={fcz-*6PQKXdofa?J|%b!<## zkuwdv#eGE6pr@l<41Ed-Vy=%MEispu-2L`jh18Tt8ds*Y4kVW8jYgqABJBJVY-w zJWuvvVx_tncB_xshf8q-7TMmS?+7A+2*3yOB_a~9sfDdqouEKhtt^sV~ z%9MmfwZM@Psw`lWpw#bpwQWQnG8ft#ura%SJX@duNo6k3d=TgO%55kDXCiC}=S|8K z)#(n#o*9u4K-ve&byFuGlR{NeJEZ))((`GP&@_D=tUaJDLW{#8-zt{JG#V3~JQPJ@ z=_*;Yf4%+ggMe``+y_*AZH*qyZ)ivk0ayW7`5nw}oP$V<5yfi1a7xkRTZl?}$03Jp zt42Q2czNe^SEl3K(84@2QjN|fvMTwF_~^)m2b-5000ViJT`x(LSEJ_^#n@EKtbB9#SNb1bA5x<);*U45A33CkeRh&lIdtmv*Bn~7admzEC z_}#5aGRHkCJlTQwjwe$px;~;%i!$sTufS-X`G!=BJk69X`V=TskQeup(jq&z3Q!UXM zlHYImX}I{{xoyf8%+($&XPoLh$18-p5U*{XuQUvy(bKr1{iJmt&Ty?lgP&-E6_%%nCgN|yLY9DVlrbAY&fb{2A+k)^Pj6&oV z-<5bV?B+T zf+>POa?&j0iJ;l{dP@qK2o+?g$u6r1C<$QXmpJwc(ix|G55P zT~S2ljK&iWRh}VFTxWmDFX&WP44SzLpcMT^ksBPC-pcm5}p@}iuQ z^|B6iEiJy!4{GsR#4J-99A7re{3V_fZ{&!SiS&rlMx}|OM!8d|-j-=z{OBn7RGy|O zZp-~Xp)FQdN>nO99`gO8hKHI(#f|3SPpzMe6^5S!%4N0c)y#e>YrWHary=~qQ>IEa zx6-kE^3&muDfJpn@xnmGfDa+1JH>eAiA8mqX6b`6an#C5r6rc%nN-~h4zwnCvJ7}* zEoIGSS{jfW92)8y9N7!gQi=86X;f(1C1_=9bvKnZ#dv=84E8K}a5-0@QX2XcQI%X( zuYVXcshq1yk159~cO}P0rp~{668YFh?*VatZewio^Kd!}oGm&t?%SLSUV*@ zG(5#JXB{_#R%B?FRhLlr+dJ-BVeio0K`1#-8Ccdl{Z|=NNm7Y{Nl$52Ni178b1<7Y z^WuGgu`HKaMoDHBmmS|JKR+M9N^GoS5nPSD^3^2Kq;7JzW57@G-I(;4Od3|2s8*Yn zeT8I&ZpGh4O*^Ago5h92C5K2p%xt!7EWY!mA5|3&;SMF|M8flv6Dr2dJ=JD}CMiQK zJ3mg;o6kONR2MN^Vf8piVhaz%Xq9NiXm3$_Va3Ol-W92OrFo@nmR z@87MiEM}hSANvPjdMHh2w3?9*b-32K?X=swx1N7=qdIIGZd|#q$ZpSGu9~b`+E^DI z=p2O*OU2*~XpgpROI%$&W}Lc=?-iA0Tjd?XPjz>kb^hw`^tq31Bp(B`JJ_c;m-7$# z#dwwBY%mX4)vK1(L;;7IS0C&0GXYah?OQyhSUkzC+0V0EGe&2iWtiw#^80L$w{RG5 zuvyToG3GI2aVp>VlbVyKP-{}m1J1}rc%C%h&@!-@TMV-|(PocTn>FqjaR;z;+^!h{vF6_Fad(iFsopEyWONE>|7zk845np8dp{mrLkv8~2M%tj2g{ z#%*20GHYGC>DIvW5PL#9i4zqt%1K2{F(X4Q`AUq7#^tl=;nU2XRYG@8aXy2*tr8O> zGlP>Vbq210$$98SR5d{i^Jb3u`*P#ifsvQwdS*a=Azv8JA7gY{MlOplqa-Q}Wm!Dm zMpddlo*({dIc))Y2X?&gFzn(75y5n_yjFiURt;wzw zw+-77{uTYYH!=I_U8;c=(1>HSIUHhZ!Z#SYR?Cyx+$>)@3om&vll}-2aou|a^*V3?Xj6?Rb7sjXbRnrAi zm+LAS!x%pHK@tq&W_GfAb4ZdCV@Hx=NqmV#iBXzsy5Y;h@zRYEYI+H~o!&*0{~r1- 
z`c8EAyMQkj`XzNgoj)&~xKx}jO)sTX`m}qFuEtn!SY$7%FDfj0q(!9jSMwWnn?ll~ zW1}fjp{afqm_O4rJGXwXIeQds4YnzVXMf}mSY=!G_fxz|d$Aom`Bl|l^{kigsJQG2 zYAJHF8lOxff6U4g|Pm0qJASh?f=cC_Tt+>!Oy;iKE``_NbCzVJT`oCuy`<@zCbQ3yCVx5;33eq zmoZW|y0$vzPc*wpYG$_kQq8$gc<#oC;X&vzb$w z-r=evajq2!tjRNe5b{ z(q84)f@4|UIl|5NTs|I&J$blc1O=>ZUt^q9C?nBCqkqUj{V)_uRxM@qex>!3ZsZkZ zt>BfNt;*Xcewn}nFzrRY9?fSDc=2W!0_4X3Bad9a)9a1`+d^BxQdt@9?Q8hWtNX__ ztC9SAM{}0faf5>+X8ebPSNOzm1qTPeWTUC=uC1&jWai|+VPfuNYQf>{;QUXNh_}#d z(80ppgvQ&!-qB6STa@mfIfP!r|A+?C(fl)uyPYVVwz3+Hl#{Ck4Ic+52Pd5vIt>kt zh^x7!&?jlxf5Bg$MCq*E-JOMiKrb&Z4lf=KCs!*Vm!O~^kdqt8&CULrgWb)?(cQ$G z-O-Kyf0+D#KGGI$X0A5Q?lw-2H2>ghV(R4KE=ouD4?+KX{ZBb9ylwtZlB3(ds`Xky z;6GA;TpXOh|K4&qWZstOXL> zG`thhd?&s&GI?R|`PjMfIQPi+91sZP8(|>GW=LMao*O205<&iNG9^6PCi_Dw+A*Z@ z`#&g%b(_p$3-7-%pIBZYgoRHus__3wQ!IbQwpss^zDdGQv6SHsp!%SD|0l_=JpLCK z)nGz&Z&|@G7O~;~DAQ{hc}BZ%{)-EGvD#|_tMs)*9>RZ-`a{5f*6p_jO&CI+x(aPH z&JE$!e^TtPOT_=;g1}0673+$w?*FYU=s(FSB>?bWTnOb0KG4egoGsVo$V6aMjW^iL zDWSe(FZOJwgZEF39nd*b>f*93>r{$(DZ@1}<#zC8`7DCX&F$NM+aUIJ2&N?Yfb*bT zu;=T;Lx4EUH0tU!!3Y#qV;tUp3#b|f-Jj15aa&@6^wjDtx9+1$qbIkb(V zixkGfzROCb#98284RZ+?-i6@)j*>7m5kk{!`mHN~p`-f5teRuq#@+(AkXYtdojH$| zCtK6aYf-cax+xwh)oXU_f@zdB70r3tib9!Hv&@OLjU`S_J@@&1VM7^4L4zH6W#o-8 zw~D{d-8zHfgHc8xi-FpvaU##$2A){zFO7HIQ_QnDoir0G=Js7Kh6AQe-R!|eHIPiJ=aRhIF*T4N{?&cLDn&z3`8>`{BrQB z`{)ZO4I6@)53rnyt?E7n%PIK{iANp$>Ix7oVmlvrSa#X7p1B+hS@E>bys<|`!S(X* z!bzdUWw#cpuSG=4H5`gs+r`bF(3^da9w?|DZ%fuXss>ufER062mi+;0X%TEp4qQLF zmuu{6drKZ_!V7(#7ZvDHr^!+IYw;78La@DTIx=R&Kasz`gd3x;a9p#}13{Joo~T_5 z{D34XpdQaX_hWhE(^I04-s09?O&m1&$aA!&8xNp%`c$z9Ggp4t!%2ybRr+ii0!?O8 z<_&;Exk@v%493Su1AC*ig5`(sB}-S}tXgP3^*QVRHmA=~hTGnCg9KH{ftM4H?Uu0_ zD?mlf-J+tQ*2=X}DS_mg9W$rEdtzbAvY*g=0>tT5zHQ1cU!e(NJztKb3Ado0n&DHfhg(`K=(Vy(J?>RNO*VFzS}`$e_VYqx#`ikZpO|9hY?J z!d8r7@i(kOl8qmTE5SAjr1k>65?OrKAxL$URr)6VyhC&&^dZ!7i1|GCY~mszd&*52=!p0!FI*>HKLO+(tOv#vn8XuCVQwIzQg9r!+8kp#&l`ZkUuL! 
z-F~c`G{y&BT4@ZpVJD4ir*QrHxFUb{AFHfPE()8kiipJqBe0fXqDuNlU05v%uQlX6 zE_mSx*WKhIX|XDhq5ziS8&UdjN^-o!v86Sy@q6{HH83qb8u0&k*Au;#R#bsiM&FseoU%ibm$R`y&5+p)yX_p*fAConF_^Cpy9!$tK zihYy*?IC_Ek7bSI>eIU=Sp)J$s6PLTgmCLUv3MU6T$88=rfDi|(8E0Ju6V2jd(`UG zWQ@IW|06BlkS5kT%G+X(n6Y?bRh%<}> zKO4HNx)AlB&O3tK4(ZLOhi_~2KubpDP=SEQLJ|{_dhA7CiC-RgMHFZCnui2*il3gi z?uSbB8Hl;~kiGQ5%Sf2p7rO##JHN_Vt%$Uoo@B)8EAO4|9)^n79D0t-$knHst7U6I zdDr5ZDx+}3GDfyPK0fCQMdS>HX(7x0}uYT1%j~k16FqSuIZu0|A9!l?J*Aq|=(ptBmTC`xK z!fa0z08^!a{o)Lkja!8NQ01Lkc^Hzj{!Yz--rQNlcAqZ$@Hzw==%>DPFZk{(ZKWNE z*D{&yaX17w;RP1YJmDOBnJVU%bm4OZHbbFf{*ts7cA}$i1>9F>HXIoVshHKNyiSCm4$duQY%2Zm~hTw$_8Qg1w$bw$Sn?+xEWv(WZ z02oeNB{l42z7rDypg{EhMNw~3^av7%X(Tw7-mCIyU+ak9JU;{I-xfCic~2+Kk{SW~ zol~`Nr2uf0h8C6ICV<9C;<~4iWp%`c-XTr?YIO_S;6C|2JdSNWLab<+!;WqCb=-h5 z(yLFuI|ljMbd?~m%IeD~b5mMO5_ey?%j;WkS^{OUqHpfbNo!hyE>RyPoC*_n)*1+d z9p<6=Efy@-;3eu8qt0>hr|n21(D~hVwO4gF>uLV_4c`fvz9Y26d~VUGhMGganr^7& z!)ciN$9P(t%iWeQGPTaf@q>XcCd=C-Cc}!cjvtoTIcq76oy*&EWI6ztvy(w29Xt+yudv{Fsn}W5B znu*%>2TY~T?evkF%KPT3g_iTFLXzU0558<`U9h186g-*Vok`Q>i9_9+l>EN1gcGYw z(=AxC9f5U<6*L{oqNEP~rFVYp#Y!=|28ysYd*6KWwqTZkz#}Wll9j*7{_as1xK8~c zP~_mWl*mORP=nz-ZL)|J;TY)@Au*cIMYJ`iew``Wxf`@O`2+NUR&0GF$4WS?sxP)J;0|h4x_;9l1y$3^um_2Ic5IbV<8yWBnvl>(i&E- zQI-Yldg|>(O>~>$Y0k7M&1Ar6>T}-@X*X>p3=(LJ!<;+0JQ#6)q7Av@tmI$^?^+RZ z+#iY{k@duAtPP_E?3~tgqy6c@Bc?;sP|qNI8<}4{=WZETScat78ofKE2NDUINq9E( z)=V_p`Lse~I287W5p@+-mLJG>+4UIjbGh+o4TVgywQNgT?u4~xm!#_qDv3kxQcqVf zpCq)nSeRo|@ErM-mSp+{dp|=!rJm5V!7j(y)`P4u_dJ_}WpJ5GfYent7=N57<@zpH zD3jxM0u`4dVN{IGjyN!Qf$vZX-WQtMVS=cIc@vgcOPW{djBo(FCDH~3_YE+b7zI9; zJR`cHqf317g@l3Hj)gLxB>=8&)bNzEeS&`ySW$eI<;^Vf13Y=w*y@|CA;G6(HC@~w z@HJ|553-3mAqNdn82TdRT|^|tAoDM*3M!#OPPDIB$>9KAgsMVKI3#!DuSy5SP~ow0 z9yj9*z%!EEeq=8LM<}zLF2pBy-(#FEeDWo}K?SPf4n7&HX%Vr8tytzRjB^d0#iApR zXnpMK+tEB+c76R`u8{)|L}m|%HA4}C$NTGl`5 z3w`QC4rt&h@y8Cqn_+rnqP6z(u68m`=MSWdi}z0GP`p0546D3Ap~|uMO9obLK%~$1 zP5CW%uuD)kH^l4GDiHd7$f=ypVKBb1q;F=Km6O{rBOp~{SvQLj*F0=!3#{@ 
z%)8mZ^SwCuDkLa-AYHpRw#(kVezv>h^fDSLv@_)$=EvTGQmz}zxbE}jQ;V)5WlhGA zIFAGwzl?71AE4!-SU~I4aEmvr^Kl~2?+lM=>**52PcMsUCH^rOG2F<;jPB_$&(4t} zpWP&*k`wZ{V&`-_DSR19w(bWDaKDmrYtDiNg$y}_y(psCK&CdGWw!X>DpT}MoaRu) zWHVG?hMhm>GH(lS_O8r|;`sU6A1|#ei?JNE6PUJ9A##`imD7Emrr|rk36G_$Fk;=| z4wzWuj`2cxsNd5Jy}b)Uj?&QYxHrwvjP}32_s%rR=5#X{6im{gszOOjf2f)Xiu^rY zWS_wpK;AL-=E^x1b;176Sg`N#h!b*v0KyvaM%Y~&-dV* zykCV}zXeypyH6G~u{XA2V3^Q__wBM-f($a#64;=f++RR;lp&ElIR8|y1ghMVF9z>k z#`gKI;L~O2WuV)jO&OYQliy`S(TdYnKnSNIDBUyH>e!y6`{twHD88iZigxVPnieq$ zweS%|i0;c4zx%HZr^k(PRoGuv>6ZN@#jF@$XQ(O)F3XcR42;3^-AG(alw^BhNxLt{ z<$z3LWEY}py!s0g248DY-U$;|h`p(k1JHkdYc7r?o2ha#6UZ(KJ!cPrNLLg7V!eYU z^S!j3x8Fp+GaWD5KVMzG%Jn~C^|xboQSM0)DTcK|0L-7cmzEP}L?BLgB}L_vW+hMd z08nrDxbJBdAT9hFFOYK4m>oX^KgVse)iR8zZl`UDVV*kZw<0y}TonDmEru0Nm? z{W@n%*PYFA-i@;C+9nk1F^A-4RN@)`vVVtD-1xX>d_KUwMZX7dNPD)*?~GCp6rN<6 zQo;;yd+1h3KEjARkrs@3tDZkn3iDjU%>@msT5ao^C(4Y6kr+rf=ekQJh2`|#ZVx^6 zp~&)#F3kqdO1gi)%Nt|Spok}RaW8nrach2^$;n4V)tjW~NXh4EuF+XNjzXbCJut-F!n@5begh3id@&b~>%4sZs%?c!#y(m?3f4vGZhFVM zzkACzp{~5XA$WwqqFtjplt3Vy$|uF=qV+H+97~#vgDjw2$G&c08Fp4bm9-;>g<)Fd zaa0hI1+s6n_{uSyaGlt94wH|nD1R~&%g(Q^_gq_MfH-yj(Q@1LIaJ_XL&XwZwy&Sy z;{p#+`{nrTUJpsN9G6|1bI_yw^n1LB`h_Sz3UpK$_CObD^LK11R^^uFb0HTob#4gUaeF8!5`RAOP zH^#rX69G%#Q6cDiOe|Oql3Bhvz1cwpOphGq9ZZSC2swq+x7a#tDu~3nlewF;ATp^x z8Z>{Js^jEtF zA6^pMdTLB~$Sg*Gw7%rWsw!KhNR9sN>*I7S-nnc%Z)%{8a^(J|N*!`7m@>#5Tivag zZuHci(H^l^Kg)Uu7_p>^U0 zs;9`L%+6&=!(pVX<;$t7TX_iTnWXZrk!PF<#+o!JCe@iAbB^*tQ05w6B1{Me3Hp^S z^Bwp;&i8cO&Bh!|7kIi7(Zn|Q_;;4FveY@&IAuRHC#=ztmn;~i8(K3pO@ns(r^b0y z6IrmWjLk}91(=~B=E%VIGj3b-+-$Bfj^>=!?XMV;y>2&=(r2@*NHxn}0S=Xi-9ILz zzv(Do-`ckM=GjdO3Rbc{^)*y!f{yvWezAAV`Fb}Lb%S#v?SmVoUfFs; zE=`bRz~~|)V%`2aZB1koFoiaR>>&sp>pQMkjOS|}0zgQJ){0#8QR3qQdzdD$1g=q} zTkBWT8n{LMCsRp=Ph#0d)A!4r>2XOqABlmh z%km7|&p910!kcS30KuLqK*J-iwf=a<3@;>kcFD+ZqvL*2Z{Dvob0A&6w^HKE7_M~s zp#R_VW>(v*o_n9CA5qRb#pI=99R~7D9tYo|Wcl^Icfuwr>&H#Sp7%9w*Ken;i)uc9 zF<2N^={@IUS@_z3ksX(TZve|zAU@qofF<-go2>PWzhBBhVWu!G4CDD}r`dNzOR&B1 
zQXfQOWxNL$B4i8xWR=Uj{jg~VyuA>EL*A-!U*^$7%n;NTlPe*+)~ zJgit*R<|B9iRNvLXW3PbaCeS05EGTrhhKC)Zt>&?ykur(&+foWG`bHv(VeSp`CW>p zkObI3;W_$0B70M^|&BFVs!tQyv#yIi3k!p~}b-v3B*+G}z zr3_yIeB5u&U9OCQb=N1xLu)tvN{G63Iy3?h*y6LZ1q{8@o)T)NHT|USE})xertq-WL0rm z5l`8Zt}8lAV$*T12r*TDzU6Pb=9PYrVZG2tih~p!@$c7=;OAIAK7)hY^wG8qov#Nj z_D`l2(Q`0$+GZ$eJrH^*Q~fT`^81|mozM8|B0Z5^?vK-Sz z9V>^oWp|&Z4{+*Hxjge{EtO(RjDWZAxaI&e`ZjjtRen3Gej-(!3@A)Y`hGUlGTCeH zO|ZRS>H{!Nxhh!pJh;)6weTE221us)yO*}6Q^P!^r>6tDT5%A^&vZ4>gUMDwR(;Mmi z2p1F_^^2}az<|smw1}Qt6p`kb&3bu_Y6hdDtt*n4k!xMy8_f}N8_(}5RAgRqN|*UW ze}`&-8EuV^^){8;IG1Hfs@6W!rga}yUz2QyT~q0)trB|Q{Jqc(G=umnMNM~brhRd+ zcdsI_laT3ht~p`FY)Vv0oYK4YhpJ=qdQvqnjcKQZM!-s&>0o`YW&%4c3$vadbX z1%y8M$AVb!Eq6oZd)vV?jigAAv~Ksk-7p89lC8dYfpxH14Ex!R?uVz}rXsI<^g_Y& z6paaxO|ZvAWyAOL7Bi=iFyb}^UMOt3(1To5tRlzJODxDV3yBDPu;w%&Z)#5SuuaYu zhzj7Iw{KdptYdkUCj0dTW0nqQZ+7h-@u0;l{=KTmHByfKcRaXN#=FhyEBXXZz3Dw> z(q;&*RyhoD^D|Bauc_s}jDx8zammnUK;Ky`H~Os` zxLe-R4RSBQPz7z={<5;>UtZe=36tdzTT1BvYGioZhoeyrC8m3+k_82K91muX%iKLk zP}q}Rv$$RlkQ{EO_1ByNP6>YglDP&wtVq}Yt!_Vgd##M;7|SaL_gg&vph*$fB@k*9 zuQEGTmVCz(RoC^H=hy2zO!>OrqdrUDZROY0OCp^$g5G?|k~*It4D!J)>5HRqiabzz z$KOG{2jkDpi6i&1j19&w=J})-WV00*L8a{}6hn*vd?@Qr!J)$oVg-R|B@Te6kOj;# z<+}@pCD>L?;j`T}V;TcqMT|mqH~o~~ml>3&v}~t%(F-gdT2wwn`gp83y$IM;OO(T}{!)f7XYN zg@2mun_Pn>M#L%?ufG1_8^M))-g%hz{f_%&m_Y5vXOW`k*~5HUew`(v&~qzz$>CcuEg1NNeoKL88UpT7dVG(?Cx`eMARoj~R7S)bfx2ApTj zfT}q65ELBlbR$on3}i83Vaqepn>CKeEh%hjTfhwi zW!?QK4^Pi~Ksl^Y1?euqLY9t(mQg>j&laIND0M}PKIkR)%c$V=%M`i}9;;zM3-3uN zC21WLDXsfg-CgIqiNRAMw`@RSU&K%>6yD%%8F3}Um!+>;CEe|~OVO~AvOMmg*xzX1 z-W#7ewj9f9!AIcEqcQcX#UNwI@u9^14w(g7O?0WDoJ8%+59JC17 zIUf7RN9TKNFiR>i#fQMdnNs8DKj~(^Ic|$92l2uek@)5H7XrtvfHL zQo|d66HyA_5m~yiUk9;nF5$aXvDlCzKvd9OSu4q1#6jG7(+b~uXh>q8$h)|dGT4es z5(uK)Af6g>{q1~|g$J>DaTRZ(eEK`=%`DyRU9ze=y4{OLz+d&wIqI)FTwAgf_AKAa zn>a^}XCF&Y=aVBPD$OnP&9L35qxyHW5G?aMA84ycW-Jk#$-g^fndAj!D6258n0pl0M>LC1Sidxg_>=2 z>d3qrq&R3?GQ>EIc~ZaAc1!)N#V0?sO56)(rOJI{+%x*I`E3cJTj1}&zu9O`D!A@B 
z*x6Y6C7nvlFIhb^cV05U>drLk+j2X@a*YfwK{g28i#YU&fZ@=)62*jnUj*2F^L8=2 z(x!R?&iyz6ZadXkjJ>2nnu{+W)Tq}(hh1r&X%*+Os(I6LvIt8jEi*9uc;e4p5Dq_D zwGB3SSYtE>Ea|0o{Tsc{gKYL{%u;fjEq0VY@|*t-mW!5fV^l%c!3^6VqdMX7I|av)15y3zNkzSNpEUll?@bgl ztI7eR=_rn1zkTPuRvn+LP{vm)@j&xmI zUu69eC&FOdR{Ryfdpejh0$tW~(B2Y|is}q#K~t0T6#_s$2_Vj8V!cV z^~qXt=ApiAE>P)A1xgrvQ7le;koL07+OZY^kQ#9&#tp-^^Y>@P%S~Z!6fd>P`ir&$ zqhf?NwmE^2bFaJ%PMw&Z3o4;ECFZ)P$RBMM`rz9Tm2ajPn0!R;mJ_aOT4qNX?D#Q8 z{*K!Y>hp&$aG=E-9KASj=JFOnAGf@m>`5Xw_*BS{x{K>H>)d~Ze3U#ZL~AwbAKjMO zOzR!)Y4Z?@QZ9ncxsST@dU$dv0_z)EGph9xq1L{T7hYvaQ|H-O4V-=EC)zpG6)~>0 z*`kCHY_V^1hzF8wKf9hwvl&;x^;456%e7 z)0Xx0N7SvQ`lJ7;BRbScLG`SwSmoo`M})mn zhF&*@DBUQD)*xmeBwH7R*J=G0s-YsbB2wJ%?5yF*X(KrUf#LfSxK8DI-FnwCH_Ruo zy@OlpTGI~Qw9^SPCK|yVVm+sP^^;_jExfg)J0` zvpsh6d=BK@rZ&7HW~|~FCZzh0e(#@yW}UKDVL^2yoetBhq{MpmF3@Ep^jnIms+QGs z-;V`{S$@=R8yR-WR=!)wfrYVaSktwLQ3Xd4cj45t337`^t_*!pw9}q6#L`y-(nOBe z&NEo$4JU5jX}eqg)dQ*PuGhK;35e1bpv%)h{l{a`AYx%rSSG=yW)7@a0Pyby{Jl#xeP3yC#I>dE zJB}*h$3)+VTMcwswPN08h|`~mVUXrVO15`O0)@F4wZs=2{Q23?P6$p&jNb;$6v1=d z)GC0vWHa&O0OE#)^286V{ZZW1_iR$G^Q#|#4o0RYUjwJg+?Qt2*qJ7F`oagyZHHp% zt7v-A*l)hM{-VD+?U)&H$qFa}pL)pQ6$ zh|KFM@XaoOvhHMu6dv*>_w5XT0(E-)y}(Y(O;*KtPW_aEOo3QlGE@k#ovuK#b7hA( ztI4Bt4in0~0LbI~y|WR%$?84@gW-vWJr2u3RZr(p@_(-LNxJ<`s>f13!)%|f@2jQc ziEps*-%VR0C+SdnEV4kk^)!x*u*98Tcj1HnuX+>7vY;&WK{R}oq<%Q@YPZWGyA8IG zh@Yaw=O$vzgONrxsA0VhNYa@fU`6qC9?>Ts5S%`@d7cj@u@NDE$R7^&S4VU@>BmSm_OD*T|M& z`(va^M*4ugl2V#~C4xy^WBRApLlE2DhZla@9EP(O>t#4pS^Zm3V~t?J_(Owzxp9s~ zt;Z|-{U7y5Ovb6T>~(|r&i)N(tKt0y*yQ9?%WqVcXQ4uvjuqv@)gxI@YY=IH-Rg@S z?HO=8`b2&3Z$1V`N6>-~9Z66395!!wSJTZyuFcuHbaV7_tu=J)93~R(%VuklVwSKu zCj;ce_Aptyu>dmWQjC3$Rc9*w*fl8JwZVvz7fBbcvgdwv$Gww@>7nH-zK9T% z>!a19uUclY3^0v;<-JJwuaF+%kI{T74d@*=H}8_F$O4}KSW2vN+AD_OVsOXms2pwF^h&Ke!JL~NCkkNGdJ~-DL*;wvaw;7XusLyQ7|&2uk-_pG@ko=+@W+ zK@o(_coo3)EH%pQx{kYXu%{Oeg*P^%ps_d-R-FF~OpG3jG6V%1{m?uVsIW><``i=(e+_-SXurHt;1@H7$HVo1YZyaD zBZ|p0+Hfo>42hs-pblI%p^OJmPGVB>Mv|%K6V)PiE?^f5J6?@TivJ?$zQ-F5s-;PHn?P1 
zA*ZuS37Bg+4yTanpjW^|+zs$~5vP{!Dx#L-*Qe(9DK=?RWzUKV*Ze5{GbK(CYi z^pun@>F#PQq!1ibpE5N0vuc7c%&hgSFikK&3aUxYgU&Ket;Hgx>o~*tzH_BOJA*Io zGA=t~C*rHB=EoEI0@)NKf7A_V7!U;CXppVM9nv$%R$_8L%miWZA7bM_Wc<@BH1 zCSY}8f?qY+U1q>bNLtl+h)z>J6VNjLd3D00O=w@`;(KFu}?Y`&U`59$?ShNv=Ge#{>( zp%spIWLt!-ZT+D33`x}j<;L3-Ug^~#*czcr8A%c<$&<9O)}}pp@%v>MUw*GJK8E=m zP03W@i7%zv5kOk*aZVKkex2Zudl;xmx6%0qY4a)nfs&Xod*8HRGVBU_U5t}c`?9Sf zZdw++Lfm2k1!ALY&8{(u)&R^$TI-_^5xQ#t935H^3yLYuH@sBU7do$48$YT3FmC3g zqg5P+qIW2-D;vrVN6rEUyCaom-3p^);n2H!@vk{jSU`8Pqw90u?0(1L(o@^ zGNt#^adr#8$%F)Sr_YpcCT@QGI@A??J`JK*NPdHfO~FU!Qdgh)X7`#o$9R!>f|7In z?On1J*ev9;IYj1yHWw{|8nl*dz3%n#bl~H=8tFou80XBZ5FDbO1P|H0 z_dP7A%%kh4Xr+Lv#2tQ2mMgrr2}1C6K$Unj(gaZquudR7H7Wd-Hgc~Cl*HVU;KW?K zo$8IygR}gN109f_3odvWBm{G7G2*0rp(qz8$$7gSZMY3zXWjWhD6+fbDIa#F9JZcC z=dU+hoBUQPh0GSUXDo8YOnxuI6Mx-1vWIllri@DGojiC(IKU~W!H~fbSvrB+VpK#n z{$+1av-=)*JBM|8;^Y^gA5T<0W%``Aj~G+}Ra< zE@0=P_?KmrS_-1YS3}b-bdxvP)GGV%0K6=r{Qc2jFhTnP?KUs?6LxmG@;gDB7aY1c z+>46aHyUj*HIfQS2tO?XUyiNn@6+_cgVX?Fb~GfgY^Saf43tPDnOls+qhYOLVpUAB zZD-n&kUEQ|`}_}WHDyUJ?=#4vET>a_rg2|r;6!l=L+n23hscHl4O>MSFkNk)Xp|YN zNWSC54%QqeBwKT?u5L@rz1pO#x*#vvoTLDzmalg@lL{{l6*ESwNKwe$qUPro#i=L)@GEm0-VT$?* zH5?ed)7X0VCbYd&@M`TvtF${u`#lj%+O)-s@w4OVb}%XNO2Y#zo*HmD{m$1v*8N4A z?4<4c$ibI3ov*=;J|bQCiSIeLo7b`HqA~?R%`g*e%r+_N0CHP zu(jyFAZ{>It0es}Pm=PMK0RAaXb#$ogS2rW*uXSN8W!V@uHFrj6(>B472gSHo%EQn zBuUXXp;3ZBSWPn?oV=kMBtz9a8}u0%5Z$8Q1-tl7>@??(fjZD{C5`t6OX9JtD56g@ zG@U3ox*2XPolfl$O~N~nCEw;x((w*{HS|mA*yfy8c%_aGNV8L~$>dOzx-9>=y9z3Ja}gMxU~E(fy#GM;DHY+IV`a5jTVy`cY7(3xo`c?Qf(IGhxot^ z**xH?&U20@g^vhd^5CvmweOZ*Rr;N~pf7b9KWWe<-{JJ4O7nfrUxIqM09|(BiQ_Zh zjv|eLbA9eSF@rAQJDJ9F&dGCods3lJ)+#tDDZ3R(arGbP5*Z6jbU9`=2yBo){_~zl z&3*gD%5dgzOjMeWv|SG42>n4xRv7ChYhmFW)JJ>9PsWrsG)ifR7^rlK%(H2Am~IL9 z(Ru*s#o*Ss@GBmS$Wk;_v|z)c6~(u~6yCGu?&>F2)-hi27^_)-soxDjRXDC2`}M8oqxr_UW`ummv*{tyX9!t$r*uGIfnR=;pRrsA z#XIZrps5s2Li(^N`=qPf(rWgxJofmea#H^qG<=b)9Uv)b}DOXvDFL zy@FNEn@=kLlmvY~@CX~!0}jTi8;;)AMvxz-+ElD?3FZ3D9f~MNs)3aE^9OA6n9r=c 
z(wNgLt`C8%c8npsLg)_tRNOhG6?fv?l+3K;z z+6s}~BNX&HmKKosbUMNMMC}pY;h77@>AX9f={|sQ|H>z{0p+b2na>G=KdLju28sF4 z>ki310B&D#ll$+mQns23+9WdNc8e4*xRtm;I?~!I*Q)xqsebRtng&$*1+ZnGq^CG! z!{1IDSo4|N-gaRrkK~$uo57*hcx;nl47nJf`zp}{wcUO-)URpY^vw5W>S^SQBo?)z zx~P~jO;qhoj()yl&q^Uw!FYCcrN7^M_ccSt@`MPqh_pMwzqp;>GZYG3hH?GwUVtG+&VoG%A%sQ6C!EKnAaNH3&kG0NGX!OYH-^s$0Q;3_-0B6c$$0F{ zB!P?8$gz!h;Y#+gb)^h^y};P|V^kU}JkZOrn;1jE{?xm8q2McOtu+$iLcewd1{k>U z+hhS~ca^vR=OdG}TpXyl5oeF+>N*9c^sIEQjQ!Bp=!M6}`g){4HqX07W#((r5D~~5F$}U7;ci+1WIJa<@PWS- zhszsZ#HtS7j=P~Xl^gT=_^vXZGvs$0N0fmjkxr@#Zw?6+#ieJ$V^@$__n?t*r~QPR zV94Ap?OvUI#zWLkPq9`s{p*FTaazQ|hm&zSw66mCkb(XTHox3f z+{*$X$RPrzH-v^HS>QydeswS z5!nAo;BT0L_HeUb#0E~^NNV_}Fu`Ba@efAuB!G&8XRLqF7lyzHbLOpHtuJ66Mf)(*h+~gJSSQ_gU;{oU9CeJk$IT z$3hC=d`Bs0iFE_iw!!vtj`MB5DNVCN`9X^w(RqYb-F)Er)$vdqlaRZdr3wC0xt&Wq zOfICz^Zjn?|uE(}Cr`tgx#+*+U&F|;8)7i4OCM20S2NbUtULPKN;P+5Z zp)g#AKPj*bwq-WHX;BKlPH;S63}mTZQt<9zebsqiTYDshmQ-~#DO(#>_lI}W(~aM> z5VY`3k*qgRMaB0=jlBp#rueCQ83+Eh<9c1f^_!HizLYmT*duG;B}TJ!4z3@WNV4!( zX&d^7pHl0!(5UMsejMJn^7hrJuW->TBtaC>CCD%Q#2#YjrS_=8CrG!nkc(X6Ed}j5uZ$=3JguRev zxRvL)5*a7eoFP+rYz_QALj(s?A5;}t$9e-5=?ExLe1Ysti7|}@e*^PFr|fHxn<&|D z-d;@VAmwLY58{9^IzNFE<9j%1?TJiyyaQ0&Vq||`_+dFi4f|4@ciESw%?x4VPWzW} zZ|4FeEms7#CIk0^)(A7dYE^bozwldnitKacPoPZLU}#Mt?i6u&QTNS!Z|dS?B3EqR zOv4wP+E~7^-%j;T3`!#$Gov4thI-$Ra_1Et5?U8YOnXaNz80!<^$ET%%){{OY{YUp znq3B-<;E9Td^t$g*hO35`^xWm(7`T5j1F}52N0`!yyRb5eRc3$|8e0Puh=$DD^c== zEa5!C+io@NEl!AV86^7s4drTC+tB^spy&)UR^3*9=9YR6%O{^gAslDmn_Ojd1Jmv5%6p0LV zSfNJbMKp`d$!n8PHELPT(v=9ZCwl7>kUV|M+(Z~zPQftWc%M5|ns9meRK{)p{%lEF zn5FSo>6eSwKn@BvJr~MfHcXDXiiTowlt<_l5aru z8h2z`JVt}yeurXr{|eK2Fq9<^KWf+@&JpVg4C6CKf!a^@H&*5SL#h#WTaW^cZvVF` zT1NAioj``IvgpW^Y$;|F+b(xPSzO!H(bX{G03sm>oA-mc!bNJ-DKSc_3bH=i!M71-^(5^XbaXDR3(@^LUGruvJL zNa!^NjB3R>#VQ_Gf~k*tImHx+pfpb5L>Az7*!;OQ^M z^2AakgT{ZW8`1B;8uZA$t|-0j$r}6)7bQYBx>V1l2*0WfBH6|HTFydocRbs#Cs*1Z z@Q(N?*%f8j}a;K 
zy%Sr%Ayi4mH~^Jq>S1iT^L{f^>sRlD2}Ph9i9X0%^&)&YH&3s!*Nv$G9t-S!2=-Y%U@8JmN>PtXhu0esN-nc6xM#VH1oY4NNa_ry`PtHVwoq-YTp=J2?1o?hs z=xfc~bt%3`bY3nvaM+3?w7(WD90nC4Es40#mdo^`N@7v4nN`RHCTl|ETOQ0tl}XRO z3Cd*od#a(Y`IN|o;LXd;^zb3(#JK`()d zqW3M8FQewJ2p4nksKqFG^B=RqA}YO(wD~jJe|LqZ6ZE38ak6I4j4~c3%%(aEfdwW= zSbUx)ndzC0M4|m|=_sJ^$izugz6|u>2fZa2EQyRUw7-LQKac!!owMw=xct+fGVq3g z0&glZi%#wY35%3uBl6^{ak7ktW`m~;kKhr1S*Z# zEAc?-z@?@hjiQl5Xv_e{OnW|_6RBjn@k{X)RA`vJ)6X}*gElCp_k99Y@GNHY4h)EOu6JoJBm)~O* zaLuW$#McI6N1Z?C{ecxt`#g=>AOKf6ndmL(X{!;o=XiEt2%m^yj)lcfr2-#U5Ru*d z7NBtLPXr8mV&x8LK|lJ-=Ngndz=(dPp0;08a)fFnO7&q9GOM|$B1DAcwO_H=Xsn610E{eo+2fdm z*mD&Ew{ma7e#_%%KgQ@4r?0zd z5Mf6?!#4^Ox{PA~qVclj{mfMc@{CPSqDs+9XoMLl)2mdZd$_8D?dZe7@G@8NS55eJ zWoZ4fgXoPRk~v~x8&`2rXN4Mwy5CY#y!QT;|L4GZ$fOYG5M#r~ug`96d8ACyB(9$a zTq|;qL1?Pw&38OHUhmsu0pLxsPi9qlM&z6WF2dgLkZ=4Hi)oKXxgv3@u_NTK%D;4m zNt`9uEM^TC_x~)ueR5cyKPH7GS3rZ#4zyY=942=S)CYeu*#$=pheuxp75I_1S-0bC{s79yNizvFCL^|^yed=<+5+z7xUha z2Ebg4k$La)Kiq;+bcY4yiSj!qKxkZ^hv}u(;3cSholg-thZDePgcS1U=xX&+-B13eFkHdf<;uN&louD8R1jsD|Fe!88^guW5O6{ z{Emg4e;e&os?5n7Xg7sKI@iy9u)Jx6Y-qhQRII6UD3W2UJ`s=Abqx0Zl3%i|k|;3W zQWOLCCGyGZMYX#;_>GrINdS7z_2>C{8xiEAI^SB(0P2OCunL1h%DOP^jzJS_2|4TUEt zYCjyEIMB`My~aAUnC&TRD|7)#+c|nSPG{Wg_g{5;$AJ_J{=a$}`Z2R*VHic{VeD|f zU)MVe%@$M@ESCw;w3R=guVg&TW5@Byed)I@%@!k>&K?s-6IjS} zc5%fM{id_2)a=v=JTfcL>o;kug^|}>DHho*xKjAO2KmlG`|~G5Mg9SY7GYmfzc1JG zvhHqWROf;;=3gcso4KBHt6~?XR;!fWdH?XbP~UX3I_A9PX>hXWrWWTl6gL>QhzdYe zbH1B+iyLx9U4YQDgM(6rxYt4HD&Gr!U9#mOV3bm2K2+MlBYbTjm4$7(JmqO|P9&rh zX^N+X3sa_tnp=gakwCXlI)9*<>G#((@C|me5a-6p%{1`ZaFZ)R!KWfuWAJ6D2~#0d zLlN|}%Sj+vv(o~_=BAWU&unetYDJ^Wl~@^{X>?QBgRoV!je6&9-(~GPUmM;D96IC* z%vt6I?<>-Q{^r|!h>Rmy%?e*Zu7w^9x006=j+pzF#ArdMKn&*c@oUaf{1Px$EoY;bC z-)+-!I>2arTLz6ROQ(|wpu0zE3vx1qKRMJ9G`_XfaGC%(3C2>4Ca#|y1Tl~Uz~|!k zw!1|4&}@}h=e;P zx>hf0pK8ieNr%A(td5VzYTOx0KWbQ zm4Jp|i?O!61ImY~{It1Mp-k-y)Z;Lp0bYPD_V_1|Z%5kIBIIjdu>=5X(KTGUICB^l z5gsrm=|l(?#T#z;@yNn94Sk>xq{daGQ5}unbog>KB~umMU3F#%C8E3q=y0GF8SM4`%CIOd+tZs73ed$J?Gs#5DO_kNImw<&@1bm 
z&GHLZi=a3`(z_M?>#J$k04paJ+6hVBq9ij9jDp3}U{Z=geWIUIB;k)s z`IgAy&fO(XcgRn{8hEF%?l${>Qs?LXtiG>8zB>5~189)x<{0Vo_i){i57VIY{P&;Z zFFv3M!8rp_OXN(>SeYw$G~=^q2BRAb#cH8Lh+(18-Bb z#lDE>(ejdW@FM!C&?}^lpN4_-$_Z)waWL_m6DeL#OIetwS_7P;JPiupNUKSb%YKAu z?aUP&cffa^7xu^G#GRo?z*P-mc-ogmvip)RH0FOwA9u)au@5-Y!!gU!DlugHvokVJ$}9Lr=w|YWk`H zM*T@nFOS3CP3NZFqbxVj`r%$tLKtEYC!&s$7^zw51NB8yvK}+oGMJ?<7nE^YG|1PI z^6;Sow?El@HBe;~#kJ42j?YEtW*q&xGN=((j_;;9-sTtjM|ne$1IbV*cGwKFqd~1w z^>_LB$=}{?UPVC9?d0G)R^NZTISf}vZNJ<#?@?tS>Xg6z31MJ1S(Uv}ahW4=`3i7I zloJ<(5BrrBkg%E9=HmON6wt$l*+6>gS?0KL%<`tj=rE~*=XHLWF${ke$ez%w=5*Z(JJ7q-XRttFV(Fw zeXLE~8I6P6jCOg2up63LOigx>Ss&YYlXB5IxTth|~ zO}p$zUm`rY>@@8WxyJ^?qg=D+e6k&fYRX`-=eSoGIp!q~y3}`d0bAfduhN-!FY3W~ z{La8NS6B{PQD`eb>lyMnA%np3f*T17Sg<|GBL6yv6p+Oeif3Tp@kbMwU||3c{hzM{{_~X>jPEq9CZ*Pf%oFhUF;bNN z%-x5d`t(Uq(tT`lN+=RN3rY%p{DNW=LjjHIBcJ!yfLo|1btaW{iH=CO!!Oy+CyY*7 zW~3Rl;5U}7SSZnui9rSNS%WDI`1cu8(MQagT4GF0THN@LiM=|W;VOGPT2=F~FVQN# z{Hf+@s(@KdTU_waMM;V{(H~memIw9#7EUowh(acU(6(})yh$qlX>II5s4#040U6_1 z#lM`#e8uYX|2(1B61Ya2&^v;Jmj;G$G8r;yyy0I$ahG<2m{2y%|Q-s(N zgiD5*3U}r*D{5!)spQa7VmNcKtDr zacSKHn4qlYYb`v%DdsC3SSCqgNyn1fKXM}$@Olvn{C<40z;nXug^r`2kxw@tFo&v8 zG*t!_5K_qWdvws(o+<%vk>tv$ZYS)wFhNXOM74xaY$o_Uyn~kort!ln(n3YUL*2-( z3C%b>#W?+9%_*ofl(61GL45jBUEothxTzzX%AnavJbkBS(!AUPZT{PBt*1hYl0`FP zq{3awJ(?5%%4{KDC?{HUFuR$rW_0<=Cfzz_Dy&)AJBA0Vmv4eHi}Wixm}kxD8ex}ewt1vuib;U8N+jt=K&Ey;-b^eLt3o6JetXl&! 
zTWzo|ffTlymz+<*fIq!F$^~V_5D6TXE2y0@K8W&cPkcEwH@2bo2_p8rp-j4TM@L9H zaYwkpR*d&3S{b4tg8Os6)+{^UKxfO;Anp&Sc4KGa==kkeOyhbQajMjK=5wC$HoI9i za0%#yL;dKXaw3tyd`Qq$ff1EEiMd2a=y|5k41BLt_lTB+u^yjXJR~>Fq zZqFD8luE1g`zgBiY?=m6#p$d&$TugJx#`40o@ocEhMP!u>;r(bJj8`nQDv4?XJy-T zQgWCdO2Q2ZbIYUov2$($U?15}@xHp=%#8!wxJ{3x34x0ai`Ih_bE@m(Ob_I}HCv7w zAhkyC0uQ=~=#W$L^1bAB+Ld&<9;L|~CMIy$1ieFYyxz`Z_@rG!sG-``oI!4B=TWb1^O3>N2S`e3*erNaquebpwBsK;fgjU;l(%vLDIP z#fr$(5O>0>FLU$ybt^O7B7^QnFDocp`DSUHrCaf4Nuc+oG3#WlG#2l((c2u{sa%$y zb>ntGiD~Ic`JDU``bpc@i>avV`z_6qI_0CbwQJH5CIy8jM7FUO_Fw4*GJI8jWP!4Y z;FBmweTk4t=z>l`VAf^x{d3yOjLUuwcHl20z7P2;Noise zE}R4&UA)dqRnzL25qXwf)5@nkHzQ(ie1rD2GqYzvTF#v0*;;=A;&NL{zSm!E3YNrs zq_X3B%vg(9rZX$=s4OxxT+QhAc-CTC=spToyZ9aLzl_ya!&ahNlBtbnZi=FzI892v4)-# z)y-@uNqT(Y6lYE6AAZfNmc1Ar0A!*#c-r-Nm}znMQxo+Hxd`K6JTi-R9;QQOOBl_oeJDf>KRADrB zs84am{W0u-l~%sO7%V~my*E%0Q0r_NyuztcSOM+{xM86wcz%-r5cqo0Lhx&xiaLLIGY;J#u$2{b0!0d4s1H>(vj89=arYk369fs-%< zwmnGOgf0`kkt!)+y%*i@Q}h%SJBS=AP7N}uLG2e^Yd|yCqTVXUv|wV*h#O!R1`J9- z$(S%G8sa#8Dd_-{nnh>~>VDJf;_BfAF#1lw(`5DR3V57teSd}F(L{6VI|T4tr9&gY zoeRCe*>Lx#t9E;fAVU;9Pk+SFG0qN%s^ivffKaSDaecG?Eq$J7um8m&srRW@aeN!x z&i0a^r&b-l>ruWq)l?h z`VsO^dQBwXw!yK-{f?T}>^au#n)MmG5=w!!kqSSF4J8VxOTsT2YY}r@5z~Wp9k?Ov zBi*lmbZt@}PD*nXkEXIso$4OX@&YGTEVHp9Lt*Q8ud9pZUXo>H!OzpDlC9LkRjax5 z@qJviyt3Ne9y7-E!&E~$VmqmwTRJ<6XOT?hGf*%|!Z?Oasq}G4pa$~GpD9TK8a~Zm zQEESU*{MW1J7N*382j{xViYPaO{yy){M~I8>qJ86d7U@rN0FL+x{@8|YLP#eh?OvM zJfOj-MNB<(n3TgQ-5{{9%};l0(w?kvmG@pY4owHlk$hJzD2|}#wXK#G+<9Qxa49(M zez{g?5k0RzV?J#AQlro#cr~zR(T~hy!kgu2wxdAl3bDEuex3bw)4FNHU6=W!`~B77 z$6>~oA^ECjWP1vrVW>{an@UC7Oo?GOFUm>acMC=A@F0$17D?_w{Ogjd&2+4R8((DG zMd!W2;uOWn@3u2$7vOEL=ZSt~5C)yg-z+sI_^7uFP04(+D#1!C@>4CBuFz@|O@!;a zE=j!AcJ~tYnWlGP+5uu-R*cx0)If=JHIy@!`U$!LlauOVdD*7YGUI6eT2ZqB!s zblE%zoiB5*JE0^K@(fi2>FTU zuQ?u*EZuK?nFx%GeaqWV-T=Lfvo^JKEsW z&r|C7UVvLxaR+g_^J&`DbSFYZaZGN2oG$&E!Lq=Yn;DUunE0j_lEN9kP8LDa_h6s$ zX?Vzum1hQi?-ISgpVq5DFzy9i-{i3eGBMz9GQw$h=fzs~*#A?%RCzv;@cN=`um>^> 
zi0iR?j#ROU^BpCP)Cn5^l&`w;5B^Oo)4TES)@Nd7gArPR)77`eH3I#!%K*1l}ugAr2T(0tvA?Vi~tN|bTJMxf2V%Np5ho2k8P^EUvm&~*$Kv}N67!XIc!9O z%grY4?A?aaY4)*!z*@%FR6CAE%k8m#0E*Mb0LvVl*`KxgcQ`xw0XW4igvj%+AC-;b z5z$ZLP*TLlJ5GDRB%q&#Ddl~CITFDmkz~%JgQ6ZdVm&nOLz3~*Pjf8q08m@?NX|O5 zWcI<)YUt^1`|}1oFxGCj#!97N+yyg(D3C&XZ+*xs_+Ki~6EKkW zfc=fkF4G^e9|E9YKfD;v)wun9bsa9Q!mK7dIfX>*#f*Yr>jzziN^WVsDp&MeYttW4 zz8eitGQl4$>zGBAK@7XyJDw&M*P@ZNOC%2Ht$Sd&%!({^9B&W8icB zWwG^2T*ut6MYAM{w%u!3ub(||cEQzXH35V?$KZh&-@@{?i}|>*dCfz5tp&`e4~G09 zD_$Bbd@3*pWtv%`^RQqDl>2h6h*HeGL& zEJ}&09>ylJE!ky=oiL}vPi~g3z5PfZHGiCiKaNXOG&~vj+;NqNl(hzr7C7nf^Hg%7 z1n6bi?XV2cng$G;;KSpFEd&S=O{#+riZWP|TWiGFx7@YohyN-73;xVk_|TJ_Owl*I z;n<1C<}I0Kfot7~%j+n5Tyy+Uc88GDs&=VRFW!iWzyqx_Ll{7HJ}`V#D;@ZG4tL1X z{W3QJKnH&f%E(9(1Po}_e=$$0@M!^i^1d>s@_v9pMf$pI4C_l|O zYc7*46xwe(-PoZ>r`CbVmT!H$3LGBkB01et!l4iG`!9{Ete}m+$H?_Gikl2}*0Ci$ z7p_tNd&>mQVAbkU;q>O$`E3NYF|C$VSx zG-FOxI8v9-fZe9L>SEE5JHTG;7}*_JIJyl$964SB+bRdr>iDIFw!I;mQds1Gb+0yX ziTiyul?;8k5Nw#50jb|w%62_HJGU71jd2Hop6zx``6^WmklI}gC;3|pH63HSSYK*7 zn^w%`l_rF}F8VbGfJ7kcqyN9CVtV7ic{+3?PzB zBOTT#ofLlbx+pxX>GFB}!+@-*qa|c#7C9ror0yV{LG8&FW6!p>G+f{+l=*kt(iHpn zOEZX`Ezkp@bpFfN9Njmr1S_Kv& zyFaVrzRuV+WlQ>BH=vx41qM$Tk(AMISV0MLRRX_7l;?oP6=2nAE;KuA{naELpAKY_rkvt8NkNSwb+bcl|

zOq)q>oJ20ir6zl$JvIHoN)oT_$IEXRkxZOf+h)R(+pa_|yMi|p{p{bZS*?xlTpKer zd`VJbA#hpHBm!Dv?9KMMsSQslKWjfJ0mr>e7=Ck&TC4B4(M%{fA!FjREH$kZy0!UK zywn{)-&vXUMd|2J^FP%YDHc6v>u#MgH^<}Z9(=Vqlp=8^r9;_=OV&(9nen~m(;afW zgNJZuHlPAKWjmq~-ZM%kiB0Oxw)Jw8{&mZPhuRn14i&8(?55!_BK7N*PxHrC8>~*+ zAWF!Vgo(?;Ob7eCHLP7oW)4Icbek=-tz_i^>bYdNj9`Ei>m}GeP$S^{%htg;0JfN% zeoQ8-*ETjXPhcaHcqCx6CYTQpL=B*Ab>RjbLZ92&qQn*7QH0Wy2TFJ1AA2#KCt^EH z;Vj@93Tr8xWAvGCXvq7-`FPOu-$~o9_7hbyd}LuhH6-}Wrn2x@9F+E-S(7au{P}Hy zxokT>IT45|BwN78QHLv@GigDEbnYafUDnY!WKGSia4SA-2xLi2oxQky^h|7^oeg;9 zOXG+M!%v%@__-cdRCjZKxQgxy41ZX4;u6d^6ghDwv{Or`KaN0g$blDxre)2WkSxi9 zmCmsKlP~<^LjUO($(QXkV;X1B;RXwSz%GGKKR-FpQKtjJ$6UEWvcCYPL>4KnxHQMJ zd}b7CuIZqNfu_S+hxYAZ57oC4b{2z;V}do%AL$3Kp7o~dyi#R|av%Ll@};``5-}dw zh?c#0Y`~?KED|m;u6N@{Bw`qG<<>Ck*gK|AzNne85Hnljr;fzG-YoJh2~4F8l1hMsdYygN8O}$7 zYSTvDJR_R^L^AXaf};jy+~C-?D7ayb7or`JdKS zXCe_jJ}Z)Dpr5X{{#XGf{^Z{ToJK-QVI?Ke-q7H+4r3G3IMTi}qMsC?bfl8i;~+tr17ZYm*eDDvrSw}Pmb zOs}^D_U5HW*|Ac)^`X1-FI?#A1SN~fI>7{VHAql&we_2x>5@; zKHh&?4t?xQI!r1yHnr?1Ke8~B;sHAU{PtX5A5`PceN;WUk$~ea=3_sqh?%-|By-z$ zZGZ)gz#-m-Z%0>HIj%YyJ&*UDFVK@}#CSaQ4}G!qREuXGI;ZirDGaY$=-U!?*(1I# zO0xXqij22`otMDRoRh8f8%^!B?nA5j|I&gAJGf^xH#-qsWCc$`uM4szsPBk@ ze@Npky8Fojr{&Rd@X&)iEiIfNv;EK0f=ezJc*k#VjH>+7;GK3Vu5F-Y(xEI7%Ls9=LI z7cTB|ErYa;=z(&{EZOR0_y=(M3hV*kT~Rt3a!FOH%uB&$B#3?)HDc-9imOtq4<9;2605g&W_JqRYqy>i&s%F z$ww?M#@gWlXr-|oiX|bbiMZ!Nf_^E|?W7xb4^V4*(zpO8CP#%HbDqrptKXE#zXBH>@pO7%}*0@hTOje6Tn1op98^RC8YHbXHBRx$Tv13nBReo`2m z8>{bG_P{3w>KH&A!b&WbDZd4jP#V?3pr`r%QzmWh^o@E(=AXKo5}5gZmfH~@E8MlZ z<0nnAn> zih3r+oE>X<2Ret<`slKbRyNIOSsN17GhEpT} z6=NN7Ahq0JU4)cw>(vsaMcF`~An?&Bk6s(Xc7iit=LMZXu|kSgm|$8P17#UgCB}0= zWseIF0sA+t!?YII-LnD{*^#$YzyGbTdONKuZaubpuM~4#H&_~$NOG>D(BOh z1CMSpihWrJcRgvup9Qw)6^hkXJ)j2jhzVA|1Q8vDq#k^ieK#OZ9@2{R1fLuizwEJMK$-ot2v_tP?B7WL(Fj5 z{f$WW44f%j|0U0knPYI3Z9)o45+fv?&we4!Xn<1uYe|@?Wh}k;KPrrM>YpFrSiqrD z1QfQM-1U;&_T|m03T%4)U+U+iv}LMwuDqoA#9BiTE)(tV4*v!vCW$-Y9kK+>%U5c- z#1yzI*C~~szzg(u)S%k|n#%dVRX8^655&0fiD6QoTCN#rXl6zJyAU^z0*tuAtyK7y 
z^4Fg54A>K{ozIj`m=2WA9LRM_&tJ2$bEnMH$N%NHTCvVih;HGh(rv!}v3*sMzL0KM zqlxQa8VO-c+Jm+{b9ac2nD+X&YcPQN&3HbW=B`~`k}>CLuA4U2#Afw>taH8+3At_N z=uDN-s>+3?1D31HivET)Y`v;vpQgVPjf`#b4+18Fu}PhaI`b7Lp5pAWA==ccE~OI! zVLM6S+(emw&Iw@Y%`?_U2>>oHZF>nguHhApSui>}Si=h9Um?SPk_WxAn3K1D^9mK8 zbNNhFncJm5=$mw_r&>xhIPJK?)hW{jS;WU4huVJu2(VR@>H{+d2$F~yK)X`UkcHz9 zyk5oEU%Wg+cV|P0yX>Ou0c%u%^jo>sc(wlT(hlT@Y3KbV@VYF1CJFD%Q~$RM;UU`g zuVui8VSsk+Z(BN!7brEi9N`nkkJTzsdj2nRg&Yefjk^7xotF}%k7!PW3M%)n@S@+RkL$HBE;q_*3OVr-s?jO;=wh#~_x z15_lx2R~@Xj;Zm{DL+FrqePhh6#hGD7G;r-u7F8H&9r9GXLzphFUEmyIV5mehogyD z9+lesC&XcPewQ3kROXWX?9WwFL?j!mdph4`vl1dz4zt{K2@GW065^V4U{g*Pt(h{a zVYROU6Z~P5892$Z2PI{Qu@9Sc2GEXo4w=9RJpt zQbtWRCF_>@*%6k3f0IwQ3E5n?v4JG_kXP+U;kh`Qua`H*ICPo4LXddfLRo z*d|F=$0e33f`B;fp|r&1 zXo@f%+B$!4i+!OsN7drGu|3zCQgI9X2{F@Z)p<^e=t)8)Wr(k*lO~@TrVPc2{4R9AX=>lGBYgv8tQij7 zuPLWkUxROJrh+ zEgrE_cqCA8O}IC|?PbU>X|RuJadQ8cuAdGL3G=bDT9)bkuM-FR5!tJ+gfpWuNQ~tI>PtpOHZ3E9Ms+*x8o^SmvK$R=au6bjM@Y^;T$6geN z1(i}zGgkBmXgT;aUZ(=S$Nj`f(^w%3;fHOX`7$+{AM4KJnZN-BTT!@deG_~~S#)Le zu#HDg8qBS+lM4QHY9(oadti9o<)ZWOZpnZHAY~S6+msbg0=m0RjL4fIaK;kORHizd zrUR0m)@4A<-@~667doHcf7}xpX)Y{3gG)jWGKYO9{S#sb?W@6P1nC8-4~_0RSZ$=; z8?#);u-sEmpVC-uq}#r`6{9bz>AJL8Yt;VU(aOV-o^D3XaWiSh=aF-fnUwfPnfiWz ze?O>kr@Lm$=iTR*-tMF!OZNmIZwfV0z9SpBC<%j*_ivWO4j&n+<)Q~(z$=)?-Hh#Y znvJ)7?um3Ad!hH*4vWdQuE&{%b(~-Xl!R;iB8HQtK>X6$$F%o{XpfpPNgiTvBsQpy zz6o3^QMUX!AFfn!iYldFZw@C-fvP-z`+3t^!DQJ0W5cERuZZy)BS1b6m%*?t!y&pv zDcI&Q|tzI4pg^J#ae8>K}?C zo&+j;Ja#wF_ZQQbg|qn*p#VC!d21>JnP8~vNW_G{8Q?0{1HYWX1kgX*_GR@t-&lpN z_A&k!b8i(@SKDoCLI{Kq+#Q0uThQPR!QCymyM+M3-Q5Z976@*^-66O`2<~uRzHk3$ z?^CU+wYsQ_x?T;|TyxIXdVfYARxQ?zMlTLu{z&lNd@*Snhcm90>xy&ze&H{V5E3`PO?$p37=SQJ=WNyKlP; z)LcF&g1pQ%&htw)mU^*1rdZ?<*i9llj%|)!aXhR1=~>SOlceg?&3?0jhndS6!@ zS6i)8OcE9H*h?YWeGKFMKh2P^nNN~&xc${va0We}UY|D|Sd@kkHBZU$MR=dnukx+g z984FN>9zZG+1^)T#xc0?axB{kM|Dm~#uK@3q0Bbg>P;YotI4DvmenHq=ifY>kC{bJ zcjP=9$>ESb++3gb6FCmdm*iSWFv@Z7lYt2Hv=D;CuztPa4(%?%jXN-$!srE2P|hn= 
z!$OiNs(~YsDk$^>{LZQd>!nbFybPDc5TM71A64+27(V~pCCqH?;h5Kly4nb1)Vx%h z-5rbyJF1kHKiaz3p9=XzXj}OvN;WEgeQz6U@## z(uU?|dn^Prxj7T?%}`67Q(Rvw@yw2*8Q zI5;=DsE3{sWlvE3cKEz=Xgi;%Wj*k>Q>w9_u%(v1VXr$A?}Whoo|@8CBayriYLiwvx)p6-s+7@Su?cbi+J zH8hri1C8TjMF};#kq1z(c2I8%tM^*l_3=^{JCl7@-MRbG@ED1bUrXc=Z7q%1MHj92(+Z55Td6hV$#}RHDCbN&d`_2`)Us*9qSeXYMo-~`fyR_@K z5k?;B$@Mzq%s>U_E|}ru^GH<2xTMSe@<0<9&Ga%4KHej(VUMA_inilN>$6y(@U8b< zl{u#3<|oJ-MmoRT2)gCvs2o;Vte+S%7XPCp%Id+aQuGX#5!2-He1GM)ijbW!=XaI6 z+~sB&D!(aS$)5afU6AEmAscx7KKEGwjvovVH)Wm+!0f!s;F$I#Ns5dtw-PTtj<&$D zUaT`1Kh^}(%gmO#Z$0G-njDv(fi$XOIVfaHUvZvWvHGQ7k>~a3tjm+5*WQCF9n3V% z>eNCW#eY}S|5|tsmHWmnn&L%!wn{5+G(ZwKs|r>Ei`}xL)n)`Irox=#j<(c+N@`U* zrsB07UYEh1weSiIqClNaiwi=uzN-P464C5|y2ZIR&1NvuX9uU@W+SY&vh8m04q27Yhcaf=B)DS~eHzbMuD@qt zSPvYLs07p$b$g6_;w(Nlb9dQa4nGw1N?loZy5Mi>9Wf`~#%9huc}rW!E9~!MoAgH3 zY~61uNd;`;9VmHTNX4IAdKaFN1$udGr?3ROWq~ZgT(8O#s3~TE%T-Q;g{|uHM}UH& z*X2x-_SZM90Fsz4M)Uk8|J8%i@qPUfrI<#Pa=oGL&r`7maA(2}GTk=b=CB}F+yh1m zBS;uIioTug&3(y-eZv-4;Paz%dqh^W&a>4N3glrL{#M|V>6ClLH_=(ILun@1Iy;uJ_NHUH@4LBpRjw9_)d_^%+>qw%?$+V#;Rmdvh;+ z&Wm`plFaMQ!n5`TN@RIBg|(IcO)$KlMdVvyG^%Fe*A0snv^frSxN0k1krAT=V_3Dj zXR8=ZNc=Zn%IhYjxN9V|Uv93LX>bQ(d^}mV15{fpz>gKgQ zJV?1!suuDElu(;`nQ>MO5|7XPeqT37*08NdvmulKL0-E4B?)ZdAbO(GH1-UmA}NZ6 zd36X)&PLRZf|0VX_M`Y2y{dXkv`rr#E4HfHq&Mh&r+wB?4-a?lo;*%QdPE*zX#Chi zRSK)J>I>+@V5s*+IFkg_<|71SsWx zemhZyH}O3B@umedFCYzPQU11-qhy6~0{$+Be+@#b#pPJ!K@9KGcMf-I+?}zfXDWfJh|9He=cSlApOMfL%%FA#$CR_XNl1& zYEV>MNV1UhBgP1tel5^^a~kN+kS~`T4&AA^9FDqd5Jb@DY|V(_bD34*fkH{3bpFM! z?;6q>(+WjMiQ?_kf3g5fAbIE{D>Sjm>q*lQ#fT=U)UP3QwQPQ1-0ukxRqr$YB$i0p zrD*%J%f{g`Z?RDE&{X8@(u>q;jQ`Z<^$IPn`LrEJVDIiiN&c;b3=cQ=Pi-7d8no|5 z@iVuFf4iFq-L}iVgld2r804_J?igEcnGY?As#?e|pUE3?5{!U3;EuiRf$oremrlu# zkq6K0IhNT9A8sD~7ClEQK6Ns5cc<&wAAr?cK-|~|7~CBCvquS^WwYR zQ(>j=r)c#h16&fRL|!1Gg>wK+M$$yaMdn8(ZIk!-u2a0@9`!NT`T=&NZP`1*cAoDjp<9Fb8`{c$6!w9OXA;me)XarBS@! 
z3dkVrF&#-Kr<+{UeqHt;k!11|>wa1`7ud2Wwy#D$NKr;sem6X*Uux2EGPpqxZrUe( z!ipp1t6J2+p!b?N66+DP!_lU}atg&lC5-S|r(AnzeJ5C9X5wTorDjS#b-!M54Q%%R(?_n}01+29wZZ36 z);*Hq-EIBt_b?0s?iPEWQ9L_Vq|AErki(FJV1=PQ$y{1bd0Oo6K6# z3?nm490xyQxXdGoOwEOHDYoozLgXB%4q*jK7gp_7&>1ix_ZAJ+pv_*dRjuLkdo^)! z{v8*7HdNE&%gLqHw0Yy=LK1sYqEWye3FW zmRH*&Fx;HB5U<2D^oB(W44e64Q+>1V>a8>mJBO{ECzf z*v@5D%{fvNUAJB976(Tg`V81uaZnXa?8g98y|9WWbolkYi=*`C%^EYgRjV*i{DRR0}z6rNAg24e>m126r%a z22QPrDF{O4HI%(V^T(Q`fd{G3G9uJ`6jCuJi8$rP^>27pj(r(J&E@kE*VIL-hEu+Z zt)C{!+Vol%U=N_GT5w#V!zFlrj0^N|RXJ;oNQ2B`&y2YEuaYDaR7umS)?qronGN|<8@hRY~Q2i!X#6};SfmbBB z&}895$#EQ#SN%BZWFnKLOBSGbz^cb{Q!)@iKrcZZwhZK z@;vGZbVdvFA9wEsae3YB^l6Lf1)nW>mp2>>NXFBiG>IV{A+N<^b0uHK zaeSHCDN4jA7KLX`qp_aPy6i{LYSIU{cXH%(lMxkF8!du?5G(d$Nx~MfPCumBJYBC} z|2u}Jj-gjd!qNJh#vi5yA>pC$SYCw)GohIzS*?hH8!tC8| zgKk7afB}`y3w_hgN2vI>XX2)ic6?xke+=`ePeD0Z-N$2E33G2dy$^;>Vt za%pPZfc zv9|{UNvVp4^X}41PD=AjzF%~)OB>$8f7c2a(s+F3r8hK(L=9m>9fydv#w!_$p`T$Z5zd3UyVQ-7cNm8?Uiv`o zG48M&#V$0H>{=~x%ug22(#GbWSfp@ZmtIh`(>iKMiKQG;IhJB+Vr`-Gp$gX}A<9RK zF@_;bzOG4vf_WwEl>LQRw#r8WxqBEbf5?E<-h$F)-u}&d1u^o@8mO622+8=oWMx*E z*r0+W-BiKjT%+%iM~~OPktE#ok0C9f^)pp9OfU?1IA!l^4ye8tcZ(eU^L<{Piq7+Q zy-2W#2c6B($)EQ-i2s1P1CrKrmW+jdNB%3hKPP?8jP_^*O#V_|e`<^DjGPU7tT9XL zG`P=hT*cV4I|D$d$9c^lA^nA=w90UvMG1@WS$6p|Z9ZicMp242E5T0qzAfbWzWo^u zVJ^j|yCPv)@&!o@42`p9)G?5ChW$77e*n*5T=A&iDPJNdzN=$M8;8EihYYGmTa?-(UG zLbK_g1VEOOtJ-n0(JjYbpcfCi4LC=c*G^owhymXK2Gr|TL-kmbGs4m8C@RY5qE`nX^G_qz$7BkqM}kGSlmhP1s{l#VV> zAj_a6VHwy%*Jl95L$Zk3fWI^472V)cy>0)F7q7TyXG}4snzD;ka??A;=4gT;B*gCH zueXQ4Nasz{ddKuAw7XC1(`&n}CE=Fz3us9#+w3$hxm<&m=26#`!&zWHG1~AK(@|Wi zPw4Zw)hQebna0Sg{66S~4tw{@m^{L5@MCbMc%$(n*Dw^{2pgh6c1ghK5N#S=*3aiA z!H6R&PuQ6rDDzVQvGy1Awn^5_mTT4uNc(ymL)N}FkCr1^A)R!y7(ka4``{JGp8B?w zo~!K54BCFSEj)qgJ`DVPh-`{Oi&@{k-t%43CA}dPRP##oN6H@_#t-uZ#bK0c-s`6@N{;K+H?@<~aUAJ07eV;TGhBmuUXCBC=nX8oE*88zl z#9%kEJ;F^bGTFPF&q46}%AW32+VK@up`mW0zoC%jdf%4VZfk#Rv0vvD?r_7DWhIO6 zNb{L@^wSQN0EW5cm-joxe);+ppNq2Zd~XaYnlQOE4YXxwZJw_(##N(!Jij?5(lm_U 
z{97i6d*Lij$Qdi!X7BI?r7LPNSXjJjYK%)~nIoxzy*}Nk8t?0|Ub1k@-9ag-Y*6GX z7Xy7;`0DVRM~;2FPS||?&HgAP{7-nCHrgLU>Goyi?`g|^h^SUSThUZ5o~pC0Rqf8# z_SHN;owWNM!9nN=;xSkrDljDeM6#x6Y_Y|M2_Nbg1B=mnPY>TtbUZyt3CFvW$RTuRudXAoKmc1 zZH&aH)G(w1Wo!d90jqeDA+Lu4*&YrRaEJ}+lZb;hpOp2E47_oP@CnNrv(SB$1CWE?%=+ zfPX0dTU$wVJU{J$Tc^7S#RVsJ6<0wS4-%JiMljv3p?@La2M5%Q>`Syf{AW!(zL^9J*UalJ+`J=A-}7*U!9&uI z;WbgBG5l)D?TH=Z7=oX`b3pT|)NKV7NR$x>J$*l41^TJ}jDg>FAe|^l{3c;_aTkQh z7d+f8s^>V>gPfQCK%-*>q5Xk6^I<211mRU(hm^mn3m7%W)s7}Ig}{sQS8eNR?&`yB z(h#ouxSEbB_cmxINllV98awx}#wV7l^rZgwp`xtm{O5;VY?FDWa^aQ<_uucg=?Nj{ z+Tu1)X68P=)8J!hR#)>`Rb(FA?)G9r!s7D%-p$A?XJV@2YuE=Tn8<6fVBs@UIoWet zyQMG`cN?Ohs$kCFPD}fT*LIY>R;_L*+N`cAEy>BO^!$&xpkY)kSYp9%)k1BBD#40f zfvUo2$HnMKJ0v;pb}-@jlmYKD48_$BlD6c9EKHSEDkoTSiHAHLV>^fg4R<_g* z77f2PxWx6pRuDnSwnyvD55`WdJyviAKieptZzhb3I3nGGTnXNCxVB+~V!2NIJ~s3+ zHC{wr4?K>e)~3RXrk5wW{exveDqMnvDkj?u_jRzJ^vnJ0S|H?63&4NbDH*geg-e#1 zC+lJ%#-qUQ75Up!5BR6h=6uBQ`;y3uoD$0$orJv&fiH_?XJSb+7UqaRdiQ>mRdadd zaPe!6s%>hUyyun(A|lP}W?{!gRLUYu96y8h4UrP_AeYlrEhXZ) zXp^#RIFfBLH-2LLwi~<3r8Et1-suE(7JoV>qV%A{&(xATa*WK0o6q6$L~=^qJyiEx z?0He(I~reZVxP!aGI1!?s|o^-2$D}3*mFC1yhY75ZqmI}Mi2=j-TOHzI3ER3Le{FA zxpeHgxpmTGIb-odirT@LQ`OZ+^NM~g^Psjkj`(_$VPFULBF4sN{I8!pzn~$J#m7^*f05%KdSK!7SKx2zKd<{rB;TWK zLqaVuXrfYL)qF=OliUHEp&hH|-cH$|Re{T_b}x_Ti$@;r=c#M?Ro}cZUr;4WckUzm zAV8k?faJrfCm*PSNU~Ww-g)yGhSY>AkN@+ab!g@8on&sAZ~qADVJM!6ukZMk++&6KThC;0M1IO2PtYo~&0VL2 z#-X3>CzuWD7iD2@t9Rh?$Gi-gy~TALT(kkBuT;6PO;;HrC`EGNv;Ypx-Va510BiKu znK0tN{O_nYXMo;0Ns7XPmHFR-XZtVokWZmBRp4?}($mIHQZ)q=IrnhI$Z&>Pq^OYS zt4dg5KKJ(%nyF!8fPwQRvIfbNE{ggWkfiV2N%^BYPP?n@Fh9_JHl{w)V9>g69|YsD z%=?`znxta(mfAk97+AW<+TC*{{K3s*`)u;P06K)vr2_T?n0AnBua6dnI+9^hKB1I| z6jQhPaXqoPy+~iDrpnwarTVh}_wntarKU9l=fs-qH z87Hz3Ami8*M_sjb4dj)s`X`Dgh}D%t`=9Z8|KC-#ErR<^mY_HZyt`Yeqy zlnoXcDha0isID%$zJC;~fwlx%QtEa&>7I*&s%E+J&Hi%>d&pdtZ!C@V}F--7!zd%RHfIUk#RN=9EZ< z3T>`e*h9RzW7KzTQY9WzjNvV0iDP!nqD|>Dj2+kPs*hv}4Q>kvG8-?eGX_{gMZ&)G z=Jl@ptm9G1aFus#;2W(NHlfBbsmL2H*Zr&$t#l~?KcUObD~Q`l$1Q>OOBWFv7JkC- 
z)mOHea&TRKY7GZFTp8QJ8YoJ|yBz*?VZFs92_ z{`WFjemm8?$B&*t(0zflil!oVDt{E2zMHmf6wJ9F?*61wf6Mvs8vj(2hwl+Mf=*UQb!*++2N7+|2|kF>APH+Y#PVc zButJjk zRVS&t4u;^2UJgnW*IiQ`6R8e$yXK|QCmj&}eB}3rP`}o6#2Azj^Q1+w4y-i0n%y4* z{}&Vt%>y^O@@nzA{~}A7(JR^FxCMBi3(j*o{lC4=WjV}7@n_4mI2&dcEvrcp1-@E? z!tRt*HQN36K5Px3HuvgpH&X3;VVH9tol5JMguBj{jDxE)TE^PzRnI8;A6tr6por8M zO#a5$Y4(2_v}{e(&TKbEa9HQ(^n6E}oiq|J_o^B2_&l5zetT$}l9y+ao3OgAj(*t1 zzZ#^itOATl5Pj>#6_NRs_KZfZGY%1=km`0_Q+q4%$9EJ5j448 zs?Bj(`6AtbWYt5MBKSOtIE7HUbAvx9aq{FsN@7;=bU|qDj>)l|%FkGOUY&@%e?MLU z$(?pu@PwwcY|y6llE`X86bpf(=)LTV{{%5J3iwEU4nP|iK>>!vjl*V{%cz+;M>Xyb zd6ITlB<~r3G~Nq;3x@vK6tUTt(cPEMlKOacXuD2^@@KWZeO&NxJV%%e#B9i$y_EKl z5u!v}E!%Y?SUo>p&lLud9;)QR!{Ep~+qM_V{# zmbIe+TM^o-wq=4cYj~9RkZxk+hvRNdAf~3o()LE%nAUuUpnsk7tk%{5erYXT6uA*8r+XHQxaK z;&@5^#`<~Y>)W2MHvR}4`hYVD`@9p~nXb_G1j@Hxc#GdjZ6r28D=z`$fc(m(E zOzhtJH|7*TtV|sNDq;UFTjUsa_t#Ek@p2a&nd6+BMB%j*vrL1=6&F0Ly*&};qVE`< z8v~T{c$Mu8nDJxaOGR(um%o`3aUx5+lqEv?1%xOd`evz*8zSBzpJOpX-E{5mRFdW6 zL7M41gIA%tc8q`NF!C*J)ECd19i^d9pwGN(>9HlXFLy{y)yo;HS@PpL{0Gth=Ofn8 zr{@bS-L)mI`^59}XV7;Xo#!%MX#q}0O&?g-*eyUmTKMUH{?ntfQ1cP3N9 z=kZc4=L&*Ck2^!e9iffyt;*LxtUnzo8*?w!Z)6|tL12M;7Qa>vsLW?zf;@+}TC2i< zTk&FA$Fr_OfifCzu-0Nc0mbm6h&)G3DBqNCL5cL=NtsNK2ZPv~m#l;%@QA|JYE82E zLDCkZ*k9&Lg^0?6-_47NXKF-#*Q>sIOnvHJS@wN6hJF@Hp{6S#9iuapSzcYbyJ6=7 zKSJ{kd>&54XWP6z*`;pc+Q#@F?!6Sevck@KpM!osU{Fh-DI9;$ntE-d{QLyPrRo?! 
z40VuQ5@rxfez)1XC-}k{hOP*eD*VTjpY_&Jv$j&Pu+T|GBbcL)}CQXCzoU z_dKc&!a^*26|as1&P)*TC2xqm1LYX-e{Tb=nbaD4n7k&AVukpkga7%+v7*1lIG_(* z;kdjQpcI08(;GpCKuypGY!%i!>G<~B9lV%t9Kw9U1gkYf^rYUZjvG($?gAdDZ>e+wwN@_%F^$@sFNMrNWub zu8$27nZ5796faQqyVaAGt_^&Z+Ba4db3}di!zT0oOAB9x0NDe)ac1BB-AbobGd@O% zZ2vO2o!F|_{bNk4erARf&>ZKMjb6+cWag`u1ln8EkkI4hm05<2nI$S|ox*Nj#VJC3 z-;5X^EW#);wiUc20tSyEo+`&0czI0cOA`OQKuvj(*C~FW8D`0_zWP-bwn>Pwb!MFH z&lrMF69wKIA(Zm~Ss^t4RsJxZI*rhisF&*RJ)>_`O=s!DU7R7UzoV{BMYv&>jv#tU#U zj$^G>8zQWIEP9i*i(j+zuh0`R!3OE|YN=W!gwoKh5+TELqeXt#N1ERfs!Ad2H@Syd ze4d#1r~O$X_uw}0H}oBpdOilVje!gux`snR22bFsPxk`y%Ut5}7MgNrcp!sCX3>B(@vmlt*Ono9xX z_Kd>hs~XINnPqt=TGtFvZtfA?D-f`vAkZ*IXFkxSikBa2_cpm%$wYDA9)8>v4l` z3Hf!S3DC1!195x_^k8@WbbqnSh&OzgqJY>a@V1nx75>G6H{w-)?Tjtmod2EHhy@7V zM0IBvvluS*XR!X}!I;K7b#_u*WwVO^2q{`+PYR^s)eA(UrnYUD6!*@*D|NeJPUC8- zV4@3w#^U)G2)L$5K_okAmGpDMSE~h!*mS`F52DJon_Lvj__$L!8;VD{Yr8dh`T|%a zGl@JZe#J12Z@)XB&9Own-9yv!*f01CvkbN;eXe4I6sM}2Vco7xp>P*j1;rFc=`wBh zES^^>W>B}t>KRj;{V+1M5v81Bx8MPxo34$!u=2NA3|f?yQ(UxK-@&Dd-Mw!(Mw>`M z`Lg1TA0L=H2O4KmIhjorY5$5nJm!f3n(cRwCQSe)jp%)7lWz-SD-tXVBQr4Z;H2@- zkiX2WwvtQxD-kN+OC{_ou4_Qh4^N>I>crlD&^UlMYMUlrpmeDGNE0y-)va^$MJ0&d ze2;ZW?n{jvI6I=3o)9q`zHR}OXFvkX9`XVHhJDdH#Mo&N#&W{VVy(@tx*QD$5>|d& zo%UjyvHni)Ow@7Fn7~BK3-1A8PAMblAC&NWJR_x(IUk1feqI?_2}46S_^<0vT>tb%6zjyZd7LC}ooE^_-y+N}|6l^r zE7MbsiK_+$8|jnI+fCA`xXhVF6A6nq1!`{~=|rNqa?3W6T6Pq#1+0%R%Xrcn80|0q z0XbqCf1T9EV3KjD?^lhcGHNIOt7^qY|Aw4-H}D6tEmSW8E_={9=GNdx)uO}2nl7=@ zhwS)bw?}ShmCFM?wQj1sw_tC}-2QVNf0TDm#&G{$KD@@wo&7$jTa(EZrXDx0RlA5? zq7-SK`3p4(@VzDj(eYHNmDM&N!yUMiNdla(zV~{TP5FC2j?4)~Vc+#k0y
Cc%i zM#4M%3$FDZG?A1~D`2zv4qI+lx{;&ZX*+_`6c&TD$W@{dL|?tlkS?zHss&-6{Fhg| zsbeK3{R6h|X>aWEO z=;a4xh^+cq)25q_*%*-p%;LW9-3;(Da)wDHVKev`eqy+s)vGus@#*c;Pm=8s@oay+ z&u$Oh_}%1-68Wt;OOUn9mn0Jej* zlAro*f5f~PO}nw%d#VjGZ%Xs7qOnbgQZ#QOxeusZn9X(;|E5LwJoA0#5N{CIRg3() zU>3DGymrb$eVU?jNwUB3im8stXs%LEz<%BdfFD>grHk2IVmMCXMBa6?8_<(>`Y1uS zf7+jY*8AU!ssB(nNl=NVOTGYnV3PCkqDsKF0Mk$v$yYXfO`%LOwUL2Y7`0^A0NPC` zF(T>sY$$#%zsNLh&_#^)36T&1wI@J32EOm(n#|;i>*|Qhk~QbVs%1q=@@)eb`nUwV zcIpWoFE)k!<3cCqUF{7&v+JRi>YMhla;P_F-Isyg5dpO{IC6PHG3x}4Qm z0dG}-zw?fhDTEzi>afd?GZO>3JCh?hI#nL(O{Q&iTL(gybfe;Rg8Fqr^H8>1 z*VU%Y@^q9_^x4STWP$2Kvm%~&cnyM7??k%S*=L{HiIm(|^BglPWv~MbM=o7DqcfI> z+XuJ}42QGm6_bSAPDRzYOoLS&B;>!i$lCy-YVQI6yQX`lVvO$@-+v3Mxq-_n)ZeAV zABSriTC4L@<=#pu=V7C@0E{K$JV{?5?075Kcck^c=f~gi?>6{2UFoQJf~R)%bTbtd zGFM{SE1~A9n2t~pt01O<;??BNNGGT4b+tXW4N9g*Bvm~3BQAuG*i(5^kiXz?Y4@-K zTB#PyFTRW2T@BD`j)LY(6lgk&Co=TnCL6K&`B7({?<(5=XjZ^n;-;|%jp?PHD2?h_ z5ZFJ7-SJQ97hJiYk@@I5{DmZ;d27V3H@_-j#Ik(Fwk{K1vqlP)PF}KwGKt81o%kxr zQY4t&dV0#OI`jK5+^>ztuUvxf=AuHZK|;)BNr&Wi_3~)NXQ=`{Uk9C7_Br)9X;R_Vo^~X00h?RLpa^4 zNRwopu3)+&D~g9TS;#d+yKM?#01Ad<7vApUJFLnM#lZ=2qV7gdMkWlLkV;cu(gY=u z8RU+f+R-5wOtl`P(T?Sqe)h?xf%6O#lP6`XnSXARM|yiqe!fQaU@BvH8ZD+55bU^| zpjL9>uCY4Gx;SArjER8~{bqk!d4{q0Z+M@$k=LD_CKny`(5Zz5ZNncAzWFG}A^4$5 zX~q%4&BEpEDe6Wevys#Rav76a0jf6wnfgu2-xGt~*unh{Rla;Ik(JwPFyLW4(vQNP zr4tj!!0^n~;t*JD>pV`RljU<)%;f|p$udp6=)h-n^rg;bB{c$Y80{t} zJUOa4Mh`?Dd+!@SSWB2qS5$Y2U3B5bN7RZNpZknPQSF;mP^LghHr~{?==4!>B+i*R zCvjr(iP@+xSPQl~u(aC@!1>F|b1u1>7({6<=pQJo4k%L(h>00@1+A-u8C-p9W_6>2 zbZf838L}cG&!`_3-O>q|y|W5It%h!jYSej*mDWwl%&-Z{BcnYUU!hCHsIH)8>=j*RPU1X@G~PXksK;rtyfATVNxrkpFU0Ei(SH@U1)HIEc`r=<^yryc*oV5!2qMRZm~a02w-BAv>xD!W^Rl-Q-j_B+J<$7q zQ5F<3H^-W9#$-c<`fk~7oj%x;a$(bY!fNoO`E5+k@diKh&%G7?j(jgo1f1xs*ShQ8 zaBi0EcTJv(@9KAJ{wWV9P$#y9f6tpzcoBVL>h#l7EAEE2=f_nWpO$R16fI_&+2zh( zKQ)bN5QkqhoHx?jE9nrzaCmR1A!u#N2lNtm_>TW{9iUu8)q_a5Vb3A=`Bd7Y!u*rX zX5_syl+F68O!;!A=h6E`ez#xI5@<~q%ZFHMc~pEL=YZViGOtHIBI)Y>P_LEEhLt-y 
zsZ=q(i6&5Dm*QN?tBd|rR3wCTj=U{SQPX`p;pb0nO`>0xaQeE%oLGH9hcx#$M~zu? z{Y?BEk461Yi*;$AC*N0DgA$N*J7@A!@BATx0vD5IyO5e|Z~GlaxYtqZCHjOTfVfq( zm^e94)B=i#Q9godvx|K5dmJO3HamQ^H*n(YCR`aJDN71}W=g*+P?e!&hhj*Oy+=jS zE$aCu`$P700tSz|{;~^^f~?=`_2;RfnDTjbA1W(Gn=y_Jl9^i+GKI)&l}lJ{arj!Z z63HCZ@*mc*0*}`b!X#Wj6UA8kBBvFDhMZW57NM2gyzgzgZD&1E1=sD?VDI`T1-JDF zld?PkxW&-RZ+8Lp4TZJ)Z#}NaNrJITXClr&_&DL-vw6y|Zq}Gcl^yeId&MWy zGApcd((X>Ac&#G4y-0^c+BB;rGryly9Er?J@HnsTzVbp$HDmTAZ}s;`kCL%gt!9y2 zb#|}ad6zUJ0V+N+DgLSWSo@z9AA`+_7LlQPylkg_XC}>FxeOW(3wlpPGHRPLm;N8P zKtb58N~^8iHx24AtQoT~{v*KBY-?Rup1mf68AAFNJr@NEGTo+s3$9vk6~!F&4REq( zuhQw|;-(>pH)YOEtv~-*Onr?5%$t>-s?0{xa<9dT~Dp{dBQt;NRF9Hqgt?21O3Q^(D zUkCceP0tlfyz4$K=zaKKe?WC%6rHRS>+0?fHg%?xU+Qj7zumhxHuOnSFEEI=W|qUP zt$zRo09w+8U>9@?@lC-tYUErVnY?jeC|(9PKN#3a7N4HwwJkV2XXy+M-hA5vlz89q zh$hn~-8}zRQn3%g;^o3z<$Pi7dV3U=AWWu9t||iKwNeY-dOyM>&K~Ai3aj|MWM5N* zbI03TZl^tB#1E!gGSk-y)#FC}qA+`uhz^5GZ>^c(rhbqFzP(QCxfguA^MWHn)$2ATz(K0=lRFm7nA$ZFLFS|tm!nPb9HfYF;hE>;c&a^;5!UW@PptP~OyDFtX?A=iXD zp5Gp-lxJr~yC9Ip!p~0aS*ACPzbfTE33c&0vUdT_!M{7EZtNi=I74uu>!|Te5^FRb z9l*buSJw#jN-Q0Js&oM2s^zNClt?T()vXFJKxqXS25d8_CW$|8QXpt9n7=sP8MH!? zz+UP(;`6*>VJZ})P!6C`u#lq7Y{|FlOc=W8NmJN&s+1}mGK;3brWHz{VlVi8An