From 6f3c7d358b777d04175835f1abc019c6b5a3a884 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jan 2024 00:27:17 +0200 Subject: [PATCH 01/30] Add GLSL shader gen files --- CMakeLists.txt | 4 ++-- include/PICA/shader_gen.hpp | 24 ++++++++++++++++++++++++ src/core/PICA/shader_gen_glsl.cpp | 30 ++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 include/PICA/shader_gen.hpp create mode 100644 src/core/PICA/shader_gen_glsl.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b4b1503a..2d5df370 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -179,7 +179,7 @@ set(SERVICE_SOURCE_FILES src/core/services/service_manager.cpp src/core/services set(PICA_SOURCE_FILES src/core/PICA/gpu.cpp src/core/PICA/regs.cpp src/core/PICA/shader_unit.cpp src/core/PICA/shader_interpreter.cpp src/core/PICA/dynapica/shader_rec.cpp src/core/PICA/dynapica/shader_rec_emitter_x64.cpp src/core/PICA/pica_hash.cpp - src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp + src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp src/core/PICA/shader_gen_glsl.cpp ) set(LOADER_SOURCE_FILES src/core/loader/elf.cpp src/core/loader/ncsd.cpp src/core/loader/ncch.cpp src/core/loader/3dsx.cpp src/core/loader/lz77.cpp) @@ -244,7 +244,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/services/news_u.hpp include/applets/software_keyboard.hpp include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp - include/PICA/dynapica/shader_rec_emitter_arm64.hpp + include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/PICA/shader_gen.hpp ) cmrc_add_resource_library( diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp new file mode 100644 index 00000000..b52cd7ab --- /dev/null +++ b/include/PICA/shader_gen.hpp @@ -0,0 +1,24 @@ +#pragma once +#include + +#include "PICA/gpu.hpp" +#include "PICA/regs.hpp" +#include "helpers.hpp" + +namespace PICA::ShaderGen { + // Graphics API this shader is targetting + enum class API { GL, GLES, Vulkan }; + + // Shading language to use (Only GLSL for the time being) + enum class Language { GLSL }; + + class FragmentGenerator { + using PICARegs = std::array; + API api; + Language language; + + public: + FragmentGenerator(API api, Language language) : api(api), language(language) {} + std::string generate(const PICARegs& regs); + }; +}; // namespace PICA::ShaderGen \ No newline at end of file diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp new file mode 100644 index 00000000..661002ac --- /dev/null +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -0,0 +1,30 @@ +#include "PICA/shader_gen.hpp" +using namespace PICA::ShaderGen; + +std::string FragmentGenerator::generate(const PICARegs& regs) { + std::string ret = ""; + + switch (api) { + case API::GL: ret += "#version 410 core"; break; + case API::GLES: ret += "#version 300 es"; break; + default: break; + } + + // Input and output attributes + ret += R"( + in vec3 v_tangent; + in vec3 v_normal; + in vec3 v_bitangent; + in vec4 v_colour; + in vec3 v_texcoord0; + in vec2 v_texcoord1; + in vec3 v_view; + in vec2 v_texcoord2; + flat in vec4 v_textureEnvColor[6]; + flat in vec4 v_textureEnvBufferColor; + + out vec4 fragColour; + )"; + + return ret; +} \ No newline at end of file From ef2467bc6029b9e79bfba5651090f7767ea51863 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jan 2024 02:59:29 +0200 Subject: [PATCH 02/30] TEV definitions for shader generator --- include/PICA/regs.hpp | 104 +++++++++++++++++++++++++++ include/PICA/shader_gen.hpp | 2 + include/renderer_gl/renderer_gl.hpp | 5 +- src/core/PICA/shader_gen_glsl.cpp | 41 +++++++++++ src/core/renderer_gl/renderer_gl.cpp | 1 + 5 files changed, 152 insertions(+), 1 deletion(-) diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 70cecf7b..100a0573 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -344,4 +344,108 @@ namespace PICA { GeometryPrimitive = 3, }; + struct TexEnvConfig { + enum class Source : u8 { + PrimaryColor = 0x0, + PrimaryFragmentColor = 0x1, + SecondaryFragmentColor = 0x2, + Texture0 = 0x3, + Texture1 = 0x4, + Texture2 = 0x5, + Texture3 = 0x6, + // TODO: Inbetween values are unknown + PreviousBuffer = 0xD, + Constant = 0xE, + Previous = 0xF, + }; + + enum class ColorOperand : u8 { + SourceColor = 0x0, + OneMinusSourceColor = 0x1, + SourceAlpha = 0x2, + OneMinusSourceAlpha = 0x3, + SourceRed = 0x4, + OneMinusSourceRed = 0x5, + // TODO: Inbetween values are unknown + SourceGreen = 0x8, + OneMinusSourceGreen = 0x9, + // Inbetween values are unknown + SourceBlue = 0xC, + OneMinusSourceBlue = 0xD, + }; + + enum class AlphaOperand : u8 { + SourceAlpha = 0x0, + OneMinusSourceAlpha = 0x1, + SourceRed = 0x2, + OneMinusSourceRed = 0x3, + SourceGreen = 0x4, + OneMinusSourceGreen = 0x5, + SourceBlue = 0x6, + OneMinusSourceBlue = 0x7, + }; + + enum class Operation : u8 { + Replace = 0, + Modulate = 1, + Add = 2, + AddSigned = 3, + Lerp = 4, + Subtract = 5, + Dot3RGB = 6, + Dot3RGBA = 7, + MultiplyAdd = 8, + AddMultiply = 9, + }; + + // RGB sources + Source colorSource1, colorSource2, colorSource3; + // Alpha sources + Source alphaSource1, alphaSource2, alphaSource3; + + // RGB operands + ColorOperand colorOperand1, colorOperand2, colorOperand3; + // Alpha operands + AlphaOperand alphaOperand1, alphaOperand2, alphaOperand3; + + // Texture environment operations for this stage + Operation colorOp, alphaOp; + + u32 constColor; + + private: + // These are the only private members since their value doesn't actually reflect the scale + // So we make them public so we'll always use the appropriate member functions instead + u8 colorScale; + u8 alphaScale; + + public: + // Create texture environment object from TEV registers + TexEnvConfig(u32 source, u32 operand, u32 combiner, u32 color, u32 scale) : constColor(color) { + colorSource1 = Helpers::getBits<0, 4, Source>(source); + colorSource2 = Helpers::getBits<4, 4, Source>(source); + colorSource3 = Helpers::getBits<8, 4, Source>(source); + + alphaSource1 = Helpers::getBits<16, 4, Source>(source); + alphaSource2 = Helpers::getBits<20, 4, Source>(source); + alphaSource3 = Helpers::getBits<24, 4, Source>(source); + + colorOperand1 = Helpers::getBits<0, 4, ColorOperand>(operand); + colorOperand2 = Helpers::getBits<4, 4, ColorOperand>(operand); + colorOperand3 = Helpers::getBits<8, 4, ColorOperand>(operand); + + alphaOperand1 = Helpers::getBits<12, 3, AlphaOperand>(operand); + alphaOperand2 = Helpers::getBits<16, 3, AlphaOperand>(operand); + alphaOperand3 = Helpers::getBits<20, 3, AlphaOperand>(operand); + + colorOp = Helpers::getBits<0, 4, Operation>(combiner); + alphaOp = Helpers::getBits<16, 4, Operation>(combiner); + + colorScale = Helpers::getBits<0, 2>(scale); + alphaScale = Helpers::getBits<16, 2>(scale); + } + + u32 getColorScale() { return (colorScale <= 2) ? (1 << colorScale) : 1; } + u32 getAlphaScale() { return (alphaScale <= 2) ? (1 << alphaScale) : 1; } + }; } // namespace PICA diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index b52cd7ab..3fa66871 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -17,6 +17,8 @@ namespace PICA::ShaderGen { API api; Language language; + void compileTEV(std::string& shader, int stage, const PICARegs& regs); + public: FragmentGenerator(API api, Language language) : api(api), language(language) {} std::string generate(const PICARegs& regs); diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 92f02662..b662023f 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -6,6 +6,7 @@ #include "PICA/float_types.hpp" #include "PICA/pica_vertex.hpp" #include "PICA/regs.hpp" +#include "PICA/shader_gen.hpp" #include "gl_state.hpp" #include "helpers.hpp" #include "logger.hpp" @@ -60,6 +61,8 @@ class RendererGL final : public Renderer { OpenGL::Framebuffer getColourFBO(); OpenGL::Texture getTexture(Texture& tex); + PICA::ShaderGen::FragmentGenerator fragShaderGen; + MAKE_LOG_FUNCTION(log, rendererLogger) void setupBlending(); void setupStencilTest(bool stencilEnable); @@ -71,7 +74,7 @@ class RendererGL final : public Renderer { public: RendererGL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) - : Renderer(gpu, internalRegs, externalRegs) {} + : Renderer(gpu, internalRegs, externalRegs), fragShaderGen(PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL) {} ~RendererGL() override; void reset() override; diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 661002ac..d423016d 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -1,4 +1,5 @@ #include "PICA/shader_gen.hpp" +using namespace PICA; using namespace PICA::ShaderGen; std::string FragmentGenerator::generate(const PICARegs& regs) { @@ -10,6 +11,8 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { default: break; } + bool unimplementedFlag = false; + // Input and output attributes ret += R"( in vec3 v_tangent; @@ -24,7 +27,45 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { flat in vec4 v_textureEnvBufferColor; out vec4 fragColour; + uniform sampler2D u_tex0; + uniform sampler2D u_tex1; + uniform sampler2D u_tex2; + uniform sampler1DArray u_tex_lighting_lut; + + vec4 tevSources[16]; + vec4 tevNextPreviousBuffer; + + vec3 regToColor(uint reg) { + // Normalization scale to convert from [0...255] to [0.0...1.0] + const float scale = 1.0 / 255.0; + + return scale * vec3(float(bitfieldExtract(reg, 20, 8)), float(bitfieldExtract(reg, 10, 8)), float(bitfieldExtract(reg, 00, 8))); + } )"; + // Emit main function for fragment shader + // When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour + ret += R"( + void main() { + tevSources[0] = v_colour; + tevSources[13] = vec4(0.0); // Previous buffer colour + tevSources[15] = v_colour; // Previous combiner + )"; + + for (int i = 0; i < 6; i++) { + compileTEV(ret, i, regs); + } + return ret; +} + +void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICARegs& regs) { + // Base address for each TEV stage's configuration + static constexpr std::array ioBases = { + InternalRegs::TexEnv0Source, InternalRegs::TexEnv1Source, InternalRegs::TexEnv2Source, + InternalRegs::TexEnv3Source, InternalRegs::TexEnv4Source, InternalRegs::TexEnv5Source, + }; + + const u32 ioBase = ioBases[stage]; + TexEnvConfig tev(regs[ioBase], regs[ioBase + 1], regs[ioBase + 2], regs[ioBase + 3], regs[ioBase + 4]); } \ No newline at end of file diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index a11a6ffa..4828e4e6 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -388,6 +388,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v OpenGL::TriangleFan, OpenGL::Triangle, }; + std::cout << fragShaderGen.generate(regs); const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); From c13c8046d47cf6e7a1a0654406ccb7129356f697 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 18 Jan 2024 03:11:41 +0200 Subject: [PATCH 03/30] Detect passthrough TEV stages --- include/PICA/regs.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 100a0573..b807ae5c 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -447,5 +447,17 @@ namespace PICA { u32 getColorScale() { return (colorScale <= 2) ? (1 << colorScale) : 1; } u32 getAlphaScale() { return (alphaScale <= 2) ? (1 << alphaScale) : 1; } + + bool isPassthroughStage() { + // clang-format off + // Thank you to the Citra dev that wrote this out + return ( + colorOp == Operation::Replace && alphaOp == Operation::Replace && + colorSource1 == Source::Previous && alphaSource1 == Source::Previous && + colorOperand1 == ColorOperand::SourceColor && alphaOperand1 == AlphaOperand::SourceAlpha && + getColorScale() == 1 && getAlphaScale() == 1 + ); + // clang-format on + } }; } // namespace PICA From 45ae6bd3a8ff7b05c93a188c34fa9d61295f4116 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 2 Feb 2024 01:16:45 +0200 Subject: [PATCH 04/30] Getting TEV operations working --- include/PICA/shader_gen.hpp | 5 ++ src/core/PICA/shader_gen_glsl.cpp | 145 ++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+) diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index 3fa66871..aaef8b30 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -18,6 +18,11 @@ namespace PICA::ShaderGen { Language language; void compileTEV(std::string& shader, int stage, const PICARegs& regs); + void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index); + void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index); + void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index); + void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op); + void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op); public: FragmentGenerator(API api, Language language) : api(api), language(language) {} diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index d423016d..0c41b8d0 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -52,10 +52,23 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { tevSources[15] = v_colour; // Previous combiner )"; + ret += R"( + vec3 colorOperand1 = vec3(0.0); + vec3 colorOperand2 = vec3(0.0); + vec3 colorOperand3 = vec3(0.0); + + float alphaOperand1 = 0.0; + float alphaOperand2 = 0.0; + float alphaOperand3 = 0.0; + )"; + for (int i = 0; i < 6; i++) { compileTEV(ret, i, regs); } + ret += "}"; // End of main function + ret += "\n\n\n\n\n\n\n\n\n\n\n\n\n"; + return ret; } @@ -68,4 +81,136 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg const u32 ioBase = ioBases[stage]; TexEnvConfig tev(regs[ioBase], regs[ioBase + 1], regs[ioBase + 2], regs[ioBase + 3], regs[ioBase + 4]); + + if (!tev.isPassthroughStage()) { + // Get color operands + shader += "colorOp1 = "; + getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage); + + shader += ";\ncolorOp2 = "; + getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage); + + shader += ";\ncolorOp3 = "; + getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage); + + shader += ";\nvec3 outputColor" + std::to_string(stage) + " = vec3(1.0)"; + shader += ";\n"; + + if (tev.colorOp == TexEnvConfig::Operation::Dot3RGBA) { + // Dot3 RGBA also writes to the alpha component so we don't need to do anything more + shader += "float outputAlpha" + std::to_string(stage) + " = colorOutput" + std::to_string(stage) + ".x;\n"; + } else { + // Get alpha operands + shader += "alphaOp1 = "; + getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage); + + shader += ";\nalphaOp2 = "; + getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage); + + shader += ";\nalphaOp3 = "; + getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage); + + shader += ";\nvec3 outputAlpha" + std::to_string(stage) + " = 1.0"; + shader += ";\n"; + } + } +} + +void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index) { + using OperandType = TexEnvConfig::ColorOperand; + + // For inverting operands, add the 1.0 - x subtraction + if (color == OperandType::OneMinusSourceColor || color == OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen || + color == OperandType::OneMinusSourceBlue || color == OperandType::OneMinusSourceAlpha) { + shader += "vec3(1.0, 1.0, 1.0) - "; + } + + switch (color) { + case OperandType::SourceColor: + case OperandType::OneMinusSourceColor: + getSource(shader, source, index); + shader += ".rgb"; + break; + + case OperandType::SourceRed: + case OperandType::OneMinusSourceRed: + getSource(shader, source, index); + shader += ".rrr"; + break; + + case OperandType::SourceGreen: + case OperandType::OneMinusSourceGreen: + getSource(shader, source, index); + shader += ".ggg"; + break; + + case OperandType::SourceBlue: + case OperandType::OneMinusSourceBlue: + getSource(shader, source, index); + shader += ".bbb"; + break; + + case OperandType::SourceAlpha: + case OperandType::OneMinusSourceAlpha: + getSource(shader, source, index); + shader += ".aaa"; + break; + + default: + shader += "vec3(1.0, 1.0, 1.0)"; + Helpers::warn("FragmentGenerator: Invalid TEV color operand"); + break; + } +} + +void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index) { + using OperandType = TexEnvConfig::AlphaOperand; + + // For inverting operands, add the 1.0 - x subtraction + if (color == OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen || color == OperandType::OneMinusSourceBlue || + color == OperandType::OneMinusSourceAlpha) { + shader += "1.0 - "; + } + + switch (color) { + case OperandType::SourceRed: + case OperandType::OneMinusSourceRed: + getSource(shader, source, index); + shader += ".r"; + break; + + case OperandType::SourceGreen: + case OperandType::OneMinusSourceGreen: + getSource(shader, source, index); + shader += ".g"; + break; + + case OperandType::SourceBlue: + case OperandType::OneMinusSourceBlue: + getSource(shader, source, index); + shader += ".b"; + break; + + case OperandType::SourceAlpha: + case OperandType::OneMinusSourceAlpha: + getSource(shader, source, index); + shader += ".a"; + break; + + default: + shader += "1.0"; + Helpers::warn("FragmentGenerator: Invalid TEV color operand"); + break; + } +} + +void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index) { + switch (source) { + case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break; + + default: + Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); + shader += "vec4(1.0, 1.0, 1.0, 1.0)"; + break; + } } \ No newline at end of file From 7e77af0de88d5a6bdbbb35b4d7372aec23ce187b Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 2 Feb 2024 01:34:33 +0200 Subject: [PATCH 05/30] GLSL Fragment Generator: Working color operations --- src/core/PICA/shader_gen_glsl.cpp | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 0c41b8d0..829aec03 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -93,7 +93,8 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += ";\ncolorOp3 = "; getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage); - shader += ";\nvec3 outputColor" + std::to_string(stage) + " = vec3(1.0)"; + shader += ";\nvec3 outputColor" + std::to_string(stage) + " = "; + getColorOperation(shader, tev.colorOp); shader += ";\n"; if (tev.colorOp == TexEnvConfig::Operation::Dot3RGBA) { @@ -213,4 +214,24 @@ void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source sour shader += "vec4(1.0, 1.0, 1.0, 1.0)"; break; } +} + +void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Operation op) { + switch (op) { + case TexEnvConfig::Operation::Replace: shader += "colorOp1"; break; + case TexEnvConfig::Operation::Add: shader += "colorOp1 + colorOp2"; break; + case TexEnvConfig::Operation::AddSigned: shader += "clamp(colorOp1 + colorOp2 - 0.5, 0.0, 1.0);"; break; + case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break; + case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break; + case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec(1.0) - colorOp3)"; break; + + case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break; + case TexEnvConfig::Operation::MultiplyAdd: shader += "colorOp1 * colorOp2 + colorOp3"; break; + case TexEnvConfig::Operation::Dot3RGB: + case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(colorOp1 - 0.5, colorOp2 - 0.5))"; break; + default: + Helpers::warn("FragmentGenerator: Unimplemented color op"); + shader += "vec3(1.0)"; + break; + } } \ No newline at end of file From 0be8c45e377a47a6db0cb530c508063b0f129f6b Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 2 Feb 2024 01:51:37 +0200 Subject: [PATCH 06/30] Fragment shader gen: Properly track TEV outputs --- src/core/PICA/shader_gen_glsl.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 829aec03..28dbc5ab 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -26,7 +26,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { flat in vec4 v_textureEnvColor[6]; flat in vec4 v_textureEnvBufferColor; - out vec4 fragColour; + out vec4 fragColor; uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; @@ -50,6 +50,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { tevSources[0] = v_colour; tevSources[13] = vec4(0.0); // Previous buffer colour tevSources[15] = v_colour; // Previous combiner + vec4 combinerOutput = v_colour; // Last TEV output )"; ret += R"( @@ -66,6 +67,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { compileTEV(ret, i, regs); } + ret += "fragColor = combinerOutput;\n"; ret += "}"; // End of main function ret += "\n\n\n\n\n\n\n\n\n\n\n\n\n"; @@ -114,6 +116,10 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += ";\nvec3 outputAlpha" + std::to_string(stage) + " = 1.0"; shader += ";\n"; } + + shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) + + ".0, vec3(0.0), vec3(1.0)), clamp(outputAlpha" + std::to_string(stage) + " * " + std::to_string(tev.getAlphaScale()) + + ".0, 0.0, 1.0));\n"; } } From 10654ce1ca94bf47f702fa36b30707c1de49434c Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 4 Feb 2024 18:54:29 +0200 Subject: [PATCH 07/30] GLSL generator: Add textures and alpha operations --- include/PICA/shader_gen.hpp | 2 ++ src/core/PICA/shader_gen_glsl.cpp | 40 +++++++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index aaef8b30..80c57d46 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -24,6 +24,8 @@ namespace PICA::ShaderGen { void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op); void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op); + u32 textureConfig = 0; + public: FragmentGenerator(API api, Language language) : api(api), language(language) {} std::string generate(const PICARegs& regs); diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 28dbc5ab..47cf2a7b 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -63,6 +63,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { float alphaOperand3 = 0.0; )"; + textureConfig = regs[InternalRegs::TexUnitCfg]; for (int i = 0; i < 6; i++) { compileTEV(ret, i, regs); } @@ -113,8 +114,10 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += ";\nalphaOp3 = "; getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage); - shader += ";\nvec3 outputAlpha" + std::to_string(stage) + " = 1.0"; - shader += ";\n"; + shader += ";\nvec3 outputAlpha" + std::to_string(stage) + " = "; + getAlphaOperation(shader, tev.alphaOp); + // Clamp the alpha value to [0.0, 1.0] + shader += ";\nclamp(outputAlpha" + std::to_string(stage) + ", 0.0, 1.0);\n"; } shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) + @@ -214,6 +217,19 @@ void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Sourc void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index) { switch (source) { case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break; + case TexEnvConfig::Source::Texture0: shader += "texture(u_tex0, v_texcoord0.xy)"; break; + case TexEnvConfig::Source::Texture1: shader += "texture(u_tex1, v_texcoord1)"; break; + case TexEnvConfig::Source::Texture2: { + // If bit 13 in texture config is set then we use the texcoords for texture 1, otherwise for texture 2 + if (Helpers::getBit<13>(textureConfig)) { + shader += "texture(u_tex2, v_texcoord1)"; + } else { + shader += "texture(u_tex2, v_texcoord2)"; + } + break; + } + + case TexEnvConfig::Source::Previous: shader += "combinerOutput"; break; default: Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); @@ -240,4 +256,24 @@ void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Ope shader += "vec3(1.0)"; break; } +} + +void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Operation op) { + switch (op) { + case TexEnvConfig::Operation::Replace: shader += "alphaOp1"; break; + case TexEnvConfig::Operation::Add: shader += "alphaOp1 + alphaOp2"; break; + case TexEnvConfig::Operation::AddSigned: shader += "clamp(alphaOp1 + alphaOp2 - 0.5, 0.0, 1.0);"; break; + case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break; + case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break; + case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (vec(1.0) - alphaOp3)"; break; + + case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, vec3(1.0)) * alphaOp3"; break; + case TexEnvConfig::Operation::MultiplyAdd: shader += "alphaOp1 * alphaOp2 + alphaOp3"; break; + case TexEnvConfig::Operation::Dot3RGB: + case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(alphaOp1 - 0.5, alphaOp2 - 0.5))"; break; + default: + Helpers::warn("FragmentGenerator: Unimplemented alpha op"); + shader += "vec3(1.0)"; + break; + } } \ No newline at end of file From ddc14cea0967b231e45a378d6bb8797779177951 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 29 Feb 2024 00:56:24 +0200 Subject: [PATCH 08/30] Fix shader compilation errors --- include/PICA/shader_gen.hpp | 1 + src/core/PICA/shader_gen_glsl.cpp | 77 +++++++++++++++++++++++++--- src/core/renderer_gl/renderer_gl.cpp | 10 +++- 3 files changed, 80 insertions(+), 8 deletions(-) diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index 80c57d46..e07575a5 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -29,5 +29,6 @@ namespace PICA::ShaderGen { public: FragmentGenerator(API api, Language language) : api(api), language(language) {} std::string generate(const PICARegs& regs); + std::string getVertexShader(const PICARegs& regs); }; }; // namespace PICA::ShaderGen \ No newline at end of file diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 47cf2a7b..3a7e9b74 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -2,6 +2,69 @@ using namespace PICA; using namespace PICA::ShaderGen; +std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { + std::string ret = ""; + + switch (api) { + case API::GL: ret += "#version 410 core"; break; + case API::GLES: ret += "#version 300 es"; break; + default: break; + } + + ret += R"( + layout(location = 0) in vec4 a_coords; + layout(location = 1) in vec4 a_quaternion; + layout(location = 2) in vec4 a_vertexColour; + layout(location = 3) in vec2 a_texcoord0; + layout(location = 4) in vec2 a_texcoord1; + layout(location = 5) in float a_texcoord0_w; + layout(location = 6) in vec3 a_view; + layout(location = 7) in vec2 a_texcoord2; + + out vec3 v_normal; + out vec3 v_tangent; + out vec3 v_bitangent; + out vec4 v_colour; + out vec3 v_texcoord0; + out vec2 v_texcoord1; + out vec3 v_view; + out vec2 v_texcoord2; + flat out vec4 v_textureEnvColor[6]; + flat out vec4 v_textureEnvBufferColor; + + //out float gl_ClipDistance[2]; + + vec4 abgr8888ToVec4(uint abgr) { + const float scale = 1.0 / 255.0; + return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); + } + + vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + vec3 u = q.xyz; + float s = q.w; + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); + } + + void main() { + gl_Position = a_coords; + vec4 colourAbs = abs(a_vertexColour); + v_colour = min(colourAbs, vec4(1.f)); + + // Flip y axis of UVs because OpenGL uses an inverted y for texture sampling compared to the PICA + v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w); + v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); + v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); + v_view = a_view; + + v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); + v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); + v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); + } +)"; + + return ret; +} + std::string FragmentGenerator::generate(const PICARegs& regs) { std::string ret = ""; @@ -54,13 +117,13 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { )"; ret += R"( - vec3 colorOperand1 = vec3(0.0); - vec3 colorOperand2 = vec3(0.0); - vec3 colorOperand3 = vec3(0.0); + vec3 colorOp1 = vec3(0.0); + vec3 colorOp2 = vec3(0.0); + vec3 colorOp3 = vec3(0.0); - float alphaOperand1 = 0.0; - float alphaOperand2 = 0.0; - float alphaOperand3 = 0.0; + float alphaOp1 = 0.0; + float alphaOp2 = 0.0; + float alphaOp3 = 0.0; )"; textureConfig = regs[InternalRegs::TexUnitCfg]; @@ -114,7 +177,7 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += ";\nalphaOp3 = "; getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage); - shader += ";\nvec3 outputAlpha" + std::to_string(stage) + " = "; + shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = "; getAlphaOperation(shader, tev.alphaOp); // Clamp the alpha value to [0.0, 1.0] shader += ";\nclamp(outputAlpha" + std::to_string(stage) + ", 0.0, 1.0);\n"; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 4828e4e6..95175130 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -388,7 +388,15 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v OpenGL::TriangleFan, OpenGL::Triangle, }; - std::cout << fragShaderGen.generate(regs); + + std::string vs = fragShaderGen.getVertexShader(regs); + std::string fs = fragShaderGen.generate(regs); + std::cout << fs << "\n\n\n"; + + OpenGL::Program program; + OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); + OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); + program.create({vertShader, fragShader}); const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); From fdfb012aa149487c46732d6642ab417460a56d4b Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 29 Feb 2024 01:28:00 +0200 Subject: [PATCH 09/30] GL: Add RendererGL::getSpecializedShader --- include/renderer_gl/renderer_gl.hpp | 1 + src/core/renderer_gl/renderer_gl.cpp | 25 +++++++++++++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index b662023f..7bc1087a 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -60,6 +60,7 @@ class RendererGL final : public Renderer { OpenGL::Framebuffer getColourFBO(); OpenGL::Texture getTexture(Texture& tex); + OpenGL::Program getSpecializedShader(); PICA::ShaderGen::FragmentGenerator fragShaderGen; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 95175130..0bb592cf 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -393,10 +393,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v std::string fs = fragShaderGen.generate(regs); std::cout << fs << "\n\n\n"; - OpenGL::Program program; - OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); - OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); - program.create({vertShader, fragShader}); + OpenGL::Program program = getSpecializedShader(); const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); @@ -787,6 +784,26 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt return colourBufferCache.add(sampleBuffer); } +OpenGL::Program RendererGL::getSpecializedShader() { + OpenGL::Program program; + + std::string vs = fragShaderGen.getVertexShader(regs); + std::string fs = fragShaderGen.generate(regs); + + OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); + OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); + program.create({vertShader, fragShader}); + program.use(); + + // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 + glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); + glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); + glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + + return program; +} + void RendererGL::screenshot(const std::string& name) { constexpr uint width = 400; constexpr uint height = 2 * 240; From 67fe3214fe00ddff37dd0a133c0fd1c38859987f Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 2 Mar 2024 20:41:23 +0200 Subject: [PATCH 10/30] Add shader cache --- include/renderer_gl/renderer_gl.hpp | 64 ++++++++++++++----- src/core/PICA/shader_gen_glsl.cpp | 8 +-- src/core/renderer_gl/renderer_gl.cpp | 92 ++++++++++++++++------------ 3 files changed, 106 insertions(+), 58 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 7bc1087a..e8eaeacb 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -1,9 +1,13 @@ #pragma once #include +#include +#include #include +#include #include "PICA/float_types.hpp" +#include "PICA/pica_hash.hpp" #include "PICA/pica_vertex.hpp" #include "PICA/regs.hpp" #include "PICA/shader_gen.hpp" @@ -17,6 +21,32 @@ // More circular dependencies! class GPU; +namespace PICA { + struct FragmentConfig { + u32 texUnitConfig; + u32 texEnvUpdateBuffer; + + // TODO: This should probably be a uniform + u32 texEnvBufferColor; + + // There's 6 TEV stages, and each one is configured via 5 word-sized registers + std::array tevConfigs; + + // Hash function and equality operator required by std::unordered_map + bool operator==(const FragmentConfig& config) const { + return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0; + } + }; +} // namespace PICA + +// Override std::hash for our fragment config class +template <> +struct std::hash { + std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { + return PICAHash::computeHash((const char*)&config, sizeof(config)); + } +}; + class RendererGL final : public Renderer { GLStateManager gl = {}; @@ -26,20 +56,23 @@ class RendererGL final : public Renderer { OpenGL::VertexArray vao; OpenGL::VertexBuffer vbo; - // TEV configuration uniform locations - GLint textureEnvSourceLoc = -1; - GLint textureEnvOperandLoc = -1; - GLint textureEnvCombinerLoc = -1; - GLint textureEnvColorLoc = -1; - GLint textureEnvScaleLoc = -1; + // Data + struct { + // TEV configuration uniform locations + GLint textureEnvSourceLoc = -1; + GLint textureEnvOperandLoc = -1; + GLint textureEnvCombinerLoc = -1; + GLint textureEnvColorLoc = -1; + GLint textureEnvScaleLoc = -1; - // Uniform of PICA registers - GLint picaRegLoc = -1; + // Uniform of PICA registers + GLint picaRegLoc = -1; - // Depth configuration uniform locations - GLint depthOffsetLoc = -1; - GLint depthScaleLoc = -1; - GLint depthmapEnableLoc = -1; + // Depth configuration uniform locations + GLint depthOffsetLoc = -1; + GLint depthScaleLoc = -1; + GLint depthmapEnableLoc = -1; + } ubershaderData; float oldDepthScale = -1.0; float oldDepthOffset = 0.0; @@ -48,6 +81,7 @@ class RendererGL final : public Renderer { SurfaceCache depthBufferCache; SurfaceCache colourBufferCache; SurfaceCache textureCache; + bool usingUbershader = false; // Dummy VAO/VBO for blitting the final output OpenGL::VertexArray dummyVAO; @@ -58,9 +92,11 @@ class RendererGL final : public Renderer { OpenGL::Framebuffer screenFramebuffer; OpenGL::Texture blankTexture; + std::unordered_map shaderCache; + OpenGL::Framebuffer getColourFBO(); OpenGL::Texture getTexture(Texture& tex); - OpenGL::Program getSpecializedShader(); + OpenGL::Program& getSpecializedShader(); PICA::ShaderGen::FragmentGenerator fragShaderGen; @@ -99,4 +135,4 @@ class RendererGL final : public Renderer { // Take a screenshot of the screen and store it in a file void screenshot(const std::string& name) override; -}; +}; \ No newline at end of file diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 3a7e9b74..1bcae30c 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -308,7 +308,7 @@ void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Ope case TexEnvConfig::Operation::AddSigned: shader += "clamp(colorOp1 + colorOp2 - 0.5, 0.0, 1.0);"; break; case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break; case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break; - case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec(1.0) - colorOp3)"; break; + case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec3(1.0) - colorOp3)"; break; case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break; case TexEnvConfig::Operation::MultiplyAdd: shader += "colorOp1 * colorOp2 + colorOp3"; break; @@ -328,15 +328,15 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope case TexEnvConfig::Operation::AddSigned: shader += "clamp(alphaOp1 + alphaOp2 - 0.5, 0.0, 1.0);"; break; case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break; case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break; - case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (vec(1.0) - alphaOp3)"; break; + case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (1.0 - alphaOp3)"; break; - case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, vec3(1.0)) * alphaOp3"; break; + case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break; case TexEnvConfig::Operation::MultiplyAdd: shader += "alphaOp1 * alphaOp2 + alphaOp3"; break; case TexEnvConfig::Operation::Dot3RGB: case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(alphaOp1 - 0.5, alphaOp2 - 0.5))"; break; default: Helpers::warn("FragmentGenerator: Unimplemented alpha op"); - shader += "vec3(1.0)"; + shader += "1.0"; break; } } \ No newline at end of file diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 0bb592cf..a0e09bba 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -38,9 +38,9 @@ void RendererGL::reset() { oldDepthOffset = 0.0; // Default depth offset to 0 oldDepthmapEnable = false; // Enable w buffering - glUniform1f(depthScaleLoc, oldDepthScale); - glUniform1f(depthOffsetLoc, oldDepthOffset); - glUniform1i(depthmapEnableLoc, oldDepthmapEnable); + glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale); + glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset); + glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable); gl.useProgram(oldProgram); // Switch to old GL program } @@ -59,16 +59,16 @@ void RendererGL::initGraphicsContextInternal() { triangleProgram.create({vert, frag}); gl.useProgram(triangleProgram); - textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); - textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); - textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); - textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); - textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); + ubershaderData.textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); + ubershaderData.textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); + ubershaderData.textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); + ubershaderData.textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); + ubershaderData.textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); - depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); - depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); - depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable"); - picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs"); + ubershaderData.depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); + ubershaderData.depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); + ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable"); + ubershaderData.picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs"); // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0); @@ -289,7 +289,6 @@ void RendererGL::setupStencilTest(bool stencilEnable) { glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]); } - void RendererGL::setupTextureEnvState() { // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. @@ -314,11 +313,11 @@ void RendererGL::setupTextureEnvState() { textureEnvScaleRegs[i] = regs[ioBase + 4]; } - glUniform1uiv(textureEnvSourceLoc, 6, textureEnvSourceRegs); - glUniform1uiv(textureEnvOperandLoc, 6, textureEnvOperandRegs); - glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs); - glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs); - glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs); + glUniform1uiv(ubershaderData.textureEnvSourceLoc, 6, textureEnvSourceRegs); + glUniform1uiv(ubershaderData.textureEnvOperandLoc, 6, textureEnvOperandRegs); + glUniform1uiv(ubershaderData.textureEnvCombinerLoc, 6, textureEnvCombinerRegs); + glUniform1uiv(ubershaderData.textureEnvColorLoc, 6, textureEnvColourRegs); + glUniform1uiv(ubershaderData.textureEnvScaleLoc, 6, textureEnvScaleRegs); } void RendererGL::bindTexturesToSlots() { @@ -389,11 +388,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v OpenGL::Triangle, }; - std::string vs = fragShaderGen.getVertexShader(regs); - std::string fs = fragShaderGen.generate(regs); - std::cout << fs << "\n\n\n"; - - OpenGL::Program program = getSpecializedShader(); + OpenGL::Program& program = getSpecializedShader(); const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); @@ -427,17 +422,17 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v // Update depth uniforms if (oldDepthScale != depthScale) { oldDepthScale = depthScale; - glUniform1f(depthScaleLoc, depthScale); + glUniform1f(ubershaderData.depthScaleLoc, depthScale); } if (oldDepthOffset != depthOffset) { oldDepthOffset = depthOffset; - glUniform1f(depthOffsetLoc, depthOffset); + glUniform1f(ubershaderData.depthOffsetLoc, depthOffset); } if (oldDepthmapEnable != depthMapEnable) { oldDepthmapEnable = depthMapEnable; - glUniform1i(depthmapEnableLoc, depthMapEnable); + glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable); } setupTextureEnvState(); @@ -445,7 +440,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates - glUniform1uiv(picaRegLoc, 0x200 - 0x48, ®s[0x48]); + glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, ®s[0x48]); if (gpu.lightingLUTDirty) { updateLightingLUT(); @@ -784,22 +779,39 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt return colourBufferCache.add(sampleBuffer); } -OpenGL::Program RendererGL::getSpecializedShader() { - OpenGL::Program program; +OpenGL::Program& RendererGL::getSpecializedShader() { + PICA::FragmentConfig fsConfig; + fsConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; + fsConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + fsConfig.texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor]; - std::string vs = fragShaderGen.getVertexShader(regs); - std::string fs = fragShaderGen.generate(regs); + // Set up TEV stages + std::memcpy(&fsConfig.tevConfigs[0 * 5], ®s[InternalRegs::TexEnv0Source], 5 * sizeof(u32)); + std::memcpy(&fsConfig.tevConfigs[1 * 5], ®s[InternalRegs::TexEnv1Source], 5 * sizeof(u32)); + std::memcpy(&fsConfig.tevConfigs[2 * 5], ®s[InternalRegs::TexEnv2Source], 5 * sizeof(u32)); + std::memcpy(&fsConfig.tevConfigs[3 * 5], ®s[InternalRegs::TexEnv3Source], 5 * sizeof(u32)); + std::memcpy(&fsConfig.tevConfigs[4 * 5], ®s[InternalRegs::TexEnv4Source], 5 * sizeof(u32)); + std::memcpy(&fsConfig.tevConfigs[5 * 5], ®s[InternalRegs::TexEnv5Source], 5 * sizeof(u32)); - OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); - OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); - program.create({vertShader, fragShader}); - program.use(); + OpenGL::Program& program = shaderCache[fsConfig]; + if (!program.exists()) { + printf("Creating specialized shader\n"); - // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 - glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); - glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); - glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); - glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + std::string vs = fragShaderGen.getVertexShader(regs); + std::string fs = fragShaderGen.generate(regs); + std::cout << vs << "\n\n" << fs << "\n"; + + OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); + OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); + program.create({vertShader, fragShader}); + program.use(); + + // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 + glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); + glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); + glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + } return program; } From fc83d518e23796e8008cef7bbfa047dd14608ff1 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 2 Mar 2024 22:35:56 +0200 Subject: [PATCH 11/30] Hook up specialized shaders to GL renderer --- src/core/renderer_gl/renderer_gl.cpp | 43 +++++++++++++++++----------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index a0e09bba..4aa65586 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -291,6 +291,9 @@ void RendererGL::setupStencilTest(bool stencilEnable) { void RendererGL::setupTextureEnvState() { // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. + if (!usingUbershader) { + return; + } static constexpr std::array ioBases = { PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, @@ -388,13 +391,17 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v OpenGL::Triangle, }; - OpenGL::Program& program = getSpecializedShader(); + if (usingUbershader) { + gl.useProgram(triangleProgram); + } else { + OpenGL::Program& program = getSpecializedShader(); + gl.useProgram(program); + } const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); gl.bindVBO(vbo); gl.bindVAO(vao); - gl.useProgram(triangleProgram); gl.enableClipPlane(0); // Clipping plane 0 is always enabled if (regs[PICA::InternalRegs::ClipEnable] & 1) { @@ -420,27 +427,31 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; // Update depth uniforms - if (oldDepthScale != depthScale) { - oldDepthScale = depthScale; - glUniform1f(ubershaderData.depthScaleLoc, depthScale); - } + if (usingUbershader) { + if (oldDepthScale != depthScale) { + oldDepthScale = depthScale; + glUniform1f(ubershaderData.depthScaleLoc, depthScale); + } - if (oldDepthOffset != depthOffset) { - oldDepthOffset = depthOffset; - glUniform1f(ubershaderData.depthOffsetLoc, depthOffset); - } + if (oldDepthOffset != depthOffset) { + oldDepthOffset = depthOffset; + glUniform1f(ubershaderData.depthOffsetLoc, depthOffset); + } - if (oldDepthmapEnable != depthMapEnable) { - oldDepthmapEnable = depthMapEnable; - glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable); + if (oldDepthmapEnable != depthMapEnable) { + oldDepthmapEnable = depthMapEnable; + glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable); + } } setupTextureEnvState(); bindTexturesToSlots(); - // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) - // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates - glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, ®s[0x48]); + if (usingUbershader) { + // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) + // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates + glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, ®s[0x48]); + } if (gpu.lightingLUTDirty) { updateLightingLUT(); From e5c09a092d76fb7339bd4202ba67630e18c0d775 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 2 Mar 2024 23:29:22 +0200 Subject: [PATCH 12/30] Fix specialized shaders on Android --- include/PICA/shader_gen.hpp | 5 +++++ src/core/renderer_gl/renderer_gl.cpp | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index e07575a5..23a87120 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -30,5 +30,10 @@ namespace PICA::ShaderGen { FragmentGenerator(API api, Language language) : api(api), language(language) {} std::string generate(const PICARegs& regs); std::string getVertexShader(const PICARegs& regs); + + void setTarget(API api, Language language) { + this->api = api; + this->language = language; + } }; }; // namespace PICA::ShaderGen \ No newline at end of file diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 4aa65586..8119cff5 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -44,6 +44,10 @@ void RendererGL::reset() { gl.useProgram(oldProgram); // Switch to old GL program } + +#ifdef __ANDROID__ + fragShaderGen.setTarget(PICA::ShaderGen::API::GLES, PICA::ShaderGen::Language::GLSL); +#endif } void RendererGL::initGraphicsContextInternal() { From 4b07ebed863fa1f23d060f8cbb3b8bdc41683854 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 3 Mar 2024 01:51:45 +0200 Subject: [PATCH 13/30] Fix shader cache bypassing GL state manager --- src/core/renderer_gl/renderer_gl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 8119cff5..5d3ed1b1 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -819,7 +819,7 @@ OpenGL::Program& RendererGL::getSpecializedShader() { OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); program.create({vertShader, fragShader}); - program.use(); + gl.useProgram(program); // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); From 5ba773a393a599897af821d8b1ef1fb4cfa85318 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 3 Mar 2024 02:43:41 +0200 Subject: [PATCH 14/30] Add GLES detection to fragment shader recompiler --- src/core/PICA/shader_gen_glsl.cpp | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 1bcae30c..c3056815 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -11,6 +11,15 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { default: break; } + if (api == API::GLES) { + ret += R"( + #define USING_GLES 1 + + precision mediump int; + precision mediump float; + )"; + } + ret += R"( layout(location = 0) in vec4 a_coords; layout(location = 1) in vec4 a_quaternion; @@ -75,6 +84,14 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { } bool unimplementedFlag = false; + if (api == API::GLES) { + ret += R"( + #define USING_GLES 1 + + precision mediump int; + precision mediump float; + )"; + } // Input and output attributes ret += R"( @@ -93,17 +110,13 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; + // GLES doesn't support sampler1DArray, as such we'll have to change how we handle lighting later +#ifndef USING_GLES uniform sampler1DArray u_tex_lighting_lut; +#endif vec4 tevSources[16]; vec4 tevNextPreviousBuffer; - - vec3 regToColor(uint reg) { - // Normalization scale to convert from [0...255] to [0.0...1.0] - const float scale = 1.0 / 255.0; - - return scale * vec3(float(bitfieldExtract(reg, 20, 8)), float(bitfieldExtract(reg, 10, 8)), float(bitfieldExtract(reg, 00, 8))); - } )"; // Emit main function for fragment shader @@ -339,4 +352,4 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope shader += "1.0"; break; } -} \ No newline at end of file +} From 842943fa4cb674bc2b5a652d419f8e4acd889e90 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 13 May 2024 00:51:40 +0300 Subject: [PATCH 15/30] GLSL shader gen: Add alpha test (...half of it I guess) --- CMakeLists.txt | 2 +- include/PICA/pica_frag_config.hpp | 53 ++++++++++++++++++++++++++++ include/PICA/regs.hpp | 11 ++++++ include/PICA/shader_gen.hpp | 2 ++ include/renderer_gl/renderer_gl.hpp | 27 +------------- src/core/PICA/shader_gen_glsl.cpp | 24 +++++++++++++ src/core/renderer_gl/renderer_gl.cpp | 27 ++++++++------ 7 files changed, 108 insertions(+), 38 deletions(-) create mode 100644 include/PICA/pica_frag_config.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 0bdb8abb..c6b12188 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -242,7 +242,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp include/PICA/shader_gen.hpp include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp - include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp + include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp ) cmrc_add_resource_library( diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp new file mode 100644 index 00000000..c4d46b11 --- /dev/null +++ b/include/PICA/pica_frag_config.hpp @@ -0,0 +1,53 @@ +#pragma once +#include +#include +#include +#include + +#include "PICA/pica_hash.hpp" +#include "PICA/regs.hpp" +#include "bitfield.hpp" +#include "helpers.hpp" + +namespace PICA { + struct OutputConfig { + union { + u32 raw; + // Merge the enable + compare function into 1 field to avoid duplicate shaders + // enable == off means a CompareFunction of Always + BitField<0, 3, CompareFunction> alphaTestFunction; + }; + }; + + struct TextureConfig { + u32 texUnitConfig; + u32 texEnvUpdateBuffer; + + // TODO: This should probably be a uniform + u32 texEnvBufferColor; + + // There's 6 TEV stages, and each one is configured via 5 word-sized registers + std::array tevConfigs; + }; + + struct FragmentConfig { + OutputConfig outConfig; + TextureConfig texConfig; + + bool operator==(const FragmentConfig& config) const { + // Hash function and equality operator required by std::unordered_map + return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0; + } + }; + + static_assert( + std::has_unique_object_representations() && std::has_unique_object_representations() && + std::has_unique_object_representations() + ); +} // namespace PICA + +// Override std::hash for our fragment config class +template <> +struct std::hash { + std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); } +}; \ No newline at end of file diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 5b9e1830..74f8c7d5 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -345,6 +345,17 @@ namespace PICA { GeometryPrimitive = 3, }; + enum class CompareFunction : u32 { + Never = 0, + Always = 1, + Equal = 2, + NotEqual = 3, + Less = 4, + LessOrEqual = 5, + Greater = 6, + GreaterOrEqual = 7, + }; + struct TexEnvConfig { enum class Source : u8 { PrimaryColor = 0x0, diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index 23a87120..e8e8ca20 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -24,6 +24,8 @@ namespace PICA::ShaderGen { void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op); void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op); + void applyAlphaTest(std::string& shader, const PICARegs& regs); + u32 textureConfig = 0; public: diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index e8eaeacb..53ca9975 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -7,6 +7,7 @@ #include #include "PICA/float_types.hpp" +#include "PICA/pica_frag_config.hpp" #include "PICA/pica_hash.hpp" #include "PICA/pica_vertex.hpp" #include "PICA/regs.hpp" @@ -21,32 +22,6 @@ // More circular dependencies! class GPU; -namespace PICA { - struct FragmentConfig { - u32 texUnitConfig; - u32 texEnvUpdateBuffer; - - // TODO: This should probably be a uniform - u32 texEnvBufferColor; - - // There's 6 TEV stages, and each one is configured via 5 word-sized registers - std::array tevConfigs; - - // Hash function and equality operator required by std::unordered_map - bool operator==(const FragmentConfig& config) const { - return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0; - } - }; -} // namespace PICA - -// Override std::hash for our fragment config class -template <> -struct std::hash { - std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { - return PICAHash::computeHash((const char*)&config, sizeof(config)); - } -}; - class RendererGL final : public Renderer { GLStateManager gl = {}; diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index c3056815..50be94f0 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -144,6 +144,8 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { compileTEV(ret, i, regs); } + applyAlphaTest(ret, regs); + ret += "fragColor = combinerOutput;\n"; ret += "}"; // End of main function ret += "\n\n\n\n\n\n\n\n\n\n\n\n\n"; @@ -353,3 +355,25 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope break; } } + +void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs) { + const u32 alphaConfig = regs[InternalRegs::AlphaTestConfig]; + // Alpha test disabled + if (Helpers::getBit<0>(alphaConfig) == 0) { + return; + } + + const auto function = static_cast(Helpers::getBits<4, 3>(alphaConfig)); + + shader += "if ("; + switch (function) { + case CompareFunction::Never: shader += "true"; break; + case CompareFunction::Always: shader += "false"; break; + default: + Helpers::warn("Unimplemented alpha test function"); + shader += "false"; + break; + } + + shader += ") { discard; }\n"; +} diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 5d3ed1b1..cfd197f8 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -796,22 +796,27 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt OpenGL::Program& RendererGL::getSpecializedShader() { PICA::FragmentConfig fsConfig; - fsConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; - fsConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; - fsConfig.texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor]; + auto& outConfig = fsConfig.outConfig; + auto& texConfig = fsConfig.texConfig; + + auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig]; + auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig); + outConfig.alphaTestFunction = (alphaTestConfig & 1) ? static_cast(alphaTestFunction) : PICA::CompareFunction::Always; + + texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; + texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + texConfig.texEnvBufferColor = 0; // Set up TEV stages - std::memcpy(&fsConfig.tevConfigs[0 * 5], ®s[InternalRegs::TexEnv0Source], 5 * sizeof(u32)); - std::memcpy(&fsConfig.tevConfigs[1 * 5], ®s[InternalRegs::TexEnv1Source], 5 * sizeof(u32)); - std::memcpy(&fsConfig.tevConfigs[2 * 5], ®s[InternalRegs::TexEnv2Source], 5 * sizeof(u32)); - std::memcpy(&fsConfig.tevConfigs[3 * 5], ®s[InternalRegs::TexEnv3Source], 5 * sizeof(u32)); - std::memcpy(&fsConfig.tevConfigs[4 * 5], ®s[InternalRegs::TexEnv4Source], 5 * sizeof(u32)); - std::memcpy(&fsConfig.tevConfigs[5 * 5], ®s[InternalRegs::TexEnv5Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[0 * 5], ®s[InternalRegs::TexEnv0Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[1 * 5], ®s[InternalRegs::TexEnv1Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[2 * 5], ®s[InternalRegs::TexEnv2Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[3 * 5], ®s[InternalRegs::TexEnv3Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[4 * 5], ®s[InternalRegs::TexEnv4Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[5 * 5], ®s[InternalRegs::TexEnv5Source], 5 * sizeof(u32)); OpenGL::Program& program = shaderCache[fsConfig]; if (!program.exists()) { - printf("Creating specialized shader\n"); - std::string vs = fragShaderGen.getVertexShader(regs); std::string fs = fragShaderGen.generate(regs); std::cout << vs << "\n\n" << fs << "\n"; From b384cb8ad9601197ea4162d200c9fcdf2a6fbfa9 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 00:13:22 +0300 Subject: [PATCH 16/30] Fix build --- src/core/renderer_gl/renderer_gl.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 706d52ac..0b26f004 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -872,24 +872,24 @@ void RendererGL::setUbershader(const std::string& shader) { initUbershader(triangleProgram); - glUniform1f(depthScaleLoc, oldDepthScale); - glUniform1f(depthOffsetLoc, oldDepthOffset); - glUniform1i(depthmapEnableLoc, oldDepthmapEnable); + glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale); + glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset); + glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable); } void RendererGL::initUbershader(OpenGL::Program& program) { gl.useProgram(program); - textureEnvSourceLoc = OpenGL::uniformLocation(program, "u_textureEnvSource"); - textureEnvOperandLoc = OpenGL::uniformLocation(program, "u_textureEnvOperand"); - textureEnvCombinerLoc = OpenGL::uniformLocation(program, "u_textureEnvCombiner"); - textureEnvColorLoc = OpenGL::uniformLocation(program, "u_textureEnvColor"); - textureEnvScaleLoc = OpenGL::uniformLocation(program, "u_textureEnvScale"); + ubershaderData.textureEnvSourceLoc = OpenGL::uniformLocation(program, "u_textureEnvSource"); + ubershaderData.textureEnvOperandLoc = OpenGL::uniformLocation(program, "u_textureEnvOperand"); + ubershaderData.textureEnvCombinerLoc = OpenGL::uniformLocation(program, "u_textureEnvCombiner"); + ubershaderData.textureEnvColorLoc = OpenGL::uniformLocation(program, "u_textureEnvColor"); + ubershaderData.textureEnvScaleLoc = OpenGL::uniformLocation(program, "u_textureEnvScale"); - depthScaleLoc = OpenGL::uniformLocation(program, "u_depthScale"); - depthOffsetLoc = OpenGL::uniformLocation(program, "u_depthOffset"); - depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable"); - picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs"); + ubershaderData.depthScaleLoc = OpenGL::uniformLocation(program, "u_depthScale"); + ubershaderData.depthOffsetLoc = OpenGL::uniformLocation(program, "u_depthOffset"); + ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable"); + ubershaderData.picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs"); // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); From ea59933b187732ec4dd2dffc52f9c3ab00c970d9 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 00:46:15 +0300 Subject: [PATCH 17/30] Simplify alpha test code --- src/core/PICA/shader_gen_glsl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 50be94f0..0e51ad93 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -358,13 +358,13 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs) { const u32 alphaConfig = regs[InternalRegs::AlphaTestConfig]; + const auto function = static_cast(Helpers::getBits<4, 3>(alphaConfig)); + // Alpha test disabled - if (Helpers::getBit<0>(alphaConfig) == 0) { + if (Helpers::getBit<0>(alphaConfig) == 0 || function == CompareFunction::Always) { return; } - const auto function = static_cast(Helpers::getBits<4, 3>(alphaConfig)); - shader += "if ("; switch (function) { case CompareFunction::Never: shader += "true"; break; From c8eb1c1128581d7409464e98c2a672a394737da9 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 04:10:47 +0300 Subject: [PATCH 18/30] Shader recompiler: Add UBO --- CMakeLists.txt | 1 + include/PICA/pica_frag_config.hpp | 4 +- include/PICA/pica_frag_uniforms.hpp | 18 +++++++++ include/renderer_gl/renderer_gl.hpp | 7 +++- src/core/PICA/shader_gen_glsl.cpp | 37 ++++++++++++----- src/core/renderer_gl/renderer_gl.cpp | 60 ++++++++++++++++++++++++---- 6 files changed, 105 insertions(+), 22 deletions(-) create mode 100644 include/PICA/pica_frag_uniforms.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 194200f0..c52ccd51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -249,6 +249,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp + include/PICA/pica_frag_uniforms.hpp ) cmrc_add_resource_library( diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index c4d46b11..8352cba2 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -23,13 +23,11 @@ namespace PICA { u32 texUnitConfig; u32 texEnvUpdateBuffer; - // TODO: This should probably be a uniform - u32 texEnvBufferColor; - // There's 6 TEV stages, and each one is configured via 5 word-sized registers std::array tevConfigs; }; + // Config used for identifying unique fragment pipeline configurations struct FragmentConfig { OutputConfig outConfig; TextureConfig texConfig; diff --git a/include/PICA/pica_frag_uniforms.hpp b/include/PICA/pica_frag_uniforms.hpp new file mode 100644 index 00000000..b151ed42 --- /dev/null +++ b/include/PICA/pica_frag_uniforms.hpp @@ -0,0 +1,18 @@ +#pragma once +#include +#include + +#include "helpers.hpp" + +namespace PICA { + struct FragmentUniforms { + using vec3 = std::array; + using vec4 = std::array; + static constexpr usize tevStageCount = 6; + + s32 alphaReference; + + alignas(16) vec4 constantColors[tevStageCount]; + alignas(16) vec4 tevBufferColor; + }; +} // namespace PICA \ No newline at end of file diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index b4cf9c6f..a028bdd3 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -67,7 +67,12 @@ class RendererGL final : public Renderer { OpenGL::Framebuffer screenFramebuffer; OpenGL::Texture blankTexture; - std::unordered_map shaderCache; + // Cached recompiled fragment shader + struct CachedProgram { + OpenGL::Program program; + uint uboBinding; + }; + std::unordered_map shaderCache; OpenGL::Framebuffer getColourFBO(); OpenGL::Texture getTexture(Texture& tex); diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 0e51ad93..50e9c3de 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -38,8 +38,6 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { out vec2 v_texcoord1; out vec3 v_view; out vec2 v_texcoord2; - flat out vec4 v_textureEnvColor[6]; - flat out vec4 v_textureEnvBufferColor; //out float gl_ClipDistance[2]; @@ -103,8 +101,6 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { in vec2 v_texcoord1; in vec3 v_view; in vec2 v_texcoord2; - flat in vec4 v_textureEnvColor[6]; - flat in vec4 v_textureEnvBufferColor; out vec4 fragColor; uniform sampler2D u_tex0; @@ -115,18 +111,21 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { uniform sampler1DArray u_tex_lighting_lut; #endif - vec4 tevSources[16]; - vec4 tevNextPreviousBuffer; + layout(std140) uniform FragmentUniforms { + int alphaReference; + + vec4 constantColors[6]; + vec4 tevBufferColor; + }; )"; // Emit main function for fragment shader // When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour ret += R"( void main() { - tevSources[0] = v_colour; - tevSources[13] = vec4(0.0); // Previous buffer colour - tevSources[15] = v_colour; // Previous combiner vec4 combinerOutput = v_colour; // Last TEV output + vec4 previousBuffer = vec4(0.0); // Previous buffer + vec4 tevNextPreviousBuffer = tevBufferColor; )"; ret += R"( @@ -148,7 +147,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { ret += "fragColor = combinerOutput;\n"; ret += "}"; // End of main function - ret += "\n\n\n\n\n\n\n\n\n\n\n\n\n"; + ret += "\n\n\n\n\n\n\n\n\n\n"; return ret; } @@ -201,6 +200,22 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) + ".0, vec3(0.0), vec3(1.0)), clamp(outputAlpha" + std::to_string(stage) + " * " + std::to_string(tev.getAlphaScale()) + ".0, 0.0, 1.0));\n"; + + shader += "previousBuffer = tevNextPreviousBuffer;\n"; + + // Update the "next previous buffer" if necessary + const u32 textureEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + if (stage < 4) { + // Check whether to update rgb + if ((textureEnvUpdateBuffer & (0x100 << stage))) { + shader += "tevNextPreviousBuffer.rgb = combinerOutput.rgb;\n"; + } + + // And whether to update alpha + if ((textureEnvUpdateBuffer & (0x1000u << stage))) { + shader += "tevNextPreviousBuffer.a = combinerOutput.a;\n"; + } + } } } @@ -308,6 +323,8 @@ void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source sour } case TexEnvConfig::Source::Previous: shader += "combinerOutput"; break; + case TexEnvConfig::Source::Constant: shader += "constantColors[" + std::to_string(index) + "]"; break; + case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break; default: Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 0b26f004..aa3bb61b 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -5,6 +5,7 @@ #include #include "PICA/float_types.hpp" +#include "PICA/pica_frag_uniforms.hpp" #include "PICA/gpu.hpp" #include "PICA/regs.hpp" #include "math_util.hpp" @@ -413,7 +414,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; - // Update depth uniforms + // Update ubershader uniforms if (usingUbershader) { if (oldDepthScale != depthScale) { oldDepthScale = depthScale; @@ -429,17 +430,15 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v oldDepthmapEnable = depthMapEnable; glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable); } - } - setupTextureEnvState(); - bindTexturesToSlots(); - - if (usingUbershader) { // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, ®s[0x48]); } + setupTextureEnvState(); + bindTexturesToSlots(); + if (gpu.lightingLUTDirty) { updateLightingLUT(); } @@ -778,6 +777,8 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt } OpenGL::Program& RendererGL::getSpecializedShader() { + constexpr uint uboBlockBinding = 2; + PICA::FragmentConfig fsConfig; auto& outConfig = fsConfig.outConfig; auto& texConfig = fsConfig.texConfig; @@ -788,7 +789,6 @@ OpenGL::Program& RendererGL::getSpecializedShader() { texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; - texConfig.texEnvBufferColor = 0; // Set up TEV stages std::memcpy(&texConfig.tevConfigs[0 * 5], ®s[InternalRegs::TexEnv0Source], 5 * sizeof(u32)); @@ -798,7 +798,9 @@ OpenGL::Program& RendererGL::getSpecializedShader() { std::memcpy(&texConfig.tevConfigs[4 * 5], ®s[InternalRegs::TexEnv4Source], 5 * sizeof(u32)); std::memcpy(&texConfig.tevConfigs[5 * 5], ®s[InternalRegs::TexEnv5Source], 5 * sizeof(u32)); - OpenGL::Program& program = shaderCache[fsConfig]; + CachedProgram& programEntry = shaderCache[fsConfig]; + OpenGL::Program& program = programEntry.program; + if (!program.exists()) { std::string vs = fragShaderGen.getVertexShader(regs); std::string fs = fragShaderGen.generate(regs); @@ -814,8 +816,50 @@ OpenGL::Program& RendererGL::getSpecializedShader() { glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + + // Allocate memory for the program UBO + glGenBuffers(1, &programEntry.uboBinding); + glBindBuffer(GL_UNIFORM_BUFFER, programEntry.uboBinding); + glBufferData(GL_UNIFORM_BUFFER, sizeof(PICA::FragmentUniforms), nullptr, GL_DYNAMIC_DRAW); + + // Set up the binding for our UBO. Sadly we can't specify it in the shader like normal people, + // As it's an OpenGL 4.2 feature that MacOS doesn't support... + uint uboIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms"); + glUniformBlockBinding(program.handle(), uboIndex, uboBlockBinding); + glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, programEntry.uboBinding); } + // Upload uniform data to our shader's UBO + PICA::FragmentUniforms uniforms; + uniforms.alphaReference = Helpers::getBits<8, 8>(regs[InternalRegs::AlphaTestConfig]); + + // Set up the texenv buffer color + const u32 texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor]; + uniforms.tevBufferColor[0] = float(texEnvBufferColor & 0xFF) / 255.0f; + uniforms.tevBufferColor[1] = float((texEnvBufferColor >> 8) & 0xFF) / 255.0f; + uniforms.tevBufferColor[2] = float((texEnvBufferColor >> 16) & 0xFF) / 255.0f; + uniforms.tevBufferColor[3] = float((texEnvBufferColor >> 24) & 0xFF) / 255.0f; + + // Set up the constant color for the 6 TEV stages + for (int i = 0; i < 6; i++) { + static constexpr std::array ioBases = { + PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, + PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, + }; + + auto& vec = uniforms.constantColors[i]; + u32 base = ioBases[i]; + u32 color = regs[base + 3]; + + vec[0] = float(color & 0xFF) / 255.0f; + vec[1] = float((color >> 8) & 0xFF) / 255.0f; + vec[2] = float((color >> 16) & 0xFF) / 255.0f; + vec[3] = float((color >> 24) & 0xFF) / 255.0f; + } + + glBindBuffer(GL_UNIFORM_BUFFER, programEntry.uboBinding); + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms); + return program; } From 0878474e01aa6d982575ff63394d375e4fa1b13b Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 04:30:04 +0300 Subject: [PATCH 19/30] Shader recompiler: Add depth mapping --- include/PICA/pica_frag_config.hpp | 1 + include/PICA/pica_frag_uniforms.hpp | 2 ++ src/core/PICA/shader_gen_glsl.cpp | 15 +++++++++++++++ src/core/renderer_gl/renderer_gl.cpp | 13 +++++++++---- 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index 8352cba2..59f13757 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -16,6 +16,7 @@ namespace PICA { // Merge the enable + compare function into 1 field to avoid duplicate shaders // enable == off means a CompareFunction of Always BitField<0, 3, CompareFunction> alphaTestFunction; + BitField<4, 1, u32> depthMapEnable; }; }; diff --git a/include/PICA/pica_frag_uniforms.hpp b/include/PICA/pica_frag_uniforms.hpp index b151ed42..616f1882 100644 --- a/include/PICA/pica_frag_uniforms.hpp +++ b/include/PICA/pica_frag_uniforms.hpp @@ -11,6 +11,8 @@ namespace PICA { static constexpr usize tevStageCount = 6; s32 alphaReference; + float depthScale; + float depthOffset; alignas(16) vec4 constantColors[tevStageCount]; alignas(16) vec4 tevBufferColor; diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 50e9c3de..f19c699d 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -113,6 +113,8 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { layout(std140) uniform FragmentUniforms { int alphaReference; + float depthScale; + float depthOffset; vec4 constantColors[6]; vec4 tevBufferColor; @@ -138,6 +140,19 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { float alphaOp3 = 0.0; )"; + ret += R"( + // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] + // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] + float z_over_w = gl_FragCoord.z * 2.0f - 1.0f; + float depth = z_over_w * depthScale + depthOffset; + )"; + + if ((regs[InternalRegs::DepthmapEnable] & 1) == 0) { + ret += "depth /= gl_FragCoord.w;\n"; + } + + ret += "gl_FragDepth = depth;\n"; + textureConfig = regs[InternalRegs::TexUnitCfg]; for (int i = 0; i < 6; i++) { compileTEV(ret, i, regs); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index aa3bb61b..9c60ac5f 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -410,12 +410,12 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v static constexpr std::array depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL}; - const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); - const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); - const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; - // Update ubershader uniforms if (usingUbershader) { + const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; + if (oldDepthScale != depthScale) { oldDepthScale = depthScale; glUniform1f(ubershaderData.depthScaleLoc, depthScale); @@ -785,7 +785,9 @@ OpenGL::Program& RendererGL::getSpecializedShader() { auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig]; auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig); + outConfig.alphaTestFunction = (alphaTestConfig & 1) ? static_cast(alphaTestFunction) : PICA::CompareFunction::Always; + outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1; texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; @@ -840,6 +842,9 @@ OpenGL::Program& RendererGL::getSpecializedShader() { uniforms.tevBufferColor[2] = float((texEnvBufferColor >> 16) & 0xFF) / 255.0f; uniforms.tevBufferColor[3] = float((texEnvBufferColor >> 24) & 0xFF) / 255.0f; + uniforms.depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + uniforms.depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + // Set up the constant color for the 6 TEV stages for (int i = 0; i < 6; i++) { static constexpr std::array ioBases = { From fe53214c863cf5fb5219319c9b2a8335ebf2f9b1 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 04:51:08 +0300 Subject: [PATCH 20/30] Shader recompiler: Finish alpha test and stub lighting --- src/core/PICA/shader_gen_glsl.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index f19c699d..11030848 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -340,6 +340,10 @@ void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source sour case TexEnvConfig::Source::Previous: shader += "combinerOutput"; break; case TexEnvConfig::Source::Constant: shader += "constantColors[" + std::to_string(index) + "]"; break; case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break; + + // Lighting + case TexEnvConfig::Source::PrimaryFragmentColor: + case TexEnvConfig::Source::SecondaryFragmentColor: shader += "vec4(0.0, 0.0, 0.0, 1.0)"; break; default: Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); @@ -397,15 +401,23 @@ void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs return; } - shader += "if ("; + shader += "float alphaReferenceFloat = float(alphaReference) / 255.0;\n"; + shader += "if (!("; switch (function) { - case CompareFunction::Never: shader += "true"; break; - case CompareFunction::Always: shader += "false"; break; + case CompareFunction::Never: shader += "false"; break; + case CompareFunction::Always: shader += "true"; break; + case CompareFunction::Equal: shader += "combinerOutput.a == alphaReferenceFloat"; break; + case CompareFunction::NotEqual: shader += "combinerOutput.a != alphaReferenceFloat"; break; + case CompareFunction::Less: shader += "combinerOutput.a < alphaReferenceFloat"; break; + case CompareFunction::LessOrEqual: shader += "combinerOutput.a <= alphaReferenceFloat"; break; + case CompareFunction::Greater: shader += "combinerOutput.a > alphaReferenceFloat"; break; + case CompareFunction::GreaterOrEqual: shader += "combinerOutput.a >= alphaReferenceFloat"; break; + default: Helpers::warn("Unimplemented alpha test function"); shader += "false"; break; } - shader += ") { discard; }\n"; + shader += ")) { discard; }\n"; } From 11c927932978ea157ed3b7e851908f918733b036 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 14:29:49 +0300 Subject: [PATCH 21/30] Properly flush shader cache --- src/core/PICA/shader_gen_glsl.cpp | 6 ++-- src/core/renderer_gl/renderer_gl.cpp | 10 ++++++ third_party/opengl/opengl.hpp | 51 ++++++++++++++++------------ 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 11030848..6e682354 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -140,9 +140,9 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { float alphaOp3 = 0.0; )"; + // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] + // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] ret += R"( - // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] - // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] float z_over_w = gl_FragCoord.z * 2.0f - 1.0f; float depth = z_over_w * depthScale + depthOffset; )"; @@ -343,7 +343,7 @@ void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source sour // Lighting case TexEnvConfig::Source::PrimaryFragmentColor: - case TexEnvConfig::Source::SecondaryFragmentColor: shader += "vec4(0.0, 0.0, 0.0, 1.0)"; break; + case TexEnvConfig::Source::SecondaryFragmentColor: shader += "vec4(1.0, 1.0, 1.0, 1.0)"; break; default: Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 9c60ac5f..d0e2bb31 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -23,6 +23,11 @@ void RendererGL::reset() { colourBufferCache.reset(); textureCache.reset(); + for (auto& shader : shaderCache) { + shader.second.program.free(); + } + shaderCache.clear(); + // Init the colour/depth buffer settings to some random defaults on reset colourBufferLoc = 0; colourBufferFormat = PICA::ColorFmt::RGBA8; @@ -899,6 +904,11 @@ void RendererGL::deinitGraphicsContext() { depthBufferCache.reset(); colourBufferCache.reset(); + for (auto& shader : shaderCache) { + shader.second.program.free(); + } + shaderCache.clear(); + // All other GL objects should be invalidated automatically and be recreated by the next call to initGraphicsContext // TODO: Make it so that depth and colour buffers get written back to 3DS memory printf("RendererGL::DeinitGraphicsContext called\n"); diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp index 9997e63b..828fb784 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp @@ -397,34 +397,41 @@ namespace OpenGL { }; struct Program { - GLuint m_handle = 0; + GLuint m_handle = 0; - bool create(std::initializer_list> shaders) { - m_handle = glCreateProgram(); - for (const auto& shader : shaders) { - glAttachShader(m_handle, shader.get().handle()); - } + bool create(std::initializer_list> shaders) { + m_handle = glCreateProgram(); + for (const auto& shader : shaders) { + glAttachShader(m_handle, shader.get().handle()); + } - glLinkProgram(m_handle); - GLint success; - glGetProgramiv(m_handle, GL_LINK_STATUS, &success); + glLinkProgram(m_handle); + GLint success; + glGetProgramiv(m_handle, GL_LINK_STATUS, &success); - if (!success) { - char buf[4096]; - glGetProgramInfoLog(m_handle, 4096, nullptr, buf); - fprintf(stderr, "Failed to link program\nError: %s\n", buf); - glDeleteProgram(m_handle); + if (!success) { + char buf[4096]; + glGetProgramInfoLog(m_handle, 4096, nullptr, buf); + fprintf(stderr, "Failed to link program\nError: %s\n", buf); + glDeleteProgram(m_handle); - m_handle = 0; - } + m_handle = 0; + } - return m_handle != 0; - } + return m_handle != 0; + } - GLuint handle() const { return m_handle; } - bool exists() const { return m_handle != 0; } - void use() const { glUseProgram(m_handle); } - }; + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void use() const { glUseProgram(m_handle); } + + void free() { + if (exists()) { + glDeleteProgram(m_handle); + m_handle = 0; + } + } + }; static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) { glDispatchCompute(groupsX, groupsY, groupsZ); From c535ae43eed9898c065140cdc2bb00a9ad31ca32 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 14:34:59 +0300 Subject: [PATCH 22/30] Shader recompiler: Fix dot3 RGBA --- src/core/PICA/shader_gen_glsl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 6e682354..56cdd936 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -194,7 +194,7 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg if (tev.colorOp == TexEnvConfig::Operation::Dot3RGBA) { // Dot3 RGBA also writes to the alpha component so we don't need to do anything more - shader += "float outputAlpha" + std::to_string(stage) + " = colorOutput" + std::to_string(stage) + ".x;\n"; + shader += "float outputAlpha" + std::to_string(stage) + " = outputColor" + std::to_string(stage) + ".x;\n"; } else { // Get alpha operands shader += "alphaOp1 = "; From 2cd50e7f376e2b7c0594dd382bedd271d1253bc8 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 15:11:23 +0300 Subject: [PATCH 23/30] Clean up ubershader code --- include/renderer_gl/renderer_gl.hpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 9 ++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index a028bdd3..55a730ec 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -84,7 +84,7 @@ class RendererGL final : public Renderer { void setupBlending(); void setupStencilTest(bool stencilEnable); void bindDepthBuffer(); - void setupTextureEnvState(); + void setupUbershaderTexEnv(); void bindTexturesToSlots(); void updateLightingLUT(); void initGraphicsContextInternal(); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index d0e2bb31..207bfbe4 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -282,12 +282,8 @@ void RendererGL::setupStencilTest(bool stencilEnable) { glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]); } -void RendererGL::setupTextureEnvState() { +void RendererGL::setupUbershaderTexEnv() { // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. - if (!usingUbershader) { - return; - } - static constexpr std::array ioBases = { PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, @@ -439,9 +435,9 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, ®s[0x48]); + setupUbershaderTexEnv(); } - setupTextureEnvState(); bindTexturesToSlots(); if (gpu.lightingLUTDirty) { @@ -811,7 +807,6 @@ OpenGL::Program& RendererGL::getSpecializedShader() { if (!program.exists()) { std::string vs = fragShaderGen.getVertexShader(regs); std::string fs = fragShaderGen.generate(regs); - std::cout << vs << "\n\n" << fs << "\n"; OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); From a2649ffb76879408a174ac817bf875463a8bbcc3 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 15:38:15 +0300 Subject: [PATCH 24/30] Simplify TEV code --- src/core/PICA/shader_gen_glsl.cpp | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 56cdd936..80dbf1ef 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -57,7 +57,6 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { vec4 colourAbs = abs(a_vertexColour); v_colour = min(colourAbs, vec4(1.f)); - // Flip y axis of UVs because OpenGL uses an inverted y for texture sampling compared to the PICA v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w); v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); @@ -125,8 +124,8 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { // When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour ret += R"( void main() { - vec4 combinerOutput = v_colour; // Last TEV output - vec4 previousBuffer = vec4(0.0); // Previous buffer + vec4 combinerOutput = v_colour; + vec4 previousBuffer = vec4(0.0); vec4 tevNextPreviousBuffer = tevBufferColor; )"; @@ -162,7 +161,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { ret += "fragColor = combinerOutput;\n"; ret += "}"; // End of main function - ret += "\n\n\n\n\n\n\n\n\n\n"; + ret += "\n\n\n\n\n\n\n"; return ret; } @@ -188,9 +187,9 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += ";\ncolorOp3 = "; getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage); - shader += ";\nvec3 outputColor" + std::to_string(stage) + " = "; + shader += ";\nvec3 outputColor" + std::to_string(stage) + " = clamp("; getColorOperation(shader, tev.colorOp); - shader += ";\n"; + shader += ", vec3(0.0), vec3(1.0));\n"; if (tev.colorOp == TexEnvConfig::Operation::Dot3RGBA) { // Dot3 RGBA also writes to the alpha component so we don't need to do anything more @@ -206,10 +205,10 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += ";\nalphaOp3 = "; getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage); - shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = "; + shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = clamp("; getAlphaOperation(shader, tev.alphaOp); // Clamp the alpha value to [0.0, 1.0] - shader += ";\nclamp(outputAlpha" + std::to_string(stage) + ", 0.0, 1.0);\n"; + shader += ", 0.0, 1.0);\n"; } shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) + @@ -356,15 +355,15 @@ void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Ope switch (op) { case TexEnvConfig::Operation::Replace: shader += "colorOp1"; break; case TexEnvConfig::Operation::Add: shader += "colorOp1 + colorOp2"; break; - case TexEnvConfig::Operation::AddSigned: shader += "clamp(colorOp1 + colorOp2 - 0.5, 0.0, 1.0);"; break; + case TexEnvConfig::Operation::AddSigned: shader += "colorOp1 + colorOp2 - vec3(0.5)"; break; case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break; case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break; - case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec3(1.0) - colorOp3)"; break; + case TexEnvConfig::Operation::Lerp: shader += "mix(colorOp2, colorOp1, colorOp3)"; break; - case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break; - case TexEnvConfig::Operation::MultiplyAdd: shader += "colorOp1 * colorOp2 + colorOp3"; break; + case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2), vec3(1.0)) * colorOp3"; break; + case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(colorOp1, colorOp2, colorOp3)"; break; case TexEnvConfig::Operation::Dot3RGB: - case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(colorOp1 - 0.5, colorOp2 - 0.5))"; break; + case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(colorOp1 - vec3(0.5), colorOp2 - vec3(0.5)))"; break; default: Helpers::warn("FragmentGenerator: Unimplemented color op"); shader += "vec3(1.0)"; @@ -376,13 +375,13 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope switch (op) { case TexEnvConfig::Operation::Replace: shader += "alphaOp1"; break; case TexEnvConfig::Operation::Add: shader += "alphaOp1 + alphaOp2"; break; - case TexEnvConfig::Operation::AddSigned: shader += "clamp(alphaOp1 + alphaOp2 - 0.5, 0.0, 1.0);"; break; + case TexEnvConfig::Operation::AddSigned: shader += "alphaOp1 + alphaOp2 - 0.5"; break; case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break; case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break; - case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (1.0 - alphaOp3)"; break; + case TexEnvConfig::Operation::Lerp: shader += "mix(alphaOp2, alphaOp1, alphaOp3)"; break; case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break; - case TexEnvConfig::Operation::MultiplyAdd: shader += "alphaOp1 * alphaOp2 + alphaOp3"; break; + case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(alphaOp1, alphaOp2, alphaOp3)"; break; case TexEnvConfig::Operation::Dot3RGB: case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(alphaOp1 - 0.5, alphaOp2 - 0.5))"; break; default: From b8a186d5cd9a2a2db2f61c7ce38355cc22eb28ba Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 16:34:33 +0300 Subject: [PATCH 25/30] Shadergen: Fix add-multiply --- src/core/PICA/shader_gen_glsl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 80dbf1ef..86594023 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -360,7 +360,7 @@ void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Ope case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break; case TexEnvConfig::Operation::Lerp: shader += "mix(colorOp2, colorOp1, colorOp3)"; break; - case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2), vec3(1.0)) * colorOp3"; break; + case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break; case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(colorOp1, colorOp2, colorOp3)"; break; case TexEnvConfig::Operation::Dot3RGB: case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(colorOp1 - vec3(0.5), colorOp2 - vec3(0.5)))"; break; From db801312134cb9654e6afd932de1784af13d0081 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 15 Jul 2024 18:27:22 +0300 Subject: [PATCH 26/30] Shadergen: Previous buffer should be able to be set even for passthrough TEV stages --- src/core/PICA/shader_gen_glsl.cpp | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 86594023..556c0794 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -214,21 +214,21 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) + ".0, vec3(0.0), vec3(1.0)), clamp(outputAlpha" + std::to_string(stage) + " * " + std::to_string(tev.getAlphaScale()) + ".0, 0.0, 1.0));\n"; + } - shader += "previousBuffer = tevNextPreviousBuffer;\n"; + shader += "previousBuffer = tevNextPreviousBuffer;\n\n"; - // Update the "next previous buffer" if necessary - const u32 textureEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; - if (stage < 4) { - // Check whether to update rgb - if ((textureEnvUpdateBuffer & (0x100 << stage))) { - shader += "tevNextPreviousBuffer.rgb = combinerOutput.rgb;\n"; - } + // Update the "next previous buffer" if necessary + const u32 textureEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + if (stage < 4) { + // Check whether to update rgb + if ((textureEnvUpdateBuffer & (0x100 << stage))) { + shader += "tevNextPreviousBuffer.rgb = combinerOutput.rgb;\n"; + } - // And whether to update alpha - if ((textureEnvUpdateBuffer & (0x1000u << stage))) { - shader += "tevNextPreviousBuffer.a = combinerOutput.a;\n"; - } + // And whether to update alpha + if ((textureEnvUpdateBuffer & (0x1000u << stage))) { + shader += "tevNextPreviousBuffer.a = combinerOutput.a;\n"; } } } @@ -382,8 +382,6 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break; case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(alphaOp1, alphaOp2, alphaOp3)"; break; - case TexEnvConfig::Operation::Dot3RGB: - case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(alphaOp1 - 0.5, alphaOp2 - 0.5))"; break; default: Helpers::warn("FragmentGenerator: Unimplemented alpha op"); shader += "1.0"; From c02b3822623372d9598d38da4dd66f8f7e8a090c Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 16 Jul 2024 00:58:52 +0300 Subject: [PATCH 27/30] Perform alpha test with integers instead of floats --- src/core/PICA/shader_gen_glsl.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 556c0794..d4a4bf8e 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -398,17 +398,17 @@ void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs return; } - shader += "float alphaReferenceFloat = float(alphaReference) / 255.0;\n"; - shader += "if (!("; + shader += "int testingAlpha = int(combinerOutput.a * 255.0);\n"; + shader += "if ("; switch (function) { - case CompareFunction::Never: shader += "false"; break; - case CompareFunction::Always: shader += "true"; break; - case CompareFunction::Equal: shader += "combinerOutput.a == alphaReferenceFloat"; break; - case CompareFunction::NotEqual: shader += "combinerOutput.a != alphaReferenceFloat"; break; - case CompareFunction::Less: shader += "combinerOutput.a < alphaReferenceFloat"; break; - case CompareFunction::LessOrEqual: shader += "combinerOutput.a <= alphaReferenceFloat"; break; - case CompareFunction::Greater: shader += "combinerOutput.a > alphaReferenceFloat"; break; - case CompareFunction::GreaterOrEqual: shader += "combinerOutput.a >= alphaReferenceFloat"; break; + case CompareFunction::Never: shader += "true"; break; + case CompareFunction::Always: shader += "false"; break; + case CompareFunction::Equal: shader += "testingAlpha != alphaReference"; break; + case CompareFunction::NotEqual: shader += "testingAlpha == alphaReference"; break; + case CompareFunction::Less: shader += "testingAlpha >= alphaReference"; break; + case CompareFunction::LessOrEqual: shader += "testingAlpha > alphaReference"; break; + case CompareFunction::Greater: shader += "testingAlpha <= alphaReference"; break; + case CompareFunction::GreaterOrEqual: shader += "testingAlpha < alphaReference"; break; default: Helpers::warn("Unimplemented alpha test function"); @@ -416,5 +416,5 @@ void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs break; } - shader += ")) { discard; }\n"; + shader += ") { discard; }\n"; } From 441aa2346c6ebd004865a324f4e683fa814949b1 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 16 Jul 2024 02:20:37 +0300 Subject: [PATCH 28/30] Shadergen: Add clipping --- include/PICA/pica_frag_uniforms.hpp | 1 + src/core/PICA/shader_gen_glsl.cpp | 34 ++++++++++++++++++++-------- src/core/renderer_gl/renderer_gl.cpp | 7 ++++++ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/include/PICA/pica_frag_uniforms.hpp b/include/PICA/pica_frag_uniforms.hpp index 616f1882..332acd4e 100644 --- a/include/PICA/pica_frag_uniforms.hpp +++ b/include/PICA/pica_frag_uniforms.hpp @@ -16,5 +16,6 @@ namespace PICA { alignas(16) vec4 constantColors[tevStageCount]; alignas(16) vec4 tevBufferColor; + alignas(16) vec4 clipCoords; }; } // namespace PICA \ No newline at end of file diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index d4a4bf8e..e135ac8e 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -2,6 +2,18 @@ using namespace PICA; using namespace PICA::ShaderGen; +static constexpr const char* uniformDefinition = R"( + layout(std140) uniform FragmentUniforms { + int alphaReference; + float depthScale; + float depthOffset; + + vec4 constantColors[6]; + vec4 tevBufferColor; + vec4 clipCoords; + }; +)"; + std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { std::string ret = ""; @@ -20,6 +32,8 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { )"; } + ret += uniformDefinition; + ret += R"( layout(location = 0) in vec4 a_coords; layout(location = 1) in vec4 a_quaternion; @@ -39,7 +53,9 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { out vec3 v_view; out vec2 v_texcoord2; - //out float gl_ClipDistance[2]; + #ifndef USING_GLES + out float gl_ClipDistance[2]; + #endif vec4 abgr8888ToVec4(uint abgr) { const float scale = 1.0 / 255.0; @@ -65,6 +81,11 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); + + #ifndef USING_GLES + gl_ClipDistance[0] = -a_coords.z; + gl_ClipDistance[1] = dot(clipCoords, a_coords); + #endif } )"; @@ -109,17 +130,10 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { #ifndef USING_GLES uniform sampler1DArray u_tex_lighting_lut; #endif - - layout(std140) uniform FragmentUniforms { - int alphaReference; - float depthScale; - float depthOffset; - - vec4 constantColors[6]; - vec4 tevBufferColor; - }; )"; + ret += uniformDefinition; + // Emit main function for fragment shader // When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour ret += R"( diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 207bfbe4..97b642c7 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -845,6 +845,13 @@ OpenGL::Program& RendererGL::getSpecializedShader() { uniforms.depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); uniforms.depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + if (regs[InternalRegs::ClipEnable] & 1) { + uniforms.clipCoords[0] = f24::fromRaw(regs[PICA::InternalRegs::ClipData0] & 0xffffff).toFloat32(); + uniforms.clipCoords[1] = f24::fromRaw(regs[PICA::InternalRegs::ClipData1] & 0xffffff).toFloat32(); + uniforms.clipCoords[2] = f24::fromRaw(regs[PICA::InternalRegs::ClipData2] & 0xffffff).toFloat32(); + uniforms.clipCoords[3] = f24::fromRaw(regs[PICA::InternalRegs::ClipData3] & 0xffffff).toFloat32(); + } + // Set up the constant color for the 6 TEV stages for (int i = 0; i < 6; i++) { static constexpr std::array ioBases = { From a4ec7705878c56dbc6f2aaf1a70a0dc384c04566 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 17 Jul 2024 01:32:55 +0300 Subject: [PATCH 29/30] Add UBO/BlendEquation/BlendFunc to GL state manager --- include/renderer_gl/gl_state.hpp | 48 +++++++++++++++++++++++++++- src/core/renderer_gl/gl_state.cpp | 18 +++++++++-- src/core/renderer_gl/renderer_gl.cpp | 8 ++--- 3 files changed, 67 insertions(+), 7 deletions(-) diff --git a/include/renderer_gl/gl_state.hpp b/include/renderer_gl/gl_state.hpp index 69960f1e..e5591ea0 100644 --- a/include/renderer_gl/gl_state.hpp +++ b/include/renderer_gl/gl_state.hpp @@ -40,9 +40,13 @@ struct GLStateManager { GLuint boundVAO; GLuint boundVBO; GLuint currentProgram; + GLuint boundUBO; GLenum depthFunc; GLenum logicOp; + GLenum blendEquationRGB, blendEquationAlpha; + GLenum blendFuncSourceRGB, blendFuncSourceAlpha; + GLenum blendFuncDestRGB, blendFuncDestAlpha; void reset(); void resetBlend(); @@ -51,7 +55,7 @@ struct GLStateManager { void resetColourMask(); void resetDepth(); void resetVAO(); - void resetVBO(); + void resetBuffers(); void resetProgram(); void resetScissor(); void resetStencil(); @@ -183,6 +187,13 @@ struct GLStateManager { } } + void bindUBO(GLuint handle) { + if (boundUBO != handle) { + boundUBO = handle; + glBindBuffer(GL_UNIFORM_BUFFER, boundUBO); + } + } + void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); } void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); } void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); } @@ -224,6 +235,41 @@ struct GLStateManager { } void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast(func)); } + + // Counterpart to glBlendEquationSeparate + void setBlendEquation(GLenum modeRGB, GLenum modeAlpha) { + if (blendEquationRGB != modeRGB || blendEquationAlpha != modeAlpha) { + blendEquationRGB = modeRGB; + blendEquationAlpha = modeAlpha; + + glBlendEquationSeparate(modeRGB, modeAlpha); + } + } + + // Counterpart to glBlendFuncSeparate + void setBlendFunc(GLenum sourceRGB, GLenum destRGB, GLenum sourceAlpha, GLenum destAlpha) { + if (blendFuncSourceRGB != sourceRGB || blendFuncDestRGB != destRGB || blendFuncSourceAlpha != sourceAlpha || + blendFuncDestAlpha != destAlpha) { + + blendFuncSourceRGB = sourceRGB; + blendFuncDestRGB = destRGB; + blendFuncSourceAlpha = sourceAlpha; + blendFuncDestAlpha = destAlpha; + + glBlendFuncSeparate(sourceRGB, destRGB,sourceAlpha, destAlpha); + } + } + + // Counterpart to regular glBlendEquation + void setBlendEquation(GLenum mode) { setBlendEquation(mode, mode); } + + void setBlendEquation(OpenGL::BlendEquation modeRGB, OpenGL::BlendEquation modeAlpha) { + setBlendEquation(static_cast(modeRGB), static_cast(modeAlpha)); + } + + void setBlendEquation(OpenGL::BlendEquation mode) { + setBlendEquation(static_cast(mode)); + } }; static_assert(std::is_trivially_constructible(), "OpenGL State Manager class is not trivially constructible!"); diff --git a/src/core/renderer_gl/gl_state.cpp b/src/core/renderer_gl/gl_state.cpp index d2eec0d5..3d1c0681 100644 --- a/src/core/renderer_gl/gl_state.cpp +++ b/src/core/renderer_gl/gl_state.cpp @@ -5,9 +5,20 @@ void GLStateManager::resetBlend() { logicOpEnabled = false; logicOp = GL_COPY; + blendEquationRGB = GL_FUNC_ADD; + blendEquationAlpha = GL_FUNC_ADD; + + blendFuncSourceRGB = GL_SRC_COLOR; + blendFuncDestRGB = GL_DST_COLOR; + blendFuncSourceAlpha = GL_SRC_ALPHA; + blendFuncDestAlpha = GL_DST_ALPHA; + OpenGL::disableBlend(); OpenGL::disableLogicOp(); OpenGL::setLogicOp(GL_COPY); + + glBlendEquationSeparate(blendEquationRGB, blendEquationAlpha); + glBlendFuncSeparate(blendFuncSourceRGB, blendFuncDestRGB, blendFuncSourceAlpha, blendFuncDestAlpha); } void GLStateManager::resetClearing() { @@ -61,9 +72,12 @@ void GLStateManager::resetVAO() { glBindVertexArray(0); } -void GLStateManager::resetVBO() { +void GLStateManager::resetBuffers() { boundVBO = 0; + boundUBO = 0; + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_UNIFORM_BUFFER, 0); } void GLStateManager::resetProgram() { @@ -79,7 +93,7 @@ void GLStateManager::reset() { resetDepth(); resetVAO(); - resetVBO(); + resetBuffers(); resetProgram(); resetScissor(); resetStencil(); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 97b642c7..b9a2c7ae 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -229,8 +229,8 @@ void RendererGL::setupBlending() { OpenGL::setBlendColor(float(r) / 255.f, float(g) / 255.f, float(b) / 255.f, float(a) / 255.f); // Translate equations and funcs to their GL equivalents and set them - glBlendEquationSeparate(blendingEquations[rgbEquation], blendingEquations[alphaEquation]); - glBlendFuncSeparate(blendingFuncs[rgbSourceFunc], blendingFuncs[rgbDestFunc], blendingFuncs[alphaSourceFunc], blendingFuncs[alphaDestFunc]); + gl.setBlendEquation(blendingEquations[rgbEquation], blendingEquations[alphaEquation]); + gl.setBlendFunc(blendingFuncs[rgbSourceFunc], blendingFuncs[rgbDestFunc], blendingFuncs[alphaSourceFunc], blendingFuncs[alphaDestFunc]); } } @@ -821,7 +821,7 @@ OpenGL::Program& RendererGL::getSpecializedShader() { // Allocate memory for the program UBO glGenBuffers(1, &programEntry.uboBinding); - glBindBuffer(GL_UNIFORM_BUFFER, programEntry.uboBinding); + gl.bindUBO(programEntry.uboBinding); glBufferData(GL_UNIFORM_BUFFER, sizeof(PICA::FragmentUniforms), nullptr, GL_DYNAMIC_DRAW); // Set up the binding for our UBO. Sadly we can't specify it in the shader like normal people, @@ -869,7 +869,7 @@ OpenGL::Program& RendererGL::getSpecializedShader() { vec[3] = float((color >> 24) & 0xFF) / 255.0f; } - glBindBuffer(GL_UNIFORM_BUFFER, programEntry.uboBinding); + gl.bindUBO(programEntry.uboBinding); glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms); return program; From aad7bb817eaf92a2632315b580ac9508d105fac5 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 17 Jul 2024 02:25:38 +0300 Subject: [PATCH 30/30] Add setting for ubershaders --- include/config.hpp | 3 +++ include/renderer.hpp | 2 ++ include/renderer_gl/renderer_gl.hpp | 4 +++- src/config.cpp | 4 ++++ src/core/PICA/gpu.cpp | 1 + src/libretro_core.cpp | 2 ++ 6 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/config.hpp b/include/config.hpp index 6dbae9e3..8aa695aa 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -13,8 +13,11 @@ struct EmulatorConfig { static constexpr bool shaderJitDefault = false; #endif + static constexpr bool ubershaderDefault = true; + bool shaderJitEnabled = shaderJitDefault; bool discordRpcEnabled = false; + bool useUbershaders = ubershaderDefault; bool accurateShaderMul = false; RendererType rendererType = RendererType::OpenGL; Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null; diff --git a/include/renderer.hpp b/include/renderer.hpp index 17812bcf..e64d49e3 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -74,6 +74,8 @@ class Renderer { virtual std::string getUbershader() { return ""; } virtual void setUbershader(const std::string& shader) {} + virtual void setUbershaderSetting(bool value) {} + // Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window #ifdef PANDA3DS_FRONTEND_QT virtual void initGraphicsContext(GL::Context* context) { Helpers::panic("Tried to initialize incompatible renderer with GL context"); } diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 55a730ec..6414a7cf 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -30,6 +30,7 @@ class RendererGL final : public Renderer { OpenGL::VertexArray vao; OpenGL::VertexBuffer vbo; + bool usingUbershader = true; // Data struct { @@ -56,7 +57,6 @@ class RendererGL final : public Renderer { SurfaceCache depthBufferCache; SurfaceCache colourBufferCache; SurfaceCache textureCache; - bool usingUbershader = false; // Dummy VAO/VBO for blitting the final output OpenGL::VertexArray dummyVAO; @@ -107,6 +107,8 @@ class RendererGL final : public Renderer { virtual std::string getUbershader() override; virtual void setUbershader(const std::string& shader) override; + virtual void setUbershaderSetting(bool value) override { usingUbershader = value; } + std::optional getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); // Note: The caller is responsible for deleting the currently bound FBO before calling this diff --git a/src/config.cpp b/src/config.cpp index 5af4d654..cc34d148 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -62,6 +62,7 @@ void EmulatorConfig::load() { shaderJitEnabled = toml::find_or(gpu, "EnableShaderJIT", shaderJitDefault); vsyncEnabled = toml::find_or(gpu, "EnableVSync", true); + useUbershaders = toml::find_or(gpu, "UseUbershaders", ubershaderDefault); accurateShaderMul = toml::find_or(gpu, "AccurateShaderMultiplication", false); } } @@ -123,10 +124,13 @@ void EmulatorConfig::save() { data["General"]["EnableDiscordRPC"] = discordRpcEnabled; data["General"]["UsePortableBuild"] = usePortableBuild; data["General"]["DefaultRomPath"] = defaultRomPath.string(); + data["GPU"]["EnableShaderJIT"] = shaderJitEnabled; data["GPU"]["Renderer"] = std::string(Renderer::typeToString(rendererType)); data["GPU"]["EnableVSync"] = vsyncEnabled; data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul; + data["GPU"]["UseUbershaders"] = useUbershaders; + data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType)); data["Audio"]["EnableAudio"] = audioEnabled; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index ed0e5420..a54fe6eb 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -110,6 +110,7 @@ void GPU::reset() { externalRegs[Framebuffer1Config] = static_cast(PICA::ColorFmt::RGB8); externalRegs[Framebuffer1Select] = 0; + renderer->setUbershaderSetting(config.useUbershaders); renderer->reset(); } diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index 3825d3ed..a6a1ff00 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -147,6 +147,7 @@ static void configInit() { static const retro_variable values[] = { {"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"}, {"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"}, + {"panda3ds_use_ubershader", "Use ubershaders (No stutter, maybe slower); enabled|disabled"}, {"panda3ds_use_vsync", "Enable VSync; enabled|disabled"}, {"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"}, {"panda3ds_use_audio", "Enable audio; disabled|enabled"}, @@ -173,6 +174,7 @@ static void configUpdate() { config.sdCardInserted = FetchVariableBool("panda3ds_use_virtual_sd", true); config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false); config.accurateShaderMul = FetchVariableBool("panda3ds_accurate_shader_mul", false); + config.useUbershaders = FetchVariableBool("panda3ds_use_ubershader", true); config.discordRpcEnabled = false; config.save();