diff --git a/CMakeLists.txt b/CMakeLists.txt index 748c298b..0bdb8abb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -192,7 +192,7 @@ set(SERVICE_SOURCE_FILES src/core/services/service_manager.cpp src/core/services set(PICA_SOURCE_FILES src/core/PICA/gpu.cpp src/core/PICA/regs.cpp src/core/PICA/shader_unit.cpp src/core/PICA/shader_interpreter.cpp src/core/PICA/dynapica/shader_rec.cpp src/core/PICA/dynapica/shader_rec_emitter_x64.cpp src/core/PICA/pica_hash.cpp - src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp + src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp src/core/PICA/shader_gen_glsl.cpp ) set(LOADER_SOURCE_FILES src/core/loader/elf.cpp src/core/loader/ncsd.cpp src/core/loader/ncch.cpp src/core/loader/3dsx.cpp src/core/loader/lz77.cpp) @@ -239,7 +239,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/services/news_u.hpp include/applets/software_keyboard.hpp include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp - include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp + include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp include/PICA/shader_gen.hpp include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 4342ebe5..5b9e1830 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -345,4 +345,120 @@ namespace PICA { GeometryPrimitive = 3, }; + struct 
TexEnvConfig {  // Decoded view of the five registers that configure a single TEV stage
+		enum class Source : u8 {  // Where a combiner input is read from
+			PrimaryColor = 0x0,
+			PrimaryFragmentColor = 0x1,
+			SecondaryFragmentColor = 0x2,
+			Texture0 = 0x3,
+			Texture1 = 0x4,
+			Texture2 = 0x5,
+			Texture3 = 0x6,
+			// TODO: In-between values are unknown
+			PreviousBuffer = 0xD,
+			Constant = 0xE,
+			Previous = 0xF,
+		};
+
+		enum class ColorOperand : u8 {  // Which component(s) of a source feed the RGB combiner, optionally as (1 - x)
+			SourceColor = 0x0,
+			OneMinusSourceColor = 0x1,
+			SourceAlpha = 0x2,
+			OneMinusSourceAlpha = 0x3,
+			SourceRed = 0x4,
+			OneMinusSourceRed = 0x5,
+			// TODO: In-between values are unknown
+			SourceGreen = 0x8,
+			OneMinusSourceGreen = 0x9,
+			// TODO: In-between values are unknown
+			SourceBlue = 0xC,
+			OneMinusSourceBlue = 0xD,
+		};
+
+		enum class AlphaOperand : u8 {  // Which single component of a source feeds the alpha combiner, optionally as (1 - x)
+			SourceAlpha = 0x0,
+			OneMinusSourceAlpha = 0x1,
+			SourceRed = 0x2,
+			OneMinusSourceRed = 0x3,
+			SourceGreen = 0x4,
+			OneMinusSourceGreen = 0x5,
+			SourceBlue = 0x6,
+			OneMinusSourceBlue = 0x7,
+		};
+
+		enum class Operation : u8 {  // How the (up to three) operands of a stage are combined
+			Replace = 0,
+			Modulate = 1,
+			Add = 2,
+			AddSigned = 3,
+			Lerp = 4,
+			Subtract = 5,
+			Dot3RGB = 6,
+			Dot3RGBA = 7,
+			MultiplyAdd = 8,
+			AddMultiply = 9,
+		};
+
+		// RGB sources
+		Source colorSource1, colorSource2, colorSource3;
+		// Alpha sources
+		Source alphaSource1, alphaSource2, alphaSource3;
+
+		// RGB operands
+		ColorOperand colorOperand1, colorOperand2, colorOperand3;
+		// Alpha operands
+		AlphaOperand alphaOperand1, alphaOperand2, alphaOperand3;
+
+		// Texture environment operations for this stage
+		Operation colorOp, alphaOp;
+
+		u32 constColor;  // Copied verbatim from the `color` constructor argument
+
+	  private:
+		// These are the only private members since their value doesn't actually reflect the scale
+		// So we make them private so we'll always use the appropriate member functions instead
+		u8 colorScale;
+		u8 alphaScale;
+
+	  public:
+		// Create texture environment object from TEV registers (one raw register value per argument)
+		TexEnvConfig(u32 source, u32 operand, u32 combiner, u32 color, u32 scale) : constColor(color) {
+			colorSource1 = Helpers::getBits<0, 4, Source>(source);  // NOTE(review): arguments presumably <first bit, bit count, result type> - confirm in helpers.hpp
+			colorSource2 = Helpers::getBits<4, 4, Source>(source);
+			colorSource3 = Helpers::getBits<8, 4, Source>(source);
+
+			alphaSource1 = Helpers::getBits<16, 4, Source>(source);
+			alphaSource2 = Helpers::getBits<20, 4, Source>(source);
+			alphaSource3 = Helpers::getBits<24, 4, Source>(source);
+
+			colorOperand1 = Helpers::getBits<0, 4, ColorOperand>(operand);
+			colorOperand2 = Helpers::getBits<4, 4, ColorOperand>(operand);
+			colorOperand3 = Helpers::getBits<8, 4, ColorOperand>(operand);
+
+			alphaOperand1 = Helpers::getBits<12, 3, AlphaOperand>(operand);
+			alphaOperand2 = Helpers::getBits<16, 3, AlphaOperand>(operand);
+			alphaOperand3 = Helpers::getBits<20, 3, AlphaOperand>(operand);
+
+			colorOp = Helpers::getBits<0, 4, Operation>(combiner);
+			alphaOp = Helpers::getBits<16, 4, Operation>(combiner);
+
+			colorScale = Helpers::getBits<0, 2>(scale);  // Stored raw; read it through getColorScale()
+			alphaScale = Helpers::getBits<16, 2>(scale);  // Stored raw; read it through getAlphaScale()
+		}
+
+		u32 getColorScale() const { return (colorScale <= 2) ? (1 << colorScale) : 1; }  // Raw 0/1/2 -> multiplier 1/2/4, anything else -> 1
+		u32 getAlphaScale() const { return (alphaScale <= 2) ? (1 << alphaScale) : 1; }  // Raw 0/1/2 -> multiplier 1/2/4, anything else -> 1
+
+		bool isPassthroughStage() const {  // True when the stage only forwards the previous stage's colour and alpha, unscaled
+			// clang-format off
+			// Thank you to the Citra dev that wrote this out
+			return (
+				colorOp == Operation::Replace && alphaOp == Operation::Replace &&
+				colorSource1 == Source::Previous && alphaSource1 == Source::Previous &&
+				colorOperand1 == ColorOperand::SourceColor && alphaOperand1 == AlphaOperand::SourceAlpha &&
+				getColorScale() == 1 && getAlphaScale() == 1
+			);
+			// clang-format on
+		}
+	};
 } // namespace PICA
diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp
new file mode 100644
index 00000000..23a87120
--- /dev/null
+++ b/include/PICA/shader_gen.hpp
@@ -0,0 +1,39 @@
+#pragma once
+#include 
+
+#include "PICA/gpu.hpp"
+#include "PICA/regs.hpp"
+#include "helpers.hpp"
+
+namespace PICA::ShaderGen {
+	// Graphics API this shader is targeting
+	enum class API { GL, GLES, Vulkan };
+
+	// Shading language to use (Only GLSL for the time being)
+	enum class Language { GLSL };
+
+	class FragmentGenerator {
+		
using PICARegs = std::array;  // NOTE(review): template arguments appear stripped in this copy - confirm element type and size
+		API api;
+		Language language;
+
+		void compileTEV(std::string& shader, int stage, const PICARegs& regs);  // Appends the GLSL for one TEV stage to `shader`
+		void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index);  // Appends the GLSL expression for a TEV source
+		void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index);
+		void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index);
+		void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
+		void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
+
+		u32 textureConfig = 0;  // Set by generate() from the TexUnitCfg register; getSource() tests bit 13 of it
+
+	  public:
+		FragmentGenerator(API api, Language language) : api(api), language(language) {}
+		std::string generate(const PICARegs& regs);  // Returns fragment shader source built from the TEV registers in `regs`
+		std::string getVertexShader(const PICARegs& regs);  // Returns the vertex shader source for the selected API
+
+		void setTarget(API api, Language language) {  // Switches the API/language used by later generate()/getVertexShader() calls
+			this->api = api;
+			this->language = language;
+		}
+	};
+};  // namespace PICA::ShaderGen
\ No newline at end of file
diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp
index 92f02662..e8eaeacb 100644
--- a/include/renderer_gl/renderer_gl.hpp
+++ b/include/renderer_gl/renderer_gl.hpp
@@ -1,11 +1,16 @@
 #pragma once
 
 #include 
+#include 
+#include 
 #include 
+#include 
 
 #include "PICA/float_types.hpp"
+#include "PICA/pica_hash.hpp"
 #include "PICA/pica_vertex.hpp"
 #include "PICA/regs.hpp"
+#include "PICA/shader_gen.hpp"
 #include "gl_state.hpp"
 #include "helpers.hpp"
 #include "logger.hpp"
@@ -16,6 +21,32 @@
 // More circular dependencies!
 class GPU;
 
+namespace PICA {
+	struct FragmentConfig {  // Register snapshot used as the key of RendererGL::shaderCache
+		u32 texUnitConfig;
+		u32 texEnvUpdateBuffer;
+
+		// TODO: This should probably be a uniform
+		u32 texEnvBufferColor;
+
+		// There's 6 TEV stages, and each one is configured via 5 word-sized registers
+		std::array tevConfigs;  // NOTE(review): template arguments appear stripped in this copy - confirm element type and size
+
+		// Hash function and equality operator required by std::unordered_map
+		bool operator==(const FragmentConfig& config) const {
+			return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;  // Compares the raw bytes of the two structs
+		}
+	};
+}  // namespace PICA
+
+// Override std::hash for our fragment config class
+template <>
+struct std::hash {  // NOTE(review): template argument appears stripped in this copy - presumably PICA::FragmentConfig; confirm
+	std::size_t operator()(const PICA::FragmentConfig& config) const noexcept {
+		return PICAHash::computeHash((const char*)&config, sizeof(config));  // Hashes the raw bytes of the struct
+	}
+};
+
 class RendererGL final : public Renderer {
 	GLStateManager gl = {};
 
@@ -25,20 +56,23 @@ class RendererGL final : public Renderer {
 	OpenGL::VertexArray vao;
 	OpenGL::VertexBuffer vbo;
 
-	// TEV configuration uniform locations
-	GLint textureEnvSourceLoc = -1;
-	GLint textureEnvOperandLoc = -1;
-	GLint textureEnvCombinerLoc = -1;
-	GLint textureEnvColorLoc = -1;
-	GLint textureEnvScaleLoc = -1;
+	// Data
+	struct {
+		// TEV configuration uniform locations
+		GLint textureEnvSourceLoc = -1;
+		GLint textureEnvOperandLoc = -1;
+		GLint textureEnvCombinerLoc = -1;
+		GLint textureEnvColorLoc = -1;
+		GLint textureEnvScaleLoc = -1;
 
-	// Uniform of PICA registers
-	GLint picaRegLoc = -1;
+		// Uniform of PICA registers
+		GLint picaRegLoc = -1;
 
-	// Depth configuration uniform locations
-	GLint depthOffsetLoc = -1;
-	GLint depthScaleLoc = -1;
-	GLint depthmapEnableLoc = -1;
+		// Depth configuration uniform locations
+		GLint depthOffsetLoc = -1;
+		GLint depthScaleLoc = -1;
+		GLint depthmapEnableLoc = -1;
+	} ubershaderData;  // Uniform locations queried from triangleProgram
 
 	float oldDepthScale = -1.0;
 	float oldDepthOffset = 0.0;
@@ -47,6 +81,7 @@ class RendererGL final : public Renderer {
 	SurfaceCache depthBufferCache;
 	SurfaceCache colourBufferCache;
 	SurfaceCache textureCache;
+	bool 
usingUbershader = false;  // When false, drawVertices() binds the program returned by getSpecializedShader()
 
 	// Dummy VAO/VBO for blitting the final output
 	OpenGL::VertexArray dummyVAO;
@@ -57,8 +92,13 @@ class RendererGL final : public Renderer {
 	OpenGL::Framebuffer screenFramebuffer;
 	OpenGL::Texture blankTexture;
 
+	std::unordered_map shaderCache;  // NOTE(review): template arguments appear stripped in this copy; indexed by FragmentConfig in getSpecializedShader()
+
 	OpenGL::Framebuffer getColourFBO();
 	OpenGL::Texture getTexture(Texture& tex);
+	OpenGL::Program& getSpecializedShader();
+
+	PICA::ShaderGen::FragmentGenerator fragShaderGen;
 
 	MAKE_LOG_FUNCTION(log, rendererLogger)
 	void setupBlending();
@@ -71,7 +111,7 @@ class RendererGL final : public Renderer {
 
   public:
 	RendererGL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs)
-		: Renderer(gpu, internalRegs, externalRegs) {}
+		: Renderer(gpu, internalRegs, externalRegs), fragShaderGen(PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL) {}
 	~RendererGL() override;
 
 	void reset() override;
@@ -95,4 +135,4 @@ class RendererGL final : public Renderer {
 
 	// Take a screenshot of the screen and store it in a file
 	void screenshot(const std::string& name) override;
-};
+};
\ No newline at end of file
diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp
new file mode 100644
index 00000000..c3056815
--- /dev/null
+++ b/src/core/PICA/shader_gen_glsl.cpp
@@ -0,0 +1,355 @@
+#include "PICA/shader_gen.hpp"
+using namespace PICA;
+using namespace PICA::ShaderGen;
+// Returns the vertex shader source for the selected API; `regs` is not read by this function
+std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
+	std::string ret = "";
+
+	switch (api) {  // Version directive; any API other than GL/GLES gets none
+		case API::GL: ret += "#version 410 core"; break;
+		case API::GLES: ret += "#version 300 es"; break;
+		default: break;
+	}
+
+	if (api == API::GLES) {
+		ret += R"(
+			#define USING_GLES 1
+
+			precision mediump int;
+			precision mediump float;
+		)";
+	}
+
+	ret += R"(
+		layout(location = 0) in vec4 a_coords;
+		layout(location = 1) in vec4 a_quaternion;
+		layout(location = 2) in vec4 a_vertexColour;
+		layout(location = 3) in vec2 a_texcoord0;
+		layout(location = 4) in vec2 a_texcoord1;
+		layout(location = 5) in float a_texcoord0_w;
+		layout(location = 6) in vec3 a_view;
+		layout(location = 7) in vec2 a_texcoord2;
+
+		out vec3 v_normal;
+		out vec3 v_tangent;
+		out vec3 v_bitangent;
+		out vec4 v_colour;
+		out vec3 v_texcoord0;
+		out vec2 v_texcoord1;
+		out vec3 v_view;
+		out vec2 v_texcoord2;
+		flat out vec4 v_textureEnvColor[6];
+		flat out vec4 v_textureEnvBufferColor;
+
+		//out float gl_ClipDistance[2];
+
+		vec4 abgr8888ToVec4(uint abgr) {
+			const float scale = 1.0 / 255.0;
+			return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24));
+		}
+
+		vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
+			vec3 u = q.xyz;
+			float s = q.w;
+			return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v);
+		}
+
+		void main() {
+			gl_Position = a_coords;
+			vec4 colourAbs = abs(a_vertexColour);
+			v_colour = min(colourAbs, vec4(1.f));
+
+			// Flip y axis of UVs because OpenGL uses an inverted y for texture sampling compared to the PICA
+			v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w);
+			v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
+			v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
+			v_view = a_view;
+
+			v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion));
+			v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion));
+			v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion));
+		}
+)";
+
+	return ret;
+}
+// Returns fragment shader source: fixed declarations, then one block per non-passthrough TEV stage read from `regs`
+std::string FragmentGenerator::generate(const PICARegs& regs) {
+	std::string ret = "";
+
+	switch (api) {  // Version directive; any API other than GL/GLES gets none
+		case API::GL: ret += "#version 410 core"; break;
+		case API::GLES: ret += "#version 300 es"; break;
+		default: break;
+	}
+
+	bool unimplementedFlag = false;  // NOTE(review): never read or written again in this function
+	if (api == API::GLES) {
+		ret += R"(
+			#define USING_GLES 1
+
+			precision mediump int;
+			precision mediump float;
+		)";
+	}
+
+	// Input and output attributes
+	ret += R"(
+		in vec3 v_tangent;
+		in vec3 v_normal;
+		in vec3 v_bitangent;
+		in vec4 v_colour;
+		in vec3 v_texcoord0;
+		in vec2 v_texcoord1;
+		in vec3 v_view;
+		in vec2 v_texcoord2;
+		flat in vec4 v_textureEnvColor[6];
+		flat in vec4 v_textureEnvBufferColor;
+
+		out vec4 fragColor;
+		uniform sampler2D u_tex0;
+		uniform sampler2D u_tex1;
+		uniform sampler2D u_tex2;
+		// GLES doesn't support sampler1DArray, as such we'll have to change how we handle lighting later
+#ifndef USING_GLES
+		uniform sampler1DArray u_tex_lighting_lut;
+#endif
+
+		vec4 tevSources[16];
+		vec4 tevNextPreviousBuffer;
+	)";
+
+	// Emit main function for fragment shader
+	// When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour
+	ret += R"(
+		void main() {
+			tevSources[0] = v_colour;
+			tevSources[13] = vec4(0.0); // Previous buffer colour
+			tevSources[15] = v_colour; // Previous combiner
+			vec4 combinerOutput = v_colour; // Last TEV output
+	)";
+
+	ret += R"(
+		vec3 colorOp1 = vec3(0.0);
+		vec3 colorOp2 = vec3(0.0);
+		vec3 colorOp3 = vec3(0.0);
+
+		float alphaOp1 = 0.0;
+		float alphaOp2 = 0.0;
+		float alphaOp3 = 0.0;
+	)";
+
+	textureConfig = regs[InternalRegs::TexUnitCfg];  // Cached for getSource(), which tests bit 13 of it
+	for (int i = 0; i < 6; i++) {  // One compileTEV() call per TEV stage
+		compileTEV(ret, i, regs);
+	}
+
+	ret += "fragColor = combinerOutput;\n";
+	ret += "}"; // End of main function
+	ret += "\n\n\n\n\n\n\n\n\n\n\n\n\n";
+
+	return ret;
+}
+// Appends the GLSL for TEV stage `stage` to `shader`; emits nothing when the stage is a passthrough
+void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICARegs& regs) {
+	// Base address for each TEV stage's configuration
+	static constexpr std::array ioBases = {
+		InternalRegs::TexEnv0Source, InternalRegs::TexEnv1Source, InternalRegs::TexEnv2Source,
+		InternalRegs::TexEnv3Source, InternalRegs::TexEnv4Source, InternalRegs::TexEnv5Source,
+	};
+
+	const u32 ioBase = ioBases[stage];
+	TexEnvConfig tev(regs[ioBase], regs[ioBase + 1], regs[ioBase + 2], regs[ioBase + 3], regs[ioBase + 4]);  // Five consecutive registers per stage
+
+	if (!tev.isPassthroughStage()) {
+		// Get color operands
+		shader += "colorOp1 = ";
+		getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage);
+
+		shader += ";\ncolorOp2 = ";
+		getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage);
+
+		shader += ";\ncolorOp3 = ";
+		getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage);
+
+		shader += ";\nvec3 outputColor" + std::to_string(stage) + " = ";  // Each stage declares its own outputColorN / outputAlphaN
+		getColorOperation(shader, tev.colorOp);
+		shader += ";\n";
+
+		if (tev.colorOp == TexEnvConfig::Operation::Dot3RGBA) {
+			// Dot3 RGBA also writes to the alpha component, so reuse the colour result declared above as outputColorN
+			shader += "float outputAlpha" + std::to_string(stage) + " = outputColor" + std::to_string(stage) + ".x;\n";
+		} else {
+			// Get alpha operands
+			shader += "alphaOp1 = ";
+			getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage);
+
+			shader += ";\nalphaOp2 = ";
+			getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage);
+
+			shader += ";\nalphaOp3 = ";
+			getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage);
+
+			shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = ";
+			getAlphaOperation(shader, tev.alphaOp);
+			// Clamp the alpha value to [0.0, 1.0]; the result has to be assigned back or the emitted statement does nothing
+			shader += ";\noutputAlpha" + std::to_string(stage) + " = clamp(outputAlpha" + std::to_string(stage) + ", 0.0, 1.0);\n";
+		}
+
+		shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) +
+				  ".0, vec3(0.0), vec3(1.0)), clamp(outputAlpha" + std::to_string(stage) + " * " + std::to_string(tev.getAlphaScale()) +
+				  ".0, 0.0, 1.0));\n";  // Apply the stage's integer scale, then clamp both parts to [0.0, 1.0]
+	}
+}
+// Appends the GLSL vec3 expression for one RGB operand of a TEV stage (source plus component selection / inversion)
+void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index) {
+	using OperandType = TexEnvConfig::ColorOperand;
+
+	// For inverting operands, add the 1.0 - x subtraction
+	if (color == OperandType::OneMinusSourceColor || color == OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen ||
+		color == OperandType::OneMinusSourceBlue || color == OperandType::OneMinusSourceAlpha) {
+		shader += "vec3(1.0, 1.0, 1.0) - ";
+	}
+
+	switch (color) {
+		case 
OperandType::SourceColor:
+		case OperandType::OneMinusSourceColor:
+			getSource(shader, source, index);
+			shader += ".rgb";
+			break;
+
+		case OperandType::SourceRed:
+		case OperandType::OneMinusSourceRed:
+			getSource(shader, source, index);
+			shader += ".rrr";  // Broadcast the single channel to all three components
+			break;
+
+		case OperandType::SourceGreen:
+		case OperandType::OneMinusSourceGreen:
+			getSource(shader, source, index);
+			shader += ".ggg";
+			break;
+
+		case OperandType::SourceBlue:
+		case OperandType::OneMinusSourceBlue:
+			getSource(shader, source, index);
+			shader += ".bbb";
+			break;
+
+		case OperandType::SourceAlpha:
+		case OperandType::OneMinusSourceAlpha:
+			getSource(shader, source, index);
+			shader += ".aaa";
+			break;
+
+		default:
+			shader += "vec3(1.0, 1.0, 1.0)";  // Fallback so the emitted statement stays well-formed
+			Helpers::warn("FragmentGenerator: Invalid TEV color operand");
+			break;
+	}
+}
+// Appends the GLSL float expression for one alpha operand of a TEV stage (source plus component selection / inversion)
+void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index) {
+	using OperandType = TexEnvConfig::AlphaOperand;
+
+	// For inverting operands, add the 1.0 - x subtraction
+	if (color == OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen || color == OperandType::OneMinusSourceBlue ||
+		color == OperandType::OneMinusSourceAlpha) {
+		shader += "1.0 - ";
+	}
+
+	switch (color) {
+		case OperandType::SourceRed:
+		case OperandType::OneMinusSourceRed:
+			getSource(shader, source, index);
+			shader += ".r";
+			break;
+
+		case OperandType::SourceGreen:
+		case OperandType::OneMinusSourceGreen:
+			getSource(shader, source, index);
+			shader += ".g";
+			break;
+
+		case OperandType::SourceBlue:
+		case OperandType::OneMinusSourceBlue:
+			getSource(shader, source, index);
+			shader += ".b";
+			break;
+
+		case OperandType::SourceAlpha:
+		case OperandType::OneMinusSourceAlpha:
+			getSource(shader, source, index);
+			shader += ".a";
+			break;
+
+		default:
+			shader += "1.0";  // Fallback so the emitted statement stays well-formed
+			Helpers::warn("FragmentGenerator: Invalid TEV alpha operand");  // This is the alpha path, so say so in the log
+			break;
+	}
+}
+
+void 
FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index) {
+	switch (source) {  // Appends the GLSL vec4 expression that reads `source`; `index` is not used here
+		case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break;
+		case TexEnvConfig::Source::Texture0: shader += "texture(u_tex0, v_texcoord0.xy)"; break;
+		case TexEnvConfig::Source::Texture1: shader += "texture(u_tex1, v_texcoord1)"; break;
+		case TexEnvConfig::Source::Texture2: {
+			// If bit 13 in texture config is set then we use the texcoords for texture 1, otherwise for texture 2
+			if (Helpers::getBit<13>(textureConfig)) {
+				shader += "texture(u_tex2, v_texcoord1)";
+			} else {
+				shader += "texture(u_tex2, v_texcoord2)";
+			}
+			break;
+		}
+
+		case TexEnvConfig::Source::Previous: shader += "combinerOutput"; break;
+
+		default:
+			Helpers::warn("Unimplemented TEV source: %d", static_cast<int>(source));  // Cast target matches the %d conversion
+			shader += "vec4(1.0, 1.0, 1.0, 1.0)";
+			break;
+	}
+}
+// Appends the GLSL vec3 expression combining colorOp1..colorOp3 according to `op` (no trailing ';' - compileTEV appends it)
+void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Operation op) {
+	switch (op) {
+		case TexEnvConfig::Operation::Replace: shader += "colorOp1"; break;
+		case TexEnvConfig::Operation::Add: shader += "colorOp1 + colorOp2"; break;
+		case TexEnvConfig::Operation::AddSigned: shader += "clamp(colorOp1 + colorOp2 - 0.5, 0.0, 1.0)"; break;
+		case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break;
+		case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break;
+		case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec3(1.0) - colorOp3)"; break;
+
+		case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break;
+		case TexEnvConfig::Operation::MultiplyAdd: shader += "colorOp1 * colorOp2 + colorOp3"; break;
+		case TexEnvConfig::Operation::Dot3RGB:
+		case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(colorOp1 - 0.5, colorOp2 - 0.5))"; break;
+		default:
+			Helpers::warn("FragmentGenerator: Unimplemented color op");
+			shader += "vec3(1.0)";
+			break;
+	}
+}
+// Appends the GLSL float expression combining alphaOp1..alphaOp3 according to `op` (no trailing ';' - compileTEV appends it)
+void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Operation op) {
+	switch (op) {
+		case TexEnvConfig::Operation::Replace: shader += "alphaOp1"; break;
+		case TexEnvConfig::Operation::Add: shader += "alphaOp1 + alphaOp2"; break;
+		case TexEnvConfig::Operation::AddSigned: shader += "clamp(alphaOp1 + alphaOp2 - 0.5, 0.0, 1.0)"; break;
+		case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break;
+		case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break;
+		case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (1.0 - alphaOp3)"; break;
+
+		case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break;
+		case TexEnvConfig::Operation::MultiplyAdd: shader += "alphaOp1 * alphaOp2 + alphaOp3"; break;
+		case TexEnvConfig::Operation::Dot3RGB:  // The result is assigned to a float, so it must stay scalar (no vec3 constructor)
+		case TexEnvConfig::Operation::Dot3RGBA: shader += "4.0 * dot(alphaOp1 - 0.5, alphaOp2 - 0.5)"; break;
+		default:
+			Helpers::warn("FragmentGenerator: Unimplemented alpha op");
+			shader += "1.0";
+			break;
+	}
+}
diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp
index a11a6ffa..5d3ed1b1 100644
--- a/src/core/renderer_gl/renderer_gl.cpp
+++ b/src/core/renderer_gl/renderer_gl.cpp
@@ -38,12 +38,16 @@ void RendererGL::reset() {
 		oldDepthOffset = 0.0;  // Default depth offset to 0
 		oldDepthmapEnable = false;  // Enable w buffering
 
-		glUniform1f(depthScaleLoc, oldDepthScale);
-		glUniform1f(depthOffsetLoc, oldDepthOffset);
-		glUniform1i(depthmapEnableLoc, oldDepthmapEnable);
+		glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale);
+		glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset);
+		glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable);
 
 		gl.useProgram(oldProgram);  // Switch to old GL program
 	}
+
+#ifdef __ANDROID__
+	fragShaderGen.setTarget(PICA::ShaderGen::API::GLES, PICA::ShaderGen::Language::GLSL);
+#endif
 }
 
 void 
RendererGL::initGraphicsContextInternal() { @@ -59,16 +63,16 @@ void RendererGL::initGraphicsContextInternal() { triangleProgram.create({vert, frag}); gl.useProgram(triangleProgram); - textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); - textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); - textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); - textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); - textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); + ubershaderData.textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); + ubershaderData.textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); + ubershaderData.textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); + ubershaderData.textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); + ubershaderData.textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); - depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); - depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); - depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable"); - picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs"); + ubershaderData.depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); + ubershaderData.depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); + ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable"); + ubershaderData.picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs"); // Init sampler objects. 
Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0); @@ -289,9 +293,11 @@ void RendererGL::setupStencilTest(bool stencilEnable) { glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]); } - void RendererGL::setupTextureEnvState() { // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. + if (!usingUbershader) { + return; + } static constexpr std::array ioBases = { PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, @@ -314,11 +320,11 @@ void RendererGL::setupTextureEnvState() { textureEnvScaleRegs[i] = regs[ioBase + 4]; } - glUniform1uiv(textureEnvSourceLoc, 6, textureEnvSourceRegs); - glUniform1uiv(textureEnvOperandLoc, 6, textureEnvOperandRegs); - glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs); - glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs); - glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs); + glUniform1uiv(ubershaderData.textureEnvSourceLoc, 6, textureEnvSourceRegs); + glUniform1uiv(ubershaderData.textureEnvOperandLoc, 6, textureEnvOperandRegs); + glUniform1uiv(ubershaderData.textureEnvCombinerLoc, 6, textureEnvCombinerRegs); + glUniform1uiv(ubershaderData.textureEnvColorLoc, 6, textureEnvColourRegs); + glUniform1uiv(ubershaderData.textureEnvScaleLoc, 6, textureEnvScaleRegs); } void RendererGL::bindTexturesToSlots() { @@ -389,11 +395,17 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v OpenGL::Triangle, }; + if (usingUbershader) { + gl.useProgram(triangleProgram); + } else { + OpenGL::Program& program = getSpecializedShader(); + gl.useProgram(program); + } + const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); gl.bindVBO(vbo); gl.bindVAO(vao); - gl.useProgram(triangleProgram); gl.enableClipPlane(0); // Clipping plane 0 is always enabled if 
(regs[PICA::InternalRegs::ClipEnable] & 1) { @@ -419,27 +431,31 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; // Update depth uniforms - if (oldDepthScale != depthScale) { - oldDepthScale = depthScale; - glUniform1f(depthScaleLoc, depthScale); - } + if (usingUbershader) { + if (oldDepthScale != depthScale) { + oldDepthScale = depthScale; + glUniform1f(ubershaderData.depthScaleLoc, depthScale); + } - if (oldDepthOffset != depthOffset) { - oldDepthOffset = depthOffset; - glUniform1f(depthOffsetLoc, depthOffset); - } + if (oldDepthOffset != depthOffset) { + oldDepthOffset = depthOffset; + glUniform1f(ubershaderData.depthOffsetLoc, depthOffset); + } - if (oldDepthmapEnable != depthMapEnable) { - oldDepthmapEnable = depthMapEnable; - glUniform1i(depthmapEnableLoc, depthMapEnable); + if (oldDepthmapEnable != depthMapEnable) { + oldDepthmapEnable = depthMapEnable; + glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable); + } } setupTextureEnvState(); bindTexturesToSlots(); - // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) - // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates - glUniform1uiv(picaRegLoc, 0x200 - 0x48, ®s[0x48]); + if (usingUbershader) { + // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) + // The texturing and the fragment lighting registers. 
Therefore we upload them all in one go to avoid multiple slow uniform updates
+		glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]);
+	}
 
 	if (gpu.lightingLUTDirty) {
 		updateLightingLUT();
@@ -778,6 +794,43 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt
 	return colourBufferCache.add(sampleBuffer);
 }
 
+OpenGL::Program& RendererGL::getSpecializedShader() {  // Returns the cached program for the current TEV state, building it on first use
+	PICA::FragmentConfig fsConfig;  // Cache key; every member is assigned below before the lookup
+	fsConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
+	fsConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
+	fsConfig.texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor];
+
+	// Set up TEV stages: copy the 5 consecutive registers of each of the 6 stages into the key
+	std::memcpy(&fsConfig.tevConfigs[0 * 5], &regs[InternalRegs::TexEnv0Source], 5 * sizeof(u32));
+	std::memcpy(&fsConfig.tevConfigs[1 * 5], &regs[InternalRegs::TexEnv1Source], 5 * sizeof(u32));
+	std::memcpy(&fsConfig.tevConfigs[2 * 5], &regs[InternalRegs::TexEnv2Source], 5 * sizeof(u32));
+	std::memcpy(&fsConfig.tevConfigs[3 * 5], &regs[InternalRegs::TexEnv3Source], 5 * sizeof(u32));
+	std::memcpy(&fsConfig.tevConfigs[4 * 5], &regs[InternalRegs::TexEnv4Source], 5 * sizeof(u32));
+	std::memcpy(&fsConfig.tevConfigs[5 * 5], &regs[InternalRegs::TexEnv5Source], 5 * sizeof(u32));
+
+	OpenGL::Program& program = shaderCache[fsConfig];  // operator[] inserts a default-constructed entry when the key is new
+	if (!program.exists()) {
+		printf("Creating specialized shader\n");
+
+		std::string vs = fragShaderGen.getVertexShader(regs);
+		std::string fs = fragShaderGen.generate(regs);
+		std::cout << vs << "\n\n" << fs << "\n";  // Prints both generated sources to stdout
+
+		OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex);
+		OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
+		program.create({vertShader, fragShader});
+		gl.useProgram(program);  // Bind the new program so the glUniform1i calls below apply to it
+
+		// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
+		glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
+		glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
+		glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
+		glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3);
+	}
+
+	return program;
+}
+
 void RendererGL::screenshot(const std::string& name) {
 	constexpr uint width = 400;
 	constexpr uint height = 2 * 240;