diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 7bc1087a..e8eaeacb 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -1,9 +1,13 @@ #pragma once #include <array> +#include <cstring> +#include <functional> #include <span> +#include <unordered_map> #include "PICA/float_types.hpp" +#include "PICA/pica_hash.hpp" #include "PICA/pica_vertex.hpp" #include "PICA/regs.hpp" #include "PICA/shader_gen.hpp" @@ -17,6 +21,32 @@ // More circular dependencies! class GPU; +namespace PICA { + struct FragmentConfig { + u32 texUnitConfig; + u32 texEnvUpdateBuffer; + + // TODO: This should probably be a uniform + u32 texEnvBufferColor; + + // There's 6 TEV stages, and each one is configured via 5 word-sized registers + std::array<u32, 6 * 5> tevConfigs; + + // Hash function and equality operator required by std::unordered_map + bool operator==(const FragmentConfig& config) const { + return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0; + } + }; +} // namespace PICA + +// Override std::hash for our fragment config class +template <> +struct std::hash<PICA::FragmentConfig> { + std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { + return PICAHash::computeHash((const char*)&config, sizeof(config)); + } +}; + class RendererGL final : public Renderer { GLStateManager gl = {}; @@ -26,20 +56,23 @@ class RendererGL final : public Renderer { OpenGL::VertexArray vao; OpenGL::VertexBuffer vbo; - // TEV configuration uniform locations - GLint textureEnvSourceLoc = -1; - GLint textureEnvOperandLoc = -1; - GLint textureEnvCombinerLoc = -1; - GLint textureEnvColorLoc = -1; - GLint textureEnvScaleLoc = -1; + // Data + struct { + // TEV configuration uniform locations + GLint textureEnvSourceLoc = -1; + GLint textureEnvOperandLoc = -1; + GLint textureEnvCombinerLoc = -1; + GLint textureEnvColorLoc = -1; + GLint textureEnvScaleLoc = -1; - // Uniform of PICA registers - GLint picaRegLoc = -1; + // Uniform of PICA registers + GLint picaRegLoc = -1; - // Depth configuration 
uniform locations - GLint depthOffsetLoc = -1; - GLint depthScaleLoc = -1; - GLint depthmapEnableLoc = -1; + // Depth configuration uniform locations + GLint depthOffsetLoc = -1; + GLint depthScaleLoc = -1; + GLint depthmapEnableLoc = -1; + } ubershaderData; float oldDepthScale = -1.0; float oldDepthOffset = 0.0; @@ -48,6 +81,7 @@ class RendererGL final : public Renderer { SurfaceCache depthBufferCache; SurfaceCache colourBufferCache; SurfaceCache textureCache; + bool usingUbershader = false; // Dummy VAO/VBO for blitting the final output OpenGL::VertexArray dummyVAO; @@ -58,9 +92,11 @@ class RendererGL final : public Renderer { OpenGL::Framebuffer screenFramebuffer; OpenGL::Texture blankTexture; + std::unordered_map<PICA::FragmentConfig, OpenGL::Program> shaderCache; + OpenGL::Framebuffer getColourFBO(); OpenGL::Texture getTexture(Texture& tex); - OpenGL::Program getSpecializedShader(); + OpenGL::Program& getSpecializedShader(); PICA::ShaderGen::FragmentGenerator fragShaderGen; @@ -99,4 +135,4 @@ class RendererGL final : public Renderer { // Take a screenshot of the screen and store it in a file void screenshot(const std::string& name) override; -}; +}; \ No newline at end of file diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 3a7e9b74..1bcae30c 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -308,7 +308,7 @@ void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Ope case TexEnvConfig::Operation::AddSigned: shader += "clamp(colorOp1 + colorOp2 - 0.5, 0.0, 1.0);"; break; case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break; case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break; - case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec(1.0) - colorOp3)"; break; + case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec3(1.0) - colorOp3)"; break; case TexEnvConfig::Operation::AddMultiply: shader 
+= "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break; case TexEnvConfig::Operation::MultiplyAdd: shader += "colorOp1 * colorOp2 + colorOp3"; break; @@ -328,15 +328,15 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope case TexEnvConfig::Operation::AddSigned: shader += "clamp(alphaOp1 + alphaOp2 - 0.5, 0.0, 1.0);"; break; case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break; case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break; - case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (vec(1.0) - alphaOp3)"; break; + case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (1.0 - alphaOp3)"; break; - case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, vec3(1.0)) * alphaOp3"; break; + case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break; case TexEnvConfig::Operation::MultiplyAdd: shader += "alphaOp1 * alphaOp2 + alphaOp3"; break; case TexEnvConfig::Operation::Dot3RGB: case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(alphaOp1 - 0.5, alphaOp2 - 0.5))"; break; default: Helpers::warn("FragmentGenerator: Unimplemented alpha op"); - shader += "vec3(1.0)"; + shader += "1.0"; break; } } \ No newline at end of file diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 0bb592cf..a0e09bba 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -38,9 +38,9 @@ void RendererGL::reset() { oldDepthOffset = 0.0; // Default depth offset to 0 oldDepthmapEnable = false; // Enable w buffering - glUniform1f(depthScaleLoc, oldDepthScale); - glUniform1f(depthOffsetLoc, oldDepthOffset); - glUniform1i(depthmapEnableLoc, oldDepthmapEnable); + glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale); + glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset); + 
glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable); gl.useProgram(oldProgram); // Switch to old GL program } @@ -59,16 +59,16 @@ void RendererGL::initGraphicsContextInternal() { triangleProgram.create({vert, frag}); gl.useProgram(triangleProgram); - textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); - textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); - textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); - textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); - textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); + ubershaderData.textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); + ubershaderData.textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); + ubershaderData.textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); + ubershaderData.textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); + ubershaderData.textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); - depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); - depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); - depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable"); - picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs"); + ubershaderData.depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); + ubershaderData.depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); + ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable"); + ubershaderData.picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs"); // Init sampler objects. 
Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0); @@ -289,7 +289,6 @@ void RendererGL::setupStencilTest(bool stencilEnable) { glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]); } - void RendererGL::setupTextureEnvState() { // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. @@ -314,11 +313,11 @@ void RendererGL::setupTextureEnvState() { textureEnvScaleRegs[i] = regs[ioBase + 4]; } - glUniform1uiv(textureEnvSourceLoc, 6, textureEnvSourceRegs); - glUniform1uiv(textureEnvOperandLoc, 6, textureEnvOperandRegs); - glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs); - glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs); - glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs); + glUniform1uiv(ubershaderData.textureEnvSourceLoc, 6, textureEnvSourceRegs); + glUniform1uiv(ubershaderData.textureEnvOperandLoc, 6, textureEnvOperandRegs); + glUniform1uiv(ubershaderData.textureEnvCombinerLoc, 6, textureEnvCombinerRegs); + glUniform1uiv(ubershaderData.textureEnvColorLoc, 6, textureEnvColourRegs); + glUniform1uiv(ubershaderData.textureEnvScaleLoc, 6, textureEnvScaleRegs); } void RendererGL::bindTexturesToSlots() { @@ -389,11 +388,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v OpenGL::Triangle, }; - std::string vs = fragShaderGen.getVertexShader(regs); - std::string fs = fragShaderGen.generate(regs); - std::cout << fs << "\n\n\n"; - - OpenGL::Program program = getSpecializedShader(); + OpenGL::Program& program = getSpecializedShader(); const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); @@ -427,17 +422,17 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v // Update depth uniforms if (oldDepthScale != depthScale) { oldDepthScale = depthScale; - glUniform1f(depthScaleLoc, depthScale); + 
glUniform1f(ubershaderData.depthScaleLoc, depthScale); } if (oldDepthOffset != depthOffset) { oldDepthOffset = depthOffset; - glUniform1f(depthOffsetLoc, depthOffset); + glUniform1f(ubershaderData.depthOffsetLoc, depthOffset); } if (oldDepthmapEnable != depthMapEnable) { oldDepthmapEnable = depthMapEnable; - glUniform1i(depthmapEnableLoc, depthMapEnable); + glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable); } setupTextureEnvState(); @@ -445,7 +440,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates - glUniform1uiv(picaRegLoc, 0x200 - 0x48, &regs[0x48]); + glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]); if (gpu.lightingLUTDirty) { updateLightingLUT(); @@ -784,22 +779,39 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt return colourBufferCache.add(sampleBuffer); } -OpenGL::Program RendererGL::getSpecializedShader() { - OpenGL::Program program; +OpenGL::Program& RendererGL::getSpecializedShader() { + PICA::FragmentConfig fsConfig; + fsConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; + fsConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + fsConfig.texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor]; - std::string vs = fragShaderGen.getVertexShader(regs); - std::string fs = fragShaderGen.generate(regs); + // Set up TEV stages + std::memcpy(&fsConfig.tevConfigs[0 * 5], &regs[InternalRegs::TexEnv0Source], 5 * sizeof(u32)); + std::memcpy(&fsConfig.tevConfigs[1 * 5], &regs[InternalRegs::TexEnv1Source], 5 * sizeof(u32)); + std::memcpy(&fsConfig.tevConfigs[2 * 5], &regs[InternalRegs::TexEnv2Source], 5 * sizeof(u32)); + std::memcpy(&fsConfig.tevConfigs[3 * 5], &regs[InternalRegs::TexEnv3Source], 5 * sizeof(u32)); + 
std::memcpy(&fsConfig.tevConfigs[4 * 5], &regs[InternalRegs::TexEnv4Source], 5 * sizeof(u32)); + std::memcpy(&fsConfig.tevConfigs[5 * 5], &regs[InternalRegs::TexEnv5Source], 5 * sizeof(u32)); - OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); - OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); - program.create({vertShader, fragShader}); - program.use(); + OpenGL::Program& program = shaderCache[fsConfig]; + if (!program.exists()) { + printf("Creating specialized shader\n"); - // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 - glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); - glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); - glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); - glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + std::string vs = fragShaderGen.getVertexShader(regs); + std::string fs = fragShaderGen.generate(regs); + std::cout << vs << "\n\n" << fs << "\n"; + + OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); + OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); + program.create({vertShader, fragShader}); + program.use(); + + // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 + glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); + glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); + glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + } return program; }