Add shader cache

This commit is contained in:
wheremyfoodat 2024-03-02 20:41:23 +02:00
parent fdfb012aa1
commit 67fe3214fe
3 changed files with 106 additions and 58 deletions

View file

@ -1,9 +1,13 @@
#pragma once #pragma once
#include <array> #include <array>
#include <cstring>
#include <functional>
#include <span> #include <span>
#include <unordered_map>
#include "PICA/float_types.hpp" #include "PICA/float_types.hpp"
#include "PICA/pica_hash.hpp"
#include "PICA/pica_vertex.hpp" #include "PICA/pica_vertex.hpp"
#include "PICA/regs.hpp" #include "PICA/regs.hpp"
#include "PICA/shader_gen.hpp" #include "PICA/shader_gen.hpp"
@ -17,6 +21,32 @@
// More circular dependencies! // More circular dependencies!
class GPU; class GPU;
namespace PICA {
	// Key type for the specialized fragment shader cache (an std::unordered_map).
	// Holds the raw values of the PICA registers that the cache lookup is keyed on.
	//
	// NOTE: Equality (below) and hashing (std::hash specialization) both operate on the raw bytes
	// of this object. Every member must therefore be fully written before the config is used as a key,
	// and the struct must not contain any padding. The latter is enforced by the static_assert below.
	struct FragmentConfig {
		u32 texUnitConfig;
		u32 texEnvUpdateBuffer;
		// TODO: This should probably be a uniform
		u32 texEnvBufferColor;
		// There are 6 TEV stages, and each one is configured via 5 word-sized registers
		std::array<u32, 5 * 6> tevConfigs;

		// Equality operator required by std::unordered_map.
		// Performs a bytewise comparison of the two configs, which is only meaningful while the struct has no padding
		bool operator==(const FragmentConfig& config) const {
			return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
		}
	};

	// Padding bytes have indeterminate values, so they would make the memcmp-based equality and the
	// byte-based hash unreliable. Fail the build if a layout change ever introduces padding between members
	static_assert(
		sizeof(FragmentConfig) == 3 * sizeof(u32) + sizeof(std::array<u32, 5 * 6>),
		"PICA::FragmentConfig must not contain padding: it is compared and hashed as raw bytes"
	);
}  // namespace PICA
// std::hash specialization that lets PICA::FragmentConfig be used as a key in unordered containers
template <>
struct std::hash<PICA::FragmentConfig> {
	std::size_t operator()(const PICA::FragmentConfig& config) const noexcept {
		// The hash is computed over the raw bytes of the config object
		const char* const rawBytes = reinterpret_cast<const char*>(&config);
		return PICAHash::computeHash(rawBytes, sizeof(PICA::FragmentConfig));
	}
};
class RendererGL final : public Renderer { class RendererGL final : public Renderer {
GLStateManager gl = {}; GLStateManager gl = {};
@ -26,20 +56,23 @@ class RendererGL final : public Renderer {
OpenGL::VertexArray vao; OpenGL::VertexArray vao;
OpenGL::VertexBuffer vbo; OpenGL::VertexBuffer vbo;
// TEV configuration uniform locations // Data
GLint textureEnvSourceLoc = -1; struct {
GLint textureEnvOperandLoc = -1; // TEV configuration uniform locations
GLint textureEnvCombinerLoc = -1; GLint textureEnvSourceLoc = -1;
GLint textureEnvColorLoc = -1; GLint textureEnvOperandLoc = -1;
GLint textureEnvScaleLoc = -1; GLint textureEnvCombinerLoc = -1;
GLint textureEnvColorLoc = -1;
GLint textureEnvScaleLoc = -1;
// Uniform of PICA registers // Uniform of PICA registers
GLint picaRegLoc = -1; GLint picaRegLoc = -1;
// Depth configuration uniform locations // Depth configuration uniform locations
GLint depthOffsetLoc = -1; GLint depthOffsetLoc = -1;
GLint depthScaleLoc = -1; GLint depthScaleLoc = -1;
GLint depthmapEnableLoc = -1; GLint depthmapEnableLoc = -1;
} ubershaderData;
float oldDepthScale = -1.0; float oldDepthScale = -1.0;
float oldDepthOffset = 0.0; float oldDepthOffset = 0.0;
@ -48,6 +81,7 @@ class RendererGL final : public Renderer {
SurfaceCache<DepthBuffer, 16, true> depthBufferCache; SurfaceCache<DepthBuffer, 16, true> depthBufferCache;
SurfaceCache<ColourBuffer, 16, true> colourBufferCache; SurfaceCache<ColourBuffer, 16, true> colourBufferCache;
SurfaceCache<Texture, 256, true> textureCache; SurfaceCache<Texture, 256, true> textureCache;
bool usingUbershader = false;
// Dummy VAO/VBO for blitting the final output // Dummy VAO/VBO for blitting the final output
OpenGL::VertexArray dummyVAO; OpenGL::VertexArray dummyVAO;
@ -58,9 +92,11 @@ class RendererGL final : public Renderer {
OpenGL::Framebuffer screenFramebuffer; OpenGL::Framebuffer screenFramebuffer;
OpenGL::Texture blankTexture; OpenGL::Texture blankTexture;
std::unordered_map<PICA::FragmentConfig, OpenGL::Program> shaderCache;
OpenGL::Framebuffer getColourFBO(); OpenGL::Framebuffer getColourFBO();
OpenGL::Texture getTexture(Texture& tex); OpenGL::Texture getTexture(Texture& tex);
OpenGL::Program getSpecializedShader(); OpenGL::Program& getSpecializedShader();
PICA::ShaderGen::FragmentGenerator fragShaderGen; PICA::ShaderGen::FragmentGenerator fragShaderGen;

View file

@ -308,7 +308,7 @@ void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Ope
case TexEnvConfig::Operation::AddSigned: shader += "clamp(colorOp1 + colorOp2 - 0.5, 0.0, 1.0);"; break; case TexEnvConfig::Operation::AddSigned: shader += "clamp(colorOp1 + colorOp2 - 0.5, 0.0, 1.0);"; break;
case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break; case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break;
case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break; case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break;
case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec(1.0) - colorOp3)"; break; case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec3(1.0) - colorOp3)"; break;
case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break; case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break;
case TexEnvConfig::Operation::MultiplyAdd: shader += "colorOp1 * colorOp2 + colorOp3"; break; case TexEnvConfig::Operation::MultiplyAdd: shader += "colorOp1 * colorOp2 + colorOp3"; break;
@ -328,15 +328,15 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope
case TexEnvConfig::Operation::AddSigned: shader += "clamp(alphaOp1 + alphaOp2 - 0.5, 0.0, 1.0);"; break; case TexEnvConfig::Operation::AddSigned: shader += "clamp(alphaOp1 + alphaOp2 - 0.5, 0.0, 1.0);"; break;
case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break; case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break;
case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break; case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break;
case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (vec(1.0) - alphaOp3)"; break; case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (1.0 - alphaOp3)"; break;
case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, vec3(1.0)) * alphaOp3"; break; case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break;
case TexEnvConfig::Operation::MultiplyAdd: shader += "alphaOp1 * alphaOp2 + alphaOp3"; break; case TexEnvConfig::Operation::MultiplyAdd: shader += "alphaOp1 * alphaOp2 + alphaOp3"; break;
case TexEnvConfig::Operation::Dot3RGB: case TexEnvConfig::Operation::Dot3RGB:
case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(alphaOp1 - 0.5, alphaOp2 - 0.5))"; break; case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(alphaOp1 - 0.5, alphaOp2 - 0.5))"; break;
default: default:
Helpers::warn("FragmentGenerator: Unimplemented alpha op"); Helpers::warn("FragmentGenerator: Unimplemented alpha op");
shader += "vec3(1.0)"; shader += "1.0";
break; break;
} }
} }

View file

@ -38,9 +38,9 @@ void RendererGL::reset() {
oldDepthOffset = 0.0; // Default depth offset to 0 oldDepthOffset = 0.0; // Default depth offset to 0
oldDepthmapEnable = false; // Enable w buffering oldDepthmapEnable = false; // Enable w buffering
glUniform1f(depthScaleLoc, oldDepthScale); glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale);
glUniform1f(depthOffsetLoc, oldDepthOffset); glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset);
glUniform1i(depthmapEnableLoc, oldDepthmapEnable); glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable);
gl.useProgram(oldProgram); // Switch to old GL program gl.useProgram(oldProgram); // Switch to old GL program
} }
@ -59,16 +59,16 @@ void RendererGL::initGraphicsContextInternal() {
triangleProgram.create({vert, frag}); triangleProgram.create({vert, frag});
gl.useProgram(triangleProgram); gl.useProgram(triangleProgram);
textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); ubershaderData.textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource");
textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); ubershaderData.textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand");
textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); ubershaderData.textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner");
textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); ubershaderData.textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor");
textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); ubershaderData.textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale");
depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); ubershaderData.depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale");
depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); ubershaderData.depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset");
depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable"); ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable");
picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs"); ubershaderData.picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs");
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0); glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0);
@ -289,7 +289,6 @@ void RendererGL::setupStencilTest(bool stencilEnable) {
glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]); glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]);
} }
void RendererGL::setupTextureEnvState() { void RendererGL::setupTextureEnvState() {
// TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially.
@ -314,11 +313,11 @@ void RendererGL::setupTextureEnvState() {
textureEnvScaleRegs[i] = regs[ioBase + 4]; textureEnvScaleRegs[i] = regs[ioBase + 4];
} }
glUniform1uiv(textureEnvSourceLoc, 6, textureEnvSourceRegs); glUniform1uiv(ubershaderData.textureEnvSourceLoc, 6, textureEnvSourceRegs);
glUniform1uiv(textureEnvOperandLoc, 6, textureEnvOperandRegs); glUniform1uiv(ubershaderData.textureEnvOperandLoc, 6, textureEnvOperandRegs);
glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs); glUniform1uiv(ubershaderData.textureEnvCombinerLoc, 6, textureEnvCombinerRegs);
glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs); glUniform1uiv(ubershaderData.textureEnvColorLoc, 6, textureEnvColourRegs);
glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs); glUniform1uiv(ubershaderData.textureEnvScaleLoc, 6, textureEnvScaleRegs);
} }
void RendererGL::bindTexturesToSlots() { void RendererGL::bindTexturesToSlots() {
@ -389,11 +388,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
OpenGL::Triangle, OpenGL::Triangle,
}; };
std::string vs = fragShaderGen.getVertexShader(regs); OpenGL::Program& program = getSpecializedShader();
std::string fs = fragShaderGen.generate(regs);
std::cout << fs << "\n\n\n";
OpenGL::Program program = getSpecializedShader();
const auto primitiveTopology = primTypes[static_cast<usize>(primType)]; const auto primitiveTopology = primTypes[static_cast<usize>(primType)];
gl.disableScissor(); gl.disableScissor();
@ -427,17 +422,17 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
// Update depth uniforms // Update depth uniforms
if (oldDepthScale != depthScale) { if (oldDepthScale != depthScale) {
oldDepthScale = depthScale; oldDepthScale = depthScale;
glUniform1f(depthScaleLoc, depthScale); glUniform1f(ubershaderData.depthScaleLoc, depthScale);
} }
if (oldDepthOffset != depthOffset) { if (oldDepthOffset != depthOffset) {
oldDepthOffset = depthOffset; oldDepthOffset = depthOffset;
glUniform1f(depthOffsetLoc, depthOffset); glUniform1f(ubershaderData.depthOffsetLoc, depthOffset);
} }
if (oldDepthmapEnable != depthMapEnable) { if (oldDepthmapEnable != depthMapEnable) {
oldDepthmapEnable = depthMapEnable; oldDepthmapEnable = depthMapEnable;
glUniform1i(depthmapEnableLoc, depthMapEnable); glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable);
} }
setupTextureEnvState(); setupTextureEnvState();
@ -445,7 +440,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
// Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48)
// The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
glUniform1uiv(picaRegLoc, 0x200 - 0x48, &regs[0x48]); glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]);
if (gpu.lightingLUTDirty) { if (gpu.lightingLUTDirty) {
updateLightingLUT(); updateLightingLUT();
@ -784,22 +779,39 @@ std::optional<ColourBuffer> RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt
return colourBufferCache.add(sampleBuffer); return colourBufferCache.add(sampleBuffer);
} }
OpenGL::Program RendererGL::getSpecializedShader() { OpenGL::Program& RendererGL::getSpecializedShader() {
OpenGL::Program program; PICA::FragmentConfig fsConfig;
fsConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
fsConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
fsConfig.texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor];
std::string vs = fragShaderGen.getVertexShader(regs); // Set up TEV stages
std::string fs = fragShaderGen.generate(regs); std::memcpy(&fsConfig.tevConfigs[0 * 5], &regs[InternalRegs::TexEnv0Source], 5 * sizeof(u32));
std::memcpy(&fsConfig.tevConfigs[1 * 5], &regs[InternalRegs::TexEnv1Source], 5 * sizeof(u32));
std::memcpy(&fsConfig.tevConfigs[2 * 5], &regs[InternalRegs::TexEnv2Source], 5 * sizeof(u32));
std::memcpy(&fsConfig.tevConfigs[3 * 5], &regs[InternalRegs::TexEnv3Source], 5 * sizeof(u32));
std::memcpy(&fsConfig.tevConfigs[4 * 5], &regs[InternalRegs::TexEnv4Source], 5 * sizeof(u32));
std::memcpy(&fsConfig.tevConfigs[5 * 5], &regs[InternalRegs::TexEnv5Source], 5 * sizeof(u32));
OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); OpenGL::Program& program = shaderCache[fsConfig];
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); if (!program.exists()) {
program.create({vertShader, fragShader}); printf("Creating specialized shader\n");
program.use();
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 std::string vs = fragShaderGen.getVertexShader(regs);
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); std::string fs = fragShaderGen.generate(regs);
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); std::cout << vs << "\n\n" << fs << "\n";
glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex);
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
program.create({vertShader, fragShader});
program.use();
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3);
}
return program; return program;
} }