Add shader cache

This commit is contained in:
wheremyfoodat 2024-03-02 20:41:23 +02:00
parent fdfb012aa1
commit 67fe3214fe
3 changed files with 106 additions and 58 deletions

View file

@ -1,9 +1,13 @@
#pragma once
#include <array>
#include <cstring>
#include <functional>
#include <span>
#include <unordered_map>
#include "PICA/float_types.hpp"
#include "PICA/pica_hash.hpp"
#include "PICA/pica_vertex.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_gen.hpp"
@ -17,6 +21,32 @@
// More circular dependencies!
class GPU;
namespace PICA {
struct FragmentConfig {
u32 texUnitConfig;
u32 texEnvUpdateBuffer;
// TODO: This should probably be a uniform
u32 texEnvBufferColor;
// There's 6 TEV stages, and each one is configured via 5 word-sized registers
std::array<u32, 5 * 6> tevConfigs;
// Hash function and equality operator required by std::unordered_map
bool operator==(const FragmentConfig& config) const {
return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
}
};
} // namespace PICA
// Override std::hash for our fragment config class
template <>
struct std::hash<PICA::FragmentConfig> {
std::size_t operator()(const PICA::FragmentConfig& config) const noexcept {
return PICAHash::computeHash((const char*)&config, sizeof(config));
}
};
class RendererGL final : public Renderer {
GLStateManager gl = {};
@ -26,20 +56,23 @@ class RendererGL final : public Renderer {
OpenGL::VertexArray vao;
OpenGL::VertexBuffer vbo;
// TEV configuration uniform locations
GLint textureEnvSourceLoc = -1;
GLint textureEnvOperandLoc = -1;
GLint textureEnvCombinerLoc = -1;
GLint textureEnvColorLoc = -1;
GLint textureEnvScaleLoc = -1;
// Data
struct {
// TEV configuration uniform locations
GLint textureEnvSourceLoc = -1;
GLint textureEnvOperandLoc = -1;
GLint textureEnvCombinerLoc = -1;
GLint textureEnvColorLoc = -1;
GLint textureEnvScaleLoc = -1;
// Uniform of PICA registers
GLint picaRegLoc = -1;
// Uniform of PICA registers
GLint picaRegLoc = -1;
// Depth configuration uniform locations
GLint depthOffsetLoc = -1;
GLint depthScaleLoc = -1;
GLint depthmapEnableLoc = -1;
// Depth configuration uniform locations
GLint depthOffsetLoc = -1;
GLint depthScaleLoc = -1;
GLint depthmapEnableLoc = -1;
} ubershaderData;
float oldDepthScale = -1.0;
float oldDepthOffset = 0.0;
@ -48,6 +81,7 @@ class RendererGL final : public Renderer {
SurfaceCache<DepthBuffer, 16, true> depthBufferCache;
SurfaceCache<ColourBuffer, 16, true> colourBufferCache;
SurfaceCache<Texture, 256, true> textureCache;
bool usingUbershader = false;
// Dummy VAO/VBO for blitting the final output
OpenGL::VertexArray dummyVAO;
@ -58,9 +92,11 @@ class RendererGL final : public Renderer {
OpenGL::Framebuffer screenFramebuffer;
OpenGL::Texture blankTexture;
std::unordered_map<PICA::FragmentConfig, OpenGL::Program> shaderCache;
OpenGL::Framebuffer getColourFBO();
OpenGL::Texture getTexture(Texture& tex);
OpenGL::Program getSpecializedShader();
OpenGL::Program& getSpecializedShader();
PICA::ShaderGen::FragmentGenerator fragShaderGen;
@ -99,4 +135,4 @@ class RendererGL final : public Renderer {
// Take a screenshot of the screen and store it in a file
void screenshot(const std::string& name) override;
};
};

View file

@ -308,7 +308,7 @@ void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Ope
case TexEnvConfig::Operation::AddSigned: shader += "clamp(colorOp1 + colorOp2 - 0.5, 0.0, 1.0);"; break;
case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break;
case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break;
case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec(1.0) - colorOp3)"; break;
case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec3(1.0) - colorOp3)"; break;
case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break;
case TexEnvConfig::Operation::MultiplyAdd: shader += "colorOp1 * colorOp2 + colorOp3"; break;
@ -328,15 +328,15 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope
case TexEnvConfig::Operation::AddSigned: shader += "clamp(alphaOp1 + alphaOp2 - 0.5, 0.0, 1.0);"; break;
case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break;
case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break;
case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (vec(1.0) - alphaOp3)"; break;
case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (1.0 - alphaOp3)"; break;
case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, vec3(1.0)) * alphaOp3"; break;
case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break;
case TexEnvConfig::Operation::MultiplyAdd: shader += "alphaOp1 * alphaOp2 + alphaOp3"; break;
case TexEnvConfig::Operation::Dot3RGB:
case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(alphaOp1 - 0.5, alphaOp2 - 0.5))"; break;
default:
Helpers::warn("FragmentGenerator: Unimplemented alpha op");
shader += "vec3(1.0)";
shader += "1.0";
break;
}
}

View file

@ -38,9 +38,9 @@ void RendererGL::reset() {
oldDepthOffset = 0.0; // Default depth offset to 0
oldDepthmapEnable = false; // Enable w buffering
glUniform1f(depthScaleLoc, oldDepthScale);
glUniform1f(depthOffsetLoc, oldDepthOffset);
glUniform1i(depthmapEnableLoc, oldDepthmapEnable);
glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale);
glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset);
glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable);
gl.useProgram(oldProgram); // Switch to old GL program
}
@ -59,16 +59,16 @@ void RendererGL::initGraphicsContextInternal() {
triangleProgram.create({vert, frag});
gl.useProgram(triangleProgram);
textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource");
textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand");
textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner");
textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor");
textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale");
ubershaderData.textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource");
ubershaderData.textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand");
ubershaderData.textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner");
ubershaderData.textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor");
ubershaderData.textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale");
depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale");
depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset");
depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable");
picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs");
ubershaderData.depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale");
ubershaderData.depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset");
ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable");
ubershaderData.picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs");
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0);
@ -289,7 +289,6 @@ void RendererGL::setupStencilTest(bool stencilEnable) {
glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]);
}
void RendererGL::setupTextureEnvState() {
// TODO: Only update uniforms when the TEV config changed. Use an UBO potentially.
@ -314,11 +313,11 @@ void RendererGL::setupTextureEnvState() {
textureEnvScaleRegs[i] = regs[ioBase + 4];
}
glUniform1uiv(textureEnvSourceLoc, 6, textureEnvSourceRegs);
glUniform1uiv(textureEnvOperandLoc, 6, textureEnvOperandRegs);
glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs);
glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs);
glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs);
glUniform1uiv(ubershaderData.textureEnvSourceLoc, 6, textureEnvSourceRegs);
glUniform1uiv(ubershaderData.textureEnvOperandLoc, 6, textureEnvOperandRegs);
glUniform1uiv(ubershaderData.textureEnvCombinerLoc, 6, textureEnvCombinerRegs);
glUniform1uiv(ubershaderData.textureEnvColorLoc, 6, textureEnvColourRegs);
glUniform1uiv(ubershaderData.textureEnvScaleLoc, 6, textureEnvScaleRegs);
}
void RendererGL::bindTexturesToSlots() {
@ -389,11 +388,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
OpenGL::Triangle,
};
std::string vs = fragShaderGen.getVertexShader(regs);
std::string fs = fragShaderGen.generate(regs);
std::cout << fs << "\n\n\n";
OpenGL::Program program = getSpecializedShader();
OpenGL::Program& program = getSpecializedShader();
const auto primitiveTopology = primTypes[static_cast<usize>(primType)];
gl.disableScissor();
@ -427,17 +422,17 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
// Update depth uniforms
if (oldDepthScale != depthScale) {
oldDepthScale = depthScale;
glUniform1f(depthScaleLoc, depthScale);
glUniform1f(ubershaderData.depthScaleLoc, depthScale);
}
if (oldDepthOffset != depthOffset) {
oldDepthOffset = depthOffset;
glUniform1f(depthOffsetLoc, depthOffset);
glUniform1f(ubershaderData.depthOffsetLoc, depthOffset);
}
if (oldDepthmapEnable != depthMapEnable) {
oldDepthmapEnable = depthMapEnable;
glUniform1i(depthmapEnableLoc, depthMapEnable);
glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable);
}
setupTextureEnvState();
@ -445,7 +440,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
// Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48)
// The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
glUniform1uiv(picaRegLoc, 0x200 - 0x48, &regs[0x48]);
glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]);
if (gpu.lightingLUTDirty) {
updateLightingLUT();
@ -784,22 +779,39 @@ std::optional<ColourBuffer> RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt
return colourBufferCache.add(sampleBuffer);
}
OpenGL::Program RendererGL::getSpecializedShader() {
OpenGL::Program program;
OpenGL::Program& RendererGL::getSpecializedShader() {
PICA::FragmentConfig fsConfig;
fsConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
fsConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
fsConfig.texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor];
std::string vs = fragShaderGen.getVertexShader(regs);
std::string fs = fragShaderGen.generate(regs);
// Set up TEV stages
std::memcpy(&fsConfig.tevConfigs[0 * 5], &regs[InternalRegs::TexEnv0Source], 5 * sizeof(u32));
std::memcpy(&fsConfig.tevConfigs[1 * 5], &regs[InternalRegs::TexEnv1Source], 5 * sizeof(u32));
std::memcpy(&fsConfig.tevConfigs[2 * 5], &regs[InternalRegs::TexEnv2Source], 5 * sizeof(u32));
std::memcpy(&fsConfig.tevConfigs[3 * 5], &regs[InternalRegs::TexEnv3Source], 5 * sizeof(u32));
std::memcpy(&fsConfig.tevConfigs[4 * 5], &regs[InternalRegs::TexEnv4Source], 5 * sizeof(u32));
std::memcpy(&fsConfig.tevConfigs[5 * 5], &regs[InternalRegs::TexEnv5Source], 5 * sizeof(u32));
OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex);
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
program.create({vertShader, fragShader});
program.use();
OpenGL::Program& program = shaderCache[fsConfig];
if (!program.exists()) {
printf("Creating specialized shader\n");
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3);
std::string vs = fragShaderGen.getVertexShader(regs);
std::string fs = fragShaderGen.generate(regs);
std::cout << vs << "\n\n" << fs << "\n";
OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex);
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
program.create({vertShader, fragShader});
program.use();
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3);
}
return program;
}