mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-06 14:15:41 +12:00
Add shader cache
This commit is contained in:
parent
fdfb012aa1
commit
67fe3214fe
3 changed files with 106 additions and 58 deletions
|
@ -1,9 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <span>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/pica_hash.hpp"
|
||||
#include "PICA/pica_vertex.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "PICA/shader_gen.hpp"
|
||||
|
@ -17,6 +21,32 @@
|
|||
// More circular dependencies!
|
||||
class GPU;
|
||||
|
||||
namespace PICA {
|
||||
// Snapshot of the PICA fragment-pipeline registers that identifies one specialized shader.
// Used as the key type of RendererGL::shaderCache (std::unordered_map<PICA::FragmentConfig, OpenGL::Program>).
// NOTE(review): equality below (and the std::hash specialization for this type) operate on the raw bytes of
// the object, so every byte must be initialized and meaningful. All members visible here are u32 or arrays of
// u32 — presumably that means no padding; confirm u32 is a plain 32-bit integer before adding other member types.
struct FragmentConfig {
|
||||
// Value copied from regs[InternalRegs::TexUnitCfg]
u32 texUnitConfig;
|
||||
// Value copied from regs[InternalRegs::TexEnvUpdateBuffer]
u32 texEnvUpdateBuffer;
|
||||
|
||||
// TODO: This should probably be a uniform
|
||||
// Value copied from regs[InternalRegs::TexEnvBufferColor]
u32 texEnvBufferColor;
|
||||
|
||||
// There are 6 TEV stages, and each one is configured via 5 word-sized registers.
// Stage N occupies elements [N * 5, N * 5 + 5).
|
||||
std::array<u32, 5 * 6> tevConfigs;
|
||||
|
||||
// Equality operator required by std::unordered_map (the matching hash function is a std::hash specialization)
|
||||
// Two configs are equal when their object representations are byte-for-byte identical.
bool operator==(const FragmentConfig& config) const {
|
||||
return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
|
||||
}
|
||||
};
|
||||
} // namespace PICA
|
||||
|
||||
// Specialize std::hash for our fragment config class so it can be used as an std::unordered_map key
|
||||
// std::hash specialization for PICA::FragmentConfig.
// Hashes the raw bytes of the whole object with PICAHash::computeHash, mirroring the byte-wise
// comparison done by FragmentConfig::operator==.
template <>
|
||||
struct std::hash<PICA::FragmentConfig> {
|
||||
// Returns the hash of sizeof(config) bytes starting at &config, converted to std::size_t.
// NOTE(review): the return type of PICAHash::computeHash is not visible here — if it is wider than
// std::size_t the value is presumably truncated on return; confirm.
std::size_t operator()(const PICA::FragmentConfig& config) const noexcept {
|
||||
return PICAHash::computeHash((const char*)&config, sizeof(config));
|
||||
}
|
||||
};
|
||||
|
||||
class RendererGL final : public Renderer {
|
||||
GLStateManager gl = {};
|
||||
|
||||
|
@ -26,20 +56,23 @@ class RendererGL final : public Renderer {
|
|||
OpenGL::VertexArray vao;
|
||||
OpenGL::VertexBuffer vbo;
|
||||
|
||||
// TEV configuration uniform locations
|
||||
GLint textureEnvSourceLoc = -1;
|
||||
GLint textureEnvOperandLoc = -1;
|
||||
GLint textureEnvCombinerLoc = -1;
|
||||
GLint textureEnvColorLoc = -1;
|
||||
GLint textureEnvScaleLoc = -1;
|
||||
// Data
|
||||
struct {
|
||||
// TEV configuration uniform locations
|
||||
GLint textureEnvSourceLoc = -1;
|
||||
GLint textureEnvOperandLoc = -1;
|
||||
GLint textureEnvCombinerLoc = -1;
|
||||
GLint textureEnvColorLoc = -1;
|
||||
GLint textureEnvScaleLoc = -1;
|
||||
|
||||
// Uniform of PICA registers
|
||||
GLint picaRegLoc = -1;
|
||||
// Uniform of PICA registers
|
||||
GLint picaRegLoc = -1;
|
||||
|
||||
// Depth configuration uniform locations
|
||||
GLint depthOffsetLoc = -1;
|
||||
GLint depthScaleLoc = -1;
|
||||
GLint depthmapEnableLoc = -1;
|
||||
// Depth configuration uniform locations
|
||||
GLint depthOffsetLoc = -1;
|
||||
GLint depthScaleLoc = -1;
|
||||
GLint depthmapEnableLoc = -1;
|
||||
} ubershaderData;
|
||||
|
||||
float oldDepthScale = -1.0;
|
||||
float oldDepthOffset = 0.0;
|
||||
|
@ -48,6 +81,7 @@ class RendererGL final : public Renderer {
|
|||
SurfaceCache<DepthBuffer, 16, true> depthBufferCache;
|
||||
SurfaceCache<ColourBuffer, 16, true> colourBufferCache;
|
||||
SurfaceCache<Texture, 256, true> textureCache;
|
||||
bool usingUbershader = false;
|
||||
|
||||
// Dummy VAO/VBO for blitting the final output
|
||||
OpenGL::VertexArray dummyVAO;
|
||||
|
@ -58,9 +92,11 @@ class RendererGL final : public Renderer {
|
|||
OpenGL::Framebuffer screenFramebuffer;
|
||||
OpenGL::Texture blankTexture;
|
||||
|
||||
std::unordered_map<PICA::FragmentConfig, OpenGL::Program> shaderCache;
|
||||
|
||||
OpenGL::Framebuffer getColourFBO();
|
||||
OpenGL::Texture getTexture(Texture& tex);
|
||||
OpenGL::Program getSpecializedShader();
|
||||
OpenGL::Program& getSpecializedShader();
|
||||
|
||||
PICA::ShaderGen::FragmentGenerator fragShaderGen;
|
||||
|
||||
|
@ -99,4 +135,4 @@ class RendererGL final : public Renderer {
|
|||
|
||||
// Take a screenshot of the screen and store it in a file
|
||||
void screenshot(const std::string& name) override;
|
||||
};
|
||||
};
|
|
@ -308,7 +308,7 @@ void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Ope
|
|||
case TexEnvConfig::Operation::AddSigned: shader += "clamp(colorOp1 + colorOp2 - 0.5, 0.0, 1.0);"; break;
|
||||
case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break;
|
||||
case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break;
|
||||
case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec(1.0) - colorOp3)"; break;
|
||||
case TexEnvConfig::Operation::Lerp: shader += "colorOp1 * colorOp3 + colorOp2 * (vec3(1.0) - colorOp3)"; break;
|
||||
|
||||
case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break;
|
||||
case TexEnvConfig::Operation::MultiplyAdd: shader += "colorOp1 * colorOp2 + colorOp3"; break;
|
||||
|
@ -328,15 +328,15 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope
|
|||
case TexEnvConfig::Operation::AddSigned: shader += "clamp(alphaOp1 + alphaOp2 - 0.5, 0.0, 1.0);"; break;
|
||||
case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break;
|
||||
case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break;
|
||||
case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (vec(1.0) - alphaOp3)"; break;
|
||||
case TexEnvConfig::Operation::Lerp: shader += "alphaOp1 * alphaOp3 + alphaOp2 * (1.0 - alphaOp3)"; break;
|
||||
|
||||
case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, vec3(1.0)) * alphaOp3"; break;
|
||||
case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break;
|
||||
case TexEnvConfig::Operation::MultiplyAdd: shader += "alphaOp1 * alphaOp2 + alphaOp3"; break;
|
||||
case TexEnvConfig::Operation::Dot3RGB:
|
||||
case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(alphaOp1 - 0.5, alphaOp2 - 0.5))"; break;
|
||||
default:
|
||||
Helpers::warn("FragmentGenerator: Unimplemented alpha op");
|
||||
shader += "vec3(1.0)";
|
||||
shader += "1.0";
|
||||
break;
|
||||
}
|
||||
}
|
|
@ -38,9 +38,9 @@ void RendererGL::reset() {
|
|||
oldDepthOffset = 0.0; // Default depth offset to 0
|
||||
oldDepthmapEnable = false; // Enable w buffering
|
||||
|
||||
glUniform1f(depthScaleLoc, oldDepthScale);
|
||||
glUniform1f(depthOffsetLoc, oldDepthOffset);
|
||||
glUniform1i(depthmapEnableLoc, oldDepthmapEnable);
|
||||
glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale);
|
||||
glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset);
|
||||
glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable);
|
||||
|
||||
gl.useProgram(oldProgram); // Switch to old GL program
|
||||
}
|
||||
|
@ -59,16 +59,16 @@ void RendererGL::initGraphicsContextInternal() {
|
|||
triangleProgram.create({vert, frag});
|
||||
gl.useProgram(triangleProgram);
|
||||
|
||||
textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource");
|
||||
textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand");
|
||||
textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner");
|
||||
textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor");
|
||||
textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale");
|
||||
ubershaderData.textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource");
|
||||
ubershaderData.textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand");
|
||||
ubershaderData.textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner");
|
||||
ubershaderData.textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor");
|
||||
ubershaderData.textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale");
|
||||
|
||||
depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale");
|
||||
depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset");
|
||||
depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable");
|
||||
picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs");
|
||||
ubershaderData.depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale");
|
||||
ubershaderData.depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset");
|
||||
ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable");
|
||||
ubershaderData.picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs");
|
||||
|
||||
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
|
||||
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0);
|
||||
|
@ -289,7 +289,6 @@ void RendererGL::setupStencilTest(bool stencilEnable) {
|
|||
glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]);
|
||||
}
|
||||
|
||||
|
||||
void RendererGL::setupTextureEnvState() {
|
||||
// TODO: Only update uniforms when the TEV config changed. Use an UBO potentially.
|
||||
|
||||
|
@ -314,11 +313,11 @@ void RendererGL::setupTextureEnvState() {
|
|||
textureEnvScaleRegs[i] = regs[ioBase + 4];
|
||||
}
|
||||
|
||||
glUniform1uiv(textureEnvSourceLoc, 6, textureEnvSourceRegs);
|
||||
glUniform1uiv(textureEnvOperandLoc, 6, textureEnvOperandRegs);
|
||||
glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs);
|
||||
glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs);
|
||||
glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs);
|
||||
glUniform1uiv(ubershaderData.textureEnvSourceLoc, 6, textureEnvSourceRegs);
|
||||
glUniform1uiv(ubershaderData.textureEnvOperandLoc, 6, textureEnvOperandRegs);
|
||||
glUniform1uiv(ubershaderData.textureEnvCombinerLoc, 6, textureEnvCombinerRegs);
|
||||
glUniform1uiv(ubershaderData.textureEnvColorLoc, 6, textureEnvColourRegs);
|
||||
glUniform1uiv(ubershaderData.textureEnvScaleLoc, 6, textureEnvScaleRegs);
|
||||
}
|
||||
|
||||
void RendererGL::bindTexturesToSlots() {
|
||||
|
@ -389,11 +388,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
|
|||
OpenGL::Triangle,
|
||||
};
|
||||
|
||||
std::string vs = fragShaderGen.getVertexShader(regs);
|
||||
std::string fs = fragShaderGen.generate(regs);
|
||||
std::cout << fs << "\n\n\n";
|
||||
|
||||
OpenGL::Program program = getSpecializedShader();
|
||||
OpenGL::Program& program = getSpecializedShader();
|
||||
|
||||
const auto primitiveTopology = primTypes[static_cast<usize>(primType)];
|
||||
gl.disableScissor();
|
||||
|
@ -427,17 +422,17 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
|
|||
// Update depth uniforms
|
||||
if (oldDepthScale != depthScale) {
|
||||
oldDepthScale = depthScale;
|
||||
glUniform1f(depthScaleLoc, depthScale);
|
||||
glUniform1f(ubershaderData.depthScaleLoc, depthScale);
|
||||
}
|
||||
|
||||
if (oldDepthOffset != depthOffset) {
|
||||
oldDepthOffset = depthOffset;
|
||||
glUniform1f(depthOffsetLoc, depthOffset);
|
||||
glUniform1f(ubershaderData.depthOffsetLoc, depthOffset);
|
||||
}
|
||||
|
||||
if (oldDepthmapEnable != depthMapEnable) {
|
||||
oldDepthmapEnable = depthMapEnable;
|
||||
glUniform1i(depthmapEnableLoc, depthMapEnable);
|
||||
glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable);
|
||||
}
|
||||
|
||||
setupTextureEnvState();
|
||||
|
@ -445,7 +440,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
|
|||
|
||||
// Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48)
|
||||
// The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
|
||||
glUniform1uiv(picaRegLoc, 0x200 - 0x48, ®s[0x48]);
|
||||
glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, ®s[0x48]);
|
||||
|
||||
if (gpu.lightingLUTDirty) {
|
||||
updateLightingLUT();
|
||||
|
@ -784,22 +779,39 @@ std::optional<ColourBuffer> RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt
|
|||
return colourBufferCache.add(sampleBuffer);
|
||||
}
|
||||
|
||||
// Returns the specialized shader program matching the current PICA fragment state,
// building it and storing it in shaderCache the first time a given configuration is seen.
// NOTE(review): this span appears to interleave removed and added lines of a diff (two signatures,
// duplicated shader-generation and sampler-setup statements). Presumably the by-value
// `OpenGL::Program` version is the old code and the `OpenGL::Program&` version is the new code — confirm.
OpenGL::Program RendererGL::getSpecializedShader() {
|
||||
OpenGL::Program program;
|
||||
OpenGL::Program& RendererGL::getSpecializedShader() {
|
||||
// Build the cache key from the current register values
PICA::FragmentConfig fsConfig;
|
||||
fsConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
|
||||
fsConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
|
||||
fsConfig.texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor];
|
||||
|
||||
std::string vs = fragShaderGen.getVertexShader(regs);
|
||||
std::string fs = fragShaderGen.generate(regs);
|
||||
// Set up TEV stages
// For each of the 6 stages, copy 5 consecutive words starting at that stage's Source register into the key.
// NOTE(review): the `®s` tokens below look like `&regs` with "&reg" turned into the registered-trademark
// sign by HTML-entity decoding — confirm against the original source.
|
||||
std::memcpy(&fsConfig.tevConfigs[0 * 5], ®s[InternalRegs::TexEnv0Source], 5 * sizeof(u32));
|
||||
std::memcpy(&fsConfig.tevConfigs[1 * 5], ®s[InternalRegs::TexEnv1Source], 5 * sizeof(u32));
|
||||
std::memcpy(&fsConfig.tevConfigs[2 * 5], ®s[InternalRegs::TexEnv2Source], 5 * sizeof(u32));
|
||||
std::memcpy(&fsConfig.tevConfigs[3 * 5], ®s[InternalRegs::TexEnv3Source], 5 * sizeof(u32));
|
||||
std::memcpy(&fsConfig.tevConfigs[4 * 5], ®s[InternalRegs::TexEnv4Source], 5 * sizeof(u32));
|
||||
std::memcpy(&fsConfig.tevConfigs[5 * 5], ®s[InternalRegs::TexEnv5Source], 5 * sizeof(u32));
|
||||
|
||||
OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex);
|
||||
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
|
||||
program.create({vertShader, fragShader});
|
||||
program.use();
|
||||
// std::unordered_map::operator[] inserts a default-constructed Program when the key is absent;
// exists() is then used to decide whether the program still has to be built.
// NOTE(review): presumably a default-constructed OpenGL::Program reports !exists() — confirm.
OpenGL::Program& program = shaderCache[fsConfig];
|
||||
if (!program.exists()) {
|
||||
printf("Creating specialized shader\n");
|
||||
|
||||
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
|
||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
|
||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
|
||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
|
||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3);
|
||||
// Generate the shader sources from the current registers and print both to stdout
std::string vs = fragShaderGen.getVertexShader(regs);
|
||||
std::string fs = fragShaderGen.generate(regs);
|
||||
std::cout << vs << "\n\n" << fs << "\n";
|
||||
|
||||
OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex);
|
||||
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
|
||||
program.create({vertShader, fragShader});
|
||||
// In the cached version, use() is only called on this creation path.
// NOTE(review): callers presumably bind the returned program themselves on a cache hit — verify.
program.use();
|
||||
|
||||
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
|
||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
|
||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
|
||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
|
||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3);
|
||||
}
|
||||
|
||||
// Reference to the entry stored in shaderCache
return program;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue