Renderer: Add prepareForDraw callback

This commit is contained in:
wheremyfoodat 2024-07-24 23:23:24 +03:00
parent 156c3031a2
commit 90f411889b
7 changed files with 81 additions and 71 deletions

View file

@ -2,10 +2,9 @@
#include "PICA/shader.hpp" #include "PICA/shader.hpp"
class ShaderUnit { class ShaderUnit {
public:
public: PICAShader vs; // Vertex shader
PICAShader vs; // Vertex shader PICAShader gs; // Geometry shader
PICAShader gs; // Geometry shader
ShaderUnit() : vs(ShaderType::Vertex), gs(ShaderType::Geometry) {} ShaderUnit() : vs(ShaderType::Vertex), gs(ShaderType::Geometry) {}
void reset(); void reset();

View file

@ -21,9 +21,11 @@ enum class RendererType : s8 {
}; };
struct EmulatorConfig; struct EmulatorConfig;
class GPU;
struct SDL_Window; struct SDL_Window;
class GPU;
class ShaderUnit;
class Renderer { class Renderer {
protected: protected:
GPU& gpu; GPU& gpu;
@ -77,7 +79,10 @@ class Renderer {
virtual std::string getUbershader() { return ""; } virtual std::string getUbershader() { return ""; }
virtual void setUbershader(const std::string& shader) {} virtual void setUbershader(const std::string& shader) {}
virtual void setUbershaderSetting(bool value) {} // This function is called on every draw call before parsing vertex data.
// It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between
// ubershaders and shadergen, and so on.
virtual void prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) {}
// Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window // Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window
#ifdef PANDA3DS_FRONTEND_QT #ifdef PANDA3DS_FRONTEND_QT

View file

@ -30,7 +30,6 @@ class RendererGL final : public Renderer {
OpenGL::VertexArray vao; OpenGL::VertexArray vao;
OpenGL::VertexBuffer vbo; OpenGL::VertexBuffer vbo;
bool enableUbershader = true;
// Data // Data
struct { struct {
@ -110,8 +109,7 @@ class RendererGL final : public Renderer {
virtual bool supportsShaderReload() override { return true; } virtual bool supportsShaderReload() override { return true; }
virtual std::string getUbershader() override; virtual std::string getUbershader() override;
virtual void setUbershader(const std::string& shader) override; virtual void setUbershader(const std::string& shader) override;
virtual void prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) override;
virtual void setUbershaderSetting(bool value) override { enableUbershader = value; }
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);

View file

@ -117,13 +117,13 @@ void GPU::reset() {
externalRegs[Framebuffer1Config] = static_cast<u32>(PICA::ColorFmt::RGB8); externalRegs[Framebuffer1Config] = static_cast<u32>(PICA::ColorFmt::RGB8);
externalRegs[Framebuffer1Select] = 0; externalRegs[Framebuffer1Select] = 0;
renderer->setUbershaderSetting(config.useUbershaders);
renderer->reset(); renderer->reset();
} }
// Call the correct version of drawArrays based on whether this is an indexed draw (first template parameter) // Call the correct version of drawArrays based on whether this is an indexed draw (first template parameter)
// And whether we are going to use the shader JIT (second template parameter) // And whether we are going to use the shader JIT (second template parameter)
void GPU::drawArrays(bool indexed) { void GPU::drawArrays(bool indexed) {
renderer->prepareForDraw(shaderUnit, false);
const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled; const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled;
if (indexed) { if (indexed) {

View file

@ -249,6 +249,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
// If we've reached 3 verts, issue a draw call // If we've reached 3 verts, issue a draw call
// Handle rendering depending on the primitive type // Handle rendering depending on the primitive type
if (immediateModeVertIndex == 3) { if (immediateModeVertIndex == 3) {
renderer->prepareForDraw(shaderUnit, true);
renderer->drawVertices(PICA::PrimType::TriangleList, immediateModeVertices); renderer->drawVertices(PICA::PrimType::TriangleList, immediateModeVertices);
switch (primType) { switch (primType) {

View file

@ -72,19 +72,17 @@ const Function* ShaderDecompiler::findFunction(const AddressRange& range) {
void ShaderDecompiler::writeAttributes() { void ShaderDecompiler::writeAttributes() {
decompiledShader += R"( decompiledShader += R"(
layout(location = 0) in vec4 inputs[8]; layout(location = 0) in vec4 inputs[8];
layout(std140) uniform PICAShaderUniforms {
vec4 uniform_float[96];
uvec4 uniform_int;
uint uniform_bool;
};
layout(std140) uniform PICAShaderUniforms { vec4 temp_registers[16];
vec4 uniform_float[96]; vec4 output_registers[8];
uvec4 uniform_int; vec4 dummy_vec = vec4(0.0);
uint uniform_bool;
};
vec4 temp_registers[16];
vec4 dummy_vec = vec4(0.0);
)"; )";
decompiledShader += "\n";
} }
std::string ShaderDecompiler::decompile() { std::string ShaderDecompiler::decompile() {

View file

@ -4,11 +4,12 @@
#include <cmrc/cmrc.hpp> #include <cmrc/cmrc.hpp>
#include "config.hpp"
#include "PICA/float_types.hpp" #include "PICA/float_types.hpp"
#include "PICA/pica_frag_uniforms.hpp"
#include "PICA/gpu.hpp" #include "PICA/gpu.hpp"
#include "PICA/pica_frag_uniforms.hpp"
#include "PICA/regs.hpp" #include "PICA/regs.hpp"
#include "PICA/shader_decompiler.hpp"
#include "config.hpp"
#include "math_util.hpp" #include "math_util.hpp"
CMRC_DECLARE(RendererGL); CMRC_DECLARE(RendererGL);
@ -409,25 +410,6 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
OpenGL::Triangle, OpenGL::Triangle,
}; };
bool usingUbershader = enableUbershader;
if (usingUbershader) {
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1;
// Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using more than N lights via shadergen
// This way we generate fewer shaders overall than with full shadergen, but don't tank performance
if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) {
usingUbershader = false;
}
}
if (usingUbershader) {
gl.useProgram(triangleProgram);
} else {
OpenGL::Program& program = getSpecializedShader();
gl.useProgram(program);
}
const auto primitiveTopology = primTypes[static_cast<usize>(primType)]; const auto primitiveTopology = primTypes[static_cast<usize>(primType)];
gl.disableScissor(); gl.disableScissor();
gl.bindVBO(vbo); gl.bindVBO(vbo);
@ -449,38 +431,9 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
const int depthFunc = getBits<4, 3>(depthControl); const int depthFunc = getBits<4, 3>(depthControl);
const int colourMask = getBits<8, 4>(depthControl); const int colourMask = getBits<8, 4>(depthControl);
gl.setColourMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8); gl.setColourMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8);
static constexpr std::array<GLenum, 8> depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL}; static constexpr std::array<GLenum, 8> depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL};
// Update ubershader uniforms
if (usingUbershader) {
const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
if (oldDepthScale != depthScale) {
oldDepthScale = depthScale;
glUniform1f(ubershaderData.depthScaleLoc, depthScale);
}
if (oldDepthOffset != depthOffset) {
oldDepthOffset = depthOffset;
glUniform1f(ubershaderData.depthOffsetLoc, depthOffset);
}
if (oldDepthmapEnable != depthMapEnable) {
oldDepthmapEnable = depthMapEnable;
glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable);
}
// Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48)
// The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]);
setupUbershaderTexEnv();
}
bindTexturesToSlots(); bindTexturesToSlots();
if (gpu.fogLUTDirty) { if (gpu.fogLUTDirty) {
updateFogLUT(); updateFogLUT();
} }
@ -951,6 +904,62 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
return program; return program;
} }
// Per-draw setup hook for the OpenGL renderer.
// Selects the GL program used for the upcoming draw (the ubershader or a specialized shader obtained from
// getSpecializedShader()) and, when the ubershader is selected, refreshes the uniforms it reads.
//   shaderUnit:      only its vertex shader (shaderUnit.vs) is read, to feed the shader decompiler.
//   isImmediateMode: not consulted by this implementation.
void RendererGL::prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) {
	// NOTE(review): the vertex shader is decompiled, wrapped in an OpenGL::Shader and dumped to stdout on every call.
	// The resulting `vert` object is not attached to any program (the create() call below is commented out), so this
	// looks like temporary debugging scaffolding - confirm before merging, and check whether `vert` needs releasing.
	std::string decompiledSource = PICA::ShaderGen::decompileShader(
		shaderUnit.vs, *emulatorConfig, shaderUnit.vs.entrypoint, PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL
	);

	OpenGL::Shader vert({decompiledSource.c_str(), decompiledSource.size()}, OpenGL::Vertex);
	//triangleProgram.create({vert, frag});
	std::cout << decompiledSource << "\n";

	// Start from the user's setting, then optionally fall back to shadergen for light-heavy draws.
	// Emulating lights in the ubershader is incredibly slow, so draws that use at least the configured number of lights
	// can be routed through shadergen instead. This generates fewer shaders overall than full shadergen without tanking performance.
	bool ubershaderSelected = emulatorConfig->useUbershaders;
	if (ubershaderSelected && emulatorConfig->forceShadergenForLights) {
		const bool lightingOn = (regs[InternalRegs::LightingEnable] & 1) != 0;
		const uint activeLights = (regs[InternalRegs::LightNumber] & 0x7) + 1;

		if (lightingOn && activeLights >= emulatorConfig->lightShadergenThreshold) {
			ubershaderSelected = false;
		}
	}

	if (!ubershaderSelected) {
		// Specialized path: nothing else to set up here beyond binding the program
		gl.useProgram(getSpecializedShader());
		return;
	}

	gl.useProgram(triangleProgram);

	// Ubershader path: the depth uniforms are cached, so each one is only re-sent when its value changed
	const float newDepthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
	const float newDepthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
	const bool newDepthmapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;

	if (oldDepthScale != newDepthScale) {
		oldDepthScale = newDepthScale;
		glUniform1f(ubershaderData.depthScaleLoc, newDepthScale);
	}

	if (oldDepthOffset != newDepthOffset) {
		oldDepthOffset = newDepthOffset;
		glUniform1f(ubershaderData.depthOffsetLoc, newDepthOffset);
	}

	if (oldDepthmapEnable != newDepthmapEnable) {
		oldDepthmapEnable = newDepthmapEnable;
		glUniform1i(ubershaderData.depthmapEnableLoc, newDepthmapEnable);
	}

	// The ubershader needs the rasterizer registers (for depth, starting from index 0x48) as well as the texturing and
	// fragment lighting registers, so the whole range is uploaded as one uniform array instead of many slow individual updates
	glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]);
	setupUbershaderTexEnv();
}
void RendererGL::screenshot(const std::string& name) { void RendererGL::screenshot(const std::string& name) {
constexpr uint width = 400; constexpr uint width = 400;
constexpr uint height = 2 * 240; constexpr uint height = 2 * 240;