Renderer: Add prepareForDraw callback

This commit is contained in:
wheremyfoodat 2024-07-24 23:23:24 +03:00
parent 156c3031a2
commit 90f411889b
7 changed files with 81 additions and 71 deletions

View file

@ -2,10 +2,9 @@
#include "PICA/shader.hpp"
class ShaderUnit {
public:
PICAShader vs; // Vertex shader
PICAShader gs; // Geometry shader
public:
PICAShader vs; // Vertex shader
PICAShader gs; // Geometry shader
ShaderUnit() : vs(ShaderType::Vertex), gs(ShaderType::Geometry) {}
void reset();

View file

@ -21,9 +21,11 @@ enum class RendererType : s8 {
};
struct EmulatorConfig;
class GPU;
struct SDL_Window;
class GPU;
class ShaderUnit;
class Renderer {
protected:
GPU& gpu;
@ -77,7 +79,10 @@ class Renderer {
virtual std::string getUbershader() { return ""; }
virtual void setUbershader(const std::string& shader) {}
virtual void setUbershaderSetting(bool value) {}
// This function is called on every draw call before parsing vertex data.
// It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between
// ubershaders and shadergen, and so on.
virtual void prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) {}
// Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window
#ifdef PANDA3DS_FRONTEND_QT

View file

@ -30,7 +30,6 @@ class RendererGL final : public Renderer {
OpenGL::VertexArray vao;
OpenGL::VertexBuffer vbo;
bool enableUbershader = true;
// Data
struct {
@ -110,8 +109,7 @@ class RendererGL final : public Renderer {
virtual bool supportsShaderReload() override { return true; }
virtual std::string getUbershader() override;
virtual void setUbershader(const std::string& shader) override;
virtual void setUbershaderSetting(bool value) override { enableUbershader = value; }
virtual void prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) override;
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);

View file

@ -117,13 +117,13 @@ void GPU::reset() {
externalRegs[Framebuffer1Config] = static_cast<u32>(PICA::ColorFmt::RGB8);
externalRegs[Framebuffer1Select] = 0;
renderer->setUbershaderSetting(config.useUbershaders);
renderer->reset();
}
// Call the correct version of drawArrays based on whether this is an indexed draw (first template parameter)
// And whether we are going to use the shader JIT (second template parameter)
void GPU::drawArrays(bool indexed) {
renderer->prepareForDraw(shaderUnit, false);
const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled;
if (indexed) {

View file

@ -249,6 +249,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
// If we've reached 3 verts, issue a draw call
// Handle rendering depending on the primitive type
if (immediateModeVertIndex == 3) {
renderer->prepareForDraw(shaderUnit, true);
renderer->drawVertices(PICA::PrimType::TriangleList, immediateModeVertices);
switch (primType) {

View file

@ -72,19 +72,17 @@ const Function* ShaderDecompiler::findFunction(const AddressRange& range) {
void ShaderDecompiler::writeAttributes() {
decompiledShader += R"(
layout(location = 0) in vec4 inputs[8];
layout(location = 0) in vec4 inputs[8];
layout(std140) uniform PICAShaderUniforms {
vec4 uniform_float[96];
uvec4 uniform_int;
uint uniform_bool;
};
layout(std140) uniform PICAShaderUniforms {
vec4 uniform_float[96];
uvec4 uniform_int;
uint uniform_bool;
};
vec4 temp_registers[16];
vec4 dummy_vec = vec4(0.0);
vec4 temp_registers[16];
vec4 output_registers[8];
vec4 dummy_vec = vec4(0.0);
)";
decompiledShader += "\n";
}
std::string ShaderDecompiler::decompile() {

View file

@ -4,11 +4,12 @@
#include <cmrc/cmrc.hpp>
#include "config.hpp"
#include "PICA/float_types.hpp"
#include "PICA/pica_frag_uniforms.hpp"
#include "PICA/gpu.hpp"
#include "PICA/pica_frag_uniforms.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_decompiler.hpp"
#include "config.hpp"
#include "math_util.hpp"
CMRC_DECLARE(RendererGL);
@ -409,25 +410,6 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
OpenGL::Triangle,
};
bool usingUbershader = enableUbershader;
if (usingUbershader) {
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1;
// Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using moret han N lights via shadergen
// This way we generate fewer shaders overall than with full shadergen, but don't tank performance
if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) {
usingUbershader = false;
}
}
if (usingUbershader) {
gl.useProgram(triangleProgram);
} else {
OpenGL::Program& program = getSpecializedShader();
gl.useProgram(program);
}
const auto primitiveTopology = primTypes[static_cast<usize>(primType)];
gl.disableScissor();
gl.bindVBO(vbo);
@ -449,38 +431,9 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
const int depthFunc = getBits<4, 3>(depthControl);
const int colourMask = getBits<8, 4>(depthControl);
gl.setColourMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8);
static constexpr std::array<GLenum, 8> depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL};
// Update ubershader uniforms
if (usingUbershader) {
const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
if (oldDepthScale != depthScale) {
oldDepthScale = depthScale;
glUniform1f(ubershaderData.depthScaleLoc, depthScale);
}
if (oldDepthOffset != depthOffset) {
oldDepthOffset = depthOffset;
glUniform1f(ubershaderData.depthOffsetLoc, depthOffset);
}
if (oldDepthmapEnable != depthMapEnable) {
oldDepthmapEnable = depthMapEnable;
glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable);
}
// Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48)
// The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]);
setupUbershaderTexEnv();
}
bindTexturesToSlots();
if (gpu.fogLUTDirty) {
updateFogLUT();
}
@ -951,6 +904,62 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
return program;
}
void RendererGL::prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) {
std::string vertShaderSource = PICA::ShaderGen::decompileShader(
shaderUnit.vs, *emulatorConfig, shaderUnit.vs.entrypoint, PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL
);
OpenGL::Shader vert({vertShaderSource.c_str(), vertShaderSource.size()}, OpenGL::Vertex);
//triangleProgram.create({vert, frag});
std::cout << vertShaderSource << "\n";
bool usingUbershader = emulatorConfig->useUbershaders;
if (usingUbershader) {
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1;
// Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using moret han N lights via shadergen
// This way we generate fewer shaders overall than with full shadergen, but don't tank performance
if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) {
usingUbershader = false;
}
}
if (usingUbershader) {
gl.useProgram(triangleProgram);
} else {
OpenGL::Program& program = getSpecializedShader();
gl.useProgram(program);
}
// Update ubershader uniforms
if (usingUbershader) {
const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
if (oldDepthScale != depthScale) {
oldDepthScale = depthScale;
glUniform1f(ubershaderData.depthScaleLoc, depthScale);
}
if (oldDepthOffset != depthOffset) {
oldDepthOffset = depthOffset;
glUniform1f(ubershaderData.depthOffsetLoc, depthOffset);
}
if (oldDepthmapEnable != depthMapEnable) {
oldDepthmapEnable = depthMapEnable;
glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable);
}
// Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48)
// The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]);
setupUbershaderTexEnv();
}
}
void RendererGL::screenshot(const std::string& name) {
constexpr uint width = 400;
constexpr uint height = 2 * 240;