Hook up vertex shaders to shader cache

This commit is contained in:
wheremyfoodat 2024-07-25 04:04:41 +03:00
parent 251ff5ee49
commit 2f4c169cad
10 changed files with 256 additions and 77 deletions

View file

@ -123,27 +123,38 @@ void GPU::reset() {
// Call the correct version of drawArrays based on whether this is an indexed draw (first template parameter)
// And on how the vertex shader will be executed: hardware shader, shader JIT or interpreter (second template parameter)
void GPU::drawArrays(bool indexed) {
renderer->prepareForDraw(shaderUnit, false);
const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled;
const bool hwShaders = renderer->prepareForDraw(shaderUnit, false);
if (indexed) {
if (shaderJITEnabled)
drawArrays<true, true>();
else
drawArrays<true, false>();
if (hwShaders) {
if (indexed) {
drawArrays<true, ShaderExecMode::Hardware>();
} else {
drawArrays<false, ShaderExecMode::Hardware>();
}
} else {
if (shaderJITEnabled)
drawArrays<false, true>();
else
drawArrays<false, false>();
const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled;
if (indexed) {
if (shaderJITEnabled) {
drawArrays<true, ShaderExecMode::JIT>();
} else {
drawArrays<true, ShaderExecMode::Interpreter>();
}
} else {
if (shaderJITEnabled) {
drawArrays<false, ShaderExecMode::JIT>();
} else {
drawArrays<false, ShaderExecMode::Interpreter>();
}
}
}
}
static std::array<PICA::Vertex, Renderer::vertexBufferSize> vertices;
template <bool indexed, bool useShaderJIT>
template <bool indexed, ShaderExecMode mode>
void GPU::drawArrays() {
if constexpr (useShaderJIT) {
if constexpr (mode == ShaderExecMode::JIT) {
shaderJIT.prepare(shaderUnit.vs);
}
@ -322,29 +333,38 @@ void GPU::drawArrays() {
}
}
// Before running the shader, the PICA maps the fetched attributes from the attribute registers to the shader input registers
// Based on the SH_ATTRIBUTES_PERMUTATION registers.
// Ie it might attribute #0 to v2, #1 to v7, etc
for (int j = 0; j < totalAttribCount; j++) {
const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf;
std::memcpy(&shaderUnit.vs.inputs[mapping], &currentAttributes[j], sizeof(vec4f));
}
// Running shader on the CPU instead of the GPU
if constexpr (mode == ShaderExecMode::Interpreter || mode == ShaderExecMode::JIT) {
// Before running the shader, the PICA maps the fetched attributes from the attribute registers to the shader input registers
// Based on the SH_ATTRIBUTES_PERMUTATION registers.
// Ie it might map attribute #0 to v2, #1 to v7, etc
for (int j = 0; j < totalAttribCount; j++) {
const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf;
std::memcpy(&shaderUnit.vs.inputs[mapping], &currentAttributes[j], sizeof(vec4f));
}
if constexpr (useShaderJIT) {
shaderJIT.run(shaderUnit.vs);
} else {
shaderUnit.vs.run();
}
if constexpr (mode == ShaderExecMode::JIT) {
shaderJIT.run(shaderUnit.vs);
} else {
shaderUnit.vs.run();
}
PICA::Vertex& out = vertices[i];
// Map shader outputs to fixed function properties
const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
for (int i = 0; i < totalShaderOutputs; i++) {
const u32 config = regs[PICA::InternalRegs::ShaderOutmap0 + i];
PICA::Vertex& out = vertices[i];
// Map shader outputs to fixed function properties
const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
for (int i = 0; i < totalShaderOutputs; i++) {
const u32 config = regs[PICA::InternalRegs::ShaderOutmap0 + i];
for (int j = 0; j < 4; j++) { // pls unroll
const u32 mapping = (config >> (j * 8)) & 0x1F;
out.raw[mapping] = vsOutputRegisters[i][j];
for (int j = 0; j < 4; j++) { // pls unroll
const u32 mapping = (config >> (j * 8)) & 0x1F;
out.raw[mapping] = vsOutputRegisters[i][j];
}
}
} else { // Using hw shaders and running the shader on the GPU, just write the inputs to the attribute buffer directly
PICA::Vertex& out = vertices[i];
for (int j = 0; j < totalAttribCount; j++) {
const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf;
std::memcpy(&out.raw[mapping], &currentAttributes[j], sizeof(vec4f));
}
}
}

View file

@ -72,11 +72,6 @@ std::string FragmentGenerator::getDefaultVertexShader() {
out float gl_ClipDistance[2];
#endif
vec4 abgr8888ToVec4(uint abgr) {
const float scale = 1.0 / 255.0;
return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24));
}
void main() {
gl_Position = a_coords;
vec4 colourAbs = abs(a_vertexColour);
@ -677,4 +672,58 @@ void FragmentGenerator::compileFog(std::string& shader, const PICA::FragmentConf
shader += "vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), 24), 0).rg;"; // fog LUT is past the light LUTs
shader += "float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);";
shader += "combinerOutput.rgb = mix(fog_color, combinerOutput.rgb, fog_factor);";
}
// Builds the full vertex shader used for hardware-accelerated vertex shaders.
// picaSource is the already-generated shader source; this function appends a GLSL main() to it that calls
// pica_shader_main() and then forwards output_registers[0..6] to the varyings declared below.
// The ubershader variant is not implemented: in that case we panic and hand picaSource back untouched.
std::string FragmentGenerator::getVertexShaderAccelerated(const std::string& picaSource, bool usingUbershader) {
	if (usingUbershader) {
		Helpers::panic("Unimplemented: GetVertexShaderAccelerated for ubershader");
		return picaSource;
	}

	// TODO: Uniforms and don't hardcode fixed-function semantic indices...
	std::string shaderSource(picaSource);

	// On GLES, define USING_GLES so that the gl_ClipDistance parts of the epilogue below are compiled out
	if (api == API::GLES) {
		shaderSource.append("\n#define USING_GLES\n");
	}

	// Epilogue: varying declarations + the real entrypoint of the shader
	shaderSource.append(R"(
out vec4 v_quaternion;
out vec4 v_colour;
out vec3 v_texcoord0;
out vec2 v_texcoord1;
out vec3 v_view;
out vec2 v_texcoord2;
#ifndef USING_GLES
out float gl_ClipDistance[2];
#endif
void main() {
pica_shader_main();
vec4 a_coords = output_registers[0];
vec4 a_vertexColour = output_registers[1];
vec2 a_texcoord0 = output_registers[2].xy;
float a_texcoord0_w = output_registers[2].w;
vec2 a_texcoord1 = output_registers[3].xy;
vec2 a_texcoord2 = output_registers[4].xy;
vec3 a_view = output_registers[5].xyz;
vec4 a_quaternion = output_registers[6];
gl_Position = a_coords;
vec4 colourAbs = abs(a_vertexColour);
v_colour = min(colourAbs, vec4(1.f));
v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w);
v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
v_view = a_view;
v_quaternion = a_quaternion;
#ifndef USING_GLES
//gl_ClipDistance[0] = -a_coords.z;
//gl_ClipDistance[1] = dot(clipCoords, a_coords);
#endif
})");

	return shaderSource;
}

View file

@ -25,7 +25,7 @@ void RendererGL::reset() {
colourBufferCache.reset();
textureCache.reset();
clearShaderCache();
shaderCache.clear();
// Init the colour/depth buffer settings to some random defaults on reset
colourBufferLoc = 0;
@ -788,18 +788,24 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
PICA::FragmentConfig fsConfig(regs);
CachedProgram& programEntry = shaderCache[fsConfig];
OpenGL::Shader& fragShader = shaderCache.fragmentShaderCache[fsConfig];
if (!fragShader.exists()) {
std::string fs = fragShaderGen.generate(fsConfig);
fragShader.create({fs.c_str(), fs.size()}, OpenGL::Fragment);
}
// Get the handle of the current vertex shader
OpenGL::Shader& vertexShader = usingAcceleratedShader ? *generatedVertexShader : defaultShadergenVs;
// And form the key for looking up a shader program
const u64 programKey = (u64(vertexShader.handle()) << 32) | u64(fragShader.handle());
CachedProgram& programEntry = shaderCache.programCache[programKey];
OpenGL::Program& program = programEntry.program;
if (!program.exists()) {
std::string fs = fragShaderGen.generate(fsConfig);
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
program.create({defaultShadergenVs, fragShader});
program.create({vertexShader, fragShader});
gl.useProgram(program);
fragShader.free();
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
@ -904,15 +910,8 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
return program;
}
void RendererGL::prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) {
std::string vertShaderSource = PICA::ShaderGen::decompileShader(
shaderUnit.vs, *emulatorConfig, shaderUnit.vs.entrypoint, PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL
);
OpenGL::Shader vert({vertShaderSource.c_str(), vertShaderSource.size()}, OpenGL::Vertex);
//triangleProgram.create({vert, frag});
std::cout << vertShaderSource << "\n";
bool RendererGL::prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) {
// First we figure out if we will be using an ubershader
bool usingUbershader = emulatorConfig->useUbershaders;
if (usingUbershader) {
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
@ -925,6 +924,46 @@ void RendererGL::prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) {
}
}
// Then we figure out if we will use hw accelerated shaders, and try to fetch our shader
// TODO: Ubershader support for accelerated shaders
usingAcceleratedShader = emulatorConfig->accelerateShaders && !isImmediateMode && !usingUbershader;
if (usingAcceleratedShader) {
	// Build the cache key for the current vertex shader: code hash, operand descriptor hash, entrypoint and ubershader flag
	auto shaderCodeHash = shaderUnit.vs.getCodeHash();
	auto opdescHash = shaderUnit.vs.getOpdescHash();
	auto vertexConfig = PICA::VertConfig{
		.shaderHash = shaderCodeHash,
		.opdescHash = opdescHash,
		.entrypoint = shaderUnit.vs.entrypoint,
		.usingUbershader = usingUbershader,
	};

	// Presumably operator[] inserts an empty optional the first time a given config is looked up
	std::optional<OpenGL::Shader>& shader = shaderCache.vertexShaderCache[vertexConfig];

	// If the optional is empty, we have never tried to recompile the shader before. Try to recompile it and see if it works.
	if (!shader.has_value()) {
		// Initialize shader to a "null" shader (handle == 0)
		// This has to engage the optional via emplace(): writing through *shader while the optional is empty is undefined
		// behaviour and would leave has_value() false, so we'd redo the whole recompilation on every single draw
		shader.emplace();

		std::string picaShaderSource = PICA::ShaderGen::decompileShader(
			shaderUnit.vs, *emulatorConfig, shaderUnit.vs.entrypoint, PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL
		);

		// Empty source means compilation error. If the source is not empty, we convert the recompiled PICA code into a valid
		// shader and upload it to the GPU
		if (!picaShaderSource.empty()) {
			std::string vertexShaderSource = fragShaderGen.getVertexShaderAccelerated(picaShaderSource, usingUbershader);
			shader->create({vertexShaderSource}, OpenGL::Vertex);
		}
	}

	// Shader generation did not work out, so set usingAcceleratedShader to false
	if (!shader->exists()) {
		usingAcceleratedShader = false;
	} else {
		// Remember which cached vertex shader to use for this draw
		generatedVertexShader = &(*shader);
	}
}
if (usingUbershader) {
gl.useProgram(triangleProgram);
} else {
@ -958,6 +997,8 @@ void RendererGL::prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) {
glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]);
setupUbershaderTexEnv();
}
return usingAcceleratedShader;
}
void RendererGL::screenshot(const std::string& name) {
@ -985,22 +1026,12 @@ void RendererGL::screenshot(const std::string& name) {
stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0);
}
// Releases the GL objects held by every cached program (the program itself and the buffer named by uboBinding),
// then empties the shader cache
void RendererGL::clearShaderCache() {
	for (auto& [config, entry] : shaderCache) {
		entry.program.free();
		glDeleteBuffers(1, &entry.uboBinding);
	}

	shaderCache.clear();
}
void RendererGL::deinitGraphicsContext() {
// Invalidate all surface caches since they'll no longer be valid
textureCache.reset();
depthBufferCache.reset();
colourBufferCache.reset();
clearShaderCache();
shaderCache.clear();
// All other GL objects should be invalidated automatically and be recreated by the next call to initGraphicsContext
// TODO: Make it so that depth and colour buffers get written back to 3DS memory
@ -1048,4 +1079,4 @@ void RendererGL::initUbershader(OpenGL::Program& program) {
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3);
}
}