diff --git a/include/PICA/draw_acceleration.hpp b/include/PICA/draw_acceleration.hpp index 1671825e..72eb8944 100644 --- a/include/PICA/draw_acceleration.hpp +++ b/include/PICA/draw_acceleration.hpp @@ -14,6 +14,7 @@ namespace PICA { u32 size; u32 stride; + u8 inputReg; // Which input reg should this attribute go to in the vertex shader? u8 type; u8 componentCount; bool fixed; @@ -27,6 +28,7 @@ namespace PICA { // Minimum and maximum index in the index buffer for a draw call u16 minimumIndex, maximumIndex; u32 totalAttribCount; + u32 enabledAttributeMask; u32 vertexDataSize; std::array attributeInfo; diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 30b17026..137c4889 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -70,6 +70,9 @@ class RendererGL final : public Renderer { GLuint maximumIndex = 0; void* hwIndexBufferOffset = nullptr; + // When doing hw shaders, we cache which attributes are enabled in our VAO to avoid having to enable/disable all attributes on each draw + u32 previousAttributeMask = 0; + // Cached pointer to the current vertex shader when using HW accelerated shaders OpenGL::Shader* generatedVertexShader = nullptr; diff --git a/src/core/PICA/draw_acceleration.cpp b/src/core/PICA/draw_acceleration.cpp index 7646577f..538a714e 100644 --- a/src/core/PICA/draw_acceleration.cpp +++ b/src/core/PICA/draw_acceleration.cpp @@ -8,7 +8,8 @@ void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) { accel.indexed = indexed; accel.totalAttribCount = totalAttribCount; - + accel.enabledAttributeMask = 0; + const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16; const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer @@ -50,6 +51,8 @@ void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) { } const u64 vertexCfg = 
u64(regs[PICA::InternalRegs::AttribFormatLow]) | (u64(regs[PICA::InternalRegs::AttribFormatHigh]) << 32); + const u64 inputAttrCfg = getVertexShaderInputConfig(); + u32 buffer = 0; u32 attrCount = 0; accel.vertexDataSize = 0; @@ -94,7 +97,11 @@ void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) { // Size of each component based on the attribute type static constexpr u32 sizePerComponent[4] = {1, 1, 2, 4}; + const u32 inputReg = (inputAttrCfg >> (attrCount * 4)) & 0xf; + // Mark the attribute as enabled + accel.enabledAttributeMask |= 1 << inputReg; + attr.inputReg = inputReg; attr.componentCount = size; attr.offset = attributeOffset; attr.size = size * sizePerComponent[attribType]; @@ -123,6 +130,9 @@ void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) { attr.fixedValue[i] = fixedAttr[i].toFloat32(); } + const u32 inputReg = (inputAttrCfg >> (attrCount * 4)) & 0xf; + + attr.inputReg = inputReg; attrCount += 1; } } diff --git a/src/core/PICA/shader_decompiler.cpp b/src/core/PICA/shader_decompiler.cpp index 347df5c5..ead98410 100644 --- a/src/core/PICA/shader_decompiler.cpp +++ b/src/core/PICA/shader_decompiler.cpp @@ -79,7 +79,7 @@ ExitMode ControlFlow::analyzeFunction(const PICAShader& shader, u32 start, u32 e // This opens up 2 parallel paths of execution auto branchTakenExit = analyzeFunction(shader, dest, end, labels); - auto branchNotTakenExit = analyzeFunction(shader, pc + 1, dest, labels); + auto branchNotTakenExit = analyzeFunction(shader, pc + 1, end, labels); it->second = exitParallel(branchTakenExit, branchNotTakenExit); return it->second; } @@ -122,6 +122,7 @@ ExitMode ControlFlow::analyzeFunction(const PICAShader& shader, u32 start, u32 e } break; } + case ShaderOpcodes::CALL: { const u32 num = instruction & 0xff; const u32 dest = getBits<10, 12>(instruction); diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index affe9837..8fc2b126 100644 --- 
a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -778,8 +778,6 @@ void main() { gl_ClipDistance[1] = dot(clipCoords, a_coords); #endif })"; - - std::cout << ret << "\n"; return ret; } } diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 536cb6fa..3d011955 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -2,6 +2,7 @@ #include +#include #include #include "PICA/float_types.hpp" @@ -987,7 +988,7 @@ bool RendererGL::prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* shaderUnit.vs, *emulatorConfig, shaderUnit.vs.entrypoint, PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL ); - // Empty source means compilation error, if the source is not empty then we convert the rcompiled PICA code into a valid shader and upload + // Empty source means compilation error, if the source is not empty then we convert the recompiled PICA code into a valid shader and upload // it to the GPU if (!picaShaderSource.empty()) { std::string vertexShaderSource = fragShaderGen.getVertexShaderAccelerated(picaShaderSource, vertexConfig, usingUbershader); @@ -1167,24 +1168,49 @@ void RendererGL::accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAccele gl.bindVAO(hwShaderVAO); + // Enable or disable vertex attributes as needed + const u32 currentAttributeMask = accel->enabledAttributeMask; + // Use bitwise xor to calculate which attributes changed + u32 attributeMaskDiff = currentAttributeMask ^ previousAttributeMask; + + while (attributeMaskDiff != 0) { + // Get index of next different attribute and turn it off + const u32 index = 31 - std::countl_zero(attributeMaskDiff); + const u32 mask = 1u << index; + attributeMaskDiff ^= mask; + + if ((currentAttributeMask & mask) != 0) { + // Attribute was disabled and is now enabled + hwShaderVAO.enableAttribute(index); + } else { + // Attribute was enabled and is now disabled + hwShaderVAO.disableAttribute(index); + 
} + } + + previousAttributeMask = currentAttributeMask; + for (int i = 0; i < totalAttribCount; i++) { const auto& attrib = accel->attributeInfo[i]; - + if (attrib.fixed) { - Helpers::panic("Fixed attribute!"); + if ((currentAttributeMask & (1u << i)) == 0) { + glVertexAttrib4f(attrib.inputReg, attrib.fixedValue[0], attrib.fixedValue[1], attrib.fixedValue[2], attrib.fixedValue[3]); + } } else { - if (attrib.isPadding) { + if (attrib.isPadding) [[unlikely]] { continue; } - - glVertexAttribPointer(i, attrib.componentCount, attributeFormats[attrib.type], GL_FALSE, attrib.stride, reinterpret_cast(vertexBufferRes.buffer_offset + attrib.offset)); - // TODO: Disable unused attributes as well - hwShaderVAO.enableAttribute(i); - + const u32 attributeSize = attrib.size * vertexCount; std::memcpy(vertexData, attrib.data, attributeSize); - + vertexData += attributeSize; + + glVertexAttribPointer( + attrib.inputReg, attrib.componentCount, attributeFormats[attrib.type], GL_FALSE, attrib.stride, + reinterpret_cast(vertexBufferRes.buffer_offset + attrib.offset) + ); } }