From 37a43e245f2e901d46c8cacf948d8909c1d343a5 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 25 Aug 2024 19:13:37 +0300 Subject: [PATCH] HW shaders: Accelerate indexed draws --- include/renderer_gl/renderer_gl.hpp | 8 +++++++- src/core/PICA/gpu.cpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 26 +++++++++++++++++--------- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index b643534a..30b17026 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -60,9 +60,15 @@ class RendererGL final : public Renderer { float oldDepthScale = -1.0; float oldDepthOffset = 0.0; bool oldDepthmapEnable = false; - // Set by prepareDraw, tells us whether the current draw is using hw-accelerated shader + // Set by prepareForDraw, tells us whether the current draw is using hw-accelerated shader bool usingAcceleratedShader = false; bool performIndexedRender = false; + bool usingShortIndices = false; + + // Set by prepareForDraw, metadata for indexed renders + GLuint minimumIndex = 0; + GLuint maximumIndex = 0; + void* hwIndexBufferOffset = nullptr; // Cached pointer to the current vertex shader when using HW accelerated shaders OpenGL::Shader* generatedVertexShader = nullptr; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index dad24a22..2797e09f 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -141,7 +141,7 @@ void GPU::drawArrays(bool indexed) { // Total # of vertices to render const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; - // Note: In the hardware shader path the vertices span shouldn't actually be used as the rasterizer will perform its own attribute fetching + // Note: In the hardware shader path the vertices span shouldn't actually be used as the renderer will perform its own attribute fetching renderer->drawVertices(primType, 
std::span(vertices).first(vertexCount)); } else { const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 82248d53..536cb6fa 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -435,10 +435,8 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); - if (usingAcceleratedShader) { - hwVertexBuffer->Bind(); - gl.bindVAO(hwShaderVAO); - } else { + // If we're using accelerated shaders, the hw VAO, VBO and EBO objects will have already been bound in prepareForDraw + if (!usingAcceleratedShader) { vbo.bind(); gl.bindVAO(defaultVAO); } @@ -509,9 +507,12 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v OpenGL::draw(primitiveTopology, GLsizei(vertices.size())); } else { if (performIndexedRender) { - // When doing indexed rendering, bind the IBO and use glDrawRangeElementsBaseVertex to issue the indexed draw + // When doing indexed rendering, bind the EBO and use glDrawRangeElementsBaseVertex to issue the indexed draw hwIndexBuffer->Bind(); - //glDrawRangeElementsBaseVertex(); + glDrawRangeElementsBaseVertex( + primitiveTopology, minimumIndex, maximumIndex, GLsizei(vertices.size()), usingShortIndices ? 
GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE, + hwIndexBufferOffset, -minimumIndex + ); } else { // When doing non-indexed rendering, just use glDrawArrays OpenGL::draw(primitiveTopology, GLsizei(vertices.size())); @@ -1008,7 +1009,10 @@ bool RendererGL::prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* // Upload vertex data and index buffer data to our GPU accelerateVertexUpload(shaderUnit, accel); + performIndexedRender = accel->indexed; + minimumIndex = GLsizei(accel->minimumIndex); + maximumIndex = GLsizei(accel->maximumIndex); } } @@ -1146,17 +1150,21 @@ void RendererGL::accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAccele // Update index buffer if necessary if (accel->indexed) { - const bool shortIndex = accel->useShortIndices; - const usize indexBufferSize = usize(vertexCount) * (shortIndex ? sizeof(u16) : sizeof(u8)); + usingShortIndices = accel->useShortIndices; + const usize indexBufferSize = usize(vertexCount) * (usingShortIndices ? sizeof(u16) : sizeof(u8)); + hwIndexBuffer->Bind(); auto indexBufferRes = hwIndexBuffer->Map(4, indexBufferSize); + hwIndexBufferOffset = reinterpret_cast<void*>(usize(indexBufferRes.buffer_offset)); + std::memcpy(indexBufferRes.pointer, accel->indexBuffer, indexBufferSize); hwIndexBuffer->Unmap(indexBufferSize); } + hwVertexBuffer->Bind(); auto vertexBufferRes = hwVertexBuffer->Map(4, accel->vertexDataSize); - u8* vertexData = static_cast<u8*>(vertexBufferRes.pointer); + gl.bindVAO(hwShaderVAO); for (int i = 0; i < totalAttribCount; i++) {