HW shaders: Accelerate indexed draws

This commit is contained in:
wheremyfoodat 2024-08-25 19:13:37 +03:00
parent e925a91e40
commit 37a43e245f
3 changed files with 25 additions and 11 deletions

View file

@ -60,9 +60,15 @@ class RendererGL final : public Renderer {
float oldDepthScale = -1.0; float oldDepthScale = -1.0;
float oldDepthOffset = 0.0; float oldDepthOffset = 0.0;
bool oldDepthmapEnable = false; bool oldDepthmapEnable = false;
// Set by prepareDraw, tells us whether the current draw is using hw-accelerated shader // Set by prepareForDraw, tells us whether the current draw is using hw-accelerated shader
bool usingAcceleratedShader = false; bool usingAcceleratedShader = false;
bool performIndexedRender = false; bool performIndexedRender = false;
bool usingShortIndices = false;
// Set by prepareForDraw, metadata for indexed renders
GLuint minimumIndex = 0;
GLuint maximumIndex = 0;
void* hwIndexBufferOffset = nullptr;
// Cached pointer to the current vertex shader when using HW accelerated shaders // Cached pointer to the current vertex shader when using HW accelerated shaders
OpenGL::Shader* generatedVertexShader = nullptr; OpenGL::Shader* generatedVertexShader = nullptr;

View file

@ -141,7 +141,7 @@ void GPU::drawArrays(bool indexed) {
// Total # of vertices to render // Total # of vertices to render
const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg];
// Note: In the hardware shader path the vertices span shouldn't actually be used as the rasterizer will perform its own attribute fetching // Note: In the hardware shader path the vertices span shouldn't actually be used as the renderer will perform its own attribute fetching
renderer->drawVertices(primType, std::span(vertices).first(vertexCount)); renderer->drawVertices(primType, std::span(vertices).first(vertexCount));
} else { } else {
const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled; const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled;

View file

@ -435,10 +435,8 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
const auto primitiveTopology = primTypes[static_cast<usize>(primType)]; const auto primitiveTopology = primTypes[static_cast<usize>(primType)];
gl.disableScissor(); gl.disableScissor();
if (usingAcceleratedShader) { // If we're using accelerated shaders, the hw VAO, VBO and EBO objects will have already been bound in prepareForDraw
hwVertexBuffer->Bind(); if (!usingAcceleratedShader) {
gl.bindVAO(hwShaderVAO);
} else {
vbo.bind(); vbo.bind();
gl.bindVAO(defaultVAO); gl.bindVAO(defaultVAO);
} }
@ -509,9 +507,12 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
OpenGL::draw(primitiveTopology, GLsizei(vertices.size())); OpenGL::draw(primitiveTopology, GLsizei(vertices.size()));
} else { } else {
if (performIndexedRender) { if (performIndexedRender) {
// When doing indexed rendering, bind the IBO and use glDrawRangeElementsBaseVertex to issue the indexed draw // When doing indexed rendering, bind the EBO and use glDrawRangeElementsBaseVertex to issue the indexed draw
hwIndexBuffer->Bind(); hwIndexBuffer->Bind();
//glDrawRangeElementsBaseVertex(); glDrawRangeElementsBaseVertex(
primitiveTopology, minimumIndex, maximumIndex, GLsizei(vertices.size()), usingShortIndices ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE,
hwIndexBufferOffset, -minimumIndex
);
} else { } else {
// When doing non-indexed rendering, just use glDrawArrays // When doing non-indexed rendering, just use glDrawArrays
OpenGL::draw(primitiveTopology, GLsizei(vertices.size())); OpenGL::draw(primitiveTopology, GLsizei(vertices.size()));
@ -1008,7 +1009,10 @@ bool RendererGL::prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration*
// Upload vertex data and index buffer data to our GPU // Upload vertex data and index buffer data to our GPU
accelerateVertexUpload(shaderUnit, accel); accelerateVertexUpload(shaderUnit, accel);
performIndexedRender = accel->indexed; performIndexedRender = accel->indexed;
minimumIndex = GLsizei(accel->minimumIndex);
maximumIndex = GLsizei(accel->maximumIndex);
} }
} }
@ -1146,17 +1150,21 @@ void RendererGL::accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAccele
// Update index buffer if necessary // Update index buffer if necessary
if (accel->indexed) { if (accel->indexed) {
const bool shortIndex = accel->useShortIndices; usingShortIndices = accel->useShortIndices;
const usize indexBufferSize = usize(vertexCount) * (shortIndex ? sizeof(u16) : sizeof(u8)); const usize indexBufferSize = usize(vertexCount) * (usingShortIndices ? sizeof(u16) : sizeof(u8));
hwIndexBuffer->Bind();
auto indexBufferRes = hwIndexBuffer->Map(4, indexBufferSize); auto indexBufferRes = hwIndexBuffer->Map(4, indexBufferSize);
hwIndexBufferOffset = reinterpret_cast<void*>(usize(indexBufferRes.buffer_offset));
std::memcpy(indexBufferRes.pointer, accel->indexBuffer, indexBufferSize); std::memcpy(indexBufferRes.pointer, accel->indexBuffer, indexBufferSize);
hwIndexBuffer->Unmap(indexBufferSize); hwIndexBuffer->Unmap(indexBufferSize);
} }
hwVertexBuffer->Bind();
auto vertexBufferRes = hwVertexBuffer->Map(4, accel->vertexDataSize); auto vertexBufferRes = hwVertexBuffer->Map(4, accel->vertexDataSize);
u8* vertexData = static_cast<u8*>(vertexBufferRes.pointer); u8* vertexData = static_cast<u8*>(vertexBufferRes.pointer);
gl.bindVAO(hwShaderVAO); gl.bindVAO(hwShaderVAO);
for (int i = 0; i < totalAttribCount; i++) { for (int i = 0; i < totalAttribCount; i++) {