HW shaders: Accelerate indexed draws

This commit is contained in:
wheremyfoodat 2024-08-25 19:13:37 +03:00
parent e925a91e40
commit 37a43e245f
3 changed files with 25 additions and 11 deletions

View file

@ -60,9 +60,15 @@ class RendererGL final : public Renderer {
float oldDepthScale = -1.0;
float oldDepthOffset = 0.0;
bool oldDepthmapEnable = false;
// Set by prepareDraw, tells us whether the current draw is using hw-accelerated shader
// Set by prepareForDraw, tells us whether the current draw is using hw-accelerated shader
bool usingAcceleratedShader = false;
bool performIndexedRender = false;
bool usingShortIndices = false;
// Set by prepareForDraw, metadata for indexed renders
GLuint minimumIndex = 0;
GLuint maximumIndex = 0;
void* hwIndexBufferOffset = nullptr;
// Cached pointer to the current vertex shader when using HW accelerated shaders
OpenGL::Shader* generatedVertexShader = nullptr;

View file

@ -141,7 +141,7 @@ void GPU::drawArrays(bool indexed) {
// Total # of vertices to render
const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg];
// Note: In the hardware shader path the vertices span shouldn't actually be used as the rasterizer will perform its own attribute fetching
// Note: In the hardware shader path the vertices span shouldn't actually be used as the renderer will perform its own attribute fetching
renderer->drawVertices(primType, std::span(vertices).first(vertexCount));
} else {
const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled;

View file

@ -435,10 +435,8 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
const auto primitiveTopology = primTypes[static_cast<usize>(primType)];
gl.disableScissor();
if (usingAcceleratedShader) {
hwVertexBuffer->Bind();
gl.bindVAO(hwShaderVAO);
} else {
// If we're using accelerated shaders, the hw VAO, VBO and EBO objects will have already been bound in prepareForDraw
if (!usingAcceleratedShader) {
vbo.bind();
gl.bindVAO(defaultVAO);
}
@ -509,9 +507,12 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
OpenGL::draw(primitiveTopology, GLsizei(vertices.size()));
} else {
if (performIndexedRender) {
// When doing indexed rendering, bind the IBO and use glDrawRangeElementsBaseVertex to issue the indexed draw
// When doing indexed rendering, bind the EBO and use glDrawRangeElementsBaseVertex to issue the indexed draw
hwIndexBuffer->Bind();
//glDrawRangeElementsBaseVertex();
glDrawRangeElementsBaseVertex(
primitiveTopology, minimumIndex, maximumIndex, GLsizei(vertices.size()), usingShortIndices ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE,
hwIndexBufferOffset, -minimumIndex
);
} else {
// When doing non-indexed rendering, just use glDrawArrays
OpenGL::draw(primitiveTopology, GLsizei(vertices.size()));
@ -1008,7 +1009,10 @@ bool RendererGL::prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration*
// Upload vertex data and index buffer data to our GPU
accelerateVertexUpload(shaderUnit, accel);
performIndexedRender = accel->indexed;
minimumIndex = GLsizei(accel->minimumIndex);
maximumIndex = GLsizei(accel->maximumIndex);
}
}
@ -1146,17 +1150,21 @@ void RendererGL::accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAccele
// Update index buffer if necessary
if (accel->indexed) {
const bool shortIndex = accel->useShortIndices;
const usize indexBufferSize = usize(vertexCount) * (shortIndex ? sizeof(u16) : sizeof(u8));
usingShortIndices = accel->useShortIndices;
const usize indexBufferSize = usize(vertexCount) * (usingShortIndices ? sizeof(u16) : sizeof(u8));
hwIndexBuffer->Bind();
auto indexBufferRes = hwIndexBuffer->Map(4, indexBufferSize);
hwIndexBufferOffset = reinterpret_cast<void*>(usize(indexBufferRes.buffer_offset));
std::memcpy(indexBufferRes.pointer, accel->indexBuffer, indexBufferSize);
hwIndexBuffer->Unmap(indexBufferSize);
}
hwVertexBuffer->Bind();
auto vertexBufferRes = hwVertexBuffer->Map(4, accel->vertexDataSize);
u8* vertexData = static_cast<u8*>(vertexBufferRes.pointer);
gl.bindVAO(hwShaderVAO);
for (int i = 0; i < totalAttribCount; i++) {