GPU: Add optimized NEON path for analyzing index buffers (#613)

* Implement ARM NEON index buffer analysis

* NEON: Fix initial index buffer minima/maxima

* NEON: Fix vertex count comparison for index buffer analysis

* GPU: Add SSE4.1 path for index buffer analysis

* Fix oopsie

* Fix oopsie, again
This commit is contained in:
wheremyfoodat 2024-10-20 20:02:02 +03:00 committed by GitHub
parent af1fe13996
commit 5d28f11ccf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 268 additions and 18 deletions

View file

@ -1,16 +1,17 @@
#include "PICA/draw_acceleration.hpp"
#include <bit>
#include <limits>
#include <tuple>
#include "PICA/gpu.hpp"
#include "PICA/pica_simd.hpp"
#include "PICA/regs.hpp"
void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) {
accel.indexed = indexed;
accel.totalAttribCount = totalAttribCount;
accel.enabledAttributeMask = 0;
const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16;
const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer
@ -27,24 +28,12 @@ void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) {
// Calculate the minimum and maximum indices used in the index buffer, so that only the vertices in that range get uploaded
if (accel.useShortIndices) {
u16* indexBuffer16 = reinterpret_cast<u16*>(indexBuffer);
for (int i = 0; i < vertexCount; i++) {
u16 index = indexBuffer16[i];
minimumIndex = std::min(minimumIndex, index);
maximumIndex = std::max(maximumIndex, index);
}
std::tie(accel.minimumIndex, accel.maximumIndex) = PICA::IndexBuffer::analyze<true>(indexBuffer, vertexCount);
} else {
for (int i = 0; i < vertexCount; i++) {
u16 index = u16(indexBuffer[i]);
minimumIndex = std::min(minimumIndex, index);
maximumIndex = std::max(maximumIndex, index);
}
std::tie(accel.minimumIndex, accel.maximumIndex) = PICA::IndexBuffer::analyze<false>(indexBuffer, vertexCount);
}
accel.indexBuffer = indexBuffer;
accel.minimumIndex = minimumIndex;
accel.maximumIndex = maximumIndex;
} else {
accel.indexBuffer = nullptr;
accel.minimumIndex = regs[PICA::InternalRegs::VertexOffsetReg];
@ -76,7 +65,7 @@ void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) {
// Add it to the total vertex data size, aligned to 4 bytes.
accel.vertexDataSize += (bytes + 3) & ~3;
// Get a pointer to where this loader's data is stored
const u32 loaderAddress = vertexBase + loaderData.offset + (accel.minimumIndex * loaderData.size);
loader.data = getPointerPhys<u8>(loaderAddress);