mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-06-07 19:41:38 +12:00
GPU: Add optimized NEON path for analyzing index buffers (#613)
* Implement ARM NEON index buffer analysis
* NEON: Fix initial index buffer minima/maxima
* NEON: Fix vertex count comparison for index buffer analysis
* GPU: Add SSE4.1 path for index buffer analysis
* Fix oopsie
* Fix oopsie, again
This commit is contained in:
parent
af1fe13996
commit
5d28f11ccf
3 changed files with 268 additions and 18 deletions
|
@ -1,16 +1,17 @@
|
|||
#include "PICA/draw_acceleration.hpp"
|
||||
|
||||
#include <bit>
|
||||
#include <limits>
|
||||
#include <tuple>
|
||||
|
||||
#include "PICA/gpu.hpp"
|
||||
#include "PICA/pica_simd.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
|
||||
void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) {
|
||||
accel.indexed = indexed;
|
||||
accel.totalAttribCount = totalAttribCount;
|
||||
accel.enabledAttributeMask = 0;
|
||||
|
||||
|
||||
const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16;
|
||||
const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer
|
||||
|
||||
|
@ -27,24 +28,12 @@ void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) {
|
|||
|
||||
// Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them
|
||||
if (accel.useShortIndices) {
|
||||
u16* indexBuffer16 = reinterpret_cast<u16*>(indexBuffer);
|
||||
|
||||
for (int i = 0; i < vertexCount; i++) {
|
||||
u16 index = indexBuffer16[i];
|
||||
minimumIndex = std::min(minimumIndex, index);
|
||||
maximumIndex = std::max(maximumIndex, index);
|
||||
}
|
||||
std::tie(accel.minimumIndex, accel.maximumIndex) = PICA::IndexBuffer::analyze<true>(indexBuffer, vertexCount);
|
||||
} else {
|
||||
for (int i = 0; i < vertexCount; i++) {
|
||||
u16 index = u16(indexBuffer[i]);
|
||||
minimumIndex = std::min(minimumIndex, index);
|
||||
maximumIndex = std::max(maximumIndex, index);
|
||||
}
|
||||
std::tie(accel.minimumIndex, accel.maximumIndex) = PICA::IndexBuffer::analyze<false>(indexBuffer, vertexCount);
|
||||
}
|
||||
|
||||
accel.indexBuffer = indexBuffer;
|
||||
accel.minimumIndex = minimumIndex;
|
||||
accel.maximumIndex = maximumIndex;
|
||||
} else {
|
||||
accel.indexBuffer = nullptr;
|
||||
accel.minimumIndex = regs[PICA::InternalRegs::VertexOffsetReg];
|
||||
|
@ -76,7 +65,7 @@ void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) {
|
|||
|
||||
// Add it to the total vertex data size, aligned to 4 bytes.
|
||||
accel.vertexDataSize += (bytes + 3) & ~3;
|
||||
|
||||
|
||||
// Get a pointer to the data where this loader's data is stored
|
||||
const u32 loaderAddress = vertexBase + loaderData.offset + (accel.minimumIndex * loaderData.size);
|
||||
loader.data = getPointerPhys<u8>(loaderAddress);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue