Merge pull request #501 from wheremyfoodat/capstone

Implement GPUREG_VSH_OUTMAP_MASK
This commit is contained in:
wheremyfoodat 2024-04-29 20:18:57 +00:00 committed by GitHub
commit 8e303d8d08
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 41 additions and 2 deletions

View file

@ -6,6 +6,7 @@
#include "PICA/pica_vertex.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_unit.hpp"
#include "compiler_builtins.hpp"
#include "config.hpp"
#include "helpers.hpp"
#include "logger.hpp"
@ -35,6 +36,12 @@ class GPU {
std::array<vec4f, 16> immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission
std::array<PICA::Vertex, 3> immediateModeVertices;
// Pointers for the output registers as arranged after GPUREG_VSH_OUTMAP_MASK is applied
std::array<Floats::f24*, 16> vsOutputRegisters;
// Previous value for GPUREG_VSH_OUTMAP_MASK
u32 oldVsOutputMask;
uint immediateModeVertIndex;
uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading
@ -167,4 +174,28 @@ class GPU {
// We keep them at the end of the struct for cache locality reasons. Tl;dr we want the more commonly used things to be packed at the start
// of the struct, instead of externalRegs being in the middle
ExternalRegisters externalRegs;
// Rebuilds vsOutputRegisters for a new GPUREG_VSH_OUTMAP_MASK value.
// Only the low 16 bits of val are considered, one bit per vertex shader output register.
// The table is left untouched when the masked value matches the previously applied one.
ALWAYS_INLINE void setVsOutputMask(u32 val) {
	val &= 0xffff;

	// Fast path: the mask is the same as last time, so the pointer table is already up to date
	if (oldVsOutputMask == val) [[likely]] {
		return;
	}
	oldVsOutputMask = val;

	// Pack the enabled registers, in ascending register order, into the first slots of the table
	uint enabledCount = 0;
	for (int reg = 0; reg < 16; reg++) {
		const bool enabled = ((val >> reg) & 1) != 0;
		if (enabled) {
			vsOutputRegisters[enabledCount] = &shaderUnit.vs.outputs[reg][0];
			enabledCount++;
		}
	}

	// Every remaining slot points at the output register with the same index as the slot
	// (TODO: What does hw actually do?)
	for (uint slot = enabledCount; slot < 16; slot++) {
		vsOutputRegisters[slot] = &shaderUnit.vs.outputs[slot][0];
	}
}
};

View file

@ -143,6 +143,7 @@ namespace PICA {
VertexIntUniform3 = 0x2B4,
VertexShaderEntrypoint = 0x2BA,
VertexShaderOutputMask = 0x2BD,
VertexShaderTransferEnd = 0x2BF,
VertexFloatUniformIndex = 0x2C0,
VertexFloatUniformData0 = 0x2C1,

View file

@ -77,6 +77,9 @@ void GPU::reset() {
fixedAttrBuff.fill(0);
oldVsOutputMask = 0;
setVsOutputMask(0xFFFF);
for (auto& e : attributeInfo) {
e.offset = 0;
e.size = 0;
@ -134,6 +137,8 @@ void GPU::drawArrays() {
shaderJIT.prepare(shaderUnit.vs);
}
setVsOutputMask(regs[PICA::InternalRegs::VertexShaderOutputMask]);
// Base address for vertex attributes
// The vertex base is always on a quadword boundary because the PICA does weird alignment shit any time possible
const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16;
@ -329,7 +334,7 @@ void GPU::drawArrays() {
for (int j = 0; j < 4; j++) { // pls unroll
const u32 mapping = (config >> (j * 8)) & 0x1F;
out.raw[mapping] = shaderUnit.vs.outputs[i][j];
out.raw[mapping] = vsOutputRegisters[i][j];
}
}
}
@ -338,6 +343,8 @@ void GPU::drawArrays() {
}
PICA::Vertex GPU::getImmediateModeVertex() {
setVsOutputMask(regs[PICA::InternalRegs::VertexShaderOutputMask]);
PICA::Vertex v;
const int totalAttrCount = (regs[PICA::InternalRegs::VertexShaderAttrNum] & 0xf) + 1;
@ -356,7 +363,7 @@ PICA::Vertex GPU::getImmediateModeVertex() {
for (int j = 0; j < 4; j++) { // pls unroll
const u32 mapping = (config >> (j * 8)) & 0x1F;
v.raw[mapping] = shaderUnit.vs.outputs[i][j];
v.raw[mapping] = vsOutputRegisters[i][j];
}
}