mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-07 14:45:41 +12:00
Merge pull request #501 from wheremyfoodat/capstone
Implement GPUREG_VSH_OUTMAP_MASK
This commit is contained in:
commit
8e303d8d08
3 changed files with 41 additions and 2 deletions
|
@ -6,6 +6,7 @@
|
||||||
#include "PICA/pica_vertex.hpp"
|
#include "PICA/pica_vertex.hpp"
|
||||||
#include "PICA/regs.hpp"
|
#include "PICA/regs.hpp"
|
||||||
#include "PICA/shader_unit.hpp"
|
#include "PICA/shader_unit.hpp"
|
||||||
|
#include "compiler_builtins.hpp"
|
||||||
#include "config.hpp"
|
#include "config.hpp"
|
||||||
#include "helpers.hpp"
|
#include "helpers.hpp"
|
||||||
#include "logger.hpp"
|
#include "logger.hpp"
|
||||||
|
@ -35,6 +36,12 @@ class GPU {
|
||||||
|
|
||||||
std::array<vec4f, 16> immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission
|
std::array<vec4f, 16> immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission
|
||||||
std::array<PICA::Vertex, 3> immediateModeVertices;
|
std::array<PICA::Vertex, 3> immediateModeVertices;
|
||||||
|
|
||||||
|
// Pointers for the output registers as arranged after GPUREG_VSH_OUTMAP_MASK is applied
|
||||||
|
std::array<Floats::f24*, 16> vsOutputRegisters;
|
||||||
|
// Previous value for GPUREG_VSH_OUTMAP_MASK
|
||||||
|
u32 oldVsOutputMask;
|
||||||
|
|
||||||
uint immediateModeVertIndex;
|
uint immediateModeVertIndex;
|
||||||
uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading
|
uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading
|
||||||
|
|
||||||
|
@ -167,4 +174,28 @@ class GPU {
|
||||||
// We have them in the end of the struct for cache locality reasons. Tl;dr we want the more commonly used things to be packed in the start
|
// We have them in the end of the struct for cache locality reasons. Tl;dr we want the more commonly used things to be packed in the start
|
||||||
// Of the struct, instead of externalRegs being in the middle
|
// Of the struct, instead of externalRegs being in the middle
|
||||||
ExternalRegisters externalRegs;
|
ExternalRegisters externalRegs;
|
||||||
|
|
||||||
|
// Rebuilds vsOutputRegisters from a new GPUREG_VSH_OUTMAP_MASK value.
// Only the low 16 bits of val are considered, one bit per vertex shader output register.
// Enabled registers are packed, in ascending order, into the first slots of vsOutputRegisters.
ALWAYS_INLINE void setVsOutputMask(u32 val) {
	const u32 mask = val & 0xffff;

	// The mapping only depends on the mask, so skip the rebuild when it has not changed
	if (oldVsOutputMask == mask) [[likely]] {
		return;
	}
	oldVsOutputMask = mask;

	// Walk all 16 mask bits and hand the next free slot to every enabled register, skipping disabled ones
	uint slot = 0;
	for (int reg = 0; reg < 16; reg++) {
		const bool enabled = ((mask >> reg) & 1) != 0;
		if (enabled) {
			vsOutputRegisters[slot] = &shaderUnit.vs.outputs[reg][0];
			slot++;
		}
	}

	// Leftover slots point straight at the vs output with the same index (TODO: What does hw actually do?)
	while (slot < 16) {
		vsOutputRegisters[slot] = &shaderUnit.vs.outputs[slot][0];
		slot++;
	}
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -143,6 +143,7 @@ namespace PICA {
|
||||||
VertexIntUniform3 = 0x2B4,
|
VertexIntUniform3 = 0x2B4,
|
||||||
|
|
||||||
VertexShaderEntrypoint = 0x2BA,
|
VertexShaderEntrypoint = 0x2BA,
|
||||||
|
VertexShaderOutputMask = 0x2BD,
|
||||||
VertexShaderTransferEnd = 0x2BF,
|
VertexShaderTransferEnd = 0x2BF,
|
||||||
VertexFloatUniformIndex = 0x2C0,
|
VertexFloatUniformIndex = 0x2C0,
|
||||||
VertexFloatUniformData0 = 0x2C1,
|
VertexFloatUniformData0 = 0x2C1,
|
||||||
|
|
|
@ -77,6 +77,9 @@ void GPU::reset() {
|
||||||
|
|
||||||
fixedAttrBuff.fill(0);
|
fixedAttrBuff.fill(0);
|
||||||
|
|
||||||
|
oldVsOutputMask = 0;
|
||||||
|
setVsOutputMask(0xFFFF);
|
||||||
|
|
||||||
for (auto& e : attributeInfo) {
|
for (auto& e : attributeInfo) {
|
||||||
e.offset = 0;
|
e.offset = 0;
|
||||||
e.size = 0;
|
e.size = 0;
|
||||||
|
@ -134,6 +137,8 @@ void GPU::drawArrays() {
|
||||||
shaderJIT.prepare(shaderUnit.vs);
|
shaderJIT.prepare(shaderUnit.vs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
setVsOutputMask(regs[PICA::InternalRegs::VertexShaderOutputMask]);
|
||||||
|
|
||||||
// Base address for vertex attributes
|
// Base address for vertex attributes
|
||||||
// The vertex base is always on a quadword boundary because the PICA does weird alignment shit any time possible
|
// The vertex base is always on a quadword boundary because the PICA does weird alignment shit any time possible
|
||||||
const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16;
|
const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16;
|
||||||
|
@ -329,7 +334,7 @@ void GPU::drawArrays() {
|
||||||
|
|
||||||
for (int j = 0; j < 4; j++) { // pls unroll
|
for (int j = 0; j < 4; j++) { // pls unroll
|
||||||
const u32 mapping = (config >> (j * 8)) & 0x1F;
|
const u32 mapping = (config >> (j * 8)) & 0x1F;
|
||||||
out.raw[mapping] = shaderUnit.vs.outputs[i][j];
|
out.raw[mapping] = vsOutputRegisters[i][j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -338,6 +343,8 @@ void GPU::drawArrays() {
|
||||||
}
|
}
|
||||||
|
|
||||||
PICA::Vertex GPU::getImmediateModeVertex() {
|
PICA::Vertex GPU::getImmediateModeVertex() {
|
||||||
|
setVsOutputMask(regs[PICA::InternalRegs::VertexShaderOutputMask]);
|
||||||
|
|
||||||
PICA::Vertex v;
|
PICA::Vertex v;
|
||||||
const int totalAttrCount = (regs[PICA::InternalRegs::VertexShaderAttrNum] & 0xf) + 1;
|
const int totalAttrCount = (regs[PICA::InternalRegs::VertexShaderAttrNum] & 0xf) + 1;
|
||||||
|
|
||||||
|
@ -356,7 +363,7 @@ PICA::Vertex GPU::getImmediateModeVertex() {
|
||||||
|
|
||||||
for (int j = 0; j < 4; j++) { // pls unroll
|
for (int j = 0; j < 4; j++) { // pls unroll
|
||||||
const u32 mapping = (config >> (j * 8)) & 0x1F;
|
const u32 mapping = (config >> (j * 8)) & 0x1F;
|
||||||
v.raw[mapping] = shaderUnit.vs.outputs[i][j];
|
v.raw[mapping] = vsOutputRegisters[i][j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue