mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-07 14:45:41 +12:00
[PICA] Add support for nested command lists
This commit is contained in:
parent
2ab777fecd
commit
9f7a86fc01
4 changed files with 87 additions and 52 deletions
|
@ -9,7 +9,9 @@
|
|||
#include "renderer_gl/renderer_gl.hpp"
|
||||
|
||||
class GPU {
|
||||
static constexpr u32 regNum = 0x300;
|
||||
using vec4f = OpenGL::Vector<Floats::f24, 4>;
|
||||
using Registers = std::array<u32, regNum>;
|
||||
|
||||
Memory& mem;
|
||||
ShaderUnit shaderUnit;
|
||||
|
@ -17,9 +19,8 @@ class GPU {
|
|||
MAKE_LOG_FUNCTION(log, gpuLogger)
|
||||
|
||||
static constexpr u32 maxAttribCount = 12; // Up to 12 vertex attributes
|
||||
static constexpr u32 regNum = 0x300;
|
||||
static constexpr u32 vramSize = 6_MB;
|
||||
std::array<u32, regNum> regs; // GPU internal registers
|
||||
Registers regs; // GPU internal registers
|
||||
std::array<vec4f, 16> currentAttributes; // Vertex attributes before being passed to the shader
|
||||
|
||||
std::array<vec4f, 16> immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission
|
||||
|
@ -57,6 +58,11 @@ class GPU {
|
|||
u32 fixedAttribCount = 0; // How many attribute components have we written? When we get to 4 the attr will actually get submitted
|
||||
std::array<u32, 3> fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted
|
||||
|
||||
// Command processor pointers for GPU command lists
|
||||
u32* cmdBuffStart = nullptr;
|
||||
u32* cmdBuffEnd = nullptr;
|
||||
u32* cmdBuffCurr = nullptr;
|
||||
|
||||
Renderer renderer;
|
||||
Vertex getImmediateModeVertex();
|
||||
public:
|
||||
|
@ -68,6 +74,9 @@ public:
|
|||
void fireDMA(u32 dest, u32 source, u32 size);
|
||||
void reset();
|
||||
|
||||
Registers& getRegisters() { return regs; }
|
||||
void startCommandList(u32 addr, u32 size);
|
||||
|
||||
// Used by the GSP GPU service for readHwRegs/writeHwRegs/writeHwRegsMasked
|
||||
u32 readReg(u32 address);
|
||||
void writeReg(u32 address, u32 value);
|
||||
|
|
|
@ -71,6 +71,14 @@ namespace PICAInternalRegs {
|
|||
FixedAttribData0 = 0x233,
|
||||
FixedAttribData1 = 0x234,
|
||||
FixedAttribData2 = 0x235,
|
||||
|
||||
// Command processor registers
|
||||
CmdBufSize0 = 0x238,
|
||||
CmdBufSize1 = 0x239,
|
||||
CmdBufAddr0 = 0x23A,
|
||||
CmdBufAddr1 = 0x23B,
|
||||
CmdBufTrigger0 = 0x23C,
|
||||
CmdBufTrigger1 = 0x23D,
|
||||
|
||||
PrimitiveConfig = 0x25E,
|
||||
|
||||
|
|
|
@ -189,7 +189,22 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
shaderUnit.vs.setBufferIndex(value);
|
||||
break;
|
||||
|
||||
case 0x23C: case 0x23D: Helpers::panic("Nested PICA cmd list!");
|
||||
// Command lists can write to the command processor registers and change the command list stream
|
||||
// Several games are known to do this, including New Super Mario Bros 2 and Super Mario 3D Land
|
||||
case CmdBufTrigger0:
|
||||
case CmdBufTrigger1: {
|
||||
if (value != 0) { // A non-zero value triggers command list processing
|
||||
int bufferIndex = index - CmdBufTrigger0; // Index of the command buffer to execute (0 or 1)
|
||||
u32 addr = (regs[CmdBufAddr0 + bufferIndex] & 0xfffffff) << 3;
|
||||
u32 size = (regs[CmdBufSize0 + bufferIndex] & 0xfffff) << 3;
|
||||
|
||||
// Set command buffer state to execute the new buffer
|
||||
cmdBuffStart = getPointerPhys<u32>(addr);
|
||||
cmdBuffCurr = cmdBuffStart;
|
||||
cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
// Vertex attribute registers
|
||||
|
@ -214,4 +229,55 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the PICA command list that is `size` bytes long and located at address `addr`.
//
// Each command is made up of a parameter word followed by a header word, optionally followed by
// extra parameter words. Every parameter is forwarded to writeInternalReg together with the byte
// mask decoded from the header.
//
// The parser state (cmdBuffStart/cmdBuffCurr/cmdBuffEnd) lives in the GPU object rather than in locals
// on purpose: a write to CmdBufTrigger0/CmdBufTrigger1 inside writeInternalReg retargets these pointers
// to another buffer, which is how nested command lists are executed. The loop below therefore re-reads
// the pointers after every register write and never caches them.
//
// Panics if no pointer can be obtained for `addr`.
void GPU::startCommandList(u32 addr, u32 size) {
	cmdBuffStart = static_cast<u32*>(mem.getReadPointer(addr));
	if (!cmdBuffStart) Helpers::panic("Couldn't get buffer for command list");

	cmdBuffCurr = cmdBuffStart;
	cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));

	// LUT for converting the parameter mask to an actual 32-bit mask
	// The parameter mask is 4 bits long, each bit corresponding to one byte of the mask
	// If the bit is 0 then the corresponding mask byte is 0, otherwise the mask byte is 0xff
	// So for example if the parameter mask is 0b1001, the full mask is 0xff'00'00'ff
	static constexpr std::array<u32, 16> maskLUT = {
		0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,
		0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff,
	};

	while (cmdBuffCurr < cmdBuffEnd) {
		// If the buffer is not aligned to an 8 byte boundary, force align it by moving the pointer up a word
		// The curr pointer starts out doubleword-aligned and is increased by 4 bytes each time
		// So to check if it is aligned, we get the number of words it's been incremented by
		// If that number is an odd value then the buffer is not aligned, otherwise it is
		if ((cmdBuffCurr - cmdBuffStart) % 2 != 0) {
			cmdBuffCurr++;
		}

		// A command needs both its parameter word and its header word to be inside the list.
		// The alignment skip above (or a size that is not a multiple of 8 bytes) can leave fewer
		// than 2 words, in which case decoding would interpret memory past the list as a command
		if (cmdBuffEnd - cmdBuffCurr < 2) {
			break;
		}

		// The first word of a command is the command parameter and the second one is the header
		const u32 param1 = *cmdBuffCurr++;
		const u32 header = *cmdBuffCurr++;

		u32 id = header & 0xffff;
		const u32 paramMaskIndex = (header >> 16) & 0xf;
		const u32 paramCount = (header >> 20) & 0xff;  // Number of additional parameters
		// Bit 31 tells us whether this command is going to write to multiple sequential registers (if the bit is 1)
		// Or if all written values will go to the same register (If the bit is 0). It's essentially the value that
		// gets added to the "id" field after each register write
		const bool consecutiveWritingMode = (header >> 31) != 0;

		const u32 mask = maskLUT[paramMaskIndex];  // Actual parameter mask
		// Increment the ID by 1 after each write if we're in consecutive mode, or 0 otherwise
		const u32 idIncrement = (consecutiveWritingMode) ? 1 : 0;

		writeInternalReg(id, param1, mask);
		for (u32 i = 0; i < paramCount; i++) {
			// Stop if the header claims more parameters than the list actually contains.
			// The check is against the live cmdBuffEnd, since the previous register write may have
			// switched us over to a nested command list with different bounds
			if (cmdBuffCurr >= cmdBuffEnd) {
				break;
			}

			id += idIncrement;
			const u32 param = *cmdBuffCurr++;
			writeInternalReg(id, param, mask);
		}
	}
}
|
|
@ -308,55 +308,7 @@ void GPUService::processCommandList(u32* cmd) {
|
|||
bool updateGas = cmd[3] == 1; // Update gas additive blend results (0 = don't update, 1 = update)
|
||||
bool flushBuffer = cmd[7] == 1; // Flush buffer (0 = don't flush, 1 = flush)
|
||||
|
||||
u32* bufferStart = static_cast<u32*>(mem.getReadPointer(address));
|
||||
if (!bufferStart) Helpers::panic("Couldn't get buffer for command list");
|
||||
// TODO: This is very memory unsafe. We get a pointer to FCRAM and just keep writing without checking if we're gonna go OoB
|
||||
|
||||
u32* curr = bufferStart;
|
||||
u32* bufferEnd = bufferStart + (size / sizeof(u32));
|
||||
|
||||
// LUT for converting the parameter mask to an actual 32-bit mask
|
||||
// The parameter mask is 4 bits long, each bit corresponding to one byte of the mask
|
||||
// If the bit is 0 then the corresponding mask byte is 0, otherwise the mask byte is 0xff
|
||||
// So for example if the parameter mask is 0b1001, the full mask is 0xff'00'00'ff
|
||||
static constexpr std::array<u32, 16> maskLUT = {
|
||||
0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,
|
||||
0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff,
|
||||
};
|
||||
|
||||
while (curr < bufferEnd) {
|
||||
// If the buffer is not aligned to an 8 byte boundary, force align it by moving the pointer up a word
|
||||
// The curr pointer starts out doubleword-aligned and is increased by 4 bytes each time
|
||||
// So to check if it is aligned, we get the number of words it's been incremented by
|
||||
// If that number is an odd value then the buffer is not aligned, otherwise it is
|
||||
if ((curr - bufferStart) % 2 != 0) {
|
||||
curr++;
|
||||
}
|
||||
|
||||
// The first word of a command is the command parameter and the second one is the header
|
||||
u32 param1 = *curr++;
|
||||
u32 header = *curr++;
|
||||
|
||||
u32 id = header & 0xffff;
|
||||
u32 paramMaskIndex = (header >> 16) & 0xf;
|
||||
u32 paramCount = (header >> 20) & 0xff; // Number of additional parameters
|
||||
// Bit 31 tells us whether this command is going to write to multiple sequential registers (if the bit is 1)
|
||||
// Or if all written values will go to the same register (If the bit is 0). It's essentially the value that
|
||||
// gets added to the "id" field after each register write
|
||||
bool consecutiveWritingMode = (header >> 31) != 0;
|
||||
|
||||
u32 mask = maskLUT[paramMaskIndex]; // Actual parameter mask
|
||||
// Increment the ID by 1 after each write if we're in consecutive mode, or 0 otherwise
|
||||
u32 idIncrement = (consecutiveWritingMode) ? 1 : 0;
|
||||
|
||||
gpu.writeInternalReg(id, param1, mask);
|
||||
for (u32 i = 0; i < paramCount; i++) {
|
||||
id += idIncrement;
|
||||
u32 param = *curr++;
|
||||
gpu.writeInternalReg(id, param, mask);
|
||||
}
|
||||
}
|
||||
|
||||
log("GPU::GSP::processCommandList. Address: %08X, size in bytes: %08X\n", address, size);
|
||||
gpu.startCommandList(address, size);
|
||||
requestInterrupt(GPUInterrupt::P3D); // Send an IRQ when command list processing is over
|
||||
}
|
Loading…
Add table
Reference in a new issue