[PICA] Add support for nested command lists

This commit is contained in:
wheremyfoodat 2023-03-10 04:39:41 +02:00
parent 2ab777fecd
commit 9f7a86fc01
4 changed files with 87 additions and 52 deletions

View file

@ -9,7 +9,9 @@
#include "renderer_gl/renderer_gl.hpp"
class GPU {
static constexpr u32 regNum = 0x300;
using vec4f = OpenGL::Vector<Floats::f24, 4>;
using Registers = std::array<u32, regNum>;
Memory& mem;
ShaderUnit shaderUnit;
@ -17,9 +19,8 @@ class GPU {
MAKE_LOG_FUNCTION(log, gpuLogger)
static constexpr u32 maxAttribCount = 12; // Up to 12 vertex attributes
static constexpr u32 regNum = 0x300;
static constexpr u32 vramSize = 6_MB;
std::array<u32, regNum> regs; // GPU internal registers
Registers regs; // GPU internal registers
std::array<vec4f, 16> currentAttributes; // Vertex attributes before being passed to the shader
std::array<vec4f, 16> immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission
@ -57,6 +58,11 @@ class GPU {
u32 fixedAttribCount = 0; // How many attribute components have we written? When we get to 4 the attr will actually get submitted
std::array<u32, 3> fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted
// Command processor pointers for GPU command lists
u32* cmdBuffStart = nullptr;
u32* cmdBuffEnd = nullptr;
u32* cmdBuffCurr = nullptr;
Renderer renderer;
Vertex getImmediateModeVertex();
public:
@ -68,6 +74,9 @@ public:
void fireDMA(u32 dest, u32 source, u32 size);
void reset();
Registers& getRegisters() { return regs; }
void startCommandList(u32 addr, u32 size);
// Used by the GSP GPU service for readHwRegs/writeHwRegs/writeHwRegsMasked
u32 readReg(u32 address);
void writeReg(u32 address, u32 value);

View file

@ -71,6 +71,14 @@ namespace PICAInternalRegs {
FixedAttribData0 = 0x233,
FixedAttribData1 = 0x234,
FixedAttribData2 = 0x235,
// Command processor registers
CmdBufSize0 = 0x238,
CmdBufSize1 = 0x239,
CmdBufAddr0 = 0x23A,
CmdBufAddr1 = 0x23B,
CmdBufTrigger0 = 0x23C,
CmdBufTrigger1 = 0x23D,
PrimitiveConfig = 0x25E,

View file

@ -189,7 +189,22 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
shaderUnit.vs.setBufferIndex(value);
break;
case 0x23C: case 0x23D: Helpers::panic("Nested PICA cmd list!");
// Command lists can write to the command processor registers and change the command list stream
// Several games are known to do this, including New Super Mario Bros 2 and Super Mario 3D Land
// Writing to either trigger register does not execute anything by itself: it only re-points
// cmdBuffStart/cmdBuffCurr/cmdBuffEnd. The command-processing loop in startCommandList reads through
// these members, so it continues from the new buffer on its next fetch.
case CmdBufTrigger0:
case CmdBufTrigger1: {
if (value != 0) { // A non-zero value triggers command list processing
int bufferIndex = index - CmdBufTrigger0; // Index of the command buffer to execute (0 or 1)
// The address/size registers of the selected buffer sit at CmdBufAddr0/CmdBufSize0 + bufferIndex.
// Only the low 28 bits (address) and low 20 bits (size) are used, and both values are stored
// in units of 8 bytes, hence the shift left by 3 to convert them to bytes
u32 addr = (regs[CmdBufAddr0 + bufferIndex] & 0xfffffff) << 3;
u32 size = (regs[CmdBufSize0 + bufferIndex] & 0xfffff) << 3;
// Set command buffer state to execute the new buffer
// NOTE(review): the pointer returned by getPointerPhys is not checked here — confirm that helper
// never returns nullptr for an unmapped address
cmdBuffStart = getPointerPhys<u32>(addr);
cmdBuffCurr = cmdBuffStart;
// size is in bytes while the pointers are u32*, so convert it to a word count
cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));
}
break;
}
default:
// Vertex attribute registers
@ -214,4 +229,55 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
}
break;
}
}
// Executes a PICA command list.
//
// addr: Address of the command list, resolved to a host pointer via mem.getReadPointer.
// size: Size of the command list in bytes.
//
// Panics if no host pointer can be obtained for addr.
//
// Each command consists of a parameter word followed by a header word, optionally followed by
// "paramCount" extra parameter words. Every command starts on an 8-byte boundary relative to the
// start of the buffer, so a padding word is skipped whenever the previous command ended on an odd word.
//
// The stream state lives in the cmdBuffStart/cmdBuffCurr/cmdBuffEnd members rather than in locals on
// purpose: writeInternalReg may re-point them when a command writes to one of the command buffer
// trigger registers (nested command lists), in which case this loop transparently carries on from
// the new buffer. For the same reason every bounds check below re-reads the members.
void GPU::startCommandList(u32 addr, u32 size) {
	cmdBuffStart = static_cast<u32*>(mem.getReadPointer(addr));
	if (!cmdBuffStart) Helpers::panic("Couldn't get buffer for command list");

	// TODO: The end pointer is derived purely from the caller-supplied size. We do not verify that the whole
	// range is actually backed by memory, so a bogus size can still make us read outside the mapped region
	cmdBuffCurr = cmdBuffStart;
	cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));

	// LUT for converting the parameter mask to an actual 32-bit mask
	// The parameter mask is 4 bits long, each bit corresponding to one byte of the mask
	// If the bit is 0 then the corresponding mask byte is 0, otherwise the mask byte is 0xff
	// So for example if the parameter mask is 0b1001, the full mask is 0xff'00'00'ff
	static constexpr std::array<u32, 16> maskLUT = {
		0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,
		0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff,
	};

	while (cmdBuffCurr < cmdBuffEnd) {
		// If the buffer is not aligned to an 8 byte boundary, force align it by moving the pointer up a word
		// The curr pointer starts out doubleword-aligned and is increased by 4 bytes each time
		// So to check if it is aligned, we get the number of words it's been incremented by
		// If that number is an odd value then the buffer is not aligned, otherwise it is
		if ((cmdBuffCurr - cmdBuffStart) % 2 != 0) {
			cmdBuffCurr++;
		}

		// A command needs at least 2 words (parameter + header). The loop condition was evaluated before
		// the alignment skip above, so without this check a list that ends on a padding word (or whose size
		// is not a multiple of 8) would make us fetch and execute a bogus command from past the end
		if (cmdBuffEnd - cmdBuffCurr < 2) {
			break;
		}

		// The first word of a command is the command parameter and the second one is the header
		const u32 param1 = *cmdBuffCurr++;
		const u32 header = *cmdBuffCurr++;

		u32 id = header & 0xffff;
		const u32 paramMaskIndex = (header >> 16) & 0xf;
		const u32 paramCount = (header >> 20) & 0xff; // Number of additional parameters
		// Bit 31 tells us whether this command is going to write to multiple sequential registers (if the bit is 1)
		// Or if all written values will go to the same register (If the bit is 0). It's essentially the value that
		// gets added to the "id" field after each register write
		const bool consecutiveWritingMode = (header >> 31) != 0;

		const u32 mask = maskLUT[paramMaskIndex]; // Actual parameter mask
		// Increment the ID by 1 after each write if we're in consecutive mode, or 0 otherwise
		const u32 idIncrement = (consecutiveWritingMode) ? 1 : 0;

		writeInternalReg(id, param1, mask);
		for (u32 i = 0; i < paramCount; i++) {
			// The header may claim more parameters than the buffer actually holds (up to 255).
			// Stop instead of reading past the end; the outer loop condition then terminates processing
			if (cmdBuffCurr >= cmdBuffEnd) {
				break;
			}

			id += idIncrement;
			const u32 param = *cmdBuffCurr++;
			writeInternalReg(id, param, mask);
		}
	}
}

View file

@ -308,55 +308,7 @@ void GPUService::processCommandList(u32* cmd) {
bool updateGas = cmd[3] == 1; // Update gas additive blend results (0 = don't update, 1 = update)
bool flushBuffer = cmd[7] == 1; // Flush buffer (0 = don't flush, 1 = flush)
u32* bufferStart = static_cast<u32*>(mem.getReadPointer(address));
if (!bufferStart) Helpers::panic("Couldn't get buffer for command list");
// TODO: This is very memory unsafe. We get a pointer to FCRAM and just keep writing without checking if we're gonna go OoB
u32* curr = bufferStart;
u32* bufferEnd = bufferStart + (size / sizeof(u32));
// LUT for converting the parameter mask to an actual 32-bit mask
// The parameter mask is 4 bits long, each bit corresponding to one byte of the mask
// If the bit is 0 then the corresponding mask byte is 0, otherwise the mask byte is 0xff
// So for example if the parameter mask is 0b1001, the full mask is 0xff'00'00'ff
static constexpr std::array<u32, 16> maskLUT = {
0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,
0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff,
};
while (curr < bufferEnd) {
// If the buffer is not aligned to an 8 byte boundary, force align it by moving the pointer up a word
// The curr pointer starts out doubleword-aligned and is increased by 4 bytes each time
// So to check if it is aligned, we get the number of words it's been incremented by
// If that number is an odd value then the buffer is not aligned, otherwise it is
if ((curr - bufferStart) % 2 != 0) {
curr++;
}
// The first word of a command is the command parameter and the second one is the header
u32 param1 = *curr++;
u32 header = *curr++;
u32 id = header & 0xffff;
u32 paramMaskIndex = (header >> 16) & 0xf;
u32 paramCount = (header >> 20) & 0xff; // Number of additional parameters
// Bit 31 tells us whether this command is going to write to multiple sequential registers (if the bit is 1)
// Or if all written values will go to the same register (If the bit is 0). It's essentially the value that
// gets added to the "id" field after each register write
bool consecutiveWritingMode = (header >> 31) != 0;
u32 mask = maskLUT[paramMaskIndex]; // Actual parameter mask
// Increment the ID by 1 after each write if we're in consecutive mode, or 0 otherwise
u32 idIncrement = (consecutiveWritingMode) ? 1 : 0;
gpu.writeInternalReg(id, param1, mask);
for (u32 i = 0; i < paramCount; i++) {
id += idIncrement;
u32 param = *curr++;
gpu.writeInternalReg(id, param, mask);
}
}
log("GPU::GSP::processCommandList. Address: %08X, size in bytes: %08X\n", address, size);
gpu.startCommandList(address, size);
requestInterrupt(GPUInterrupt::P3D); // Send an IRQ when command list processing is over
}