From 9f7a86fc01ab81e287d85012d53a000d86d76265 Mon Sep 17 00:00:00 2001 From: wheremyfoodat Date: Fri, 10 Mar 2023 04:39:41 +0200 Subject: [PATCH] [PICA] Add support for nested command lists --- include/PICA/gpu.hpp | 13 +++++-- include/PICA/regs.hpp | 8 +++++ src/core/PICA/regs.cpp | 68 ++++++++++++++++++++++++++++++++++- src/core/services/gsp_gpu.cpp | 50 +------------------------- 4 files changed, 87 insertions(+), 52 deletions(-) diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 21ad6b27..801af7c6 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -9,7 +9,9 @@ #include "renderer_gl/renderer_gl.hpp" class GPU { + static constexpr u32 regNum = 0x300; using vec4f = OpenGL::Vector; + using Registers = std::array<u32, regNum>; Memory& mem; ShaderUnit shaderUnit; @@ -17,9 +19,8 @@ class GPU { MAKE_LOG_FUNCTION(log, gpuLogger) static constexpr u32 maxAttribCount = 12; // Up to 12 vertex attributes - static constexpr u32 regNum = 0x300; static constexpr u32 vramSize = 6_MB; - std::array<u32, regNum> regs; // GPU internal registers + Registers regs; // GPU internal registers std::array currentAttributes; // Vertex attributes before being passed to the shader std::array immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission @@ -57,6 +58,11 @@ class GPU { u32 fixedAttribCount = 0; // How many attribute components have we written? 
When we get to 4 the attr will actually get submitted std::array fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted + // Command processor pointers for GPU command lists + u32* cmdBuffStart = nullptr; + u32* cmdBuffEnd = nullptr; + u32* cmdBuffCurr = nullptr; + Renderer renderer; Vertex getImmediateModeVertex(); public: @@ -68,6 +74,9 @@ public: void fireDMA(u32 dest, u32 source, u32 size); void reset(); + Registers& getRegisters() { return regs; } + void startCommandList(u32 addr, u32 size); + // Used by the GSP GPU service for readHwRegs/writeHwRegs/writeHwRegsMasked u32 readReg(u32 address); void writeReg(u32 address, u32 value); diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index d4faf2bb..8815668f 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -71,6 +71,14 @@ namespace PICAInternalRegs { FixedAttribData0 = 0x233, FixedAttribData1 = 0x234, FixedAttribData2 = 0x235, + + // Command processor registers + CmdBufSize0 = 0x238, + CmdBufSize1 = 0x239, + CmdBufAddr0 = 0x23A, + CmdBufAddr1 = 0x23B, + CmdBufTrigger0 = 0x23C, + CmdBufTrigger1 = 0x23D, PrimitiveConfig = 0x25E, diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index 798f84eb..33b17a25 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -189,7 +189,22 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { shaderUnit.vs.setBufferIndex(value); break; - case 0x23C: case 0x23D: Helpers::panic("Nested PICA cmd list!"); + // Command lists can write to the command processor registers and change the command list stream + // Several games are known to do this, including New Super Mario Bros 2 and Super Mario 3D Land + case CmdBufTrigger0: + case CmdBufTrigger1: { + if (value != 0) { // A non-zero value triggers command list processing + int bufferIndex = index - CmdBufTrigger0; // Index of the command buffer to execute (0 or 1) + u32 addr = (regs[CmdBufAddr0 + bufferIndex] & 0xfffffff) << 3; + u32 size = (regs[CmdBufSize0 + 
bufferIndex] & 0xfffff) << 3; + + // Set command buffer state to execute the new buffer + cmdBuffStart = getPointerPhys<u32>(addr); + cmdBuffCurr = cmdBuffStart; + cmdBuffEnd = cmdBuffStart + (size / sizeof(u32)); + } + break; + } default: // Vertex attribute registers @@ -214,4 +229,55 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { } break; } +} + +void GPU::startCommandList(u32 addr, u32 size) { + cmdBuffStart = static_cast<u32*>(mem.getReadPointer(addr)); + if (!cmdBuffStart) Helpers::panic("Couldn't get buffer for command list"); + // TODO: This is very memory unsafe. We get a pointer to FCRAM and just keep writing without checking if we're gonna go OoB + + cmdBuffCurr = cmdBuffStart; + cmdBuffEnd = cmdBuffStart + (size / sizeof(u32)); + + // LUT for converting the parameter mask to an actual 32-bit mask + // The parameter mask is 4 bits long, each bit corresponding to one byte of the mask + // If the bit is 0 then the corresponding mask byte is 0, otherwise the mask byte is 0xff + // So for example if the parameter mask is 0b1001, the full mask is 0xff'00'00'ff + static constexpr std::array<u32, 16> maskLUT = { + 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, + 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff, + }; + + while (cmdBuffCurr < cmdBuffEnd) { + // If the buffer is not aligned to an 8 byte boundary, force align it by moving the pointer up a word + // The curr pointer starts out doubleword-aligned and is increased by 4 bytes each time + // So to check if it is aligned, we get the number of words it's been incremented by + // If that number is an odd value then the buffer is not aligned, otherwise it is + if ((cmdBuffCurr - cmdBuffStart) % 2 != 0) { + cmdBuffCurr++; + } + + // The first word of a command is the command parameter and the second one is the header + u32 param1 = *cmdBuffCurr++; + u32 header = *cmdBuffCurr++; + + u32 id = header & 0xffff; + u32 
paramMaskIndex = (header >> 16) & 0xf; + u32 paramCount = (header >> 20) & 0xff; // Number of additional parameters + // Bit 31 tells us whether this command is going to write to multiple sequential registers (if the bit is 1) + // Or if all written values will go to the same register (If the bit is 0). It's essentially the value that + // gets added to the "id" field after each register write + bool consecutiveWritingMode = (header >> 31) != 0; + + u32 mask = maskLUT[paramMaskIndex]; // Actual parameter mask + // Increment the ID by 1 after each write if we're in consecutive mode, or 0 otherwise + u32 idIncrement = (consecutiveWritingMode) ? 1 : 0; + + writeInternalReg(id, param1, mask); + for (u32 i = 0; i < paramCount; i++) { + id += idIncrement; + u32 param = *cmdBuffCurr++; + writeInternalReg(id, param, mask); + } + } } \ No newline at end of file diff --git a/src/core/services/gsp_gpu.cpp b/src/core/services/gsp_gpu.cpp index 93230119..1bd26770 100644 --- a/src/core/services/gsp_gpu.cpp +++ b/src/core/services/gsp_gpu.cpp @@ -308,55 +308,7 @@ void GPUService::processCommandList(u32* cmd) { bool updateGas = cmd[3] == 1; // Update gas additive blend results (0 = don't update, 1 = update) bool flushBuffer = cmd[7] == 1; // Flush buffer (0 = don't flush, 1 = flush) - u32* bufferStart = static_cast<u32*>(mem.getReadPointer(address)); - if (!bufferStart) Helpers::panic("Couldn't get buffer for command list"); - // TODO: This is very memory unsafe. 
We get a pointer to FCRAM and just keep writing without checking if we're gonna go OoB - - u32* curr = bufferStart; - u32* bufferEnd = bufferStart + (size / sizeof(u32)); - - // LUT for converting the parameter mask to an actual 32-bit mask - // The parameter mask is 4 bits long, each bit corresponding to one byte of the mask - // If the bit is 0 then the corresponding mask byte is 0, otherwise the mask byte is 0xff - // So for example if the parameter mask is 0b1001, the full mask is 0xff'00'00'ff - static constexpr std::array<u32, 16> maskLUT = { - 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, - 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff, - }; - - while (curr < bufferEnd) { - // If the buffer is not aligned to an 8 byte boundary, force align it by moving the pointer up a word - // The curr pointer starts out doubleword-aligned and is increased by 4 bytes each time - // So to check if it is aligned, we get the number of words it's been incremented by - // If that number is an odd value then the buffer is not aligned, otherwise it is - if ((curr - bufferStart) % 2 != 0) { - curr++; - } - - // The first word of a command is the command parameter and the second one is the header - u32 param1 = *curr++; - u32 header = *curr++; - - u32 id = header & 0xffff; - u32 paramMaskIndex = (header >> 16) & 0xf; - u32 paramCount = (header >> 20) & 0xff; // Number of additional parameters - // Bit 31 tells us whether this command is going to write to multiple sequential registers (if the bit is 1) - // Or if all written values will go to the same register (If the bit is 0). 
It's essentially the value that - // gets added to the "id" field after each register write - bool consecutiveWritingMode = (header >> 31) != 0; - - u32 mask = maskLUT[paramMaskIndex]; // Actual parameter mask - // Increment the ID by 1 after each write if we're in consecutive mode, or 0 otherwise - u32 idIncrement = (consecutiveWritingMode) ? 1 : 0; - - gpu.writeInternalReg(id, param1, mask); - for (u32 i = 0; i < paramCount; i++) { - id += idIncrement; - u32 param = *curr++; - gpu.writeInternalReg(id, param, mask); - } - } - log("GPU::GSP::processCommandList. Address: %08X, size in bytes: %08X\n", address, size); + gpu.startCommandList(address, size); requestInterrupt(GPUInterrupt::P3D); // Send an IRQ when command list processing is over } \ No newline at end of file