From f75a23b5a947d2d57a5e8c9e6b1a59ead8b265be Mon Sep 17 00:00:00 2001 From: GPUCode Date: Mon, 10 Jul 2023 23:59:44 +0300 Subject: [PATCH] code: Better screen support --- include/PICA/gpu.hpp | 4 + include/PICA/regs.hpp | 48 ++++++++++ include/memory.hpp | 2 +- include/renderer.hpp | 2 +- include/renderer_gl/gl_state.hpp | 2 +- include/renderer_gl/renderer_gl.hpp | 6 +- include/renderer_gl/surfaces.hpp | 28 +++++- include/renderer_gl/textures.hpp | 2 +- include/renderer_null/renderer_null.hpp | 2 +- include/services/gsp_gpu.hpp | 23 ++++- src/core/PICA/gpu.cpp | 27 ++++++ src/core/PICA/regs.cpp | 31 ++++++- src/core/renderer_gl/renderer_gl.cpp | 107 ++++++++++++++++------- src/core/renderer_null/renderer_null.cpp | 2 +- src/core/services/gsp_gpu.cpp | 85 +++++++++++++----- 15 files changed, 305 insertions(+), 66 deletions(-) diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index b4236ee0..338aba9b 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -29,6 +29,7 @@ class GPU { static constexpr u32 vramSize = u32(6_MB); Registers regs; // GPU internal registers std::array currentAttributes; // Vertex attributes before being passed to the shader + std::array external_regs; // GPU external registers std::array immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission std::array immediateModeVertices; @@ -97,6 +98,9 @@ class GPU { u32 readReg(u32 address); void writeReg(u32 address, u32 value); + u32 readExternalReg(u32 index); + void writeExternalReg(u32 index, u32 value); + // Used when processing GPU command lists u32 readInternalReg(u32 index); void writeInternalReg(u32 index, u32 value, u32 mask); diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index ee8105cd..d67c2025 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -178,6 +178,54 @@ namespace PICA { }; } + namespace ExternalRegs { + enum : u32 { + MemFill1BufferStartPaddr = 0x3, + MemFill1BufferEndPAddr = 0x4, + MemFill1Value = 0x5, + 
MemFill1Control = 0x6, + MemFill2BufferStartPaddr = 0x7, + MemFill2BufferEndPAddr = 0x8, + MemFill2Value = 0x9, + MemFill2Control = 0xA, + VramBankControl = 0xB, + GPUBusy = 0xC, + BacklightControl = 0xBC, + // TODO: Framebuffer regs + Framebuffer0Size = 0x2F, + Framebuffer0AFirstAddr = 0x119, + Framebuffer0ASecondAddr = 0x11A, + Framebuffer0Config = 0x11B, + Framebuffer0Select = 0x11D, + Framebuffer0Stride = 0x123, + Framebuffer0BFirstAddr = 0x124, + Framebuffer0BSecondAddr = 0x125, + Framebuffer1Size = 0x156, + Framebuffer1AFirstAddr = 0x159, + Framebuffer1ASecondAddr = 0x15A, + Framebuffer1Config = 0x15B, + Framebuffer1Select = 0x15D, + Framebuffer1Stride = 0x163, + Framebuffer1BFirstAddr = 0x164, + Framebuffer1BSecondAddr = 0x165, + TransferInputPAddr = 0x2FF, + TransferOutputPAddr = 0x300, + DisplayTransferOutputDim = 0x301, + DisplayTransferInputDim = 0x302, + TransferFlags = 0x303, + TransferTrigger = 0x305, + TextureCopyTotalBytes = 0x307, + TextureCopyInputLineGap = 0x308, + TextureCopyOutputLineGap = 0x309, + }; + } + + enum class Scaling : u32 { + None = 0, + X = 1, + XY = 2, + }; + namespace Lights { enum : u32 { LUT_D0 = 0, diff --git a/include/memory.hpp b/include/memory.hpp index 6f33d895..0b3b184c 100644 --- a/include/memory.hpp +++ b/include/memory.hpp @@ -248,4 +248,4 @@ public: void setVRAM(u8* pointer) { vram = pointer; } bool allocateMainThreadStack(u32 size); -}; \ No newline at end of file +}; diff --git a/include/renderer.hpp b/include/renderer.hpp index cd1ee53b..230c7d89 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -65,4 +65,4 @@ class Renderer { void setColourBufferLoc(u32 loc) { colourBufferLoc = loc; } void setDepthBufferLoc(u32 loc) { depthBufferLoc = loc; } -}; \ No newline at end of file +}; diff --git a/include/renderer_gl/gl_state.hpp b/include/renderer_gl/gl_state.hpp index 98f589e0..f2680eb4 100644 --- a/include/renderer_gl/gl_state.hpp +++ b/include/renderer_gl/gl_state.hpp @@ -213,4 +213,4 @@ struct 
GLStateManager { }; static_assert(std::is_trivially_constructible(), "OpenGL State Manager class is not trivially constructible!"); -static_assert(std::is_trivially_destructible(), "OpenGL State Manager class is not trivially destructible!"); \ No newline at end of file +static_assert(std::is_trivially_destructible(), "OpenGL State Manager class is not trivially destructible!"); diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 18f52a1c..a69d7623 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -44,8 +44,8 @@ class RendererGL final : public Renderer { float oldDepthOffset = 0.0; bool oldDepthmapEnable = false; - SurfaceCache depthBufferCache; - SurfaceCache colourBufferCache; + SurfaceCache depthBufferCache; + SurfaceCache colourBufferCache; SurfaceCache textureCache; // Dummy VAO/VBO for blitting the final output @@ -78,6 +78,8 @@ class RendererGL final : public Renderer { void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; // Perform display transfer void drawVertices(PICA::PrimType primType, std::span vertices) override; // Draw the given vertices + ColourBuffer getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height); + // Take a screenshot of the screen and store it in a file void screenshot(const std::string& name) override; }; diff --git a/include/renderer_gl/surfaces.hpp b/include/renderer_gl/surfaces.hpp index a77729c4..9b150861 100644 --- a/include/renderer_gl/surfaces.hpp +++ b/include/renderer_gl/surfaces.hpp @@ -19,6 +19,10 @@ struct ColourBuffer { OpenGL::Texture texture; OpenGL::Framebuffer fbo; + GLenum internalFormat; + GLenum fmt; + GLenum type; + ColourBuffer() : valid(false) {} ColourBuffer(u32 loc, PICA::ColorFmt format, u32 x, u32 y, bool valid = true) @@ -29,12 +33,32 @@ struct ColourBuffer { range = Interval(loc, (u32)endLoc); } - void allocate() { + void allocate() { + // Internal formats 
for the texture based on format + static constexpr std::array internalFormats = { + GL_RGBA8, GL_RGB8, GL_RGB5_A1, GL_RGB565, GL_RGBA4 + }; + + // Format of the texture + static constexpr std::array formats = { + GL_RGBA, GL_BGR, GL_RGBA, GL_RGB, GL_RGBA, + }; + + static constexpr std::array types = { + GL_UNSIGNED_INT_8_8_8_8, GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT_5_5_5_1, + GL_UNSIGNED_SHORT_5_6_5, GL_UNSIGNED_SHORT_4_4_4_4, + }; + + internalFormat = internalFormats[(int)format]; + fmt = formats[(int)format]; + type = types[(int)format]; + + // Create texture for the FBO, setting up filters and the like // Reading back the current texture is slow, but allocate calls should be few and far between. // If this becomes a bottleneck, we can fix it semi-easily auto prevTexture = OpenGL::getTex2D(); - texture.create(size.x(), size.y(), GL_RGBA8); + texture.create(size.x(), size.y(), internalFormat); texture.bind(); texture.setMinFilter(OpenGL::Linear); texture.setMagFilter(OpenGL::Linear); diff --git a/include/renderer_gl/textures.hpp b/include/renderer_gl/textures.hpp index a2b6c09d..8667716a 100644 --- a/include/renderer_gl/textures.hpp +++ b/include/renderer_gl/textures.hpp @@ -53,7 +53,7 @@ struct Texture { static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); // Returns the format of this texture as a string - std::string formatToString() { + std::string_view formatToString() { return PICA::textureFormatToString(format); } diff --git a/include/renderer_null/renderer_null.hpp b/include/renderer_null/renderer_null.hpp index 553af035..05de067c 100644 --- a/include/renderer_null/renderer_null.hpp +++ b/include/renderer_null/renderer_null.hpp @@ -14,4 +14,4 @@ class RendererNull final : public Renderer { void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; void drawVertices(PICA::PrimType primType, std::span vertices) override; void screenshot(const std::string& name) override; -}; \ No newline at end of file +}; 
diff --git a/include/services/gsp_gpu.hpp b/include/services/gsp_gpu.hpp index 0757ea2d..76793bbc 100644 --- a/include/services/gsp_gpu.hpp +++ b/include/services/gsp_gpu.hpp @@ -18,6 +18,24 @@ enum class GPUInterrupt : u8 { DMA = 6 }; +struct FramebufferInfo { + u32 activeFb; + u32 leftFramebufferVaddr; + u32 rightFramebufferVaddr; + u32 stride; + u32 format; + u32 displayFb; + u32 attribute; +}; + +struct FrameBufferUpdate { + u8 index; + u8 dirtyFlag; + u16 pad0; + std::array framebufferInfo; + u32 pad1; +}; + // More circular dependencies class Kernel; @@ -45,6 +63,7 @@ class GPUService { void flushDataCache(u32 messagePointer); void registerInterruptRelayQueue(u32 messagePointer); void setAxiConfigQoSMode(u32 messagePointer); + void setBufferSwap(u32 messagePointer); void setInternalPriorities(u32 messagePointer); void setLCDForceBlack(u32 messagePointer); void storeDataCache(u32 messagePointer); @@ -60,6 +79,8 @@ class GPUService { void triggerTextureCopy(u32* cmd); void flushCacheRegions(u32* cmd); + void setBufferSwapImpl(u32 screen_id, const FramebufferInfo& info); + public: GPUService(Memory& mem, GPU& gpu, Kernel& kernel, u32& currentPID) : mem(mem), gpu(gpu), kernel(kernel), currentPID(currentPID) {} @@ -72,4 +93,4 @@ public: std::memset(ptr, 0, 0x1000); } } -}; \ No newline at end of file +}; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 20fe4946..755bc2f4 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -16,6 +16,12 @@ #include "renderer_vk/renderer_vk.hpp" #endif +constexpr u32 top_screen_width = 240; +constexpr u32 top_screen_height = 400; + +constexpr u32 bottom_screen_width = 240; +constexpr u32 bottom_screen_height = 320; // The bottom screen is 320 pixels long, not 300 + using namespace Floats; // Note: For when we have multiple backends, the GL state manager can stay here and have the constructor for the Vulkan-or-whatever renderer ignore it @@ -78,6 +84,27 @@ void GPU::reset() { e.config2 = 0; } + // Initialize the framebuffer registers. 
Values taken from Citra. + + using namespace PICA::ExternalRegs; + // Top screen addresses and dimensions. + external_regs[Framebuffer0AFirstAddr] = 0x181E6000; + external_regs[Framebuffer0ASecondAddr] = 0x1822C800; + external_regs[Framebuffer0BFirstAddr] = 0x18273000; + external_regs[Framebuffer0BSecondAddr] = 0x182B9800; + external_regs[Framebuffer0Size] = (top_screen_height << 16) | top_screen_width; + external_regs[Framebuffer0Stride] = 720; + external_regs[Framebuffer0Config] = static_cast(PICA::ColorFmt::RGB8); + external_regs[Framebuffer0Select] = 0; + + // Bottom screen addresses and dimensions. + external_regs[Framebuffer1AFirstAddr] = 0x1848F000; + external_regs[Framebuffer1ASecondAddr] = 0x184C7800; + external_regs[Framebuffer1Size] = (bottom_screen_height << 16) | bottom_screen_width; + external_regs[Framebuffer1Stride] = 720; + external_regs[Framebuffer1Config] = static_cast(PICA::ColorFmt::RGB8); + external_regs[Framebuffer1Select] = 0; + renderer->reset(); } diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index d245f8af..d83a486b 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -19,11 +19,36 @@ void GPU::writeReg(u32 address, u32 value) { if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers const u32 index = (address - 0x1EF01000) / sizeof(u32); writeInternalReg(index, value, 0xffffffff); + } else if (address >= 0x1EF00004 && address < 0x1EF01000) { + const u32 index = (address - 0x1EF00004) / sizeof(u32); + writeExternalReg(index, value); } else { - log("Ignoring write to external GPU register %08X. Value: %08X\n", address, value); + log("Ignoring write to unknown GPU register %08X. Value: %08X\n", address, value); } } +u32 GPU::readExternalReg(u32 index) { + using namespace PICA::ExternalRegs; + + if (index >= external_regs.size()) [[unlikely]] { // Valid indices are 0 .. size() - 1 + Helpers::panic("Tried to read invalid external GPU register. 
Index: %X\n", index); + return -1; + } + + return external_regs[index]; +} + +void GPU::writeExternalReg(u32 index, u32 value) { + using namespace PICA::ExternalRegs; + + if (index >= external_regs.size()) [[unlikely]] { // Valid indices are 0 .. size() - 1 + Helpers::panic("Tried to write to invalid external GPU register. Index: %X, value: %08X\n", index, value); + return; + } + + external_regs[index] = value; +} + u32 GPU::readInternalReg(u32 index) { using namespace PICA::InternalRegs; @@ -162,7 +187,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { } break; - // Restart immediate mode primitive drawing + // Restart immediate mode primitive drawing case PrimitiveRestart: if (value & 1) { immediateModeAttrIndex = 0; @@ -384,4 +409,4 @@ void GPU::startCommandList(u32 addr, u32 size) { writeInternalReg(id, param, mask); } } -} \ No newline at end of file +} diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index afe08b12..506f7db2 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -450,6 +450,37 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v void RendererGL::display() { gl.disableScissor(); + gl.disableBlend(); + gl.disableDepth(); + gl.disableScissor(); + gl.setColourMask(true, true, true, true); + gl.useProgram(displayProgram); + gl.bindVAO(dummyVAO); + + OpenGL::disableClipPlane(0); + OpenGL::disableClipPlane(1); + + using namespace PICA::ExternalRegs; + const u32 topScreenAddr = gpu.readExternalReg(Framebuffer0AFirstAddr); + const u32 bottomScreenAddr = gpu.readExternalReg(Framebuffer1AFirstAddr); + + auto topScreen = colourBufferCache.findFromAddress(topScreenAddr); + auto bottomScreen = colourBufferCache.findFromAddress(bottomScreenAddr); + Helpers::warn("Top screen addr %08X\n", topScreenAddr); + + screenFramebuffer.bind(OpenGL::DrawFramebuffer); + + if (topScreen) { + topScreen->get().texture.bind(); + OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport + 
OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen + } + + if (bottomScreen) { + bottomScreen->get().texture.bind(); + OpenGL::setViewport(40, 0, 320, 240); + OpenGL::draw(OpenGL::TriangleStrip, 4); + } glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); screenFramebuffer.bind(OpenGL::ReadFramebuffer); @@ -550,42 +581,56 @@ OpenGL::Texture RendererGL::getTexture(Texture& tex) { } } +// NOTE: The GPU format has RGBA5551 and RGB565 swapped compared to internal regs format +PICA::ColorFmt ToColorFmt(u32 format) { + switch (format) { + case 2: return PICA::ColorFmt::RGB565; + case 3: return PICA::ColorFmt::RGBA5551; + default: return static_cast(format); + } +} + void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { const u32 inputWidth = inputSize & 0xffff; - const u32 inputGap = inputSize >> 16; + const u32 inputHeight = inputSize >> 16; + const auto inputFormat = ToColorFmt(Helpers::getBits<8, 3>(flags)); + const auto outputFormat = ToColorFmt(Helpers::getBits<12, 3>(flags)); + const PICA::Scaling scaling = static_cast(Helpers::getBits<24, 2>(flags)); - const u32 outputWidth = outputSize & 0xffff; - const u32 outputGap = outputSize >> 16; - - auto framebuffer = colourBufferCache.findFromAddress(inputAddr); - // If there's a framebuffer at this address, use it. Otherwise go back to our old hack and display framebuffer 0 - // Displays are hard I really don't want to try implementing them because getting a fast solution is terrible - OpenGL::Texture& tex = framebuffer.has_value() ? 
framebuffer.value().get().texture : colourBufferCache[0].texture; - - tex.bind(); - screenFramebuffer.bind(OpenGL::DrawFramebuffer); - - gl.disableBlend(); - gl.disableLogicOp(); - gl.disableDepth(); - gl.disableScissor(); - gl.disableStencil(); - gl.setColourMask(true, true, true, true); - gl.useProgram(displayProgram); - gl.bindVAO(dummyVAO); - - gl.disableClipPlane(0); - gl.disableClipPlane(1); - - // Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture - // We consider output gap == 320 to mean bottom, and anything else to mean top - if (outputGap == 320) { - OpenGL::setViewport(40, 0, 320, 240); // Bottom screen viewport - } else { - OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport + u32 outputWidth = outputSize & 0xffff; + if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { + outputWidth >>= 1; + } + u32 outputHeight = outputSize >> 16; + if (scaling == PICA::Scaling::XY) { + outputHeight >>= 1; } - OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen + // If there's a framebuffer at this address, use it. 
Otherwise go back to our old hack and display framebuffer 0 + // Displays are hard I really don't want to try implementing them because getting a fast solution is terrible + auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, inputHeight); + auto dstFramebuffer = getColourBuffer(outputAddr, outputFormat, outputWidth, outputHeight); + + Helpers::warn("Display transfer with outputAddr %08X\n", outputAddr); + + // Blit the framebuffers + srcFramebuffer.fbo.bind(OpenGL::ReadFramebuffer); + dstFramebuffer.fbo.bind(OpenGL::DrawFramebuffer); + glBlitFramebuffer(0, 0, inputWidth, inputHeight, 0, 0, outputWidth, outputHeight, GL_COLOR_BUFFER_BIT, GL_LINEAR); +} + +ColourBuffer RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height) { + // Try to find an already existing buffer that contains the provided address + // This is a more relaxed check compared to getColourFBO as display transfer/texcopy may refer to + // subrect of a surface and in case of texcopy we don't know the format of the surface. + auto buffer = colourBufferCache.findFromAddress(addr); + if (buffer.has_value()) { + return buffer.value().get(); + } + + // Otherwise create and cache a new buffer. 
+ ColourBuffer sampleBuffer(addr, format, width, height); + return colourBufferCache.add(sampleBuffer); } void RendererGL::screenshot(const std::string& name) { diff --git a/src/core/renderer_null/renderer_null.cpp b/src/core/renderer_null/renderer_null.cpp index 272ce4e3..44a44aa5 100644 --- a/src/core/renderer_null/renderer_null.cpp +++ b/src/core/renderer_null/renderer_null.cpp @@ -9,4 +9,4 @@ void RendererNull::initGraphicsContext(SDL_Window* window) {} void RendererNull::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {} void RendererNull::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {} void RendererNull::drawVertices(PICA::PrimType primType, std::span vertices) {} -void RendererNull::screenshot(const std::string& name) {} \ No newline at end of file +void RendererNull::screenshot(const std::string& name) {} diff --git a/src/core/services/gsp_gpu.cpp b/src/core/services/gsp_gpu.cpp index 5d4b27a4..89797c3a 100644 --- a/src/core/services/gsp_gpu.cpp +++ b/src/core/services/gsp_gpu.cpp @@ -1,4 +1,5 @@ #include "services/gsp_gpu.hpp" +#include "PICA/regs.hpp" #include "ipc.hpp" #include "kernel.hpp" @@ -10,6 +11,7 @@ namespace ServiceCommands { RegisterInterruptRelayQueue = 0x00130042, WriteHwRegs = 0x00010082, WriteHwRegsWithMask = 0x00020084, + SetBufferSwap = 0x00050200, FlushDataCache = 0x00080082, SetLCDForceBlack = 0x000B0040, TriggerCmdReqQueue = 0x000C0000, @@ -19,16 +21,14 @@ namespace ServiceCommands { } // Commands written to shared memory and processed by TriggerCmdReqQueue -namespace GXCommands { - enum : u32 { - TriggerDMARequest = 0, - ProcessCommandList = 1, - MemoryFill = 2, - TriggerDisplayTransfer = 3, - TriggerTextureCopy = 4, - FlushCacheRegions = 5 - }; -} +enum class GXCommands : u32 { + TriggerDMARequest = 0, + ProcessCommandList = 1, + MemoryFill = 2, + TriggerDisplayTransfer = 3, + TriggerTextureCopy = 4, + FlushCacheRegions = 5 +}; void GPUService::reset() { 
privilegedProcess = 0xFFFFFFFF; // Set the privileged process to an invalid handle @@ -44,13 +44,14 @@ void GPUService::handleSyncRequest(u32 messagePointer) { case ServiceCommands::FlushDataCache: flushDataCache(messagePointer); break; case ServiceCommands::RegisterInterruptRelayQueue: registerInterruptRelayQueue(messagePointer); break; case ServiceCommands::SetAxiConfigQoSMode: setAxiConfigQoSMode(messagePointer); break; + case ServiceCommands::SetBufferSwap: setBufferSwap(messagePointer); break; case ServiceCommands::SetInternalPriorities: setInternalPriorities(messagePointer); break; case ServiceCommands::SetLCDForceBlack: setLCDForceBlack(messagePointer); break; case ServiceCommands::StoreDataCache: storeDataCache(messagePointer); break; case ServiceCommands::TriggerCmdReqQueue: [[likely]] triggerCmdReqQueue(messagePointer); break; case ServiceCommands::WriteHwRegs: writeHwRegs(messagePointer); break; case ServiceCommands::WriteHwRegsWithMask: writeHwRegsWithMask(messagePointer); break; -; default: Helpers::panic("GPU service requested. Command: %08X\n", command); + default: Helpers::panic("GPU service requested. 
Command: %08X\n", command); } } @@ -124,15 +125,12 @@ void GPUService::requestInterrupt(GPUInterrupt type) { // Not emulating this causes Yoshi's Wooly World, Captain Toad, Metroid 2 et al to hang if (type == GPUInterrupt::VBlank0 || type == GPUInterrupt::VBlank1) { int screen = static_cast(type) - static_cast(GPUInterrupt::VBlank0); // 0 for top screen, 1 for bottom - - constexpr u32 FBInfoSize = 0x40; // TODO: Offset depends on GSP thread being triggered - u8* info = &sharedMem[0x200 + screen * FBInfoSize]; - u8& dirtyFlag = info[1]; + FrameBufferUpdate* update = reinterpret_cast(&sharedMem[0x200 + screen * sizeof(FrameBufferUpdate)]); - if (dirtyFlag & 1) { - // TODO: Submit buffer info here - dirtyFlag &= ~1; + if (update->dirtyFlag & 1) { + setBufferSwapImpl(screen, update->framebufferInfo[update->index % update->framebufferInfo.size()]); // index is guest-written, keep it inside the array + update->dirtyFlag &= ~1; } } @@ -261,6 +259,19 @@ void GPUService::setAxiConfigQoSMode(u32 messagePointer) { mem.write32(messagePointer + 4, Result::Success); } +void GPUService::setBufferSwap(u32 messagePointer) { + FramebufferInfo info{}; + const u32 screenId = mem.read32(messagePointer + 4); // Selects either PDC0 or PDC1 + info.activeFb = mem.read32(messagePointer + 8); + info.leftFramebufferVaddr = mem.read32(messagePointer + 12); + info.rightFramebufferVaddr = mem.read32(messagePointer + 16); + info.stride = mem.read32(messagePointer + 20); + info.format = mem.read32(messagePointer + 24); + info.displayFb = mem.read32(messagePointer + 28); // Selects either framebuffer A or B + setBufferSwapImpl(screenId, info); + mem.write32(messagePointer + 4, Result::Success); // Report the result; otherwise the caller reads back its own screenId +} + // Seems to also be completely undocumented void GPUService::setInternalPriorities(u32 messagePointer) { log("GSP::GPU::SetInternalPriorities\n"); @@ -283,7 +294,7 @@ void GPUService::processCommandBuffer() { log("Processing %d GPU commands\n", commandsLeft); while (commandsLeft != 0) { - u32 cmdID = cmd[0] & 0xff; + const GXCommands cmdID = static_cast(cmd[0] & 0xff); switch (cmdID) { case GXCommands::ProcessCommandList: 
processCommandList(cmd); break; case GXCommands::MemoryFill: memoryFill(cmd); break; @@ -375,12 +386,45 @@ void GPUService::flushCacheRegions(u32* cmd) { log("GSP::GPU::FlushCacheRegions (Stubbed)\n"); } +void GPUService::setBufferSwapImpl(u32 screenId, const FramebufferInfo& info) { + using namespace PICA::ExternalRegs; + + constexpr static std::array fb_addresses = { + Framebuffer0AFirstAddr, + Framebuffer0ASecondAddr, + Framebuffer0BFirstAddr, + Framebuffer0BSecondAddr, + Framebuffer1AFirstAddr, + Framebuffer1ASecondAddr, + Framebuffer1BFirstAddr, + Framebuffer1BSecondAddr, + }; + + const u32 fb_index = (screenId & 1) * 4 + (info.activeFb & 1) * 2; // Both selectors are guest-supplied; masking keeps the lookups in bounds + gpu.writeExternalReg(fb_addresses[fb_index], VaddrToPaddr(info.leftFramebufferVaddr)); + gpu.writeExternalReg(fb_addresses[fb_index + 1], VaddrToPaddr(info.rightFramebufferVaddr)); + + constexpr static std::array config_addresses = { + Framebuffer0Config, + Framebuffer0Select, + Framebuffer0Stride, + Framebuffer1Config, + Framebuffer1Select, + Framebuffer1Stride, + }; + + const u32 config_index = (screenId & 1) * 3; // Same 1-bit screen selector as above + gpu.writeExternalReg(config_addresses[config_index], info.format); + gpu.writeExternalReg(config_addresses[config_index + 1], info.displayFb); + gpu.writeExternalReg(config_addresses[config_index + 2], info.stride); +} + // Actually send command list (aka display list) to GPU void GPUService::processCommandList(u32* cmd) { const u32 address = cmd[1] & ~7; // Buffer address const u32 size = cmd[2] & ~3; // Buffer size in bytes - const bool updateGas = cmd[3] == 1; // Update gas additive blend results (0 = don't update, 1 = update) - const bool flushBuffer = cmd[7] == 1; // Flush buffer (0 = don't flush, 1 = flush) + [[maybe_unused]] const bool updateGas = cmd[3] == 1; // Update gas additive blend results (0 = don't update, 1 = update) + [[maybe_unused]] const bool flushBuffer = cmd[7] == 1; // Flush buffer (0 = don't flush, 1 = flush) log("GPU::GSP::processCommandList. 
Address: %08X, size in bytes: %08X\n", address, size); gpu.startCommandList(address, size);