From f75a23b5a947d2d57a5e8c9e6b1a59ead8b265be Mon Sep 17 00:00:00 2001 From: GPUCode Date: Mon, 10 Jul 2023 23:59:44 +0300 Subject: [PATCH 01/23] code: Better screen support --- include/PICA/gpu.hpp | 4 + include/PICA/regs.hpp | 48 ++++++++++ include/memory.hpp | 2 +- include/renderer.hpp | 2 +- include/renderer_gl/gl_state.hpp | 2 +- include/renderer_gl/renderer_gl.hpp | 6 +- include/renderer_gl/surfaces.hpp | 28 +++++- include/renderer_gl/textures.hpp | 2 +- include/renderer_null/renderer_null.hpp | 2 +- include/services/gsp_gpu.hpp | 23 ++++- src/core/PICA/gpu.cpp | 27 ++++++ src/core/PICA/regs.cpp | 31 ++++++- src/core/renderer_gl/renderer_gl.cpp | 107 ++++++++++++++++------- src/core/renderer_null/renderer_null.cpp | 2 +- src/core/services/gsp_gpu.cpp | 85 +++++++++++++----- 15 files changed, 305 insertions(+), 66 deletions(-) diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index b4236ee0..338aba9b 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -29,6 +29,7 @@ class GPU { static constexpr u32 vramSize = u32(6_MB); Registers regs; // GPU internal registers std::array currentAttributes; // Vertex attributes before being passed to the shader + std::array external_regs; // GPU external registers std::array immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission std::array immediateModeVertices; @@ -97,6 +98,9 @@ class GPU { u32 readReg(u32 address); void writeReg(u32 address, u32 value); + u32 readExternalReg(u32 index); + void writeExternalReg(u32 index, u32 value); + // Used when processing GPU command lists u32 readInternalReg(u32 index); void writeInternalReg(u32 index, u32 value, u32 mask); diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index ee8105cd..d67c2025 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -178,6 +178,54 @@ namespace PICA { }; } + namespace ExternalRegs { + enum : u32 { + MemFill1BufferStartPaddr = 0x3, + MemFill1BufferEndPAddr = 0x4, + MemFill1Value = 0x5, 
+ MemFill1Control = 0x6, + MemFill2BufferStartPaddr = 0x7, + MemFill2BufferEndPAddr = 0x8, + MemFill2Value = 0x9, + MemFill2Control = 0xA, + VramBankControl = 0xB, + GPUBusy = 0xC, + BacklightControl = 0xBC, + // TODO: Framebuffer regs + Framebuffer0Size = 0x2F, + Framebuffer0AFirstAddr = 0x119, + Framebuffer0ASecondAddr = 0x11A, + Framebuffer0Config = 0x11B, + Framebuffer0Select = 0x11D, + Framebuffer0Stride = 0x123, + Framebuffer0BFirstAddr = 0x124, + Framebuffer0BSecondAddr = 0x125, + Framebuffer1Size = 0x156, + Framebuffer1AFirstAddr = 0x159, + Framebuffer1ASecondAddr = 0x15A, + Framebuffer1Config = 0x15B, + Framebuffer1Select = 0x15D, + Framebuffer1Stride = 0x163, + Framebuffer1BFirstAddr = 0x164, + Framebuffer1BSecondAddr = 0x165, + TransferInputPAddr = 0x2FF, + TransferOutputPAddr = 0x300, + DisplayTransferOutputDim = 0x301, + DisplayTransferInputDim = 0x302, + TransferFlags = 0x303, + TransferTrigger = 0x305, + TextureCopyTotalBytes = 0x307, + TextureCopyInputLineGap = 0x308, + TextureCopyOutputLineGap = 0x309, + }; + } + + enum class Scaling : u32 { + None = 0, + X = 1, + XY = 2, + }; + namespace Lights { enum : u32 { LUT_D0 = 0, diff --git a/include/memory.hpp b/include/memory.hpp index 6f33d895..0b3b184c 100644 --- a/include/memory.hpp +++ b/include/memory.hpp @@ -248,4 +248,4 @@ public: void setVRAM(u8* pointer) { vram = pointer; } bool allocateMainThreadStack(u32 size); -}; \ No newline at end of file +}; diff --git a/include/renderer.hpp b/include/renderer.hpp index cd1ee53b..230c7d89 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -65,4 +65,4 @@ class Renderer { void setColourBufferLoc(u32 loc) { colourBufferLoc = loc; } void setDepthBufferLoc(u32 loc) { depthBufferLoc = loc; } -}; \ No newline at end of file +}; diff --git a/include/renderer_gl/gl_state.hpp b/include/renderer_gl/gl_state.hpp index 98f589e0..f2680eb4 100644 --- a/include/renderer_gl/gl_state.hpp +++ b/include/renderer_gl/gl_state.hpp @@ -213,4 +213,4 @@ struct 
GLStateManager { }; static_assert(std::is_trivially_constructible(), "OpenGL State Manager class is not trivially constructible!"); -static_assert(std::is_trivially_destructible(), "OpenGL State Manager class is not trivially destructible!"); \ No newline at end of file +static_assert(std::is_trivially_destructible(), "OpenGL State Manager class is not trivially destructible!"); diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 18f52a1c..a69d7623 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -44,8 +44,8 @@ class RendererGL final : public Renderer { float oldDepthOffset = 0.0; bool oldDepthmapEnable = false; - SurfaceCache depthBufferCache; - SurfaceCache colourBufferCache; + SurfaceCache depthBufferCache; + SurfaceCache colourBufferCache; SurfaceCache textureCache; // Dummy VAO/VBO for blitting the final output @@ -78,6 +78,8 @@ class RendererGL final : public Renderer { void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; // Perform display transfer void drawVertices(PICA::PrimType primType, std::span vertices) override; // Draw the given vertices + ColourBuffer getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height); + // Take a screenshot of the screen and store it in a file void screenshot(const std::string& name) override; }; diff --git a/include/renderer_gl/surfaces.hpp b/include/renderer_gl/surfaces.hpp index a77729c4..9b150861 100644 --- a/include/renderer_gl/surfaces.hpp +++ b/include/renderer_gl/surfaces.hpp @@ -19,6 +19,10 @@ struct ColourBuffer { OpenGL::Texture texture; OpenGL::Framebuffer fbo; + GLenum internalFormat; + GLenum fmt; + GLenum type; + ColourBuffer() : valid(false) {} ColourBuffer(u32 loc, PICA::ColorFmt format, u32 x, u32 y, bool valid = true) @@ -29,12 +33,32 @@ struct ColourBuffer { range = Interval(loc, (u32)endLoc); } - void allocate() { + void allocate() { + // Internal formats 
for the texture based on format + static constexpr std::array internalFormats = { + GL_RGBA8, GL_RGB8, GL_RGB5_A1, GL_RGB565, GL_RGBA4 + }; + + // Format of the texture + static constexpr std::array formats = { + GL_RGBA, GL_BGR, GL_RGBA, GL_RGB, GL_RGBA, + }; + + static constexpr std::array types = { + GL_UNSIGNED_INT_8_8_8_8, GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT_5_5_5_1, + GL_UNSIGNED_SHORT_5_6_5, GL_UNSIGNED_SHORT_4_4_4_4, + }; + + internalFormat = internalFormats[(int)format]; + fmt = formats[(int)format]; + type = types[(int)format]; + + // Create texture for the FBO, setting up filters and the like // Reading back the current texture is slow, but allocate calls should be few and far between. // If this becomes a bottleneck, we can fix it semi-easily auto prevTexture = OpenGL::getTex2D(); - texture.create(size.x(), size.y(), GL_RGBA8); + texture.create(size.x(), size.y(), internalFormat); texture.bind(); texture.setMinFilter(OpenGL::Linear); texture.setMagFilter(OpenGL::Linear); diff --git a/include/renderer_gl/textures.hpp b/include/renderer_gl/textures.hpp index a2b6c09d..8667716a 100644 --- a/include/renderer_gl/textures.hpp +++ b/include/renderer_gl/textures.hpp @@ -53,7 +53,7 @@ struct Texture { static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); // Returns the format of this texture as a string - std::string formatToString() { + std::string_view formatToString() { return PICA::textureFormatToString(format); } diff --git a/include/renderer_null/renderer_null.hpp b/include/renderer_null/renderer_null.hpp index 553af035..05de067c 100644 --- a/include/renderer_null/renderer_null.hpp +++ b/include/renderer_null/renderer_null.hpp @@ -14,4 +14,4 @@ class RendererNull final : public Renderer { void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; void drawVertices(PICA::PrimType primType, std::span vertices) override; void screenshot(const std::string& name) override; -}; \ No newline at end of file +}; 
diff --git a/include/services/gsp_gpu.hpp b/include/services/gsp_gpu.hpp index 0757ea2d..76793bbc 100644 --- a/include/services/gsp_gpu.hpp +++ b/include/services/gsp_gpu.hpp @@ -18,6 +18,24 @@ enum class GPUInterrupt : u8 { DMA = 6 }; +struct FramebufferInfo { + u32 activeFb; + u32 leftFramebufferVaddr; + u32 rightFramebufferVaddr; + u32 stride; + u32 format; + u32 displayFb; + u32 attribute; +}; + +struct FrameBufferUpdate { + u8 index; + u8 dirtyFlag; + u16 pad0; + std::array framebufferInfo; + u32 pad1; +}; + // More circular dependencies class Kernel; @@ -45,6 +63,7 @@ class GPUService { void flushDataCache(u32 messagePointer); void registerInterruptRelayQueue(u32 messagePointer); void setAxiConfigQoSMode(u32 messagePointer); + void setBufferSwap(u32 messagePointer); void setInternalPriorities(u32 messagePointer); void setLCDForceBlack(u32 messagePointer); void storeDataCache(u32 messagePointer); @@ -60,6 +79,8 @@ class GPUService { void triggerTextureCopy(u32* cmd); void flushCacheRegions(u32* cmd); + void setBufferSwapImpl(u32 screen_id, const FramebufferInfo& info); + public: GPUService(Memory& mem, GPU& gpu, Kernel& kernel, u32& currentPID) : mem(mem), gpu(gpu), kernel(kernel), currentPID(currentPID) {} @@ -72,4 +93,4 @@ public: std::memset(ptr, 0, 0x1000); } } -}; \ No newline at end of file +}; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 20fe4946..755bc2f4 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -16,6 +16,12 @@ #include "renderer_vk/renderer_vk.hpp" #endif +constexpr u32 top_screen_width = 240; +constexpr u32 top_screen_height = 400; + +constexpr u32 bottom_screen_width = 240; +constexpr u32 bottom_screen_height = 300; + using namespace Floats; // Note: For when we have multiple backends, the GL state manager can stay here and have the constructor for the Vulkan-or-whatever renderer ignore it @@ -78,6 +84,27 @@ void GPU::reset() { e.config2 = 0; } + // Initialize the framebuffer registers. 
Values taken from Citra. + + using namespace PICA::ExternalRegs; + // Top screen addresses and dimensions. + external_regs[Framebuffer0AFirstAddr] = 0x181E6000; + external_regs[Framebuffer0ASecondAddr] = 0x1822C800; + external_regs[Framebuffer0BFirstAddr] = 0x18273000; + external_regs[Framebuffer0BSecondAddr] = 0x182B9800; + external_regs[Framebuffer0Size] = (top_screen_height << 16) | top_screen_width; + external_regs[Framebuffer0Stride] = 720; + external_regs[Framebuffer0Config] = static_cast(PICA::ColorFmt::RGB8); + external_regs[Framebuffer0Select] = 0; + + // Bottom screen addresses and dimensions. + external_regs[Framebuffer1AFirstAddr] = 0x1848F000; + external_regs[Framebuffer1ASecondAddr] = 0x184C7800; + external_regs[Framebuffer1Size] = (bottom_screen_height << 16) | bottom_screen_width; + external_regs[Framebuffer1Stride] = 720; + external_regs[Framebuffer1Config] = static_cast(PICA::ColorFmt::RGB8); + external_regs[Framebuffer1Select] = 0; + renderer->reset(); } diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index d245f8af..d83a486b 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -19,11 +19,36 @@ void GPU::writeReg(u32 address, u32 value) { if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers const u32 index = (address - 0x1EF01000) / sizeof(u32); writeInternalReg(index, value, 0xffffffff); + } else if (address >= 0x1EF00004 && address < 0x1EF01000) { + const u32 index = (address - 0x1EF00004) / sizeof(u32); + writeExternalReg(index, value); } else { - log("Ignoring write to external GPU register %08X. Value: %08X\n", address, value); + log("Ignoring write to unknown GPU register %08X. Value: %08X\n", address, value); } } +u32 GPU::readExternalReg(u32 index) { + using namespace PICA::ExternalRegs; + + if (index > 0x1000) [[unlikely]] { + Helpers::panic("Tried to read invalid external GPU register. 
Index: %X\n", index); + return -1; + } + + return external_regs[index]; +} + +void GPU::writeExternalReg(u32 index, u32 value) { + using namespace PICA::ExternalRegs; + + if (index > 0x1000) [[unlikely]] { + Helpers::panic("Tried to write to invalid external GPU register. Index: %X, value: %08X\n", index, value); + return; + } + + external_regs[index] = value; +} + u32 GPU::readInternalReg(u32 index) { using namespace PICA::InternalRegs; @@ -162,7 +187,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { } break; - // Restart immediate mode primitive drawing + // Restart immediate mode primitive drawing case PrimitiveRestart: if (value & 1) { immediateModeAttrIndex = 0; @@ -384,4 +409,4 @@ void GPU::startCommandList(u32 addr, u32 size) { writeInternalReg(id, param, mask); } } -} \ No newline at end of file +} diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index afe08b12..506f7db2 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -450,6 +450,37 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v void RendererGL::display() { gl.disableScissor(); + gl.disableBlend(); + gl.disableDepth(); + gl.disableScissor(); + gl.setColourMask(true, true, true, true); + gl.useProgram(displayProgram); + gl.bindVAO(dummyVAO); + + OpenGL::disableClipPlane(0); + OpenGL::disableClipPlane(1); + + using namespace PICA::ExternalRegs; + const u32 topScreenAddr = gpu.readExternalReg(Framebuffer0AFirstAddr); + const u32 bottomScreenAddr = gpu.readExternalReg(Framebuffer1AFirstAddr); + + auto topScreen = colourBufferCache.findFromAddress(topScreenAddr); + auto bottomScreen = colourBufferCache.findFromAddress(bottomScreenAddr); + Helpers::warn("Top screen addr %08X\n", topScreenAddr); + + screenFramebuffer.bind(OpenGL::DrawFramebuffer); + + if (topScreen) { + topScreen->get().texture.bind(); + OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport + 
OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen + } + + if (bottomScreen) { + bottomScreen->get().texture.bind(); + OpenGL::setViewport(40, 0, 320, 240); + OpenGL::draw(OpenGL::TriangleStrip, 4); + } glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); screenFramebuffer.bind(OpenGL::ReadFramebuffer); @@ -550,42 +581,56 @@ OpenGL::Texture RendererGL::getTexture(Texture& tex) { } } +// NOTE: The GPU format has RGBA5551 and RGB565 swapped compared to internal regs format +PICA::ColorFmt ToColorFmt(u32 format) { + switch (format) { + case 2: return PICA::ColorFmt::RGB565; + case 3: return PICA::ColorFmt::RGBA5551; + default: return static_cast(format); + } +} + void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { const u32 inputWidth = inputSize & 0xffff; - const u32 inputGap = inputSize >> 16; + const u32 inputHeight = inputSize >> 16; + const auto inputFormat = ToColorFmt(Helpers::getBits<8, 3>(flags)); + const auto outputFormat = ToColorFmt(Helpers::getBits<12, 3>(flags)); + const PICA::Scaling scaling = static_cast(Helpers::getBits<24, 2>(flags)); - const u32 outputWidth = outputSize & 0xffff; - const u32 outputGap = outputSize >> 16; - - auto framebuffer = colourBufferCache.findFromAddress(inputAddr); - // If there's a framebuffer at this address, use it. Otherwise go back to our old hack and display framebuffer 0 - // Displays are hard I really don't want to try implementing them because getting a fast solution is terrible - OpenGL::Texture& tex = framebuffer.has_value() ? 
framebuffer.value().get().texture : colourBufferCache[0].texture; - - tex.bind(); - screenFramebuffer.bind(OpenGL::DrawFramebuffer); - - gl.disableBlend(); - gl.disableLogicOp(); - gl.disableDepth(); - gl.disableScissor(); - gl.disableStencil(); - gl.setColourMask(true, true, true, true); - gl.useProgram(displayProgram); - gl.bindVAO(dummyVAO); - - gl.disableClipPlane(0); - gl.disableClipPlane(1); - - // Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture - // We consider output gap == 320 to mean bottom, and anything else to mean top - if (outputGap == 320) { - OpenGL::setViewport(40, 0, 320, 240); // Bottom screen viewport - } else { - OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport + u32 outputWidth = outputSize & 0xffff; + if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { + outputWidth >>= 1; + } + u32 outputHeight = outputSize >> 16; + if (scaling == PICA::Scaling::XY) { + outputHeight >>= 1; } - OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen + // If there's a framebuffer at this address, use it. 
Otherwise go back to our old hack and display framebuffer 0 + // Displays are hard I really don't want to try implementing them because getting a fast solution is terrible + auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, inputHeight); + auto dstFramebuffer = getColourBuffer(outputAddr, outputFormat, outputWidth, outputHeight); + + Helpers::warn("Display transfer with outputAddr %08X\n", outputAddr); + + // Blit the framebuffers + srcFramebuffer.fbo.bind(OpenGL::ReadFramebuffer); + dstFramebuffer.fbo.bind(OpenGL::DrawFramebuffer); + glBlitFramebuffer(0, 0, inputWidth, inputHeight, 0, 0, outputWidth, outputHeight, GL_COLOR_BUFFER_BIT, GL_LINEAR); +} + +ColourBuffer RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height) { + // Try to find an already existing buffer that contains the provided address + // This is a more relaxed check compared to getColourFBO as display transfer/texcopy may refer to + // subrect of a surface and in case of texcopy we don't know the format of the surface. + auto buffer = colourBufferCache.findFromAddress(addr); + if (buffer.has_value()) { + return buffer.value().get(); + } + + // Otherwise create and cache a new buffer. 
+ ColourBuffer sampleBuffer(addr, format, width, height); + return colourBufferCache.add(sampleBuffer); } void RendererGL::screenshot(const std::string& name) { diff --git a/src/core/renderer_null/renderer_null.cpp b/src/core/renderer_null/renderer_null.cpp index 272ce4e3..44a44aa5 100644 --- a/src/core/renderer_null/renderer_null.cpp +++ b/src/core/renderer_null/renderer_null.cpp @@ -9,4 +9,4 @@ void RendererNull::initGraphicsContext(SDL_Window* window) {} void RendererNull::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {} void RendererNull::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {} void RendererNull::drawVertices(PICA::PrimType primType, std::span vertices) {} -void RendererNull::screenshot(const std::string& name) {} \ No newline at end of file +void RendererNull::screenshot(const std::string& name) {} diff --git a/src/core/services/gsp_gpu.cpp b/src/core/services/gsp_gpu.cpp index 5d4b27a4..89797c3a 100644 --- a/src/core/services/gsp_gpu.cpp +++ b/src/core/services/gsp_gpu.cpp @@ -1,4 +1,5 @@ #include "services/gsp_gpu.hpp" +#include "PICA/regs.hpp" #include "ipc.hpp" #include "kernel.hpp" @@ -10,6 +11,7 @@ namespace ServiceCommands { RegisterInterruptRelayQueue = 0x00130042, WriteHwRegs = 0x00010082, WriteHwRegsWithMask = 0x00020084, + SetBufferSwap = 0x00050200, FlushDataCache = 0x00080082, SetLCDForceBlack = 0x000B0040, TriggerCmdReqQueue = 0x000C0000, @@ -19,16 +21,14 @@ namespace ServiceCommands { } // Commands written to shared memory and processed by TriggerCmdReqQueue -namespace GXCommands { - enum : u32 { - TriggerDMARequest = 0, - ProcessCommandList = 1, - MemoryFill = 2, - TriggerDisplayTransfer = 3, - TriggerTextureCopy = 4, - FlushCacheRegions = 5 - }; -} +enum class GXCommands : u32 { + TriggerDMARequest = 0, + ProcessCommandList = 1, + MemoryFill = 2, + TriggerDisplayTransfer = 3, + TriggerTextureCopy = 4, + FlushCacheRegions = 5 +}; void GPUService::reset() { 
privilegedProcess = 0xFFFFFFFF; // Set the privileged process to an invalid handle @@ -44,13 +44,14 @@ void GPUService::handleSyncRequest(u32 messagePointer) { case ServiceCommands::FlushDataCache: flushDataCache(messagePointer); break; case ServiceCommands::RegisterInterruptRelayQueue: registerInterruptRelayQueue(messagePointer); break; case ServiceCommands::SetAxiConfigQoSMode: setAxiConfigQoSMode(messagePointer); break; + case ServiceCommands::SetBufferSwap: setBufferSwap(messagePointer); break; case ServiceCommands::SetInternalPriorities: setInternalPriorities(messagePointer); break; case ServiceCommands::SetLCDForceBlack: setLCDForceBlack(messagePointer); break; case ServiceCommands::StoreDataCache: storeDataCache(messagePointer); break; case ServiceCommands::TriggerCmdReqQueue: [[likely]] triggerCmdReqQueue(messagePointer); break; case ServiceCommands::WriteHwRegs: writeHwRegs(messagePointer); break; case ServiceCommands::WriteHwRegsWithMask: writeHwRegsWithMask(messagePointer); break; -; default: Helpers::panic("GPU service requested. Command: %08X\n", command); + default: Helpers::panic("GPU service requested. 
Command: %08X\n", command); } } @@ -124,15 +125,12 @@ void GPUService::requestInterrupt(GPUInterrupt type) { // Not emulating this causes Yoshi's Wooly World, Captain Toad, Metroid 2 et al to hang if (type == GPUInterrupt::VBlank0 || type == GPUInterrupt::VBlank1) { int screen = static_cast(type) - static_cast(GPUInterrupt::VBlank0); // 0 for top screen, 1 for bottom - - constexpr u32 FBInfoSize = 0x40; // TODO: Offset depends on GSP thread being triggered - u8* info = &sharedMem[0x200 + screen * FBInfoSize]; - u8& dirtyFlag = info[1]; + FrameBufferUpdate* update = reinterpret_cast(&sharedMem[0x200 + screen * sizeof(FrameBufferUpdate)]); - if (dirtyFlag & 1) { - // TODO: Submit buffer info here - dirtyFlag &= ~1; + if (update->dirtyFlag & 1) { + setBufferSwapImpl(screen, update->framebufferInfo[update->index]); + update->dirtyFlag &= ~1; } } @@ -261,6 +259,18 @@ void GPUService::setAxiConfigQoSMode(u32 messagePointer) { mem.write32(messagePointer + 4, Result::Success); } +void GPUService::setBufferSwap(u32 messagePointer) { + FramebufferInfo info{}; + const u32 screenId = mem.read32(messagePointer + 4); // Selects either PDC0 or PDC1 + info.activeFb = mem.read32(messagePointer + 8); + info.leftFramebufferVaddr = mem.read32(messagePointer + 12); + info.rightFramebufferVaddr = mem.read32(messagePointer + 16); + info.stride = mem.read32(messagePointer + 20); + info.format = mem.read32(messagePointer + 24); + info.displayFb = mem.read32(messagePointer + 28); // Selects either framebuffer A or B + setBufferSwapImpl(screenId, info); +} + // Seems to also be completely undocumented void GPUService::setInternalPriorities(u32 messagePointer) { log("GSP::GPU::SetInternalPriorities\n"); @@ -283,7 +293,7 @@ void GPUService::processCommandBuffer() { log("Processing %d GPU commands\n", commandsLeft); while (commandsLeft != 0) { - u32 cmdID = cmd[0] & 0xff; + const GXCommands cmdID = static_cast(cmd[0] & 0xff); switch (cmdID) { case GXCommands::ProcessCommandList: 
processCommandList(cmd); break; case GXCommands::MemoryFill: memoryFill(cmd); break; @@ -375,12 +385,45 @@ void GPUService::flushCacheRegions(u32* cmd) { log("GSP::GPU::FlushCacheRegions (Stubbed)\n"); } +void GPUService::setBufferSwapImpl(u32 screenId, const FramebufferInfo& info) { + using namespace PICA::ExternalRegs; + + constexpr static std::array fb_addresses = { + Framebuffer0AFirstAddr, + Framebuffer0ASecondAddr, + Framebuffer0BFirstAddr, + Framebuffer0BSecondAddr, + Framebuffer1AFirstAddr, + Framebuffer1ASecondAddr, + Framebuffer1BFirstAddr, + Framebuffer1BSecondAddr, + }; + + const u32 fb_index = screenId * 4 + info.activeFb * 2; + gpu.writeExternalReg(fb_addresses[fb_index], VaddrToPaddr(info.leftFramebufferVaddr)); + gpu.writeExternalReg(fb_addresses[fb_index + 1], VaddrToPaddr(info.rightFramebufferVaddr)); + + constexpr static std::array config_addresses = { + Framebuffer0Config, + Framebuffer0Select, + Framebuffer0Stride, + Framebuffer1Config, + Framebuffer1Select, + Framebuffer1Stride, + }; + + const u32 config_index = screenId * 3; + gpu.writeExternalReg(config_addresses[config_index], info.format); + gpu.writeExternalReg(config_addresses[config_index + 1], info.displayFb); + gpu.writeExternalReg(config_addresses[config_index + 2], info.stride); +} + // Actually send command list (aka display list) to GPU void GPUService::processCommandList(u32* cmd) { const u32 address = cmd[1] & ~7; // Buffer address const u32 size = cmd[2] & ~3; // Buffer size in bytes - const bool updateGas = cmd[3] == 1; // Update gas additive blend results (0 = don't update, 1 = update) - const bool flushBuffer = cmd[7] == 1; // Flush buffer (0 = don't flush, 1 = flush) + [[maybe_unused]] const bool updateGas = cmd[3] == 1; // Update gas additive blend results (0 = don't update, 1 = update) + [[maybe_unused]] const bool flushBuffer = cmd[7] == 1; // Flush buffer (0 = don't flush, 1 = flush) log("GPU::GSP::processCommandList. 
Address: %08X, size in bytes: %08X\n", address, size); gpu.startCommandList(address, size); From c805504f70d90ff2033867761cc991583cda0e93 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Thu, 3 Aug 2023 16:35:18 +0300 Subject: [PATCH 02/23] gpu: Add display transfer rectangles --- include/math_util.hpp | 80 ++++++++++++++++++++++++++++ include/renderer_gl/opengl.hpp | 79 --------------------------- include/renderer_gl/surfaces.hpp | 14 ++++- include/renderer_gl/textures.hpp | 3 +- src/core/renderer_gl/renderer_gl.cpp | 21 +++++--- 5 files changed, 109 insertions(+), 88 deletions(-) create mode 100644 include/math_util.hpp diff --git a/include/math_util.hpp b/include/math_util.hpp new file mode 100644 index 00000000..f2b41f41 --- /dev/null +++ b/include/math_util.hpp @@ -0,0 +1,80 @@ +#pragma once +#include + +namespace Math { + // Abstraction for GLSL vectors + template + class Vector { + // A GLSL vector can only have 2, 3 or 4 elements + static_assert(size == 2 || size == 3 || size == 4); + T m_storage[size]; + + public: + T& r() { return m_storage[0]; } + T& g() { return m_storage[1]; } + T& b() { + static_assert(size >= 3, "Out of bounds OpenGL::Vector access"); + return m_storage[2]; + } + T& a() { + static_assert(size >= 4, "Out of bounds OpenGL::Vector access"); + return m_storage[3]; + } + + T& x() { return r(); } + T& y() { return g(); } + T& z() { return b(); } + T& w() { return a(); } + T& operator[](size_t index) { return m_storage[index]; } + const T& operator[](size_t index) const { return m_storage[index]; } + + T& u() { return r(); } + T& v() { return g(); } + + T& s() { return r(); } + T& t() { return g(); } + T& p() { return b(); } + T& q() { return a(); } + + Vector(std::array list) { std::copy(list.begin(), list.end(), &m_storage[0]); } + + Vector() {} + }; + + using vec2 = Vector; + using vec3 = Vector; + using vec4 = Vector; + + using dvec2 = Vector; + using dvec3 = Vector; + using dvec4 = Vector; + + using ivec2 = Vector; + using ivec3 = Vector; 
+ using ivec4 = Vector; + + using uvec2 = Vector; + using uvec3 = Vector; + using uvec4 = Vector; + + // A 2D rectangle, meant to be used for stuff like scissor rects or viewport rects + // We're never supporting 3D rectangles, because rectangles were never meant to be 3D in the first place + template + struct Rectangle { + Vector start; + Vector end; + + Rectangle() : start({0}), end({0}) {} + Rectangle(T x0, T y0, T x1, T y1) : start({x0, y0}), end({x1, y1}) {} + + T getWidth() const { + return std::abs(end.x() - start.x()); + } + + T getHeight() const { + return std::abs(end.y() - start.y()); + } + }; + + using Rect = Rectangle; +} diff --git a/include/renderer_gl/opengl.hpp b/include/renderer_gl/opengl.hpp index e129f6b6..a2afab40 100644 --- a/include/renderer_gl/opengl.hpp +++ b/include/renderer_gl/opengl.hpp @@ -615,83 +615,4 @@ namespace OpenGL { glBlendFuncSeparate(fac1, fac2, fac3, fac4); } - // Abstraction for GLSL vectors - template - class Vector { - // A GLSL vector can only have 2, 3 or 4 elements - static_assert(size == 2 || size == 3 || size == 4); - T m_storage[size]; - - public: - T& r() { return m_storage[0]; } - T& g() { return m_storage[1]; } - T& b() { - static_assert(size >= 3, "Out of bounds OpenGL::Vector access"); - return m_storage[2]; - } - T& a() { - static_assert(size >= 4, "Out of bounds OpenGL::Vector access"); - return m_storage[3]; - } - - T& x() { return r(); } - T& y() { return g(); } - T& z() { return b(); } - T& w() { return a(); } - T& operator[](size_t index) { return m_storage[index]; } - const T& operator[](size_t index) const { return m_storage[index]; } - - T& u() { return r(); } - T& v() { return g(); } - - T& s() { return r(); } - T& t() { return g(); } - T& p() { return b(); } - T& q() { return a(); } - - Vector(std::array list) { std::copy(list.begin(), list.end(), &m_storage[0]); } - - Vector() {} - }; - - using vec2 = Vector; - using vec3 = Vector; - using vec4 = Vector; - - using dvec2 = Vector; - using dvec3 = 
Vector; - using dvec4 = Vector; - - using ivec2 = Vector; - using ivec3 = Vector; - using ivec4 = Vector; - - using uvec2 = Vector; - using uvec3 = Vector; - using uvec4 = Vector; - - // A 2D rectangle, meant to be used for stuff like scissor rects or viewport rects - // We're never supporting 3D rectangles, because rectangles were never meant to be 3D in the first place - // x, y: Coords of the top left vertex - // width, height: Dimensions of the rectangle. Initialized to 0 if not specified. - template - struct Rectangle { - T x, y, width, height; - - std::pair topLeft() const { return std::make_pair(x, y); } - std::pair topRight() const { return std::make_pair(x + width, y); } - std::pair bottomLeft() const { return std::make_pair(x, y + height); } - std::pair bottomRight() const { return std::make_pair(x + width, y + height); } - - Rectangle() : x(0), y(0), width(0), height(0) {} - Rectangle(T x, T y, T width, T height) : x(x), y(y), width(width), height(height) {} - - bool isEmpty() const { return width == 0 && height == 0; } - bool isLine() const { return (width == 0 && height != 0) || (width != 0 && height == 0); } - - void setEmpty() { x = y = width = height = 0; } - }; - - using Rect = Rectangle; - } // end namespace OpenGL diff --git a/include/renderer_gl/surfaces.hpp b/include/renderer_gl/surfaces.hpp index 9b150861..606e60d4 100644 --- a/include/renderer_gl/surfaces.hpp +++ b/include/renderer_gl/surfaces.hpp @@ -2,6 +2,7 @@ #include "PICA/regs.hpp" #include "boost/icl/interval.hpp" #include "helpers.hpp" +#include "math_util.hpp" #include "opengl.hpp" template @@ -10,7 +11,7 @@ using Interval = boost::icl::right_open_interval; struct ColourBuffer { u32 location; PICA::ColorFmt format; - OpenGL::uvec2 size; + Math::uvec2 size; bool valid; // Range of VRAM taken up by buffer @@ -90,6 +91,15 @@ struct ColourBuffer { } } + Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { + // PICA textures have top-left origin while OpenGL has bottom-left 
origin. + // Flip the rectangle on the x axis to account for this. + const u32 startOffset = (inputAddress - location) / sizePerPixel(format); + const u32 x0 = (startOffset % (size.x() * 8)) / 8; + const u32 y0 = (startOffset / (size.x() * 8)) * 8; + return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; + } + bool matches(ColourBuffer& other) { return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y(); @@ -103,7 +113,7 @@ struct ColourBuffer { struct DepthBuffer { u32 location; PICA::DepthFmt format; - OpenGL::uvec2 size; // Implicitly set to the size of the framebuffer + Math::uvec2 size; // Implicitly set to the size of the framebuffer bool valid; // Range of VRAM taken up by buffer diff --git a/include/renderer_gl/textures.hpp b/include/renderer_gl/textures.hpp index 8667716a..c8836b5e 100644 --- a/include/renderer_gl/textures.hpp +++ b/include/renderer_gl/textures.hpp @@ -4,6 +4,7 @@ #include "PICA/regs.hpp" #include "boost/icl/interval.hpp" #include "helpers.hpp" +#include "math_util.hpp" #include "opengl.hpp" template @@ -13,7 +14,7 @@ struct Texture { u32 location; u32 config; // Magnification/minification filter, wrapping configs, etc PICA::TextureFmt format; - OpenGL::uvec2 size; + Math::uvec2 size; bool valid; // Range of VRAM taken up by buffer diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 506f7db2..2806ae48 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -536,7 +536,7 @@ void RendererGL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 co OpenGL::Framebuffer RendererGL::getColourFBO() { // We construct a colour buffer object and see if our cache has any matching colour buffers in it - // If not, we allocate a texture & FBO for our framebuffer and store it in the cache + // If not, we allocate a texture & FBO for our framebuffer and store it in the cache ColourBuffer 
sampleBuffer(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]); auto buffer = colourBufferCache.find(sampleBuffer); @@ -598,25 +598,34 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u const PICA::Scaling scaling = static_cast(Helpers::getBits<24, 2>(flags)); u32 outputWidth = outputSize & 0xffff; + u32 outputHeight = outputSize >> 16; + + if (inputWidth != outputWidth) { + Helpers::warn("Strided display transfer is not handled correctly!\n"); + } + + auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, inputHeight); + Math::Rect srcRect = srcFramebuffer.getSubRect(inputAddr, outputWidth, outputHeight); + + // Apply scaling for the destination rectangle. if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { outputWidth >>= 1; } - u32 outputHeight = outputSize >> 16; if (scaling == PICA::Scaling::XY) { outputHeight >>= 1; } - // If there's a framebuffer at this address, use it. Otherwise go back to our old hack and display framebuffer 0 - // Displays are hard I really don't want to try implementing them because getting a fast solution is terrible - auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, inputHeight); auto dstFramebuffer = getColourBuffer(outputAddr, outputFormat, outputWidth, outputHeight); + Math::Rect dstRect = dstFramebuffer.getSubRect(outputAddr, outputWidth, outputHeight); Helpers::warn("Display transfer with outputAddr %08X\n", outputAddr); // Blit the framebuffers srcFramebuffer.fbo.bind(OpenGL::ReadFramebuffer); dstFramebuffer.fbo.bind(OpenGL::DrawFramebuffer); - glBlitFramebuffer(0, 0, inputWidth, inputHeight, 0, 0, outputWidth, outputHeight, GL_COLOR_BUFFER_BIT, GL_LINEAR); + glBlitFramebuffer(srcRect.start.x(), srcRect.start.y(), srcRect.end.x(), srcRect.end.y(), + dstRect.start.x(), dstRect.start.y(), dstRect.end.x(), dstRect.end.y(), + GL_COLOR_BUFFER_BIT, GL_LINEAR); } ColourBuffer RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, 
u32 width, u32 height) { From 58383ee431a28d07c26064058d4329692f856799 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Thu, 3 Aug 2023 16:42:01 +0300 Subject: [PATCH 03/23] Address review comments --- src/core/PICA/regs.cpp | 2 +- src/core/services/gsp_gpu.cpp | 20 +++++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index d83a486b..04261526 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -187,7 +187,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { } break; - // Restart immediate mode primitive drawing + // Restart immediate mode primitive drawing case PrimitiveRestart: if (value & 1) { immediateModeAttrIndex = 0; diff --git a/src/core/services/gsp_gpu.cpp b/src/core/services/gsp_gpu.cpp index 89797c3a..31b9050f 100644 --- a/src/core/services/gsp_gpu.cpp +++ b/src/core/services/gsp_gpu.cpp @@ -21,14 +21,16 @@ namespace ServiceCommands { } // Commands written to shared memory and processed by TriggerCmdReqQueue -enum class GXCommands : u32 { - TriggerDMARequest = 0, - ProcessCommandList = 1, - MemoryFill = 2, - TriggerDisplayTransfer = 3, - TriggerTextureCopy = 4, - FlushCacheRegions = 5 -}; +namespace GXCommands { + enum : u32 { + TriggerDMARequest = 0, + ProcessCommandList = 1, + MemoryFill = 2, + TriggerDisplayTransfer = 3, + TriggerTextureCopy = 4, + FlushCacheRegions = 5 + }; +} void GPUService::reset() { privilegedProcess = 0xFFFFFFFF; // Set the privileged process to an invalid handle @@ -293,7 +295,7 @@ void GPUService::processCommandBuffer() { log("Processing %d GPU commands\n", commandsLeft); while (commandsLeft != 0) { - const GXCommands cmdID = static_cast(cmd[0] & 0xff); + const u32 cmdID = cmd[0] & 0xff; switch (cmdID) { case GXCommands::ProcessCommandList: processCommandList(cmd); break; case GXCommands::MemoryFill: memoryFill(cmd); break; From a8a76ab64d29f538b63b0eece00541a854252f9a Mon Sep 17 00:00:00 2001 From: wheremyfoodat 
<44909372+wheremyfoodat@users.noreply.github.com> Date: Thu, 3 Aug 2023 20:37:40 +0300 Subject: [PATCH 04/23] Shush warnings --- src/core/renderer_gl/renderer_gl.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 2806ae48..d59cfa17 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -466,8 +466,6 @@ void RendererGL::display() { auto topScreen = colourBufferCache.findFromAddress(topScreenAddr); auto bottomScreen = colourBufferCache.findFromAddress(bottomScreenAddr); - Helpers::warn("Top screen addr %08X\n", topScreenAddr); - screenFramebuffer.bind(OpenGL::DrawFramebuffer); if (topScreen) { @@ -601,7 +599,7 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u u32 outputHeight = outputSize >> 16; if (inputWidth != outputWidth) { - Helpers::warn("Strided display transfer is not handled correctly!\n"); + // Helpers::warn("Strided display transfer is not handled correctly!\n"); } auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, inputHeight); @@ -611,6 +609,7 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { outputWidth >>= 1; } + if (scaling == PICA::Scaling::XY) { outputHeight >>= 1; } @@ -618,8 +617,6 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u auto dstFramebuffer = getColourBuffer(outputAddr, outputFormat, outputWidth, outputHeight); Math::Rect dstRect = dstFramebuffer.getSubRect(outputAddr, outputWidth, outputHeight); - Helpers::warn("Display transfer with outputAddr %08X\n", outputAddr); - // Blit the framebuffers srcFramebuffer.fbo.bind(OpenGL::ReadFramebuffer); dstFramebuffer.fbo.bind(OpenGL::DrawFramebuffer); From 9695b57bf53c00a04bd763e9f88900051796ed10 Mon Sep 17 00:00:00 2001 From: wheremyfoodat 
<44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 4 Aug 2023 00:42:11 +0300 Subject: [PATCH 05/23] [PICA] Rework how external registers work, format --- include/PICA/gpu.hpp | 12 +++++-- include/renderer.hpp | 7 ++-- include/renderer_gl/renderer_gl.hpp | 3 +- include/renderer_null/renderer_null.hpp | 2 +- include/renderer_sw/renderer_sw.hpp | 2 +- include/renderer_vk/renderer_vk.hpp | 2 +- src/core/PICA/gpu.cpp | 44 ++++++++++++------------ src/core/PICA/regs.cpp | 4 +-- src/core/renderer_gl/renderer_gl.cpp | 4 +-- src/core/renderer_null/renderer_null.cpp | 3 +- src/core/renderer_sw/renderer_sw.cpp | 3 +- src/core/renderer_vk/renderer_vk.cpp | 3 +- src/renderer.cpp | 3 +- 13 files changed, 54 insertions(+), 38 deletions(-) diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 338aba9b..955fb0ae 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -14,8 +14,11 @@ class GPU { static constexpr u32 regNum = 0x300; + static constexpr u32 extRegNum = 0x1000; + using vec4f = std::array; - using Registers = std::array; + using Registers = std::array; // Internal registers (named registers in short since they're the main ones) + using ExternalRegisters = std::array; Memory& mem; EmulatorConfig& config; @@ -29,7 +32,6 @@ class GPU { static constexpr u32 vramSize = u32(6_MB); Registers regs; // GPU internal registers std::array currentAttributes; // Vertex attributes before being passed to the shader - std::array external_regs; // GPU external registers std::array immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission std::array immediateModeVertices; @@ -144,4 +146,10 @@ class GPU { Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr); } } + + private: + // GPU external registers + // We have them in the end of the struct for cache locality reasons. 
Tl;dr we want the more commonly used things to be packed in the start + // Of the struct, instead of externalRegs being in the middle + ExternalRegisters externalRegs; }; diff --git a/include/renderer.hpp b/include/renderer.hpp index 230c7d89..fff25ab5 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -21,8 +21,11 @@ struct SDL_Window; class Renderer { protected: GPU& gpu; - static constexpr u32 regNum = 0x300; // Number of internal PICA registers + static constexpr u32 regNum = 0x300; // Number of internal PICA registers + static constexpr u32 extRegNum = 0x1000; // Number of external PICA registers + const std::array& regs; + const std::array& externalRegs; std::array fbSize; // The size of the framebuffer (ie both the colour and depth buffer)' @@ -34,7 +37,7 @@ class Renderer { PICA::DepthFmt depthBufferFormat; public: - Renderer(GPU& gpu, const std::array& internalRegs); + Renderer(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); virtual ~Renderer(); static constexpr u32 vertexBufferSize = 0x10000; diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index a69d7623..52d97524 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -68,7 +68,8 @@ class RendererGL final : public Renderer { void updateLightingLUT(); public: - RendererGL(GPU& gpu, const std::array& internalRegs) : Renderer(gpu, internalRegs) {} + RendererGL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) + : Renderer(gpu, internalRegs, externalRegs) {} ~RendererGL() override; void reset() override; diff --git a/include/renderer_null/renderer_null.hpp b/include/renderer_null/renderer_null.hpp index 05de067c..22293ba6 100644 --- a/include/renderer_null/renderer_null.hpp +++ b/include/renderer_null/renderer_null.hpp @@ -4,7 +4,7 @@ class GPU; class RendererNull final : public Renderer { public: - RendererNull(GPU& gpu, const std::array& internalRegs); + 
RendererNull(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); ~RendererNull() override; void reset() override; diff --git a/include/renderer_sw/renderer_sw.hpp b/include/renderer_sw/renderer_sw.hpp index 5c42e188..171fc084 100644 --- a/include/renderer_sw/renderer_sw.hpp +++ b/include/renderer_sw/renderer_sw.hpp @@ -4,7 +4,7 @@ class GPU; class RendererSw final : public Renderer { public: - RendererSw(GPU& gpu, const std::array& internalRegs); + RendererSw(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); ~RendererSw() override; void reset() override; diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 5e621bdc..4b6e65b0 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -44,7 +44,7 @@ class RendererVK final : public Renderer { u64 currentFrame = 0; public: - RendererVK(GPU& gpu, const std::array& internalRegs); + RendererVK(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); ~RendererVK() override; void reset() override; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 755bc2f4..3668b32f 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -16,11 +16,11 @@ #include "renderer_vk/renderer_vk.hpp" #endif -constexpr u32 top_screen_width = 240; -constexpr u32 top_screen_height = 400; +constexpr u32 topScreenWidth = 240; +constexpr u32 topScreenHeight = 400; -constexpr u32 bottom_screen_width = 240; -constexpr u32 bottom_screen_height = 300; +constexpr u32 bottomScreenWidth = 240; +constexpr u32 bottomScreenHeight = 300; using namespace Floats; @@ -32,24 +32,24 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) { switch (config.rendererType) { case RendererType::Null: { - renderer.reset(new RendererNull(*this, regs)); + renderer.reset(new RendererNull(*this, regs, externalRegs)); break; } case RendererType::Software: { - renderer.reset(new RendererSw(*this, 
regs)); + renderer.reset(new RendererSw(*this, regs, externalRegs)); break; } #ifdef PANDA3DS_ENABLE_OPENGL case RendererType::OpenGL: { - renderer.reset(new RendererGL(*this, regs)); + renderer.reset(new RendererGL(*this, regs, externalRegs)); break; } #endif #ifdef PANDA3DS_ENABLE_VULKAN case RendererType::Vulkan: { - renderer.reset(new RendererVK(*this, regs)); + renderer.reset(new RendererVK(*this, regs, externalRegs)); break; } #endif @@ -88,22 +88,22 @@ void GPU::reset() { using namespace PICA::ExternalRegs; // Top screen addresses and dimentions. - external_regs[Framebuffer0AFirstAddr] = 0x181E6000; - external_regs[Framebuffer0ASecondAddr] = 0x1822C800; - external_regs[Framebuffer0BFirstAddr] = 0x18273000; - external_regs[Framebuffer0BSecondAddr] = 0x182B9800; - external_regs[Framebuffer0Size] = (top_screen_height << 16) | top_screen_width; - external_regs[Framebuffer0Stride] = 720; - external_regs[Framebuffer0Config] = static_cast(PICA::ColorFmt::RGB8); - external_regs[Framebuffer0Select] = 0; + externalRegs[Framebuffer0AFirstAddr] = 0x181E6000; + externalRegs[Framebuffer0ASecondAddr] = 0x1822C800; + externalRegs[Framebuffer0BFirstAddr] = 0x18273000; + externalRegs[Framebuffer0BSecondAddr] = 0x182B9800; + externalRegs[Framebuffer0Size] = (topScreenHeight << 16) | topScreenWidth; + externalRegs[Framebuffer0Stride] = 720; + externalRegs[Framebuffer0Config] = static_cast(PICA::ColorFmt::RGB8); + externalRegs[Framebuffer0Select] = 0; // Bottom screen addresses and dimentions. 
- external_regs[Framebuffer1AFirstAddr] = 0x1848F000; - external_regs[Framebuffer1ASecondAddr] = 0x184C7800; - external_regs[Framebuffer1Size] = (bottom_screen_height << 16) | bottom_screen_width; - external_regs[Framebuffer1Stride] = 720; - external_regs[Framebuffer1Config] = static_cast(PICA::ColorFmt::RGB8); - external_regs[Framebuffer1Select] = 0; + externalRegs[Framebuffer1AFirstAddr] = 0x1848F000; + externalRegs[Framebuffer1ASecondAddr] = 0x184C7800; + externalRegs[Framebuffer1Size] = (bottomScreenHeight << 16) | bottomScreenWidth; + externalRegs[Framebuffer1Stride] = 720; + externalRegs[Framebuffer1Config] = static_cast(PICA::ColorFmt::RGB8); + externalRegs[Framebuffer1Select] = 0; renderer->reset(); } diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index 04261526..baaa2256 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -35,7 +35,7 @@ u32 GPU::readExternalReg(u32 index) { return -1; } - return external_regs[index]; + return externalRegs[index]; } void GPU::writeExternalReg(u32 index, u32 value) { @@ -46,7 +46,7 @@ void GPU::writeExternalReg(u32 index, u32 value) { return; } - external_regs[index] = value; + externalRegs[index] = value; } u32 GPU::readInternalReg(u32 index) { diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index d59cfa17..4aa4fcc7 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -461,8 +461,8 @@ void RendererGL::display() { OpenGL::disableClipPlane(1); using namespace PICA::ExternalRegs; - const u32 topScreenAddr = gpu.readExternalReg(Framebuffer0AFirstAddr); - const u32 bottomScreenAddr = gpu.readExternalReg(Framebuffer1AFirstAddr); + const u32 topScreenAddr = externalRegs[Framebuffer0AFirstAddr]; + const u32 bottomScreenAddr = externalRegs[Framebuffer1AFirstAddr]; auto topScreen = colourBufferCache.findFromAddress(topScreenAddr); auto bottomScreen = colourBufferCache.findFromAddress(bottomScreenAddr); diff --git 
a/src/core/renderer_null/renderer_null.cpp b/src/core/renderer_null/renderer_null.cpp index 44a44aa5..546d8a81 100644 --- a/src/core/renderer_null/renderer_null.cpp +++ b/src/core/renderer_null/renderer_null.cpp @@ -1,6 +1,7 @@ #include "renderer_null/renderer_null.hpp" -RendererNull::RendererNull(GPU& gpu, const std::array& internalRegs) : Renderer(gpu, internalRegs) {} +RendererNull::RendererNull(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) + : Renderer(gpu, internalRegs, externalRegs) {} RendererNull::~RendererNull() {} void RendererNull::reset() {} diff --git a/src/core/renderer_sw/renderer_sw.cpp b/src/core/renderer_sw/renderer_sw.cpp index 9c15d6f8..1ea452ae 100644 --- a/src/core/renderer_sw/renderer_sw.cpp +++ b/src/core/renderer_sw/renderer_sw.cpp @@ -1,6 +1,7 @@ #include "renderer_sw/renderer_sw.hpp" -RendererSw::RendererSw(GPU& gpu, const std::array& internalRegs) : Renderer(gpu, internalRegs) {} +RendererSw::RendererSw(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) + : Renderer(gpu, internalRegs, externalRegs) {} RendererSw::~RendererSw() {} void RendererSw::reset() { printf("RendererSW: Unimplemented reset call\n"); } diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index e13a1597..23048f51 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -200,7 +200,8 @@ vk::Result RendererVK::recreateSwapchain(vk::SurfaceKHR surface, vk::Extent2D sw return vk::Result::eSuccess; } -RendererVK::RendererVK(GPU& gpu, const std::array& internalRegs) : Renderer(gpu, internalRegs) {} +RendererVK::RendererVK(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) + : Renderer(gpu, internalRegs, externalRegs) {} RendererVK::~RendererVK() {} diff --git a/src/renderer.cpp b/src/renderer.cpp index 22d0accb..76c3e7a0 100644 --- a/src/renderer.cpp +++ b/src/renderer.cpp @@ -3,7 +3,8 @@ #include #include 
-Renderer::Renderer(GPU& gpu, const std::array& internalRegs) : gpu(gpu), regs(internalRegs) {} +Renderer::Renderer(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) + : gpu(gpu), regs(internalRegs), externalRegs(externalRegs) {} Renderer::~Renderer() {} std::optional Renderer::typeFromString(std::string inString) { From 952e2a06f94b2557f3df2509749af8600d9845c3 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 4 Aug 2023 14:16:56 +0300 Subject: [PATCH 06/23] Move opengl.hpp to third_party --- CMakeLists.txt | 4 +- .../opengl}/opengl.hpp | 80 ++++++++++++++++++- 2 files changed, 82 insertions(+), 2 deletions(-) rename {include/renderer_gl => third_party/opengl}/opengl.hpp (90%) diff --git a/CMakeLists.txt b/CMakeLists.txt index c5725ff0..b6ac1143 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,6 +38,7 @@ include_directories(third_party/result/include) include_directories(third_party/xxhash/include) include_directories(third_party/httplib) include_directories(third_party/stb) +include_directories(third_party/opengl) add_compile_definitions(NOMINMAX) # Make windows.h not define min/max macros because third-party deps don't like it add_compile_definitions(WIN32_LEAN_AND_MEAN) # Make windows.h not include literally everything @@ -183,7 +184,8 @@ set(RENDERER_GL_SOURCE_FILES "") # Empty by default unless we are compiling with set(RENDERER_VK_SOURCE_FILES "") # Empty by default unless we are compiling with the VK renderer if(ENABLE_OPENGL) - set(RENDERER_GL_INCLUDE_FILES include/renderer_gl/opengl.hpp + # This may look weird but opengl.hpp is our header even if it's in the third_party folder + set(RENDERER_GL_INCLUDE_FILES third_party/opengl/opengl.hpp include/renderer_gl/renderer_gl.hpp include/renderer_gl/textures.hpp include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp include/renderer_gl/gl_state.hpp diff --git a/include/renderer_gl/opengl.hpp 
b/third_party/opengl/opengl.hpp similarity index 90% rename from include/renderer_gl/opengl.hpp rename to third_party/opengl/opengl.hpp index a2afab40..f368f573 100644 --- a/include/renderer_gl/opengl.hpp +++ b/third_party/opengl/opengl.hpp @@ -615,4 +615,82 @@ namespace OpenGL { glBlendFuncSeparate(fac1, fac2, fac3, fac4); } -} // end namespace OpenGL + // Abstraction for GLSL vectors + template + class Vector { + // A GLSL vector can only have 2, 3 or 4 elements + static_assert(size == 2 || size == 3 || size == 4); + T m_storage[size]; + + public: + T& r() { return m_storage[0]; } + T& g() { return m_storage[1]; } + T& b() { + static_assert(size >= 3, "Out of bounds OpenGL::Vector access"); + return m_storage[2]; + } + T& a() { + static_assert(size >= 4, "Out of bounds OpenGL::Vector access"); + return m_storage[3]; + } + + T& x() { return r(); } + T& y() { return g(); } + T& z() { return b(); } + T& w() { return a(); } + T& operator[](size_t index) { return m_storage[index]; } + const T& operator[](size_t index) const { return m_storage[index]; } + + T& u() { return r(); } + T& v() { return g(); } + + T& s() { return r(); } + T& t() { return g(); } + T& p() { return b(); } + T& q() { return a(); } + + Vector(std::array list) { std::copy(list.begin(), list.end(), &m_storage[0]); } + + Vector() {} + }; + + using vec2 = Vector; + using vec3 = Vector; + using vec4 = Vector; + + using dvec2 = Vector; + using dvec3 = Vector; + using dvec4 = Vector; + + using ivec2 = Vector; + using ivec3 = Vector; + using ivec4 = Vector; + + using uvec2 = Vector; + using uvec3 = Vector; + using uvec4 = Vector; + + // A 2D rectangle, meant to be used for stuff like scissor rects or viewport rects + // We're never supporting 3D rectangles, because rectangles were never meant to be 3D in the first place + // x, y: Coords of the top left vertex + // width, height: Dimensions of the rectangle. Initialized to 0 if not specified. 
+ template + struct Rectangle { + T x, y, width, height; + + std::pair topLeft() const { return std::make_pair(x, y); } + std::pair topRight() const { return std::make_pair(x + width, y); } + std::pair bottomLeft() const { return std::make_pair(x, y + height); } + std::pair bottomRight() const { return std::make_pair(x + width, y + height); } + + Rectangle() : x(0), y(0), width(0), height(0) {} + Rectangle(T x, T y, T width, T height) : x(x), y(y), width(width), height(height) {} + + bool isEmpty() const { return width == 0 && height == 0; } + + void setEmpty() { x = y = width = height = 0; } + }; + + using Rect = Rectangle; + +} // end namespace OpenGL \ No newline at end of file From 73a18e3609e29ff02b16c6d421d0ceb398c0a59c Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 4 Aug 2023 16:14:41 +0300 Subject: [PATCH 07/23] Switch out math_util.hpp --- CMakeLists.txt | 2 +- include/math_util.hpp | 123 +++++++++++++-------------- include/renderer_gl/surfaces.hpp | 6 +- include/renderer_gl/textures.hpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 16 ++-- 5 files changed, 72 insertions(+), 77 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b6ac1143..49225abf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,7 +158,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/crypto/aes_engine.hpp include/metaprogramming.hpp include/PICA/pica_vertex.hpp include/config.hpp include/services/ir_user.hpp include/http_server.hpp include/cheats.hpp include/action_replay.hpp include/renderer_sw/renderer_sw.hpp include/compiler_builtins.hpp - include/fs/romfs.hpp include/fs/ivfc.hpp + include/fs/romfs.hpp include/fs/ivfc.hpp include/math_util.hpp ) set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp diff --git a/include/math_util.hpp b/include/math_util.hpp index f2b41f41..fe895643 100644 --- a/include/math_util.hpp +++ b/include/math_util.hpp @@ -1,80 +1,73 @@ 
+// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project / 2023 Panda3DS Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + #pragma once -#include + +#include +#include namespace Math { - // Abstraction for GLSL vectors - template - class Vector { - // A GLSL vector can only have 2, 3 or 4 elements - static_assert(size == 2 || size == 3 || size == 4); - T m_storage[size]; - public: - T& r() { return m_storage[0]; } - T& g() { return m_storage[1]; } - T& b() { - static_assert(size >= 3, "Out of bounds OpenGL::Vector access"); - return m_storage[2]; - } - T& a() { - static_assert(size >= 4, "Out of bounds OpenGL::Vector access"); - return m_storage[3]; - } +template +struct Rectangle { + T left{}; + T top{}; + T right{}; + T bottom{}; - T& x() { return r(); } - T& y() { return g(); } - T& z() { return b(); } - T& w() { return a(); } - T& operator[](size_t index) { return m_storage[index]; } - const T& operator[](size_t index) const { return m_storage[index]; } + constexpr Rectangle() = default; - T& u() { return r(); } - T& v() { return g(); } + constexpr Rectangle(T left, T top, T right, T bottom) + : left(left), top(top), right(right), bottom(bottom) {} - T& s() { return r(); } - T& t() { return g(); } - T& p() { return b(); } - T& q() { return a(); } + [[nodiscard]] constexpr bool operator==(const Rectangle& rhs) const { + return (left == rhs.left) && (top == rhs.top) && (right == rhs.right) && + (bottom == rhs.bottom); + } - Vector(std::array list) { std::copy(list.begin(), list.end(), &m_storage[0]); } + [[nodiscard]] constexpr bool operator!=(const Rectangle& rhs) const { + return !operator==(rhs); + } - Vector() {} - }; + [[nodiscard]] constexpr Rectangle operator*(const T value) const { + return Rectangle{left * value, top * value, right * value, bottom * value}; + } + + [[nodiscard]] constexpr Rectangle operator/(const T value) const { + return Rectangle{left / value, top / value, 
right / value, bottom / value}; + } - using vec2 = Vector; - using vec3 = Vector; - using vec4 = Vector; + [[nodiscard]] T getWidth() const { + return std::abs(static_cast>(right - left)); + } + + [[nodiscard]] T getHeight() const { + return std::abs(static_cast>(bottom - top)); + } - using dvec2 = Vector; - using dvec3 = Vector; - using dvec4 = Vector; + [[nodiscard]] T getArea() const { + return getWidth() * getHeight(); + } + + [[nodiscard]] Rectangle translateX(const T x) const { + return Rectangle{left + x, top, right + x, bottom}; + } + + [[nodiscard]] Rectangle translateY(const T y) const { + return Rectangle{left, top + y, right, bottom + y}; + } - using ivec2 = Vector; - using ivec3 = Vector; - using ivec4 = Vector; + [[nodiscard]] Rectangle scale(const float s) const { + return Rectangle{left, top, static_cast(left + getWidth() * s), + static_cast(top + getHeight() * s)}; + } +}; - using uvec2 = Vector; - using uvec3 = Vector; - using uvec4 = Vector; +template +Rectangle(T, T, T, T) -> Rectangle; - // A 2D rectangle, meant to be used for stuff like scissor rects or viewport rects - // We're never supporting 3D rectangles, because rectangles were never meant to be 3D in the first place - template - struct Rectangle { - Vector start; - Vector end; +template +using Rect = Rectangle; - Rectangle() : start({0}), end({0}) {} - Rectangle(T x0, T y0, T x1, T y1) : start({x0, y0}), end({x1, y1}) {} - - T getWidth() const { - return std::abs(end.x() - start.x()); - } - - T getHeight() const { - return std::abs(end.y() - start.y()); - } - }; - - using Rect = Rectangle; -} +} // end namespace Math \ No newline at end of file diff --git a/include/renderer_gl/surfaces.hpp b/include/renderer_gl/surfaces.hpp index 606e60d4..3cfb5ca4 100644 --- a/include/renderer_gl/surfaces.hpp +++ b/include/renderer_gl/surfaces.hpp @@ -11,7 +11,7 @@ using Interval = boost::icl::right_open_interval; struct ColourBuffer { u32 location; PICA::ColorFmt format; - Math::uvec2 size; + 
OpenGL::uvec2 size; bool valid; // Range of VRAM taken up by buffer @@ -91,7 +91,7 @@ struct ColourBuffer { } } - Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { + Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { // PICA textures have top-left origin while OpenGL has bottom-left origin. // Flip the rectangle on the x axis to account for this. const u32 startOffset = (inputAddress - location) / sizePerPixel(format); @@ -113,7 +113,7 @@ struct ColourBuffer { struct DepthBuffer { u32 location; PICA::DepthFmt format; - Math::uvec2 size; // Implicitly set to the size of the framebuffer + OpenGL::uvec2 size; // Implicitly set to the size of the framebuffer bool valid; // Range of VRAM taken up by buffer diff --git a/include/renderer_gl/textures.hpp b/include/renderer_gl/textures.hpp index c8836b5e..4c6ca2dd 100644 --- a/include/renderer_gl/textures.hpp +++ b/include/renderer_gl/textures.hpp @@ -14,7 +14,7 @@ struct Texture { u32 location; u32 config; // Magnification/minification filter, wrapping configs, etc PICA::TextureFmt format; - Math::uvec2 size; + OpenGL::uvec2 size; bool valid; // Range of VRAM taken up by buffer diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 4aa4fcc7..ba020bf5 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -7,6 +7,7 @@ #include "PICA/float_types.hpp" #include "PICA/gpu.hpp" #include "PICA/regs.hpp" +#include "math_util.hpp" CMRC_DECLARE(RendererGL); @@ -603,7 +604,7 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u } auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, inputHeight); - Math::Rect srcRect = srcFramebuffer.getSubRect(inputAddr, outputWidth, outputHeight); + Math::Rect srcRect = srcFramebuffer.getSubRect(inputAddr, outputWidth, outputHeight); // Apply scaling for the destination rectangle. 
if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { @@ -614,15 +615,16 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u outputHeight >>= 1; } - auto dstFramebuffer = getColourBuffer(outputAddr, outputFormat, outputWidth, outputHeight); - Math::Rect dstRect = dstFramebuffer.getSubRect(outputAddr, outputWidth, outputHeight); + auto destFramebuffer = getColourBuffer(outputAddr, outputFormat, outputWidth, outputHeight); + Math::Rect destRect = destFramebuffer.getSubRect(outputAddr, outputWidth, outputHeight); // Blit the framebuffers srcFramebuffer.fbo.bind(OpenGL::ReadFramebuffer); - dstFramebuffer.fbo.bind(OpenGL::DrawFramebuffer); - glBlitFramebuffer(srcRect.start.x(), srcRect.start.y(), srcRect.end.x(), srcRect.end.y(), - dstRect.start.x(), dstRect.start.y(), dstRect.end.x(), dstRect.end.y(), - GL_COLOR_BUFFER_BIT, GL_LINEAR); + destFramebuffer.fbo.bind(OpenGL::DrawFramebuffer); + glBlitFramebuffer( + srcRect.left, srcRect.top, srcRect.right, srcRect.bottom, destRect.left, destRect.top, destRect.right, destRect.bottom, GL_COLOR_BUFFER_BIT, + GL_LINEAR + ); } ColourBuffer RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height) { From 0b4497302be978d0733baa2c9d41f01079276dc3 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 4 Aug 2023 16:36:50 +0300 Subject: [PATCH 08/23] [GSP::GPU] Proper type assertions and visibility --- include/renderer_gl/surfaces.hpp | 2 +- include/services/gsp_gpu.hpp | 38 +++++++++++++++++--------------- src/core/services/gsp_gpu.cpp | 2 +- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/include/renderer_gl/surfaces.hpp b/include/renderer_gl/surfaces.hpp index 3cfb5ca4..62a3b61f 100644 --- a/include/renderer_gl/surfaces.hpp +++ b/include/renderer_gl/surfaces.hpp @@ -97,7 +97,7 @@ struct ColourBuffer { const u32 startOffset = (inputAddress - location) / sizePerPixel(format); const u32 x0 = 
(startOffset % (size.x() * 8)) / 8; const u32 y0 = (startOffset / (size.x() * 8)) * 8; - return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; + return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; } bool matches(ColourBuffer& other) { diff --git a/include/services/gsp_gpu.hpp b/include/services/gsp_gpu.hpp index 76793bbc..c9facffb 100644 --- a/include/services/gsp_gpu.hpp +++ b/include/services/gsp_gpu.hpp @@ -18,24 +18,6 @@ enum class GPUInterrupt : u8 { DMA = 6 }; -struct FramebufferInfo { - u32 activeFb; - u32 leftFramebufferVaddr; - u32 rightFramebufferVaddr; - u32 stride; - u32 format; - u32 displayFb; - u32 attribute; -}; - -struct FrameBufferUpdate { - u8 index; - u8 dirtyFlag; - u16 pad0; - std::array framebufferInfo; - u32 pad1; -}; - // More circular dependencies class Kernel; @@ -58,6 +40,26 @@ class GPUService { MAKE_LOG_FUNCTION(log, gspGPULogger) void processCommandBuffer(); + struct FramebufferInfo { + u32 activeFb; + u32 leftFramebufferVaddr; + u32 rightFramebufferVaddr; + u32 stride; + u32 format; + u32 displayFb; + u32 attribute; + }; + static_assert(sizeof(FramebufferInfo) == 28, "GSP::GPU::FramebufferInfo has the wrong size"); + + struct FramebufferUpdate { + u8 index; + u8 dirtyFlag; + u16 pad0; + std::array framebufferInfo; + u32 pad1; + }; + static_assert(sizeof(FramebufferUpdate) == 64, "GSP::GPU::FramebufferUpdate has the wrong size"); + // Service commands void acquireRight(u32 messagePointer); void flushDataCache(u32 messagePointer); diff --git a/src/core/services/gsp_gpu.cpp b/src/core/services/gsp_gpu.cpp index 31b9050f..d9c31ed7 100644 --- a/src/core/services/gsp_gpu.cpp +++ b/src/core/services/gsp_gpu.cpp @@ -128,7 +128,7 @@ void GPUService::requestInterrupt(GPUInterrupt type) { if (type == GPUInterrupt::VBlank0 || type == GPUInterrupt::VBlank1) { int screen = static_cast(type) - static_cast(GPUInterrupt::VBlank0); // 0 for top screen, 1 for bottom // TODO: Offset depends on GSP thread being 
triggered - FrameBufferUpdate* update = reinterpret_cast(&sharedMem[0x200 + screen * sizeof(FrameBufferUpdate)]); + FramebufferUpdate* update = reinterpret_cast(&sharedMem[0x200 + screen * sizeof(FramebufferUpdate)]); if (update->dirtyFlag & 1) { setBufferSwapImpl(screen, update->framebufferInfo[update->index]); From 74c3c3780588791217f889405f316d7b32d459a0 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 6 Aug 2023 01:12:49 +0300 Subject: [PATCH 09/23] [GSP::GPU] Correct setBufferSwap (also add a panic until a test case appears) --- src/core/services/gsp_gpu.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/core/services/gsp_gpu.cpp b/src/core/services/gsp_gpu.cpp index d9c31ed7..5f11a5e1 100644 --- a/src/core/services/gsp_gpu.cpp +++ b/src/core/services/gsp_gpu.cpp @@ -263,14 +263,20 @@ void GPUService::setAxiConfigQoSMode(u32 messagePointer) { void GPUService::setBufferSwap(u32 messagePointer) { FramebufferInfo info{}; - const u32 screenId = mem.read32(messagePointer + 4); // Selects either PDC0 or PDC1 + const u32 screenId = mem.read32(messagePointer + 4); // Selects either PDC0 or PDC1 info.activeFb = mem.read32(messagePointer + 8); info.leftFramebufferVaddr = mem.read32(messagePointer + 12); info.rightFramebufferVaddr = mem.read32(messagePointer + 16); info.stride = mem.read32(messagePointer + 20); info.format = mem.read32(messagePointer + 24); - info.displayFb = mem.read32(messagePointer + 28); // Selects either framebuffer A or B + info.displayFb = mem.read32(messagePointer + 28); // Selects either framebuffer A or B + + log("GSP::GPU::SetBufferSwap\n"); + Helpers::panic("Untested GSP::GPU::SetBufferSwap call"); + setBufferSwapImpl(screenId, info); + mem.write32(messagePointer, IPC::responseHeader(0x05, 1, 0)); + mem.write32(messagePointer + 4, Result::Success); } // Seems to also be completely undocumented From 297afd20d740ca10f15acd51149dc34d47775292 Mon Sep 17 
00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 6 Aug 2023 01:33:24 +0300 Subject: [PATCH 10/23] Parens --- src/core/services/gsp_gpu.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/services/gsp_gpu.cpp b/src/core/services/gsp_gpu.cpp index 5f11a5e1..abea9f03 100644 --- a/src/core/services/gsp_gpu.cpp +++ b/src/core/services/gsp_gpu.cpp @@ -220,7 +220,7 @@ void GPUService::flushDataCache(u32 messagePointer) { u32 address = mem.read32(messagePointer + 4); u32 size = mem.read32(messagePointer + 8); u32 processHandle = handle = mem.read32(messagePointer + 16); - log("GSP::GPU::FlushDataCache(address = %08X, size = %X, process = %X\n", address, size, processHandle); + log("GSP::GPU::FlushDataCache(address = %08X, size = %X, process = %X)\n", address, size, processHandle); mem.write32(messagePointer, IPC::responseHeader(0x8, 1, 0)); mem.write32(messagePointer + 4, Result::Success); @@ -230,7 +230,7 @@ void GPUService::storeDataCache(u32 messagePointer) { u32 address = mem.read32(messagePointer + 4); u32 size = mem.read32(messagePointer + 8); u32 processHandle = handle = mem.read32(messagePointer + 16); - log("GSP::GPU::StoreDataCache(address = %08X, size = %X, process = %X\n", address, size, processHandle); + log("GSP::GPU::StoreDataCache(address = %08X, size = %X, process = %X)\n", address, size, processHandle); mem.write32(messagePointer, IPC::responseHeader(0x1F, 1, 0)); mem.write32(messagePointer + 4, Result::Success); From 82d9511993bfa8735c2d121cfa82ddd01a92a796 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Sun, 6 Aug 2023 14:25:32 +0300 Subject: [PATCH 11/23] gpu: Implement basic texcopy * Improves rendering in FE:A but the screens will still not show because it requires surface validation --- include/PICA/gpu.hpp | 4 ++ include/helpers.hpp | 12 ++++ include/renderer.hpp | 1 + include/renderer_gl/renderer_gl.hpp | 3 +- include/renderer_gl/surface_cache.hpp | 10 +++ 
include/renderer_gl/surfaces.hpp | 3 +- include/renderer_null/renderer_null.hpp | 1 + include/renderer_sw/renderer_sw.hpp | 3 +- include/renderer_vk/renderer_vk.hpp | 3 +- src/core/renderer_gl/renderer_gl.cpp | 91 +++++++++++++++++++++--- src/core/renderer_gl/textures.cpp | 3 + src/core/renderer_null/renderer_null.cpp | 1 + src/core/renderer_sw/renderer_sw.cpp | 6 +- src/core/renderer_vk/renderer_vk.cpp | 4 +- src/core/services/gsp_gpu.cpp | 10 ++- 15 files changed, 139 insertions(+), 16 deletions(-) diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 955fb0ae..fa6a59bd 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -117,6 +117,10 @@ class GPU { renderer->displayTransfer(inputAddr, outputAddr, inputSize, outputSize, flags); } + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { + renderer->textureCopy(inputAddr, outputAddr, totalBytes, inputSize, outputSize, flags); + } + // Read a value of type T from physical address paddr // This is necessary because vertex attribute fetching uses physical addresses template diff --git a/include/helpers.hpp b/include/helpers.hpp index d07c2bb6..f13fc720 100644 --- a/include/helpers.hpp +++ b/include/helpers.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "termcolor.hpp" @@ -30,6 +31,17 @@ using s32 = std::int32_t; using s64 = std::int64_t; namespace Helpers { + template + std::string format(const std::string& fmt, Args&&... args) { + const int size = std::snprintf(nullptr, 0, fmt.c_str(), args...) + 1; + if (size <= 0) { + return {}; + } + const auto buf = std::make_unique(size); + std::snprintf(buf.get(), size, fmt.c_str(), args ...); + return std::string(buf.get(), buf.get() + size - 1); + } + // Unconditional panic, unlike panicDev which does not panic on user builds template [[noreturn]] static void panic(const char* fmt, Args&&... 
args) { diff --git a/include/renderer.hpp b/include/renderer.hpp index fff25ab5..c189da7f 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -49,6 +49,7 @@ class Renderer { virtual void initGraphicsContext(SDL_Window* window) = 0; // Initialize graphics context virtual void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) = 0; // Clear a GPU buffer in VRAM virtual void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) = 0; // Perform display transfer + virtual void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) = 0; virtual void drawVertices(PICA::PrimType primType, std::span vertices) = 0; // Draw the given vertices virtual void screenshot(const std::string& name) = 0; diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 52d97524..ec0906b1 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -77,9 +77,10 @@ class RendererGL final : public Renderer { void initGraphicsContext(SDL_Window* window) override; // Initialize graphics context void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; // Clear a GPU buffer in VRAM void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; // Perform display transfer + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; void drawVertices(PICA::PrimType primType, std::span vertices) override; // Draw the given vertices - ColourBuffer getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height); + std::optional getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); // Take a screenshot of the screen and store it in a file void screenshot(const std::string& name) override; diff --git a/include/renderer_gl/surface_cache.hpp 
b/include/renderer_gl/surface_cache.hpp index b2e5cc29..5323741f 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -76,6 +76,16 @@ public: size++; + // Find an existing surface we completely invalidate and overwrite it with the new surface + for (auto& e : buffer) { + if (e.valid && e.range.lower() >= surface.range.lower() && e.range.upper() <= surface.range.upper()) { + e.free(); + e = surface; + e.allocate(); + return e; + } + } + // Find an invalid entry in the cache and overwrite it with the new surface for (auto& e : buffer) { if (!e.valid) { diff --git a/include/renderer_gl/surfaces.hpp b/include/renderer_gl/surfaces.hpp index 62a3b61f..88355a8c 100644 --- a/include/renderer_gl/surfaces.hpp +++ b/include/renderer_gl/surfaces.hpp @@ -65,7 +65,8 @@ struct ColourBuffer { texture.setMagFilter(OpenGL::Linear); glBindTexture(GL_TEXTURE_2D, prevTexture); - //Helpers::panic("Creating FBO: %d, %d\n", size.x(), size.y()); + const auto name = Helpers::format("Surface %dx%d %s from 0x%08X", size.x(), size.y(), PICA::textureFormatToString(format), location); + OpenGL::setObjectLabel(GL_TEXTURE, texture.handle(), name.c_str()); fbo.createWithDrawTexture(texture); fbo.bind(OpenGL::DrawAndReadFramebuffer); diff --git a/include/renderer_null/renderer_null.hpp b/include/renderer_null/renderer_null.hpp index 22293ba6..231ed41d 100644 --- a/include/renderer_null/renderer_null.hpp +++ b/include/renderer_null/renderer_null.hpp @@ -12,6 +12,7 @@ class RendererNull final : public Renderer { void initGraphicsContext(SDL_Window* window) override; void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; void drawVertices(PICA::PrimType primType, std::span vertices) override; void 
screenshot(const std::string& name) override; }; diff --git a/include/renderer_sw/renderer_sw.hpp b/include/renderer_sw/renderer_sw.hpp index 171fc084..9e68b00f 100644 --- a/include/renderer_sw/renderer_sw.hpp +++ b/include/renderer_sw/renderer_sw.hpp @@ -12,6 +12,7 @@ class RendererSw final : public Renderer { void initGraphicsContext(SDL_Window* window) override; void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; void drawVertices(PICA::PrimType primType, std::span vertices) override; void screenshot(const std::string& name) override; -}; \ No newline at end of file +}; diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 4b6e65b0..59d8cdae 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -52,6 +52,7 @@ class RendererVK final : public Renderer { void initGraphicsContext(SDL_Window* window) override; void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; void drawVertices(PICA::PrimType primType, std::span vertices) override; void screenshot(const std::string& name) override; -}; \ No newline at end of file +}; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index ba020bf5..90ca1af6 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -599,12 +599,11 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u u32 outputWidth = outputSize & 0xffff; u32 outputHeight = outputSize 
>> 16; - if (inputWidth != outputWidth) { - // Helpers::warn("Strided display transfer is not handled correctly!\n"); - } + OpenGL::DebugScope scope("DisplayTransfer inputAddr 0x%08X outputAddr 0x%08X inputWidth %d outputWidth %d inputWidth %d outputHeight %d", + inputAddr, outputAddr, inputWidth, outputWidth, inputHeight, outputHeight); - auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, inputHeight); - Math::Rect srcRect = srcFramebuffer.getSubRect(inputAddr, outputWidth, outputHeight); + auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, outputHeight); + Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, outputWidth, outputHeight); // Apply scaling for the destination rectangle. if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { @@ -616,18 +615,88 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u } auto destFramebuffer = getColourBuffer(outputAddr, outputFormat, outputWidth, outputHeight); - Math::Rect destRect = destFramebuffer.getSubRect(outputAddr, outputWidth, outputHeight); + Math::Rect destRect = destFramebuffer->getSubRect(outputAddr, outputWidth, outputHeight); + + if (inputWidth != outputWidth) { + // Helpers::warn("Strided display transfer is not handled correctly!\n"); + } // Blit the framebuffers - srcFramebuffer.fbo.bind(OpenGL::ReadFramebuffer); - destFramebuffer.fbo.bind(OpenGL::DrawFramebuffer); + srcFramebuffer->fbo.bind(OpenGL::ReadFramebuffer); + destFramebuffer->fbo.bind(OpenGL::DrawFramebuffer); glBlitFramebuffer( srcRect.left, srcRect.top, srcRect.right, srcRect.bottom, destRect.left, destRect.top, destRect.right, destRect.bottom, GL_COLOR_BUFFER_BIT, GL_LINEAR ); } -ColourBuffer RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height) { +void RendererGL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { + // Texture copy size is aligned to 16 byte units + 
const u32 copySize = totalBytes & ~0xf; + if (copySize == 0) { + printf("TextureCopy total bytes less than 16!\n"); + return; + } + + // The width and gap are provided in 16-byte units. + const u32 inputWidth = (inputSize & 0xffff) << 4; + const u32 inputGap = (inputSize >> 16) << 4; + const u32 outputWidth = (outputSize & 0xffff) << 4; + const u32 outputGap = (outputSize >> 16) << 4; + + OpenGL::DebugScope scope("TextureCopy inputAddr 0x%08X outputAddr 0x%08X totalBytes %d inputWidth %d inputGap %d outputWidth %d outputGap %d", + inputAddr, outputAddr, totalBytes, inputWidth, inputGap, outputWidth, outputGap); + + if (inputGap != 0 || outputGap != 0) { + Helpers::warn("Strided texture copy\n"); + } + if (inputWidth != outputWidth) { + Helpers::warn("Input width does not match output width, cannot accelerate texture copy!\n"); + return; + } + + // Texture copy is a raw data copy in PICA, which means no format or tiling information is provided to the engine. + // Depending if the target surface is linear or tiled, games set inputWidth to either the width of the texture or + // the width multiplied by eight (because tiles are stored linearly in memory). + // To properly accelerate this we must examine each surface individually. For now we assume the most common case + // of tiled surface with RGBA8 format. If our assumption does not hold true, we abort the texture copy as inserting + // that surface is not correct. + + // We assume the source surface is tiled and RGBA8. inputWidth is in bytes so divide it + // by eight * sizePerPixel(RGBA8) to convert it to a useable width. + const u32 bpp = sizePerPixel(PICA::ColorFmt::RGBA8); + const u32 copyStride = (inputWidth + inputGap) / (8 * bpp); + const u32 copyWidth = inputWidth / (8 * bpp); + + // inputHeight/outputHeight are typically set to zero so they cannot be used to get the height of the copy region + // in contrast to display transfer. Compute height manually by dividing the copy size with the copy width. 
The result + // is the number of vertical tiles so multiply that by eight to get the actual copy height. + const u32 copyHeight = (copySize / inputWidth) * 8; + + // Find the source surface. + auto srcFramebuffer = getColourBuffer(inputAddr, PICA::ColorFmt::RGBA8, copyStride, copyHeight, false); + if (!srcFramebuffer) { + printf("TextureCopy failed to locate src framebuffer!\n"); + return; + } + + Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, copyWidth, copyHeight); + + // Assume the destination surface has the same format. Unless the surfaces have the same block width, + // texture copy does not make sense. + auto destFramebuffer = getColourBuffer(outputAddr, srcFramebuffer->format, copyWidth, copyHeight); + Math::Rect destRect = destFramebuffer->getSubRect(outputAddr, copyWidth, copyHeight); + + // Blit the framebuffers + srcFramebuffer->fbo.bind(OpenGL::ReadFramebuffer); + destFramebuffer->fbo.bind(OpenGL::DrawFramebuffer); + glBlitFramebuffer( + srcRect.left, srcRect.top, srcRect.right, srcRect.bottom, destRect.left, destRect.top, destRect.right, destRect.bottom, GL_COLOR_BUFFER_BIT, + GL_LINEAR + ); +} + +std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound) { // Try to find an already existing buffer that contains the provided address // This is a more relaxed check compared to getColourFBO as display transfer/texcopy may refer to // subrect of a surface and in case of texcopy we don't know the format of the surface. @@ -636,6 +705,10 @@ ColourBuffer RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 wi return buffer.value().get(); } + if (!createIfnotFound) { + return std::nullopt; + } + // Otherwise create and cache a new buffer. 
ColourBuffer sampleBuffer(addr, format, width, height); return colourBufferCache.add(sampleBuffer); diff --git a/src/core/renderer_gl/textures.cpp b/src/core/renderer_gl/textures.cpp index 9e303fd9..7a03c97d 100644 --- a/src/core/renderer_gl/textures.cpp +++ b/src/core/renderer_gl/textures.cpp @@ -9,6 +9,9 @@ void Texture::allocate() { texture.create(size.u(), size.v(), GL_RGBA8); texture.bind(); + const auto name = Helpers::format("Surface %dx%d %s from 0x%08X", size.x(), size.y(), PICA::textureFormatToString(format), location); + OpenGL::setObjectLabel(GL_TEXTURE, texture.handle(), name.c_str()); + setNewConfig(config); } diff --git a/src/core/renderer_null/renderer_null.cpp b/src/core/renderer_null/renderer_null.cpp index 546d8a81..b2ebd1d6 100644 --- a/src/core/renderer_null/renderer_null.cpp +++ b/src/core/renderer_null/renderer_null.cpp @@ -9,5 +9,6 @@ void RendererNull::display() {} void RendererNull::initGraphicsContext(SDL_Window* window) {} void RendererNull::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {} void RendererNull::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {} +void RendererNull::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {} void RendererNull::drawVertices(PICA::PrimType primType, std::span vertices) {} void RendererNull::screenshot(const std::string& name) {} diff --git a/src/core/renderer_sw/renderer_sw.cpp b/src/core/renderer_sw/renderer_sw.cpp index 1ea452ae..b671c180 100644 --- a/src/core/renderer_sw/renderer_sw.cpp +++ b/src/core/renderer_sw/renderer_sw.cpp @@ -14,8 +14,12 @@ void RendererSw::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u printf("RendererSW: Unimplemented displayTransfer call\n"); } +void RendererSw::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { + printf("RendererSW: Unimplemented textureCopy call\n"); +} + void 
RendererSw::drawVertices(PICA::PrimType primType, std::span vertices) { printf("RendererSW: Unimplemented drawVertices call\n"); } -void RendererSw::screenshot(const std::string& name) { printf("RendererSW: Unimplemented screenshot call\n"); } \ No newline at end of file +void RendererSw::screenshot(const std::string& name) { printf("RendererSW: Unimplemented screenshot call\n"); } diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 23048f51..4ec70412 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -542,6 +542,8 @@ void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 co void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {} +void RendererVK::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {} + void RendererVK::drawVertices(PICA::PrimType primType, std::span vertices) {} -void RendererVK::screenshot(const std::string& name) {} \ No newline at end of file +void RendererVK::screenshot(const std::string& name) {} diff --git a/src/core/services/gsp_gpu.cpp b/src/core/services/gsp_gpu.cpp index abea9f03..25628e4b 100644 --- a/src/core/services/gsp_gpu.cpp +++ b/src/core/services/gsp_gpu.cpp @@ -441,7 +441,15 @@ void GPUService::processCommandList(u32* cmd) { // TODO: Emulate the transfer engine & its registers // Then this can be emulated by just writing the appropriate values there void GPUService::triggerTextureCopy(u32* cmd) { - Helpers::warn("GSP::GPU::TriggerTextureCopy (unimplemented)\n"); + const u32 inputAddr = VaddrToPaddr(cmd[1]); + const u32 outputAddr = VaddrToPaddr(cmd[2]); + const u32 totalBytes = cmd[3]; + const u32 inputSize = cmd[4]; + const u32 outputSize = cmd[5]; + const u32 flags = cmd[6]; + + log("GSP::GPU::TriggerTextureCopy (Stubbed)\n"); + gpu.textureCopy(inputAddr, outputAddr, totalBytes, inputSize, outputSize, flags); // This 
uses the transfer engine and thus needs to fire a PPF interrupt. // NSMB2 relies on this requestInterrupt(GPUInterrupt::PPF); From 9d7584960049513be99375917d439199bc98247f Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 6 Aug 2023 15:20:48 +0300 Subject: [PATCH 12/23] [GL] Lock Helpers::format calls behind GPU_DEBUG_INFO macro --- include/renderer_gl/surfaces.hpp | 2 ++ src/core/renderer_gl/textures.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/include/renderer_gl/surfaces.hpp b/include/renderer_gl/surfaces.hpp index 88355a8c..043c63d0 100644 --- a/include/renderer_gl/surfaces.hpp +++ b/include/renderer_gl/surfaces.hpp @@ -65,8 +65,10 @@ struct ColourBuffer { texture.setMagFilter(OpenGL::Linear); glBindTexture(GL_TEXTURE_2D, prevTexture); +#ifdef GPU_DEBUG_INFO const auto name = Helpers::format("Surface %dx%d %s from 0x%08X", size.x(), size.y(), PICA::textureFormatToString(format), location); OpenGL::setObjectLabel(GL_TEXTURE, texture.handle(), name.c_str()); +#endif fbo.createWithDrawTexture(texture); fbo.bind(OpenGL::DrawAndReadFramebuffer); diff --git a/src/core/renderer_gl/textures.cpp b/src/core/renderer_gl/textures.cpp index 7a03c97d..7f4c31bf 100644 --- a/src/core/renderer_gl/textures.cpp +++ b/src/core/renderer_gl/textures.cpp @@ -9,8 +9,10 @@ void Texture::allocate() { texture.create(size.u(), size.v(), GL_RGBA8); texture.bind(); +#ifdef GPU_DEBUG_INFO const auto name = Helpers::format("Surface %dx%d %s from 0x%08X", size.x(), size.y(), PICA::textureFormatToString(format), location); OpenGL::setObjectLabel(GL_TEXTURE, texture.handle(), name.c_str()); +#endif setNewConfig(config); } From 9e529d73f41a1f68baf9f4034d0d6b1af935d30e Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 7 Aug 2023 00:00:59 +0300 Subject: [PATCH 13/23] Partially revert depth/color buffer cache bump --- include/renderer_gl/renderer_gl.hpp | 4 ++-- 1 file changed, 
2 insertions(+), 2 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index ec0906b1..dc3e69c2 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -44,8 +44,8 @@ class RendererGL final : public Renderer { float oldDepthOffset = 0.0; bool oldDepthmapEnable = false; - SurfaceCache depthBufferCache; - SurfaceCache colourBufferCache; + SurfaceCache depthBufferCache; + SurfaceCache colourBufferCache; SurfaceCache textureCache; // Dummy VAO/VBO for blitting the final output From ca3c2550f268067465bc4e4aa84940a2fe086d86 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Mon, 7 Aug 2023 00:19:16 +0300 Subject: [PATCH 14/23] Fix trampling clip state --- src/core/renderer_gl/renderer_gl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 90ca1af6..cd4a988f 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -458,8 +458,8 @@ void RendererGL::display() { gl.useProgram(displayProgram); gl.bindVAO(dummyVAO); - OpenGL::disableClipPlane(0); - OpenGL::disableClipPlane(1); + gl.disableClipPlane(0); + gl.disableClipPlane(1); using namespace PICA::ExternalRegs; const u32 topScreenAddr = externalRegs[Framebuffer0AFirstAddr]; From 9417c75ca7234b4ba434606f6087fade38420a77 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Wed, 9 Aug 2023 11:49:19 +0300 Subject: [PATCH 15/23] gsp: Fix setbufferswap framebuffer selection --- include/PICA/regs.hpp | 3 +-- src/core/renderer_gl/renderer_gl.cpp | 8 ++++++-- src/core/services/gsp_gpu.cpp | 8 ++++---- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index d67c2025..55b3e20b 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -191,8 +191,7 @@ namespace PICA { VramBankControl = 0xB, GPUBusy = 0xC, 
BacklightControl = 0xBC, - // TODO: Framebuffer regs - Framebuffer0Size = 0x2F, + Framebuffer0Size = 0x118, Framebuffer0AFirstAddr = 0x119, Framebuffer0ASecondAddr = 0x11A, Framebuffer0Config = 0x11B, diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index cd4a988f..e99e3196 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -462,13 +462,17 @@ void RendererGL::display() { gl.disableClipPlane(1); using namespace PICA::ExternalRegs; - const u32 topScreenAddr = externalRegs[Framebuffer0AFirstAddr]; - const u32 bottomScreenAddr = externalRegs[Framebuffer1AFirstAddr]; + const u32 topActiveFb = externalRegs[Framebuffer0Select] & 1; + const u32 topScreenAddr = externalRegs[topActiveFb == 0 ? Framebuffer0AFirstAddr : Framebuffer0ASecondAddr]; + const u32 bottomActiveFb = externalRegs[Framebuffer1Select] & 1; + const u32 bottomScreenAddr = externalRegs[bottomActiveFb == 0 ? Framebuffer1AFirstAddr : Framebuffer1ASecondAddr]; auto topScreen = colourBufferCache.findFromAddress(topScreenAddr); auto bottomScreen = colourBufferCache.findFromAddress(bottomScreenAddr); screenFramebuffer.bind(OpenGL::DrawFramebuffer); + OpenGL::clearColor(); + if (topScreen) { topScreen->get().texture.bind(); OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport diff --git a/src/core/services/gsp_gpu.cpp b/src/core/services/gsp_gpu.cpp index 25628e4b..bfecfbf8 100644 --- a/src/core/services/gsp_gpu.cpp +++ b/src/core/services/gsp_gpu.cpp @@ -398,16 +398,16 @@ void GPUService::setBufferSwapImpl(u32 screenId, const FramebufferInfo& info) { constexpr static std::array fb_addresses = { Framebuffer0AFirstAddr, - Framebuffer0ASecondAddr, Framebuffer0BFirstAddr, - Framebuffer0BSecondAddr, Framebuffer1AFirstAddr, - Framebuffer1ASecondAddr, Framebuffer1BFirstAddr, + Framebuffer0ASecondAddr, + Framebuffer0BSecondAddr, + Framebuffer1ASecondAddr, Framebuffer1BSecondAddr, }; - const u32 fb_index = screenId * 4 + 
info.activeFb * 2; + const u32 fb_index = info.activeFb * 4 + screenId * 2; gpu.writeExternalReg(fb_addresses[fb_index], VaddrToPaddr(info.leftFramebufferVaddr)); gpu.writeExternalReg(fb_addresses[fb_index + 1], VaddrToPaddr(info.rightFramebufferVaddr)); From d6e5f658d640358038d20070cf6bb54b062438c8 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Sat, 12 Aug 2023 03:19:06 +0300 Subject: [PATCH 16/23] renderer_gl: Proper viewport rendering * I've also corrected a mistake made in the displayTransfer/textureCopy rectangles. --- include/PICA/regs.hpp | 1 + src/core/renderer_gl/renderer_gl.cpp | 22 ++++++++++++++-------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 55b3e20b..70cecf7b 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -22,6 +22,7 @@ namespace PICA { ShaderOutputCount = 0x4F, ShaderOutmap0 = 0x50, + ViewportXY = 0x68, DepthmapEnable = 0x6D, // Texture registers diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index e99e3196..693b98d3 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -369,8 +369,8 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v } setupBlending(); - OpenGL::Framebuffer poop = getColourFBO(); - poop.bind(OpenGL::DrawAndReadFramebuffer); + auto poop = getColourBuffer(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]); + poop->fbo.bind(OpenGL::DrawAndReadFramebuffer); const u32 depthControl = regs[PICA::InternalRegs::DepthAndColorMask]; const bool depthWrite = regs[PICA::InternalRegs::DepthBufferWrite]; @@ -413,10 +413,12 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v updateLightingLUT(); } - // TODO: Actually use this - GLsizei viewportWidth = GLsizei(f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0f); - GLsizei viewportHeight = GLsizei(f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 
0xffffff).toFloat32() * 2.0f); - OpenGL::setViewport(viewportWidth, viewportHeight); + const GLsizei viewportX = regs[PICA::InternalRegs::ViewportXY] & 0x3ff; + const GLsizei viewportY = (regs[PICA::InternalRegs::ViewportXY] >> 16) & 0x3ff; + const GLsizei viewportWidth = GLsizei(f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0f); + const GLsizei viewportHeight = GLsizei(f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 0xffffff).toFloat32() * 2.0f); + const auto rect = poop->getSubRect(colourBufferLoc, fbSize[0], fbSize[1]); + OpenGL::setViewport(rect.left + viewportX, rect.bottom + viewportY, viewportWidth, viewportHeight); const u32 stencilConfig = regs[PICA::InternalRegs::StencilTest]; const bool stencilEnable = getBit<0>(stencilConfig); @@ -628,8 +630,10 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u // Blit the framebuffers srcFramebuffer->fbo.bind(OpenGL::ReadFramebuffer); destFramebuffer->fbo.bind(OpenGL::DrawFramebuffer); + gl.disableScissor(); + glBlitFramebuffer( - srcRect.left, srcRect.top, srcRect.right, srcRect.bottom, destRect.left, destRect.top, destRect.right, destRect.bottom, GL_COLOR_BUFFER_BIT, + srcRect.left, srcRect.bottom, srcRect.right, srcRect.top, destRect.left, destRect.bottom, destRect.right, destRect.top, GL_COLOR_BUFFER_BIT, GL_LINEAR ); } @@ -694,8 +698,10 @@ void RendererGL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 // Blit the framebuffers srcFramebuffer->fbo.bind(OpenGL::ReadFramebuffer); destFramebuffer->fbo.bind(OpenGL::DrawFramebuffer); + gl.disableScissor(); + glBlitFramebuffer( - srcRect.left, srcRect.top, srcRect.right, srcRect.bottom, destRect.left, destRect.top, destRect.right, destRect.bottom, GL_COLOR_BUFFER_BIT, + srcRect.left, srcRect.bottom, srcRect.right, srcRect.top, destRect.left, destRect.bottom, destRect.right, destRect.top, GL_COLOR_BUFFER_BIT, GL_LINEAR ); } From dc342675f51712c517631d95c1ad179c5f97fdec Mon Sep 17 
00:00:00 2001 From: GPUCode Date: Sat, 12 Aug 2023 12:38:45 +0300 Subject: [PATCH 17/23] renderer_gl: Add display transfer flip --- src/core/renderer_gl/renderer_gl.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 693b98d3..8b2c31bb 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -600,17 +600,22 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u const u32 inputHeight = inputSize >> 16; const auto inputFormat = ToColorFmt(Helpers::getBits<8, 3>(flags)); const auto outputFormat = ToColorFmt(Helpers::getBits<12, 3>(flags)); + const bool verticalFlip = flags & 1; const PICA::Scaling scaling = static_cast(Helpers::getBits<24, 2>(flags)); u32 outputWidth = outputSize & 0xffff; u32 outputHeight = outputSize >> 16; - OpenGL::DebugScope scope("DisplayTransfer inputAddr 0x%08X outputAddr 0x%08X inputWidth %d outputWidth %d inputWidth %d outputHeight %d", + OpenGL::DebugScope scope("DisplayTransfer inputAddr 0x%08X outputAddr 0x%08X inputWidth %d outputWidth %d inputHeight %d outputHeight %d", inputAddr, outputAddr, inputWidth, outputWidth, inputHeight, outputHeight); auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, outputHeight); Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, outputWidth, outputHeight); + if (verticalFlip) { + std::swap(srcRect.bottom, srcRect.top); + } + // Apply scaling for the destination rectangle. 
if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { outputWidth >>= 1; @@ -656,10 +661,10 @@ void RendererGL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputAddr, outputAddr, totalBytes, inputWidth, inputGap, outputWidth, outputGap); if (inputGap != 0 || outputGap != 0) { - Helpers::warn("Strided texture copy\n"); + //Helpers::warn("Strided texture copy\n"); } if (inputWidth != outputWidth) { - Helpers::warn("Input width does not match output width, cannot accelerate texture copy!\n"); + Helpers::warn("Input width does not match output width, cannot accelerate texture copy!"); return; } From 4107a84c0d772804538a3cbc1763e9713a65cbbc Mon Sep 17 00:00:00 2001 From: GPUCode Date: Sat, 12 Aug 2023 15:23:50 +0300 Subject: [PATCH 18/23] renderer_gl: Avoid leaking clear color to display --- src/core/renderer_gl/renderer_gl.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 8b2c31bb..181696e5 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -473,6 +473,7 @@ void RendererGL::display() { auto bottomScreen = colourBufferCache.findFromAddress(bottomScreenAddr); screenFramebuffer.bind(OpenGL::DrawFramebuffer); + OpenGL::setClearColor(0.f); OpenGL::clearColor(); if (topScreen) { From 0f5c41cd69d31dcd0a172e1720bfe4daa3977dca Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 12 Aug 2023 17:02:00 +0300 Subject: [PATCH 19/23] Add clear colour to GL state manager --- include/renderer_gl/gl_state.hpp | 16 +- include/renderer_gl/surfaces.hpp | 233 +++++++++++++-------------- src/core/renderer_gl/gl_state.cpp | 10 ++ src/core/renderer_gl/renderer_gl.cpp | 7 +- 4 files changed, 138 insertions(+), 128 deletions(-) diff --git a/include/renderer_gl/gl_state.hpp b/include/renderer_gl/gl_state.hpp index f2680eb4..69960f1e 100644 --- a/include/renderer_gl/gl_state.hpp +++ 
b/include/renderer_gl/gl_state.hpp @@ -34,6 +34,8 @@ struct GLStateManager { bool redMask, greenMask, blueMask, alphaMask; bool depthMask; + float clearRed, clearBlue, clearGreen, clearAlpha; + GLuint stencilMask; GLuint boundVAO; GLuint boundVBO; @@ -44,6 +46,7 @@ struct GLStateManager { void reset(); void resetBlend(); + void resetClearing(); void resetClipping(); void resetColourMask(); void resetDepth(); @@ -209,8 +212,19 @@ struct GLStateManager { } } + void setClearColour(float r, float g, float b, float a) { + if (clearRed != r || clearGreen != g || clearBlue != b || clearAlpha != a) { + clearRed = r; + clearGreen = g; + clearBlue = b; + clearAlpha = a; + + OpenGL::setClearColor(r, g, b, a); + } + } + void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast(func)); } }; static_assert(std::is_trivially_constructible(), "OpenGL State Manager class is not trivially constructible!"); -static_assert(std::is_trivially_destructible(), "OpenGL State Manager class is not trivially destructible!"); +static_assert(std::is_trivially_destructible(), "OpenGL State Manager class is not trivially destructible!"); \ No newline at end of file diff --git a/include/renderer_gl/surfaces.hpp b/include/renderer_gl/surfaces.hpp index 043c63d0..1c2976d6 100644 --- a/include/renderer_gl/surfaces.hpp +++ b/include/renderer_gl/surfaces.hpp @@ -9,83 +9,63 @@ template using Interval = boost::icl::right_open_interval; struct ColourBuffer { - u32 location; - PICA::ColorFmt format; - OpenGL::uvec2 size; - bool valid; + u32 location; + PICA::ColorFmt format; + OpenGL::uvec2 size; + bool valid; - // Range of VRAM taken up by buffer - Interval range; - // OpenGL resources allocated to buffer - OpenGL::Texture texture; - OpenGL::Framebuffer fbo; + // Range of VRAM taken up by buffer + Interval range; + // OpenGL resources allocated to buffer + OpenGL::Texture texture; + OpenGL::Framebuffer fbo; - GLenum internalFormat; - GLenum fmt; - GLenum type; + ColourBuffer() : valid(false) {} 
- ColourBuffer() : valid(false) {} - - ColourBuffer(u32 loc, PICA::ColorFmt format, u32 x, u32 y, bool valid = true) - : location(loc), format(format), size({x, y}), valid(valid) { - - u64 endLoc = (u64)loc + sizeInBytes(); - // Check if start and end are valid here - range = Interval(loc, (u32)endLoc); - } + ColourBuffer(u32 loc, PICA::ColorFmt format, u32 x, u32 y, bool valid = true) : location(loc), format(format), size({x, y}), valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } void allocate() { - // Internal formats for the texture based on format - static constexpr std::array internalFormats = { - GL_RGBA8, GL_RGB8, GL_RGB5_A1, GL_RGB565, GL_RGBA4 - }; - - // Format of the texture - static constexpr std::array formats = { - GL_RGBA, GL_BGR, GL_RGBA, GL_RGB, GL_RGBA, - }; - - static constexpr std::array types = { - GL_UNSIGNED_INT_8_8_8_8, GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT_5_5_5_1, - GL_UNSIGNED_SHORT_5_6_5, GL_UNSIGNED_SHORT_4_4_4_4, - }; - - internalFormat = internalFormats[(int)format]; - fmt = formats[(int)format]; - type = types[(int)format]; - - - // Create texture for the FBO, setting up filters and the like - // Reading back the current texture is slow, but allocate calls should be few and far between. - // If this becomes a bottleneck, we can fix it semi-easily - auto prevTexture = OpenGL::getTex2D(); - texture.create(size.x(), size.y(), internalFormat); - texture.bind(); - texture.setMinFilter(OpenGL::Linear); - texture.setMagFilter(OpenGL::Linear); - glBindTexture(GL_TEXTURE_2D, prevTexture); + // Create texture for the FBO, setting up filters and the like + // Reading back the current texture is slow, but allocate calls should be few and far between. 
+ // If this becomes a bottleneck, we can fix it semi-easily + auto prevTexture = OpenGL::getTex2D(); + texture.create(size.x(), size.y(), GL_RGBA8); + texture.bind(); + texture.setMinFilter(OpenGL::Linear); + texture.setMagFilter(OpenGL::Linear); + glBindTexture(GL_TEXTURE_2D, prevTexture); #ifdef GPU_DEBUG_INFO const auto name = Helpers::format("Surface %dx%d %s from 0x%08X", size.x(), size.y(), PICA::textureFormatToString(format), location); OpenGL::setObjectLabel(GL_TEXTURE, texture.handle(), name.c_str()); #endif - fbo.createWithDrawTexture(texture); - fbo.bind(OpenGL::DrawAndReadFramebuffer); + fbo.createWithDrawTexture(texture); + fbo.bind(OpenGL::DrawAndReadFramebuffer); - if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) - Helpers::panic("Incomplete framebuffer"); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + Helpers::panic("Incomplete framebuffer"); + } - // TODO: This should not clear the framebuffer contents. It should load them from VRAM. - GLint oldViewport[4]; - glGetIntegerv(GL_VIEWPORT, oldViewport); - OpenGL::setViewport(size.x(), size.y()); - OpenGL::setClearColor(0.0, 0.0, 0.0, 1.0); - OpenGL::clearColor(); - OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]); - } + // TODO: This should not clear the framebuffer contents. It should load them from VRAM. 
+ GLint oldViewport[4]; + GLfloat oldClearColour[4]; - void free() { + glGetIntegerv(GL_VIEWPORT, oldViewport); + glGetFloatv(GL_COLOR_CLEAR_VALUE, oldClearColour); + + OpenGL::setViewport(size.x(), size.y()); + OpenGL::setClearColor(0.0, 0.0, 0.0, 1.0); + OpenGL::clearColor(); + OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]); + OpenGL::setClearColor(oldClearColour[0], oldClearColour[1], oldClearColour[2], oldClearColour[3]); + } + + void free() { valid = false; if (texture.exists() || fbo.exists()) { @@ -103,88 +83,93 @@ struct ColourBuffer { return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; } - bool matches(ColourBuffer& other) { - return location == other.location && format == other.format && - size.x() == other.size.x() && size.y() == other.size.y(); - } + bool matches(ColourBuffer& other) { + return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y(); + } - size_t sizeInBytes() { - return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); - } + size_t sizeInBytes() { + return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); + } }; struct DepthBuffer { - u32 location; - PICA::DepthFmt format; - OpenGL::uvec2 size; // Implicitly set to the size of the framebuffer - bool valid; + u32 location; + PICA::DepthFmt format; + OpenGL::uvec2 size; // Implicitly set to the size of the framebuffer + bool valid; - // Range of VRAM taken up by buffer - Interval range; - // OpenGL texture used for storing depth/stencil - OpenGL::Texture texture; - OpenGL::Framebuffer fbo; + // Range of VRAM taken up by buffer + Interval range; + // OpenGL texture used for storing depth/stencil + OpenGL::Texture texture; + OpenGL::Framebuffer fbo; - DepthBuffer() : valid(false) {} + DepthBuffer() : valid(false) {} - DepthBuffer(u32 loc, PICA::DepthFmt format, u32 x, u32 y, bool valid = true) : - location(loc), format(format), size({x, y}), 
valid(valid) { + DepthBuffer(u32 loc, PICA::DepthFmt format, u32 x, u32 y, bool valid = true) : location(loc), format(format), size({x, y}), valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } - u64 endLoc = (u64)loc + sizeInBytes(); - // Check if start and end are valid here - range = Interval(loc, (u32)endLoc); - } + void allocate() { + // Create texture for the FBO, setting up filters and the like + // Reading back the current texture is slow, but allocate calls should be few and far between. + // If this becomes a bottleneck, we can fix it semi-easily + auto prevTexture = OpenGL::getTex2D(); - void allocate() { - // Create texture for the FBO, setting up filters and the like - // Reading back the current texture is slow, but allocate calls should be few and far between. - // If this becomes a bottleneck, we can fix it semi-easily - auto prevTexture = OpenGL::getTex2D(); + // Internal formats for the texture based on format + static constexpr std::array internalFormats = { + GL_DEPTH_COMPONENT16, + GL_DEPTH_COMPONENT24, + GL_DEPTH_COMPONENT24, + GL_DEPTH24_STENCIL8, + }; - // Internal formats for the texture based on format - static constexpr std::array internalFormats = { - GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT24, GL_DEPTH24_STENCIL8 - }; + // Format of the texture + static constexpr std::array formats = { + GL_DEPTH_COMPONENT, + GL_DEPTH_COMPONENT, + GL_DEPTH_COMPONENT, + GL_DEPTH_STENCIL, + }; + + static constexpr std::array types = { + GL_UNSIGNED_SHORT, + GL_UNSIGNED_INT, + GL_UNSIGNED_INT, + GL_UNSIGNED_INT_24_8, + }; - // Format of the texture - static constexpr std::array formats = { - GL_DEPTH_COMPONENT, GL_DEPTH_COMPONENT, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL - }; + auto internalFormat = internalFormats[(int)format]; + auto fmt = formats[(int)format]; + auto type = types[(int)format]; - static constexpr std::array types = { - 
GL_UNSIGNED_SHORT, GL_UNSIGNED_INT, GL_UNSIGNED_INT, GL_UNSIGNED_INT_24_8 - }; + texture.createDSTexture(size.x(), size.y(), internalFormat, fmt, nullptr, type, GL_TEXTURE_2D); + texture.bind(); + texture.setMinFilter(OpenGL::Nearest); + texture.setMagFilter(OpenGL::Nearest); - auto internalFormat = internalFormats[(int)format]; - auto fmt = formats[(int)format]; - auto type = types[(int)format]; + glBindTexture(GL_TEXTURE_2D, prevTexture); + fbo.createWithDrawTexture(texture, fmt == GL_DEPTH_STENCIL ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT); - texture.createDSTexture(size.x(), size.y(), internalFormat, fmt, nullptr, type, GL_TEXTURE_2D); - texture.bind(); - texture.setMinFilter(OpenGL::Nearest); - texture.setMagFilter(OpenGL::Nearest); - - glBindTexture(GL_TEXTURE_2D, prevTexture); - - fbo.createWithDrawTexture(texture, fmt == GL_DEPTH_STENCIL ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT); - - if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { Helpers::panic("Incomplete framebuffer"); - } + } + } - void free() { + void free() { valid = false; if (texture.exists()) { texture.free(); } } - bool matches(DepthBuffer& other) { - return location == other.location && format == other.format && - size.x() == other.size.x() && size.y() == other.size.y(); - } + bool matches(DepthBuffer& other) { + return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y(); + } - size_t sizeInBytes() { - return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); - } + size_t sizeInBytes() { + return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); + } }; diff --git a/src/core/renderer_gl/gl_state.cpp b/src/core/renderer_gl/gl_state.cpp index 4a512f44..d2eec0d5 100644 --- a/src/core/renderer_gl/gl_state.cpp +++ b/src/core/renderer_gl/gl_state.cpp @@ -10,6 +10,15 @@ void 
GLStateManager::resetBlend() { OpenGL::setLogicOp(GL_COPY); } +void GLStateManager::resetClearing() { + clearRed = 0.f; + clearBlue = 0.f; + clearGreen = 0.f; + clearAlpha = 1.f; + + OpenGL::setClearColor(clearRed, clearBlue, clearGreen, clearAlpha); +} + void GLStateManager::resetClipping() { // Disable all (supported) clip planes enabledClipPlanes = 0; @@ -64,6 +73,7 @@ void GLStateManager::resetProgram() { void GLStateManager::reset() { resetBlend(); + resetClearing(); resetClipping(); resetColourMask(); resetDepth(); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 181696e5..21f961ba 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -141,7 +141,7 @@ void RendererGL::initGraphicsContext(SDL_Window* window) { GLint oldViewport[4]; glGetIntegerv(GL_VIEWPORT, oldViewport); OpenGL::setViewport(screenTextureWidth, screenTextureHeight); - OpenGL::setClearColor(0.0, 0.0, 0.0, 1.0); + gl.setClearColour(0.0, 0.0, 0.0, 1.0); OpenGL::clearColor(); OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]); @@ -473,7 +473,7 @@ void RendererGL::display() { auto bottomScreen = colourBufferCache.findFromAddress(bottomScreenAddr); screenFramebuffer.bind(OpenGL::DrawFramebuffer); - OpenGL::setClearColor(0.f); + gl.setClearColour(0.f, 0.f, 0.f, 1.f); OpenGL::clearColor(); if (topScreen) { @@ -504,8 +504,9 @@ void RendererGL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 co const float b = getBits<8, 8>(value) / 255.0f; const float a = (value & 0xff) / 255.0f; color->get().fbo.bind(OpenGL::DrawFramebuffer); + gl.setColourMask(true, true, true, true); - OpenGL::setClearColor(r, g, b, a); + gl.setClearColour(r, g, b, a); OpenGL::clearColor(); return; } From 0f973a4ae412b6135e31cb8fc1625825399135ed Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 12 Aug 2023 17:52:50 +0300 Subject: [PATCH 20/23] 
Fix immediate mode vertex submission --- src/core/PICA/gpu.cpp | 18 ++++++++++-------- src/core/renderer_gl/renderer_gl.cpp | 4 +++- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 3668b32f..c0499382 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -348,15 +348,17 @@ PICA::Vertex GPU::getImmediateModeVertex() { // Run VS and return vertex data. TODO: Don't hardcode offsets for each attribute shaderUnit.vs.run(); - std::memcpy(&v.s.positions, &shaderUnit.vs.outputs[0], sizeof(vec4f)); - std::memcpy(&v.s.colour, &shaderUnit.vs.outputs[1], sizeof(vec4f)); - std::memcpy(&v.s.texcoord0, &shaderUnit.vs.outputs[2], 2 * sizeof(f24)); + + // Map shader outputs to fixed function properties + const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7; + for (int i = 0; i < totalShaderOutputs; i++) { + const u32 config = regs[PICA::InternalRegs::ShaderOutmap0 + i]; - printf( - "(x, y, z, w) = (%f, %f, %f, %f)\n", (double)v.s.positions[0], (double)v.s.positions[1], (double)v.s.positions[2], (double)v.s.positions[3] - ); - printf("(r, g, b, a) = (%f, %f, %f, %f)\n", (double)v.s.colour[0], (double)v.s.colour[1], (double)v.s.colour[2], (double)v.s.colour[3]); - printf("(u, v ) = (%f, %f)\n", (double)v.s.texcoord0[0], (double)v.s.texcoord0[1]); + for (int j = 0; j < 4; j++) { // pls unroll + const u32 mapping = (config >> (j * 8)) & 0x1F; + v.raw[mapping] = shaderUnit.vs.outputs[i][j]; + } + } return v; } diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 21f961ba..7d71b5f2 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -135,7 +135,9 @@ void RendererGL::initGraphicsContext(SDL_Window* window) { screenFramebuffer.createWithDrawTexture(screenTexture); screenFramebuffer.bind(OpenGL::DrawAndReadFramebuffer); - if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) 
Helpers::panic("Incomplete framebuffer"); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + Helpers::panic("Incomplete framebuffer"); + } // TODO: This should not clear the framebuffer contents. It should load them from VRAM. GLint oldViewport[4]; From 5d1a2625f56b5a5c6dfc2ac7599c9a0d6c037c0c Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 12 Aug 2023 18:06:29 +0300 Subject: [PATCH 21/23] GL: Fix logic op in display --- src/core/renderer_gl/renderer_gl.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 7d71b5f2..4535c9e2 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -458,6 +458,8 @@ void RendererGL::display() { gl.disableBlend(); gl.disableDepth(); gl.disableScissor(); + // This will work fine whether or not logic ops are enabled. We set logic op to copy instead of disabling to avoid state changes + gl.setLogicOp(GL_COPY); gl.setColourMask(true, true, true, true); gl.useProgram(displayProgram); gl.bindVAO(dummyVAO); @@ -465,18 +467,14 @@ void RendererGL::display() { gl.disableClipPlane(0); gl.disableClipPlane(1); + screenFramebuffer.bind(OpenGL::DrawFramebuffer); + gl.setClearColour(0.f, 0.f, 0.f, 1.f); + OpenGL::clearColor(); + using namespace PICA::ExternalRegs; const u32 topActiveFb = externalRegs[Framebuffer0Select] & 1; const u32 topScreenAddr = externalRegs[topActiveFb == 0 ? Framebuffer0AFirstAddr : Framebuffer0ASecondAddr]; - const u32 bottomActiveFb = externalRegs[Framebuffer1Select] & 1; - const u32 bottomScreenAddr = externalRegs[bottomActiveFb == 0 ? 
Framebuffer1AFirstAddr : Framebuffer1ASecondAddr]; - auto topScreen = colourBufferCache.findFromAddress(topScreenAddr); - auto bottomScreen = colourBufferCache.findFromAddress(bottomScreenAddr); - screenFramebuffer.bind(OpenGL::DrawFramebuffer); - - gl.setClearColour(0.f, 0.f, 0.f, 1.f); - OpenGL::clearColor(); if (topScreen) { topScreen->get().texture.bind(); @@ -484,6 +482,10 @@ void RendererGL::display() { OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen } + const u32 bottomActiveFb = externalRegs[Framebuffer1Select] & 1; + const u32 bottomScreenAddr = externalRegs[bottomActiveFb == 0 ? Framebuffer1AFirstAddr : Framebuffer1ASecondAddr]; + auto bottomScreen = colourBufferCache.findFromAddress(bottomScreenAddr); + if (bottomScreen) { bottomScreen->get().texture.bind(); OpenGL::setViewport(40, 0, 320, 240); From 89dbae42e1d07d39b15aa0b1916b317d5439ae12 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 12 Aug 2023 18:16:35 +0300 Subject: [PATCH 22/23] panda ! 
--- include/PICA/gpu.hpp | 1 + src/core/services/gsp_gpu.cpp | 20 +++++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index fa6a59bd..e84c259c 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -94,6 +94,7 @@ class GPU { void reset(); Registers& getRegisters() { return regs; } + ExternalRegisters& getExtRegisters() { return externalRegs; } void startCommandList(u32 addr, u32 size); // Used by the GSP GPU service for readHwRegs/writeHwRegs/writeHwRegsMasked diff --git a/src/core/services/gsp_gpu.cpp b/src/core/services/gsp_gpu.cpp index bfecfbf8..c2f7faee 100644 --- a/src/core/services/gsp_gpu.cpp +++ b/src/core/services/gsp_gpu.cpp @@ -396,7 +396,7 @@ void GPUService::flushCacheRegions(u32* cmd) { void GPUService::setBufferSwapImpl(u32 screenId, const FramebufferInfo& info) { using namespace PICA::ExternalRegs; - constexpr static std::array fb_addresses = { + static constexpr std::array fbAddresses = { Framebuffer0AFirstAddr, Framebuffer0BFirstAddr, Framebuffer1AFirstAddr, @@ -407,11 +407,13 @@ void GPUService::setBufferSwapImpl(u32 screenId, const FramebufferInfo& info) { Framebuffer1BSecondAddr, }; - const u32 fb_index = info.activeFb * 4 + screenId * 2; - gpu.writeExternalReg(fb_addresses[fb_index], VaddrToPaddr(info.leftFramebufferVaddr)); - gpu.writeExternalReg(fb_addresses[fb_index + 1], VaddrToPaddr(info.rightFramebufferVaddr)); + auto& regs = gpu.getExtRegisters(); - constexpr static std::array config_addresses = { + const u32 fbIndex = info.activeFb * 4 + screenId * 2; + regs[fbAddresses[fbIndex]] = VaddrToPaddr(info.leftFramebufferVaddr); + regs[fbAddresses[fbIndex + 1]] = VaddrToPaddr(info.rightFramebufferVaddr); + + static constexpr std::array configAddresses = { Framebuffer0Config, Framebuffer0Select, Framebuffer0Stride, @@ -420,10 +422,10 @@ void GPUService::setBufferSwapImpl(u32 screenId, const FramebufferInfo& info) { Framebuffer1Stride, }; - const u32 
config_index = screenId * 3; - gpu.writeExternalReg(config_addresses[config_index], info.format); - gpu.writeExternalReg(config_addresses[config_index + 1], info.displayFb); - gpu.writeExternalReg(config_addresses[config_index + 2], info.stride); + const u32 configIndex = screenId * 3; + regs[configAddresses[configIndex]] = info.format; + regs[configAddresses[configIndex + 1]] = info.displayFb; + regs[configAddresses[configIndex + 2]] = info.stride; } // Actually send command list (aka display list) to GPU From fc641a450b92712dfec2cf8f17e5a65cae0e79d5 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 12 Aug 2023 18:25:49 +0300 Subject: [PATCH 23/23] Add a shut up counter to GL::TextureCopy --- src/core/renderer_gl/renderer_gl.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 4535c9e2..3eb36de5 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -667,7 +667,7 @@ void RendererGL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputAddr, outputAddr, totalBytes, inputWidth, inputGap, outputWidth, outputGap); if (inputGap != 0 || outputGap != 0) { - //Helpers::warn("Strided texture copy\n"); + // Helpers::warn("Strided texture copy\n"); } if (inputWidth != outputWidth) { Helpers::warn("Input width does not match output width, cannot accelerate texture copy!"); @@ -695,7 +695,12 @@ void RendererGL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 // Find the source surface. 
auto srcFramebuffer = getColourBuffer(inputAddr, PICA::ColorFmt::RGBA8, copyStride, copyHeight, false); if (!srcFramebuffer) { - printf("TextureCopy failed to locate src framebuffer!\n"); + static int shutUpCounter = 0; // Don't want to spam the console too much, so shut up after 5 times + + if (shutUpCounter < 5) { + shutUpCounter++; + printf("RendererGL::TextureCopy failed to locate src framebuffer!\n"); + } return; }