diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 955fb0ae..fa6a59bd 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -117,6 +117,10 @@ class GPU { renderer->displayTransfer(inputAddr, outputAddr, inputSize, outputSize, flags); } + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { + renderer->textureCopy(inputAddr, outputAddr, totalBytes, inputSize, outputSize, flags); + } + // Read a value of type T from physical address paddr // This is necessary because vertex attribute fetching uses physical addresses template diff --git a/include/helpers.hpp b/include/helpers.hpp index d07c2bb6..f13fc720 100644 --- a/include/helpers.hpp +++ b/include/helpers.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "termcolor.hpp" @@ -30,6 +31,17 @@ using s32 = std::int32_t; using s64 = std::int64_t; namespace Helpers { + template + std::string format(const std::string& fmt, Args&&... args) { + const int size = std::snprintf(nullptr, 0, fmt.c_str(), args...) + 1; + if (size <= 0) { + return {}; + } + const auto buf = std::make_unique(size); + std::snprintf(buf.get(), size, fmt.c_str(), args ...); + return std::string(buf.get(), buf.get() + size - 1); + } + // Unconditional panic, unlike panicDev which does not panic on user builds template [[noreturn]] static void panic(const char* fmt, Args&&... args) { diff --git a/include/renderer.hpp b/include/renderer.hpp index fff25ab5..c189da7f 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -49,6 +49,7 @@ class Renderer { virtual void initGraphicsContext(SDL_Window* window) = 0; // Initialize graphics context virtual void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) = 0; // Clear a GPU buffer in VRAM virtual void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) = 0; // Perform display transfer + virtual void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) = 0; virtual void drawVertices(PICA::PrimType primType, std::span vertices) = 0; // Draw the given vertices virtual void screenshot(const std::string& name) = 0; diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 52d97524..ec0906b1 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -77,9 +77,10 @@ class RendererGL final : public Renderer { void initGraphicsContext(SDL_Window* window) override; // Initialize graphics context void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; // Clear a GPU buffer in VRAM void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; // Perform display transfer + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; void drawVertices(PICA::PrimType primType, std::span vertices) override; // Draw the given vertices - ColourBuffer getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height); + std::optional getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); // Take a screenshot of the screen and store it in a file void screenshot(const std::string& name) override; diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp index b2e5cc29..5323741f 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -76,6 +76,16 @@ public: size++; + // Find an existing surface we completely invalidate and overwrite it with the new surface + for (auto& e : buffer) { + if (e.valid && e.range.lower() >= surface.range.lower() && e.range.upper() <= surface.range.upper()) { + e.free(); + e = surface; + e.allocate(); + return e; + } + } + // Find an invalid entry in the cache and overwrite it with the new surface for (auto& e : buffer) { if (!e.valid) { diff --git a/include/renderer_gl/surfaces.hpp b/include/renderer_gl/surfaces.hpp index 62a3b61f..88355a8c 100644 --- a/include/renderer_gl/surfaces.hpp +++ b/include/renderer_gl/surfaces.hpp @@ -65,7 +65,8 @@ struct ColourBuffer { texture.setMagFilter(OpenGL::Linear); glBindTexture(GL_TEXTURE_2D, prevTexture); - //Helpers::panic("Creating FBO: %d, %d\n", size.x(), size.y()); + const auto name = Helpers::format("Surface %dx%d %s from 0x%08X", size.x(), size.y(), PICA::textureFormatToString(format), location); + OpenGL::setObjectLabel(GL_TEXTURE, texture.handle(), name.c_str()); fbo.createWithDrawTexture(texture); fbo.bind(OpenGL::DrawAndReadFramebuffer); diff --git a/include/renderer_null/renderer_null.hpp b/include/renderer_null/renderer_null.hpp index 22293ba6..231ed41d 100644 --- a/include/renderer_null/renderer_null.hpp +++ b/include/renderer_null/renderer_null.hpp @@ -12,6 +12,7 @@ class RendererNull final : public Renderer { void initGraphicsContext(SDL_Window* window) override; void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; void drawVertices(PICA::PrimType primType, std::span vertices) override; void screenshot(const std::string& name) override; }; diff --git a/include/renderer_sw/renderer_sw.hpp b/include/renderer_sw/renderer_sw.hpp index 171fc084..9e68b00f 100644 --- a/include/renderer_sw/renderer_sw.hpp +++ b/include/renderer_sw/renderer_sw.hpp @@ -12,6 +12,7 @@ class RendererSw final : public Renderer { void initGraphicsContext(SDL_Window* window) override; void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; void drawVertices(PICA::PrimType primType, std::span vertices) override; void screenshot(const std::string& name) override; -}; \ No newline at end of file +}; diff --git a/include/renderer_vk/renderer_vk.hpp b/include/renderer_vk/renderer_vk.hpp index 4b6e65b0..59d8cdae 100644 --- a/include/renderer_vk/renderer_vk.hpp +++ b/include/renderer_vk/renderer_vk.hpp @@ -52,6 +52,7 @@ class RendererVK final : public Renderer { void initGraphicsContext(SDL_Window* window) override; void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; void drawVertices(PICA::PrimType primType, std::span vertices) override; void screenshot(const std::string& name) override; -}; \ No newline at end of file +}; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index ba020bf5..90ca1af6 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -599,12 +599,11 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u u32 outputWidth = outputSize & 0xffff; u32 outputHeight = outputSize >> 16; - if (inputWidth != outputWidth) { - // Helpers::warn("Strided display transfer is not handled correctly!\n"); - } + OpenGL::DebugScope scope("DisplayTransfer inputAddr 0x%08X outputAddr 0x%08X inputWidth %d outputWidth %d inputWidth %d outputHeight %d", + inputAddr, outputAddr, inputWidth, outputWidth, inputHeight, outputHeight); - auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, inputHeight); - Math::Rect srcRect = srcFramebuffer.getSubRect(inputAddr, outputWidth, outputHeight); + auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, outputHeight); + Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, outputWidth, outputHeight); // Apply scaling for the destination rectangle. if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { @@ -616,18 +615,88 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u } auto destFramebuffer = getColourBuffer(outputAddr, outputFormat, outputWidth, outputHeight); - Math::Rect destRect = destFramebuffer.getSubRect(outputAddr, outputWidth, outputHeight); + Math::Rect destRect = destFramebuffer->getSubRect(outputAddr, outputWidth, outputHeight); + + if (inputWidth != outputWidth) { + // Helpers::warn("Strided display transfer is not handled correctly!\n"); + } // Blit the framebuffers - srcFramebuffer.fbo.bind(OpenGL::ReadFramebuffer); - destFramebuffer.fbo.bind(OpenGL::DrawFramebuffer); + srcFramebuffer->fbo.bind(OpenGL::ReadFramebuffer); + destFramebuffer->fbo.bind(OpenGL::DrawFramebuffer); glBlitFramebuffer( srcRect.left, srcRect.top, srcRect.right, srcRect.bottom, destRect.left, destRect.top, destRect.right, destRect.bottom, GL_COLOR_BUFFER_BIT, GL_LINEAR ); } -ColourBuffer RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height) { +void RendererGL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { + // Texture copy size is aligned to 16 byte units + const u32 copySize = totalBytes & ~0xf; + if (copySize == 0) { + printf("TextureCopy total bytes less than 16!\n"); + return; + } + + // The width and gap are provided in 16-byte units. + const u32 inputWidth = (inputSize & 0xffff) << 4; + const u32 inputGap = (inputSize >> 16) << 4; + const u32 outputWidth = (outputSize & 0xffff) << 4; + const u32 outputGap = (outputSize >> 16) << 4; + + OpenGL::DebugScope scope("TextureCopy inputAddr 0x%08X outputAddr 0x%08X totalBytes %d inputWidth %d inputGap %d outputWidth %d outputGap %d", + inputAddr, outputAddr, totalBytes, inputWidth, inputGap, outputWidth, outputGap); + + if (inputGap != 0 || outputGap != 0) { + Helpers::warn("Strided texture copy\n"); + } + if (inputWidth != outputWidth) { + Helpers::warn("Input width does not match output width, cannot accelerate texture copy!\n"); + return; + } + + // Texture copy is a raw data copy in PICA, which means no format or tiling information is provided to the engine. + // Depending if the target surface is linear or tiled, games set inputWidth to either the width of the texture or + // the width multiplied by eight (because tiles are stored linearly in memory). + // To properly accelerate this we must examine each surface individually. For now we assume the most common case + // of tiled surface with RGBA8 format. If our assumption does not hold true, we abort the texture copy as inserting + // that surface is not correct. + + // We assume the source surface is tiled and RGBA8. inputWidth is in bytes so divide it + // by eight * sizePerPixel(RGBA8) to convert it to a useable width. + const u32 bpp = sizePerPixel(PICA::ColorFmt::RGBA8); + const u32 copyStride = (inputWidth + inputGap) / (8 * bpp); + const u32 copyWidth = inputWidth / (8 * bpp); + + // inputHeight/outputHeight are typically set to zero so they cannot be used to get the height of the copy region + // in contrast to display transfer. Compute height manually by dividing the copy size with the copy width. The result + // is the number of vertical tiles so multiply that by eight to get the actual copy height. + const u32 copyHeight = (copySize / inputWidth) * 8; + + // Find the source surface. + auto srcFramebuffer = getColourBuffer(inputAddr, PICA::ColorFmt::RGBA8, copyStride, copyHeight, false); + if (!srcFramebuffer) { + printf("TextureCopy failed to locate src framebuffer!\n"); + return; + } + + Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, copyWidth, copyHeight); + + // Assume the destination surface has the same format. Unless the surfaces have the same block width, + // texture copy does not make sense. + auto destFramebuffer = getColourBuffer(outputAddr, srcFramebuffer->format, copyWidth, copyHeight); + Math::Rect destRect = destFramebuffer->getSubRect(outputAddr, copyWidth, copyHeight); + + // Blit the framebuffers + srcFramebuffer->fbo.bind(OpenGL::ReadFramebuffer); + destFramebuffer->fbo.bind(OpenGL::DrawFramebuffer); + glBlitFramebuffer( + srcRect.left, srcRect.top, srcRect.right, srcRect.bottom, destRect.left, destRect.top, destRect.right, destRect.bottom, GL_COLOR_BUFFER_BIT, + GL_LINEAR + ); +} + +std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound) { // Try to find an already existing buffer that contains the provided address // This is a more relaxed check compared to getColourFBO as display transfer/texcopy may refer to // subrect of a surface and in case of texcopy we don't know the format of the surface. @@ -636,6 +705,10 @@ ColourBuffer RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 wi return buffer.value().get(); } + if (!createIfnotFound) { + return std::nullopt; + } + // Otherwise create and cache a new buffer. ColourBuffer sampleBuffer(addr, format, width, height); return colourBufferCache.add(sampleBuffer); diff --git a/src/core/renderer_gl/textures.cpp b/src/core/renderer_gl/textures.cpp index 9e303fd9..7a03c97d 100644 --- a/src/core/renderer_gl/textures.cpp +++ b/src/core/renderer_gl/textures.cpp @@ -9,6 +9,9 @@ void Texture::allocate() { texture.create(size.u(), size.v(), GL_RGBA8); texture.bind(); + const auto name = Helpers::format("Surface %dx%d %s from 0x%08X", size.x(), size.y(), PICA::textureFormatToString(format), location); + OpenGL::setObjectLabel(GL_TEXTURE, texture.handle(), name.c_str()); + setNewConfig(config); } diff --git a/src/core/renderer_null/renderer_null.cpp b/src/core/renderer_null/renderer_null.cpp index 546d8a81..b2ebd1d6 100644 --- a/src/core/renderer_null/renderer_null.cpp +++ b/src/core/renderer_null/renderer_null.cpp @@ -9,5 +9,6 @@ void RendererNull::display() {} void RendererNull::initGraphicsContext(SDL_Window* window) {} void RendererNull::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {} void RendererNull::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {} +void RendererNull::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {} void RendererNull::drawVertices(PICA::PrimType primType, std::span vertices) {} void RendererNull::screenshot(const std::string& name) {} diff --git a/src/core/renderer_sw/renderer_sw.cpp b/src/core/renderer_sw/renderer_sw.cpp index 1ea452ae..b671c180 100644 --- a/src/core/renderer_sw/renderer_sw.cpp +++ b/src/core/renderer_sw/renderer_sw.cpp @@ -14,8 +14,12 @@ void RendererSw::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u printf("RendererSW: Unimplemented displayTransfer call\n"); } +void RendererSw::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { + printf("RendererSW: Unimplemented textureCopy call\n"); +} + void RendererSw::drawVertices(PICA::PrimType primType, std::span vertices) { printf("RendererSW: Unimplemented drawVertices call\n"); } -void RendererSw::screenshot(const std::string& name) { printf("RendererSW: Unimplemented screenshot call\n"); } \ No newline at end of file +void RendererSw::screenshot(const std::string& name) { printf("RendererSW: Unimplemented screenshot call\n"); } diff --git a/src/core/renderer_vk/renderer_vk.cpp b/src/core/renderer_vk/renderer_vk.cpp index 23048f51..4ec70412 100644 --- a/src/core/renderer_vk/renderer_vk.cpp +++ b/src/core/renderer_vk/renderer_vk.cpp @@ -542,6 +542,8 @@ void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 co void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {} +void RendererVK::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {} + void RendererVK::drawVertices(PICA::PrimType primType, std::span vertices) {} -void RendererVK::screenshot(const std::string& name) {} \ No newline at end of file +void RendererVK::screenshot(const std::string& name) {} diff --git a/src/core/services/gsp_gpu.cpp b/src/core/services/gsp_gpu.cpp index abea9f03..25628e4b 100644 --- a/src/core/services/gsp_gpu.cpp +++ b/src/core/services/gsp_gpu.cpp @@ -441,7 +441,15 @@ void GPUService::processCommandList(u32* cmd) { // TODO: Emulate the transfer engine & its registers // Then this can be emulated by just writing the appropriate values there void GPUService::triggerTextureCopy(u32* cmd) { - Helpers::warn("GSP::GPU::TriggerTextureCopy (unimplemented)\n"); + const u32 inputAddr = VaddrToPaddr(cmd[1]); + const u32 outputAddr = VaddrToPaddr(cmd[2]); + const u32 totalBytes = cmd[3]; + const u32 inputSize = cmd[4]; + const u32 outputSize = cmd[5]; + const u32 flags = cmd[6]; + + log("GSP::GPU::TriggerTextureCopy (Stubbed)\n"); + gpu.textureCopy(inputAddr, outputAddr, totalBytes, inputSize, outputSize, flags); // This uses the transfer engine and thus needs to fire a PPF interrupt. // NSMB2 relies on this requestInterrupt(GPUInterrupt::PPF);