From fe6fc4cfa3457c0c9314ce89469961fe382a5f5e Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Fri, 3 Jan 2025 18:27:19 +0200
Subject: [PATCH] Texture caching stuff pt1

---
Review notes (text in this section is not part of the commit message):
- textureCopy: the CPU fallback now advances both pointers by the number of bytes copied, clamps
  the final chunk to the bytes remaining, bails out on null pointers and zero-sized copies/lines,
  and invalidates the whole strided destination span instead of only copySize bytes.
- surface_cache.hpp invalidateRegion: the interval end is clamped so start + size cannot wrap.
- gsp_gpu.cpp: the unconditional debug printf calls are dropped.
- Template arguments that were stripped from this copy of the patch (std::span,
  right_open_interval, getPointerPhys) are restored; leading whitespace is reconstructed as tabs.
- The index hashes below are carried over unchanged from the original commit.

 include/renderer.hpp                  |  1 +
 include/renderer_gl/renderer_gl.hpp   |  1 +
 include/renderer_gl/surface_cache.hpp | 18 ++++++++++
 src/core/PICA/gpu.cpp                 |  2 ++
 src/core/renderer_gl/renderer_gl.cpp  | 48 +++++++++++++++++++++++++++
 src/core/services/gsp_gpu.cpp         |  3 ++
 6 files changed, 73 insertions(+)

diff --git a/include/renderer.hpp b/include/renderer.hpp
index b458ecce..370fa450 100644
--- a/include/renderer.hpp
+++ b/include/renderer.hpp
@@ -68,6 +68,7 @@ class Renderer {
 	virtual void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) = 0;  // Perform display transfer
 	virtual void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) = 0;
 	virtual void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) = 0;  // Draw the given vertices
+	virtual void invalidateRegion(u32 start, u32 size) {}  // Notify the renderer that [start, start + size) changed. No-op by default
 
 	virtual void screenshot(const std::string& name) = 0;
 	// Some frontends and platforms may require that we delete our GL or misc context and obtain a new one for things like exclusive fullscreen
diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp
index a862cd26..caa357dc 100644
--- a/include/renderer_gl/renderer_gl.hpp
+++ b/include/renderer_gl/renderer_gl.hpp
@@ -172,6 +172,7 @@ class RendererGL final : public Renderer {
 	void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
 	void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override;  // Draw the given vertices
 	void deinitGraphicsContext() override;
+	void invalidateRegion(u32 start, u32 size) override;
 
 	virtual bool supportsShaderReload() override { return true; }
 	virtual std::string getUbershader() override;
diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp
index fb7c71a5..199d696c 100644
--- a/include/renderer_gl/surface_cache.hpp
+++ b/include/renderer_gl/surface_cache.hpp
@@ -104,4 +104,22 @@ public:
 	const SurfaceType& operator[](size_t i) const {
 		return buffer[i];
 	}
+
+	// Marks invalid and frees every valid cached surface whose range intersects [start, start + size)
+	void invalidateRegion(u32 start, u32 size) {
+		if (size == 0) {
+			return;
+		}
+
+		// start + size can wrap around the 32-bit address space for out-of-range inputs, so clamp the end instead
+		const u32 end = (size > 0xFFFFFFFFu - start) ? 0xFFFFFFFFu : start + size;
+		const boost::icl::right_open_interval<u32> interval(start, end);
+
+		for (auto& e : buffer) {
+			if (e.valid && boost::icl::intersects(e.range, interval)) {
+				e.valid = false;
+				e.free();
+			}
+		}
+	}
 };
diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp
index 838d3fb3..0d4f53b4 100644
--- a/src/core/PICA/gpu.cpp
+++ b/src/core/PICA/gpu.cpp
@@ -440,6 +440,8 @@ void GPU::fireDMA(u32 dest, u32 source, u32 size) {
 		// Valid, optimized FCRAM->VRAM DMA. TODO: Is VRAM->VRAM DMA allowed?
 		u8* fcram = mem.getFCRAM();
 		std::memcpy(&vram[dest - vramStart], &fcram[source - fcramStart], size);
+
+		renderer->invalidateRegion(dest - vramStart + PhysicalAddrs::VRAM, size);
 	} else {
 		printf("Non-trivially optimizable GPU DMA. Falling back to byte-by-byte transfer\n");
 
diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp
index c1899655..62d6cf8d 100644
--- a/src/core/renderer_gl/renderer_gl.cpp
+++ b/src/core/renderer_gl/renderer_gl.cpp
@@ -792,6 +792,52 @@ void RendererGL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32
 			shutUpCounter++;
 			printf("RendererGL::TextureCopy failed to locate src framebuffer!\n");
 		}
+
+		// The source framebuffer was not located, so perform the copy on guest memory with the CPU instead
+		u8* inputPointer = gpu.getPointerPhys<u8>(inputAddr);
+		u8* outputPointer = gpu.getPointerPhys<u8>(outputAddr);
+
+		// Nothing to do for an empty copy. Null pointers would crash memcpy, and zero-width lines would never make progress
+		if (copySize == 0 || inputPointer == nullptr || outputPointer == nullptr || inputWidth == 0 || outputWidth == 0) {
+			return;
+		}
+
+		// The data is written as lines of outputWidth bytes separated by outputGap bytes, so the destination range
+		// that gets modified is larger than copySize whenever the copy crosses a line boundary with a non-zero gap
+		const unsigned long long gapsCrossed = static_cast<unsigned long long>(copySize - 1) / outputWidth;
+		const unsigned long long outputSpan = static_cast<unsigned long long>(copySize) + gapsCrossed * outputGap;
+		invalidateRegion(outputAddr, outputSpan > 0xFFFFFFFFull ? 0xFFFFFFFFu : static_cast<u32>(outputSpan));
+
+		u32 inputBytesLeft = inputWidth;    // Bytes left in the current input line
+		u32 outputBytesLeft = outputWidth;  // Bytes left in the current output line
+		u32 copyBytesLeft = copySize;
+
+		while (copyBytesLeft > 0) {
+			// Copy up to the end of whichever line finishes first, without exceeding the total copy size
+			u32 bytes = inputBytesLeft < outputBytesLeft ? inputBytesLeft : outputBytesLeft;
+			if (bytes > copyBytesLeft) {
+				bytes = copyBytesLeft;
+			}
+
+			std::memcpy(outputPointer, inputPointer, bytes);
+			inputPointer += bytes;
+			outputPointer += bytes;
+			inputBytesLeft -= bytes;
+			outputBytesLeft -= bytes;
+			copyBytesLeft -= bytes;
+
+			// Skip over the gap once a line has been fully consumed or filled
+			if (inputBytesLeft == 0) {
+				inputBytesLeft = inputWidth;
+				inputPointer += inputGap;
+			}
+
+			if (outputBytesLeft == 0) {
+				outputBytesLeft = outputWidth;
+				outputPointer += outputGap;
+			}
+		}
+
 		return;
 	}
 
@@ -1267,3 +1313,5 @@ void RendererGL::setupGLES() {
 		glLogicOp = [](GLenum) {};
 	}
 }
+
+void RendererGL::invalidateRegion(u32 start, u32 size) { textureCache.invalidateRegion(start, size); }
diff --git a/src/core/services/gsp_gpu.cpp b/src/core/services/gsp_gpu.cpp
index 5c6ab3d6..13e44c5f 100644
--- a/src/core/services/gsp_gpu.cpp
+++ b/src/core/services/gsp_gpu.cpp
@@ -276,6 +276,9 @@ void GPUService::flushDataCache(u32 messagePointer) {
 	u32 processHandle = handle = mem.read32(messagePointer + 16);
 	log("GSP::GPU::FlushDataCache(address = %08X, size = %X, process = %X)\n", address, size, processHandle);
 
+	// NOTE(review): GPU::fireDMA passes a physical address to invalidateRegion. Confirm address is physical here too (it may be virtual)
+	gpu.getRenderer()->invalidateRegion(address, size);
+
 	mem.write32(messagePointer, IPC::responseHeader(0x8, 1, 0));
 	mem.write32(messagePointer + 4, Result::Success);
 }