diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4a6a5648..d4be6caf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,6 +13,7 @@ set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
 include_directories(${PROJECT_SOURCE_DIR}/include/)
 include_directories(${PROJECT_SOURCE_DIR}/include/kernel)
 include_directories (${FMT_INCLUDE_DIR})
+include_directories(third_party/boost/)
 include_directories(third_party/elfio/)
 include_directories(third_party/gl3w/)
 include_directories(third_party/imgui/)
@@ -79,6 +80,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/opengl.hpp inc
     include/services/dsp.hpp include/services/cfg.hpp include/services/region_codes.hpp
     include/fs/archive_save_data.hpp include/fs/archive_sdmc.hpp include/services/ptm.hpp
     include/services/mic.hpp include/services/cecd.hpp include/renderer_gl/renderer_gl.hpp
+    include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp
 )
 
 set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp
diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp
index 76112d4c..dff9c819 100644
--- a/include/PICA/regs.hpp
+++ b/include/PICA/regs.hpp
@@ -14,6 +14,10 @@ namespace PICAInternalRegs {
         // Framebuffer registers
         AlphaTestConfig = 0x104,
         DepthAndColorMask = 0x107,
+        ColourBufferFormat = 0x117,
+        DepthBufferLoc = 0x11C,
+        ColourBufferLoc = 0x11D,
+        FramebufferSize = 0x11E,
 
         // Geometry pipeline registers
         VertexAttribLoc = 0x200,
diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp
index 999272e3..6f55fdce 100644
--- a/include/renderer_gl/renderer_gl.hpp
+++ b/include/renderer_gl/renderer_gl.hpp
@@ -3,6 +3,7 @@
 #include "helpers.hpp"
 #include "logger.hpp"
 #include "opengl.hpp"
+#include "surface_cache.hpp"
 
 struct Vertex {
     OpenGL::vec4 position;
@@ -21,6 +22,17 @@ class Renderer {
     GLint alphaControlLoc = -1;
     u32 oldAlphaControl = 0;
 
+    SurfaceCache<DepthBuffer, 10> depthBufferCache; // NOTE(review): both cache capacities were lost in the pasted diff; 10 is assumed - confirm
+    SurfaceCache<ColourBuffer, 10> colourBufferCache;
+    OpenGL::uvec2 fbSize; // The size of the framebuffer (ie both the colour and depth buffer)
+
+    u32 colourBufferLoc; // Location in 3DS VRAM for the colour buffer
+    ColourBuffer::Formats colourBufferFormat; // Format of the colours stored in the colour buffer
+
+    // Same for the depth/stencil buffer
+    u32 depthBufferLoc;
+    DepthBuffer::Formats depthBufferFormat;
+
     // Dummy VAO/VBO for blitting the final output
     OpenGL::VertexArray dummyVAO;
     OpenGL::VertexBuffer dummyVBO;
@@ -28,6 +40,8 @@ class Renderer {
     static constexpr u32 regNum = 0x300; // Number of internal PICA registers
     const std::array<u32, regNum>& regs;
 
+    OpenGL::Framebuffer getColourFBO();
+
     MAKE_LOG_FUNCTION(log, rendererLogger)
 
 public:
@@ -40,5 +54,19 @@ public:
     void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM
     void drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 count); // Draw the given vertices
 
+    void setFBSize(u32 width, u32 height) {
+        fbSize.x() = width;
+        fbSize.y() = height;
+    }
+
+    void setColourFormat(ColourBuffer::Formats format) { colourBufferFormat = format; }
+    void setColourFormat(u32 format) { colourBufferFormat = static_cast<ColourBuffer::Formats>(format); }
+
+    void setDepthFormat(DepthBuffer::Formats format) { depthBufferFormat = format; }
+    void setDepthFormat(u32 format) { depthBufferFormat = static_cast<DepthBuffer::Formats>(format); }
+
+    void setColourBufferLoc(u32 loc) { colourBufferLoc = loc; }
+    void setDepthBufferLoc(u32 loc) { depthBufferLoc = loc; }
+
     static constexpr u32 vertexBufferSize = 0x1500;
 };
\ No newline at end of file
diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp
new file mode 100644
index 00000000..b7508a36
--- /dev/null
+++ b/include/renderer_gl/surface_cache.hpp
@@ -0,0 +1,67 @@
+#pragma once
+#include <array>
+#include <functional>
+#include <optional>
+#include <type_traits>
+#include "surfaces.hpp"
+
+// Surface cache class that can fit "capacity" instances of the "SurfaceType" class of surfaces
+// SurfaceType *must* have all of the following
+// - An "allocate" function that allocates GL resources for the surfaces
+// - A "free" function that frees up all resources the surface is taking up
+// - A "matches" function that, when provided with a SurfaceType object reference
+// Will tell us if the 2 surfaces match (Only as far as location in VRAM, format, dimensions, etc)
+// Are concerned. We could overload the == operator, but that implies full equality
+// Including equality of the allocated OpenGL resources, which we don't want
+// - A "valid" member that tells us whether the surface is still valid or not
+template <typename SurfaceType, size_t capacity>
+class SurfaceCache {
+    // Vanilla std::optional can't hold actual references
+    using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>;
+    static_assert(std::is_same<SurfaceType, ColourBuffer>() || std::is_same<SurfaceType, DepthBuffer>(),
+        "Invalid surface type");
+
+    size_t size = 0; // Number of entries handed out by add() since the last reset()
+    std::array<SurfaceType, capacity> buffer;
+
+public:
+    // Empties the cache: zeroes the entry count and calls free() on every slot
+    void reset() {
+        size = 0;
+        for (auto& e : buffer) { // Free the VRAM of all surfaces
+            e.free();
+        }
+    }
+
+    // Returns a reference to the first valid cached surface that matches "other", or std::nullopt if there is none
+    OptionalRef find(SurfaceType& other) {
+        for (auto& e : buffer) {
+            // Test validity first so matches() never reads the unset or stale fields of an empty slot
+            if (e.valid && e.matches(other))
+                return e;
+        }
+
+        return std::nullopt;
+    }
+
+    // Adds a surface object to the cache and returns it
+    // Panics if the cache already holds "capacity" surfaces
+    SurfaceType add(SurfaceType& surface) {
+        if (size >= capacity) {
+            Helpers::panic("Surface cache full! Add emptying!");
+        }
+        size++;
+
+        // Find an invalid entry in the cache and overwrite it with the new surface
+        for (auto& e : buffer) {
+            if (!e.valid) {
+                e = surface;
+                e.allocate();
+                return e;
+            }
+        }
+
+        // This should be unreachable but helps to panic anyways
+        Helpers::panic("Couldn't add surface to cache\n");
+    }
+};
diff --git a/include/renderer_gl/surfaces.hpp b/include/renderer_gl/surfaces.hpp
new file mode 100644
index 00000000..e88acf1d
--- /dev/null
+++ b/include/renderer_gl/surfaces.hpp
@@ -0,0 +1,131 @@
+#pragma once
+#include "boost/icl/interval.hpp"
+#include "helpers.hpp"
+#include "opengl.hpp"
+
+template <typename T>
+using Interval = boost::icl::right_open_interval<T>;
+
+// Describes a colour buffer in 3DS VRAM plus the GL texture/FBO handles reserved for it
+struct ColourBuffer {
+    enum class Formats : u32 {
+        RGBA8 = 0,
+        BGR8 = 1,
+        RGB5A1 = 2,
+        RGB565 = 3,
+        RGBA4 = 4,
+
+        Trash1 = 5, Trash2 = 6, Trash3 = 7 // Technically selectable, but their function is unknown
+    };
+
+    u32 location;
+    Formats format;
+    OpenGL::uvec2 size;
+    bool valid;
+
+    // Range of VRAM taken up by buffer
+    Interval<u32> range;
+    // OpenGL resources allocated to buffer
+    OpenGL::Texture texture;
+    OpenGL::Framebuffer fbo;
+
+    ColourBuffer() : valid(false) {}
+
+    ColourBuffer(u32 loc, Formats format, u32 x, u32 y, bool valid = true)
+        : location(loc), format(format), size({x, y}), valid(valid) {
+
+        u64 endLoc = (u64)loc + sizeInBytes();
+        // Check if start and end are valid here
+        range = Interval<u32>(loc, (u32)endLoc);
+    }
+
+    void allocate() {
+        printf("Make this colour buffer allocate itself\n");
+    }
+
+    void free() {
+        valid = false;
+        printf("Make this colour buffer free itself\n");
+    }
+
+    bool matches(ColourBuffer& other) {
+        return location == other.location && format == other.format &&
+            size.x() == other.size.x() && size.y() == other.size.y();
+    }
+
+    // Size occupied by each pixel in bytes
+    // All formats are 16BPP except for RGBA8 (32BPP) and BGR8 (24BPP)
+    size_t sizePerPixel() {
+        switch (format) {
+            case Formats::BGR8: return 3;
+            case Formats::RGBA8: return 4;
+            default: return 2;
+        }
+    }
+
+    size_t sizeInBytes() {
+        return (size_t)size.x() * (size_t)size.y() * sizePerPixel();
+    }
+};
+
+// Describes a depth/stencil buffer in 3DS VRAM plus the GL texture handle reserved for it
+struct DepthBuffer {
+    enum class Formats : u32 {
+        Depth16 = 0,
+        Garbage = 1,
+        Depth24 = 2,
+        Depth24Stencil8 = 3
+    };
+
+    u32 location;
+    Formats format;
+    OpenGL::uvec2 size; // Implicitly set to the size of the framebuffer
+    bool valid;
+
+    // Range of VRAM taken up by buffer
+    Interval<u32> range;
+    // OpenGL texture used for storing depth/stencil
+    OpenGL::Texture texture;
+
+    DepthBuffer() : valid(false) {}
+
+    DepthBuffer(u32 loc, Formats format, u32 x, u32 y, bool valid = true) :
+        location(loc), format(format), size({x, y}), valid(valid) {
+        // Record the VRAM span covered by the buffer, the same way ColourBuffer does
+        u64 endLoc = (u64)loc + sizeInBytes();
+        range = Interval<u32>(loc, (u32)endLoc);
+    }
+
+    bool hasStencil() {
+        return format == Formats::Depth24Stencil8;
+    }
+
+    void allocate() {
+        printf("Make this depth buffer allocate itself\n");
+    }
+
+    void free() {
+        valid = false;
+        printf("Make this depth buffer free itself\n");
+    }
+
+    bool matches(DepthBuffer& other) {
+        return location == other.location && format == other.format &&
+            size.x() == other.size.x() && size.y() == other.size.y();
+    }
+
+    // Size occupied by each pixel in bytes
+    size_t sizePerPixel() {
+        switch (format) {
+            case Formats::Depth16: return 2;
+            case Formats::Depth24: return 3;
+            case Formats::Depth24Stencil8: return 4;
+
+            default: return 1; // Invalid format
+        }
+    }
+
+    size_t sizeInBytes() {
+        return (size_t)size.x() * (size_t)size.y() * sizePerPixel();
+    }
+};
\ No newline at end of file
diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp
index 10ae9b46..cfd0c6d0 100644
--- a/src/core/PICA/regs.cpp
+++ b/src/core/PICA/regs.cpp
@@ -59,6 +59,31 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
             fixedAttribMask = (value >> 16) & 0xfff; // Determines which vertex attributes are fixed for all vertices
             break;
 
+        case ColourBufferLoc: {
+            u32 loc = (value & 0x0fffffff) << 3;
+            renderer.setColourBufferLoc(loc);
+            break;
+        }
+
+        case ColourBufferFormat: {
+            u32 format = (value >> 16) & 7;
+            renderer.setColourFormat(format);
+            break;
+        }
+
+        case DepthBufferLoc: {
+            u32 loc = (value & 0x0fffffff) << 3;
+            renderer.setDepthBufferLoc(loc);
+            break;
+        }
+
+        case FramebufferSize: {
+            const u32 width = value & 0x7ff;
+            const u32 height = ((value >> 12) & 0x3ff) + 1;
+            renderer.setFBSize(width, height);
+            break;
+        }
+
         case VertexFloatUniformIndex:
             shaderUnit.vs.setFloatUniformIndex(value);
             break;
diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp
index a5c18d75..8d493620 100644
--- a/src/core/renderer_gl/renderer_gl.cpp
+++ b/src/core/renderer_gl/renderer_gl.cpp
@@ -105,6 +105,18 @@ const char* displayFragmentShader = R"(
     }
 )";
 
+void Renderer::reset() {
+    depthBufferCache.reset();
+    colourBufferCache.reset();
+
+    // Init the colour/depth buffer settings to some random defaults on reset
+    colourBufferLoc = 0;
+    colourBufferFormat = ColourBuffer::Formats::RGBA8;
+
+    depthBufferLoc = 0;
+    depthBufferFormat = DepthBuffer::Formats::Depth16;
+}
+
 void Renderer::initGraphicsContext() {
     // Set up texture for top screen
     fboTexture.create(400, 240, GL_RGBA8);
@@ -156,6 +168,7 @@ void Renderer::initGraphicsContext() {
 
     dummyVBO.create();
     dummyVAO.create();
+    reset();
 }
 
 void Renderer::getGraphicsContext() {
@@ -168,6 +181,8 @@ void Renderer::getGraphicsContext() {
     triangleProgram.use();
 }
 
+OpenGL::Framebuffer poop;
+
 void Renderer::drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 count) {
     // Adjust alpha test if necessary
     const u32 alphaControl = regs[PICAInternalRegs::AlphaTestConfig];
@@ -176,6 +191,8 @@ void Renderer::drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 c
         glUniform1ui(alphaControlLoc, alphaControl);
     }
 
+    poop = getColourFBO();
+
     const u32 depthControl = regs[PICAInternalRegs::DepthAndColorMask];
     bool depthEnable = depthControl & 1;
     bool depthWriteEnable = (depthControl >> 12) & 1;
@@ -252,4 +269,17 @@ void Renderer::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 cont
 
     OpenGL::setClearColor(r, g, b, a);
     OpenGL::clearColor();
-}
\ No newline at end of file
+}
+
+OpenGL::Framebuffer Renderer::getColourFBO() {
+    // We construct a colour buffer object and see if our cache has any matching colour buffers in it
+    // If not, we allocate a texture & FBO for our framebuffer and store it in the cache
+    ColourBuffer sampleBuffer(colourBufferLoc, colourBufferFormat, fbSize.x(), fbSize.y());
+    auto buffer = colourBufferCache.find(sampleBuffer);
+
+    if (buffer.has_value()) {
+        return buffer.value().get().fbo;
+    } else {
+        return colourBufferCache.add(sampleBuffer).fbo;
+    }
+}
\ No newline at end of file