[GPU] Surface cache vol 1

This commit is contained in:
wheremyfoodat 2023-01-02 15:01:17 +02:00
parent 57ef4e25e7
commit ecbb33b906
7 changed files with 276 additions and 0 deletions

View file

@ -13,6 +13,7 @@ set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
include_directories(${PROJECT_SOURCE_DIR}/include/)
include_directories(${PROJECT_SOURCE_DIR}/include/kernel)
include_directories (${FMT_INCLUDE_DIR})
include_directories(third_party/boost/)
include_directories(third_party/elfio/)
include_directories(third_party/gl3w/)
include_directories(third_party/imgui/)
@ -79,6 +80,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/opengl.hpp inc
include/services/dsp.hpp include/services/cfg.hpp include/services/region_codes.hpp
include/fs/archive_save_data.hpp include/fs/archive_sdmc.hpp include/services/ptm.hpp
include/services/mic.hpp include/services/cecd.hpp include/renderer_gl/renderer_gl.hpp
include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp
)
set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp

View file

@ -14,6 +14,10 @@ namespace PICAInternalRegs {
// Framebuffer registers
AlphaTestConfig = 0x104,
DepthAndColorMask = 0x107,
ColourBufferFormat = 0x117,
DepthBufferLoc = 0x11C,
ColourBufferLoc = 0x11D,
FramebufferSize = 0x11E,
// Geometry pipeline registers
VertexAttribLoc = 0x200,

View file

@ -3,6 +3,7 @@
#include "helpers.hpp"
#include "logger.hpp"
#include "opengl.hpp"
#include "surface_cache.hpp"
struct Vertex {
OpenGL::vec4 position;
@ -21,6 +22,17 @@ class Renderer {
GLint alphaControlLoc = -1;
u32 oldAlphaControl = 0;
SurfaceCache<DepthBuffer, 10> depthBufferCache;
SurfaceCache<ColourBuffer, 10> colourBufferCache;
OpenGL::uvec2 fbSize; // The size of the framebuffer (ie both the colour and depth buffer)'
u32 colourBufferLoc; // Location in 3DS VRAM for the colour buffer
ColourBuffer::Formats colourBufferFormat; // Format of the colours stored in the colour buffer
// Same for the depth/stencil buffer
u32 depthBufferLoc;
DepthBuffer::Formats depthBufferFormat;
// Dummy VAO/VBO for blitting the final output
OpenGL::VertexArray dummyVAO;
OpenGL::VertexBuffer dummyVBO;
@ -28,6 +40,8 @@ class Renderer {
static constexpr u32 regNum = 0x300; // Number of internal PICA registers
const std::array<u32, regNum>& regs;
OpenGL::Framebuffer getColourFBO();
MAKE_LOG_FUNCTION(log, rendererLogger)
public:
@ -40,5 +54,19 @@ public:
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM
void drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 count); // Draw the given vertices
void setFBSize(u32 width, u32 height) {
fbSize.x() = width;
fbSize.y() = height;
}
void setColourFormat(ColourBuffer::Formats format) { colourBufferFormat = format; }
void setColourFormat(u32 format) { colourBufferFormat = static_cast<ColourBuffer::Formats>(format); }
void setDepthFormat(DepthBuffer::Formats format) { depthBufferFormat = format; }
void setDepthFormat(u32 format) { depthBufferFormat = static_cast<DepthBuffer::Formats>(format); }
void setColourBufferLoc(u32 loc) { colourBufferLoc = loc; }
void setDepthBufferLoc(u32 loc) { depthBufferLoc = loc; }
static constexpr u32 vertexBufferSize = 0x1500;
};

View file

@ -0,0 +1,62 @@
#pragma once
#include <functional>
#include <optional>
#include "surfaces.hpp"
// Surface cache class that can fit "capacity" instances of the "SurfaceType" class of surfaces
// SurfaceType *must* have all of the following
// - An "allocate" function that allocates GL resources for the surfaces
// - A "free" function that frees up all resources the surface is taking up
// - A "matches" function that, when provided with a SurfaceType object reference
// Will tell us if the 2 surfaces match (Only as far as location in VRAM, format, dimensions, etc)
// Are concerned. We could overload the == operator, but that implies full equality
// Including equality of the allocated OpenGL resources, which we don't want
// - A "valid" member that tells us whether the function is still valid or not
template <typename SurfaceType, size_t capacity>
class SurfaceCache {
// Vanilla std::optional can't hold actual references
using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>;
static_assert(std::is_same<SurfaceType, ColourBuffer>() || std::is_same<SurfaceType, DepthBuffer>(),
"Invalid surface type");
size_t size;
std::array<SurfaceType, capacity> buffer;
public:
void reset() {
size = 0;
for (auto& e : buffer) { // Free the VRAM of all surfaces
e.free();
}
}
OptionalRef find(SurfaceType& other) {
for (auto& e : buffer) {
if (e.matches(other) && e.valid)
return e;
}
return std::nullopt;
}
// Adds a surface object to the cache and returns it
SurfaceType add(SurfaceType& surface) {
if (size >= capacity) {
Helpers::panic("Surface cache full! Add emptying!");
}
size++;
// Find an invalid entry in the cache and overwrite it with the new surface
for (auto& e : buffer) {
if (!e.valid) {
e = surface;
e.allocate();
Sleep(2000);
return e;
}
}
// This should be unreachable but helps to panic anyways
Helpers::panic("Couldn't add surface to cache\n");
}
};

View file

@ -0,0 +1,125 @@
#pragma once
#include "boost/icl/interval.hpp"
#include "helpers.hpp"
#include "opengl.hpp"
template <typename T>
using Interval = boost::icl::right_open_interval<T>;
struct ColourBuffer {
enum class Formats : u32 {
RGBA8 = 0,
BGR8 = 1,
RGB5A1 = 2,
RGB565 = 3,
RGBA4 = 4,
Trash1 = 5, Trash2 = 6, Trash3 = 7 // Technically selectable, but their function is unknown
};
u32 location;
Formats format;
OpenGL::uvec2 size;
bool valid;
// Range of VRAM taken up by buffer
Interval<u32> range;
// OpenGL resources allocated to buffer
OpenGL::Texture texture;
OpenGL::Framebuffer fbo;
ColourBuffer() : valid(false) {}
ColourBuffer(u32 loc, Formats format, u32 x, u32 y, bool valid = true)
: location(loc), format(format), size({x, y}), valid(valid) {
u64 endLoc = (u64)loc + sizeInBytes();
// Check if start and end are valid here
range = Interval<u32>(loc, (u32)endLoc);
}
void allocate() {
printf("Make this colour buffer allocate itself\n");
}
void free() {
valid = false;
printf("Make this colour buffer free itself\n");
}
bool matches(ColourBuffer& other) {
return location == other.location && format == other.format &&
size.x() == other.size.x() && size.y() == other.size.y();
}
// Size occupied by each pixel in bytes
// All formats are 16BPP except for RGBA8 (32BPP) and BGR8 (24BPP)
size_t sizePerPixel() {
switch (format) {
case Formats::BGR8: return 3;
case Formats::RGBA8: return 4;
default: return 2;
}
}
size_t sizeInBytes() {
return (size_t)size.x() * (size_t)size.y() * sizePerPixel();
}
};
struct DepthBuffer {
enum class Formats : u32 {
Depth16 = 0,
Garbage = 1,
Depth24 = 2,
Depth24Stencil8 = 3
};
u32 location;
Formats format;
OpenGL::uvec2 size; // Implicitly set to the size of the framebuffer
bool valid;
// Range of VRAM taken up by buffer
Interval<u32> range;
// OpenGL texture used for storing depth/stencil
OpenGL::Texture texture;
DepthBuffer() : valid(false) {}
DepthBuffer(u32 loc, Formats format, u32 x, u32 y, bool valid = true) :
location(loc), format(format), size({x, y}), valid(valid) {}
bool hasStencil() {
return format == Formats::Depth24Stencil8;
}
void allocate() {
printf("Make this depth buffer allocate itself\n");
}
void free() {
valid = false;
printf("Make this depth buffer free itself\n");
}
bool matches(DepthBuffer& other) {
return location == other.location && format == other.format &&
size.x() == other.size.x() && size.y() == other.size.y();
}
// Size occupied by each pixel in bytes
size_t sizePerPixel() {
switch (format) {
case Formats::Depth16: return 2;
case Formats::Depth24: return 3;
case Formats::Depth24Stencil8: return 4;
default: return 1; // Invalid format
}
}
size_t sizeInBytes() {
return (size_t)size.x() * (size_t)size.y() * sizePerPixel();
}
};

View file

@ -59,6 +59,31 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
fixedAttribMask = (value >> 16) & 0xfff; // Determines which vertex attributes are fixed for all vertices
break;
case ColourBufferLoc: {
u32 loc = (value & 0x0fffffff) << 3;
renderer.setColourBufferLoc(loc);
break;
};
case ColourBufferFormat: {
u32 format = (value >> 16) & 7;
renderer.setColourFormat(format);
break;
}
case DepthBufferLoc: {
u32 loc = (value & 0x0fffffff) << 3;
renderer.setDepthBufferLoc(loc);
break;
}
case FramebufferSize: {
const u32 width = value & 0x7ff;
const u32 height = ((value >> 12) & 0x3ff) + 1;
renderer.setFBSize(width, height);
break;
}
case VertexFloatUniformIndex:
shaderUnit.vs.setFloatUniformIndex(value);
break;

View file

@ -105,6 +105,18 @@ const char* displayFragmentShader = R"(
}
)";
void Renderer::reset() {
depthBufferCache.reset();
colourBufferCache.reset();
// Init the colour/depth buffer settings to some random defaults on reset
colourBufferLoc = 0;
colourBufferFormat = ColourBuffer::Formats::RGBA8;
depthBufferLoc = 0;
depthBufferFormat = DepthBuffer::Formats::Depth16;
}
void Renderer::initGraphicsContext() {
// Set up texture for top screen
fboTexture.create(400, 240, GL_RGBA8);
@ -156,6 +168,7 @@ void Renderer::initGraphicsContext() {
dummyVBO.create();
dummyVAO.create();
reset();
}
void Renderer::getGraphicsContext() {
@ -168,6 +181,8 @@ void Renderer::getGraphicsContext() {
triangleProgram.use();
}
OpenGL::Framebuffer poop;
void Renderer::drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 count) {
// Adjust alpha test if necessary
const u32 alphaControl = regs[PICAInternalRegs::AlphaTestConfig];
@ -176,6 +191,8 @@ void Renderer::drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 c
glUniform1ui(alphaControlLoc, alphaControl);
}
poop = getColourFBO();
const u32 depthControl = regs[PICAInternalRegs::DepthAndColorMask];
bool depthEnable = depthControl & 1;
bool depthWriteEnable = (depthControl >> 12) & 1;
@ -252,4 +269,17 @@ void Renderer::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 cont
OpenGL::setClearColor(r, g, b, a);
OpenGL::clearColor();
}
OpenGL::Framebuffer Renderer::getColourFBO() {
//We construct a colour buffer object and see if our cache has any matching colour buffers in it
// If not, we allocate a texture & FBO for our framebuffer and store it in the cache
ColourBuffer sampleBuffer(colourBufferLoc, colourBufferFormat, fbSize.x(), fbSize.y());
auto buffer = colourBufferCache.find(sampleBuffer);
if (buffer.has_value()) {
return buffer.value().get().fbo;
} else {
return colourBufferCache.add(sampleBuffer).fbo;
}
}