[GPU] Surface cache vol 1

This commit is contained in:
wheremyfoodat 2023-01-02 15:01:17 +02:00
parent 57ef4e25e7
commit ecbb33b906
7 changed files with 276 additions and 0 deletions

View file

@ -14,6 +14,10 @@ namespace PICAInternalRegs {
// Framebuffer registers
AlphaTestConfig = 0x104,
DepthAndColorMask = 0x107,
ColourBufferFormat = 0x117,
DepthBufferLoc = 0x11C,
ColourBufferLoc = 0x11D,
FramebufferSize = 0x11E,
// Geometry pipeline registers
VertexAttribLoc = 0x200,

View file

@ -3,6 +3,7 @@
#include "helpers.hpp"
#include "logger.hpp"
#include "opengl.hpp"
#include "surface_cache.hpp"
struct Vertex {
OpenGL::vec4 position;
@ -21,6 +22,17 @@ class Renderer {
GLint alphaControlLoc = -1;
u32 oldAlphaControl = 0;
// Caches of depth and colour surfaces; each one can hold up to 10 entries
SurfaceCache<DepthBuffer, 10> depthBufferCache;
SurfaceCache<ColourBuffer, 10> colourBufferCache;
OpenGL::uvec2 fbSize; // The size of the framebuffer (i.e. of both the colour and the depth buffer). Written by setFBSize
u32 colourBufferLoc; // Location in 3DS VRAM for the colour buffer. Written by setColourBufferLoc
ColourBuffer::Formats colourBufferFormat; // Format of the colours stored in the colour buffer. Written by setColourFormat
// Same for the depth/stencil buffer: location in VRAM, then format. Written by setDepthBufferLoc/setDepthFormat
u32 depthBufferLoc;
DepthBuffer::Formats depthBufferFormat;
// Dummy VAO/VBO for blitting the final output
OpenGL::VertexArray dummyVAO;
OpenGL::VertexBuffer dummyVBO;
@ -28,6 +40,8 @@ class Renderer {
static constexpr u32 regNum = 0x300; // Number of internal PICA registers
const std::array<u32, regNum>& regs;
// Returns an OpenGL framebuffer object by value. The definition is not in this header.
// NOTE(review): presumably resolves the FBO for the currently configured colour buffer via colourBufferCache - confirm against the definition
OpenGL::Framebuffer getColourFBO();
MAKE_LOG_FUNCTION(log, rendererLogger)
public:
@ -40,5 +54,19 @@ public:
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM
void drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 count); // Draw the given vertices
// Store the framebuffer dimensions (shared by the colour and the depth buffer) in fbSize
void setFBSize(u32 width, u32 height) {
    auto& dimensions = fbSize;

    dimensions.x() = width;
    dimensions.y() = height;
}
// Select the colour buffer format from an already-typed enum value
void setColourFormat(ColourBuffer::Formats format) {
    colourBufferFormat = format;
}
// Raw-value overload: the number is cast to ColourBuffer::Formats as-is, with no range check
void setColourFormat(u32 format) {
    setColourFormat(static_cast<ColourBuffer::Formats>(format));
}
// Select the depth buffer format from an already-typed enum value
void setDepthFormat(DepthBuffer::Formats format) {
    depthBufferFormat = format;
}
// Raw-value overload: the number is cast to DepthBuffer::Formats as-is, with no range check
void setDepthFormat(u32 format) {
    setDepthFormat(static_cast<DepthBuffer::Formats>(format));
}
// Record the VRAM location of the colour buffer
void setColourBufferLoc(u32 loc) {
    this->colourBufferLoc = loc;
}
// Record the VRAM location of the depth/stencil buffer
void setDepthBufferLoc(u32 loc) {
    this->depthBufferLoc = loc;
}
static constexpr u32 vertexBufferSize = 0x1500;
};

View file

@ -0,0 +1,62 @@
#pragma once
#include <array>
#include <cstddef>
#include <functional>
#include <optional>
#include <type_traits>
#include "surfaces.hpp"
// Surface cache class that can fit "capacity" instances of the "SurfaceType" class of surfaces
// SurfaceType *must* have all of the following
// - An "allocate" function that allocates GL resources for the surfaces
// - A "free" function that frees up all resources the surface is taking up
// - A "matches" function that, when provided with a SurfaceType object reference,
// will tell us if the 2 surfaces match (only as far as location in VRAM, format, dimensions, etc.
// are concerned). We could overload the == operator, but that implies full equality,
// including equality of the allocated OpenGL resources, which we don't want
// - A "valid" member that tells us whether the surface is still valid or not
template <typename SurfaceType, size_t capacity>
class SurfaceCache {
    // Vanilla std::optional can't hold actual references
    using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>;
    static_assert(std::is_same<SurfaceType, ColourBuffer>() || std::is_same<SurfaceType, DepthBuffer>(),
        "Invalid surface type");

    // Number of surfaces added since the last reset(). Starts at 0 so that add() is well-defined
    // on a cache that has never been reset
    size_t size = 0;
    // Fixed pool of slots; a slot is in use if and only if its "valid" member is true
    std::array<SurfaceType, capacity> buffer;

public:
    // Empties the cache: calls free() on every slot (in use or not) and zeroes the surface count
    void reset() {
        size = 0;
        for (auto& e : buffer) { // Free the VRAM of all surfaces
            e.free();
        }
    }

    // Looks for a valid cached surface that matches "other" (as defined by SurfaceType::matches)
    // Returns a reference to the cached entry, or std::nullopt if there is none
    OptionalRef find(SurfaceType& other) {
        for (auto& e : buffer) {
            // Check validity first, so that the fields of unused slots are never compared
            if (e.valid && e.matches(other))
                return e;
        }

        return std::nullopt;
    }

    // Adds a surface object to the cache and returns it
    // The surface is copied into the first unused slot and allocate() is called on the cached copy
    // Panics if the cache already holds "capacity" surfaces
    SurfaceType add(SurfaceType& surface) {
        if (size >= capacity) {
            Helpers::panic("Surface cache full! Add emptying!");
        }
        size++;

        // Find an invalid entry in the cache and overwrite it with the new surface
        for (auto& e : buffer) {
            if (!e.valid) {
                e = surface;
                e.allocate();
                return e;
            }
        }

        // This should be unreachable but helps to panic anyways
        Helpers::panic("Couldn't add surface to cache\n");
    }
};

View file

@ -0,0 +1,125 @@
#pragma once
#include "boost/icl/interval.hpp"
#include "helpers.hpp"
#include "opengl.hpp"
template <typename T>
using Interval = boost::icl::right_open_interval<T>;
// Description of a colour render target living in 3DS VRAM, plus the OpenGL objects backing it
struct ColourBuffer {
    enum class Formats : u32 {
        RGBA8 = 0,
        BGR8 = 1,
        RGB5A1 = 2,
        RGB565 = 3,
        RGBA4 = 4,

        Trash1 = 5, Trash2 = 6, Trash3 = 7 // Technically selectable, but their function is unknown
    };

    u32 location;        // Start address of the buffer in VRAM
    Formats format;      // Pixel format
    OpenGL::uvec2 size;  // Width (x) and height (y) in pixels
    bool valid;          // False for unused/freed surfaces

    // Range of VRAM taken up by buffer
    Interval<u32> range;
    // OpenGL resources allocated to buffer
    OpenGL::Texture texture;
    OpenGL::Framebuffer fbo;

    // Creates an invalid placeholder surface
    // Delegates to the main constructor so that every field (location, format, size, range) has a
    // defined value, since placeholders still get copied and compared by the surface cache
    ColourBuffer() : ColourBuffer(0, Formats::RGBA8, 0, 0, false) {}

    // loc: Start address in VRAM
    // format: Pixel format
    // x, y: Width and height in pixels
    // valid: Whether the surface should be considered live
    ColourBuffer(u32 loc, Formats format, u32 x, u32 y, bool valid = true)
        : location(loc), format(format), size({x, y}), valid(valid) {

        // Computed in 64 bits so the addition itself cannot wrap around
        const u64 endLoc = static_cast<u64>(loc) + sizeInBytes();
        // TODO: Check if start and end are valid here. The end address is currently truncated to 32 bits unchecked
        range = Interval<u32>(loc, static_cast<u32>(endLoc));
    }

    // Placeholder: GL resource allocation is not implemented yet, so this only prints a reminder
    void allocate() {
        printf("Make this colour buffer allocate itself\n");
    }

    // Marks the surface as invalid. Releasing the GL resources is not implemented yet
    void free() {
        valid = false;
        printf("Make this colour buffer free itself\n");
    }

    // True if both surfaces have the same VRAM location, format and dimensions
    // Neither the "valid" flag nor the GL resources take part in the comparison
    bool matches(ColourBuffer& other) {
        return location == other.location && format == other.format &&
            size.x() == other.size.x() && size.y() == other.size.y();
    }

    // Size occupied by each pixel in bytes
    // All formats are 16BPP except for RGBA8 (32BPP) and BGR8 (24BPP)
    size_t sizePerPixel() {
        switch (format) {
            case Formats::BGR8: return 3;
            case Formats::RGBA8: return 4;
            default: return 2;
        }
    }

    // Total size of the buffer in bytes (width * height * bytes per pixel)
    size_t sizeInBytes() {
        return static_cast<size_t>(size.x()) * static_cast<size_t>(size.y()) * sizePerPixel();
    }
};
// Description of a depth/stencil render target living in 3DS VRAM, plus the OpenGL texture backing it
struct DepthBuffer {
    enum class Formats : u32 {
        Depth16 = 0,
        Garbage = 1,
        Depth24 = 2,
        Depth24Stencil8 = 3
    };

    u32 location;        // Start address of the buffer in VRAM
    Formats format;      // Depth(/stencil) format
    OpenGL::uvec2 size;  // Implicitly set to the size of the framebuffer
    bool valid;          // False for unused/freed surfaces

    // Range of VRAM taken up by buffer
    Interval<u32> range;
    // OpenGL texture used for storing depth/stencil
    OpenGL::Texture texture;

    // Creates an invalid placeholder surface
    // Delegates to the main constructor so that every field (location, format, size, range) has a
    // defined value, since placeholders still get copied and compared by the surface cache
    DepthBuffer() : DepthBuffer(0, Formats::Depth16, 0, 0, false) {}

    // loc: Start address in VRAM
    // format: Depth(/stencil) format
    // x, y: Width and height in pixels
    // valid: Whether the surface should be considered live
    DepthBuffer(u32 loc, Formats format, u32 x, u32 y, bool valid = true) :
        location(loc), format(format), size({x, y}), valid(valid) {

        // Fill in the VRAM range the same way ColourBuffer does
        // Computed in 64 bits so the addition itself cannot wrap around
        const u64 endLoc = static_cast<u64>(loc) + sizeInBytes();
        // TODO: Check if start and end are valid here. The end address is currently truncated to 32 bits unchecked
        range = Interval<u32>(loc, static_cast<u32>(endLoc));
    }

    // Only Depth24Stencil8 carries stencil data
    bool hasStencil() {
        return format == Formats::Depth24Stencil8;
    }

    // Placeholder: GL resource allocation is not implemented yet, so this only prints a reminder
    void allocate() {
        printf("Make this depth buffer allocate itself\n");
    }

    // Marks the surface as invalid. Releasing the GL resources is not implemented yet
    void free() {
        valid = false;
        printf("Make this depth buffer free itself\n");
    }

    // True if both surfaces have the same VRAM location, format and dimensions
    // Neither the "valid" flag nor the GL texture take part in the comparison
    bool matches(DepthBuffer& other) {
        return location == other.location && format == other.format &&
            size.x() == other.size.x() && size.y() == other.size.y();
    }

    // Size occupied by each pixel in bytes
    size_t sizePerPixel() {
        switch (format) {
            case Formats::Depth16: return 2;
            case Formats::Depth24: return 3;
            case Formats::Depth24Stencil8: return 4;

            default: return 1; // Invalid format
        }
    }

    // Total size of the buffer in bytes (width * height * bytes per pixel)
    size_t sizeInBytes() {
        return static_cast<size_t>(size.x()) * static_cast<size_t>(size.y()) * sizePerPixel();
    }
};