mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-06 22:25:41 +12:00
[GPU] Surface cache vol 1
This commit is contained in:
parent
57ef4e25e7
commit
ecbb33b906
7 changed files with 276 additions and 0 deletions
|
@ -13,6 +13,7 @@ set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
|
|||
include_directories(${PROJECT_SOURCE_DIR}/include/)
|
||||
include_directories(${PROJECT_SOURCE_DIR}/include/kernel)
|
||||
include_directories (${FMT_INCLUDE_DIR})
|
||||
include_directories(third_party/boost/)
|
||||
include_directories(third_party/elfio/)
|
||||
include_directories(third_party/gl3w/)
|
||||
include_directories(third_party/imgui/)
|
||||
|
@ -79,6 +80,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/opengl.hpp inc
|
|||
include/services/dsp.hpp include/services/cfg.hpp include/services/region_codes.hpp
|
||||
include/fs/archive_save_data.hpp include/fs/archive_sdmc.hpp include/services/ptm.hpp
|
||||
include/services/mic.hpp include/services/cecd.hpp include/renderer_gl/renderer_gl.hpp
|
||||
include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp
|
||||
)
|
||||
|
||||
set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp
|
||||
|
|
|
@ -14,6 +14,10 @@ namespace PICAInternalRegs {
|
|||
// Framebuffer registers
|
||||
AlphaTestConfig = 0x104,
|
||||
DepthAndColorMask = 0x107,
|
||||
ColourBufferFormat = 0x117,
|
||||
DepthBufferLoc = 0x11C,
|
||||
ColourBufferLoc = 0x11D,
|
||||
FramebufferSize = 0x11E,
|
||||
|
||||
// Geometry pipeline registers
|
||||
VertexAttribLoc = 0x200,
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include "helpers.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "opengl.hpp"
|
||||
#include "surface_cache.hpp"
|
||||
|
||||
struct Vertex {
|
||||
OpenGL::vec4 position;
|
||||
|
@ -21,6 +22,17 @@ class Renderer {
|
|||
GLint alphaControlLoc = -1;
|
||||
u32 oldAlphaControl = 0;
|
||||
|
||||
SurfaceCache<DepthBuffer, 10> depthBufferCache;
|
||||
SurfaceCache<ColourBuffer, 10> colourBufferCache;
|
||||
OpenGL::uvec2 fbSize; // The size of the framebuffer (ie both the colour and depth buffer)'
|
||||
|
||||
u32 colourBufferLoc; // Location in 3DS VRAM for the colour buffer
|
||||
ColourBuffer::Formats colourBufferFormat; // Format of the colours stored in the colour buffer
|
||||
|
||||
// Same for the depth/stencil buffer
|
||||
u32 depthBufferLoc;
|
||||
DepthBuffer::Formats depthBufferFormat;
|
||||
|
||||
// Dummy VAO/VBO for blitting the final output
|
||||
OpenGL::VertexArray dummyVAO;
|
||||
OpenGL::VertexBuffer dummyVBO;
|
||||
|
@ -28,6 +40,8 @@ class Renderer {
|
|||
static constexpr u32 regNum = 0x300; // Number of internal PICA registers
|
||||
const std::array<u32, regNum>& regs;
|
||||
|
||||
OpenGL::Framebuffer getColourFBO();
|
||||
|
||||
MAKE_LOG_FUNCTION(log, rendererLogger)
|
||||
|
||||
public:
|
||||
|
@ -40,5 +54,19 @@ public:
|
|||
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM
|
||||
void drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 count); // Draw the given vertices
|
||||
|
||||
void setFBSize(u32 width, u32 height) {
|
||||
fbSize.x() = width;
|
||||
fbSize.y() = height;
|
||||
}
|
||||
|
||||
void setColourFormat(ColourBuffer::Formats format) { colourBufferFormat = format; }
|
||||
void setColourFormat(u32 format) { colourBufferFormat = static_cast<ColourBuffer::Formats>(format); }
|
||||
|
||||
void setDepthFormat(DepthBuffer::Formats format) { depthBufferFormat = format; }
|
||||
void setDepthFormat(u32 format) { depthBufferFormat = static_cast<DepthBuffer::Formats>(format); }
|
||||
|
||||
void setColourBufferLoc(u32 loc) { colourBufferLoc = loc; }
|
||||
void setDepthBufferLoc(u32 loc) { depthBufferLoc = loc; }
|
||||
|
||||
static constexpr u32 vertexBufferSize = 0x1500;
|
||||
};
|
62
include/renderer_gl/surface_cache.hpp
Normal file
62
include/renderer_gl/surface_cache.hpp
Normal file
|
@ -0,0 +1,62 @@
|
|||
#pragma once
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include "surfaces.hpp"
|
||||
|
||||
// Surface cache class that can fit "capacity" instances of the "SurfaceType" class of surfaces
|
||||
// SurfaceType *must* have all of the following
|
||||
// - An "allocate" function that allocates GL resources for the surfaces
|
||||
// - A "free" function that frees up all resources the surface is taking up
|
||||
// - A "matches" function that, when provided with a SurfaceType object reference
|
||||
// Will tell us if the 2 surfaces match (Only as far as location in VRAM, format, dimensions, etc)
|
||||
// Are concerned. We could overload the == operator, but that implies full equality
|
||||
// Including equality of the allocated OpenGL resources, which we don't want
|
||||
// - A "valid" member that tells us whether the surface is still valid or not
|
||||
template <typename SurfaceType, size_t capacity>
|
||||
class SurfaceCache {
|
||||
// Vanilla std::optional can't hold actual references
|
||||
using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>;
|
||||
static_assert(std::is_same<SurfaceType, ColourBuffer>() || std::is_same<SurfaceType, DepthBuffer>(),
|
||||
"Invalid surface type");
|
||||
|
||||
size_t size;
|
||||
std::array<SurfaceType, capacity> buffer;
|
||||
|
||||
public:
|
||||
void reset() {
|
||||
size = 0;
|
||||
for (auto& e : buffer) { // Free the VRAM of all surfaces
|
||||
e.free();
|
||||
}
|
||||
}
|
||||
|
||||
OptionalRef find(SurfaceType& other) {
|
||||
for (auto& e : buffer) {
|
||||
if (e.matches(other) && e.valid)
|
||||
return e;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Adds a surface object to the cache and returns it
|
||||
SurfaceType add(SurfaceType& surface) {
|
||||
if (size >= capacity) {
|
||||
Helpers::panic("Surface cache full! Add emptying!");
|
||||
}
|
||||
size++;
|
||||
|
||||
// Find an invalid entry in the cache and overwrite it with the new surface
|
||||
for (auto& e : buffer) {
|
||||
if (!e.valid) {
|
||||
e = surface;
|
||||
e.allocate();
|
||||
Sleep(2000);
|
||||
return e;
|
||||
}
|
||||
}
|
||||
|
||||
// This should be unreachable but helps to panic anyways
|
||||
Helpers::panic("Couldn't add surface to cache\n");
|
||||
}
|
||||
};
|
125
include/renderer_gl/surfaces.hpp
Normal file
125
include/renderer_gl/surfaces.hpp
Normal file
|
@ -0,0 +1,125 @@
|
|||
#pragma once
|
||||
#include "boost/icl/interval.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "opengl.hpp"
|
||||
|
||||
template <typename T>
|
||||
using Interval = boost::icl::right_open_interval<T>;
|
||||
|
||||
struct ColourBuffer {
|
||||
enum class Formats : u32 {
|
||||
RGBA8 = 0,
|
||||
BGR8 = 1,
|
||||
RGB5A1 = 2,
|
||||
RGB565 = 3,
|
||||
RGBA4 = 4,
|
||||
|
||||
Trash1 = 5, Trash2 = 6, Trash3 = 7 // Technically selectable, but their function is unknown
|
||||
};
|
||||
|
||||
u32 location;
|
||||
Formats format;
|
||||
OpenGL::uvec2 size;
|
||||
bool valid;
|
||||
|
||||
// Range of VRAM taken up by buffer
|
||||
Interval<u32> range;
|
||||
// OpenGL resources allocated to buffer
|
||||
OpenGL::Texture texture;
|
||||
OpenGL::Framebuffer fbo;
|
||||
|
||||
ColourBuffer() : valid(false) {}
|
||||
|
||||
ColourBuffer(u32 loc, Formats format, u32 x, u32 y, bool valid = true)
|
||||
: location(loc), format(format), size({x, y}), valid(valid) {
|
||||
|
||||
u64 endLoc = (u64)loc + sizeInBytes();
|
||||
// Check if start and end are valid here
|
||||
range = Interval<u32>(loc, (u32)endLoc);
|
||||
}
|
||||
|
||||
void allocate() {
|
||||
printf("Make this colour buffer allocate itself\n");
|
||||
}
|
||||
|
||||
void free() {
|
||||
valid = false;
|
||||
printf("Make this colour buffer free itself\n");
|
||||
}
|
||||
|
||||
bool matches(ColourBuffer& other) {
|
||||
return location == other.location && format == other.format &&
|
||||
size.x() == other.size.x() && size.y() == other.size.y();
|
||||
}
|
||||
|
||||
// Size occupied by each pixel in bytes
|
||||
// All formats are 16BPP except for RGBA8 (32BPP) and BGR8 (24BPP)
|
||||
size_t sizePerPixel() {
|
||||
switch (format) {
|
||||
case Formats::BGR8: return 3;
|
||||
case Formats::RGBA8: return 4;
|
||||
default: return 2;
|
||||
}
|
||||
}
|
||||
|
||||
size_t sizeInBytes() {
|
||||
return (size_t)size.x() * (size_t)size.y() * sizePerPixel();
|
||||
}
|
||||
};
|
||||
|
||||
struct DepthBuffer {
|
||||
enum class Formats : u32 {
|
||||
Depth16 = 0,
|
||||
Garbage = 1,
|
||||
Depth24 = 2,
|
||||
Depth24Stencil8 = 3
|
||||
};
|
||||
|
||||
u32 location;
|
||||
Formats format;
|
||||
OpenGL::uvec2 size; // Implicitly set to the size of the framebuffer
|
||||
bool valid;
|
||||
|
||||
// Range of VRAM taken up by buffer
|
||||
Interval<u32> range;
|
||||
// OpenGL texture used for storing depth/stencil
|
||||
OpenGL::Texture texture;
|
||||
|
||||
DepthBuffer() : valid(false) {}
|
||||
|
||||
DepthBuffer(u32 loc, Formats format, u32 x, u32 y, bool valid = true) :
|
||||
location(loc), format(format), size({x, y}), valid(valid) {}
|
||||
|
||||
bool hasStencil() {
|
||||
return format == Formats::Depth24Stencil8;
|
||||
}
|
||||
|
||||
void allocate() {
|
||||
printf("Make this depth buffer allocate itself\n");
|
||||
}
|
||||
|
||||
void free() {
|
||||
valid = false;
|
||||
printf("Make this depth buffer free itself\n");
|
||||
}
|
||||
|
||||
bool matches(DepthBuffer& other) {
|
||||
return location == other.location && format == other.format &&
|
||||
size.x() == other.size.x() && size.y() == other.size.y();
|
||||
}
|
||||
|
||||
// Size occupied by each pixel in bytes
|
||||
size_t sizePerPixel() {
|
||||
switch (format) {
|
||||
case Formats::Depth16: return 2;
|
||||
case Formats::Depth24: return 3;
|
||||
case Formats::Depth24Stencil8: return 4;
|
||||
|
||||
default: return 1; // Invalid format
|
||||
}
|
||||
}
|
||||
|
||||
size_t sizeInBytes() {
|
||||
return (size_t)size.x() * (size_t)size.y() * sizePerPixel();
|
||||
}
|
||||
};
|
|
@ -59,6 +59,31 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
fixedAttribMask = (value >> 16) & 0xfff; // Determines which vertex attributes are fixed for all vertices
|
||||
break;
|
||||
|
||||
case ColourBufferLoc: {
|
||||
u32 loc = (value & 0x0fffffff) << 3;
|
||||
renderer.setColourBufferLoc(loc);
|
||||
break;
|
||||
};
|
||||
|
||||
case ColourBufferFormat: {
|
||||
u32 format = (value >> 16) & 7;
|
||||
renderer.setColourFormat(format);
|
||||
break;
|
||||
}
|
||||
|
||||
case DepthBufferLoc: {
|
||||
u32 loc = (value & 0x0fffffff) << 3;
|
||||
renderer.setDepthBufferLoc(loc);
|
||||
break;
|
||||
}
|
||||
|
||||
case FramebufferSize: {
|
||||
const u32 width = value & 0x7ff;
|
||||
const u32 height = ((value >> 12) & 0x3ff) + 1;
|
||||
renderer.setFBSize(width, height);
|
||||
break;
|
||||
}
|
||||
|
||||
case VertexFloatUniformIndex:
|
||||
shaderUnit.vs.setFloatUniformIndex(value);
|
||||
break;
|
||||
|
|
|
@ -105,6 +105,18 @@ const char* displayFragmentShader = R"(
|
|||
}
|
||||
)";
|
||||
|
||||
// Returns the renderer's surface state to its start-up condition
void Renderer::reset() {
    // Drop every cached surface: depth buffers first, then colour buffers
    depthBufferCache.reset();
    colourBufferCache.reset();

    // The values below are arbitrary defaults for the current render target settings
    colourBufferFormat = ColourBuffer::Formats::RGBA8;
    depthBufferFormat = DepthBuffer::Formats::Depth16;

    colourBufferLoc = 0;
    depthBufferLoc = 0;
}
|
||||
|
||||
void Renderer::initGraphicsContext() {
|
||||
// Set up texture for top screen
|
||||
fboTexture.create(400, 240, GL_RGBA8);
|
||||
|
@ -156,6 +168,7 @@ void Renderer::initGraphicsContext() {
|
|||
|
||||
dummyVBO.create();
|
||||
dummyVAO.create();
|
||||
reset();
|
||||
}
|
||||
|
||||
void Renderer::getGraphicsContext() {
|
||||
|
@ -168,6 +181,8 @@ void Renderer::getGraphicsContext() {
|
|||
triangleProgram.use();
|
||||
}
|
||||
|
||||
OpenGL::Framebuffer poop;
|
||||
|
||||
void Renderer::drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 count) {
|
||||
// Adjust alpha test if necessary
|
||||
const u32 alphaControl = regs[PICAInternalRegs::AlphaTestConfig];
|
||||
|
@ -176,6 +191,8 @@ void Renderer::drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 c
|
|||
glUniform1ui(alphaControlLoc, alphaControl);
|
||||
}
|
||||
|
||||
poop = getColourFBO();
|
||||
|
||||
const u32 depthControl = regs[PICAInternalRegs::DepthAndColorMask];
|
||||
bool depthEnable = depthControl & 1;
|
||||
bool depthWriteEnable = (depthControl >> 12) & 1;
|
||||
|
@ -252,4 +269,17 @@ void Renderer::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 cont
|
|||
|
||||
OpenGL::setClearColor(r, g, b, a);
|
||||
OpenGL::clearColor();
|
||||
}
|
||||
|
||||
// Returns the FBO of the colour buffer described by the current location, format and framebuffer size
OpenGL::Framebuffer Renderer::getColourFBO() {
    // Build a throwaway description of the wanted colour buffer to use as the cache lookup key
    ColourBuffer wanted(colourBufferLoc, colourBufferFormat, fbSize.x(), fbSize.y());

    // Cache hit: hand back the FBO of the surface that is already stored
    if (auto cached = colourBufferCache.find(wanted); cached.has_value()) {
        return cached.value().get().fbo;
    }

    // Cache miss: store the description in the cache (which allocates it) and use the new entry's FBO
    return colourBufferCache.add(wanted).fbo;
}
|
Loading…
Add table
Reference in a new issue