mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-17 19:21:30 +12:00
[GPU] Surface cache vol 1
This commit is contained in:
parent
57ef4e25e7
commit
ecbb33b906
7 changed files with 276 additions and 0 deletions
|
@ -13,6 +13,7 @@ set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||||
include_directories(${PROJECT_SOURCE_DIR}/include/)
|
include_directories(${PROJECT_SOURCE_DIR}/include/)
|
||||||
include_directories(${PROJECT_SOURCE_DIR}/include/kernel)
|
include_directories(${PROJECT_SOURCE_DIR}/include/kernel)
|
||||||
include_directories (${FMT_INCLUDE_DIR})
|
include_directories (${FMT_INCLUDE_DIR})
|
||||||
|
include_directories(third_party/boost/)
|
||||||
include_directories(third_party/elfio/)
|
include_directories(third_party/elfio/)
|
||||||
include_directories(third_party/gl3w/)
|
include_directories(third_party/gl3w/)
|
||||||
include_directories(third_party/imgui/)
|
include_directories(third_party/imgui/)
|
||||||
|
@ -79,6 +80,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/opengl.hpp inc
|
||||||
include/services/dsp.hpp include/services/cfg.hpp include/services/region_codes.hpp
|
include/services/dsp.hpp include/services/cfg.hpp include/services/region_codes.hpp
|
||||||
include/fs/archive_save_data.hpp include/fs/archive_sdmc.hpp include/services/ptm.hpp
|
include/fs/archive_save_data.hpp include/fs/archive_sdmc.hpp include/services/ptm.hpp
|
||||||
include/services/mic.hpp include/services/cecd.hpp include/renderer_gl/renderer_gl.hpp
|
include/services/mic.hpp include/services/cecd.hpp include/renderer_gl/renderer_gl.hpp
|
||||||
|
include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp
|
||||||
)
|
)
|
||||||
|
|
||||||
set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp
|
set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp
|
||||||
|
|
|
@ -14,6 +14,10 @@ namespace PICAInternalRegs {
|
||||||
// Framebuffer registers
|
// Framebuffer registers
|
||||||
AlphaTestConfig = 0x104,
|
AlphaTestConfig = 0x104,
|
||||||
DepthAndColorMask = 0x107,
|
DepthAndColorMask = 0x107,
|
||||||
|
ColourBufferFormat = 0x117,
|
||||||
|
DepthBufferLoc = 0x11C,
|
||||||
|
ColourBufferLoc = 0x11D,
|
||||||
|
FramebufferSize = 0x11E,
|
||||||
|
|
||||||
// Geometry pipeline registers
|
// Geometry pipeline registers
|
||||||
VertexAttribLoc = 0x200,
|
VertexAttribLoc = 0x200,
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
#include "helpers.hpp"
|
#include "helpers.hpp"
|
||||||
#include "logger.hpp"
|
#include "logger.hpp"
|
||||||
#include "opengl.hpp"
|
#include "opengl.hpp"
|
||||||
|
#include "surface_cache.hpp"
|
||||||
|
|
||||||
struct Vertex {
|
struct Vertex {
|
||||||
OpenGL::vec4 position;
|
OpenGL::vec4 position;
|
||||||
|
@ -21,6 +22,17 @@ class Renderer {
|
||||||
GLint alphaControlLoc = -1;
|
GLint alphaControlLoc = -1;
|
||||||
u32 oldAlphaControl = 0;
|
u32 oldAlphaControl = 0;
|
||||||
|
|
||||||
|
SurfaceCache<DepthBuffer, 10> depthBufferCache;
|
||||||
|
SurfaceCache<ColourBuffer, 10> colourBufferCache;
|
||||||
|
OpenGL::uvec2 fbSize; // The size of the framebuffer (i.e. both the colour and depth buffer)
|
||||||
|
|
||||||
|
u32 colourBufferLoc; // Location in 3DS VRAM for the colour buffer
|
||||||
|
ColourBuffer::Formats colourBufferFormat; // Format of the colours stored in the colour buffer
|
||||||
|
|
||||||
|
// Same for the depth/stencil buffer
|
||||||
|
u32 depthBufferLoc;
|
||||||
|
DepthBuffer::Formats depthBufferFormat;
|
||||||
|
|
||||||
// Dummy VAO/VBO for blitting the final output
|
// Dummy VAO/VBO for blitting the final output
|
||||||
OpenGL::VertexArray dummyVAO;
|
OpenGL::VertexArray dummyVAO;
|
||||||
OpenGL::VertexBuffer dummyVBO;
|
OpenGL::VertexBuffer dummyVBO;
|
||||||
|
@ -28,6 +40,8 @@ class Renderer {
|
||||||
static constexpr u32 regNum = 0x300; // Number of internal PICA registers
|
static constexpr u32 regNum = 0x300; // Number of internal PICA registers
|
||||||
const std::array<u32, regNum>& regs;
|
const std::array<u32, regNum>& regs;
|
||||||
|
|
||||||
|
OpenGL::Framebuffer getColourFBO();
|
||||||
|
|
||||||
MAKE_LOG_FUNCTION(log, rendererLogger)
|
MAKE_LOG_FUNCTION(log, rendererLogger)
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -40,5 +54,19 @@ public:
|
||||||
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM
|
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM
|
||||||
void drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 count); // Draw the given vertices
|
void drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 count); // Draw the given vertices
|
||||||
|
|
||||||
|
// Store the framebuffer dimensions in fbSize. The same size is later used when
// building the colour buffer lookup key in getColourFBO().
void setFBSize(u32 width, u32 height) {
	// The two components are independent of each other
	fbSize.y() = height;
	fbSize.x() = width;
}
|
||||||
|
|
||||||
|
// Set the colour buffer format from an already-typed format value
void setColourFormat(ColourBuffer::Formats format) { colourBufferFormat = format; }
|
||||||
|
// Set the colour buffer format from a raw integer. The value is cast directly to ColourBuffer::Formats with no range check
void setColourFormat(u32 format) { colourBufferFormat = static_cast<ColourBuffer::Formats>(format); }
|
||||||
|
|
||||||
|
// Set the depth buffer format from an already-typed format value
void setDepthFormat(DepthBuffer::Formats format) { depthBufferFormat = format; }
|
||||||
|
// Set the depth buffer format from a raw integer. The value is cast directly to DepthBuffer::Formats with no range check
void setDepthFormat(u32 format) { depthBufferFormat = static_cast<DepthBuffer::Formats>(format); }
|
||||||
|
|
||||||
|
// Set the location in 3DS VRAM of the colour buffer
void setColourBufferLoc(u32 loc) { colourBufferLoc = loc; }
|
||||||
|
// Set the location of the depth/stencil buffer (the counterpart of setColourBufferLoc)
void setDepthBufferLoc(u32 loc) { depthBufferLoc = loc; }
|
||||||
|
|
||||||
static constexpr u32 vertexBufferSize = 0x1500;
|
static constexpr u32 vertexBufferSize = 0x1500;
|
||||||
};
|
};
|
62
include/renderer_gl/surface_cache.hpp
Normal file
62
include/renderer_gl/surface_cache.hpp
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
#pragma once
|
||||||
|
#include <functional>
|
||||||
|
#include <optional>
|
||||||
|
#include "surfaces.hpp"
|
||||||
|
|
||||||
|
// Surface cache class that can fit "capacity" instances of the "SurfaceType" class of surfaces
|
||||||
|
// SurfaceType *must* have all of the following
|
||||||
|
// - An "allocate" function that allocates GL resources for the surfaces
|
||||||
|
// - A "free" function that frees up all resources the surface is taking up
|
||||||
|
// - A "matches" function that, when provided with a SurfaceType object reference
|
||||||
|
// Will tell us if the 2 surfaces match (Only as far as location in VRAM, format, dimensions, etc)
|
||||||
|
// Are concerned. We could overload the == operator, but that implies full equality
|
||||||
|
// Including equality of the allocated OpenGL resources, which we don't want
|
||||||
|
// - A "valid" member that tells us whether the function is still valid or not
|
||||||
|
template <typename SurfaceType, size_t capacity>
|
||||||
|
class SurfaceCache {
|
||||||
|
// Vanilla std::optional can't hold actual references
|
||||||
|
using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>;
|
||||||
|
static_assert(std::is_same<SurfaceType, ColourBuffer>() || std::is_same<SurfaceType, DepthBuffer>(),
|
||||||
|
"Invalid surface type");
|
||||||
|
|
||||||
|
size_t size;
|
||||||
|
std::array<SurfaceType, capacity> buffer;
|
||||||
|
|
||||||
|
public:
|
||||||
|
void reset() {
|
||||||
|
size = 0;
|
||||||
|
for (auto& e : buffer) { // Free the VRAM of all surfaces
|
||||||
|
e.free();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
OptionalRef find(SurfaceType& other) {
|
||||||
|
for (auto& e : buffer) {
|
||||||
|
if (e.matches(other) && e.valid)
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adds a surface object to the cache and returns it
|
||||||
|
SurfaceType add(SurfaceType& surface) {
|
||||||
|
if (size >= capacity) {
|
||||||
|
Helpers::panic("Surface cache full! Add emptying!");
|
||||||
|
}
|
||||||
|
size++;
|
||||||
|
|
||||||
|
// Find an invalid entry in the cache and overwrite it with the new surface
|
||||||
|
for (auto& e : buffer) {
|
||||||
|
if (!e.valid) {
|
||||||
|
e = surface;
|
||||||
|
e.allocate();
|
||||||
|
Sleep(2000);
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This should be unreachable but helps to panic anyways
|
||||||
|
Helpers::panic("Couldn't add surface to cache\n");
|
||||||
|
}
|
||||||
|
};
|
125
include/renderer_gl/surfaces.hpp
Normal file
125
include/renderer_gl/surfaces.hpp
Normal file
|
@ -0,0 +1,125 @@
|
||||||
|
#pragma once
|
||||||
|
#include "boost/icl/interval.hpp"
|
||||||
|
#include "helpers.hpp"
|
||||||
|
#include "opengl.hpp"
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
using Interval = boost::icl::right_open_interval<T>;
|
||||||
|
|
||||||
|
struct ColourBuffer {
|
||||||
|
enum class Formats : u32 {
|
||||||
|
RGBA8 = 0,
|
||||||
|
BGR8 = 1,
|
||||||
|
RGB5A1 = 2,
|
||||||
|
RGB565 = 3,
|
||||||
|
RGBA4 = 4,
|
||||||
|
|
||||||
|
Trash1 = 5, Trash2 = 6, Trash3 = 7 // Technically selectable, but their function is unknown
|
||||||
|
};
|
||||||
|
|
||||||
|
u32 location;
|
||||||
|
Formats format;
|
||||||
|
OpenGL::uvec2 size;
|
||||||
|
bool valid;
|
||||||
|
|
||||||
|
// Range of VRAM taken up by buffer
|
||||||
|
Interval<u32> range;
|
||||||
|
// OpenGL resources allocated to buffer
|
||||||
|
OpenGL::Texture texture;
|
||||||
|
OpenGL::Framebuffer fbo;
|
||||||
|
|
||||||
|
ColourBuffer() : valid(false) {}
|
||||||
|
|
||||||
|
ColourBuffer(u32 loc, Formats format, u32 x, u32 y, bool valid = true)
|
||||||
|
: location(loc), format(format), size({x, y}), valid(valid) {
|
||||||
|
|
||||||
|
u64 endLoc = (u64)loc + sizeInBytes();
|
||||||
|
// Check if start and end are valid here
|
||||||
|
range = Interval<u32>(loc, (u32)endLoc);
|
||||||
|
}
|
||||||
|
|
||||||
|
void allocate() {
|
||||||
|
printf("Make this colour buffer allocate itself\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void free() {
|
||||||
|
valid = false;
|
||||||
|
printf("Make this colour buffer free itself\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
bool matches(ColourBuffer& other) {
|
||||||
|
return location == other.location && format == other.format &&
|
||||||
|
size.x() == other.size.x() && size.y() == other.size.y();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Size occupied by each pixel in bytes
|
||||||
|
// All formats are 16BPP except for RGBA8 (32BPP) and BGR8 (24BPP)
|
||||||
|
size_t sizePerPixel() {
|
||||||
|
switch (format) {
|
||||||
|
case Formats::BGR8: return 3;
|
||||||
|
case Formats::RGBA8: return 4;
|
||||||
|
default: return 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t sizeInBytes() {
|
||||||
|
return (size_t)size.x() * (size_t)size.y() * sizePerPixel();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct DepthBuffer {
|
||||||
|
enum class Formats : u32 {
|
||||||
|
Depth16 = 0,
|
||||||
|
Garbage = 1,
|
||||||
|
Depth24 = 2,
|
||||||
|
Depth24Stencil8 = 3
|
||||||
|
};
|
||||||
|
|
||||||
|
u32 location;
|
||||||
|
Formats format;
|
||||||
|
OpenGL::uvec2 size; // Implicitly set to the size of the framebuffer
|
||||||
|
bool valid;
|
||||||
|
|
||||||
|
// Range of VRAM taken up by buffer
|
||||||
|
Interval<u32> range;
|
||||||
|
// OpenGL texture used for storing depth/stencil
|
||||||
|
OpenGL::Texture texture;
|
||||||
|
|
||||||
|
DepthBuffer() : valid(false) {}
|
||||||
|
|
||||||
|
DepthBuffer(u32 loc, Formats format, u32 x, u32 y, bool valid = true) :
|
||||||
|
location(loc), format(format), size({x, y}), valid(valid) {}
|
||||||
|
|
||||||
|
bool hasStencil() {
|
||||||
|
return format == Formats::Depth24Stencil8;
|
||||||
|
}
|
||||||
|
|
||||||
|
void allocate() {
|
||||||
|
printf("Make this depth buffer allocate itself\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void free() {
|
||||||
|
valid = false;
|
||||||
|
printf("Make this depth buffer free itself\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
bool matches(DepthBuffer& other) {
|
||||||
|
return location == other.location && format == other.format &&
|
||||||
|
size.x() == other.size.x() && size.y() == other.size.y();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Size occupied by each pixel in bytes
|
||||||
|
size_t sizePerPixel() {
|
||||||
|
switch (format) {
|
||||||
|
case Formats::Depth16: return 2;
|
||||||
|
case Formats::Depth24: return 3;
|
||||||
|
case Formats::Depth24Stencil8: return 4;
|
||||||
|
|
||||||
|
default: return 1; // Invalid format
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t sizeInBytes() {
|
||||||
|
return (size_t)size.x() * (size_t)size.y() * sizePerPixel();
|
||||||
|
}
|
||||||
|
};
|
|
@ -59,6 +59,31 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
||||||
fixedAttribMask = (value >> 16) & 0xfff; // Determines which vertex attributes are fixed for all vertices
|
fixedAttribMask = (value >> 16) & 0xfff; // Determines which vertex attributes are fixed for all vertices
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ColourBufferLoc: {
|
||||||
|
u32 loc = (value & 0x0fffffff) << 3;
|
||||||
|
renderer.setColourBufferLoc(loc);
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
|
||||||
|
case ColourBufferFormat: {
|
||||||
|
u32 format = (value >> 16) & 7;
|
||||||
|
renderer.setColourFormat(format);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case DepthBufferLoc: {
|
||||||
|
u32 loc = (value & 0x0fffffff) << 3;
|
||||||
|
renderer.setDepthBufferLoc(loc);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case FramebufferSize: {
|
||||||
|
const u32 width = value & 0x7ff;
|
||||||
|
const u32 height = ((value >> 12) & 0x3ff) + 1;
|
||||||
|
renderer.setFBSize(width, height);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case VertexFloatUniformIndex:
|
case VertexFloatUniformIndex:
|
||||||
shaderUnit.vs.setFloatUniformIndex(value);
|
shaderUnit.vs.setFloatUniformIndex(value);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -105,6 +105,18 @@ const char* displayFragmentShader = R"(
|
||||||
}
|
}
|
||||||
)";
|
)";
|
||||||
|
|
||||||
|
void Renderer::reset() {
|
||||||
|
depthBufferCache.reset();
|
||||||
|
colourBufferCache.reset();
|
||||||
|
|
||||||
|
// Init the colour/depth buffer settings to some random defaults on reset
|
||||||
|
colourBufferLoc = 0;
|
||||||
|
colourBufferFormat = ColourBuffer::Formats::RGBA8;
|
||||||
|
|
||||||
|
depthBufferLoc = 0;
|
||||||
|
depthBufferFormat = DepthBuffer::Formats::Depth16;
|
||||||
|
}
|
||||||
|
|
||||||
void Renderer::initGraphicsContext() {
|
void Renderer::initGraphicsContext() {
|
||||||
// Set up texture for top screen
|
// Set up texture for top screen
|
||||||
fboTexture.create(400, 240, GL_RGBA8);
|
fboTexture.create(400, 240, GL_RGBA8);
|
||||||
|
@ -156,6 +168,7 @@ void Renderer::initGraphicsContext() {
|
||||||
|
|
||||||
dummyVBO.create();
|
dummyVBO.create();
|
||||||
dummyVAO.create();
|
dummyVAO.create();
|
||||||
|
reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Renderer::getGraphicsContext() {
|
void Renderer::getGraphicsContext() {
|
||||||
|
@ -168,6 +181,8 @@ void Renderer::getGraphicsContext() {
|
||||||
triangleProgram.use();
|
triangleProgram.use();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
OpenGL::Framebuffer poop;
|
||||||
|
|
||||||
void Renderer::drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 count) {
|
void Renderer::drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 count) {
|
||||||
// Adjust alpha test if necessary
|
// Adjust alpha test if necessary
|
||||||
const u32 alphaControl = regs[PICAInternalRegs::AlphaTestConfig];
|
const u32 alphaControl = regs[PICAInternalRegs::AlphaTestConfig];
|
||||||
|
@ -176,6 +191,8 @@ void Renderer::drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 c
|
||||||
glUniform1ui(alphaControlLoc, alphaControl);
|
glUniform1ui(alphaControlLoc, alphaControl);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
poop = getColourFBO();
|
||||||
|
|
||||||
const u32 depthControl = regs[PICAInternalRegs::DepthAndColorMask];
|
const u32 depthControl = regs[PICAInternalRegs::DepthAndColorMask];
|
||||||
bool depthEnable = depthControl & 1;
|
bool depthEnable = depthControl & 1;
|
||||||
bool depthWriteEnable = (depthControl >> 12) & 1;
|
bool depthWriteEnable = (depthControl >> 12) & 1;
|
||||||
|
@ -253,3 +270,16 @@ void Renderer::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 cont
|
||||||
OpenGL::setClearColor(r, g, b, a);
|
OpenGL::setClearColor(r, g, b, a);
|
||||||
OpenGL::clearColor();
|
OpenGL::clearColor();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the FBO of the colour buffer described by the current colour buffer
// location, format and framebuffer size. A matching cached buffer is reused if
// one exists; otherwise a new entry is added to the colour buffer cache.
OpenGL::Framebuffer Renderer::getColourFBO() {
	// Describe the buffer we want; this object only serves as the lookup key
	ColourBuffer wanted(colourBufferLoc, colourBufferFormat, fbSize.x(), fbSize.y());

	// Cache hit: hand back the FBO of the existing entry
	if (auto cached = colourBufferCache.find(wanted)) {
		return cached->get().fbo;
	}

	// Cache miss: store the new buffer in the cache and use its FBO
	return colourBufferCache.add(wanted).fbo;
}
|
Loading…
Add table
Add a link
Reference in a new issue