From b403e9a66ec97fbc2c99c3bcfae35fd299e56f0c Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 4 Jul 2023 23:26:18 +0300 Subject: [PATCH 01/13] Start work on GL state manager object --- CMakeLists.txt | 6 +-- include/PICA/gpu.hpp | 2 +- include/emulator.hpp | 5 ++- include/gl_state.hpp | 58 +++++++++++++++++++++++++++++ include/renderer_gl/renderer_gl.hpp | 6 ++- src/core/PICA/gpu.cpp | 5 ++- src/emulator.cpp | 8 +++- src/gl_state.cpp | 16 ++++++++ 8 files changed, 95 insertions(+), 11 deletions(-) create mode 100644 include/gl_state.hpp create mode 100644 src/gl_state.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f5edc420..7ce91389 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,8 +83,8 @@ else() message(FATAL_ERROR "Currently unsupported CPU architecture") endif() -set(SOURCE_FILES src/main.cpp src/emulator.cpp src/io_file.cpp src/core/CPU/cpu_dynarmic.cpp src/core/CPU/dynarmic_cycles.cpp - src/core/memory.cpp +set(SOURCE_FILES src/main.cpp src/emulator.cpp src/io_file.cpp src/gl_state.cpp src/core/CPU/cpu_dynarmic.cpp + src/core/CPU/dynarmic_cycles.cpp src/core/memory.cpp ) set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp) set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp @@ -138,7 +138,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/opengl.hpp inc include/PICA/dynapica/shader_rec_emitter_x64.hpp include/PICA/pica_hash.hpp include/result/result.hpp include/result/result_common.hpp include/result/result_fs.hpp include/result/result_fnd.hpp include/result/result_gsp.hpp include/result/result_kernel.hpp include/result/result_os.hpp - include/crypto/aes_engine.hpp include/metaprogramming.hpp include/PICA/pica_vertex.hpp + include/crypto/aes_engine.hpp include/metaprogramming.hpp include/PICA/pica_vertex.hpp include/gl_state.hpp ) set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp diff --git a/include/PICA/gpu.hpp 
b/include/PICA/gpu.hpp index ced2c557..c2fbc1c6 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -81,7 +81,7 @@ class GPU { // Set to false by the renderer when the lighting_lut is uploaded ot the GPU bool lightingLUTDirty = false; - GPU(Memory& mem); + GPU(Memory& mem, GLStateManager& gl); void initGraphicsContext() { renderer.initGraphicsContext(); } void getGraphicsContext() { renderer.getGraphicsContext(); } void display() { renderer.display(); } diff --git a/include/emulator.hpp b/include/emulator.hpp index 10279443..7cbc27b7 100644 --- a/include/emulator.hpp +++ b/include/emulator.hpp @@ -11,7 +11,7 @@ #include "crypto/aes_engine.hpp" #include "io_file.hpp" #include "memory.hpp" -#include "opengl.hpp" +#include "gl_state.hpp" enum class ROMType { None, ELF, NCSD }; @@ -22,6 +22,7 @@ class Emulator { Kernel kernel; Crypto::AESEngine aesEngine; + GLStateManager gl; SDL_Window* window; SDL_GLContext glContext; SDL_GameController* gameController; @@ -56,5 +57,5 @@ class Emulator { bool loadNCSD(const std::filesystem::path& path); bool loadELF(const std::filesystem::path& path); bool loadELF(std::ifstream& file); - void initGraphicsContext() { gpu.initGraphicsContext(); } + void initGraphicsContext(); }; diff --git a/include/gl_state.hpp b/include/gl_state.hpp new file mode 100644 index 00000000..3c9458d5 --- /dev/null +++ b/include/gl_state.hpp @@ -0,0 +1,58 @@ +#pragma once +#include + +#include "opengl.hpp" + +// GL state manager object for use in the OpenGL GPU renderer and potentially other things in the future (such as a potential ImGui GUI) +// This object is meant to help us avoid duplicate OpenGL calls (such as binding the same program twice, enabling/disabling a setting twice, etc) +// by checking if we actually *need* a state change. 
This is meant to avoid expensive driver calls and minimize unneeded state changes. +// A lot of code is in the header file instead of the relevant source file to make sure stuff gets inlined even without LTO, and +// because this header should ideally not be getting included in too many places. +// Code that does not need inlining, however, like the reset() function, should be in gl_state.cpp. +// This state manager may not handle every aspect of OpenGL, in which case anything not handled here should just be manipulated with raw +// OpenGL/opengl.hpp calls. However, anything that can be handled through the state manager should, or at least there should be an attempt to keep it +// consistent with the current GL state to avoid bugs/suboptimal code. + +// The state manager must *also* be a trivially constructible/destructible type, to ensure that no OpenGL functions get called sneakily without us +// knowing. This is important for when we want to e.g. add a Vulkan or misc backend. Would definitely not want to refactor all this.
So we try to be as +// backend-agnostic as possible + +struct GLStateManager { + bool blendEnabled; + bool depthEnabled; + + void reset(); + void resetBlend(); + void resetDepth(); + + void enableDepth() { + if (!depthEnabled) { + depthEnabled = true; + OpenGL::enableDepth(); + } + } + + void disableDepth() { + if (depthEnabled) { + depthEnabled = false; + OpenGL::disableDepth(); + } + } + + void enableBlend() { + if (!blendEnabled) { + blendEnabled = true; + OpenGL::enableBlend(); + } + } + + void disableBlend() { + if (blendEnabled) { + blendEnabled = false; + OpenGL::disableBlend(); + } + } +}; + +static_assert(std::is_trivially_constructible(), "OpenGL State Manager class is not trivially constructible!"); +static_assert(std::is_trivially_destructible(), "OpenGL State Manager class is not trivially destructible!"); \ No newline at end of file diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index bf85904b..0bf9b0a3 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -3,9 +3,9 @@ #include #include "PICA/float_types.hpp" +#include "gl_state.hpp" #include "helpers.hpp" #include "logger.hpp" -#include "opengl.hpp" #include "surface_cache.hpp" #include "textures.hpp" #include "PICA/regs.hpp" @@ -16,6 +16,8 @@ class GPU; class Renderer { GPU& gpu; + GLStateManager& gl; + OpenGL::Program triangleProgram; OpenGL::Program displayProgram; @@ -81,7 +83,7 @@ class Renderer { void updateLightingLUT(); public: - Renderer(GPU& gpu, const std::array& internalRegs) : gpu(gpu), regs(internalRegs) {} + Renderer(GPU& gpu, GLStateManager& gl, const std::array& internalRegs) : gpu(gpu), gl(gl), regs(internalRegs) {} void reset(); void display(); // Display the 3DS screen contents to the window diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 2efc4195..43bcf674 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -10,8 +10,9 @@ using namespace Floats; - -GPU::GPU(Memory& mem) 
: mem(mem), renderer(*this, regs) { +// Note: For when we have multiple backends, the GL state manager can stay here and have the constructor for the Vulkan-or-whatever renderer ignore it +// Thus, our GLStateManager being here does not negatively impact renderer-agnosticness +GPU::GPU(Memory& mem, GLStateManager& gl) : mem(mem), renderer(*this, gl, regs) { vram = new u8[vramSize]; mem.setVRAM(vram); // Give the bus a pointer to our VRAM } diff --git a/src/emulator.cpp b/src/emulator.cpp index bb96cadc..76a10698 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -1,6 +1,6 @@ #include "emulator.hpp" -Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory), memory(cpu.getTicksRef()) { +Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory, gl), memory(cpu.getTicksRef()) { if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS) < 0) { Helpers::panic("Failed to initialize SDL2"); } @@ -326,3 +326,9 @@ bool Emulator::loadELF(std::ifstream& file) { } return true; } + +// Reset our graphics context and initialize the GPU's graphics context +void Emulator::initGraphicsContext() { + gl.reset(); // TODO (For when we have multiple backends): Only do this if we are using OpenGL + gpu.initGraphicsContext(); +} \ No newline at end of file diff --git a/src/gl_state.cpp b/src/gl_state.cpp new file mode 100644 index 00000000..902971ab --- /dev/null +++ b/src/gl_state.cpp @@ -0,0 +1,16 @@ +#include "gl_state.hpp" + +void GLStateManager::resetBlend() { + blendEnabled = false; + OpenGL::disableBlend(); +} + +void GLStateManager::resetDepth() { + depthEnabled = false; + OpenGL::disableDepth(); +} + +void GLStateManager::reset() { + resetBlend(); + resetDepth(); +} \ No newline at end of file From a3d8f777b465999db7eabd5d4016be2d5a28a9f8 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Tue, 4 Jul 2023 23:37:48 +0300 Subject: [PATCH 02/13] PicaVertex -> PICA::Vertex --- 
include/PICA/gpu.hpp | 4 +-- include/PICA/pica_vertex.hpp | 48 +++++++++++++++------------- include/renderer_gl/renderer_gl.hpp | 2 +- src/core/PICA/gpu.cpp | 8 ++--- src/core/PICA/regs.cpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 20 ++++++------ 6 files changed, 43 insertions(+), 41 deletions(-) diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index c2fbc1c6..5bc06c47 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -28,7 +28,7 @@ class GPU { std::array currentAttributes; // Vertex attributes before being passed to the shader std::array immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission - std::array immediateModeVertices; + std::array immediateModeVertices; uint immediateModeVertIndex; uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading @@ -68,7 +68,7 @@ class GPU { u32* cmdBuffCurr = nullptr; Renderer renderer; - PicaVertex getImmediateModeVertex(); + PICA::Vertex getImmediateModeVertex(); public: // 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT diff --git a/include/PICA/pica_vertex.hpp b/include/PICA/pica_vertex.hpp index ea90796b..800dff9a 100644 --- a/include/PICA/pica_vertex.hpp +++ b/include/PICA/pica_vertex.hpp @@ -2,35 +2,37 @@ #include "PICA/float_types.hpp" #include -// A representation of the output vertex as it comes out of the vertex shader, with padding and all -struct PicaVertex { - using vec2f = std::array; - using vec3f = std::array; - using vec4f = std::array; +namespace PICA { + // A representation of the output vertex as it comes out of the vertex shader, with padding and all + struct Vertex { + using vec2f = std::array; + using vec3f = std::array; + using vec4f = std::array; - union { - struct { - vec4f positions; // Vertex position - vec4f quaternion; // Quaternion specifying the normal/tangent frame (for fragment lighting) - vec4f colour; // Vertex color - vec2f texcoord0; // Texcoords for texture unit 0 (Only 
U and V, W is stored separately for 3D textures!) - vec2f texcoord1; // Texcoords for TU 1 - Floats::f24 texcoord0_w; // W component for texcoord 0 if using a 3D texture - u32 padding; // Unused + union { + struct { + vec4f positions; // Vertex position + vec4f quaternion; // Quaternion specifying the normal/tangent frame (for fragment lighting) + vec4f colour; // Vertex color + vec2f texcoord0; // Texcoords for texture unit 0 (Only U and V, W is stored separately for 3D textures!) + vec2f texcoord1; // Texcoords for TU 1 + Floats::f24 texcoord0_w; // W component for texcoord 0 if using a 3D texture + u32 padding; // Unused - vec3f view; // View vector (for fragment lighting) - u32 padding2; // Unused - vec2f texcoord2; // Texcoords for TU 2 - } s; + vec3f view; // View vector (for fragment lighting) + u32 padding2; // Unused + vec2f texcoord2; // Texcoords for TU 2 + } s; - // The software, non-accelerated vertex loader writes here and then reads specific components from the above struct - Floats::f24 raw[0x20]; + // The software, non-accelerated vertex loader writes here and then reads specific components from the above struct + Floats::f24 raw[0x20]; + }; + Vertex() {} }; - PicaVertex() {} -}; +} // namespace PICA // Float is used here instead of Floats::f24 to ensure that Floats::f24 is properly sized for direct interpretations as a float by the render backend -#define ASSERT_POS(member, pos) static_assert(offsetof(PicaVertex, s.member) == pos * sizeof(float), "PicaVertex struct is broken!"); +#define ASSERT_POS(member, pos) static_assert(offsetof(PICA::Vertex, s.member) == pos * sizeof(float), "PICA::Vertex struct is broken!"); ASSERT_POS(positions, 0) ASSERT_POS(quaternion, 4) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 0bf9b0a3..b5ac8758 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -91,7 +91,7 @@ class Renderer { void getGraphicsContext(); // Set up graphics 
context for rendering void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags); // Perform display transfer - void drawVertices(PICA::PrimType primType, std::span vertices); // Draw the given vertices + void drawVertices(PICA::PrimType primType, std::span vertices); // Draw the given vertices void setFBSize(u32 width, u32 height) { fbSize.x() = width; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 43bcf674..a3163f86 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -61,7 +61,7 @@ void GPU::drawArrays(bool indexed) { } } -static std::array vertices; +static std::array vertices; template void GPU::drawArrays() { @@ -249,7 +249,7 @@ void GPU::drawArrays() { shaderUnit.vs.run(); } - PicaVertex& out = vertices[i]; + PICA::Vertex& out = vertices[i]; // Map shader outputs to fixed function properties const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7; for (int i = 0; i < totalShaderOutputs; i++) { @@ -265,8 +265,8 @@ void GPU::drawArrays() { renderer.drawVertices(primType, std::span(vertices).first(vertexCount)); } -PicaVertex GPU::getImmediateModeVertex() { - PicaVertex v; +PICA::Vertex GPU::getImmediateModeVertex() { + PICA::Vertex v; const int totalAttrCount = (regs[PICA::InternalRegs::VertexShaderAttrNum] & 0xf) + 1; // Copy immediate mode attributes to vertex shader unit diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index a0eb5adc..f62040dd 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -188,7 +188,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { immediateModeAttributes[immediateModeAttrIndex++] = attr; if (immediateModeAttrIndex == totalAttrCount) { - PicaVertex v = getImmediateModeVertex(); + PICA::Vertex v = getImmediateModeVertex(); immediateModeAttrIndex = 0; immediateModeVertices[immediateModeVertIndex++] = 
v; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 589457f5..de8a7375 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -645,34 +645,34 @@ void Renderer::initGraphicsContext() { displayProgram.use(); glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object - vbo.createFixedSize(sizeof(PicaVertex) * vertexBufferSize, GL_STREAM_DRAW); + vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize, GL_STREAM_DRAW); vbo.bind(); vao.create(); vao.bind(); // Position (x, y, z, w) attributes - vao.setAttributeFloat(0, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.positions)); + vao.setAttributeFloat(0, 4, sizeof(Vertex), offsetof(Vertex, s.positions)); vao.enableAttribute(0); // Quaternion attribute - vao.setAttributeFloat(1, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.quaternion)); + vao.setAttributeFloat(1, 4, sizeof(Vertex), offsetof(Vertex, s.quaternion)); vao.enableAttribute(1); // Colour attribute - vao.setAttributeFloat(2, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.colour)); + vao.setAttributeFloat(2, 4, sizeof(Vertex), offsetof(Vertex, s.colour)); vao.enableAttribute(2); // UV 0 attribute - vao.setAttributeFloat(3, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0)); + vao.setAttributeFloat(3, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord0)); vao.enableAttribute(3); // UV 1 attribute - vao.setAttributeFloat(4, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord1)); + vao.setAttributeFloat(4, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord1)); vao.enableAttribute(4); // UV 0 W-component attribute - vao.setAttributeFloat(5, 1, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0_w)); + vao.setAttributeFloat(5, 1, sizeof(Vertex), offsetof(Vertex, s.texcoord0_w)); vao.enableAttribute(5); // View - vao.setAttributeFloat(6, 3, sizeof(PicaVertex), offsetof(PicaVertex, s.view)); + vao.setAttributeFloat(6, 3, sizeof(Vertex), 
offsetof(Vertex, s.view)); vao.enableAttribute(6); // UV 2 attribute - vao.setAttributeFloat(7, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord2)); + vao.setAttributeFloat(7, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord2)); vao.enableAttribute(7); dummyVBO.create(); @@ -842,7 +842,7 @@ void Renderer::updateLightingLUT(){ gpu.lightingLUTDirty = false; } -void Renderer::drawVertices(PICA::PrimType primType, std::span vertices) { +void Renderer::drawVertices(PICA::PrimType primType, std::span vertices) { // The fourth type is meant to be "Geometry primitive". TODO: Find out what that is static constexpr std::array primTypes = { OpenGL::Triangle, OpenGL::TriangleStrip, OpenGL::TriangleFan, OpenGL::Triangle From e01f0ea4d0529dee829421e264f341be787d9e69 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 5 Jul 2023 00:02:52 +0300 Subject: [PATCH 03/13] [OpenGL] Add `const` to some functions --- include/opengl.hpp | 66 +++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/include/opengl.hpp b/include/opengl.hpp index b259381b..b85347bf 100644 --- a/include/opengl.hpp +++ b/include/opengl.hpp @@ -128,9 +128,9 @@ namespace OpenGL { #ifdef OPENGL_DESTRUCTORS ~VertexArray() { free(); } #endif - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void bind() { glBindVertexArray(m_handle); } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind() const { glBindVertexArray(m_handle); } template void setAttributeFloat(GLuint index, GLint size, GLsizei stride, const void* offset, bool normalized = GL_FALSE) { @@ -299,11 +299,11 @@ namespace OpenGL { #ifdef OPENGL_DESTRUCTORS ~Texture() { free(); } #endif - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void bind() { glBindTexture(m_binding, m_handle); } - int width() { return m_width; } - int height() 
{ return m_height; } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind() const { glBindTexture(m_binding, m_handle); } + int width() const { return m_width; } + int height() const { return m_height; } void free() { glDeleteTextures(1, &m_handle); } }; @@ -327,10 +327,10 @@ namespace OpenGL { #ifdef OPENGL_DESTRUCTORS ~Framebuffer() { free(); } #endif - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void bind(GLenum target) { glBindFramebuffer(target, m_handle); } - void bind(FramebufferTypes target) { bind(static_cast(target)); } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind(GLenum target) const { glBindFramebuffer(target, m_handle); } + void bind(FramebufferTypes target) const { bind(static_cast(target)); } void free() { glDeleteFramebuffers(1, &m_handle); } void createWithTexture(Texture& tex, GLenum mode = GL_FRAMEBUFFER, GLenum textureType = GL_TEXTURE_2D) { @@ -392,8 +392,8 @@ namespace OpenGL { return m_handle != 0; } - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } }; struct Program { @@ -421,9 +421,9 @@ namespace OpenGL { return m_handle != 0; } - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void use() { glUseProgram(m_handle); } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void use() const { glUseProgram(m_handle); } }; static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) { @@ -454,9 +454,9 @@ namespace OpenGL { #ifdef OPENGL_DESTRUCTORS ~VertexBuffer() { free(); } #endif - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void bind() { glBindBuffer(GL_ARRAY_BUFFER, m_handle); } + GLuint handle() const { return m_handle; } + bool exists() 
const { return m_handle != 0; } + void bind() const { glBindBuffer(GL_ARRAY_BUFFER, m_handle); } void free() { glDeleteBuffers(1, &m_handle); } // Reallocates the buffer on every call. Prefer the sub version if possible. @@ -667,23 +667,23 @@ namespace OpenGL { // We're never supporting 3D rectangles, because rectangles were never meant to be 3D in the first place // x, y: Coords of the top left vertex // width, height: Dimensions of the rectangle. Initialized to 0 if not specified. - template - struct Rectangle { - T x, y, width, height; + template + struct Rectangle { + T x, y, width, height; - std::pair topLeft() { return std::make_pair(x, y); } - std::pair topRight() { return std::make_pair(x + width, y); } - std::pair bottomLeft() { return std::make_pair(x, y + height); } - std::pair bottomRight() { return std::make_pair(x + width, y + height); } + std::pair topLeft() const { return std::make_pair(x, y); } + std::pair topRight() const { return std::make_pair(x + width, y); } + std::pair bottomLeft() const { return std::make_pair(x, y + height); } + std::pair bottomRight() const { return std::make_pair(x + width, y + height); } - Rectangle() : x(0), y(0), width(0), height(0) {} - Rectangle(T x, T y, T width, T height) : x(x), y(y), width(width), height(height) {} + Rectangle() : x(0), y(0), width(0), height(0) {} + Rectangle(T x, T y, T width, T height) : x(x), y(y), width(width), height(height) {} - bool isEmpty() { return width == 0 && height == 0; } - bool isLine() { return (width == 0 && height != 0) || (width != 0 && height == 0); } + bool isEmpty() const { return width == 0 && height == 0; } + bool isLine() const { return (width == 0 && height != 0) || (width != 0 && height == 0); } - void setEmpty() { x = y = width = height = 0; } - }; + void setEmpty() { x = y = width = height = 0; } + }; using Rect = Rectangle; From 139c8759c920437756ed9f5af9e55786d467974c Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> 
Date: Wed, 5 Jul 2023 00:43:49 +0300 Subject: [PATCH 04/13] More OpenGL state management stuff --- include/gl_state.hpp | 48 ++++++++++++++++++++++++++++ src/core/renderer_gl/renderer_gl.cpp | 43 +++++++++++++------------ src/gl_state.cpp | 26 +++++++++++++++ 3 files changed, 96 insertions(+), 21 deletions(-) diff --git a/include/gl_state.hpp b/include/gl_state.hpp index 3c9458d5..6a495c72 100644 --- a/include/gl_state.hpp +++ b/include/gl_state.hpp @@ -20,10 +20,19 @@ struct GLStateManager { bool blendEnabled; bool depthEnabled; + bool scissorEnabled; + + GLuint boundVAO; + GLuint boundVBO; + GLuint currentProgram; void reset(); void resetBlend(); void resetDepth(); + void resetVAO(); + void resetVBO(); + void resetProgram(); + void resetScissor(); void enableDepth() { if (!depthEnabled) { @@ -52,6 +61,45 @@ struct GLStateManager { OpenGL::disableBlend(); } } + + void enableScissor() { + if (!scissorEnabled) { + scissorEnabled = true; + OpenGL::enableScissor(); + } + } + + void disableScissor() { + if (scissorEnabled) { + scissorEnabled = false; + OpenGL::disableScissor(); + } + } + + void bindVAO(GLuint handle) { + if (boundVAO != handle) { + boundVAO = handle; + glBindVertexArray(handle); + } + } + + void bindVBO(GLuint handle) { + if (boundVBO != handle) { + boundVBO = handle; + glBindBuffer(GL_ARRAY_BUFFER, handle); + } + } + + void useProgram(GLuint handle) { + if (currentProgram != handle) { + currentProgram = handle; + glUseProgram(handle); + } + } + + void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); } + void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); } + void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); } }; static_assert(std::is_trivially_constructible(), "OpenGL State Manager class is not trivially constructible!"); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index de8a7375..b93834a2 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ 
b/src/core/renderer_gl/renderer_gl.cpp @@ -592,7 +592,7 @@ void Renderer::reset() { if (triangleProgram.exists()) { const auto oldProgram = OpenGL::getProgram(); - triangleProgram.use(); + gl.useProgram(triangleProgram); oldAlphaControl = 0; // Default alpha control to 0 oldTexUnitConfig = 0; // Default tex unit config to 0 @@ -606,7 +606,7 @@ void Renderer::reset() { glUniform1f(depthOffsetLoc, oldDepthOffset); glUniform1i(depthmapEnableLoc, oldDepthmapEnable); - glUseProgram(oldProgram); // Switch to old GL program + gl.useProgram(oldProgram); // Switch to old GL program } } @@ -614,7 +614,7 @@ void Renderer::initGraphicsContext() { OpenGL::Shader vert(vertexShader, OpenGL::Vertex); OpenGL::Shader frag(fragmentShader, OpenGL::Fragment); triangleProgram.create({ vert, frag }); - triangleProgram.use(); + gl.useProgram(triangleProgram); alphaControlLoc = OpenGL::uniformLocation(triangleProgram, "u_alphaControl"); texUnitConfigLoc = OpenGL::uniformLocation(triangleProgram, "u_textureConfig"); @@ -642,13 +642,13 @@ void Renderer::initGraphicsContext() { OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment); displayProgram.create({ vertDisplay, fragDisplay }); - displayProgram.use(); + gl.useProgram(displayProgram); glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize, GL_STREAM_DRAW); - vbo.bind(); + gl.bindVBO(vbo); vao.create(); - vao.bind(); + gl.bindVAO(vao); // Position (x, y, z, w) attributes vao.setAttributeFloat(0, 4, sizeof(Vertex), offsetof(Vertex, s.positions)); @@ -725,9 +725,9 @@ void Renderer::setupBlending() { }; if (!blendingEnabled) { - OpenGL::disableBlend(); + gl.disableBlend(); } else { - OpenGL::enableBlend(); + gl.enableBlend(); // Get blending equations const u32 blendControl = regs[PICA::InternalRegs::BlendFunc]; @@ -852,11 +852,11 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver // TODO: We should implement a GL 
state tracker that tracks settings like scissor, blending, bound program, etc // This way if we attempt to eg do multiple glEnable(GL_BLEND) calls in a row, it will say "Oh blending is already enabled" // And not actually perform the very expensive driver call for it - OpenGL::disableScissor(); + gl.disableScissor(); - vbo.bind(); - vao.bind(); - triangleProgram.use(); + gl.bindVBO(vbo); + gl.bindVAO(vao); + gl.useProgram(triangleProgram); // Adjust alpha test if necessary const u32 alphaControl = regs[PICA::InternalRegs::AlphaTestConfig]; @@ -924,18 +924,18 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver // Note: The code below must execute after we've bound the colour buffer & its framebuffer // Because it attaches a depth texture to the aforementioned colour buffer if (depthEnable) { - OpenGL::enableDepth(); + gl.enableDepth(); glDepthFunc(depthModes[depthFunc]); glDepthMask(depthWriteEnable ? GL_TRUE : GL_FALSE); bindDepthBuffer(); } else { if (depthWriteEnable) { - OpenGL::enableDepth(); + gl.enableDepth(); glDepthFunc(GL_ALWAYS); glDepthMask(GL_TRUE); bindDepthBuffer(); } else { - OpenGL::disableDepth(); + gl.disableDepth(); } } @@ -947,7 +947,7 @@ constexpr u32 topScreenBuffer = 0x1f000000; constexpr u32 bottomScreenBuffer = 0x1f05dc00; void Renderer::display() { - OpenGL::disableScissor(); + gl.disableScissor(); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); screenFramebuffer.bind(OpenGL::ReadFramebuffer); @@ -1038,12 +1038,14 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 tex.bind(); screenFramebuffer.bind(OpenGL::DrawFramebuffer); - OpenGL::disableBlend(); - OpenGL::disableDepth(); - OpenGL::disableScissor(); + gl.disableBlend(); + gl.disableDepth(); + gl.disableScissor(); + gl.useProgram(displayProgram); + gl.bindVAO(dummyVAO); + OpenGL::disableClipPlane(0); OpenGL::disableClipPlane(1); - displayProgram.use(); // Hack: Detect whether we are writing to the top or bottom screen by checking output gap 
and drawing to the proper part of the output texture // We consider output gap == 320 to mean bottom, and anything else to mean top @@ -1053,6 +1055,5 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport } - dummyVAO.bind(); OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen } \ No newline at end of file diff --git a/src/gl_state.cpp b/src/gl_state.cpp index 902971ab..030797a4 100644 --- a/src/gl_state.cpp +++ b/src/gl_state.cpp @@ -10,7 +10,33 @@ void GLStateManager::resetDepth() { OpenGL::disableDepth(); } +void GLStateManager::resetScissor() { + scissorEnabled = false; + OpenGL::disableScissor(); + OpenGL::setScissor(0, 0, 0, 0); +} + +void GLStateManager::resetVAO() { + boundVAO = 0; + glBindVertexArray(0); +} + +void GLStateManager::resetVBO() { + boundVBO = 0; + glBindBuffer(GL_ARRAY_BUFFER, 0); +} + +void GLStateManager::resetProgram() { + currentProgram = 0; + glUseProgram(0); +} + void GLStateManager::reset() { resetBlend(); resetDepth(); + + resetVAO(); + resetVBO(); + resetProgram(); + resetScissor(); } \ No newline at end of file From 3e18281a99b2103049c3dcd00e2c03190babe755 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 5 Jul 2023 00:46:34 +0300 Subject: [PATCH 05/13] Remove outdated comments --- src/core/renderer_gl/renderer_gl.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index b93834a2..41917bae 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -849,11 +849,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver }; const auto primitiveTopology = primTypes[static_cast(primType)]; - // TODO: We should implement a GL state tracker that tracks settings like scissor, blending, bound program, etc - // This way if we attempt to eg do 
multiple glEnable(GL_BLEND) calls in a row, it will say "Oh blending is already enabled" - // And not actually perform the very expensive driver call for it gl.disableScissor(); - gl.bindVBO(vbo); gl.bindVAO(vao); gl.useProgram(triangleProgram); From c282a44c27ae9363494d15933dd4f9e9832f4ac4 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 5 Jul 2023 01:02:32 +0300 Subject: [PATCH 06/13] [OpenGL renderer] Fewer uniform uploads --- include/renderer_gl/renderer_gl.hpp | 5 --- src/core/renderer_gl/renderer_gl.cpp | 56 +++++++++------------------- 2 files changed, 18 insertions(+), 43 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index b5ac8758..81b0a732 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -23,8 +23,6 @@ class Renderer { OpenGL::VertexArray vao; OpenGL::VertexBuffer vbo; - GLint alphaControlLoc = -1; - GLint texUnitConfigLoc = -1; // TEV configuration uniform locations GLint textureEnvSourceLoc = -1; @@ -41,9 +39,6 @@ class Renderer { GLint depthScaleLoc = -1; GLint depthmapEnableLoc = -1; - u32 oldAlphaControl = 0; - u32 oldTexUnitConfig = 0; - float oldDepthScale = -1.0; float oldDepthOffset = 0.0; bool oldDepthmapEnable = false; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 41917bae..4d68e58a 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -35,11 +35,11 @@ const char* vertexShader = R"( // TEV uniforms uniform uint u_textureEnvColor[6]; uniform uint u_textureEnvBufferColor; - uniform uint u_picaRegs[0x200 - 0x47]; + uniform uint u_picaRegs[0x200 - 0x48]; // Helper so that the implementation of u_pica_regs can be changed later uint readPicaReg(uint reg_addr){ - return u_picaRegs[reg_addr - 0x47]; + return u_picaRegs[reg_addr - 0x48]; } vec4 abgr8888ToVec4(uint abgr) { @@ -131,9 +131,6 @@ const char* 
fragmentShader = R"( out vec4 fragColour; - uniform uint u_alphaControl; - uniform uint u_textureConfig; - // TEV uniforms uniform uint u_textureEnvSource[6]; uniform uint u_textureEnvOperand[6]; @@ -151,11 +148,11 @@ const char* fragmentShader = R"( uniform sampler2D u_tex2; uniform sampler1DArray u_tex_lighting_lut; - uniform uint u_picaRegs[0x200 - 0x47]; + uniform uint u_picaRegs[0x200 - 0x48]; // Helper so that the implementation of u_pica_regs can be changed later uint readPicaReg(uint reg_addr){ - return u_picaRegs[reg_addr - 0x47]; + return u_picaRegs[reg_addr - 0x48]; } vec4 tevSources[16]; @@ -453,16 +450,17 @@ const char* fragmentShader = R"( } void main() { - vec2 tex2UV = (u_textureConfig & (1u << 13)) != 0u ? v_texcoord1 : v_texcoord2; - // TODO: what do invalid sources and disabled textures read as? // And what does the "previous combiner" source read initially? tevSources[0] = v_colour; // Primary/vertex color calcLighting(tevSources[1],tevSources[2]); - if ((u_textureConfig & 1u) != 0u) tevSources[3] = texture(u_tex0, v_texcoord0.xy); - if ((u_textureConfig & 2u) != 0u) tevSources[4] = texture(u_tex1, v_texcoord1); - if ((u_textureConfig & 4u) != 0u) tevSources[5] = texture(u_tex2, tex2UV); + uint textureConfig = readPicaReg(0x80); + vec2 tex2UV = (textureConfig & (1u << 13)) != 0u ? 
v_texcoord1 : v_texcoord2; + + if ((textureConfig & 1u) != 0u) tevSources[3] = texture(u_tex0, v_texcoord0.xy); + if ((textureConfig & 2u) != 0u) tevSources[4] = texture(u_tex1, v_texcoord1); + if ((textureConfig & 4u) != 0u) tevSources[5] = texture(u_tex2, tex2UV); tevSources[13] = vec4(0.0); // Previous buffer tevSources[15] = vec4(0.0); // Previous combiner @@ -503,9 +501,11 @@ const char* fragmentShader = R"( // Write final fragment depth gl_FragDepth = depth; - if ((u_alphaControl & 1u) != 0u) { // Check if alpha test is on - uint func = (u_alphaControl >> 4u) & 7u; - float reference = float((u_alphaControl >> 8u) & 0xffu) / 255.0; + // Perform alpha test + uint alphaControl = readPicaReg(0x104); + if ((alphaControl & 1u) != 0u) { // Check if alpha test is on + uint func = (alphaControl >> 4u) & 7u; + float reference = float((alphaControl >> 8u) & 0xffu) / 255.0; float alpha = fragColour.a; switch (func) { @@ -593,15 +593,11 @@ void Renderer::reset() { const auto oldProgram = OpenGL::getProgram(); gl.useProgram(triangleProgram); - oldAlphaControl = 0; // Default alpha control to 0 - oldTexUnitConfig = 0; // Default tex unit config to 0 oldDepthScale = -1.0; // Default depth scale to -1.0, which is what games typically use oldDepthOffset = 0.0; // Default depth offset to 0 oldDepthmapEnable = false; // Enable w buffering - glUniform1ui(alphaControlLoc, oldAlphaControl); - glUniform1ui(texUnitConfigLoc, oldTexUnitConfig); glUniform1f(depthScaleLoc, oldDepthScale); glUniform1f(depthOffsetLoc, oldDepthOffset); glUniform1i(depthmapEnableLoc, oldDepthmapEnable); @@ -615,9 +611,6 @@ void Renderer::initGraphicsContext() { OpenGL::Shader frag(fragmentShader, OpenGL::Fragment); triangleProgram.create({ vert, frag }); gl.useProgram(triangleProgram); - - alphaControlLoc = OpenGL::uniformLocation(triangleProgram, "u_alphaControl"); - texUnitConfigLoc = OpenGL::uniformLocation(triangleProgram, "u_textureConfig"); textureEnvSourceLoc = 
OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); @@ -815,14 +808,8 @@ void Renderer::bindTexturesToSlots() { glActiveTexture(GL_TEXTURE0 + 3); glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); glActiveTexture(GL_TEXTURE0); - - // Update the texture unit configuration uniform if it changed - const u32 texUnitConfig = regs[PICA::InternalRegs::TexUnitCfg]; - if (oldTexUnitConfig != texUnitConfig) { - oldTexUnitConfig = texUnitConfig; - glUniform1ui(texUnitConfigLoc, texUnitConfig); - } } + void Renderer::updateLightingLUT(){ std::array u16_lightinglut; @@ -854,13 +841,6 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver gl.bindVAO(vao); gl.useProgram(triangleProgram); - // Adjust alpha test if necessary - const u32 alphaControl = regs[PICA::InternalRegs::AlphaTestConfig]; - if (alphaControl != oldAlphaControl) { - oldAlphaControl = alphaControl; - glUniform1ui(alphaControlLoc, alphaControl); - } - OpenGL::enableClipPlane(0); // Clipping plane 0 is always enabled if (regs[PICA::InternalRegs::ClipEnable] & 1) { OpenGL::enableClipPlane(1); @@ -904,9 +884,9 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver setupTextureEnvState(); bindTexturesToSlots(); - // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x47) + // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) // The texturing and the fragment lighting registers. 
Therefore we upload them all in one go to avoid multiple slow uniform updates - glUniform1uiv(picaRegLoc, 0x200 - 0x47, &regs[0x47]); + glUniform1uiv(picaRegLoc, 0x200 - 0x48, &regs[0x48]); if (gpu.lightingLUTDirty) { updateLightingLUT(); From dd3e1ea72bdec8f9abc1bbf065caf6b2a665e0dc Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 5 Jul 2023 01:37:18 +0300 Subject: [PATCH 07/13] Ditch more uniforms --- include/renderer_gl/renderer_gl.hpp | 2 -- src/core/renderer_gl/renderer_gl.cpp | 13 ++++--------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 81b0a732..ff8e3a69 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -30,8 +30,6 @@ class Renderer { GLint textureEnvCombinerLoc = -1; GLint textureEnvColorLoc = -1; GLint textureEnvScaleLoc = -1; - GLint textureEnvUpdateBufferLoc = -1; - GLint textureEnvBufferColorLoc = -1; GLint picaRegLoc = -1; // Depth configuration uniform locations diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 4d68e58a..bacc6751 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -34,7 +34,6 @@ const char* vertexShader = R"( // TEV uniforms uniform uint u_textureEnvColor[6]; - uniform uint u_textureEnvBufferColor; uniform uint u_picaRegs[0x200 - 0x48]; // Helper so that the implementation of u_pica_regs can be changed later @@ -96,7 +95,7 @@ const char* vertexShader = R"( v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); } - v_textureEnvBufferColor = abgr8888ToVec4(u_textureEnvBufferColor); + v_textureEnvBufferColor = abgr8888ToVec4(readPicaReg(0xFD)); // Parse clipping plane registers // The plane registers describe a clipping plane in the form of Ax + By + Cz + D = 0 @@ -136,7 +135,6 @@ const char* fragmentShader = R"( uniform uint u_textureEnvOperand[6];
uniform uint u_textureEnvCombiner[6]; uniform uint u_textureEnvScale[6]; - uniform uint u_textureEnvUpdateBuffer; // Depth control uniforms uniform float u_depthScale; @@ -465,6 +463,7 @@ const char* fragmentShader = R"( tevSources[15] = vec4(0.0); // Previous combiner tevNextPreviousBuffer = v_textureEnvBufferColor; + uint textureEnvUpdateBuffer = readPicaReg(0xE0); for (int i = 0; i < 6; i++) { tevSources[14] = v_textureEnvColor[i]; // Constant color @@ -472,11 +471,11 @@ const char* fragmentShader = R"( tevSources[13] = tevNextPreviousBuffer; if (i < 4) { - if ((u_textureEnvUpdateBuffer & (0x100u << i)) != 0u) { + if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) { tevNextPreviousBuffer.rgb = tevSources[15].rgb; } - if ((u_textureEnvUpdateBuffer & (0x1000u << i)) != 0u) { + if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) { tevNextPreviousBuffer.a = tevSources[15].a; } } @@ -617,8 +616,6 @@ void Renderer::initGraphicsContext() { textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); - textureEnvUpdateBufferLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvUpdateBuffer"); - textureEnvBufferColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvBufferColor"); depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); @@ -776,8 +773,6 @@ void Renderer::setupTextureEnvState() { glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs); glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs); glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs); - glUniform1ui(textureEnvUpdateBufferLoc, regs[PICA::InternalRegs::TexEnvUpdateBuffer]); - glUniform1ui(textureEnvBufferColorLoc, regs[PICA::InternalRegs::TexEnvBufferColor]); } void 
Renderer::bindTexturesToSlots() { From feacb9359d4a7dc789602ec05877e5f1a2b05632 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 5 Jul 2023 02:31:08 +0300 Subject: [PATCH 08/13] Formatting --- include/renderer_gl/renderer_gl.hpp | 2 ++ src/core/renderer_gl/renderer_gl.cpp | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index ff8e3a69..4fe0a37c 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -30,6 +30,8 @@ class Renderer { GLint textureEnvCombinerLoc = -1; GLint textureEnvColorLoc = -1; GLint textureEnvScaleLoc = -1; + + // Uniform of PICA registers GLint picaRegLoc = -1; // Depth configuration uniform locations diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index bacc6751..917075e0 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -335,7 +335,7 @@ const char* fragmentShader = R"( bool error_unimpl = false; - for (uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++){ + for (uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION,int(i*3),3); uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140 + 0x10 * light_id); @@ -358,8 +358,8 @@ const char* fragmentShader = R"( vec3 half_vector = normalize(normalize(light_vector) + view); - for(int c = 0; c < 7; c++){ - if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0){ + for (int c = 0; c < 7; c++) { + if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0){ uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); float scale = float(1u << scale_id); if (scale_id >= 6u) @@ -399,15 +399,15 @@ const char* fragmentShader = R"( d[D1_LUT] = 0.0; d[FR_LUT] = 0.0; d[RG_LUT]= d[RB_LUT] = d[RR_LUT]; - } else if(lookup_config == 1) { + } else if 
(lookup_config == 1) { d[D0_LUT] = 0.0; d[D1_LUT] = 0.0; d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if(lookup_config == 2) { + } else if (lookup_config == 2) { d[FR_LUT] = 0.0; d[SP_LUT] = 0.0; d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if(lookup_config == 3) { + } else if (lookup_config == 3) { d[SP_LUT] = 0.0; d[RG_LUT]= d[RB_LUT] = d[RR_LUT] = 1.0; } else if (lookup_config == 4) { From d80785cbb5271e7bd6ddb8e82c188a2256acb7a3 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 5 Jul 2023 18:23:14 +0300 Subject: [PATCH 09/13] [OpenGL] More efficient colour mask handling --- include/gl_state.hpp | 15 +++++++++++++++ src/core/renderer_gl/renderer_gl.cpp | 3 ++- src/gl_state.cpp | 6 ++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/include/gl_state.hpp b/include/gl_state.hpp index 6a495c72..fe0eba60 100644 --- a/include/gl_state.hpp +++ b/include/gl_state.hpp @@ -22,12 +22,16 @@ struct GLStateManager { bool depthEnabled; bool scissorEnabled; + // Colour mask + bool redMask, greenMask, blueMask, alphaMask; + GLuint boundVAO; GLuint boundVBO; GLuint currentProgram; void reset(); void resetBlend(); + void resetColourMask(); void resetDepth(); void resetVAO(); void resetVBO(); @@ -97,6 +101,17 @@ struct GLStateManager { } } + void setColourMask(bool r, bool g, bool b, bool a) { + if (r != redMask || g != greenMask || b != blueMask || a != alphaMask) { + redMask = r; + greenMask = g; + blueMask = b; + alphaMask = a; + + glColorMask(r, g, b, a); + } + } + void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); } void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); } void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); } diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 917075e0..9ca9f2d1 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -850,7 +850,7
@@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver const bool depthWriteEnable = getBit<12>(depthControl); const int depthFunc = getBits<4, 3>(depthControl); const int colourMask = getBits<8, 4>(depthControl); - glColorMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8); + gl.setColourMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8); static constexpr std::array depthModes = { GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL @@ -1012,6 +1012,7 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 gl.disableBlend(); gl.disableDepth(); gl.disableScissor(); + gl.setColourMask(true, true, true, true); gl.useProgram(displayProgram); gl.bindVAO(dummyVAO); diff --git a/src/gl_state.cpp b/src/gl_state.cpp index 030797a4..2f286f70 100644 --- a/src/gl_state.cpp +++ b/src/gl_state.cpp @@ -5,6 +5,11 @@ void GLStateManager::resetBlend() { OpenGL::disableBlend(); } +void GLStateManager::resetColourMask() { + redMask = greenMask = blueMask = alphaMask = true; + glColorMask(redMask, greenMask, blueMask, alphaMask); +} + void GLStateManager::resetDepth() { depthEnabled = false; OpenGL::disableDepth(); @@ -33,6 +38,7 @@ void GLStateManager::resetProgram() { void GLStateManager::reset() { resetBlend(); + resetColourMask(); resetDepth(); resetVAO(); From 7307bd270b11c947c5e474598d52f319a2c185d1 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 5 Jul 2023 18:54:09 +0300 Subject: [PATCH 10/13] [OpenGL] Same for depth mask --- include/gl_state.hpp | 12 ++++++++++-- include/opengl.hpp | 2 ++ src/core/renderer_gl/renderer_gl.cpp | 4 ++-- src/gl_state.cpp | 5 ++++- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/gl_state.hpp b/include/gl_state.hpp index fe0eba60..793d2922 100644 --- a/include/gl_state.hpp +++ b/include/gl_state.hpp @@ -22,8 +22,9 @@ struct GLStateManager { bool depthEnabled;
bool scissorEnabled; - // Colour mask + // Colour/depth masks bool redMask, greenMask, blueMask, alphaMask; + bool depthMask; GLuint boundVAO; GLuint boundVBO; @@ -108,7 +109,14 @@ struct GLStateManager { blueMask = b; alphaMask = a; - glColorMask(r, g, b, a); + OpenGL::setColourMask(r, g, b, a); + } + } + + void setDepthMask(bool mask) { + if (depthMask != mask) { + depthMask = mask; + OpenGL::setDepthMask(mask); } } diff --git a/include/opengl.hpp b/include/opengl.hpp index b85347bf..f8328799 100644 --- a/include/opengl.hpp +++ b/include/opengl.hpp @@ -528,6 +528,8 @@ namespace OpenGL { static void disableClipPlane(GLuint index) { glDisable(GL_CLIP_DISTANCE0 + index); } static void setDepthFunc(DepthFunc func) { glDepthFunc(static_cast(func)); } + static void setColourMask(GLboolean r, GLboolean g, GLboolean b, GLboolean a) { glColorMask(r, g, b, a); } + static void setDepthMask(GLboolean mask) { glDepthMask(mask); } enum Primitives { Triangle = GL_TRIANGLES, diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 9ca9f2d1..0dd5d58e 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -896,14 +896,14 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver // Because it attaches a depth texture to the aforementioned colour buffer if (depthEnable) { gl.enableDepth(); + gl.setDepthMask(depthWriteEnable ? GL_TRUE : GL_FALSE); glDepthFunc(depthModes[depthFunc]); - glDepthMask(depthWriteEnable ?
GL_TRUE : GL_FALSE); bindDepthBuffer(); } else { if (depthWriteEnable) { gl.enableDepth(); + gl.setDepthMask(GL_TRUE); glDepthFunc(GL_ALWAYS); - glDepthMask(GL_TRUE); bindDepthBuffer(); } else { gl.disableDepth(); diff --git a/src/gl_state.cpp b/src/gl_state.cpp index 2f286f70..472c16d6 100644 --- a/src/gl_state.cpp +++ b/src/gl_state.cpp @@ -7,12 +7,15 @@ void GLStateManager::resetBlend() { void GLStateManager::resetColourMask() { redMask = greenMask = blueMask = alphaMask = true; - glColorMask(redMask, greenMask, blueMask, alphaMask); + OpenGL::setColourMask(redMask, greenMask, blueMask, alphaMask); } void GLStateManager::resetDepth() { depthEnabled = false; + depthMask = true; + OpenGL::disableDepth(); + OpenGL::setDepthMask(true); } void GLStateManager::resetScissor() { From 52c7783ae1b8051aa34c1b0faf907ac69c84af09 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 5 Jul 2023 19:08:13 +0300 Subject: [PATCH 11/13] [OpenGL] Same for depth func --- include/gl_state.hpp | 17 ++++++++++++++--- src/core/renderer_gl/renderer_gl.cpp | 4 ++-- src/gl_state.cpp | 2 ++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/gl_state.hpp b/include/gl_state.hpp index 793d2922..82531c7a 100644 --- a/include/gl_state.hpp +++ b/include/gl_state.hpp @@ -30,6 +30,8 @@ struct GLStateManager { GLuint boundVBO; GLuint currentProgram; + GLenum depthFunc; + void reset(); void resetBlend(); void resetColourMask(); @@ -102,6 +104,10 @@ struct GLStateManager { } } + void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); } + void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); } + void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); } + void setColourMask(bool r, bool g, bool b, bool a) { if (r != redMask || g != greenMask || b != blueMask || a != alphaMask) { redMask = r; @@ -120,9 +126,14 @@ struct GLStateManager { } } - void bindVAO(const
OpenGL::VertexArray& vao) { bindVAO(vao.handle()); } - void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); } - void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); } + void setDepthFunc(GLenum func) { + if (depthFunc != func) { + depthFunc = func; + glDepthFunc(func); + } + } + + void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast<GLenum>(func)); } }; static_assert(std::is_trivially_constructible<GLStateManager>(), "OpenGL State Manager class is not trivially constructible!"); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 0dd5d58e..acc31936 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -897,13 +897,13 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver if (depthEnable) { gl.enableDepth(); gl.setDepthMask(depthWriteEnable ? GL_TRUE : GL_FALSE); - glDepthFunc(depthModes[depthFunc]); + gl.setDepthFunc(depthModes[depthFunc]); bindDepthBuffer(); } else { if (depthWriteEnable) { gl.enableDepth(); gl.setDepthMask(GL_TRUE); - glDepthFunc(GL_ALWAYS); + gl.setDepthFunc(GL_ALWAYS); bindDepthBuffer(); } else { gl.disableDepth(); diff --git a/src/gl_state.cpp b/src/gl_state.cpp index 472c16d6..612ae44d 100644 --- a/src/gl_state.cpp +++ b/src/gl_state.cpp @@ -13,9 +13,11 @@ void GLStateManager::resetColourMask() { void GLStateManager::resetDepth() { depthEnabled = false; depthMask = true; + depthFunc = GL_LESS; OpenGL::disableDepth(); OpenGL::setDepthMask(true); + OpenGL::setDepthFunc(OpenGL::DepthFunc::Less); } void GLStateManager::resetScissor() { From 53db56ad9da841aef37e3209746497229e0b4f2a Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 5 Jul 2023 21:48:30 +0300 Subject: [PATCH 12/13] [GPU] Set lightingLUTDirty to true on reset because paranoia --- src/core/PICA/gpu.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp
index a3163f86..51e9ab69 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -23,6 +23,7 @@ void GPU::reset() { shaderJIT.reset(); std::memset(vram, 0, vramSize); lightingLUT.fill(0); + lightingLUTDirty = true; totalAttribCount = 0; fixedAttribMask = 0; From 883e1d4bd0e22e4948cc5a29f4673887dc929d13 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 5 Jul 2023 22:16:15 +0300 Subject: [PATCH 13/13] Madness trying to optimize assembly by reordering statements --- src/core/renderer_gl/renderer_gl.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index acc31936..72f346bc 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -805,13 +805,14 @@ void Renderer::bindTexturesToSlots() { glActiveTexture(GL_TEXTURE0); } -void Renderer::updateLightingLUT(){ +void Renderer::updateLightingLUT() { + gpu.lightingLUTDirty = false; std::array u16_lightinglut; - for(int i = 0; i < gpu.lightingLUT.size(); i++){ + for (int i = 0; i < gpu.lightingLUT.size(); i++) { uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); u16_lightinglut[i] = value * 65535 / 4095; - } + } glActiveTexture(GL_TEXTURE0 + 3); glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); @@ -821,7 +822,6 @@ void Renderer::updateLightingLUT(){ glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glActiveTexture(GL_TEXTURE0); - gpu.lightingLUTDirty = false; } void Renderer::drawVertices(PICA::PrimType primType, std::span vertices) {