diff --git a/CMakeLists.txt b/CMakeLists.txt index f5edc420..7ce91389 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,8 +83,8 @@ else() message(FATAL_ERROR "Currently unsupported CPU architecture") endif() -set(SOURCE_FILES src/main.cpp src/emulator.cpp src/io_file.cpp src/core/CPU/cpu_dynarmic.cpp src/core/CPU/dynarmic_cycles.cpp - src/core/memory.cpp +set(SOURCE_FILES src/main.cpp src/emulator.cpp src/io_file.cpp src/gl_state.cpp src/core/CPU/cpu_dynarmic.cpp + src/core/CPU/dynarmic_cycles.cpp src/core/memory.cpp ) set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp) set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp @@ -138,7 +138,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/opengl.hpp inc include/PICA/dynapica/shader_rec_emitter_x64.hpp include/PICA/pica_hash.hpp include/result/result.hpp include/result/result_common.hpp include/result/result_fs.hpp include/result/result_fnd.hpp include/result/result_gsp.hpp include/result/result_kernel.hpp include/result/result_os.hpp - include/crypto/aes_engine.hpp include/metaprogramming.hpp include/PICA/pica_vertex.hpp + include/crypto/aes_engine.hpp include/metaprogramming.hpp include/PICA/pica_vertex.hpp include/gl_state.hpp ) set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index ced2c557..5bc06c47 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -28,7 +28,7 @@ class GPU { std::array currentAttributes; // Vertex attributes before being passed to the shader std::array immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission - std::array immediateModeVertices; + std::array immediateModeVertices; uint immediateModeVertIndex; uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading @@ -68,7 +68,7 @@ class GPU { u32* cmdBuffCurr = nullptr; Renderer renderer; - PicaVertex getImmediateModeVertex(); + PICA::Vertex 
getImmediateModeVertex(); public: // 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT @@ -81,7 +81,7 @@ class GPU { // Set to false by the renderer when the lighting_lut is uploaded ot the GPU bool lightingLUTDirty = false; - GPU(Memory& mem); + GPU(Memory& mem, GLStateManager& gl); void initGraphicsContext() { renderer.initGraphicsContext(); } void getGraphicsContext() { renderer.getGraphicsContext(); } void display() { renderer.display(); } diff --git a/include/PICA/pica_vertex.hpp b/include/PICA/pica_vertex.hpp index ea90796b..800dff9a 100644 --- a/include/PICA/pica_vertex.hpp +++ b/include/PICA/pica_vertex.hpp @@ -2,35 +2,37 @@ #include "PICA/float_types.hpp" #include -// A representation of the output vertex as it comes out of the vertex shader, with padding and all -struct PicaVertex { - using vec2f = std::array; - using vec3f = std::array; - using vec4f = std::array; +namespace PICA { + // A representation of the output vertex as it comes out of the vertex shader, with padding and all + struct Vertex { + using vec2f = std::array; + using vec3f = std::array; + using vec4f = std::array; - union { - struct { - vec4f positions; // Vertex position - vec4f quaternion; // Quaternion specifying the normal/tangent frame (for fragment lighting) - vec4f colour; // Vertex color - vec2f texcoord0; // Texcoords for texture unit 0 (Only U and V, W is stored separately for 3D textures!) - vec2f texcoord1; // Texcoords for TU 1 - Floats::f24 texcoord0_w; // W component for texcoord 0 if using a 3D texture - u32 padding; // Unused + union { + struct { + vec4f positions; // Vertex position + vec4f quaternion; // Quaternion specifying the normal/tangent frame (for fragment lighting) + vec4f colour; // Vertex color + vec2f texcoord0; // Texcoords for texture unit 0 (Only U and V, W is stored separately for 3D textures!) 
+ vec2f texcoord1; // Texcoords for TU 1 + Floats::f24 texcoord0_w; // W component for texcoord 0 if using a 3D texture + u32 padding; // Unused - vec3f view; // View vector (for fragment lighting) - u32 padding2; // Unused - vec2f texcoord2; // Texcoords for TU 2 - } s; + vec3f view; // View vector (for fragment lighting) + u32 padding2; // Unused + vec2f texcoord2; // Texcoords for TU 2 + } s; - // The software, non-accelerated vertex loader writes here and then reads specific components from the above struct - Floats::f24 raw[0x20]; + // The software, non-accelerated vertex loader writes here and then reads specific components from the above struct + Floats::f24 raw[0x20]; + }; + Vertex() {} }; - PicaVertex() {} -}; +} // namespace PICA // Float is used here instead of Floats::f24 to ensure that Floats::f24 is properly sized for direct interpretations as a float by the render backend -#define ASSERT_POS(member, pos) static_assert(offsetof(PicaVertex, s.member) == pos * sizeof(float), "PicaVertex struct is broken!"); +#define ASSERT_POS(member, pos) static_assert(offsetof(PICA::Vertex, s.member) == pos * sizeof(float), "PICA::Vertex struct is broken!"); ASSERT_POS(positions, 0) ASSERT_POS(quaternion, 4) diff --git a/include/emulator.hpp b/include/emulator.hpp index 10279443..7cbc27b7 100644 --- a/include/emulator.hpp +++ b/include/emulator.hpp @@ -11,7 +11,7 @@ #include "crypto/aes_engine.hpp" #include "io_file.hpp" #include "memory.hpp" -#include "opengl.hpp" +#include "gl_state.hpp" enum class ROMType { None, ELF, NCSD }; @@ -22,6 +22,7 @@ class Emulator { Kernel kernel; Crypto::AESEngine aesEngine; + GLStateManager gl; SDL_Window* window; SDL_GLContext glContext; SDL_GameController* gameController; @@ -56,5 +57,5 @@ class Emulator { bool loadNCSD(const std::filesystem::path& path); bool loadELF(const std::filesystem::path& path); bool loadELF(std::ifstream& file); - void initGraphicsContext() { gpu.initGraphicsContext(); } + void initGraphicsContext(); }; diff 
--git a/include/gl_state.hpp b/include/gl_state.hpp new file mode 100644 index 00000000..82531c7a --- /dev/null +++ b/include/gl_state.hpp @@ -0,0 +1,140 @@ +#pragma once +#include <type_traits> + +#include "opengl.hpp" + +// GL state manager object for use in the OpenGL GPU renderer and potentially other things in the future (such as a potential ImGui GUI) +// This object is meant to help us avoid duplicate OpenGL calls (such as binding the same program twice, enabling/disabling a setting twice, etc) +// by checking if we actually *need* a state change. This is meant to avoid expensive driver calls and minimize unneeded state changes +// A lot of code is in the header file instead of the relevant source file to make sure stuff gets inlined even without LTO, and +// because this header should ideally not be getting included in too many places +// Code that does not need inlining however, like the reset() function, should be in gl_state.cpp +// This state manager may not handle every aspect of OpenGL, in which case anything not handled here should just be manipulated with raw +// OpenGL/opengl.hpp calls. However, anything that can be handled through the state manager should, or at least there should be an attempt to keep it +// consistent with the current GL state to avoid bugs/suboptimal code. + +// The state manager must *also* be a trivially constructible/destructible type, to ensure that no OpenGL functions get called sneakily without us +// knowing. This is important for when we want to eg add a Vulkan or misc backend. Would definitely not want to refactor all this.
So we try to be as +// backend-agnostic as possible + +struct GLStateManager { + bool blendEnabled; + bool depthEnabled; + bool scissorEnabled; + + // Colour/depth masks + bool redMask, greenMask, blueMask, alphaMask; + bool depthMask; + + GLuint boundVAO; + GLuint boundVBO; + GLuint currentProgram; + + GLenum depthFunc; + + void reset(); + void resetBlend(); + void resetColourMask(); + void resetDepth(); + void resetVAO(); + void resetVBO(); + void resetProgram(); + void resetScissor(); + + void enableDepth() { + if (!depthEnabled) { + depthEnabled = true; + OpenGL::enableDepth(); + } + } + + void disableDepth() { + if (depthEnabled) { + depthEnabled = false; + OpenGL::disableDepth(); + } + } + + void enableBlend() { + if (!blendEnabled) { + blendEnabled = true; + OpenGL::enableBlend(); + } + } + + void disableBlend() { + if (blendEnabled) { + blendEnabled = false; + OpenGL::disableBlend(); + } + } + + void enableScissor() { + if (!scissorEnabled) { + scissorEnabled = true; + OpenGL::enableScissor(); + } + } + + void disableScissor() { + if (scissorEnabled) { + scissorEnabled = false; + OpenGL::disableScissor(); + } + } + + void bindVAO(GLuint handle) { + if (boundVAO != handle) { + boundVAO = handle; + glBindVertexArray(handle); + } + } + + void bindVBO(GLuint handle) { + if (boundVBO != handle) { + boundVBO = handle; + glBindBuffer(GL_ARRAY_BUFFER, handle); + } + } + + void useProgram(GLuint handle) { + if (currentProgram != handle) { + currentProgram = handle; + glUseProgram(handle); + } + } + + void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); } + void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); } + void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); } + + void setColourMask(bool r, bool g, bool b, bool a) { + if (r != redMask || g != greenMask || b != blueMask || a != alphaMask) { + redMask = r; + greenMask = g; + blueMask = b; + alphaMask = a; + // Cached mask now holds the requested values, so forward them to the driver + OpenGL::setColourMask(r, g, b, a); + } + }
+ + void setDepthMask(bool mask) { + if (depthMask != mask) { + depthMask = mask; + OpenGL::setDepthMask(mask); + } + } + + void setDepthFunc(GLenum func) { + if (depthFunc != func) { + depthFunc = func; + glDepthFunc(func); + } + } + + void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast<GLenum>(func)); } +}; + +static_assert(std::is_trivially_constructible<GLStateManager>(), "OpenGL State Manager class is not trivially constructible!"); +static_assert(std::is_trivially_destructible<GLStateManager>(), "OpenGL State Manager class is not trivially destructible!"); \ No newline at end of file diff --git a/include/opengl.hpp b/include/opengl.hpp index b259381b..f8328799 100644 --- a/include/opengl.hpp +++ b/include/opengl.hpp @@ -128,9 +128,9 @@ namespace OpenGL { #ifdef OPENGL_DESTRUCTORS ~VertexArray() { free(); } #endif - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void bind() { glBindVertexArray(m_handle); } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind() const { glBindVertexArray(m_handle); } template void setAttributeFloat(GLuint index, GLint size, GLsizei stride, const void* offset, bool normalized = GL_FALSE) { @@ -299,11 +299,11 @@ namespace OpenGL { #ifdef OPENGL_DESTRUCTORS ~Texture() { free(); } #endif - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void bind() { glBindTexture(m_binding, m_handle); } - int width() { return m_width; } - int height() { return m_height; } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind() const { glBindTexture(m_binding, m_handle); } + int width() const { return m_width; } + int height() const { return m_height; } void free() { glDeleteTextures(1, &m_handle); } }; @@ -327,10 +327,10 @@ namespace OpenGL { #ifdef OPENGL_DESTRUCTORS ~Framebuffer() { free(); } #endif - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void bind(GLenum
target) { glBindFramebuffer(target, m_handle); } - void bind(FramebufferTypes target) { bind(static_cast(target)); } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind(GLenum target) const { glBindFramebuffer(target, m_handle); } + void bind(FramebufferTypes target) const { bind(static_cast(target)); } void free() { glDeleteFramebuffers(1, &m_handle); } void createWithTexture(Texture& tex, GLenum mode = GL_FRAMEBUFFER, GLenum textureType = GL_TEXTURE_2D) { @@ -392,8 +392,8 @@ namespace OpenGL { return m_handle != 0; } - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } }; struct Program { @@ -421,9 +421,9 @@ namespace OpenGL { return m_handle != 0; } - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void use() { glUseProgram(m_handle); } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void use() const { glUseProgram(m_handle); } }; static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) { @@ -454,9 +454,9 @@ namespace OpenGL { #ifdef OPENGL_DESTRUCTORS ~VertexBuffer() { free(); } #endif - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void bind() { glBindBuffer(GL_ARRAY_BUFFER, m_handle); } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind() const { glBindBuffer(GL_ARRAY_BUFFER, m_handle); } void free() { glDeleteBuffers(1, &m_handle); } // Reallocates the buffer on every call. Prefer the sub version if possible. 
@@ -528,6 +528,8 @@ namespace OpenGL { static void disableClipPlane(GLuint index) { glDisable(GL_CLIP_DISTANCE0 + index); } static void setDepthFunc(DepthFunc func) { glDepthFunc(static_cast(func)); } + static void setColourMask(GLboolean r, GLboolean g, GLboolean b, GLboolean a) { glColorMask(r, g, b, a); } + static void setDepthMask(GLboolean mask) { glDepthMask(mask); } enum Primitives { Triangle = GL_TRIANGLES, @@ -667,23 +669,23 @@ namespace OpenGL { // We're never supporting 3D rectangles, because rectangles were never meant to be 3D in the first place // x, y: Coords of the top left vertex // width, height: Dimensions of the rectangle. Initialized to 0 if not specified. - template - struct Rectangle { - T x, y, width, height; + template + struct Rectangle { + T x, y, width, height; - std::pair topLeft() { return std::make_pair(x, y); } - std::pair topRight() { return std::make_pair(x + width, y); } - std::pair bottomLeft() { return std::make_pair(x, y + height); } - std::pair bottomRight() { return std::make_pair(x + width, y + height); } + std::pair topLeft() const { return std::make_pair(x, y); } + std::pair topRight() const { return std::make_pair(x + width, y); } + std::pair bottomLeft() const { return std::make_pair(x, y + height); } + std::pair bottomRight() const { return std::make_pair(x + width, y + height); } - Rectangle() : x(0), y(0), width(0), height(0) {} - Rectangle(T x, T y, T width, T height) : x(x), y(y), width(width), height(height) {} + Rectangle() : x(0), y(0), width(0), height(0) {} + Rectangle(T x, T y, T width, T height) : x(x), y(y), width(width), height(height) {} - bool isEmpty() { return width == 0 && height == 0; } - bool isLine() { return (width == 0 && height != 0) || (width != 0 && height == 0); } + bool isEmpty() const { return width == 0 && height == 0; } + bool isLine() const { return (width == 0 && height != 0) || (width != 0 && height == 0); } - void setEmpty() { x = y = width = height = 0; } - }; + void setEmpty() { x = 
y = width = height = 0; } + }; using Rect = Rectangle; diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index bf85904b..4fe0a37c 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -3,9 +3,9 @@ #include #include "PICA/float_types.hpp" +#include "gl_state.hpp" #include "helpers.hpp" #include "logger.hpp" -#include "opengl.hpp" #include "surface_cache.hpp" #include "textures.hpp" #include "PICA/regs.hpp" @@ -16,13 +16,13 @@ class GPU; class Renderer { GPU& gpu; + GLStateManager& gl; + OpenGL::Program triangleProgram; OpenGL::Program displayProgram; OpenGL::VertexArray vao; OpenGL::VertexBuffer vbo; - GLint alphaControlLoc = -1; - GLint texUnitConfigLoc = -1; // TEV configuration uniform locations GLint textureEnvSourceLoc = -1; @@ -30,8 +30,8 @@ class Renderer { GLint textureEnvCombinerLoc = -1; GLint textureEnvColorLoc = -1; GLint textureEnvScaleLoc = -1; - GLint textureEnvUpdateBufferLoc = -1; - GLint textureEnvBufferColorLoc = -1; + + // Uniform of PICA registers GLint picaRegLoc = -1; // Depth configuration uniform locations @@ -39,9 +39,6 @@ class Renderer { GLint depthScaleLoc = -1; GLint depthmapEnableLoc = -1; - u32 oldAlphaControl = 0; - u32 oldTexUnitConfig = 0; - float oldDepthScale = -1.0; float oldDepthOffset = 0.0; bool oldDepthmapEnable = false; @@ -81,7 +78,7 @@ class Renderer { void updateLightingLUT(); public: - Renderer(GPU& gpu, const std::array& internalRegs) : gpu(gpu), regs(internalRegs) {} + Renderer(GPU& gpu, GLStateManager& gl, const std::array& internalRegs) : gpu(gpu), gl(gl), regs(internalRegs) {} void reset(); void display(); // Display the 3DS screen contents to the window @@ -89,7 +86,7 @@ class Renderer { void getGraphicsContext(); // Set up graphics context for rendering void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 
outputSize, u32 flags); // Perform display transfer - void drawVertices(PICA::PrimType primType, std::span vertices); // Draw the given vertices + void drawVertices(PICA::PrimType primType, std::span vertices); // Draw the given vertices void setFBSize(u32 width, u32 height) { fbSize.x() = width; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 2efc4195..51e9ab69 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -10,8 +10,9 @@ using namespace Floats; - -GPU::GPU(Memory& mem) : mem(mem), renderer(*this, regs) { +// Note: For when we have multiple backends, the GL state manager can stay here and have the constructor for the Vulkan-or-whatever renderer ignore it +// Thus, our GLStateManager being here does not negatively impact renderer-agnosticness +GPU::GPU(Memory& mem, GLStateManager& gl) : mem(mem), renderer(*this, gl, regs) { vram = new u8[vramSize]; mem.setVRAM(vram); // Give the bus a pointer to our VRAM } @@ -22,6 +23,7 @@ void GPU::reset() { shaderJIT.reset(); std::memset(vram, 0, vramSize); lightingLUT.fill(0); + lightingLUTDirty = true; totalAttribCount = 0; fixedAttribMask = 0; @@ -60,7 +62,7 @@ void GPU::drawArrays(bool indexed) { } } -static std::array vertices; +static std::array vertices; template void GPU::drawArrays() { @@ -248,7 +250,7 @@ void GPU::drawArrays() { shaderUnit.vs.run(); } - PicaVertex& out = vertices[i]; + PICA::Vertex& out = vertices[i]; // Map shader outputs to fixed function properties const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7; for (int i = 0; i < totalShaderOutputs; i++) { @@ -264,8 +266,8 @@ void GPU::drawArrays() { renderer.drawVertices(primType, std::span(vertices).first(vertexCount)); } -PicaVertex GPU::getImmediateModeVertex() { - PicaVertex v; +PICA::Vertex GPU::getImmediateModeVertex() { + PICA::Vertex v; const int totalAttrCount = (regs[PICA::InternalRegs::VertexShaderAttrNum] & 0xf) + 1; // Copy immediate mode attributes to vertex shader unit diff --git 
a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index a0eb5adc..f62040dd 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -188,7 +188,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { immediateModeAttributes[immediateModeAttrIndex++] = attr; if (immediateModeAttrIndex == totalAttrCount) { - PicaVertex v = getImmediateModeVertex(); + PICA::Vertex v = getImmediateModeVertex(); immediateModeAttrIndex = 0; immediateModeVertices[immediateModeVertIndex++] = v; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 589457f5..72f346bc 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -34,12 +34,11 @@ const char* vertexShader = R"( // TEV uniforms uniform uint u_textureEnvColor[6]; - uniform uint u_textureEnvBufferColor; - uniform uint u_picaRegs[0x200 - 0x47]; + uniform uint u_picaRegs[0x200 - 0x48]; // Helper so that the implementation of u_pica_regs can be changed later uint readPicaReg(uint reg_addr){ - return u_picaRegs[reg_addr - 0x47]; + return u_picaRegs[reg_addr - 0x48]; } vec4 abgr8888ToVec4(uint abgr) { @@ -96,7 +95,7 @@ const char* vertexShader = R"( v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); } - v_textureEnvBufferColor = abgr8888ToVec4(u_textureEnvBufferColor); + v_textureEnvBufferColor = abgr8888ToVec4(readPicaReg(0xFD)); // Parse clipping plane registers // The plane registers describe a clipping plane in the form of Ax + By + Cz + D = 0 @@ -131,15 +130,11 @@ const char* fragmentShader = R"( out vec4 fragColour; - uniform uint u_alphaControl; - uniform uint u_textureConfig; - // TEV uniforms uniform uint u_textureEnvSource[6]; uniform uint u_textureEnvOperand[6]; uniform uint u_textureEnvCombiner[6]; uniform uint u_textureEnvScale[6]; - uniform uint u_textureEnvUpdateBuffer; // Depth control uniforms uniform float u_depthScale; @@ -151,11 +146,11 @@ const char* fragmentShader = R"( uniform sampler2D u_tex2; uniform 
sampler1DArray u_tex_lighting_lut; - uniform uint u_picaRegs[0x200 - 0x47]; + uniform uint u_picaRegs[0x200 - 0x48]; // Helper so that the implementation of u_pica_regs can be changed later uint readPicaReg(uint reg_addr){ - return u_picaRegs[reg_addr - 0x47]; + return u_picaRegs[reg_addr - 0x48]; } vec4 tevSources[16]; @@ -340,7 +335,7 @@ const char* fragmentShader = R"( bool error_unimpl = false; - for (uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++){ + for (uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION,int(i*3),3); uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140 + 0x10 * light_id); @@ -363,8 +358,8 @@ const char* fragmentShader = R"( vec3 half_vector = normalize(normalize(light_vector) + view); - for(int c = 0; c < 7; c++){ - if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0){ + for (int c = 0; c < 7; c++) { + if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0){ uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); float scale = float(1u << scale_id); if (scale_id >= 6u) @@ -404,15 +399,15 @@ const char* fragmentShader = R"( d[D1_LUT] = 0.0; d[FR_LUT] = 0.0; d[RG_LUT]= d[RB_LUT] = d[RR_LUT]; - } else if(lookup_config == 1) { + } else if (lookup_config == 1) { d[D0_LUT] = 0.0; d[D1_LUT] = 0.0; d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if(lookup_config == 2) { + } else if (lookup_config == 2) { d[FR_LUT] = 0.0; d[SP_LUT] = 0.0; d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if(lookup_config == 3) { + } else if (lookup_config == 3) { d[SP_LUT] = 0.0; d[RG_LUT]= d[RB_LUT] = d[RR_LUT] = 1.0; } else if (lookup_config == 4) { @@ -453,20 +448,22 @@ const char* fragmentShader = R"( } void main() { - vec2 tex2UV = (u_textureConfig & (1u << 13)) != 0u ? v_texcoord1 : v_texcoord2; - // TODO: what do invalid sources and disabled textures read as? // And what does the "previous combiner" source read initially? 
tevSources[0] = v_colour; // Primary/vertex color calcLighting(tevSources[1],tevSources[2]); - if ((u_textureConfig & 1u) != 0u) tevSources[3] = texture(u_tex0, v_texcoord0.xy); - if ((u_textureConfig & 2u) != 0u) tevSources[4] = texture(u_tex1, v_texcoord1); - if ((u_textureConfig & 4u) != 0u) tevSources[5] = texture(u_tex2, tex2UV); + uint textureConfig = readPicaReg(0x80); + vec2 tex2UV = (textureConfig & (1u << 13)) != 0u ? v_texcoord1 : v_texcoord2; + + if ((textureConfig & 1u) != 0u) tevSources[3] = texture(u_tex0, v_texcoord0.xy); + if ((textureConfig & 2u) != 0u) tevSources[4] = texture(u_tex1, v_texcoord1); + if ((textureConfig & 4u) != 0u) tevSources[5] = texture(u_tex2, tex2UV); tevSources[13] = vec4(0.0); // Previous buffer tevSources[15] = vec4(0.0); // Previous combiner tevNextPreviousBuffer = v_textureEnvBufferColor; + uint textureEnvUpdateBuffer = readPicaReg(0xE0); for (int i = 0; i < 6; i++) { tevSources[14] = v_textureEnvColor[i]; // Constant color @@ -474,11 +471,11 @@ const char* fragmentShader = R"( tevSources[13] = tevNextPreviousBuffer; if (i < 4) { - if ((u_textureEnvUpdateBuffer & (0x100u << i)) != 0u) { + if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) { tevNextPreviousBuffer.rgb = tevSources[15].rgb; } - if ((u_textureEnvUpdateBuffer & (0x1000u << i)) != 0u) { + if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) { tevNextPreviousBuffer.a = tevSources[15].a; } } @@ -503,9 +500,11 @@ const char* fragmentShader = R"( // Write final fragment depth gl_FragDepth = depth; - if ((u_alphaControl & 1u) != 0u) { // Check if alpha test is on - uint func = (u_alphaControl >> 4u) & 7u; - float reference = float((u_alphaControl >> 8u) & 0xffu) / 255.0; + // Perform alpha test + uint alphaControl = readPicaReg(0x104); + if ((alphaControl & 1u) != 0u) { // Check if alpha test is on + uint func = (alphaControl >> 4u) & 7u; + float reference = float((alphaControl >> 8u) & 0xffu) / 255.0; float alpha = fragColour.a; switch (func) { @@ -592,21 +591,17 
@@ void Renderer::reset() { if (triangleProgram.exists()) { const auto oldProgram = OpenGL::getProgram(); - triangleProgram.use(); - oldAlphaControl = 0; // Default alpha control to 0 - oldTexUnitConfig = 0; // Default tex unit config to 0 + gl.useProgram(triangleProgram); oldDepthScale = -1.0; // Default depth scale to -1.0, which is what games typically use oldDepthOffset = 0.0; // Default depth offset to 0 oldDepthmapEnable = false; // Enable w buffering - glUniform1ui(alphaControlLoc, oldAlphaControl); - glUniform1ui(texUnitConfigLoc, oldTexUnitConfig); glUniform1f(depthScaleLoc, oldDepthScale); glUniform1f(depthOffsetLoc, oldDepthOffset); glUniform1i(depthmapEnableLoc, oldDepthmapEnable); - glUseProgram(oldProgram); // Switch to old GL program + gl.useProgram(oldProgram); // Switch to old GL program } } @@ -614,18 +609,13 @@ void Renderer::initGraphicsContext() { OpenGL::Shader vert(vertexShader, OpenGL::Vertex); OpenGL::Shader frag(fragmentShader, OpenGL::Fragment); triangleProgram.create({ vert, frag }); - triangleProgram.use(); - - alphaControlLoc = OpenGL::uniformLocation(triangleProgram, "u_alphaControl"); - texUnitConfigLoc = OpenGL::uniformLocation(triangleProgram, "u_textureConfig"); + gl.useProgram(triangleProgram); textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); - textureEnvUpdateBufferLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvUpdateBuffer"); - textureEnvBufferColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvBufferColor"); depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); depthOffsetLoc = 
OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); @@ -642,37 +632,37 @@ void Renderer::initGraphicsContext() { OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment); displayProgram.create({ vertDisplay, fragDisplay }); - displayProgram.use(); + gl.useProgram(displayProgram); glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object - vbo.createFixedSize(sizeof(PicaVertex) * vertexBufferSize, GL_STREAM_DRAW); - vbo.bind(); + vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize, GL_STREAM_DRAW); + gl.bindVBO(vbo); vao.create(); - vao.bind(); + gl.bindVAO(vao); // Position (x, y, z, w) attributes - vao.setAttributeFloat(0, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.positions)); + vao.setAttributeFloat(0, 4, sizeof(Vertex), offsetof(Vertex, s.positions)); vao.enableAttribute(0); // Quaternion attribute - vao.setAttributeFloat(1, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.quaternion)); + vao.setAttributeFloat(1, 4, sizeof(Vertex), offsetof(Vertex, s.quaternion)); vao.enableAttribute(1); // Colour attribute - vao.setAttributeFloat(2, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.colour)); + vao.setAttributeFloat(2, 4, sizeof(Vertex), offsetof(Vertex, s.colour)); vao.enableAttribute(2); // UV 0 attribute - vao.setAttributeFloat(3, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0)); + vao.setAttributeFloat(3, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord0)); vao.enableAttribute(3); // UV 1 attribute - vao.setAttributeFloat(4, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord1)); + vao.setAttributeFloat(4, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord1)); vao.enableAttribute(4); // UV 0 W-component attribute - vao.setAttributeFloat(5, 1, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0_w)); + vao.setAttributeFloat(5, 1, sizeof(Vertex), offsetof(Vertex, s.texcoord0_w)); vao.enableAttribute(5); // View - vao.setAttributeFloat(6, 3, sizeof(PicaVertex), offsetof(PicaVertex, s.view)); + 
vao.setAttributeFloat(6, 3, sizeof(Vertex), offsetof(Vertex, s.view)); vao.enableAttribute(6); // UV 2 attribute - vao.setAttributeFloat(7, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord2)); + vao.setAttributeFloat(7, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord2)); vao.enableAttribute(7); dummyVBO.create(); @@ -725,9 +715,9 @@ void Renderer::setupBlending() { }; if (!blendingEnabled) { - OpenGL::disableBlend(); + gl.disableBlend(); } else { - OpenGL::enableBlend(); + gl.enableBlend(); // Get blending equations const u32 blendControl = regs[PICA::InternalRegs::BlendFunc]; @@ -783,8 +773,6 @@ void Renderer::setupTextureEnvState() { glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs); glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs); glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs); - glUniform1ui(textureEnvUpdateBufferLoc, regs[PICA::InternalRegs::TexEnvUpdateBuffer]); - glUniform1ui(textureEnvBufferColorLoc, regs[PICA::InternalRegs::TexEnvBufferColor]); } void Renderer::bindTexturesToSlots() { @@ -815,21 +803,16 @@ void Renderer::bindTexturesToSlots() { glActiveTexture(GL_TEXTURE0 + 3); glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); glActiveTexture(GL_TEXTURE0); - - // Update the texture unit configuration uniform if it changed - const u32 texUnitConfig = regs[PICA::InternalRegs::TexUnitCfg]; - if (oldTexUnitConfig != texUnitConfig) { - oldTexUnitConfig = texUnitConfig; - glUniform1ui(texUnitConfigLoc, texUnitConfig); - } } -void Renderer::updateLightingLUT(){ + +void Renderer::updateLightingLUT() { + gpu.lightingLUTDirty = false; std::array u16_lightinglut; - for(int i = 0; i < gpu.lightingLUT.size(); i++){ + for (int i = 0; i < gpu.lightingLUT.size(); i++) { uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); u16_lightinglut[i] = value * 65535 / 4095; - } + } glActiveTexture(GL_TEXTURE0 + 3); glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); @@ -839,31 +822,19 @@ void Renderer::updateLightingLUT(){ 
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glActiveTexture(GL_TEXTURE0); - gpu.lightingLUTDirty = false; } -void Renderer::drawVertices(PICA::PrimType primType, std::span vertices) { +void Renderer::drawVertices(PICA::PrimType primType, std::span vertices) { // The fourth type is meant to be "Geometry primitive". TODO: Find out what that is static constexpr std::array primTypes = { OpenGL::Triangle, OpenGL::TriangleStrip, OpenGL::TriangleFan, OpenGL::Triangle }; const auto primitiveTopology = primTypes[static_cast(primType)]; - // TODO: We should implement a GL state tracker that tracks settings like scissor, blending, bound program, etc - // This way if we attempt to eg do multiple glEnable(GL_BLEND) calls in a row, it will say "Oh blending is already enabled" - // And not actually perform the very expensive driver call for it - OpenGL::disableScissor(); - - vbo.bind(); - vao.bind(); - triangleProgram.use(); - - // Adjust alpha test if necessary - const u32 alphaControl = regs[PICA::InternalRegs::AlphaTestConfig]; - if (alphaControl != oldAlphaControl) { - oldAlphaControl = alphaControl; - glUniform1ui(alphaControlLoc, alphaControl); - } + gl.disableScissor(); + gl.bindVBO(vbo); + gl.bindVAO(vao); + gl.useProgram(triangleProgram); OpenGL::enableClipPlane(0); // Clipping plane 0 is always enabled if (regs[PICA::InternalRegs::ClipEnable] & 1) { @@ -879,7 +850,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span const bool depthWriteEnable = getBit<12>(depthControl); const int depthFunc = getBits<4, 3>(depthControl); const int colourMask = getBits<8, 4>(depthControl); - glColorMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8); + gl.setColourMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8); static constexpr std::array depthModes = { GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, 
GL_GEQUAL @@ -908,9 +879,9 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span setupTextureEnvState(); bindTexturesToSlots(); - // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x47) + // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates - glUniform1uiv(picaRegLoc, 0x200 - 0x47, &regs[0x47]); + glUniform1uiv(picaRegLoc, 0x200 - 0x48, &regs[0x48]); if (gpu.lightingLUTDirty) { updateLightingLUT(); @@ -924,18 +895,18 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span // Note: The code below must execute after we've bound the colour buffer & its framebuffer // Because it attaches a depth texture to the aforementioned colour buffer if (depthEnable) { - OpenGL::enableDepth(); - glDepthFunc(depthModes[depthFunc]); - glDepthMask(depthWriteEnable ? GL_TRUE : GL_FALSE); + gl.enableDepth(); + gl.setDepthMask(depthWriteEnable ? 
GL_TRUE : GL_FALSE); + gl.setDepthFunc(depthModes[depthFunc]); bindDepthBuffer(); } else { if (depthWriteEnable) { - OpenGL::enableDepth(); - glDepthFunc(GL_ALWAYS); - glDepthMask(GL_TRUE); + gl.enableDepth(); + gl.setDepthMask(GL_TRUE); + gl.setDepthFunc(GL_ALWAYS); bindDepthBuffer(); } else { - OpenGL::disableDepth(); + gl.disableDepth(); } } @@ -947,7 +918,7 @@ constexpr u32 topScreenBuffer = 0x1f000000; constexpr u32 bottomScreenBuffer = 0x1f05dc00; void Renderer::display() { - OpenGL::disableScissor(); + gl.disableScissor(); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); screenFramebuffer.bind(OpenGL::ReadFramebuffer); @@ -1038,12 +1009,15 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 tex.bind(); screenFramebuffer.bind(OpenGL::DrawFramebuffer); - OpenGL::disableBlend(); - OpenGL::disableDepth(); - OpenGL::disableScissor(); + gl.disableBlend(); + gl.disableDepth(); + gl.disableScissor(); + gl.setColourMask(true, true, true, true); + gl.useProgram(displayProgram); + gl.bindVAO(dummyVAO); + OpenGL::disableClipPlane(0); OpenGL::disableClipPlane(1); - displayProgram.use(); // Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture // We consider output gap == 320 to mean bottom, and anything else to mean top @@ -1053,6 +1027,5 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport } - dummyVAO.bind(); OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen } \ No newline at end of file diff --git a/src/emulator.cpp b/src/emulator.cpp index bb96cadc..76a10698 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -1,6 +1,6 @@ #include "emulator.hpp" -Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory), memory(cpu.getTicksRef()) { +Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory, 
gl), memory(cpu.getTicksRef()) { if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS) < 0) { Helpers::panic("Failed to initialize SDL2"); } @@ -326,3 +326,9 @@ bool Emulator::loadELF(std::ifstream& file) { } return true; } + +// Reset our graphics context and initialize the GPU's graphics context +void Emulator::initGraphicsContext() { + gl.reset(); // TODO (For when we have multiple backends): Only do this if we are using OpenGL + gpu.initGraphicsContext(); +} \ No newline at end of file diff --git a/src/gl_state.cpp b/src/gl_state.cpp new file mode 100644 index 00000000..612ae44d --- /dev/null +++ b/src/gl_state.cpp @@ -0,0 +1,53 @@ +#include "gl_state.hpp" + +void GLStateManager::resetBlend() { + blendEnabled = false; + OpenGL::disableBlend(); +} + +void GLStateManager::resetColourMask() { + redMask = greenMask = blueMask = alphaMask = true; + OpenGL::setColourMask(redMask, greenMask, blueMask, alphaMask); +} + +void GLStateManager::resetDepth() { + depthEnabled = false; + depthMask = true; + depthFunc = GL_LESS; + + OpenGL::disableDepth(); + OpenGL::setDepthMask(true); + OpenGL::setDepthFunc(OpenGL::DepthFunc::Less); +} + +void GLStateManager::resetScissor() { + scissorEnabled = false; + OpenGL::disableScissor(); + OpenGL::setScissor(0, 0, 0, 0); +} + +void GLStateManager::resetVAO() { + boundVAO = 0; + glBindVertexArray(0); +} + +void GLStateManager::resetVBO() { + boundVBO = 0; + glBindBuffer(GL_ARRAY_BUFFER, 0); +} + +void GLStateManager::resetProgram() { + currentProgram = 0; + glUseProgram(0); +} + +void GLStateManager::reset() { + resetBlend(); + resetColourMask(); + resetDepth(); + + resetVAO(); + resetVBO(); + resetProgram(); + resetScissor(); +} \ No newline at end of file