diff --git a/CMakeLists.txt b/CMakeLists.txt index cccd9e2c..7ce91389 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,8 +83,8 @@ else() message(FATAL_ERROR "Currently unsupported CPU architecture") endif() -set(SOURCE_FILES src/main.cpp src/emulator.cpp src/io_file.cpp src/core/CPU/cpu_dynarmic.cpp src/core/CPU/dynarmic_cycles.cpp - src/core/memory.cpp +set(SOURCE_FILES src/main.cpp src/emulator.cpp src/io_file.cpp src/gl_state.cpp src/core/CPU/cpu_dynarmic.cpp + src/core/CPU/dynarmic_cycles.cpp src/core/memory.cpp ) set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp) set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp @@ -138,7 +138,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/opengl.hpp inc include/PICA/dynapica/shader_rec_emitter_x64.hpp include/PICA/pica_hash.hpp include/result/result.hpp include/result/result_common.hpp include/result/result_fs.hpp include/result/result_fnd.hpp include/result/result_gsp.hpp include/result/result_kernel.hpp include/result/result_os.hpp - include/crypto/aes_engine.hpp include/metaprogramming.hpp + include/crypto/aes_engine.hpp include/metaprogramming.hpp include/PICA/pica_vertex.hpp include/gl_state.hpp ) set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp diff --git a/docs/img/MK7.png b/docs/img/MK7.png index 8bf39605..f6fe0cd3 100644 Binary files a/docs/img/MK7.png and b/docs/img/MK7.png differ diff --git a/docs/img/pokegang.png b/docs/img/pokegang.png index ae30c254..58903960 100644 Binary files a/docs/img/pokegang.png and b/docs/img/pokegang.png differ diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 21102bc5..5bc06c47 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -8,6 +8,7 @@ #include "PICA/shader_unit.hpp" #include "PICA/dynapica/shader_rec.hpp" #include "renderer_gl/renderer_gl.hpp" +#include "PICA/pica_vertex.hpp" class GPU { static constexpr u32 regNum = 0x300; @@ -27,7 +28,7 @@ class GPU { std::array 
currentAttributes; // Vertex attributes before being passed to the shader std::array immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission - std::array immediateModeVertices; + std::array immediateModeVertices; uint immediateModeVertIndex; uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading @@ -67,9 +68,20 @@ class GPU { u32* cmdBuffCurr = nullptr; Renderer renderer; - Vertex getImmediateModeVertex(); -public: - GPU(Memory& mem); + PICA::Vertex getImmediateModeVertex(); + + public: + // 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT + // Encoded in PICA native format + static constexpr size_t LightingLutSize = PICA::Lights::LUT_Count * 256; + std::array lightingLUT; + + // Used to prevent uploading the lighting_lut on every draw call + // Set to true when the CPU writes to the lighting_lut + // Set to false by the renderer when the lighting_lut is uploaded to the GPU + bool lightingLUTDirty = false; + + GPU(Memory& mem, GLStateManager& gl); void initGraphicsContext() { renderer.initGraphicsContext(); } void getGraphicsContext() { renderer.getGraphicsContext(); } void display() { renderer.display(); } diff --git a/include/PICA/pica_vertex.hpp b/include/PICA/pica_vertex.hpp new file mode 100644 index 00000000..800dff9a --- /dev/null +++ b/include/PICA/pica_vertex.hpp @@ -0,0 +1,45 @@ +#pragma once +#include "PICA/float_types.hpp" +#include + +namespace PICA { + // A representation of the output vertex as it comes out of the vertex shader, with padding and all + struct Vertex { + using vec2f = std::array; + using vec3f = std::array; + using vec4f = std::array; + + union { + struct { + vec4f positions; // Vertex position + vec4f quaternion; // Quaternion specifying the normal/tangent frame (for fragment lighting) + vec4f colour; // Vertex color + vec2f texcoord0; // Texcoords for texture unit 0 (Only U and V, W is stored separately for 3D textures!) 
+ vec2f texcoord1; // Texcoords for TU 1 + Floats::f24 texcoord0_w; // W component for texcoord 0 if using a 3D texture + u32 padding; // Unused + + vec3f view; // View vector (for fragment lighting) + u32 padding2; // Unused + vec2f texcoord2; // Texcoords for TU 2 + } s; + + // The software, non-accelerated vertex loader writes here and then reads specific components from the above struct + Floats::f24 raw[0x20]; + }; + Vertex() {} + }; +} // namespace PICA + +// Float is used here instead of Floats::f24 to ensure that Floats::f24 is properly sized for direct interpretations as a float by the render backend +#define ASSERT_POS(member, pos) static_assert(offsetof(PICA::Vertex, s.member) == pos * sizeof(float), "PICA::Vertex struct is broken!"); + +ASSERT_POS(positions, 0) +ASSERT_POS(quaternion, 4) +ASSERT_POS(colour, 8) +ASSERT_POS(texcoord0, 12) +ASSERT_POS(texcoord1, 14) +ASSERT_POS(texcoord0_w, 16) +ASSERT_POS(view, 18) +ASSERT_POS(texcoord2, 22) +#undef ASSERT_POS \ No newline at end of file diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 6c868484..e1c9a819 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -10,6 +10,13 @@ namespace PICA { ViewportHeight = 0x43, ViewportInvh = 0x44, + // Clipping plane control + ClipEnable = 0x47, + ClipData0 = 0x48, + ClipData1 = 0x49, + ClipData2 = 0x4A, + ClipData3 = 0x4B, + DepthScale = 0x4D, DepthOffset = 0x4E, ShaderOutputCount = 0x4F, @@ -55,6 +62,17 @@ namespace PICA { ColourBufferLoc = 0x11D, FramebufferSize = 0x11E, + //LightingRegs + LightingLUTIndex = 0x01C5, + LightingLUTData0 = 0x01C8, + LightingLUTData1 = 0x01C9, + LightingLUTData2 = 0x01CA, + LightingLUTData3 = 0x01CB, + LightingLUTData4 = 0x01CC, + LightingLUTData5 = 0x01CD, + LightingLUTData6 = 0x01CE, + LightingLUTData7 = 0x01CF, + // Geometry pipeline registers VertexAttribLoc = 0x200, AttribFormatLow = 0x201, @@ -156,6 +174,34 @@ namespace PICA { }; } + namespace Lights { + enum : u32 { + LUT_D0 = 0, + LUT_D1, + LUT_FR, + 
LUT_RB, + LUT_RG, + LUT_RR, + LUT_SP0 = 0x8, + LUT_SP1, + LUT_SP2, + LUT_SP3, + LUT_SP4, + LUT_SP5, + LUT_SP6, + LUT_SP7, + LUT_DA0 = 0x10, + LUT_DA1, + LUT_DA2, + LUT_DA3, + LUT_DA4, + LUT_DA5, + LUT_DA6, + LUT_DA7, + LUT_Count + }; + } + enum class TextureFmt : u32 { RGBA8 = 0x0, RGB8 = 0x1, diff --git a/include/emulator.hpp b/include/emulator.hpp index 10279443..7cbc27b7 100644 --- a/include/emulator.hpp +++ b/include/emulator.hpp @@ -11,7 +11,7 @@ #include "crypto/aes_engine.hpp" #include "io_file.hpp" #include "memory.hpp" -#include "opengl.hpp" +#include "gl_state.hpp" enum class ROMType { None, ELF, NCSD }; @@ -22,6 +22,7 @@ class Emulator { Kernel kernel; Crypto::AESEngine aesEngine; + GLStateManager gl; SDL_Window* window; SDL_GLContext glContext; SDL_GameController* gameController; @@ -56,5 +57,5 @@ class Emulator { bool loadNCSD(const std::filesystem::path& path); bool loadELF(const std::filesystem::path& path); bool loadELF(std::ifstream& file); - void initGraphicsContext() { gpu.initGraphicsContext(); } + void initGraphicsContext(); }; diff --git a/include/gl_state.hpp b/include/gl_state.hpp new file mode 100644 index 00000000..82531c7a --- /dev/null +++ b/include/gl_state.hpp @@ -0,0 +1,140 @@ +#pragma once +#include + +#include "opengl.hpp" + +// GL state manager object for use in the OpenGL GPU renderer and potentially other things in the future (such as a potential ImGui GUI) +// This object is meant to help us avoid duplicate OpenGL calls (such as binding the same program twice, enabling/disabling a setting twice, etc) +// by checking if we actually *need* a state change. 
This is meant to avoid expensive driver calls and minimize unneeded state changes +// A lot of code is in the header file instead of the relevant source file to make sure stuff gets inlined even without LTO, and +// because this header should ideally not be getting included in too many places +// Code that does not need inlining however, like the reset() function should be in gl_state.cpp +// This state manager may not handle every aspect of OpenGL, in which case anything not handled here should just be manipulated with raw +// OpenGL/opengl.hpp calls However, anything that can be handled through the state manager should, or at least there should be an attempt to keep it +// consistent with the current GL state to avoid bugs/suboptimal code. + +// The state manager must *also* be a trivially constructible/destructible type, to ensure that no OpenGL functions get called sneakily without us +// knowing. This is important for when we want to eg add a Vulkan or misc backend. Would definitely not want to refactor all this. 
So we try to be as +// backend-agnostic as possible + +struct GLStateManager { + bool blendEnabled; + bool depthEnabled; + bool scissorEnabled; + + // Colour/depth masks + bool redMask, greenMask, blueMask, alphaMask; + bool depthMask; + + GLuint boundVAO; + GLuint boundVBO; + GLuint currentProgram; + + GLenum depthFunc; + + void reset(); + void resetBlend(); + void resetColourMask(); + void resetDepth(); + void resetVAO(); + void resetVBO(); + void resetProgram(); + void resetScissor(); + + void enableDepth() { + if (!depthEnabled) { + depthEnabled = true; + OpenGL::enableDepth(); + } + } + + void disableDepth() { + if (depthEnabled) { + depthEnabled = false; + OpenGL::disableDepth(); + } + } + + void enableBlend() { + if (!blendEnabled) { + blendEnabled = true; + OpenGL::enableBlend(); + } + } + + void disableBlend() { + if (blendEnabled) { + blendEnabled = false; + OpenGL::disableBlend(); + } + } + + void enableScissor() { + if (!scissorEnabled) { + scissorEnabled = true; + OpenGL::enableScissor(); + } + } + + void disableScissor() { + if (scissorEnabled) { + scissorEnabled = false; + OpenGL::disableScissor(); + } + } + + void bindVAO(GLuint handle) { + if (boundVAO != handle) { + boundVAO = handle; + glBindVertexArray(handle); + } + } + + void bindVBO(GLuint handle) { + if (boundVBO != handle) { + boundVBO = handle; + glBindBuffer(GL_ARRAY_BUFFER, handle); + } + } + + void useProgram(GLuint handle) { + if (currentProgram != handle) { + currentProgram = handle; + glUseProgram(handle); + } + } + + void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); } + void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); } + void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); } + + void setColourMask(bool r, bool g, bool b, bool a) { + if (r != redMask || g != greenMask || b != blueMask || a != alphaMask) { + redMask = r; // Store the requested mask in the cache so the comparison above stays in sync with GL + greenMask = g; + blueMask = b; + alphaMask = a; + + OpenGL::setColourMask(r, g, b, a); + } + } 
+ + void setDepthMask(bool mask) { + if (depthMask != mask) { + depthMask = mask; + OpenGL::setDepthMask(mask); + } + } + + void setDepthFunc(GLenum func) { + if (depthFunc != func) { + depthFunc = func; + glDepthFunc(func); + } + } + + void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast(func)); } +}; + +static_assert(std::is_trivially_constructible(), "OpenGL State Manager class is not trivially constructible!"); +static_assert(std::is_trivially_destructible(), "OpenGL State Manager class is not trivially destructible!"); \ No newline at end of file diff --git a/include/opengl.hpp b/include/opengl.hpp index 9d93078b..f8328799 100644 --- a/include/opengl.hpp +++ b/include/opengl.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - * Copyright (C) 2022 PCSX-Redux authors * + * Copyright (C) 2022 PCSX-Redux & Panda3DS authors * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -128,9 +128,9 @@ namespace OpenGL { #ifdef OPENGL_DESTRUCTORS ~VertexArray() { free(); } #endif - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void bind() { glBindVertexArray(m_handle); } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind() const { glBindVertexArray(m_handle); } template void setAttributeFloat(GLuint index, GLint size, GLsizei stride, const void* offset, bool normalized = GL_FALSE) { @@ -299,11 +299,11 @@ namespace OpenGL { #ifdef OPENGL_DESTRUCTORS ~Texture() { free(); } #endif - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void bind() { glBindTexture(m_binding, m_handle); } - int width() { return m_width; } - int height() { return m_height; } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind() const { glBindTexture(m_binding, 
m_handle); } + int width() const { return m_width; } + int height() const { return m_height; } void free() { glDeleteTextures(1, &m_handle); } }; @@ -327,10 +327,10 @@ namespace OpenGL { #ifdef OPENGL_DESTRUCTORS ~Framebuffer() { free(); } #endif - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void bind(GLenum target) { glBindFramebuffer(target, m_handle); } - void bind(FramebufferTypes target) { bind(static_cast(target)); } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind(GLenum target) const { glBindFramebuffer(target, m_handle); } + void bind(FramebufferTypes target) const { bind(static_cast(target)); } void free() { glDeleteFramebuffers(1, &m_handle); } void createWithTexture(Texture& tex, GLenum mode = GL_FRAMEBUFFER, GLenum textureType = GL_TEXTURE_2D) { @@ -392,8 +392,8 @@ namespace OpenGL { return m_handle != 0; } - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } }; struct Program { @@ -421,9 +421,9 @@ namespace OpenGL { return m_handle != 0; } - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void use() { glUseProgram(m_handle); } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void use() const { glUseProgram(m_handle); } }; static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) { @@ -454,9 +454,9 @@ namespace OpenGL { #ifdef OPENGL_DESTRUCTORS ~VertexBuffer() { free(); } #endif - GLuint handle() { return m_handle; } - bool exists() { return m_handle != 0; } - void bind() { glBindBuffer(GL_ARRAY_BUFFER, m_handle); } + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind() const { glBindBuffer(GL_ARRAY_BUFFER, m_handle); } void free() { glDeleteBuffers(1, &m_handle); } // Reallocates 
the buffer on every call. Prefer the sub version if possible. @@ -524,7 +524,12 @@ namespace OpenGL { static void enableStencil() { glEnable(GL_STENCIL_TEST); } static void disableStencil() { glDisable(GL_STENCIL_TEST); } + static void enableClipPlane(GLuint index) { glEnable(GL_CLIP_DISTANCE0 + index); } + static void disableClipPlane(GLuint index) { glDisable(GL_CLIP_DISTANCE0 + index); } + static void setDepthFunc(DepthFunc func) { glDepthFunc(static_cast(func)); } + static void setColourMask(GLboolean r, GLboolean g, GLboolean b, GLboolean a) { glColorMask(r, g, b, a); } + static void setDepthMask(GLboolean mask) { glDepthMask(mask); } enum Primitives { Triangle = GL_TRIANGLES, @@ -664,23 +669,23 @@ namespace OpenGL { // We're never supporting 3D rectangles, because rectangles were never meant to be 3D in the first place // x, y: Coords of the top left vertex // width, height: Dimensions of the rectangle. Initialized to 0 if not specified. - template - struct Rectangle { - T x, y, width, height; + template + struct Rectangle { + T x, y, width, height; - std::pair topLeft() { return std::make_pair(x, y); } - std::pair topRight() { return std::make_pair(x + width, y); } - std::pair bottomLeft() { return std::make_pair(x, y + height); } - std::pair bottomRight() { return std::make_pair(x + width, y + height); } + std::pair topLeft() const { return std::make_pair(x, y); } + std::pair topRight() const { return std::make_pair(x + width, y); } + std::pair bottomLeft() const { return std::make_pair(x, y + height); } + std::pair bottomRight() const { return std::make_pair(x + width, y + height); } - Rectangle() : x(0), y(0), width(0), height(0) {} - Rectangle(T x, T y, T width, T height) : x(x), y(y), width(width), height(height) {} + Rectangle() : x(0), y(0), width(0), height(0) {} + Rectangle(T x, T y, T width, T height) : x(x), y(y), width(width), height(height) {} - bool isEmpty() { return width == 0 && height == 0; } - bool isLine() { return (width == 0 && height 
!= 0) || (width != 0 && height == 0); } + bool isEmpty() const { return width == 0 && height == 0; } + bool isLine() const { return (width == 0 && height != 0) || (width != 0 && height == 0); } - void setEmpty() { x = y = width = height = 0; } - }; + void setEmpty() { x = y = width = height = 0; } + }; using Rect = Rectangle; diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 90f1fd2f..4fe0a37c 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -3,35 +3,26 @@ #include #include "PICA/float_types.hpp" +#include "gl_state.hpp" #include "helpers.hpp" #include "logger.hpp" -#include "opengl.hpp" #include "surface_cache.hpp" #include "textures.hpp" #include "PICA/regs.hpp" +#include "PICA/pica_vertex.hpp" // More circular dependencies! class GPU; -struct Vertex { - OpenGL::vec4 position; - OpenGL::vec4 colour; - OpenGL::vec2 texcoord0; - OpenGL::vec2 texcoord1; - Floats::f24 texcoord0_w; - u32 padding; // pad so that texcoord2 is 64-bit aligned - OpenGL::vec2 texcoord2; -}; - class Renderer { GPU& gpu; + GLStateManager& gl; + OpenGL::Program triangleProgram; OpenGL::Program displayProgram; OpenGL::VertexArray vao; OpenGL::VertexBuffer vbo; - GLint alphaControlLoc = -1; - GLint texUnitConfigLoc = -1; // TEV configuration uniform locations GLint textureEnvSourceLoc = -1; @@ -39,17 +30,15 @@ class Renderer { GLint textureEnvCombinerLoc = -1; GLint textureEnvColorLoc = -1; GLint textureEnvScaleLoc = -1; - GLint textureEnvUpdateBufferLoc = -1; - GLint textureEnvBufferColorLoc = -1; + + // Uniform of PICA registers + GLint picaRegLoc = -1; // Depth configuration uniform locations GLint depthOffsetLoc = -1; GLint depthScaleLoc = -1; GLint depthmapEnableLoc = -1; - u32 oldAlphaControl = 0; - u32 oldTexUnitConfig = 0; - float oldDepthScale = -1.0; float oldDepthOffset = 0.0; bool oldDepthmapEnable = false; @@ -75,6 +64,7 @@ class Renderer { const std::array& regs; OpenGL::Texture screenTexture; + 
GLuint lightLUTTextureArray; OpenGL::Framebuffer screenFramebuffer; OpenGL::Framebuffer getColourFBO(); @@ -85,9 +75,10 @@ class Renderer { void bindDepthBuffer(); void setupTextureEnvState(); void bindTexturesToSlots(); + void updateLightingLUT(); public: - Renderer(GPU& gpu, const std::array& internalRegs) : gpu(gpu), regs(internalRegs) {} + Renderer(GPU& gpu, GLStateManager& gl, const std::array& internalRegs) : gpu(gpu), gl(gl), regs(internalRegs) {} void reset(); void display(); // Display the 3DS screen contents to the window @@ -95,7 +86,7 @@ class Renderer { void getGraphicsContext(); // Set up graphics context for rendering void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags); // Perform display transfer - void drawVertices(PICA::PrimType primType, std::span vertices); // Draw the given vertices + void drawVertices(PICA::PrimType primType, std::span vertices); // Draw the given vertices void setFBSize(u32 width, u32 height) { fbSize.x() = width; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 9ee574a8..51e9ab69 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -10,44 +10,9 @@ using namespace Floats; -// A representation of the output vertex as it comes out of the vertex shader, with padding and all -struct OutputVertex { - using vec2f = OpenGL::Vector; - using vec3f = OpenGL::Vector; - using vec4f = OpenGL::Vector; - - union { - struct { - vec4f positions; // Vertex position - vec4f quaternion; // Quaternion specifying the normal/tangent frame (for fragment lighting) - vec4f colour; // Vertex color - vec2f texcoord0; // Texcoords for texture unit 0 (Only U and V, W is stored separately for 3D textures!) 
- vec2f texcoord1; // Texcoords for TU 1 - f24 texcoord0_w; // W component for texcoord 0 if using a 3D texture - u32 padding; // Unused - - vec3f view; // View vector (for fragment lighting) - u32 padding2; // Unused - vec2f texcoord2; // Texcoords for TU 2 - } s; - - // The software, non-accelerated vertex loader writes here and then reads specific components from the above struct - f24 raw[0x20]; - }; - OutputVertex() {} -}; -#define ASSERT_POS(member, pos) static_assert(offsetof(OutputVertex, s.member) == pos * sizeof(f24), "OutputVertex struct is broken!"); - -ASSERT_POS(positions, 0) -ASSERT_POS(quaternion, 4) -ASSERT_POS(colour, 8) -ASSERT_POS(texcoord0, 12) -ASSERT_POS(texcoord1, 14) -ASSERT_POS(texcoord0_w, 16) -ASSERT_POS(view, 18) -ASSERT_POS(texcoord2, 22) - -GPU::GPU(Memory& mem) : mem(mem), renderer(*this, regs) { +// Note: For when we have multiple backends, the GL state manager can stay here and have the constructor for the Vulkan-or-whatever renderer ignore it +// Thus, our GLStateManager being here does not negatively impact renderer-agnosticness +GPU::GPU(Memory& mem, GLStateManager& gl) : mem(mem), renderer(*this, gl, regs) { vram = new u8[vramSize]; mem.setVRAM(vram); // Give the bus a pointer to our VRAM } @@ -57,6 +22,8 @@ void GPU::reset() { shaderUnit.reset(); shaderJIT.reset(); std::memset(vram, 0, vramSize); + lightingLUT.fill(0); + lightingLUTDirty = true; totalAttribCount = 0; fixedAttribMask = 0; @@ -95,7 +62,7 @@ void GPU::drawArrays(bool indexed) { } } -static std::array vertices; +static std::array vertices; template void GPU::drawArrays() { @@ -283,7 +250,7 @@ void GPU::drawArrays() { shaderUnit.vs.run(); } - OutputVertex out; + PICA::Vertex& out = vertices[i]; // Map shader outputs to fixed function properties const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7; for (int i = 0; i < totalShaderOutputs; i++) { @@ -294,24 +261,13 @@ void GPU::drawArrays() { out.raw[mapping] = shaderUnit.vs.outputs[i][j]; } 
} - - std::memcpy(&vertices[i].position, &out.s.positions, sizeof(vec4f)); - std::memcpy(&vertices[i].colour, &out.s.colour, sizeof(vec4f)); - std::memcpy(&vertices[i].texcoord0, &out.s.texcoord0, 2 * sizeof(f24)); - std::memcpy(&vertices[i].texcoord1, &out.s.texcoord1, 2 * sizeof(f24)); - std::memcpy(&vertices[i].texcoord0_w, &out.s.texcoord0_w, sizeof(f24)); - std::memcpy(&vertices[i].texcoord2, &out.s.texcoord2, 2 * sizeof(f24)); - - //printf("(x, y, z, w) = (%f, %f, %f, %f)\n", (double)vertices[i].position.x(), (double)vertices[i].position.y(), (double)vertices[i].position.z(), (double)vertices[i].position.w()); - //printf("(r, g, b, a) = (%f, %f, %f, %f)\n", (double)vertices[i].colour.r(), (double)vertices[i].colour.g(), (double)vertices[i].colour.b(), (double)vertices[i].colour.a()); - //printf("(u, v ) = (%f, %f)\n", vertices[i].UVs.u(), vertices[i].UVs.v()); } renderer.drawVertices(primType, std::span(vertices).first(vertexCount)); } -Vertex GPU::getImmediateModeVertex() { - Vertex v; +PICA::Vertex GPU::getImmediateModeVertex() { + PICA::Vertex v; const int totalAttrCount = (regs[PICA::InternalRegs::VertexShaderAttrNum] & 0xf) + 1; // Copy immediate mode attributes to vertex shader unit @@ -321,13 +277,13 @@ Vertex GPU::getImmediateModeVertex() { // Run VS and return vertex data. 
TODO: Don't hardcode offsets for each attribute shaderUnit.vs.run(); - std::memcpy(&v.position, &shaderUnit.vs.outputs[0], sizeof(vec4f)); - std::memcpy(&v.colour, &shaderUnit.vs.outputs[1], sizeof(vec4f)); - std::memcpy(&v.texcoord0, &shaderUnit.vs.outputs[2], 2 * sizeof(f24)); + std::memcpy(&v.s.positions, &shaderUnit.vs.outputs[0], sizeof(vec4f)); + std::memcpy(&v.s.colour, &shaderUnit.vs.outputs[1], sizeof(vec4f)); + std::memcpy(&v.s.texcoord0, &shaderUnit.vs.outputs[2], 2 * sizeof(f24)); - printf("(x, y, z, w) = (%f, %f, %f, %f)\n", (double)v.position.x(), (double)v.position.y(), (double)v.position.z(), (double)v.position.w()); - printf("(r, g, b, a) = (%f, %f, %f, %f)\n", (double)v.colour.r(), (double)v.colour.g(), (double)v.colour.b(), (double)v.colour.a()); - printf("(u, v ) = (%f, %f)\n", v.texcoord0.u(), v.texcoord0.v()); + printf("(x, y, z, w) = (%f, %f, %f, %f)\n", (double)v.s.positions[0], (double)v.s.positions[1], (double)v.s.positions[2], (double)v.s.positions[3]); + printf("(r, g, b, a) = (%f, %f, %f, %f)\n", (double)v.s.colour[0], (double)v.s.colour[1], (double)v.s.colour[2], (double)v.s.colour[3]); + printf("(u, v ) = (%f, %f)\n", (double)v.s.texcoord0[0], (double)v.s.texcoord0[1]); return v; } diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index 14f61ef7..f62040dd 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -24,18 +24,36 @@ void GPU::writeReg(u32 address, u32 value) { } u32 GPU::readInternalReg(u32 index) { - if (index > regNum) { + using namespace PICA::InternalRegs; + + if (index >= regNum) [[unlikely]] { Helpers::panic("Tried to read invalid GPU register. 
Index: %X\n", index); return 0; } + else if (index >= LightingLUTData0 && index <= LightingLUTData7) [[unlikely]] { + const uint32_t index = regs[LightingLUTIndex]; // Get full LUT index register + const uint32_t lutID = getBits<8, 5>(index); // Get which LUT we're actually reading from + uint32_t lutIndex = getBits<0, 8>(index); // And get the index inside the LUT we're reading from + uint32_t value = 0xffffffff; // Return value + + if (lutID < PICA::Lights::LUT_Count) { + value = lightingLUT[lutID * 256 + lutIndex]; + } + + // Increment the bottom 8 bits of the lighting LUT index register + lutIndex += 1; + regs[LightingLUTIndex] = (index & ~0xff) | (lutIndex & 0xff); + return value; + } + return regs[index]; } void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { using namespace PICA::InternalRegs; - if (index > regNum) { + if (index >= regNum) [[unlikely]] { Helpers::panic("Tried to write to invalid GPU register. Index: %X, value: %08X\n", index, value); return; } @@ -91,6 +109,30 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { break; } + case LightingLUTData0: + case LightingLUTData1: + case LightingLUTData2: + case LightingLUTData3: + case LightingLUTData4: + case LightingLUTData5: + case LightingLUTData6: + case LightingLUTData7:{ + const uint32_t index = regs[LightingLUTIndex]; // Get full LUT index register + const uint32_t lutID = getBits<8, 5>(index); // Get which LUT we're actually writing to + uint32_t lutIndex = getBits<0, 8>(index); // And get the index inside the LUT we're writing to + + if (lutID < PICA::Lights::LUT_Count) { + lightingLUT[lutID * 256 + lutIndex] = newValue; + lightingLUTDirty = true; + } + + // Increment the bottom 8 bits of the lighting LUT index register + lutIndex += 1; + regs[LightingLUTIndex] = (index & ~0xff) | (lutIndex & 0xff); + + break; + } + case VertexFloatUniformIndex: shaderUnit.vs.setFloatUniformIndex(value); break; @@ -146,7 +188,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { 
immediateModeAttributes[immediateModeAttrIndex++] = attr; if (immediateModeAttrIndex == totalAttrCount) { - Vertex v = getImmediateModeVertex(); + PICA::Vertex v = getImmediateModeVertex(); immediateModeAttrIndex = 0; immediateModeVertices[immediateModeVertIndex++] = v; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 487c9db8..72f346bc 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -5,29 +5,41 @@ using namespace Floats; using namespace Helpers; - -// This is all hacked up to display our first triangle +using namespace PICA; const char* vertexShader = R"( #version 410 core - layout (location = 0) in vec4 a_coords; - layout (location = 1) in vec4 a_vertexColour; - layout (location = 2) in vec2 a_texcoord0; - layout (location = 3) in vec2 a_texcoord1; - layout (location = 4) in float a_texcoord0_w; - layout (location = 5) in vec2 a_texcoord2; + layout (location = 0) in vec4 a_coords; + layout (location = 1) in vec4 a_quaternion; + layout (location = 2) in vec4 a_vertexColour; + layout (location = 3) in vec2 a_texcoord0; + layout (location = 4) in vec2 a_texcoord1; + layout (location = 5) in float a_texcoord0_w; + layout (location = 6) in vec3 a_view; + layout (location = 7) in vec2 a_texcoord2; + out vec3 v_normal; + out vec3 v_tangent; + out vec3 v_bitangent; out vec4 v_colour; out vec3 v_texcoord0; out vec2 v_texcoord1; + out vec3 v_view; out vec2 v_texcoord2; flat out vec4 v_textureEnvColor[6]; flat out vec4 v_textureEnvBufferColor; + out float gl_ClipDistance[2]; + // TEV uniforms uniform uint u_textureEnvColor[6]; - uniform uint u_textureEnvBufferColor; + uniform uint u_picaRegs[0x200 - 0x48]; + + // Helper so that the implementation of u_pica_regs can be changed later + uint readPicaReg(uint reg_addr){ + return u_picaRegs[reg_addr - 0x48]; + } vec4 abgr8888ToVec4(uint abgr) { const float scale = 1.0 / 255.0; @@ -40,6 +52,31 @@ const char* vertexShader = R"( ); } + 
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q){ + vec3 u = q.xyz; + float s = q.w; + return 2.0 * dot(u, v) * u + (s * s - dot(u, u))* v + 2.0 * s * cross(u, v); + } + + // Convert an arbitrary-width floating point literal to an f32 + float decodeFP(uint hex, uint E, uint M){ + uint width = M + E + 1u; + uint bias = 128u - (1u << (E - 1u)); + uint exponent = (hex >> M) & ((1u << E) - 1u); + uint mantissa = hex & ((1u << M) - 1u); + uint sign = (hex >> (E + M)) << 31u; + + if ((hex & ((1u << (width - 1u)) - 1u)) != 0) { + if (exponent == (1u << E) - 1u) exponent = 255u; + else exponent += bias; + hex = sign | (mantissa << (23u - M)) | (exponent << 23u); + } else { + hex = sign; + } + + return uintBitsToFloat(hex); + } + void main() { gl_Position = a_coords; v_colour = a_vertexColour; @@ -48,36 +85,56 @@ const char* vertexShader = R"( v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w); v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); + v_view = a_view; + + v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); + v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); + v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); for (int i = 0; i < 6; i++) { v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); } - v_textureEnvBufferColor = abgr8888ToVec4(u_textureEnvBufferColor); + v_textureEnvBufferColor = abgr8888ToVec4(readPicaReg(0xFD)); + + // Parse clipping plane registers + // The plane registers describe a clipping plane in the form of Ax + By + Cz + D = 0 + // With n = (A, B, C) being the normal vector and D being the origin point distance + // Therefore, for the second clipping plane, we can just pass the dot product of the clip vector and the input coordinates to gl_ClipDistance[1] + vec4 clipData = vec4( + decodeFP(readPicaReg(0x48) & 0xffffffu, 7, 16), + decodeFP(readPicaReg(0x49) & 0xffffffu, 
7, 16), + decodeFP(readPicaReg(0x4A) & 0xffffffu, 7, 16), + decodeFP(readPicaReg(0x4B) & 0xffffffu, 7, 16) + ); + + // There's also another, always-on clipping plane based on vertex z + gl_ClipDistance[0] = -a_coords.z; + gl_ClipDistance[1] = dot(clipData, a_coords); } )"; const char* fragmentShader = R"( #version 410 core + in vec3 v_tangent; + in vec3 v_normal; + in vec3 v_bitangent; in vec4 v_colour; in vec3 v_texcoord0; in vec2 v_texcoord1; + in vec3 v_view; in vec2 v_texcoord2; flat in vec4 v_textureEnvColor[6]; flat in vec4 v_textureEnvBufferColor; out vec4 fragColour; - uniform uint u_alphaControl; - uniform uint u_textureConfig; - // TEV uniforms uniform uint u_textureEnvSource[6]; uniform uint u_textureEnvOperand[6]; uniform uint u_textureEnvCombiner[6]; uniform uint u_textureEnvScale[6]; - uniform uint u_textureEnvUpdateBuffer; // Depth control uniforms uniform float u_depthScale; @@ -87,6 +144,14 @@ const char* fragmentShader = R"( uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; + uniform sampler1DArray u_tex_lighting_lut; + + uniform uint u_picaRegs[0x200 - 0x48]; + + // Helper so that the implementation of u_pica_regs can be changed later + uint readPicaReg(uint reg_addr){ + return u_picaRegs[reg_addr - 0x48]; + } vec4 tevSources[16]; vec4 tevNextPreviousBuffer; @@ -190,21 +255,215 @@ const char* fragmentShader = R"( return result; } - void main() { - vec2 tex2UV = (u_textureConfig & (1u << 13)) != 0u ? 
v_texcoord1 : v_texcoord2; + #define D0_LUT 0u + #define D1_LUT 1u + #define SP_LUT 2u + #define FR_LUT 3u + #define RB_LUT 4u + #define RG_LUT 5u + #define RR_LUT 6u + float lutLookup(uint lut, uint light, float value){ + if (lut >= FR_LUT && lut <= RR_LUT) + lut -= 1; + if (lut==SP_LUT) + lut = light + 8; + return texture(u_tex_lighting_lut, vec2(value, lut)).r; + } + + vec3 regToColor(uint reg) { + // Normalization scale to convert from [0...255] to [0.0...1.0] + const float scale = 1.0 / 255.0; + + return scale * vec3( + float(bitfieldExtract(reg, 20, 8)), + float(bitfieldExtract(reg, 10, 8)), + float(bitfieldExtract(reg, 00, 8)) + ); + } + + // Convert an arbitrary-width floating point literal to an f32 + float decodeFP(uint hex, uint E, uint M){ + uint width = M + E + 1u; + uint bias = 128u - (1u << (E - 1u)); + uint exponent = (hex >> M) & ((1u << E) - 1u); + uint mantissa = hex & ((1u << M) - 1u); + uint sign = (hex >> (E + M)) << 31u; + + if ((hex & ((1u << (width - 1u)) - 1u)) != 0) { + if (exponent == (1u << E) - 1u) exponent = 255u; + else exponent += bias; + hex = sign | (mantissa << (23u - M)) | (exponent << 23u); + } else { + hex = sign; + } + + return uintBitsToFloat(hex); + } + + // Implements the following algorthm: https://mathb.in/26766 + void calcLighting(out vec4 primary_color, out vec4 secondary_color){ + // Quaternions describe a transformation from surface-local space to eye space. + // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), + // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). 
+ vec3 normal = normalize(v_normal ); + vec3 tangent = normalize(v_tangent ); + vec3 bitangent = normalize(v_bitangent); + vec3 view = normalize(v_view); + + uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008F); + if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0){ + primary_color = secondary_color = vec4(1.0); + return; + } + + uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0); + uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2) & 0x7u) +1; + uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9); + + primary_color = vec4(vec3(0.0),1.0); + secondary_color = vec4(vec3(0.0),1.0); + + primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT); + + uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0); + uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1); + uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3); + uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4); + uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2); + float d[7]; + + bool error_unimpl = false; + + for (uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { + uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION,int(i*3),3); + + uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140 + 0x10 * light_id); + uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141 + 0x10 * light_id); + uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142 + 0x10 * light_id); + uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143 + 0x10 * light_id); + uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144 + 0x10 * light_id); + uint GPUREG_LIGHTi_VECTOR_HIGH= readPicaReg(0x0145 + 0x10 * light_id); + uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149 + 0x10 * light_id); + + vec3 light_vector = normalize(vec3( + decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5, 10), + decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5, 10), + decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5, 10) + )); + + // Positional Light + if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0) + error_unimpl = true; + 
+ vec3 half_vector = normalize(normalize(light_vector) + view); + + for (int c = 0; c < 7; c++) { + if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0){ + uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) + scale/=256.0; + + uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); + if (input_id == 0u) d[c] = dot(normal,half_vector); + else if (input_id == 1u) d[c] = dot(view,half_vector); + else if (input_id == 2u) d[c] = dot(normal,view); + else if (input_id == 3u) d[c] = dot(light_vector,normal); + else if (input_id == 4u){ + uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146 + 0x10 * light_id); + uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147 + 0x10 * light_id); + vec3 spot_light_vector = normalize(vec3( + decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1, 11), + decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1, 11), + decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1, 11) + )); + d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); + } else if (input_id == 5u) { + d[c] = 1.0; // TODO: cos (aka CP); + error_unimpl = true; + } else { + d[c] = 1.0; + } + + d[c] = lutLookup(c, light_id, d[c] * 0.5 + 0.5) * scale; + if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) + d[c] = abs(d[c]); + } else { + d[c] = 1.0; + } + } + + uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG,4,4); + if (lookup_config == 0) { + d[D1_LUT] = 0.0; + d[FR_LUT] = 0.0; + d[RG_LUT]= d[RB_LUT] = d[RR_LUT]; + } else if (lookup_config == 1) { + d[D0_LUT] = 0.0; + d[D1_LUT] = 0.0; + d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; + } else if (lookup_config == 2) { + d[FR_LUT] = 0.0; + d[SP_LUT] = 0.0; + d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; + } else if (lookup_config == 3) { + d[SP_LUT] = 0.0; + d[RG_LUT]= d[RB_LUT] = d[RR_LUT] = 1.0; + } else if (lookup_config == 4) { + d[FR_LUT] = 0.0; + } else if 
(lookup_config == 5) { + d[D1_LUT] = 0.0; + } else if (lookup_config == 6) { + d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; + } + + float distance_factor = 1.0; // a + float indirect_factor = 1.0; // fi + float shadow_factor = 1.0; // o + + float NdotL = dot(normal, light_vector); //Li dot N + + // Two sided diffuse + if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0) NdotL = max(0.0, NdotL); + else NdotL = abs(NdotL); + + float light_factor = distance_factor*d[SP_LUT]*indirect_factor*shadow_factor; + + primary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE)*NdotL); + secondary_color.rgb += light_factor * ( + regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + + regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT]) + ); + } + uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); + uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); + + if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; + if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; + + if (error_unimpl) { + secondary_color = primary_color = vec4(1.0,0.,1.0,1.0); + } + } + + void main() { // TODO: what do invalid sources and disabled textures read as? // And what does the "previous combiner" source read initially? tevSources[0] = v_colour; // Primary/vertex color - tevSources[1] = vec4(vec3(0.5), 1.0); // Fragment primary color - tevSources[2] = vec4(vec3(0.5), 1.0); // Fragment secondary color - if ((u_textureConfig & 1u) != 0u) tevSources[3] = texture(u_tex0, v_texcoord0.xy); - if ((u_textureConfig & 2u) != 0u) tevSources[4] = texture(u_tex1, v_texcoord1); - if ((u_textureConfig & 4u) != 0u) tevSources[5] = texture(u_tex2, tex2UV); + calcLighting(tevSources[1],tevSources[2]); + + uint textureConfig = readPicaReg(0x80); + vec2 tex2UV = (textureConfig & (1u << 13)) != 0u ? 
v_texcoord1 : v_texcoord2; + + if ((textureConfig & 1u) != 0u) tevSources[3] = texture(u_tex0, v_texcoord0.xy); + if ((textureConfig & 2u) != 0u) tevSources[4] = texture(u_tex1, v_texcoord1); + if ((textureConfig & 4u) != 0u) tevSources[5] = texture(u_tex2, tex2UV); tevSources[13] = vec4(0.0); // Previous buffer tevSources[15] = vec4(0.0); // Previous combiner tevNextPreviousBuffer = v_textureEnvBufferColor; + uint textureEnvUpdateBuffer = readPicaReg(0xE0); for (int i = 0; i < 6; i++) { tevSources[14] = v_textureEnvColor[i]; // Constant color @@ -212,11 +471,11 @@ const char* fragmentShader = R"( tevSources[13] = tevNextPreviousBuffer; if (i < 4) { - if ((u_textureEnvUpdateBuffer & (0x100u << i)) != 0u) { + if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) { tevNextPreviousBuffer.rgb = tevSources[15].rgb; } - if ((u_textureEnvUpdateBuffer & (0x1000u << i)) != 0u) { + if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) { tevNextPreviousBuffer.a = tevSources[15].a; } } @@ -227,6 +486,8 @@ const char* fragmentShader = R"( if (tevUnimplementedSourceFlag) { // fragColour = vec4(1.0, 0.0, 1.0, 1.0); } + // fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; + // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] @@ -239,9 +500,11 @@ const char* fragmentShader = R"( // Write final fragment depth gl_FragDepth = depth; - if ((u_alphaControl & 1u) != 0u) { // Check if alpha test is on - uint func = (u_alphaControl >> 4u) & 7u; - float reference = float((u_alphaControl >> 8u) & 0xffu) / 255.0; + // Perform alpha test + uint alphaControl = readPicaReg(0x104); + if ((alphaControl & 1u) != 0u) { // Check if alpha test is on + uint func = (alphaControl >> 4u) & 7u; + float reference = float((alphaControl >> 8u) & 0xffu) / 255.0; float alpha = fragColour.a; switch (func) { @@ -328,21 +591,17 @@ void Renderer::reset() { if 
(triangleProgram.exists()) { const auto oldProgram = OpenGL::getProgram(); - triangleProgram.use(); - oldAlphaControl = 0; // Default alpha control to 0 - oldTexUnitConfig = 0; // Default tex unit config to 0 + gl.useProgram(triangleProgram); oldDepthScale = -1.0; // Default depth scale to -1.0, which is what games typically use oldDepthOffset = 0.0; // Default depth offset to 0 oldDepthmapEnable = false; // Enable w buffering - glUniform1ui(alphaControlLoc, oldAlphaControl); - glUniform1ui(texUnitConfigLoc, oldTexUnitConfig); glUniform1f(depthScaleLoc, oldDepthScale); glUniform1f(depthOffsetLoc, oldDepthOffset); glUniform1i(depthmapEnableLoc, oldDepthmapEnable); - glUseProgram(oldProgram); // Switch to old GL program + gl.useProgram(oldProgram); // Switch to old GL program } } @@ -350,58 +609,61 @@ void Renderer::initGraphicsContext() { OpenGL::Shader vert(vertexShader, OpenGL::Vertex); OpenGL::Shader frag(fragmentShader, OpenGL::Fragment); triangleProgram.create({ vert, frag }); - triangleProgram.use(); - - alphaControlLoc = OpenGL::uniformLocation(triangleProgram, "u_alphaControl"); - texUnitConfigLoc = OpenGL::uniformLocation(triangleProgram, "u_textureConfig"); + gl.useProgram(triangleProgram); textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); - textureEnvUpdateBufferLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvUpdateBuffer"); - textureEnvBufferColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvBufferColor"); depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); 
depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable"); + picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs"); - // Init sampler objects + // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0); glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex1"), 1); glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2); + glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex_lighting_lut"), 3); OpenGL::Shader vertDisplay(displayVertexShader, OpenGL::Vertex); OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment); displayProgram.create({ vertDisplay, fragDisplay }); - displayProgram.use(); + gl.useProgram(displayProgram); glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize, GL_STREAM_DRAW); - vbo.bind(); + gl.bindVBO(vbo); vao.create(); - vao.bind(); + gl.bindVAO(vao); // Position (x, y, z, w) attributes - vao.setAttributeFloat(0, 4, sizeof(Vertex), offsetof(Vertex, position)); + vao.setAttributeFloat(0, 4, sizeof(Vertex), offsetof(Vertex, s.positions)); vao.enableAttribute(0); - // Colour attribute - vao.setAttributeFloat(1, 4, sizeof(Vertex), offsetof(Vertex, colour)); + // Quaternion attribute + vao.setAttributeFloat(1, 4, sizeof(Vertex), offsetof(Vertex, s.quaternion)); vao.enableAttribute(1); - // UV 0 attribute - vao.setAttributeFloat(2, 2, sizeof(Vertex), offsetof(Vertex, texcoord0)); + // Colour attribute + vao.setAttributeFloat(2, 4, sizeof(Vertex), offsetof(Vertex, s.colour)); vao.enableAttribute(2); - // UV 1 attribute - vao.setAttributeFloat(3, 2, sizeof(Vertex), offsetof(Vertex, texcoord1)); + // UV 0 attribute + vao.setAttributeFloat(3, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord0)); vao.enableAttribute(3); - // UV 0 W-component attribute - 
vao.setAttributeFloat(4, 1, sizeof(Vertex), offsetof(Vertex, texcoord0_w)); + // UV 1 attribute + vao.setAttributeFloat(4, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord1)); vao.enableAttribute(4); - // UV 2 attribute - vao.setAttributeFloat(5, 2, sizeof(Vertex), offsetof(Vertex, texcoord2)); + // UV 0 W-component attribute + vao.setAttributeFloat(5, 1, sizeof(Vertex), offsetof(Vertex, s.texcoord0_w)); vao.enableAttribute(5); + // View + vao.setAttributeFloat(6, 3, sizeof(Vertex), offsetof(Vertex, s.view)); + vao.enableAttribute(6); + // UV 2 attribute + vao.setAttributeFloat(7, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord2)); + vao.enableAttribute(7); dummyVBO.create(); dummyVAO.create(); @@ -409,6 +671,8 @@ void Renderer::initGraphicsContext() { // Create texture and framebuffer for the 3DS screen const u32 screenTextureWidth = 2 * 400; // Top screen is 400 pixels wide, bottom is 320 const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall + + glGenTextures(1,&lightLUTTextureArray); auto prevTexture = OpenGL::getTex2D(); screenTexture.create(screenTextureWidth, screenTextureHeight, GL_RGBA8); @@ -451,9 +715,9 @@ void Renderer::setupBlending() { }; if (!blendingEnabled) { - OpenGL::disableBlend(); + gl.disableBlend(); } else { - OpenGL::enableBlend(); + gl.enableBlend(); // Get blending equations const u32 blendControl = regs[PICA::InternalRegs::BlendFunc]; @@ -509,8 +773,6 @@ void Renderer::setupTextureEnvState() { glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs); glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs); glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs); - glUniform1ui(textureEnvUpdateBufferLoc, regs[PICA::InternalRegs::TexEnvUpdateBuffer]); - glUniform1ui(textureEnvBufferColorLoc, regs[PICA::InternalRegs::TexEnvBufferColor]); } void Renderer::bindTexturesToSlots() { @@ -538,14 +800,28 @@ void Renderer::bindTexturesToSlots() { tex.bind(); } + glActiveTexture(GL_TEXTURE0 + 3); + 
glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); glActiveTexture(GL_TEXTURE0); +} - // Update the texture unit configuration uniform if it changed - const u32 texUnitConfig = regs[PICA::InternalRegs::TexUnitCfg]; - if (oldTexUnitConfig != texUnitConfig) { - oldTexUnitConfig = texUnitConfig; - glUniform1ui(texUnitConfigLoc, texUnitConfig); +void Renderer::updateLightingLUT() { + gpu.lightingLUTDirty = false; + std::array u16_lightinglut; + + for (int i = 0; i < gpu.lightingLUT.size(); i++) { + uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); + u16_lightinglut[i] = value * 65535 / 4095; } + + glActiveTexture(GL_TEXTURE0 + 3); + glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); + glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); + glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glActiveTexture(GL_TEXTURE0); } void Renderer::drawVertices(PICA::PrimType primType, std::span vertices) { @@ -555,20 +831,14 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver }; const auto primitiveTopology = primTypes[static_cast(primType)]; - // TODO: We should implement a GL state tracker that tracks settings like scissor, blending, bound program, etc - // This way if we attempt to eg do multiple glEnable(GL_BLEND) calls in a row, it will say "Oh blending is already enabled" - // And not actually perform the very expensive driver call for it - OpenGL::disableScissor(); + gl.disableScissor(); + gl.bindVBO(vbo); + gl.bindVAO(vao); + gl.useProgram(triangleProgram); - vbo.bind(); - vao.bind(); - triangleProgram.use(); - - // Adjust alpha test if necessary - const u32 alphaControl = regs[PICA::InternalRegs::AlphaTestConfig]; - if 
(alphaControl != oldAlphaControl) { - oldAlphaControl = alphaControl; - glUniform1ui(alphaControlLoc, alphaControl); + OpenGL::enableClipPlane(0); // Clipping plane 0 is always enabled + if (regs[PICA::InternalRegs::ClipEnable] & 1) { + OpenGL::enableClipPlane(1); } setupBlending(); @@ -580,7 +850,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver const bool depthWriteEnable = getBit<12>(depthControl); const int depthFunc = getBits<4, 3>(depthControl); const int colourMask = getBits<8, 4>(depthControl); - glColorMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8); + gl.setColourMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8); static constexpr std::array depthModes = { GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL @@ -609,6 +879,14 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver setupTextureEnvState(); bindTexturesToSlots(); + // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) + // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates + glUniform1uiv(picaRegLoc, 0x200 - 0x48, ®s[0x48]); + + if (gpu.lightingLUTDirty) { + updateLightingLUT(); + } + // TODO: Actually use this float viewportWidth = f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0; float viewportHeight = f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 0xffffff).toFloat32() * 2.0; @@ -617,18 +895,18 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver // Note: The code below must execute after we've bound the colour buffer & its framebuffer // Because it attaches a depth texture to the aforementioned colour buffer if (depthEnable) { - OpenGL::enableDepth(); - glDepthFunc(depthModes[depthFunc]); - glDepthMask(depthWriteEnable ? 
GL_TRUE : GL_FALSE); + gl.enableDepth(); + gl.setDepthMask(depthWriteEnable ? GL_TRUE : GL_FALSE); + gl.setDepthFunc(depthModes[depthFunc]); bindDepthBuffer(); } else { if (depthWriteEnable) { - OpenGL::enableDepth(); - glDepthFunc(GL_ALWAYS); - glDepthMask(GL_TRUE); + gl.enableDepth(); + gl.setDepthMask(GL_TRUE); + gl.setDepthFunc(GL_ALWAYS); bindDepthBuffer(); } else { - OpenGL::disableDepth(); + gl.disableDepth(); } } @@ -639,9 +917,8 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver constexpr u32 topScreenBuffer = 0x1f000000; constexpr u32 bottomScreenBuffer = 0x1f05dc00; -// Quick hack to display top screen for now void Renderer::display() { - OpenGL::disableScissor(); + gl.disableScissor(); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); screenFramebuffer.bind(OpenGL::ReadFramebuffer); @@ -732,10 +1009,15 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 tex.bind(); screenFramebuffer.bind(OpenGL::DrawFramebuffer); - OpenGL::disableBlend(); - OpenGL::disableDepth(); - OpenGL::disableScissor(); - displayProgram.use(); + gl.disableBlend(); + gl.disableDepth(); + gl.disableScissor(); + gl.setColourMask(true, true, true, true); + gl.useProgram(displayProgram); + gl.bindVAO(dummyVAO); + + OpenGL::disableClipPlane(0); + OpenGL::disableClipPlane(1); // Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture // We consider output gap == 320 to mean bottom, and anything else to mean top @@ -745,6 +1027,5 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport } - dummyVAO.bind(); OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen } \ No newline at end of file diff --git a/src/emulator.cpp b/src/emulator.cpp index bb96cadc..76a10698 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -1,6 +1,6 @@ #include "emulator.hpp" 
-Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory), memory(cpu.getTicksRef()) { +Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory, gl), memory(cpu.getTicksRef()) { if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS) < 0) { Helpers::panic("Failed to initialize SDL2"); } @@ -326,3 +326,9 @@ bool Emulator::loadELF(std::ifstream& file) { } return true; } + +// Reset our graphics context and initialize the GPU's graphics context +void Emulator::initGraphicsContext() { + gl.reset(); // TODO (For when we have multiple backends): Only do this if we are using OpenGL + gpu.initGraphicsContext(); +} \ No newline at end of file diff --git a/src/gl_state.cpp b/src/gl_state.cpp new file mode 100644 index 00000000..612ae44d --- /dev/null +++ b/src/gl_state.cpp @@ -0,0 +1,53 @@ +#include "gl_state.hpp" + +void GLStateManager::resetBlend() { + blendEnabled = false; + OpenGL::disableBlend(); +} + +void GLStateManager::resetColourMask() { + redMask = greenMask = blueMask = alphaMask = true; + OpenGL::setColourMask(redMask, greenMask, blueMask, alphaMask); +} + +void GLStateManager::resetDepth() { + depthEnabled = false; + depthMask = true; + depthFunc = GL_LESS; + + OpenGL::disableDepth(); + OpenGL::setDepthMask(true); + OpenGL::setDepthFunc(OpenGL::DepthFunc::Less); +} + +void GLStateManager::resetScissor() { + scissorEnabled = false; + OpenGL::disableScissor(); + OpenGL::setScissor(0, 0, 0, 0); +} + +void GLStateManager::resetVAO() { + boundVAO = 0; + glBindVertexArray(0); +} + +void GLStateManager::resetVBO() { + boundVBO = 0; + glBindBuffer(GL_ARRAY_BUFFER, 0); +} + +void GLStateManager::resetProgram() { + currentProgram = 0; + glUseProgram(0); +} + +void GLStateManager::reset() { + resetBlend(); + resetColourMask(); + resetDepth(); + + resetVAO(); + resetVBO(); + resetProgram(); + resetScissor(); +} \ No newline at end of file