diff --git a/CMakeLists.txt b/CMakeLists.txt index 802b3d06..8c932497 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,7 +93,7 @@ endif() set(SOURCE_FILES src/main.cpp src/emulator.cpp src/io_file.cpp src/config.cpp src/core/CPU/cpu_dynarmic.cpp src/core/CPU/dynarmic_cycles.cpp - src/core/memory.cpp src/httpserver.cpp src/stb_image_write.c + src/core/memory.cpp src/renderer.cpp src/httpserver.cpp src/stb_image_write.c ) set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp) set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp diff --git a/include/PICA/dynapica/shader_rec.hpp b/include/PICA/dynapica/shader_rec.hpp index b7d37b02..e8b6afed 100644 --- a/include/PICA/dynapica/shader_rec.hpp +++ b/include/PICA/dynapica/shader_rec.hpp @@ -21,7 +21,7 @@ class ShaderJIT { ShaderCache cache; #endif -public: + public: #ifdef PANDA3DS_SHADER_JIT_SUPPORTED // Call this before starting to process a batch of vertices // This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader @@ -29,9 +29,7 @@ public: // The caller must make sure the entrypoint has been properly set beforehand void prepare(PICAShader& shaderUnit); void reset(); - void run(PICAShader& shaderUnit) { - prologueCallback(shaderUnit, entrypointCallback); - } + void run(PICAShader& shaderUnit) { prologueCallback(shaderUnit, entrypointCallback); } static constexpr bool isAvailable() { return true; } #else @@ -44,7 +42,7 @@ public: } // Define dummy callback. This should never be called if the shader JIT is not supported - using Callback = void(*)(PICAShader& shaderUnit); + using Callback = void (*)(PICAShader& shaderUnit); Callback activeShaderCallback = nullptr; void reset() {} diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index a4adc816..929881b7 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -1,39 +1,39 @@ #pragma once #include +#include "PICA/dynapica/shader_rec.hpp" +#include "PICA/float_types.hpp" +#include "PICA/pica_vertex.hpp" +#include "PICA/regs.hpp" +#include "PICA/shader_unit.hpp" #include "config.hpp" #include "helpers.hpp" #include "logger.hpp" #include "memory.hpp" -#include "PICA/float_types.hpp" -#include "PICA/regs.hpp" -#include "PICA/shader_unit.hpp" -#include "PICA/dynapica/shader_rec.hpp" -#include "renderer_gl/renderer_gl.hpp" -#include "PICA/pica_vertex.hpp" +#include "renderer.hpp" class GPU { static constexpr u32 regNum = 0x300; - using vec4f = OpenGL::Vector; + using vec4f = std::array; using Registers = std::array; Memory& mem; EmulatorConfig& config; ShaderUnit shaderUnit; - ShaderJIT shaderJIT; // Doesn't do anything if JIT is disabled or not supported + ShaderJIT shaderJIT; // Doesn't do anything if JIT is disabled or not supported u8* vram = nullptr; MAKE_LOG_FUNCTION(log, gpuLogger) - static constexpr u32 maxAttribCount = 12; // Up to 12 vertex attributes + static constexpr u32 maxAttribCount = 12; // Up to 12 vertex attributes static constexpr u32 vramSize = u32(6_MB); - Registers regs; // GPU internal registers - std::array currentAttributes; // Vertex attributes before being passed to the shader + Registers regs; // GPU internal registers + std::array currentAttributes; // Vertex attributes before being passed to the shader - std::array immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission + std::array immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission std::array immediateModeVertices; uint immediateModeVertIndex; - uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading + uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading template void drawArrays(); @@ -42,35 +42,33 @@ class GPU { void drawArrays(bool indexed); struct AttribInfo { - u32 offset = 0; // Offset from base vertex array - int size = 0; // Bytes per vertex + u32 offset = 0; // Offset from base vertex array + int size = 0; // Bytes per vertex u32 config1 = 0; u32 config2 = 0; - u32 componentCount = 0; // Number of components for the attribute + u32 componentCount = 0; // Number of components for the attribute - u64 getConfigFull() { - return u64(config1) | (u64(config2) << 32); - } + u64 getConfigFull() { return u64(config1) | (u64(config2) << 32); } }; u64 getVertexShaderInputConfig() { return u64(regs[PICA::InternalRegs::VertexShaderInputCfgLow]) | (u64(regs[PICA::InternalRegs::VertexShaderInputCfgHigh]) << 32); } - std::array attributeInfo; // Info for each of the 12 attributes - u32 totalAttribCount = 0; // Number of vertex attributes to send to VS - u32 fixedAttribMask = 0; // Which attributes are fixed? - - u32 fixedAttribIndex = 0; // Which fixed attribute are we writing to ([0, 11] range) - u32 fixedAttribCount = 0; // How many attribute components have we written? When we get to 4 the attr will actually get submitted - std::array fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted + std::array attributeInfo; // Info for each of the 12 attributes + u32 totalAttribCount = 0; // Number of vertex attributes to send to VS + u32 fixedAttribMask = 0; // Which attributes are fixed? + + u32 fixedAttribIndex = 0; // Which fixed attribute are we writing to ([0, 11] range) + u32 fixedAttribCount = 0; // How many attribute components have we written? When we get to 4 the attr will actually get submitted + std::array fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted // Command processor pointers for GPU command lists u32* cmdBuffStart = nullptr; u32* cmdBuffEnd = nullptr; u32* cmdBuffCurr = nullptr; - Renderer renderer; + std::unique_ptr renderer; PICA::Vertex getImmediateModeVertex(); public: @@ -84,11 +82,9 @@ class GPU { // Set to false by the renderer when the lighting_lut is uploaded ot the GPU bool lightingLUTDirty = false; - GPU(Memory& mem, GLStateManager& gl, EmulatorConfig& config); - void initGraphicsContext() { renderer.initGraphicsContext(); } - void getGraphicsContext() { renderer.getGraphicsContext(); } - void display() { renderer.display(); } - void screenshot(const std::string& name) { renderer.screenshot(name); } + GPU(Memory& mem, EmulatorConfig& config); + void initGraphicsContext() { renderer->initGraphicsContext(); } + void display() { renderer->display(); } void fireDMA(u32 dest, u32 source, u32 size); void reset(); @@ -106,14 +102,12 @@ class GPU { // TODO: Emulate the transfer engine & its registers // Then this can be emulated by just writing the appropriate values there - void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { - renderer.clearBuffer(startAddress, endAddress, value, control); - } + void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { renderer->clearBuffer(startAddress, endAddress, value, control); } // TODO: Emulate the transfer engine & its registers // Then this can be emulated by just writing the appropriate values there void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { - renderer.displayTransfer(inputAddr, outputAddr, inputSize, outputSize, flags); + renderer->displayTransfer(inputAddr, outputAddr, inputSize, outputSize, flags); } // Read a value of type T from physical address paddr diff --git a/include/renderer.hpp b/include/renderer.hpp new file mode 100644 index 00000000..c7315739 --- /dev/null +++ b/include/renderer.hpp @@ -0,0 +1,37 @@ +#pragma once +#include +#include + +#include "PICA/pica_vertex.hpp" +#include "PICA/regs.hpp" +#include "helpers.hpp" + +class GPU; + +class Renderer { + protected: + GPU& gpu; + static constexpr u32 regNum = 0x300; // Number of internal PICA registers + const std::array& regs; + + public: + Renderer(GPU& gpu, const std::array& internalRegs); + virtual ~Renderer(); + + static constexpr u32 vertexBufferSize = 0x10000; + + virtual void reset() = 0; + virtual void display() = 0; // Display the 3DS screen contents to the window + virtual void initGraphicsContext() = 0; // Initialize graphics context + virtual void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) = 0; // Clear a GPU buffer in VRAM + virtual void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) = 0; // Perform display transfer + virtual void drawVertices(PICA::PrimType primType, std::span vertices) = 0; // Draw the given vertices + + virtual void setFBSize(u32 width, u32 height) = 0; + + virtual void setColourFormat(PICA::ColorFmt format) = 0; + virtual void setDepthFormat(PICA::DepthFmt format) = 0; + + virtual void setColourBufferLoc(u32 loc) = 0; + virtual void setDepthBufferLoc(u32 loc) = 0; +}; \ No newline at end of file diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 07f8a63c..24301a11 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -4,20 +4,20 @@ #include #include "PICA/float_types.hpp" +#include "PICA/pica_vertex.hpp" +#include "PICA/regs.hpp" #include "gl_state.hpp" #include "helpers.hpp" #include "logger.hpp" +#include "renderer.hpp" #include "surface_cache.hpp" #include "textures.hpp" -#include "PICA/regs.hpp" -#include "PICA/pica_vertex.hpp" // More circular dependencies! class GPU; -class Renderer { - GPU& gpu; - GLStateManager& gl; +class RendererGL final : public Renderer { + GLStateManager gl = {}; OpenGL::Program triangleProgram; OpenGL::Program displayProgram; @@ -31,7 +31,7 @@ class Renderer { GLint textureEnvCombinerLoc = -1; GLint textureEnvColorLoc = -1; GLint textureEnvScaleLoc = -1; - + // Uniform of PICA registers GLint picaRegLoc = -1; @@ -50,7 +50,7 @@ class Renderer { OpenGL::uvec2 fbSize; // The size of the framebuffer (ie both the colour and depth buffer)' - u32 colourBufferLoc; // Location in 3DS VRAM for the colour buffer + u32 colourBufferLoc; // Location in 3DS VRAM for the colour buffer PICA::ColorFmt colourBufferFormat; // Format of the colours stored in the colour buffer // Same for the depth/stencil buffer @@ -61,9 +61,6 @@ class Renderer { OpenGL::VertexArray dummyVAO; OpenGL::VertexBuffer dummyVBO; - static constexpr u32 regNum = 0x300; // Number of internal PICA registers - const std::array& regs; - OpenGL::Texture screenTexture; GLuint lightLUTTextureArray; OpenGL::Framebuffer screenFramebuffer; @@ -79,12 +76,11 @@ class Renderer { void updateLightingLUT(); public: - Renderer(GPU& gpu, GLStateManager& gl, const std::array& internalRegs) : gpu(gpu), gl(gl), regs(internalRegs) {} + RendererGL(GPU& gpu, const std::array& internalRegs) : Renderer(gpu, internalRegs) {} void reset(); void display(); // Display the 3DS screen contents to the window void initGraphicsContext(); // Initialize graphics context - void getGraphicsContext(); // Set up graphics context for rendering void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags); // Perform display transfer void drawVertices(PICA::PrimType primType, std::span vertices); // Draw the given vertices @@ -107,6 +103,4 @@ class Renderer { void setColourBufferLoc(u32 loc) { colourBufferLoc = loc; } void setDepthBufferLoc(u32 loc) { depthBufferLoc = loc; } - - static constexpr u32 vertexBufferSize = 0x10000; }; \ No newline at end of file diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 37b67a50..29eeef04 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -2,19 +2,28 @@ #include #include -#include #include +#include #include "PICA/float_types.hpp" #include "PICA/regs.hpp" +#if ENABLE_OPENGL +#include "renderer_gl/renderer_gl.hpp" +#endif + using namespace Floats; // Note: For when we have multiple backends, the GL state manager can stay here and have the constructor for the Vulkan-or-whatever renderer ignore it // Thus, our GLStateManager being here does not negatively impact renderer-agnosticness -GPU::GPU(Memory& mem, GLStateManager& gl, EmulatorConfig& config) : mem(mem), renderer(*this, gl, regs), config(config) { +GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) { vram = new u8[vramSize]; - mem.setVRAM(vram); // Give the bus a pointer to our VRAM + mem.setVRAM(vram); // Give the bus a pointer to our VRAM + + // TODO: configurable backend +#if ENABLE_OPENGL + renderer.reset(new RendererGL(*this, regs)); +#endif } void GPU::reset() { @@ -41,7 +50,7 @@ void GPU::reset() { e.config2 = 0; } - renderer.reset(); + renderer->reset(); } // Call the correct version of drawArrays based on whether this is an indexed draw (first template parameter) @@ -73,15 +82,14 @@ void GPU::drawArrays() { // Base address for vertex attributes // The vertex base is always on a quadword boundary because the PICA does weird alignment shit any time possible const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16; - const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer + const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer // Configures the type of primitive and the number of vertex shader outputs const u32 primConfig = regs[PICA::InternalRegs::PrimitiveConfig]; const PICA::PrimType primType = static_cast(Helpers::getBits<8, 2>(primConfig)); if (vertexCount > Renderer::vertexBufferSize) Helpers::panic("[PICA] vertexCount > vertexBufferSize"); - if ((primType == PICA::PrimType::TriangleList && vertexCount % 3) || - (primType == PICA::PrimType::TriangleStrip && vertexCount < 3) || + if ((primType == PICA::PrimType::TriangleList && vertexCount % 3) || (primType == PICA::PrimType::TriangleStrip && vertexCount < 3) || (primType == PICA::PrimType::TriangleFan && vertexCount < 3)) { Helpers::panic("Invalid vertex count for primitive. Type: %d, vert count: %d\n", primType, vertexCount); } @@ -89,10 +97,10 @@ void GPU::drawArrays() { // Get the configuration for the index buffer, used only for indexed drawing u32 indexBufferConfig = regs[PICA::InternalRegs::IndexBufferConfig]; u32 indexBufferPointer = vertexBase + (indexBufferConfig & 0xfffffff); - bool shortIndex = Helpers::getBit<31>(indexBufferConfig); // Indicates whether vert indices are 16-bit or 8-bit + bool shortIndex = Helpers::getBit<31>(indexBufferConfig); // Indicates whether vert indices are 16-bit or 8-bit // Stuff the global attribute config registers in one u64 to make attr parsing easier - // TODO: Cache this when the vertex attribute format registers are written to + // TODO: Cache this when the vertex attribute format registers are written to u64 vertexCfg = u64(regs[PICA::InternalRegs::AttribFormatLow]) | (u64(regs[PICA::InternalRegs::AttribFormatHigh]) << 32); if constexpr (!indexed) { @@ -111,24 +119,24 @@ void GPU::drawArrays() { constexpr size_t vertexCacheSize = 64; struct { - std::bitset validBits{0}; // Shows which tags are valid. If the corresponding bit is 1, then there's an entry - std::array ids; // IDs (ie indices of the cached vertices in the 3DS vertex buffer) - std::array bufferPositions; // Positions of the cached vertices in our own vertex buffer + std::bitset validBits{0}; // Shows which tags are valid. If the corresponding bit is 1, then there's an entry + std::array ids; // IDs (ie indices of the cached vertices in the 3DS vertex buffer) + std::array bufferPositions; // Positions of the cached vertices in our own vertex buffer } vertexCache; - + for (u32 i = 0; i < vertexCount; i++) { - u32 vertexIndex; // Index of the vertex in the VBO for indexed rendering + u32 vertexIndex; // Index of the vertex in the VBO for indexed rendering if constexpr (!indexed) { vertexIndex = i + regs[PICA::InternalRegs::VertexOffsetReg]; } else { if (shortIndex) { auto ptr = getPointerPhys(indexBufferPointer); - vertexIndex = *ptr; // TODO: This is very unsafe + vertexIndex = *ptr; // TODO: This is very unsafe indexBufferPointer += 2; } else { auto ptr = getPointerPhys(indexBufferPointer); - vertexIndex = *ptr; // TODO: This is also very unsafe + vertexIndex = *ptr; // TODO: This is also very unsafe indexBufferPointer += 1; } } @@ -152,22 +160,22 @@ void GPU::drawArrays() { } int attrCount = 0; - int buffer = 0; // Vertex buffer index for non-fixed attributes + int buffer = 0; // Vertex buffer index for non-fixed attributes while (attrCount < totalAttribCount) { // Check if attribute is fixed or not - if (fixedAttribMask & (1 << attrCount)) { // Fixed attribute - vec4f& fixedAttr = shaderUnit.vs.fixedAttributes[attrCount]; // TODO: Is this how it works? + if (fixedAttribMask & (1 << attrCount)) { // Fixed attribute + vec4f& fixedAttr = shaderUnit.vs.fixedAttributes[attrCount]; // TODO: Is this how it works? vec4f& inputAttr = currentAttributes[attrCount]; - std::memcpy(&inputAttr, &fixedAttr, sizeof(vec4f)); // Copy fixed attr to input attr + std::memcpy(&inputAttr, &fixedAttr, sizeof(vec4f)); // Copy fixed attr to input attr attrCount++; - } else { // Non-fixed attribute - auto& attr = attributeInfo[buffer]; // Get information for this attribute - u64 attrCfg = attr.getConfigFull(); // Get config1 | (config2 << 32) + } else { // Non-fixed attribute + auto& attr = attributeInfo[buffer]; // Get information for this attribute + u64 attrCfg = attr.getConfigFull(); // Get config1 | (config2 << 32) u32 attrAddress = vertexBase + attr.offset + (vertexIndex * attr.size); for (int j = 0; j < attr.componentCount; j++) { - uint index = (attrCfg >> (j * 4)) & 0xf; // Get index of attribute in vertexCfg + uint index = (attrCfg >> (j * 4)) & 0xf; // Get index of attribute in vertexCfg // Vertex attributes used as padding // 12, 13, 14 and 15 are equivalent to 4, 8, 12 and 16 bytes of padding respectively @@ -179,15 +187,15 @@ void GPU::drawArrays() { } u32 attribInfo = (vertexCfg >> (index * 4)) & 0xf; - u32 attribType = attribInfo & 0x3; // Type of attribute(sbyte/ubyte/short/float) - u32 size = (attribInfo >> 2) + 1; // Total number of components + u32 attribType = attribInfo & 0x3; // Type of attribute(sbyte/ubyte/short/float) + u32 size = (attribInfo >> 2) + 1; // Total number of components - //printf("vertex_attribute_strides[%d] = %d\n", attrCount, attr.size); + // printf("vertex_attribute_strides[%d] = %d\n", attrCount, attr.size); vec4f& attribute = currentAttributes[attrCount]; - uint component; // Current component + uint component; // Current component switch (attribType) { - case 0: { // Signed byte + case 0: { // Signed byte s8* ptr = getPointerPhys(attrAddress); for (component = 0; component < size; component++) { float val = static_cast(*ptr++); @@ -197,7 +205,7 @@ void GPU::drawArrays() { break; } - case 1: { // Unsigned byte + case 1: { // Unsigned byte u8* ptr = getPointerPhys(attrAddress); for (component = 0; component < size; component++) { float val = static_cast(*ptr++); @@ -207,7 +215,7 @@ void GPU::drawArrays() { break; } - case 2: { // Short + case 2: { // Short s16* ptr = getPointerPhys(attrAddress); for (component = 0; component < size; component++) { float val = static_cast(*ptr++); @@ -217,7 +225,7 @@ void GPU::drawArrays() { break; } - case 3: { // Float + case 3: { // Float float* ptr = getPointerPhys(attrAddress); for (component = 0; component < size; component++) { float val = *ptr++; @@ -251,8 +259,8 @@ void GPU::drawArrays() { const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf; std::memcpy(&shaderUnit.vs.inputs[mapping], ¤tAttributes[j], sizeof(vec4f)); } - - if constexpr (useShaderJIT) { + + if constexpr (useShaderJIT) { shaderJIT.run(shaderUnit.vs); } else { shaderUnit.vs.run(); @@ -264,14 +272,14 @@ void GPU::drawArrays() { for (int i = 0; i < totalShaderOutputs; i++) { const u32 config = regs[PICA::InternalRegs::ShaderOutmap0 + i]; - for (int j = 0; j < 4; j++) { // pls unroll + for (int j = 0; j < 4; j++) { // pls unroll const u32 mapping = (config >> (j * 8)) & 0x1F; out.raw[mapping] = shaderUnit.vs.outputs[i][j]; } } } - renderer.drawVertices(primType, std::span(vertices).first(vertexCount)); + renderer->drawVertices(primType, std::span(vertices).first(vertexCount)); } PICA::Vertex GPU::getImmediateModeVertex() { @@ -289,7 +297,9 @@ PICA::Vertex GPU::getImmediateModeVertex() { std::memcpy(&v.s.colour, &shaderUnit.vs.outputs[1], sizeof(vec4f)); std::memcpy(&v.s.texcoord0, &shaderUnit.vs.outputs[2], 2 * sizeof(f24)); - printf("(x, y, z, w) = (%f, %f, %f, %f)\n", (double)v.s.positions[0], (double)v.s.positions[1], (double)v.s.positions[2], (double)v.s.positions[3]); + printf( + "(x, y, z, w) = (%f, %f, %f, %f)\n", (double)v.s.positions[0], (double)v.s.positions[1], (double)v.s.positions[2], (double)v.s.positions[3] + ); printf("(r, g, b, a) = (%f, %f, %f, %f)\n", (double)v.s.colour[0], (double)v.s.colour[1], (double)v.s.colour[2], (double)v.s.colour[3]); printf("(u, v ) = (%f, %f)\n", (double)v.s.texcoord0[0], (double)v.s.texcoord0[1]); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 3a13b31d..22484608 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -1,4 +1,5 @@ #include "renderer_gl/renderer_gl.hpp" + #include "PICA/float_types.hpp" #include "PICA/gpu.hpp" #include "PICA/regs.hpp" @@ -576,7 +577,7 @@ const char* displayFragmentShader = R"( } )"; -void Renderer::reset() { +void RendererGL::reset() { depthBufferCache.reset(); colourBufferCache.reset(); textureCache.reset(); @@ -592,10 +593,10 @@ void Renderer::reset() { const auto oldProgram = OpenGL::getProgram(); gl.useProgram(triangleProgram); - - oldDepthScale = -1.0; // Default depth scale to -1.0, which is what games typically use - oldDepthOffset = 0.0; // Default depth offset to 0 - oldDepthmapEnable = false; // Enable w buffering + + oldDepthScale = -1.0; // Default depth scale to -1.0, which is what games typically use + oldDepthOffset = 0.0; // Default depth offset to 0 + oldDepthmapEnable = false; // Enable w buffering glUniform1f(depthScaleLoc, oldDepthScale); glUniform1f(depthOffsetLoc, oldDepthOffset); @@ -605,10 +606,10 @@ void Renderer::reset() { } } -void Renderer::initGraphicsContext() { +void RendererGL::initGraphicsContext() { OpenGL::Shader vert(vertexShader, OpenGL::Vertex); OpenGL::Shader frag(fragmentShader, OpenGL::Fragment); - triangleProgram.create({ vert, frag }); + triangleProgram.create({vert, frag}); gl.useProgram(triangleProgram); textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); @@ -630,10 +631,10 @@ void Renderer::initGraphicsContext() { OpenGL::Shader vertDisplay(displayVertexShader, OpenGL::Vertex); OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment); - displayProgram.create({ vertDisplay, fragDisplay }); + displayProgram.create({vertDisplay, fragDisplay}); gl.useProgram(displayProgram); - glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object + glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize, GL_STREAM_DRAW); gl.bindVBO(vbo); @@ -669,10 +670,10 @@ void Renderer::initGraphicsContext() { dummyVAO.create(); // Create texture and framebuffer for the 3DS screen - const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 - const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall - - glGenTextures(1,&lightLUTTextureArray); + const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 + const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall + + glGenTextures(1, &lightLUTTextureArray); auto prevTexture = OpenGL::getTex2D(); screenTexture.create(screenTextureWidth, screenTextureHeight, GL_RGBA8); @@ -684,8 +685,7 @@ void Renderer::initGraphicsContext() { screenFramebuffer.createWithDrawTexture(screenTexture); screenFramebuffer.bind(OpenGL::DrawAndReadFramebuffer); - if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) - Helpers::panic("Incomplete framebuffer"); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) Helpers::panic("Incomplete framebuffer"); // TODO: This should not clear the framebuffer contents. It should load them from VRAM. GLint oldViewport[4]; @@ -699,20 +699,31 @@ void Renderer::initGraphicsContext() { } // Set up the OpenGL blending context to match the emulated PICA -void Renderer::setupBlending() { +void RendererGL::setupBlending() { const bool blendingEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; - + // Map of PICA blending equations to OpenGL blending equations. The unused blending equations are equivalent to equation 0 (add) - static constexpr std::array blendingEquations = { - GL_FUNC_ADD, GL_FUNC_SUBTRACT, GL_FUNC_REVERSE_SUBTRACT, GL_MIN, GL_MAX, GL_FUNC_ADD, GL_FUNC_ADD, GL_FUNC_ADD - }; - + static constexpr std::array blendingEquations = {GL_FUNC_ADD, GL_FUNC_SUBTRACT, GL_FUNC_REVERSE_SUBTRACT, GL_MIN, GL_MAX, GL_FUNC_ADD, + GL_FUNC_ADD, GL_FUNC_ADD}; + // Map of PICA blending funcs to OpenGL blending funcs. Func = 15 is undocumented and stubbed to GL_ONE for now static constexpr std::array blendingFuncs = { - GL_ZERO, GL_ONE, GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR, GL_DST_COLOR, GL_ONE_MINUS_DST_COLOR, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, - GL_DST_ALPHA, GL_ONE_MINUS_DST_ALPHA, GL_CONSTANT_COLOR, GL_ONE_MINUS_CONSTANT_COLOR, GL_CONSTANT_ALPHA, GL_ONE_MINUS_CONSTANT_ALPHA, - GL_SRC_ALPHA_SATURATE, GL_ONE - }; + GL_ZERO, + GL_ONE, + GL_SRC_COLOR, + GL_ONE_MINUS_SRC_COLOR, + GL_DST_COLOR, + GL_ONE_MINUS_DST_COLOR, + GL_SRC_ALPHA, + GL_ONE_MINUS_SRC_ALPHA, + GL_DST_ALPHA, + GL_ONE_MINUS_DST_ALPHA, + GL_CONSTANT_COLOR, + GL_ONE_MINUS_CONSTANT_COLOR, + GL_CONSTANT_ALPHA, + GL_ONE_MINUS_CONSTANT_ALPHA, + GL_SRC_ALPHA_SATURATE, + GL_ONE}; if (!blendingEnabled) { gl.disableBlend(); @@ -743,14 +754,12 @@ void Renderer::setupBlending() { } } -void Renderer::setupTextureEnvState() { +void RendererGL::setupTextureEnvState() { // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. - static constexpr std::array ioBases = { - PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, - PICA::InternalRegs::TexEnv2Source, PICA::InternalRegs::TexEnv3Source, - PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source - }; + static constexpr std::array ioBases = {PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, + PICA::InternalRegs::TexEnv2Source, PICA::InternalRegs::TexEnv3Source, + PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source}; u32 textureEnvSourceRegs[6]; u32 textureEnvOperandRegs[6]; @@ -775,10 +784,9 @@ void Renderer::setupTextureEnvState() { glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs); } -void Renderer::bindTexturesToSlots() { +void RendererGL::bindTexturesToSlots() { static constexpr std::array ioBases = { - PICA::InternalRegs::Tex0BorderColor, PICA::InternalRegs::Tex1BorderColor, PICA::InternalRegs::Tex2BorderColor - }; + PICA::InternalRegs::Tex0BorderColor, PICA::InternalRegs::Tex1BorderColor, PICA::InternalRegs::Tex2BorderColor}; for (int i = 0; i < 3; i++) { if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { @@ -805,13 +813,13 @@ void Renderer::bindTexturesToSlots() { glActiveTexture(GL_TEXTURE0); } -void Renderer::updateLightingLUT() { +void RendererGL::updateLightingLUT() { gpu.lightingLUTDirty = false; - std::array u16_lightinglut; - + std::array u16_lightinglut; + for (int i = 0; i < gpu.lightingLUT.size(); i++) { - uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); - u16_lightinglut[i] = value * 65535 / 4095; + uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); + u16_lightinglut[i] = value * 65535 / 4095; } glActiveTexture(GL_TEXTURE0 + 3); @@ -824,11 +832,9 @@ void Renderer::updateLightingLUT() { glActiveTexture(GL_TEXTURE0); } -void Renderer::drawVertices(PICA::PrimType primType, std::span vertices) { +void RendererGL::drawVertices(PICA::PrimType primType, std::span vertices) { // The fourth type is meant to be "Geometry primitive". TODO: Find out what that is - static constexpr std::array primTypes = { - OpenGL::Triangle, OpenGL::TriangleStrip, OpenGL::TriangleFan, OpenGL::Triangle - }; + static constexpr std::array primTypes = {OpenGL::Triangle, OpenGL::TriangleStrip, OpenGL::TriangleFan, OpenGL::Triangle}; const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); @@ -836,7 +842,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver gl.bindVAO(vao); gl.useProgram(triangleProgram); - OpenGL::enableClipPlane(0); // Clipping plane 0 is always enabled + OpenGL::enableClipPlane(0); // Clipping plane 0 is always enabled if (regs[PICA::InternalRegs::ClipEnable] & 1) { OpenGL::enableClipPlane(1); } @@ -852,9 +858,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver const int colourMask = getBits<8, 4>(depthControl); gl.setColourMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8); - static constexpr std::array depthModes = { - GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL - }; + static constexpr std::array depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL}; const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); @@ -865,7 +869,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver oldDepthScale = depthScale; glUniform1f(depthScaleLoc, depthScale); } - + if (oldDepthOffset != depthOffset) { oldDepthOffset = depthOffset; glUniform1f(depthOffsetLoc, depthOffset); @@ -917,7 +921,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver constexpr u32 topScreenBuffer = 0x1f000000; constexpr u32 bottomScreenBuffer = 0x1f05dc00; -void Renderer::display() { +void RendererGL::display() { gl.disableScissor(); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); @@ -925,7 +929,7 @@ void Renderer::display() { glBlitFramebuffer(0, 0, 400, 480, 0, 0, 400, 480, GL_COLOR_BUFFER_BIT, GL_LINEAR); } -void Renderer::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { +void RendererGL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { return; log("GPU: Clear buffer\nStart: %08X End: %08X\nValue: %08X Control: %08X\n", startAddress, endAddress, value, control); @@ -947,9 +951,9 @@ void Renderer::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 cont OpenGL::clearColor(); } -OpenGL::Framebuffer Renderer::getColourFBO() { - //We construct a colour buffer object and see if our cache has any matching colour buffers in it - // If not, we allocate a texture & FBO for our framebuffer and store it in the cache +OpenGL::Framebuffer RendererGL::getColourFBO() { + // We construct a colour buffer object and see if our cache has any matching colour buffers in it + // If not, we allocate a texture & FBO for our framebuffer and store it in the cache ColourBuffer sampleBuffer(colourBufferLoc, colourBufferFormat, fbSize.x(), fbSize.y()); auto buffer = colourBufferCache.find(sampleBuffer); @@ -960,7 +964,7 @@ OpenGL::Framebuffer Renderer::getColourFBO() { } } -void Renderer::bindDepthBuffer() { +void RendererGL::bindDepthBuffer() { // Similar logic as the getColourFBO function DepthBuffer sampleBuffer(depthBufferLoc, depthBufferFormat, fbSize.x(), fbSize.y()); auto buffer = depthBufferCache.find(sampleBuffer); @@ -979,14 +983,14 @@ void Renderer::bindDepthBuffer() { glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, tex, 0); } -OpenGL::Texture Renderer::getTexture(Texture& tex) { +OpenGL::Texture RendererGL::getTexture(Texture& tex) { // Similar logic as the getColourFBO/bindDepthBuffer functions auto buffer = textureCache.find(tex); if (buffer.has_value()) { return buffer.value().get().texture; } else { - const void* textureData = gpu.getPointerPhys(tex.location); // Get pointer to the texture data in 3DS memory + const void* textureData = gpu.getPointerPhys(tex.location); // Get pointer to the texture data in 3DS memory Texture& newTex = textureCache.add(tex); newTex.decodeTexture(textureData); @@ -994,7 +998,7 @@ OpenGL::Texture Renderer::getTexture(Texture& tex) { } } -void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { +void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { const u32 inputWidth = inputSize & 0xffff; const u32 inputGap = inputSize >> 16; @@ -1022,12 +1026,12 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 // Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture // We consider output gap == 320 to mean bottom, and anything else to mean top if (outputGap == 320) { - OpenGL::setViewport(40, 0, 320, 240); // Bottom screen viewport + OpenGL::setViewport(40, 0, 320, 240); // Bottom screen viewport } else { - OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport + OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport } - OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen + OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen } void Renderer::screenshot(const std::string& name) { @@ -1035,8 +1039,35 @@ void Renderer::screenshot(const std::string& name) { constexpr uint height = 2 * 240; std::vector pixels, flippedPixels; - pixels.resize(width * height * 4); - flippedPixels.resize(pixels.size());; + pixels.resize(width * height * 4); + flippedPixels.resize(pixels.size()); + ; + + OpenGL::bindScreenFramebuffer(); + glReadPixels(0, 0, width, height, GL_BGRA, GL_UNSIGNED_BYTE, pixels.data()); + + // Flip the image vertically + for (int y = 0; y < height; y++) { + memcpy(&flippedPixels[y * width * 4], &pixels[(height - y - 1) * width * 4], width * 4); + // Swap R and B channels + for (int x = 0; x < width; x++) { + std::swap(flippedPixels[y * width * 4 + x * 4 + 0], flippedPixels[y * width * 4 + x * 4 + 2]); + // Set alpha to 0xFF + flippedPixels[y * width * 4 + x * 4 + 3] = 0xFF; + } + } + + stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0); +} + +void Renderer::screenshot(const std::string& name) { + constexpr uint width = 400; + constexpr uint height = 2 * 240; + + std::vector pixels, flippedPixels; + pixels.resize(width * height * 4); + flippedPixels.resize(pixels.size()); + ; OpenGL::bindScreenFramebuffer(); glReadPixels(0, 0, width, height, GL_BGRA, GL_UNSIGNED_BYTE, pixels.data()); diff --git a/src/emulator.cpp b/src/emulator.cpp index 0d95b82b..db628853 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -16,7 +16,7 @@ _declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 1; } #endif -Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory, gl, config), memory(cpu.getTicksRef()) { +Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory, config), memory(cpu.getTicksRef()) { if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS) < 0) { Helpers::panic("Failed to initialize SDL2"); } diff --git a/src/renderer.cpp b/src/renderer.cpp new file mode 100644 index 00000000..b3da0501 --- /dev/null +++ b/src/renderer.cpp @@ -0,0 +1,4 @@ +#include "renderer.hpp" + +Renderer::Renderer(GPU& gpu, const std::array& internalRegs) : gpu(gpu), regs(internalRegs) {} +Renderer::~Renderer() {} \ No newline at end of file