From 0009b0817d99dc15ca10643d8611665a0043f493 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Mon, 10 Jul 2023 08:04:32 -0700 Subject: [PATCH 01/14] Add `ENABLE_OPENGL` build option Allows the OpenGL backend to be fully disabled, continuing a modular pattern of having multiple possible rendering backends. Also defines the `ENABLE_OPENGL` preprocessor in the case of conditional source-file changes depending on the rendering backend. --- CMakeLists.txt | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d276af52..802b3d06 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,7 @@ endif() option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" OFF) option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF) +option(ENABLE_OPENGL "Enable OpenGL rendering backend" ON) option(ENABLE_LTO "Enable link-time optimization" OFF) option(ENABLE_USER_BUILD "Make a user-facing build. These builds have various assertions disabled, LTO, and more" OFF) option(ENABLE_HTTP_SERVER "Enable HTTP server. 
Used for Discord bot support" OFF) @@ -90,9 +91,9 @@ else() message(FATAL_ERROR "Currently unsupported CPU architecture") endif() -set(SOURCE_FILES src/main.cpp src/emulator.cpp src/io_file.cpp src/gl_state.cpp src/config.cpp - src/core/CPU/cpu_dynarmic.cpp src/core/CPU/dynarmic_cycles.cpp src/core/memory.cpp - src/httpserver.cpp src/stb_image_write.c +set(SOURCE_FILES src/main.cpp src/emulator.cpp src/io_file.cpp src/config.cpp + src/core/CPU/cpu_dynarmic.cpp src/core/CPU/dynarmic_cycles.cpp + src/core/memory.cpp src/httpserver.cpp src/stb_image_write.c ) set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp) set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp @@ -117,15 +118,13 @@ set(PICA_SOURCE_FILES src/core/PICA/gpu.cpp src/core/PICA/regs.cpp src/core/PICA src/core/PICA/dynapica/shader_rec_emitter_x64.cpp src/core/PICA/pica_hash.cpp ) -set(RENDERER_GL_SOURCE_FILES src/core/renderer_gl/renderer_gl.cpp src/core/renderer_gl/textures.cpp src/core/renderer_gl/etc1.cpp) - set(LOADER_SOURCE_FILES src/core/loader/elf.cpp src/core/loader/ncsd.cpp src/core/loader/ncch.cpp src/core/loader/lz77.cpp) set(FS_SOURCE_FILES src/core/fs/archive_self_ncch.cpp src/core/fs/archive_save_data.cpp src/core/fs/archive_sdmc.cpp src/core/fs/archive_ext_save_data.cpp src/core/fs/archive_ncch.cpp ) -set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/opengl.hpp include/termcolor.hpp - include/cpu.hpp include/cpu_dynarmic.hpp include/memory.hpp include/kernel/kernel.hpp +set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp + include/cpu.hpp include/cpu_dynarmic.hpp include/memory.hpp include/renderer.hpp include/kernel/kernel.hpp include/dynarmic_cp15.hpp include/kernel/resource_limits.hpp include/kernel/kernel_types.hpp include/kernel/config_mem.hpp include/services/service_manager.hpp include/services/apt.hpp include/kernel/handles.hpp include/services/hid.hpp include/services/fs.hpp @@ -136,11 +135,10 @@ 
set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/opengl.hpp inc include/loader/lz77.hpp include/fs/archive_base.hpp include/fs/archive_self_ncch.hpp include/services/dsp.hpp include/services/cfg.hpp include/services/region_codes.hpp include/fs/archive_save_data.hpp include/fs/archive_sdmc.hpp include/services/ptm.hpp - include/services/mic.hpp include/services/cecd.hpp include/renderer_gl/renderer_gl.hpp - include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp include/services/ac.hpp + include/services/mic.hpp include/services/cecd.hpp include/services/ac.hpp include/services/am.hpp include/services/boss.hpp include/services/frd.hpp include/services/nim.hpp include/fs/archive_ext_save_data.hpp include/services/shared_font.hpp include/fs/archive_ncch.hpp - include/renderer_gl/textures.hpp include/colour.hpp include/services/y2r.hpp include/services/cam.hpp + include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ldr_ro.hpp include/ipc.hpp include/services/act.hpp include/services/nfc.hpp include/system_models.hpp include/services/dlp_srvr.hpp include/PICA/dynapica/pica_recs.hpp include/PICA/dynapica/x64_regs.hpp include/PICA/dynapica/vertex_loader_rec.hpp include/PICA/dynapica/shader_rec.hpp @@ -169,17 +167,37 @@ source_group("Source Files\\Core\\Kernel" FILES ${KERNEL_SOURCE_FILES}) source_group("Source Files\\Core\\Loader" FILES ${LOADER_SOURCE_FILES}) source_group("Source Files\\Core\\Services" FILES ${SERVICE_SOURCE_FILES}) source_group("Source Files\\Core\\PICA" FILES ${PICA_SOURCE_FILES}) -source_group("Source Files\\Core\\OpenGL Renderer" FILES ${RENDERER_GL_SOURCE_FILES}) source_group("Source Files\\Third Party" FILES ${THIRD_PARTY_SOURCE_FILES}) -add_executable(Alber ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} ${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES} -${PICA_SOURCE_FILES} ${RENDERER_GL_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES} ${HEADER_FILES}) 
+add_executable(Alber + ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} ${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES} + ${PICA_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES} ${HEADER_FILES} +) if(ENABLE_LTO OR ENABLE_USER_BUILD) set_target_properties(Alber PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE) endif() -target_link_libraries(Alber PRIVATE dynarmic SDL2-static glad cryptopp) +target_link_libraries(Alber PRIVATE dynarmic SDL2-static cryptopp) + +if(ENABLE_OPENGL) + target_compile_definitions(Alber PUBLIC "ENABLE_OPENGL=1") + + set(RENDERER_GL_INCLUDE_FILES include/opengl.hpp + include/renderer_gl/renderer_gl.hpp include/renderer_gl/textures.hpp + include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp + ) + set(RENDERER_GL_SOURCE_FILES src/core/renderer_gl/renderer_gl.cpp + src/core/renderer_gl/textures.cpp src/core/renderer_gl/etc1.cpp + src/gl_state.cpp + ) + source_group("Source Files\\Core\\OpenGL Renderer" FILES ${RENDERER_GL_SOURCE_FILES}) + + target_sources(Alber PRIVATE ${RENDERER_GL_SOURCE_FILES}) + + target_link_libraries(Alber PRIVATE glad) + +endif() if(GPU_DEBUG_INFO) target_compile_definitions(Alber PRIVATE GPU_DEBUG_INFO=1) From d664d5caf054a13a565e9e87985e265207c2100e Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Mon, 10 Jul 2023 08:30:23 -0700 Subject: [PATCH 02/14] Emulator: Conditional OpenGL compilation --- include/emulator.hpp | 13 ++++++++++--- src/emulator.cpp | 9 +++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/include/emulator.hpp b/include/emulator.hpp index 83b832f6..ae6e7142 100644 --- a/include/emulator.hpp +++ b/include/emulator.hpp @@ -1,19 +1,22 @@ #pragma once #include -#include #include #include #include #include "PICA/gpu.hpp" -#include "cpu.hpp" #include "config.hpp" +#include "cpu.hpp" #include "crypto/aes_engine.hpp" #include "io_file.hpp" #include "memory.hpp" + +#if ENABLE_OPENGL #include "gl_state.hpp" +#endif + #ifdef PANDA3DS_ENABLE_HTTP_SERVER 
#include "httpserver.hpp" #endif @@ -27,10 +30,14 @@ class Emulator { Kernel kernel; Crypto::AESEngine aesEngine; - GLStateManager gl; EmulatorConfig config; SDL_Window* window; + +#if ENABLE_OPENGL SDL_GLContext glContext; + GLStateManager gl; +#endif + SDL_GameController* gameController = nullptr; int gameControllerID; diff --git a/src/emulator.cpp b/src/emulator.cpp index 0311f6e9..0d95b82b 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -2,6 +2,10 @@ #include +#if ENABLE_OPENGL +#include +#endif + #ifdef _WIN32 #include @@ -23,6 +27,7 @@ Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory Helpers::warn("Failed to initialize SDL2 GameController: %s", SDL_GetError()); } +#if ENABLE_OPENGL // Request OpenGL 4.1 Core (Max available on MacOS) // MacOS gets mad if we don't explicitly demand a core profile SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); @@ -42,6 +47,7 @@ Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory if (!gladLoadGL(reinterpret_cast(SDL_GL_GetProcAddress))) { Helpers::panic("OpenGL init failed: %s", SDL_GetError()); } +#endif if (SDL_WasInit(SDL_INIT_GAMECONTROLLER)) { gameController = SDL_GameControllerOpen(0); @@ -428,13 +434,16 @@ bool Emulator::loadELF(std::ifstream& file) { // Reset our graphics context and initialize the GPU's graphics context void Emulator::initGraphicsContext() { +#if ENABLE_OPENGL gl.reset(); // TODO (For when we have multiple backends): Only do this if we are using OpenGL +#endif gpu.initGraphicsContext(); } #ifdef PANDA3DS_ENABLE_HTTP_SERVER void Emulator::pollHttpServer() { std::scoped_lock lock(httpServer.actionMutex); + ServiceManager& srv = kernel.getServiceManager(); if (httpServer.pendingAction) { From 2a1683ba62e6dd737b590865d49c3bf427e3c336 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Mon, 10 Jul 2023 08:53:16 -0700 Subject: [PATCH 03/14] Introduce "Renderer" abstraction layer Adds a `renderer` class for which a 
rendering backend must implement and will conditionally use OpenGL in the case that `ENABLE_OPENGL` is enabled. --- CMakeLists.txt | 2 +- include/PICA/dynapica/shader_rec.hpp | 8 +- include/PICA/gpu.hpp | 66 ++++++------ include/renderer.hpp | 37 +++++++ include/renderer_gl/renderer_gl.hpp | 22 ++-- src/core/PICA/gpu.cpp | 84 ++++++++------- src/core/renderer_gl/renderer_gl.cpp | 155 ++++++++++++++++----------- src/emulator.cpp | 2 +- src/renderer.cpp | 4 + 9 files changed, 224 insertions(+), 156 deletions(-) create mode 100644 include/renderer.hpp create mode 100644 src/renderer.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 802b3d06..8c932497 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,7 +93,7 @@ endif() set(SOURCE_FILES src/main.cpp src/emulator.cpp src/io_file.cpp src/config.cpp src/core/CPU/cpu_dynarmic.cpp src/core/CPU/dynarmic_cycles.cpp - src/core/memory.cpp src/httpserver.cpp src/stb_image_write.c + src/core/memory.cpp src/renderer.cpp src/httpserver.cpp src/stb_image_write.c ) set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp) set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp diff --git a/include/PICA/dynapica/shader_rec.hpp b/include/PICA/dynapica/shader_rec.hpp index b7d37b02..e8b6afed 100644 --- a/include/PICA/dynapica/shader_rec.hpp +++ b/include/PICA/dynapica/shader_rec.hpp @@ -21,7 +21,7 @@ class ShaderJIT { ShaderCache cache; #endif -public: + public: #ifdef PANDA3DS_SHADER_JIT_SUPPORTED // Call this before starting to process a batch of vertices // This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader @@ -29,9 +29,7 @@ public: // The caller must make sure the entrypoint has been properly set beforehand void prepare(PICAShader& shaderUnit); void reset(); - void run(PICAShader& shaderUnit) { - prologueCallback(shaderUnit, entrypointCallback); - } + void run(PICAShader& shaderUnit) { prologueCallback(shaderUnit, 
entrypointCallback); } static constexpr bool isAvailable() { return true; } #else @@ -44,7 +42,7 @@ public: } // Define dummy callback. This should never be called if the shader JIT is not supported - using Callback = void(*)(PICAShader& shaderUnit); + using Callback = void (*)(PICAShader& shaderUnit); Callback activeShaderCallback = nullptr; void reset() {} diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index a4adc816..929881b7 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -1,39 +1,39 @@ #pragma once #include +#include "PICA/dynapica/shader_rec.hpp" +#include "PICA/float_types.hpp" +#include "PICA/pica_vertex.hpp" +#include "PICA/regs.hpp" +#include "PICA/shader_unit.hpp" #include "config.hpp" #include "helpers.hpp" #include "logger.hpp" #include "memory.hpp" -#include "PICA/float_types.hpp" -#include "PICA/regs.hpp" -#include "PICA/shader_unit.hpp" -#include "PICA/dynapica/shader_rec.hpp" -#include "renderer_gl/renderer_gl.hpp" -#include "PICA/pica_vertex.hpp" +#include "renderer.hpp" class GPU { static constexpr u32 regNum = 0x300; - using vec4f = OpenGL::Vector; + using vec4f = std::array; using Registers = std::array; Memory& mem; EmulatorConfig& config; ShaderUnit shaderUnit; - ShaderJIT shaderJIT; // Doesn't do anything if JIT is disabled or not supported + ShaderJIT shaderJIT; // Doesn't do anything if JIT is disabled or not supported u8* vram = nullptr; MAKE_LOG_FUNCTION(log, gpuLogger) - static constexpr u32 maxAttribCount = 12; // Up to 12 vertex attributes + static constexpr u32 maxAttribCount = 12; // Up to 12 vertex attributes static constexpr u32 vramSize = u32(6_MB); - Registers regs; // GPU internal registers - std::array currentAttributes; // Vertex attributes before being passed to the shader + Registers regs; // GPU internal registers + std::array currentAttributes; // Vertex attributes before being passed to the shader - std::array immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission + 
std::array immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission std::array immediateModeVertices; uint immediateModeVertIndex; - uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading + uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading template void drawArrays(); @@ -42,35 +42,33 @@ class GPU { void drawArrays(bool indexed); struct AttribInfo { - u32 offset = 0; // Offset from base vertex array - int size = 0; // Bytes per vertex + u32 offset = 0; // Offset from base vertex array + int size = 0; // Bytes per vertex u32 config1 = 0; u32 config2 = 0; - u32 componentCount = 0; // Number of components for the attribute + u32 componentCount = 0; // Number of components for the attribute - u64 getConfigFull() { - return u64(config1) | (u64(config2) << 32); - } + u64 getConfigFull() { return u64(config1) | (u64(config2) << 32); } }; u64 getVertexShaderInputConfig() { return u64(regs[PICA::InternalRegs::VertexShaderInputCfgLow]) | (u64(regs[PICA::InternalRegs::VertexShaderInputCfgHigh]) << 32); } - std::array attributeInfo; // Info for each of the 12 attributes - u32 totalAttribCount = 0; // Number of vertex attributes to send to VS - u32 fixedAttribMask = 0; // Which attributes are fixed? - - u32 fixedAttribIndex = 0; // Which fixed attribute are we writing to ([0, 11] range) - u32 fixedAttribCount = 0; // How many attribute components have we written? When we get to 4 the attr will actually get submitted - std::array fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted + std::array attributeInfo; // Info for each of the 12 attributes + u32 totalAttribCount = 0; // Number of vertex attributes to send to VS + u32 fixedAttribMask = 0; // Which attributes are fixed? + + u32 fixedAttribIndex = 0; // Which fixed attribute are we writing to ([0, 11] range) + u32 fixedAttribCount = 0; // How many attribute components have we written? 
When we get to 4 the attr will actually get submitted + std::array fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted // Command processor pointers for GPU command lists u32* cmdBuffStart = nullptr; u32* cmdBuffEnd = nullptr; u32* cmdBuffCurr = nullptr; - Renderer renderer; + std::unique_ptr renderer; PICA::Vertex getImmediateModeVertex(); public: @@ -84,11 +82,9 @@ class GPU { // Set to false by the renderer when the lighting_lut is uploaded ot the GPU bool lightingLUTDirty = false; - GPU(Memory& mem, GLStateManager& gl, EmulatorConfig& config); - void initGraphicsContext() { renderer.initGraphicsContext(); } - void getGraphicsContext() { renderer.getGraphicsContext(); } - void display() { renderer.display(); } - void screenshot(const std::string& name) { renderer.screenshot(name); } + GPU(Memory& mem, EmulatorConfig& config); + void initGraphicsContext() { renderer->initGraphicsContext(); } + void display() { renderer->display(); } void fireDMA(u32 dest, u32 source, u32 size); void reset(); @@ -106,14 +102,12 @@ class GPU { // TODO: Emulate the transfer engine & its registers // Then this can be emulated by just writing the appropriate values there - void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { - renderer.clearBuffer(startAddress, endAddress, value, control); - } + void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { renderer->clearBuffer(startAddress, endAddress, value, control); } // TODO: Emulate the transfer engine & its registers // Then this can be emulated by just writing the appropriate values there void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { - renderer.displayTransfer(inputAddr, outputAddr, inputSize, outputSize, flags); + renderer->displayTransfer(inputAddr, outputAddr, inputSize, outputSize, flags); } // Read a value of type T from physical address paddr diff --git a/include/renderer.hpp b/include/renderer.hpp new file 
mode 100644 index 00000000..c7315739 --- /dev/null +++ b/include/renderer.hpp @@ -0,0 +1,37 @@ +#pragma once +#include +#include + +#include "PICA/pica_vertex.hpp" +#include "PICA/regs.hpp" +#include "helpers.hpp" + +class GPU; + +class Renderer { + protected: + GPU& gpu; + static constexpr u32 regNum = 0x300; // Number of internal PICA registers + const std::array& regs; + + public: + Renderer(GPU& gpu, const std::array& internalRegs); + virtual ~Renderer(); + + static constexpr u32 vertexBufferSize = 0x10000; + + virtual void reset() = 0; + virtual void display() = 0; // Display the 3DS screen contents to the window + virtual void initGraphicsContext() = 0; // Initialize graphics context + virtual void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) = 0; // Clear a GPU buffer in VRAM + virtual void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) = 0; // Perform display transfer + virtual void drawVertices(PICA::PrimType primType, std::span vertices) = 0; // Draw the given vertices + + virtual void setFBSize(u32 width, u32 height) = 0; + + virtual void setColourFormat(PICA::ColorFmt format) = 0; + virtual void setDepthFormat(PICA::DepthFmt format) = 0; + + virtual void setColourBufferLoc(u32 loc) = 0; + virtual void setDepthBufferLoc(u32 loc) = 0; +}; \ No newline at end of file diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 07f8a63c..24301a11 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -4,20 +4,20 @@ #include #include "PICA/float_types.hpp" +#include "PICA/pica_vertex.hpp" +#include "PICA/regs.hpp" #include "gl_state.hpp" #include "helpers.hpp" #include "logger.hpp" +#include "renderer.hpp" #include "surface_cache.hpp" #include "textures.hpp" -#include "PICA/regs.hpp" -#include "PICA/pica_vertex.hpp" // More circular dependencies! 
class GPU; -class Renderer { - GPU& gpu; - GLStateManager& gl; +class RendererGL final : public Renderer { + GLStateManager gl = {}; OpenGL::Program triangleProgram; OpenGL::Program displayProgram; @@ -31,7 +31,7 @@ class Renderer { GLint textureEnvCombinerLoc = -1; GLint textureEnvColorLoc = -1; GLint textureEnvScaleLoc = -1; - + // Uniform of PICA registers GLint picaRegLoc = -1; @@ -50,7 +50,7 @@ class Renderer { OpenGL::uvec2 fbSize; // The size of the framebuffer (ie both the colour and depth buffer)' - u32 colourBufferLoc; // Location in 3DS VRAM for the colour buffer + u32 colourBufferLoc; // Location in 3DS VRAM for the colour buffer PICA::ColorFmt colourBufferFormat; // Format of the colours stored in the colour buffer // Same for the depth/stencil buffer @@ -61,9 +61,6 @@ class Renderer { OpenGL::VertexArray dummyVAO; OpenGL::VertexBuffer dummyVBO; - static constexpr u32 regNum = 0x300; // Number of internal PICA registers - const std::array& regs; - OpenGL::Texture screenTexture; GLuint lightLUTTextureArray; OpenGL::Framebuffer screenFramebuffer; @@ -79,12 +76,11 @@ class Renderer { void updateLightingLUT(); public: - Renderer(GPU& gpu, GLStateManager& gl, const std::array& internalRegs) : gpu(gpu), gl(gl), regs(internalRegs) {} + RendererGL(GPU& gpu, const std::array& internalRegs) : Renderer(gpu, internalRegs) {} void reset(); void display(); // Display the 3DS screen contents to the window void initGraphicsContext(); // Initialize graphics context - void getGraphicsContext(); // Set up graphics context for rendering void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags); // Perform display transfer void drawVertices(PICA::PrimType primType, std::span vertices); // Draw the given vertices @@ -107,6 +103,4 @@ class Renderer { void setColourBufferLoc(u32 loc) { colourBufferLoc = loc; } void setDepthBufferLoc(u32 
loc) { depthBufferLoc = loc; } - - static constexpr u32 vertexBufferSize = 0x10000; }; \ No newline at end of file diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 37b67a50..29eeef04 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -2,19 +2,28 @@ #include #include -#include #include +#include #include "PICA/float_types.hpp" #include "PICA/regs.hpp" +#if ENABLE_OPENGL +#include "renderer_gl/renderer_gl.hpp" +#endif + using namespace Floats; // Note: For when we have multiple backends, the GL state manager can stay here and have the constructor for the Vulkan-or-whatever renderer ignore it // Thus, our GLStateManager being here does not negatively impact renderer-agnosticness -GPU::GPU(Memory& mem, GLStateManager& gl, EmulatorConfig& config) : mem(mem), renderer(*this, gl, regs), config(config) { +GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) { vram = new u8[vramSize]; - mem.setVRAM(vram); // Give the bus a pointer to our VRAM + mem.setVRAM(vram); // Give the bus a pointer to our VRAM + + // TODO: configurable backend +#if ENABLE_OPENGL + renderer.reset(new RendererGL(*this, regs)); +#endif } void GPU::reset() { @@ -41,7 +50,7 @@ void GPU::reset() { e.config2 = 0; } - renderer.reset(); + renderer->reset(); } // Call the correct version of drawArrays based on whether this is an indexed draw (first template parameter) @@ -73,15 +82,14 @@ void GPU::drawArrays() { // Base address for vertex attributes // The vertex base is always on a quadword boundary because the PICA does weird alignment shit any time possible const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16; - const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer + const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer // Configures the type of primitive and the number of vertex shader outputs const u32 primConfig = 
regs[PICA::InternalRegs::PrimitiveConfig]; const PICA::PrimType primType = static_cast(Helpers::getBits<8, 2>(primConfig)); if (vertexCount > Renderer::vertexBufferSize) Helpers::panic("[PICA] vertexCount > vertexBufferSize"); - if ((primType == PICA::PrimType::TriangleList && vertexCount % 3) || - (primType == PICA::PrimType::TriangleStrip && vertexCount < 3) || + if ((primType == PICA::PrimType::TriangleList && vertexCount % 3) || (primType == PICA::PrimType::TriangleStrip && vertexCount < 3) || (primType == PICA::PrimType::TriangleFan && vertexCount < 3)) { Helpers::panic("Invalid vertex count for primitive. Type: %d, vert count: %d\n", primType, vertexCount); } @@ -89,10 +97,10 @@ void GPU::drawArrays() { // Get the configuration for the index buffer, used only for indexed drawing u32 indexBufferConfig = regs[PICA::InternalRegs::IndexBufferConfig]; u32 indexBufferPointer = vertexBase + (indexBufferConfig & 0xfffffff); - bool shortIndex = Helpers::getBit<31>(indexBufferConfig); // Indicates whether vert indices are 16-bit or 8-bit + bool shortIndex = Helpers::getBit<31>(indexBufferConfig); // Indicates whether vert indices are 16-bit or 8-bit // Stuff the global attribute config registers in one u64 to make attr parsing easier - // TODO: Cache this when the vertex attribute format registers are written to + // TODO: Cache this when the vertex attribute format registers are written to u64 vertexCfg = u64(regs[PICA::InternalRegs::AttribFormatLow]) | (u64(regs[PICA::InternalRegs::AttribFormatHigh]) << 32); if constexpr (!indexed) { @@ -111,24 +119,24 @@ void GPU::drawArrays() { constexpr size_t vertexCacheSize = 64; struct { - std::bitset validBits{0}; // Shows which tags are valid. 
If the corresponding bit is 1, then there's an entry - std::array ids; // IDs (ie indices of the cached vertices in the 3DS vertex buffer) - std::array bufferPositions; // Positions of the cached vertices in our own vertex buffer + std::bitset validBits{0}; // Shows which tags are valid. If the corresponding bit is 1, then there's an entry + std::array ids; // IDs (ie indices of the cached vertices in the 3DS vertex buffer) + std::array bufferPositions; // Positions of the cached vertices in our own vertex buffer } vertexCache; - + for (u32 i = 0; i < vertexCount; i++) { - u32 vertexIndex; // Index of the vertex in the VBO for indexed rendering + u32 vertexIndex; // Index of the vertex in the VBO for indexed rendering if constexpr (!indexed) { vertexIndex = i + regs[PICA::InternalRegs::VertexOffsetReg]; } else { if (shortIndex) { auto ptr = getPointerPhys(indexBufferPointer); - vertexIndex = *ptr; // TODO: This is very unsafe + vertexIndex = *ptr; // TODO: This is very unsafe indexBufferPointer += 2; } else { auto ptr = getPointerPhys(indexBufferPointer); - vertexIndex = *ptr; // TODO: This is also very unsafe + vertexIndex = *ptr; // TODO: This is also very unsafe indexBufferPointer += 1; } } @@ -152,22 +160,22 @@ void GPU::drawArrays() { } int attrCount = 0; - int buffer = 0; // Vertex buffer index for non-fixed attributes + int buffer = 0; // Vertex buffer index for non-fixed attributes while (attrCount < totalAttribCount) { // Check if attribute is fixed or not - if (fixedAttribMask & (1 << attrCount)) { // Fixed attribute - vec4f& fixedAttr = shaderUnit.vs.fixedAttributes[attrCount]; // TODO: Is this how it works? + if (fixedAttribMask & (1 << attrCount)) { // Fixed attribute + vec4f& fixedAttr = shaderUnit.vs.fixedAttributes[attrCount]; // TODO: Is this how it works? 
vec4f& inputAttr = currentAttributes[attrCount]; - std::memcpy(&inputAttr, &fixedAttr, sizeof(vec4f)); // Copy fixed attr to input attr + std::memcpy(&inputAttr, &fixedAttr, sizeof(vec4f)); // Copy fixed attr to input attr attrCount++; - } else { // Non-fixed attribute - auto& attr = attributeInfo[buffer]; // Get information for this attribute - u64 attrCfg = attr.getConfigFull(); // Get config1 | (config2 << 32) + } else { // Non-fixed attribute + auto& attr = attributeInfo[buffer]; // Get information for this attribute + u64 attrCfg = attr.getConfigFull(); // Get config1 | (config2 << 32) u32 attrAddress = vertexBase + attr.offset + (vertexIndex * attr.size); for (int j = 0; j < attr.componentCount; j++) { - uint index = (attrCfg >> (j * 4)) & 0xf; // Get index of attribute in vertexCfg + uint index = (attrCfg >> (j * 4)) & 0xf; // Get index of attribute in vertexCfg // Vertex attributes used as padding // 12, 13, 14 and 15 are equivalent to 4, 8, 12 and 16 bytes of padding respectively @@ -179,15 +187,15 @@ void GPU::drawArrays() { } u32 attribInfo = (vertexCfg >> (index * 4)) & 0xf; - u32 attribType = attribInfo & 0x3; // Type of attribute(sbyte/ubyte/short/float) - u32 size = (attribInfo >> 2) + 1; // Total number of components + u32 attribType = attribInfo & 0x3; // Type of attribute(sbyte/ubyte/short/float) + u32 size = (attribInfo >> 2) + 1; // Total number of components - //printf("vertex_attribute_strides[%d] = %d\n", attrCount, attr.size); + // printf("vertex_attribute_strides[%d] = %d\n", attrCount, attr.size); vec4f& attribute = currentAttributes[attrCount]; - uint component; // Current component + uint component; // Current component switch (attribType) { - case 0: { // Signed byte + case 0: { // Signed byte s8* ptr = getPointerPhys(attrAddress); for (component = 0; component < size; component++) { float val = static_cast(*ptr++); @@ -197,7 +205,7 @@ void GPU::drawArrays() { break; } - case 1: { // Unsigned byte + case 1: { // Unsigned byte u8* ptr = 
getPointerPhys(attrAddress); for (component = 0; component < size; component++) { float val = static_cast(*ptr++); @@ -207,7 +215,7 @@ void GPU::drawArrays() { break; } - case 2: { // Short + case 2: { // Short s16* ptr = getPointerPhys(attrAddress); for (component = 0; component < size; component++) { float val = static_cast(*ptr++); @@ -217,7 +225,7 @@ void GPU::drawArrays() { break; } - case 3: { // Float + case 3: { // Float float* ptr = getPointerPhys(attrAddress); for (component = 0; component < size; component++) { float val = *ptr++; @@ -251,8 +259,8 @@ void GPU::drawArrays() { const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf; std::memcpy(&shaderUnit.vs.inputs[mapping], ¤tAttributes[j], sizeof(vec4f)); } - - if constexpr (useShaderJIT) { + + if constexpr (useShaderJIT) { shaderJIT.run(shaderUnit.vs); } else { shaderUnit.vs.run(); @@ -264,14 +272,14 @@ void GPU::drawArrays() { for (int i = 0; i < totalShaderOutputs; i++) { const u32 config = regs[PICA::InternalRegs::ShaderOutmap0 + i]; - for (int j = 0; j < 4; j++) { // pls unroll + for (int j = 0; j < 4; j++) { // pls unroll const u32 mapping = (config >> (j * 8)) & 0x1F; out.raw[mapping] = shaderUnit.vs.outputs[i][j]; } } } - renderer.drawVertices(primType, std::span(vertices).first(vertexCount)); + renderer->drawVertices(primType, std::span(vertices).first(vertexCount)); } PICA::Vertex GPU::getImmediateModeVertex() { @@ -289,7 +297,9 @@ PICA::Vertex GPU::getImmediateModeVertex() { std::memcpy(&v.s.colour, &shaderUnit.vs.outputs[1], sizeof(vec4f)); std::memcpy(&v.s.texcoord0, &shaderUnit.vs.outputs[2], 2 * sizeof(f24)); - printf("(x, y, z, w) = (%f, %f, %f, %f)\n", (double)v.s.positions[0], (double)v.s.positions[1], (double)v.s.positions[2], (double)v.s.positions[3]); + printf( + "(x, y, z, w) = (%f, %f, %f, %f)\n", (double)v.s.positions[0], (double)v.s.positions[1], (double)v.s.positions[2], (double)v.s.positions[3] + ); printf("(r, g, b, a) = (%f, %f, %f, %f)\n", (double)v.s.colour[0], 
(double)v.s.colour[1], (double)v.s.colour[2], (double)v.s.colour[3]); printf("(u, v ) = (%f, %f)\n", (double)v.s.texcoord0[0], (double)v.s.texcoord0[1]); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 3a13b31d..22484608 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -1,4 +1,5 @@ #include "renderer_gl/renderer_gl.hpp" + #include "PICA/float_types.hpp" #include "PICA/gpu.hpp" #include "PICA/regs.hpp" @@ -576,7 +577,7 @@ const char* displayFragmentShader = R"( } )"; -void Renderer::reset() { +void RendererGL::reset() { depthBufferCache.reset(); colourBufferCache.reset(); textureCache.reset(); @@ -592,10 +593,10 @@ void Renderer::reset() { const auto oldProgram = OpenGL::getProgram(); gl.useProgram(triangleProgram); - - oldDepthScale = -1.0; // Default depth scale to -1.0, which is what games typically use - oldDepthOffset = 0.0; // Default depth offset to 0 - oldDepthmapEnable = false; // Enable w buffering + + oldDepthScale = -1.0; // Default depth scale to -1.0, which is what games typically use + oldDepthOffset = 0.0; // Default depth offset to 0 + oldDepthmapEnable = false; // Enable w buffering glUniform1f(depthScaleLoc, oldDepthScale); glUniform1f(depthOffsetLoc, oldDepthOffset); @@ -605,10 +606,10 @@ void Renderer::reset() { } } -void Renderer::initGraphicsContext() { +void RendererGL::initGraphicsContext() { OpenGL::Shader vert(vertexShader, OpenGL::Vertex); OpenGL::Shader frag(fragmentShader, OpenGL::Fragment); - triangleProgram.create({ vert, frag }); + triangleProgram.create({vert, frag}); gl.useProgram(triangleProgram); textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); @@ -630,10 +631,10 @@ void Renderer::initGraphicsContext() { OpenGL::Shader vertDisplay(displayVertexShader, OpenGL::Vertex); OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment); - displayProgram.create({ vertDisplay, fragDisplay }); + 
displayProgram.create({vertDisplay, fragDisplay}); gl.useProgram(displayProgram); - glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object + glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize, GL_STREAM_DRAW); gl.bindVBO(vbo); @@ -669,10 +670,10 @@ void Renderer::initGraphicsContext() { dummyVAO.create(); // Create texture and framebuffer for the 3DS screen - const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 - const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall - - glGenTextures(1,&lightLUTTextureArray); + const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 + const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall + + glGenTextures(1, &lightLUTTextureArray); auto prevTexture = OpenGL::getTex2D(); screenTexture.create(screenTextureWidth, screenTextureHeight, GL_RGBA8); @@ -684,8 +685,7 @@ void Renderer::initGraphicsContext() { screenFramebuffer.createWithDrawTexture(screenTexture); screenFramebuffer.bind(OpenGL::DrawAndReadFramebuffer); - if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) - Helpers::panic("Incomplete framebuffer"); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) Helpers::panic("Incomplete framebuffer"); // TODO: This should not clear the framebuffer contents. It should load them from VRAM. GLint oldViewport[4]; @@ -699,20 +699,31 @@ void Renderer::initGraphicsContext() { } // Set up the OpenGL blending context to match the emulated PICA -void Renderer::setupBlending() { +void RendererGL::setupBlending() { const bool blendingEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; - + // Map of PICA blending equations to OpenGL blending equations. 
The unused blending equations are equivalent to equation 0 (add) - static constexpr std::array blendingEquations = { - GL_FUNC_ADD, GL_FUNC_SUBTRACT, GL_FUNC_REVERSE_SUBTRACT, GL_MIN, GL_MAX, GL_FUNC_ADD, GL_FUNC_ADD, GL_FUNC_ADD - }; - + static constexpr std::array blendingEquations = {GL_FUNC_ADD, GL_FUNC_SUBTRACT, GL_FUNC_REVERSE_SUBTRACT, GL_MIN, GL_MAX, GL_FUNC_ADD, + GL_FUNC_ADD, GL_FUNC_ADD}; + // Map of PICA blending funcs to OpenGL blending funcs. Func = 15 is undocumented and stubbed to GL_ONE for now static constexpr std::array blendingFuncs = { - GL_ZERO, GL_ONE, GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR, GL_DST_COLOR, GL_ONE_MINUS_DST_COLOR, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, - GL_DST_ALPHA, GL_ONE_MINUS_DST_ALPHA, GL_CONSTANT_COLOR, GL_ONE_MINUS_CONSTANT_COLOR, GL_CONSTANT_ALPHA, GL_ONE_MINUS_CONSTANT_ALPHA, - GL_SRC_ALPHA_SATURATE, GL_ONE - }; + GL_ZERO, + GL_ONE, + GL_SRC_COLOR, + GL_ONE_MINUS_SRC_COLOR, + GL_DST_COLOR, + GL_ONE_MINUS_DST_COLOR, + GL_SRC_ALPHA, + GL_ONE_MINUS_SRC_ALPHA, + GL_DST_ALPHA, + GL_ONE_MINUS_DST_ALPHA, + GL_CONSTANT_COLOR, + GL_ONE_MINUS_CONSTANT_COLOR, + GL_CONSTANT_ALPHA, + GL_ONE_MINUS_CONSTANT_ALPHA, + GL_SRC_ALPHA_SATURATE, + GL_ONE}; if (!blendingEnabled) { gl.disableBlend(); @@ -743,14 +754,12 @@ void Renderer::setupBlending() { } } -void Renderer::setupTextureEnvState() { +void RendererGL::setupTextureEnvState() { // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. 
- static constexpr std::array ioBases = { - PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, - PICA::InternalRegs::TexEnv2Source, PICA::InternalRegs::TexEnv3Source, - PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source - }; + static constexpr std::array ioBases = {PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, + PICA::InternalRegs::TexEnv2Source, PICA::InternalRegs::TexEnv3Source, + PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source}; u32 textureEnvSourceRegs[6]; u32 textureEnvOperandRegs[6]; @@ -775,10 +784,9 @@ void Renderer::setupTextureEnvState() { glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs); } -void Renderer::bindTexturesToSlots() { +void RendererGL::bindTexturesToSlots() { static constexpr std::array ioBases = { - PICA::InternalRegs::Tex0BorderColor, PICA::InternalRegs::Tex1BorderColor, PICA::InternalRegs::Tex2BorderColor - }; + PICA::InternalRegs::Tex0BorderColor, PICA::InternalRegs::Tex1BorderColor, PICA::InternalRegs::Tex2BorderColor}; for (int i = 0; i < 3; i++) { if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { @@ -805,13 +813,13 @@ void Renderer::bindTexturesToSlots() { glActiveTexture(GL_TEXTURE0); } -void Renderer::updateLightingLUT() { +void RendererGL::updateLightingLUT() { gpu.lightingLUTDirty = false; - std::array u16_lightinglut; - + std::array u16_lightinglut; + for (int i = 0; i < gpu.lightingLUT.size(); i++) { - uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); - u16_lightinglut[i] = value * 65535 / 4095; + uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); + u16_lightinglut[i] = value * 65535 / 4095; } glActiveTexture(GL_TEXTURE0 + 3); @@ -824,11 +832,9 @@ void Renderer::updateLightingLUT() { glActiveTexture(GL_TEXTURE0); } -void Renderer::drawVertices(PICA::PrimType primType, std::span vertices) { +void RendererGL::drawVertices(PICA::PrimType primType, std::span vertices) { // The fourth type is meant to be "Geometry primitive". 
TODO: Find out what that is - static constexpr std::array primTypes = { - OpenGL::Triangle, OpenGL::TriangleStrip, OpenGL::TriangleFan, OpenGL::Triangle - }; + static constexpr std::array primTypes = {OpenGL::Triangle, OpenGL::TriangleStrip, OpenGL::TriangleFan, OpenGL::Triangle}; const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); @@ -836,7 +842,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver gl.bindVAO(vao); gl.useProgram(triangleProgram); - OpenGL::enableClipPlane(0); // Clipping plane 0 is always enabled + OpenGL::enableClipPlane(0); // Clipping plane 0 is always enabled if (regs[PICA::InternalRegs::ClipEnable] & 1) { OpenGL::enableClipPlane(1); } @@ -852,9 +858,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver const int colourMask = getBits<8, 4>(depthControl); gl.setColourMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8); - static constexpr std::array depthModes = { - GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL - }; + static constexpr std::array depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL}; const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); @@ -865,7 +869,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver oldDepthScale = depthScale; glUniform1f(depthScaleLoc, depthScale); } - + if (oldDepthOffset != depthOffset) { oldDepthOffset = depthOffset; glUniform1f(depthOffsetLoc, depthOffset); @@ -917,7 +921,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span ver constexpr u32 topScreenBuffer = 0x1f000000; constexpr u32 bottomScreenBuffer = 0x1f05dc00; -void Renderer::display() { +void RendererGL::display() { gl.disableScissor(); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); @@ -925,7 +929,7 
@@ void Renderer::display() { glBlitFramebuffer(0, 0, 400, 480, 0, 0, 400, 480, GL_COLOR_BUFFER_BIT, GL_LINEAR); } -void Renderer::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { +void RendererGL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { return; log("GPU: Clear buffer\nStart: %08X End: %08X\nValue: %08X Control: %08X\n", startAddress, endAddress, value, control); @@ -947,9 +951,9 @@ void Renderer::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 cont OpenGL::clearColor(); } -OpenGL::Framebuffer Renderer::getColourFBO() { - //We construct a colour buffer object and see if our cache has any matching colour buffers in it - // If not, we allocate a texture & FBO for our framebuffer and store it in the cache +OpenGL::Framebuffer RendererGL::getColourFBO() { + // We construct a colour buffer object and see if our cache has any matching colour buffers in it + // If not, we allocate a texture & FBO for our framebuffer and store it in the cache ColourBuffer sampleBuffer(colourBufferLoc, colourBufferFormat, fbSize.x(), fbSize.y()); auto buffer = colourBufferCache.find(sampleBuffer); @@ -960,7 +964,7 @@ OpenGL::Framebuffer Renderer::getColourFBO() { } } -void Renderer::bindDepthBuffer() { +void RendererGL::bindDepthBuffer() { // Similar logic as the getColourFBO function DepthBuffer sampleBuffer(depthBufferLoc, depthBufferFormat, fbSize.x(), fbSize.y()); auto buffer = depthBufferCache.find(sampleBuffer); @@ -979,14 +983,14 @@ void Renderer::bindDepthBuffer() { glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, tex, 0); } -OpenGL::Texture Renderer::getTexture(Texture& tex) { +OpenGL::Texture RendererGL::getTexture(Texture& tex) { // Similar logic as the getColourFBO/bindDepthBuffer functions auto buffer = textureCache.find(tex); if (buffer.has_value()) { return buffer.value().get().texture; } else { - const void* textureData = gpu.getPointerPhys(tex.location); // Get pointer to the texture data 
in 3DS memory + const void* textureData = gpu.getPointerPhys(tex.location); // Get pointer to the texture data in 3DS memory Texture& newTex = textureCache.add(tex); newTex.decodeTexture(textureData); @@ -994,7 +998,7 @@ OpenGL::Texture Renderer::getTexture(Texture& tex) { } } -void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { +void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { const u32 inputWidth = inputSize & 0xffff; const u32 inputGap = inputSize >> 16; @@ -1022,12 +1026,12 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 // Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture // We consider output gap == 320 to mean bottom, and anything else to mean top if (outputGap == 320) { - OpenGL::setViewport(40, 0, 320, 240); // Bottom screen viewport + OpenGL::setViewport(40, 0, 320, 240); // Bottom screen viewport } else { - OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport + OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport } - OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen + OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen } void Renderer::screenshot(const std::string& name) { @@ -1035,8 +1039,35 @@ void Renderer::screenshot(const std::string& name) { constexpr uint height = 2 * 240; std::vector pixels, flippedPixels; - pixels.resize(width * height * 4); - flippedPixels.resize(pixels.size());; + pixels.resize(width * height * 4); + flippedPixels.resize(pixels.size()); + ; + + OpenGL::bindScreenFramebuffer(); + glReadPixels(0, 0, width, height, GL_BGRA, GL_UNSIGNED_BYTE, pixels.data()); + + // Flip the image vertically + for (int y = 0; y < height; y++) { + memcpy(&flippedPixels[y * width * 4], &pixels[(height - y - 1) * width * 4], width * 4); + // Swap R and B 
channels + for (int x = 0; x < width; x++) { + std::swap(flippedPixels[y * width * 4 + x * 4 + 0], flippedPixels[y * width * 4 + x * 4 + 2]); + // Set alpha to 0xFF + flippedPixels[y * width * 4 + x * 4 + 3] = 0xFF; + } + } + + stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0); +} + +void Renderer::screenshot(const std::string& name) { + constexpr uint width = 400; + constexpr uint height = 2 * 240; + + std::vector pixels, flippedPixels; + pixels.resize(width * height * 4); + flippedPixels.resize(pixels.size()); + ; OpenGL::bindScreenFramebuffer(); glReadPixels(0, 0, width, height, GL_BGRA, GL_UNSIGNED_BYTE, pixels.data()); diff --git a/src/emulator.cpp b/src/emulator.cpp index 0d95b82b..db628853 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -16,7 +16,7 @@ _declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 1; } #endif -Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory, gl, config), memory(cpu.getTicksRef()) { +Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory, config), memory(cpu.getTicksRef()) { if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS) < 0) { Helpers::panic("Failed to initialize SDL2"); } diff --git a/src/renderer.cpp b/src/renderer.cpp new file mode 100644 index 00000000..b3da0501 --- /dev/null +++ b/src/renderer.cpp @@ -0,0 +1,4 @@ +#include "renderer.hpp" + +Renderer::Renderer(GPU& gpu, const std::array& internalRegs) : gpu(gpu), regs(internalRegs) {} +Renderer::~Renderer() {} \ No newline at end of file From 9e32b6d4bf93b2f62808002905e0e59e96aa61b6 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Mon, 10 Jul 2023 08:55:23 -0700 Subject: [PATCH 04/14] Remove OpenGL-specific vector-types Removes dependency on the OpenGL header and rendering backend for its `OpenGL::Vector` type in favor of a more standard array. 
--- .../PICA/dynapica/shader_rec_emitter_x64.hpp | 36 +++-- include/PICA/shader.hpp | 133 ++++++++--------- src/core/PICA/regs.cpp | 131 ++++++++--------- src/core/PICA/shader_interpreter.cpp | 136 +++++++++--------- src/core/PICA/shader_unit.cpp | 9 +- 5 files changed, 221 insertions(+), 224 deletions(-) diff --git a/include/PICA/dynapica/shader_rec_emitter_x64.hpp b/include/PICA/dynapica/shader_rec_emitter_x64.hpp index ba37595a..109fddac 100644 --- a/include/PICA/dynapica/shader_rec_emitter_x64.hpp +++ b/include/PICA/dynapica/shader_rec_emitter_x64.hpp @@ -2,17 +2,17 @@ // Only do anything if we're on an x64 target with JIT support enabled #if defined(PANDA3DS_DYNAPICA_SUPPORTED) && defined(PANDA3DS_X64_HOST) -#include "helpers.hpp" -#include "logger.hpp" -#include "PICA/shader.hpp" -#include "xbyak/xbyak.h" -#include "xbyak/xbyak_util.h" -#include "x64_regs.hpp" - #include +#include "PICA/shader.hpp" +#include "helpers.hpp" +#include "logger.hpp" +#include "x64_regs.hpp" +#include "xbyak/xbyak.h" +#include "xbyak/xbyak_util.h" + class ShaderEmitter : public Xbyak::CodeGenerator { - static constexpr size_t executableMemorySize = PICAShader::maxInstructionCount * 96; // How much executable memory to alloc for each shader + static constexpr size_t executableMemorySize = PICAShader::maxInstructionCount * 96; // How much executable memory to alloc for each shader // Allocate some extra space as padding for security purposes in the extremely unlikely occasion we manage to overflow the above size static constexpr size_t allocSize = executableMemorySize + 0x1000; @@ -20,7 +20,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator { static constexpr uint noSwizzle = 0x1B; using f24 = Floats::f24; - using vec4f = OpenGL::Vector; + using vec4f = std::array; // An array of labels (incl pointers) to each compiled (to x64) PICA instruction std::array instructionLabels; @@ -33,8 +33,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator { // Vector value of (1.0, 1.0, 1.0, 
1.0) for SLT(i)/SGE(i) Label onesVector; - u32 recompilerPC = 0; // PC the recompiler is currently recompiling @ - u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop) + u32 recompilerPC = 0; // PC the recompiler is currently recompiling @ + u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop) bool haveSSE4_1 = false; // Shows if the CPU supports SSE4.1 bool haveAVX = false; // Shows if the CPU supports AVX (NOT AVX2, NOT AVX512. Regular AVX) @@ -105,10 +105,10 @@ class ShaderEmitter : public Xbyak::CodeGenerator { MAKE_LOG_FUNCTION(log, shaderJITLogger) -public: - using InstructionCallback = const void(*)(PICAShader& shaderUnit); // Callback type used for instructions + public: + using InstructionCallback = const void (*)(PICAShader& shaderUnit); // Callback type used for instructions // Callback type used for the JIT prologue. This is what the caller will call - using PrologueCallback = const void(*)(PICAShader& shaderUnit, InstructionCallback cb); + using PrologueCallback = const void (*)(PICAShader& shaderUnit, InstructionCallback cb); PrologueCallback prologueCb = nullptr; // Initialize our emitter with "allocSize" bytes of RWX memory @@ -123,7 +123,7 @@ public: Helpers::panic("This CPU does not support SSE3. Please use the shader interpreter instead"); } } - + void compile(const PICAShader& shaderUnit); // PC must be a valid entrypoint here. 
It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does @@ -133,9 +133,7 @@ public: return reinterpret_cast(ptr); } - PrologueCallback getPrologueCallback() { - return prologueCb; - } + PrologueCallback getPrologueCallback() { return prologueCb; } }; -#endif // x64 recompiler check \ No newline at end of file +#endif // x64 recompiler check \ No newline at end of file diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index ad1e0e46..06d529c9 100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -2,14 +2,12 @@ #include #include #include -#include "helpers.hpp" -#include "opengl.hpp" + #include "PICA/float_types.hpp" #include "PICA/pica_hash.hpp" +#include "helpers.hpp" -enum class ShaderType { - Vertex, Geometry -}; +enum class ShaderType { Vertex, Geometry }; namespace ShaderOpcodes { enum : u32 { @@ -46,66 +44,66 @@ namespace ShaderOpcodes { SETEMIT = 0x2B, JMPC = 0x2C, JMPU = 0x2D, - CMP1 = 0x2E, // Both of these instructions are CMP + CMP1 = 0x2E, // Both of these instructions are CMP CMP2 = 0x2F, - MAD = 0x38 // Everything between 0x38-0x3F is a MAD but fuck it + MAD = 0x38 // Everything between 0x38-0x3F is a MAD but fuck it }; } // Note: All PICA f24 vec4 registers must have the alignas(16) specifier to make them easier to access in SSE/NEON code in the JIT class PICAShader { using f24 = Floats::f24; - using vec4f = OpenGL::Vector; + using vec4f = std::array; struct Loop { - u32 startingPC; // PC at the start of the loop - u32 endingPC; // PC at the end of the loop - u32 iterations; // How many iterations of the loop to run - u32 increment; // How much to increment the loop counter after each iteration + u32 startingPC; // PC at the start of the loop + u32 endingPC; // PC at the end of the loop + u32 iterations; // How many iterations of the loop to run + u32 increment; // How much to increment the loop counter after each iteration }; // Info for ifc/ifu stack struct ConditionalInfo { - u32 
endingPC; // PC at the end of the if block (= DST) - u32 newPC; // PC after the if block is done executing (= DST + NUM) + u32 endingPC; // PC at the end of the if block (= DST) + u32 newPC; // PC after the if block is done executing (= DST + NUM) }; struct CallInfo { - u32 endingPC; // PC at the end of the function - u32 returnPC; // PC to return to after the function ends + u32 endingPC; // PC at the end of the function + u32 returnPC; // PC to return to after the function ends }; - int bufferIndex; // Index of the next instruction to overwrite for shader uploads - int opDescriptorIndex; // Index of the next operand descriptor we'll overwrite - u32 floatUniformIndex = 0; // Which float uniform are we writing to? ([0, 95] range) - u32 floatUniformWordCount = 0; // How many words have we buffered for the current uniform transfer? - bool f32UniformTransfer = false; // Are we transferring an f32 uniform or an f24 uniform? + int bufferIndex; // Index of the next instruction to overwrite for shader uploads + int opDescriptorIndex; // Index of the next operand descriptor we'll overwrite + u32 floatUniformIndex = 0; // Which float uniform are we writing to? ([0, 95] range) + u32 floatUniformWordCount = 0; // How many words have we buffered for the current uniform transfer? + bool f32UniformTransfer = false; // Are we transferring an f32 uniform or an f24 uniform? 
- std::array floatUniformBuffer; // Buffer for temporarily caching float uniform data + std::array floatUniformBuffer; // Buffer for temporarily caching float uniform data -public: + public: // These are placed close to the temp registers and co because it helps the JIT generate better code - u32 entrypoint = 0; // Initial shader PC + u32 entrypoint = 0; // Initial shader PC u32 boolUniform; - std::array, 4> intUniforms; + std::array, 4> intUniforms; alignas(16) std::array floatUniforms; - alignas(16) std::array fixedAttributes; // Fixed vertex attributes - alignas(16) std::array inputs; // Attributes passed to the shader + alignas(16) std::array fixedAttributes; // Fixed vertex attributes + alignas(16) std::array inputs; // Attributes passed to the shader alignas(16) std::array outputs; - alignas(16) vec4f dummy = vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() }); // Dummy register used by the JIT + alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); // Dummy register used by the JIT -protected: + protected: std::array operandDescriptors; - alignas(16) std::array tempRegisters; // General purpose registers the shader can use for temp values - OpenGL::Vector addrRegister; // Address register - bool cmpRegister[2]; // Comparison registers where the result of CMP is stored in + alignas(16) std::array tempRegisters; // General purpose registers the shader can use for temp values + std::array addrRegister; // Address register + bool cmpRegister[2]; // Comparison registers where the result of CMP is stored in u32 loopCounter; - u32 pc = 0; // Program counter: Index of the next instruction we're going to execute - u32 loopIndex = 0; // The index of our loop stack (0 = empty, 4 = full) - u32 ifIndex = 0; // The index of our IF stack - u32 callIndex = 0; // The index of our CALL stack + u32 pc = 0; // Program counter: Index of the next instruction we're going to execute + u32 loopIndex = 0; // The index of our loop stack (0 = 
empty, 4 = full) + u32 ifIndex = 0; // The index of our IF stack + u32 callIndex = 0; // The index of our CALL stack std::array loopInfo; std::array conditionalInfo; @@ -117,7 +115,7 @@ protected: // Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first using Hash = PICAHash::HashType; - Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism) + Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism) Hash lastOpdescHash = 0; // Last hash computed for the operand descriptors (Also used for the JIT) bool codeHashDirty = false; @@ -130,7 +128,7 @@ protected: vec4f getSource(u32 source); vec4f& getDest(u32 dest); -private: + private: // Interpreter functions for the various shader functions void add(u32 instruction); void call(u32 instruction); @@ -171,13 +169,13 @@ private: bool negate; using namespace Helpers; - if constexpr (sourceIndex == 1) { // SRC1 + if constexpr (sourceIndex == 1) { // SRC1 negate = (getBit<4>(opDescriptor)) != 0; compSwizzle = getBits<5, 8>(opDescriptor); - } else if constexpr (sourceIndex == 2) { // SRC2 + } else if constexpr (sourceIndex == 2) { // SRC2 negate = (getBit<13>(opDescriptor)) != 0; compSwizzle = getBits<14, 8>(opDescriptor); - } else if constexpr (sourceIndex == 3) { // SRC3 + } else if constexpr (sourceIndex == 3) { // SRC3 negate = (getBit<22>(opDescriptor)) != 0; compSwizzle = getBits<23, 8>(opDescriptor); } @@ -185,8 +183,8 @@ private: // Iterate through every component of the swizzled vector in reverse order // And get which source component's index to match it with for (int comp = 0; comp < 4; comp++) { - int index = compSwizzle & 3; // Get index for this component - compSwizzle >>= 2; // Move to next component index + int index = compSwizzle & 3; // Get index for this component + compSwizzle >>= 2; // Move to next component index ret[3 - comp] = 
source[index]; } @@ -212,39 +210,33 @@ private: u8 getIndexedSource(u32 source, u32 index); bool isCondTrue(u32 instruction); -public: + public: static constexpr size_t maxInstructionCount = 4096; - std::array loadedShader; // Currently loaded & active shader - std::array bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to + std::array loadedShader; // Currently loaded & active shader + std::array bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to PICAShader(ShaderType type) : type(type) {} // Theese functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them - void finalize() { - std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); - } + void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); } - void setBufferIndex(u32 index) { - bufferIndex = index & 0xfff; - } + void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; } - void setOpDescriptorIndex(u32 index) { - opDescriptorIndex = index & 0x7f; - } + void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; } void uploadWord(u32 word) { if (bufferIndex >= 4095) Helpers::panic("o no, shader upload overflew"); bufferedShader[bufferIndex++] = word; bufferIndex &= 0xfff; - codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed + codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed } void uploadDescriptor(u32 word) { operandDescriptors[opDescriptorIndex++] = word; opDescriptorIndex &= 0x7f; - opdescHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed + opdescHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed } void setFloatUniformIndex(u32 word) { @@ -255,23 +247,22 @@ public: void uploadFloatUniform(u32 word) { 
floatUniformBuffer[floatUniformWordCount++] = word; - if (floatUniformIndex >= 96) - Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex); + if (floatUniformIndex >= 96) Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex); if ((f32UniformTransfer && floatUniformWordCount >= 4) || (!f32UniformTransfer && floatUniformWordCount >= 3)) { vec4f& uniform = floatUniforms[floatUniformIndex++]; floatUniformWordCount = 0; if (f32UniformTransfer) { - uniform.x() = f24::fromFloat32(*(float*)&floatUniformBuffer[3]); - uniform.y() = f24::fromFloat32(*(float*)&floatUniformBuffer[2]); - uniform.z() = f24::fromFloat32(*(float*)&floatUniformBuffer[1]); - uniform.w() = f24::fromFloat32(*(float*)&floatUniformBuffer[0]); + uniform[0] = f24::fromFloat32(*(float*)&floatUniformBuffer[3]); + uniform[1] = f24::fromFloat32(*(float*)&floatUniformBuffer[2]); + uniform[2] = f24::fromFloat32(*(float*)&floatUniformBuffer[1]); + uniform[3] = f24::fromFloat32(*(float*)&floatUniformBuffer[0]); } else { - uniform.x() = f24::fromRaw(floatUniformBuffer[2] & 0xffffff); - uniform.y() = f24::fromRaw(((floatUniformBuffer[1] & 0xffff) << 8) | (floatUniformBuffer[2] >> 24)); - uniform.z() = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16)); - uniform.w() = f24::fromRaw(floatUniformBuffer[0] >> 8); + uniform[0] = f24::fromRaw(floatUniformBuffer[2] & 0xffffff); + uniform[1] = f24::fromRaw(((floatUniformBuffer[1] & 0xffff) << 8) | (floatUniformBuffer[2] >> 24)); + uniform[2] = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16)); + uniform[3] = f24::fromRaw(floatUniformBuffer[0] >> 8); } } } @@ -280,10 +271,10 @@ public: using namespace Helpers; auto& u = intUniforms[index]; - u.x() = word & 0xff; - u.y() = getBits<8, 8>(word); - u.z() = getBits<16, 8>(word); - u.w() = getBits<24, 8>(word); + u[0] = word & 0xff; + u[1] = getBits<8, 8>(word); + u[2] = getBits<16, 8>(word); + u[3] = getBits<24, 
8>(word); } void run(); diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index f62040dd..bbffa99a 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -1,11 +1,12 @@ -#include "PICA/gpu.hpp" #include "PICA/regs.hpp" +#include "PICA/gpu.hpp" + using namespace Floats; using namespace Helpers; u32 GPU::readReg(u32 address) { - if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers + if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers const u32 index = (address - 0x1EF01000) / sizeof(u32); return readInternalReg(index); } else { @@ -15,7 +16,7 @@ u32 GPU::readReg(u32 address) { } void GPU::writeReg(u32 address, u32 value) { - if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers + if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers const u32 index = (address - 0x1EF01000) / sizeof(u32); writeInternalReg(index, value, 0xffffffff); } else { @@ -59,7 +60,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { } u32 currentValue = regs[index]; - u32 newValue = (currentValue & ~mask) | (value & mask); // Only overwrite the bits specified by "mask" + u32 newValue = (currentValue & ~mask) | (value & mask); // Only overwrite the bits specified by "mask" regs[index] = newValue; // TODO: Figure out if things like the shader index use the unmasked value or the masked one @@ -74,38 +75,38 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { break; case AttribFormatHigh: - totalAttribCount = (value >> 28) + 1; // Total number of vertex attributes - fixedAttribMask = getBits<16, 12>(value); // Determines which vertex attributes are fixed for all vertices + totalAttribCount = (value >> 28) + 1; // Total number of vertex attributes + fixedAttribMask = getBits<16, 12>(value); // Determines which vertex attributes are fixed for all vertices break; case ColourBufferLoc: { u32 loc = (value & 0x0fffffff) << 3; - renderer.setColourBufferLoc(loc); + 
renderer->setColourBufferLoc(loc); break; }; case ColourBufferFormat: { u32 format = getBits<16, 3>(value); - renderer.setColourFormat(static_cast(format)); + renderer->setColourFormat(static_cast(format)); break; } case DepthBufferLoc: { u32 loc = (value & 0x0fffffff) << 3; - renderer.setDepthBufferLoc(loc); + renderer->setDepthBufferLoc(loc); break; } case DepthBufferFormat: { u32 format = value & 0x3; - renderer.setDepthFormat(static_cast(format)); + renderer->setDepthFormat(static_cast(format)); break; } case FramebufferSize: { const u32 width = value & 0x7ff; const u32 height = getBits<12, 10>(value) + 1; - renderer.setFBSize(width, height); + renderer->setFBSize(width, height); break; } @@ -116,7 +117,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { case LightingLUTData4: case LightingLUTData5: case LightingLUTData6: - case LightingLUTData7:{ + case LightingLUTData7: { const uint32_t index = regs[LightingLUTIndex]; // Get full LUT index register const uint32_t lutID = getBits<8, 5>(index); // Get which LUT we're actually writing to uint32_t lutIndex = getBits<0, 8>(index); // And get the index inside the LUT we're writing to @@ -133,15 +134,16 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { break; } - case VertexFloatUniformIndex: - shaderUnit.vs.setFloatUniformIndex(value); - break; + case VertexFloatUniformIndex: shaderUnit.vs.setFloatUniformIndex(value); break; - case VertexFloatUniformData0: case VertexFloatUniformData1: case VertexFloatUniformData2: - case VertexFloatUniformData3: case VertexFloatUniformData4: case VertexFloatUniformData5: - case VertexFloatUniformData6: case VertexFloatUniformData7: - shaderUnit.vs.uploadFloatUniform(value); - break; + case VertexFloatUniformData0: + case VertexFloatUniformData1: + case VertexFloatUniformData2: + case VertexFloatUniformData3: + case VertexFloatUniformData4: + case VertexFloatUniformData5: + case VertexFloatUniformData6: + case VertexFloatUniformData7: 
shaderUnit.vs.uploadFloatUniform(value); break; case FixedAttribIndex: fixedAttribCount = 0; @@ -162,7 +164,9 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { } break; - case FixedAttribData0: case FixedAttribData1: case FixedAttribData2: + case FixedAttribData0: + case FixedAttribData1: + case FixedAttribData2: fixedAttrBuff[fixedAttribCount++] = value; if (fixedAttribCount == 3) { @@ -170,15 +174,15 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { vec4f attr; // These are stored in the reverse order anyone would expect them to be in - attr.x() = f24::fromRaw(fixedAttrBuff[2] & 0xffffff); - attr.y() = f24::fromRaw(((fixedAttrBuff[1] & 0xffff) << 8) | (fixedAttrBuff[2] >> 24)); - attr.z() = f24::fromRaw(((fixedAttrBuff[0] & 0xff) << 16) | (fixedAttrBuff[1] >> 16)); - attr.w() = f24::fromRaw(fixedAttrBuff[0] >> 8); + attr[0] = f24::fromRaw(fixedAttrBuff[2] & 0xffffff); + attr[1] = f24::fromRaw(((fixedAttrBuff[1] & 0xffff) << 8) | (fixedAttrBuff[2] >> 24)); + attr[2] = f24::fromRaw(((fixedAttrBuff[0] & 0xff) << 16) | (fixedAttrBuff[1] >> 16)); + attr[3] = f24::fromRaw(fixedAttrBuff[0] >> 8); // If the fixed attribute index is < 12, we're just writing to one of the fixed attributes if (fixedAttribIndex < 12) [[likely]] { shaderUnit.vs.fixedAttributes[fixedAttribIndex++] = attr; - } else if (fixedAttribIndex == 15) { // Otherwise if it's 15, we're submitting an immediate mode vertex + } else if (fixedAttribIndex == 15) { // Otherwise if it's 15, we're submitting an immediate mode vertex const uint totalAttrCount = (regs[PICA::InternalRegs::VertexShaderAttrNum] & 0xf) + 1; if (totalAttrCount <= immediateModeAttrIndex) { printf("Broken state in the immediate mode vertex submission pipeline. 
Failing silently\n"); @@ -199,13 +203,12 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { // If we've reached 3 verts, issue a draw call // Handle rendering depending on the primitive type if (immediateModeVertIndex == 3) { - renderer.drawVertices(PICA::PrimType::TriangleList, immediateModeVertices); + renderer->drawVertices(PICA::PrimType::TriangleList, immediateModeVertices); switch (primType) { // Triangle or geometry primitive. Draw a triangle and discard all vertices - case 0: case 3: - immediateModeVertIndex = 0; - break; + case 0: + case 3: immediateModeVertIndex = 0; break; // Triangle strip. Draw triangle, discard first vertex and keep the last 2 case 1: @@ -223,54 +226,54 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { } } } - } else { // Writing to fixed attributes 13 and 14 probably does nothing, but we'll see + } else { // Writing to fixed attributes 13 and 14 probably does nothing, but we'll see log("Wrote to invalid fixed vertex attribute %d\n", fixedAttribIndex); } } break; - case VertexShaderOpDescriptorIndex: - shaderUnit.vs.setOpDescriptorIndex(value); - break; + case VertexShaderOpDescriptorIndex: shaderUnit.vs.setOpDescriptorIndex(value); break; - case VertexShaderOpDescriptorData0: case VertexShaderOpDescriptorData1: case VertexShaderOpDescriptorData2: - case VertexShaderOpDescriptorData3: case VertexShaderOpDescriptorData4: case VertexShaderOpDescriptorData5: - case VertexShaderOpDescriptorData6: case VertexShaderOpDescriptorData7: - shaderUnit.vs.uploadDescriptor(value); - break; + case VertexShaderOpDescriptorData0: + case VertexShaderOpDescriptorData1: + case VertexShaderOpDescriptorData2: + case VertexShaderOpDescriptorData3: + case VertexShaderOpDescriptorData4: + case VertexShaderOpDescriptorData5: + case VertexShaderOpDescriptorData6: + case VertexShaderOpDescriptorData7: shaderUnit.vs.uploadDescriptor(value); break; - case VertexBoolUniform: - shaderUnit.vs.boolUniform = value & 0xffff; - break; + case 
VertexBoolUniform: shaderUnit.vs.boolUniform = value & 0xffff; break; - case VertexIntUniform0: case VertexIntUniform1: case VertexIntUniform2: case VertexIntUniform3: - shaderUnit.vs.uploadIntUniform(index - VertexIntUniform0, value); - break; + case VertexIntUniform0: + case VertexIntUniform1: + case VertexIntUniform2: + case VertexIntUniform3: shaderUnit.vs.uploadIntUniform(index - VertexIntUniform0, value); break; - case VertexShaderData0: case VertexShaderData1: case VertexShaderData2: case VertexShaderData3: - case VertexShaderData4: case VertexShaderData5: case VertexShaderData6: case VertexShaderData7: - shaderUnit.vs.uploadWord(value); - break; + case VertexShaderData0: + case VertexShaderData1: + case VertexShaderData2: + case VertexShaderData3: + case VertexShaderData4: + case VertexShaderData5: + case VertexShaderData6: + case VertexShaderData7: shaderUnit.vs.uploadWord(value); break; - case VertexShaderEntrypoint: - shaderUnit.vs.entrypoint = value & 0xffff; - break; + case VertexShaderEntrypoint: shaderUnit.vs.entrypoint = value & 0xffff; break; case VertexShaderTransferEnd: if (value != 0) shaderUnit.vs.finalize(); break; - case VertexShaderTransferIndex: - shaderUnit.vs.setBufferIndex(value); - break; + case VertexShaderTransferIndex: shaderUnit.vs.setBufferIndex(value); break; // Command lists can write to the command processor registers and change the command list stream // Several games are known to do this, including New Super Mario Bros 2 and Super Mario 3D Land case CmdBufTrigger0: case CmdBufTrigger1: { - if (value != 0) { // A non-zero value triggers command list processing - int bufferIndex = index - CmdBufTrigger0; // Index of the command buffer to execute (0 or 1) + if (value != 0) { // A non-zero value triggers command list processing + int bufferIndex = index - CmdBufTrigger0; // Index of the command buffer to execute (0 or 1) u32 addr = (regs[CmdBufAddr0 + bufferIndex] & 0xfffffff) << 3; u32 size = (regs[CmdBufSize0 + bufferIndex] & 
0xfffff) << 3; @@ -285,15 +288,13 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { default: // Vertex attribute registers if (index >= AttribInfoStart && index <= AttribInfoEnd) { - uint attributeIndex = (index - AttribInfoStart) / 3; // Which attribute are we writing to - uint reg = (index - AttribInfoStart) % 3; // Which of this attribute's registers are we writing to? + uint attributeIndex = (index - AttribInfoStart) / 3; // Which attribute are we writing to + uint reg = (index - AttribInfoStart) % 3; // Which of this attribute's registers are we writing to? auto& attr = attributeInfo[attributeIndex]; switch (reg) { - case 0: attr.offset = value & 0xfffffff; break; // Attribute offset - case 1: - attr.config1 = value; - break; + case 0: attr.offset = value & 0xfffffff; break; // Attribute offset + case 1: attr.config1 = value; break; case 2: attr.config2 = value; attr.size = getBits<16, 8>(value); @@ -339,13 +340,13 @@ void GPU::startCommandList(u32 addr, u32 size) { u32 id = header & 0xffff; u32 paramMaskIndex = getBits<16, 4>(header); - u32 paramCount = getBits<20, 8>(header); // Number of additional parameters + u32 paramCount = getBits<20, 8>(header); // Number of additional parameters // Bit 31 tells us whether this command is going to write to multiple sequential registers (if the bit is 1) // Or if all written values will go to the same register (If the bit is 0). It's essentially the value that // gets added to the "id" field after each register write bool consecutiveWritingMode = (header >> 31) != 0; - u32 mask = maskLUT[paramMaskIndex]; // Actual parameter mask + u32 mask = maskLUT[paramMaskIndex]; // Actual parameter mask // Increment the ID by 1 after each write if we're in consecutive mode, or 0 otherwise u32 idIncrement = (consecutiveWritingMode) ? 
1 : 0; diff --git a/src/core/PICA/shader_interpreter.cpp b/src/core/PICA/shader_interpreter.cpp index 7af284e3..28eee3c7 100644 --- a/src/core/PICA/shader_interpreter.cpp +++ b/src/core/PICA/shader_interpreter.cpp @@ -1,6 +1,7 @@ -#include "PICA/shader.hpp" #include +#include "PICA/shader.hpp" + using namespace Helpers; void PICAShader::run() { @@ -11,20 +12,19 @@ void PICAShader::run() { while (true) { const u32 instruction = loadedShader[pc++]; - const u32 opcode = instruction >> 26; // Top 6 bits are the opcode + const u32 opcode = instruction >> 26; // Top 6 bits are the opcode switch (opcode) { case ShaderOpcodes::ADD: add(instruction); break; case ShaderOpcodes::CALL: call(instruction); break; case ShaderOpcodes::CALLC: callc(instruction); break; case ShaderOpcodes::CALLU: callu(instruction); break; - case ShaderOpcodes::CMP1: case ShaderOpcodes::CMP2: - cmp(instruction); - break; + case ShaderOpcodes::CMP1: + case ShaderOpcodes::CMP2: cmp(instruction); break; case ShaderOpcodes::DP3: dp3(instruction); break; case ShaderOpcodes::DP4: dp4(instruction); break; case ShaderOpcodes::DPHI: dphi(instruction); break; - case ShaderOpcodes::END: return; // Stop running shader + case ShaderOpcodes::END: return; // Stop running shader case ShaderOpcodes::EX2: ex2(instruction); break; case ShaderOpcodes::FLR: flr(instruction); break; case ShaderOpcodes::IFC: ifc(instruction); break; @@ -38,31 +38,41 @@ void PICAShader::run() { case ShaderOpcodes::MOV: mov(instruction); break; case ShaderOpcodes::MOVA: mova(instruction); break; case ShaderOpcodes::MUL: mul(instruction); break; - case ShaderOpcodes::NOP: break; // Do nothing + case ShaderOpcodes::NOP: break; // Do nothing case ShaderOpcodes::RCP: rcp(instruction); break; case ShaderOpcodes::RSQ: rsq(instruction); break; case ShaderOpcodes::SGEI: sgei(instruction); break; case ShaderOpcodes::SLT: slt(instruction); break; case ShaderOpcodes::SLTI: slti(instruction); break; - case 0x30: case 0x31: case 0x32: case 0x33: case 
0x34: case 0x35: case 0x36: case 0x37: - madi(instruction); - break; + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: madi(instruction); break; - case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F: - mad(instruction); - break; + case 0x38: + case 0x39: + case 0x3A: + case 0x3B: + case 0x3C: + case 0x3D: + case 0x3E: + case 0x3F: mad(instruction); break; - default:Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode); + default: Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode); } // Handle control flow statements. The ordering is important as the priority goes: LOOP > IF > CALL // Handle loop if (loopIndex != 0) { auto& loop = loopInfo[loopIndex - 1]; - if (pc == loop.endingPC) { // Check if the loop needs to start over + if (pc == loop.endingPC) { // Check if the loop needs to start over loop.iterations -= 1; - if (loop.iterations == 0) // If the loop ended, go one level down on the loop stack + if (loop.iterations == 0) // If the loop ended, go one level down on the loop stack loopIndex -= 1; loopCounter += loop.increment; @@ -73,7 +83,7 @@ void PICAShader::run() { // Handle ifs if (ifIndex != 0) { auto& info = conditionalInfo[ifIndex - 1]; - if (pc == info.endingPC) { // Check if the IF block ended + if (pc == info.endingPC) { // Check if the IF block ended pc = info.newPC; ifIndex -= 1; } @@ -82,7 +92,7 @@ void PICAShader::run() { // Handle calls if (callIndex != 0) { auto& info = callInfo[callIndex - 1]; - if (pc == info.endingPC) { // Check if the CALL block ended + if (pc == info.endingPC) { // Check if the CALL block ended pc = info.returnPC; callIndex -= 1; } @@ -92,15 +102,15 @@ void PICAShader::run() { // Calculate the actual source value using an instruction's source field and it's respective index value // The index value is used to apply relative addressing when index != 0 by adding 
one of the 3 addr registers to the -// source field, but only with the original source field is pointing at a vector uniform register +// source field, but only with the original source field is pointing at a vector uniform register u8 PICAShader::getIndexedSource(u32 source, u32 index) { - if (source < 0x20) // No offset is applied if the source isn't pointing to a vector uniform reg + if (source < 0x20) // No offset is applied if the source isn't pointing to a vector uniform reg return source; switch (index) { - case 0: [[likely]] return u8(source); // No offset applied - case 1: return u8(source + addrRegister.x()); - case 2: return u8(source + addrRegister.y()); + case 0: [[likely]] return u8(source); // No offset applied + case 1: return u8(source + addrRegister[0]); + case 2: return u8(source + addrRegister[1]); case 3: return u8(source + loopCounter); } @@ -117,7 +127,7 @@ PICAShader::vec4f PICAShader::getSource(u32 source) { return floatUniforms[source - 0x20]; else { Helpers::warn("[PICA] Unimplemented source value: %X\n", source); - return vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() }); + return vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); } } @@ -136,13 +146,13 @@ bool PICAShader::isCondTrue(u32 instruction) { bool refX = (getBit<25>(instruction)) != 0; switch (condition) { - case 0: // Either cmp register matches + case 0: // Either cmp register matches return cmpRegister[0] == refX || cmpRegister[1] == refY; - case 1: // Both cmp registers match + case 1: // Both cmp registers match return cmpRegister[0] == refX && cmpRegister[1] == refY; - case 2: // At least cmp.x matches + case 2: // At least cmp.x matches return cmpRegister[0] == refX; - default: // At least cmp.y matches + default: // At least cmp.y matches return cmpRegister[1] == refY; } } @@ -150,7 +160,7 @@ bool PICAShader::isCondTrue(u32 instruction) { void PICAShader::add(u32 instruction) { const u32 operandDescriptor = operandDescriptors[instruction & 0x7f]; 
u32 src1 = getBits<12, 7>(instruction); - const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment + const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); @@ -171,7 +181,7 @@ void PICAShader::add(u32 instruction) { void PICAShader::mul(u32 instruction) { const u32 operandDescriptor = operandDescriptors[instruction & 0x7f]; u32 src1 = getBits<12, 7>(instruction); - const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment + const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); @@ -210,7 +220,7 @@ void PICAShader::flr(u32 instruction) { void PICAShader::max(u32 instruction) { const u32 operandDescriptor = operandDescriptors[instruction & 0x7f]; const u32 src1 = getBits<12, 7>(instruction); - const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment + const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); @@ -232,7 +242,7 @@ void PICAShader::max(u32 instruction) { void PICAShader::min(u32 instruction) { const u32 operandDescriptor = operandDescriptors[instruction & 0x7f]; const u32 src1 = getBits<12, 7>(instruction); - const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment + const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); @@ -278,16 +288,16 @@ void PICAShader::mova(u32 instruction) { vec4f srcVector = getSourceSwizzled<1>(src, operandDescriptor); u32 componentMask = operandDescriptor & 0xf; - if (componentMask & 0b1000) // x component - addrRegister.x() = 
static_cast(srcVector.x().toFloat32()); - if (componentMask & 0b0100) // y component - addrRegister.y() = static_cast(srcVector.y().toFloat32()); + if (componentMask & 0b1000) // x component + addrRegister[0] = static_cast(srcVector[0].toFloat32()); + if (componentMask & 0b0100) // y component + addrRegister[1] = static_cast(srcVector[1].toFloat32()); } void PICAShader::dp3(u32 instruction) { const u32 operandDescriptor = operandDescriptors[instruction & 0x7f]; u32 src1 = getBits<12, 7>(instruction); - const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment + const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); @@ -309,7 +319,7 @@ void PICAShader::dp3(u32 instruction) { void PICAShader::dp4(u32 instruction) { const u32 operandDescriptor = operandDescriptors[instruction & 0x7f]; u32 src1 = getBits<12, 7>(instruction); - const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment + const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); @@ -480,7 +490,7 @@ void PICAShader::madi(u32 instruction) { void PICAShader::slt(u32 instruction) { const u32 operandDescriptor = operandDescriptors[instruction & 0x7f]; u32 src1 = getBits<12, 7>(instruction); - const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment + const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); @@ -542,11 +552,11 @@ void PICAShader::slti(u32 instruction) { void PICAShader::cmp(u32 instruction) { const u32 operandDescriptor = operandDescriptors[instruction & 0x7f]; const u32 src1 = getBits<12, 7>(instruction); - const u32 src2 = getBits<7, 5>(instruction); // 
src2 coming first because PICA moment + const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment const u32 idx = getBits<19, 2>(instruction); const u32 cmpY = getBits<21, 3>(instruction); const u32 cmpX = getBits<24, 3>(instruction); - const u32 cmpOperations[2] = { cmpX, cmpY }; + const u32 cmpOperations[2] = {cmpX, cmpY}; if (idx) Helpers::panic("[PICA] CMP: idx != 0"); vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor); @@ -554,33 +564,31 @@ void PICAShader::cmp(u32 instruction) { for (int i = 0; i < 2; i++) { switch (cmpOperations[i]) { - case 0: // Equal + case 0: // Equal cmpRegister[i] = srcVec1[i] == srcVec2[i]; break; - case 1: // Not equal + case 1: // Not equal cmpRegister[i] = srcVec1[i] != srcVec2[i]; break; - case 2: // Less than + case 2: // Less than cmpRegister[i] = srcVec1[i] < srcVec2[i]; break; - case 3: // Less than or equal + case 3: // Less than or equal cmpRegister[i] = srcVec1[i] <= srcVec2[i]; break; - case 4: // Greater than + case 4: // Greater than cmpRegister[i] = srcVec1[i] > srcVec2[i]; break; - case 5: // Greater than or equal + case 5: // Greater than or equal cmpRegister[i] = srcVec1[i] >= srcVec2[i]; break; - default: - cmpRegister[i] = true; - break; + default: cmpRegister[i] = true; break; } } } @@ -604,7 +612,7 @@ void PICAShader::ifc(u32 instruction) { void PICAShader::ifu(u32 instruction) { const u32 dest = getBits<10, 12>(instruction); - const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check + const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check if (boolUniform & (1 << bit)) { if (ifIndex >= 8) [[unlikely]] @@ -615,8 +623,7 @@ void PICAShader::ifu(u32 instruction) { auto& block = conditionalInfo[ifIndex++]; block.endingPC = dest; block.newPC = dest + num; - } - else { + } else { pc = dest; } } @@ -637,12 +644,12 @@ void PICAShader::call(u32 instruction) { void PICAShader::callc(u32 instruction) { if (isCondTrue(instruction)) { - 
call(instruction); // Pls inline + call(instruction); // Pls inline } } void PICAShader::callu(u32 instruction) { - const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check + const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check if (boolUniform & (1 << bit)) { if (callIndex >= 4) [[unlikely]] @@ -664,26 +671,25 @@ void PICAShader::loop(u32 instruction) { Helpers::panic("[PICA] Overflowed loop stack"); u32 dest = getBits<10, 12>(instruction); - auto& uniform = intUniforms[getBits<22, 2>(instruction)]; // The uniform we'll get loop info from - loopCounter = uniform.y(); + auto& uniform = intUniforms[getBits<22, 2>(instruction)]; // The uniform we'll get loop info from + loopCounter = uniform[1]; auto& loop = loopInfo[loopIndex++]; loop.startingPC = pc; - loop.endingPC = dest + 1; // Loop is inclusive so we need + 1 here - loop.iterations = uniform.x() + 1; - loop.increment = uniform.z(); + loop.endingPC = dest + 1; // Loop is inclusive so we need + 1 here + loop.iterations = uniform[0] + 1; + loop.increment = uniform[2]; } void PICAShader::jmpc(u32 instruction) { - if (isCondTrue(instruction)) - pc = getBits<10, 12>(instruction); + if (isCondTrue(instruction)) pc = getBits<10, 12>(instruction); } void PICAShader::jmpu(u32 instruction) { - const u32 test = (instruction & 1) ^ 1; // If the LSB is 0 we want to compare to true, otherwise compare to false + const u32 test = (instruction & 1) ^ 1; // If the LSB is 0 we want to compare to true, otherwise compare to false const u32 dest = getBits<10, 12>(instruction); - const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check + const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check - if (((boolUniform >> bit) & 1) == test) // Jump if the bool uniform is the value we want + if (((boolUniform >> bit) & 1) == test) // Jump if the bool uniform is the value we want pc = dest; } \ No newline at end of file diff --git 
a/src/core/PICA/shader_unit.cpp b/src/core/PICA/shader_unit.cpp index 6cbc2693..aa7b4c12 100644 --- a/src/core/PICA/shader_unit.cpp +++ b/src/core/PICA/shader_unit.cpp @@ -1,4 +1,5 @@ #include "PICA/shader_unit.hpp" + #include "cityhash.hpp" void ShaderUnit::reset() { @@ -18,18 +19,18 @@ void PICAShader::reset() { opDescriptorIndex = 0; f32UniformTransfer = false; - const vec4f zero = vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() }); + const vec4f zero = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); inputs.fill(zero); floatUniforms.fill(zero); outputs.fill(zero); tempRegisters.fill(zero); for (auto& e : intUniforms) { - e.x() = e.y() = e.z() = e.w() = 0; + e[0] = e[1] = e[2] = e[3] = 0; } - addrRegister.x() = 0; - addrRegister.y() = 0; + addrRegister[0] = 0; + addrRegister[1] = 0; loopCounter = 0; codeHashDirty = true; From c53080b4445dad320f5f1588f6ec6ea0497e968b Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Mon, 10 Jul 2023 09:24:00 -0700 Subject: [PATCH 05/14] Fix HTTPServer gpu-renderer interfacing Fixing some compilation issues that occur when enabling the HTTP server --- CMakeLists.txt | 2 +- include/PICA/gpu.hpp | 1 + include/renderer.hpp | 2 ++ include/renderer_gl/renderer_gl.hpp | 3 ++- src/core/renderer_gl/renderer_gl.cpp | 2 +- 5 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c932497..74c33f87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,7 +93,7 @@ endif() set(SOURCE_FILES src/main.cpp src/emulator.cpp src/io_file.cpp src/config.cpp src/core/CPU/cpu_dynarmic.cpp src/core/CPU/dynarmic_cycles.cpp - src/core/memory.cpp src/renderer.cpp src/httpserver.cpp src/stb_image_write.c + src/core/memory.cpp src/renderer.cpp src/httpserver.cpp src/stb_image_write.c src/httpserver.cpp ) set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp) set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp diff --git a/include/PICA/gpu.hpp 
b/include/PICA/gpu.hpp index 929881b7..753ec728 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -85,6 +85,7 @@ class GPU { GPU(Memory& mem, EmulatorConfig& config); void initGraphicsContext() { renderer->initGraphicsContext(); } void display() { renderer->display(); } + void screenshot(const std::string& name) { renderer->screenshot(name); } void fireDMA(u32 dest, u32 source, u32 size); void reset(); diff --git a/include/renderer.hpp b/include/renderer.hpp index c7315739..91dd287b 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -27,6 +27,8 @@ class Renderer { virtual void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) = 0; // Perform display transfer virtual void drawVertices(PICA::PrimType primType, std::span vertices) = 0; // Draw the given vertices + virtual void screenshot(const std::string& name) = 0; + virtual void setFBSize(u32 width, u32 height) = 0; virtual void setColourFormat(PICA::ColorFmt format) = 0; diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 24301a11..4c059f05 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -1,7 +1,8 @@ #pragma once +#include + #include #include -#include #include "PICA/float_types.hpp" #include "PICA/pica_vertex.hpp" diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 22484608..1546761d 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -1034,7 +1034,7 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen } -void Renderer::screenshot(const std::string& name) { +void RendererGL::screenshot(const std::string& name) { constexpr uint width = 400; constexpr uint height = 2 * 240; From a636a0d1da600fb9492b7b1e49a8a7e7d2333f5a Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Mon, 10 Jul 
2023 09:25:39 -0700 Subject: [PATCH 06/14] Replace `ENABLE_OPENGL` with `PANDA3DS_ENABLE_OPENGL` --- CMakeLists.txt | 6 +++--- include/emulator.hpp | 4 ++-- src/core/PICA/gpu.cpp | 4 ++-- src/emulator.cpp | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 74c33f87..0c665f48 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ endif() option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" OFF) option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF) -option(ENABLE_OPENGL "Enable OpenGL rendering backend" ON) +option(PANDA3DS_ENABLE_OPENGL "Enable OpenGL rendering backend" ON) option(ENABLE_LTO "Enable link-time optimization" OFF) option(ENABLE_USER_BUILD "Make a user-facing build. These builds have various assertions disabled, LTO, and more" OFF) option(ENABLE_HTTP_SERVER "Enable HTTP server. Used for Discord bot support" OFF) @@ -180,8 +180,8 @@ endif() target_link_libraries(Alber PRIVATE dynarmic SDL2-static cryptopp) -if(ENABLE_OPENGL) - target_compile_definitions(Alber PUBLIC "ENABLE_OPENGL=1") +if(PANDA3DS_ENABLE_OPENGL) + target_compile_definitions(Alber PUBLIC "PANDA3DS_ENABLE_OPENGL=1") set(RENDERER_GL_INCLUDE_FILES include/opengl.hpp include/renderer_gl/renderer_gl.hpp include/renderer_gl/textures.hpp diff --git a/include/emulator.hpp b/include/emulator.hpp index ae6e7142..7f5bef0e 100644 --- a/include/emulator.hpp +++ b/include/emulator.hpp @@ -13,7 +13,7 @@ #include "io_file.hpp" #include "memory.hpp" -#if ENABLE_OPENGL +#if PANDA3DS_ENABLE_OPENGL #include "gl_state.hpp" #endif @@ -33,7 +33,7 @@ class Emulator { EmulatorConfig config; SDL_Window* window; -#if ENABLE_OPENGL +#if PANDA3DS_ENABLE_OPENGL SDL_GLContext glContext; GLStateManager gl; #endif diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 29eeef04..b4fb644e 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -8,7 +8,7 @@ #include "PICA/float_types.hpp" 
#include "PICA/regs.hpp" -#if ENABLE_OPENGL +#if PANDA3DS_ENABLE_OPENGL #include "renderer_gl/renderer_gl.hpp" #endif @@ -21,7 +21,7 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) { mem.setVRAM(vram); // Give the bus a pointer to our VRAM // TODO: configurable backend -#if ENABLE_OPENGL +#if PANDA3DS_ENABLE_OPENGL renderer.reset(new RendererGL(*this, regs)); #endif } diff --git a/src/emulator.cpp b/src/emulator.cpp index db628853..6b821d86 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -2,7 +2,7 @@ #include -#if ENABLE_OPENGL +#if PANDA3DS_ENABLE_OPENGL #include #endif @@ -27,7 +27,7 @@ Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory Helpers::warn("Failed to initialize SDL2 GameController: %s", SDL_GetError()); } -#if ENABLE_OPENGL +#if PANDA3DS_ENABLE_OPENGL // Request OpenGL 4.1 Core (Max available on MacOS) // MacOS gets mad if we don't explicitly demand a core profile SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); @@ -434,7 +434,7 @@ bool Emulator::loadELF(std::ifstream& file) { // Reset our graphics context and initialize the GPU's graphics context void Emulator::initGraphicsContext() { -#if ENABLE_OPENGL +#if PANDA3DS_ENABLE_OPENGL gl.reset(); // TODO (For when we have multiple backends): Only do this if we are using OpenGL #endif gpu.initGraphicsContext(); From 4864c51125cced2077b86a9f9346ffd2b4eb3210 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Mon, 10 Jul 2023 10:31:46 -0700 Subject: [PATCH 07/14] Migrate OpenGL specific headers to `renderer_gl` --- CMakeLists.txt | 5 +++-- include/emulator.hpp | 5 ----- include/{ => renderer_gl}/gl_state.hpp | 0 include/{ => renderer_gl}/opengl.hpp | 0 src/{ => core/renderer_gl}/gl_state.cpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 2 ++ src/emulator.cpp | 7 +------ 7 files changed, 7 insertions(+), 14 deletions(-) rename include/{ => renderer_gl}/gl_state.hpp (100%) rename include/{ => renderer_gl}/opengl.hpp (100%) 
rename src/{ => core/renderer_gl}/gl_state.cpp (96%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c665f48..cf7f6019 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -145,7 +145,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/PICA/dynapica/shader_rec_emitter_x64.hpp include/PICA/pica_hash.hpp include/result/result.hpp include/result/result_common.hpp include/result/result_fs.hpp include/result/result_fnd.hpp include/result/result_gsp.hpp include/result/result_kernel.hpp include/result/result_os.hpp - include/crypto/aes_engine.hpp include/metaprogramming.hpp include/PICA/pica_vertex.hpp include/gl_state.hpp + include/crypto/aes_engine.hpp include/metaprogramming.hpp include/PICA/pica_vertex.hpp include/config.hpp include/services/ir_user.hpp include/httpserver.hpp ) @@ -186,10 +186,11 @@ if(PANDA3DS_ENABLE_OPENGL) set(RENDERER_GL_INCLUDE_FILES include/opengl.hpp include/renderer_gl/renderer_gl.hpp include/renderer_gl/textures.hpp include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp + include/renderer_gl/gl_state.hpp ) set(RENDERER_GL_SOURCE_FILES src/core/renderer_gl/renderer_gl.cpp src/core/renderer_gl/textures.cpp src/core/renderer_gl/etc1.cpp - src/gl_state.cpp + src/core/renderer_gl/gl_state.cpp ) source_group("Source Files\\Core\\OpenGL Renderer" FILES ${RENDERER_GL_SOURCE_FILES}) diff --git a/include/emulator.hpp b/include/emulator.hpp index 7f5bef0e..034b0034 100644 --- a/include/emulator.hpp +++ b/include/emulator.hpp @@ -13,10 +13,6 @@ #include "io_file.hpp" #include "memory.hpp" -#if PANDA3DS_ENABLE_OPENGL -#include "gl_state.hpp" -#endif - #ifdef PANDA3DS_ENABLE_HTTP_SERVER #include "httpserver.hpp" #endif @@ -35,7 +31,6 @@ class Emulator { #if PANDA3DS_ENABLE_OPENGL SDL_GLContext glContext; - GLStateManager gl; #endif SDL_GameController* gameController = nullptr; diff --git a/include/gl_state.hpp b/include/renderer_gl/gl_state.hpp similarity index 100% rename from 
include/gl_state.hpp rename to include/renderer_gl/gl_state.hpp diff --git a/include/opengl.hpp b/include/renderer_gl/opengl.hpp similarity index 100% rename from include/opengl.hpp rename to include/renderer_gl/opengl.hpp diff --git a/src/gl_state.cpp b/src/core/renderer_gl/gl_state.cpp similarity index 96% rename from src/gl_state.cpp rename to src/core/renderer_gl/gl_state.cpp index 612ae44d..691eb7b6 100644 --- a/src/gl_state.cpp +++ b/src/core/renderer_gl/gl_state.cpp @@ -1,4 +1,4 @@ -#include "gl_state.hpp" +#include "renderer_gl/gl_state.hpp" void GLStateManager::resetBlend() { blendEnabled = false; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 1546761d..bcfb0dc0 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -607,6 +607,8 @@ void RendererGL::reset() { } void RendererGL::initGraphicsContext() { + gl.reset(); + OpenGL::Shader vert(vertexShader, OpenGL::Vertex); OpenGL::Shader frag(fragmentShader, OpenGL::Fragment); triangleProgram.create({vert, frag}); diff --git a/src/emulator.cpp b/src/emulator.cpp index 6b821d86..0f9c8c54 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -433,12 +433,7 @@ bool Emulator::loadELF(std::ifstream& file) { } // Reset our graphics context and initialize the GPU's graphics context -void Emulator::initGraphicsContext() { -#if PANDA3DS_ENABLE_OPENGL - gl.reset(); // TODO (For when we have multiple backends): Only do this if we are using OpenGL -#endif - gpu.initGraphicsContext(); -} +void Emulator::initGraphicsContext() { gpu.initGraphicsContext(); } #ifdef PANDA3DS_ENABLE_HTTP_SERVER void Emulator::pollHttpServer() { From 0a605339a327492c8a3ec7acea9bf14279054e0c Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Mon, 10 Jul 2023 12:07:57 -0700 Subject: [PATCH 08/14] Conditionally compile and link `glad` Only include this library in the case that the OpenGL backend is enabled. 
--- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cf7f6019..06079482 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,7 +46,7 @@ set(SDL_STATIC ON CACHE BOOL "" FORCE) set(SDL_SHARED OFF CACHE BOOL "" FORCE) set(SDL_TEST OFF CACHE BOOL "" FORCE) add_subdirectory(third_party/SDL2) -add_subdirectory(third_party/glad) + add_subdirectory(third_party/toml11) include_directories(${SDL2_INCLUDE_DIR}) include_directories(third_party/toml11) @@ -181,6 +181,8 @@ endif() target_link_libraries(Alber PRIVATE dynarmic SDL2-static cryptopp) if(PANDA3DS_ENABLE_OPENGL) + add_subdirectory(third_party/glad) + target_compile_definitions(Alber PUBLIC "PANDA3DS_ENABLE_OPENGL=1") set(RENDERER_GL_INCLUDE_FILES include/opengl.hpp From 0f3d77768dd5408cfaec6c0f0d505b4b7bae1b24 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Tue, 11 Jul 2023 07:41:17 -0700 Subject: [PATCH 09/14] Fix array declarations to use block-formatting --- src/core/renderer_gl/renderer_gl.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index bcfb0dc0..3a587046 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -725,7 +725,8 @@ void RendererGL::setupBlending() { GL_CONSTANT_ALPHA, GL_ONE_MINUS_CONSTANT_ALPHA, GL_SRC_ALPHA_SATURATE, - GL_ONE}; + GL_ONE, + }; if (!blendingEnabled) { gl.disableBlend(); @@ -759,9 +760,10 @@ void RendererGL::setupBlending() { void RendererGL::setupTextureEnvState() { // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. 
- static constexpr std::array ioBases = {PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, - PICA::InternalRegs::TexEnv2Source, PICA::InternalRegs::TexEnv3Source, - PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source}; + static constexpr std::array ioBases = { + PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, + PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, + }; u32 textureEnvSourceRegs[6]; u32 textureEnvOperandRegs[6]; @@ -788,7 +790,10 @@ void RendererGL::setupTextureEnvState() { void RendererGL::bindTexturesToSlots() { static constexpr std::array ioBases = { - PICA::InternalRegs::Tex0BorderColor, PICA::InternalRegs::Tex1BorderColor, PICA::InternalRegs::Tex2BorderColor}; + PICA::InternalRegs::Tex0BorderColor, + PICA::InternalRegs::Tex1BorderColor, + PICA::InternalRegs::Tex2BorderColor, + }; for (int i = 0; i < 3; i++) { if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { From 5b7fa5be7e9a91da992087f1be475290d2d3efe1 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Tue, 11 Jul 2023 07:46:35 -0700 Subject: [PATCH 10/14] Remove redundant `Renderer::screenshot` definition --- src/core/renderer_gl/renderer_gl.cpp | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 3a587046..08a7632c 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -1064,31 +1064,5 @@ void RendererGL::screenshot(const std::string& name) { } } - stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0); -} - -void Renderer::screenshot(const std::string& name) { - constexpr uint width = 400; - constexpr uint height = 2 * 240; - - std::vector pixels, flippedPixels; - pixels.resize(width * height * 4); - flippedPixels.resize(pixels.size()); - ; - - OpenGL::bindScreenFramebuffer(); - 
glReadPixels(0, 0, width, height, GL_BGRA, GL_UNSIGNED_BYTE, pixels.data()); - - // Flip the image vertically - for (int y = 0; y < height; y++) { - memcpy(&flippedPixels[y * width * 4], &pixels[(height - y - 1) * width * 4], width * 4); - // Swap R and B channels - for (int x = 0; x < width; x++) { - std::swap(flippedPixels[y * width * 4 + x * 4 + 0], flippedPixels[y * width * 4 + x * 4 + 2]); - // Set alpha to 0xFF - flippedPixels[y * width * 4 + x * 4 + 3] = 0xFF; - } - } - stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0); } \ No newline at end of file From 666fd96e7fb1cff9884f44b24dc1582f78dd10dd Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Tue, 11 Jul 2023 07:49:43 -0700 Subject: [PATCH 11/14] Move color/depth format and size to Renderer interface The state of these values are not specific to any rendering backend and can be moved to be part of the interface itself --- include/renderer.hpp | 27 ++++++++++++++++++++++----- include/renderer_gl/renderer_gl.hpp | 25 ------------------------- src/core/renderer_gl/renderer_gl.cpp | 4 ++-- 3 files changed, 24 insertions(+), 32 deletions(-) diff --git a/include/renderer.hpp b/include/renderer.hpp index 91dd287b..5a2b40b4 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -14,6 +14,15 @@ class Renderer { static constexpr u32 regNum = 0x300; // Number of internal PICA registers const std::array& regs; + std::array fbSize; // The size of the framebuffer (ie both the colour and depth buffer)' + + u32 colourBufferLoc; // Location in 3DS VRAM for the colour buffer + PICA::ColorFmt colourBufferFormat; // Format of the colours stored in the colour buffer + + // Same for the depth/stencil buffer + u32 depthBufferLoc; + PICA::DepthFmt depthBufferFormat; + public: Renderer(GPU& gpu, const std::array& internalRegs); virtual ~Renderer(); @@ -29,11 +38,19 @@ class Renderer { virtual void screenshot(const std::string& name) = 0; - virtual void setFBSize(u32 width, u32 height) = 0; + void 
setFBSize(u32 width, u32 height) { + fbSize[0] = width; + fbSize[1] = height; + } - virtual void setColourFormat(PICA::ColorFmt format) = 0; - virtual void setDepthFormat(PICA::DepthFmt format) = 0; + void setColourFormat(PICA::ColorFmt format) { colourBufferFormat = format; } + void setDepthFormat(PICA::DepthFmt format) { + if (format == PICA::DepthFmt::Unknown1) { + Helpers::panic("[PICA] Undocumented depth-stencil mode!"); + } + depthBufferFormat = format; + } - virtual void setColourBufferLoc(u32 loc) = 0; - virtual void setDepthBufferLoc(u32 loc) = 0; + void setColourBufferLoc(u32 loc) { colourBufferLoc = loc; } + void setDepthBufferLoc(u32 loc) { depthBufferLoc = loc; } }; \ No newline at end of file diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 4c059f05..38219216 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -49,15 +49,6 @@ class RendererGL final : public Renderer { SurfaceCache colourBufferCache; SurfaceCache textureCache; - OpenGL::uvec2 fbSize; // The size of the framebuffer (ie both the colour and depth buffer)' - - u32 colourBufferLoc; // Location in 3DS VRAM for the colour buffer - PICA::ColorFmt colourBufferFormat; // Format of the colours stored in the colour buffer - - // Same for the depth/stencil buffer - u32 depthBufferLoc; - PICA::DepthFmt depthBufferFormat; - // Dummy VAO/VBO for blitting the final output OpenGL::VertexArray dummyVAO; OpenGL::VertexBuffer dummyVBO; @@ -88,20 +79,4 @@ class RendererGL final : public Renderer { // Take a screenshot of the screen and store it in a file void screenshot(const std::string& name); - - void setFBSize(u32 width, u32 height) { - fbSize.x() = width; - fbSize.y() = height; - } - - void setColourFormat(PICA::ColorFmt format) { colourBufferFormat = format; } - void setDepthFormat(PICA::DepthFmt format) { - if (format == PICA::DepthFmt::Unknown1) { - Helpers::panic("[PICA] Undocumented depth-stencil mode!"); - } - 
depthBufferFormat = format; - } - - void setColourBufferLoc(u32 loc) { colourBufferLoc = loc; } - void setDepthBufferLoc(u32 loc) { depthBufferLoc = loc; } }; \ No newline at end of file diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 08a7632c..0083364b 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -961,7 +961,7 @@ void RendererGL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 co OpenGL::Framebuffer RendererGL::getColourFBO() { // We construct a colour buffer object and see if our cache has any matching colour buffers in it // If not, we allocate a texture & FBO for our framebuffer and store it in the cache - ColourBuffer sampleBuffer(colourBufferLoc, colourBufferFormat, fbSize.x(), fbSize.y()); + ColourBuffer sampleBuffer(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]); auto buffer = colourBufferCache.find(sampleBuffer); if (buffer.has_value()) { @@ -973,7 +973,7 @@ OpenGL::Framebuffer RendererGL::getColourFBO() { void RendererGL::bindDepthBuffer() { // Similar logic as the getColourFBO function - DepthBuffer sampleBuffer(depthBufferLoc, depthBufferFormat, fbSize.x(), fbSize.y()); + DepthBuffer sampleBuffer(depthBufferLoc, depthBufferFormat, fbSize[0], fbSize[1]); auto buffer = depthBufferCache.find(sampleBuffer); GLuint tex; From 2f45714240229edf8d7b1a320892793a7de0fcb0 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Tue, 11 Jul 2023 08:10:10 -0700 Subject: [PATCH 12/14] Add `override`-specifier to renderer implementation --- include/renderer_gl/renderer_gl.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 38219216..d34bbc94 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -70,13 +70,13 @@ class RendererGL final : public Renderer { public: RendererGL(GPU& gpu, const std::array& 
internalRegs) : Renderer(gpu, internalRegs) {} - void reset(); - void display(); // Display the 3DS screen contents to the window - void initGraphicsContext(); // Initialize graphics context - void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM - void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags); // Perform display transfer - void drawVertices(PICA::PrimType primType, std::span vertices); // Draw the given vertices + void reset() override; + void display() override; // Display the 3DS screen contents to the window + void initGraphicsContext() override; // Initialize graphics context + void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; // Clear a GPU buffer in VRAM + void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; // Perform display transfer + void drawVertices(PICA::PrimType primType, std::span vertices) override; // Draw the given vertices // Take a screenshot of the screen and store it in a file - void screenshot(const std::string& name); + void screenshot(const std::string& name) override; }; \ No newline at end of file From 7b6cd90d369c047357d9c98190f2a5bd9c541ea6 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 15 Jul 2023 04:56:43 +0300 Subject: [PATCH 13/14] Cleanup for #98 --- .../PICA/dynapica/shader_rec_emitter_x64.hpp | 5 ++- include/PICA/gpu.hpp | 4 +- include/PICA/shader.hpp | 15 +++++-- include/emulator.hpp | 9 +++- include/renderer_gl/renderer_gl.hpp | 1 - src/core/PICA/gpu.cpp | 6 +-- src/core/PICA/regs.cpp | 45 +++++++++++++++---- src/core/PICA/shader_interpreter.cpp | 25 ++++++++--- src/core/renderer_gl/renderer_gl.cpp | 12 +++-- src/emulator.cpp | 6 +-- 10 files changed, 95 insertions(+), 33 deletions(-) diff --git a/include/PICA/dynapica/shader_rec_emitter_x64.hpp 
b/include/PICA/dynapica/shader_rec_emitter_x64.hpp index 109fddac..47e011d6 100644 --- a/include/PICA/dynapica/shader_rec_emitter_x64.hpp +++ b/include/PICA/dynapica/shader_rec_emitter_x64.hpp @@ -49,6 +49,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator { const u32 opcode = instruction >> 26; return (opcode == ShaderOpcodes::CALL) || (opcode == ShaderOpcodes::CALLC) || (opcode == ShaderOpcodes::CALLU); } + // Scan the shader code for call instructions to fill up the returnPCs vector before starting compilation void scanForCalls(const PICAShader& shaderUnit); @@ -106,9 +107,11 @@ class ShaderEmitter : public Xbyak::CodeGenerator { MAKE_LOG_FUNCTION(log, shaderJITLogger) public: - using InstructionCallback = const void (*)(PICAShader& shaderUnit); // Callback type used for instructions + // Callback type used for instructions + using InstructionCallback = const void (*)(PICAShader& shaderUnit); // Callback type used for the JIT prologue. This is what the caller will call using PrologueCallback = const void (*)(PICAShader& shaderUnit, InstructionCallback cb); + PrologueCallback prologueCb = nullptr; // Initialize our emitter with "allocSize" bytes of RWX memory diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 753ec728..d4e54358 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -103,7 +103,9 @@ class GPU { // TODO: Emulate the transfer engine & its registers // Then this can be emulated by just writing the appropriate values there - void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { renderer->clearBuffer(startAddress, endAddress, value, control); } + void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { + renderer->clearBuffer(startAddress, endAddress, value, control); + } // TODO: Emulate the transfer engine & its registers // Then this can be emulated by just writing the appropriate values there diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index 06d529c9..0f3154f1 
100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -7,7 +7,10 @@ #include "PICA/pica_hash.hpp" #include "helpers.hpp" -enum class ShaderType { Vertex, Geometry }; +enum class ShaderType { + Vertex, + Geometry, +}; namespace ShaderOpcodes { enum : u32 { @@ -221,11 +224,13 @@ class PICAShader { void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); } void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; } - void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; } void uploadWord(u32 word) { - if (bufferIndex >= 4095) Helpers::panic("o no, shader upload overflew"); + if (bufferIndex >= 4095) { + Helpers::panic("o no, shader upload overflew"); + } + bufferedShader[bufferIndex++] = word; bufferIndex &= 0xfff; @@ -247,7 +252,9 @@ class PICAShader { void uploadFloatUniform(u32 word) { floatUniformBuffer[floatUniformWordCount++] = word; - if (floatUniformIndex >= 96) Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex); + if (floatUniformIndex >= 96) { + Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex); + } if ((f32UniformTransfer && floatUniformWordCount >= 4) || (!f32UniformTransfer && floatUniformWordCount >= 3)) { vec4f& uniform = floatUniforms[floatUniformIndex++]; diff --git a/include/emulator.hpp b/include/emulator.hpp index 034b0034..f27cd990 100644 --- a/include/emulator.hpp +++ b/include/emulator.hpp @@ -17,7 +17,12 @@ #include "httpserver.hpp" #endif -enum class ROMType { None, ELF, NCSD, CXI }; +enum class ROMType { + None, + ELF, + NCSD, + CXI, +}; class Emulator { CPU cpu; @@ -29,7 +34,7 @@ class Emulator { EmulatorConfig config; SDL_Window* window; -#if PANDA3DS_ENABLE_OPENGL +#ifdef PANDA3DS_ENABLE_OPENGL SDL_GLContext glContext; #endif diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index d34bbc94..0e7f7bcb 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ 
b/include/renderer_gl/renderer_gl.hpp @@ -1,5 +1,4 @@ #pragma once -#include #include #include diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index b4fb644e..15c99c42 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -8,7 +8,7 @@ #include "PICA/float_types.hpp" #include "PICA/regs.hpp" -#if PANDA3DS_ENABLE_OPENGL +#ifdef PANDA3DS_ENABLE_OPENGL #include "renderer_gl/renderer_gl.hpp" #endif @@ -20,8 +20,8 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) { vram = new u8[vramSize]; mem.setVRAM(vram); // Give the bus a pointer to our VRAM - // TODO: configurable backend -#if PANDA3DS_ENABLE_OPENGL + // TODO: Configurable backend +#ifdef PANDA3DS_ENABLE_OPENGL renderer.reset(new RendererGL(*this, regs)); #endif } diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index bbffa99a..d245f8af 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -134,7 +134,10 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { break; } - case VertexFloatUniformIndex: shaderUnit.vs.setFloatUniformIndex(value); break; + case VertexFloatUniformIndex: { + shaderUnit.vs.setFloatUniformIndex(value); + break; + } case VertexFloatUniformData0: case VertexFloatUniformData1: @@ -143,7 +146,10 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { case VertexFloatUniformData4: case VertexFloatUniformData5: case VertexFloatUniformData6: - case VertexFloatUniformData7: shaderUnit.vs.uploadFloatUniform(value); break; + case VertexFloatUniformData7: { + shaderUnit.vs.uploadFloatUniform(value); + break; + } case FixedAttribIndex: fixedAttribCount = 0; @@ -208,7 +214,10 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { switch (primType) { // Triangle or geometry primitive. Draw a triangle and discard all vertices case 0: - case 3: immediateModeVertIndex = 0; break; + case 3: { + immediateModeVertIndex = 0; + break; + } // Triangle strip. 
Draw triangle, discard first vertex and keep the last 2 case 1: @@ -233,7 +242,10 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { break; - case VertexShaderOpDescriptorIndex: shaderUnit.vs.setOpDescriptorIndex(value); break; + case VertexShaderOpDescriptorIndex: { + shaderUnit.vs.setOpDescriptorIndex(value); + break; + } case VertexShaderOpDescriptorData0: case VertexShaderOpDescriptorData1: @@ -242,14 +254,23 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { case VertexShaderOpDescriptorData4: case VertexShaderOpDescriptorData5: case VertexShaderOpDescriptorData6: - case VertexShaderOpDescriptorData7: shaderUnit.vs.uploadDescriptor(value); break; + case VertexShaderOpDescriptorData7: { + shaderUnit.vs.uploadDescriptor(value); + break; + } - case VertexBoolUniform: shaderUnit.vs.boolUniform = value & 0xffff; break; + case VertexBoolUniform: { + shaderUnit.vs.boolUniform = value & 0xffff; + break; + } case VertexIntUniform0: case VertexIntUniform1: case VertexIntUniform2: - case VertexIntUniform3: shaderUnit.vs.uploadIntUniform(index - VertexIntUniform0, value); break; + case VertexIntUniform3: { + shaderUnit.vs.uploadIntUniform(index - VertexIntUniform0, value); + break; + } case VertexShaderData0: case VertexShaderData1: @@ -258,9 +279,15 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { case VertexShaderData4: case VertexShaderData5: case VertexShaderData6: - case VertexShaderData7: shaderUnit.vs.uploadWord(value); break; + case VertexShaderData7: { + shaderUnit.vs.uploadWord(value); + break; + } - case VertexShaderEntrypoint: shaderUnit.vs.entrypoint = value & 0xffff; break; + case VertexShaderEntrypoint: { + shaderUnit.vs.entrypoint = value & 0xffff; + break; + } case VertexShaderTransferEnd: if (value != 0) shaderUnit.vs.finalize(); diff --git a/src/core/PICA/shader_interpreter.cpp b/src/core/PICA/shader_interpreter.cpp index 28eee3c7..9fed6bba 100644 --- a/src/core/PICA/shader_interpreter.cpp +++ 
b/src/core/PICA/shader_interpreter.cpp @@ -20,7 +20,11 @@ void PICAShader::run() { case ShaderOpcodes::CALLC: callc(instruction); break; case ShaderOpcodes::CALLU: callu(instruction); break; case ShaderOpcodes::CMP1: - case ShaderOpcodes::CMP2: cmp(instruction); break; + case ShaderOpcodes::CMP2: { + cmp(instruction); + break; + } + case ShaderOpcodes::DP3: dp3(instruction); break; case ShaderOpcodes::DP4: dp4(instruction); break; case ShaderOpcodes::DPHI: dphi(instruction); break; @@ -52,7 +56,10 @@ void PICAShader::run() { case 0x34: case 0x35: case 0x36: - case 0x37: madi(instruction); break; + case 0x37: { + madi(instruction); + break; + } case 0x38: case 0x39: @@ -61,7 +68,10 @@ void PICAShader::run() { case 0x3C: case 0x3D: case 0x3E: - case 0x3F: mad(instruction); break; + case 0x3F: { + mad(instruction); + break; + } default: Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode); } @@ -588,7 +598,10 @@ void PICAShader::cmp(u32 instruction) { cmpRegister[i] = srcVec1[i] >= srcVec2[i]; break; - default: cmpRegister[i] = true; break; + default: { + cmpRegister[i] = true; + break; + } } } } @@ -682,7 +695,9 @@ void PICAShader::loop(u32 instruction) { } void PICAShader::jmpc(u32 instruction) { - if (isCondTrue(instruction)) pc = getBits<10, 12>(instruction); + if (isCondTrue(instruction)) { + pc = getBits<10, 12>(instruction); + } } void PICAShader::jmpu(u32 instruction) { diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 0083364b..631313aa 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -1,5 +1,7 @@ #include "renderer_gl/renderer_gl.hpp" +#include + #include "PICA/float_types.hpp" #include "PICA/gpu.hpp" #include "PICA/regs.hpp" @@ -841,9 +843,14 @@ void RendererGL::updateLightingLUT() { void RendererGL::drawVertices(PICA::PrimType primType, std::span vertices) { // The fourth type is meant to be "Geometry primitive". 
TODO: Find out what that is - static constexpr std::array primTypes = {OpenGL::Triangle, OpenGL::TriangleStrip, OpenGL::TriangleFan, OpenGL::Triangle}; - const auto primitiveTopology = primTypes[static_cast(primType)]; + static constexpr std::array primTypes = { + OpenGL::Triangle, + OpenGL::TriangleStrip, + OpenGL::TriangleFan, + OpenGL::Triangle, + }; + const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); gl.bindVBO(vbo); gl.bindVAO(vao); @@ -1048,7 +1055,6 @@ void RendererGL::screenshot(const std::string& name) { std::vector pixels, flippedPixels; pixels.resize(width * height * 4); flippedPixels.resize(pixels.size()); - ; OpenGL::bindScreenFramebuffer(); glReadPixels(0, 0, width, height, GL_BGRA, GL_UNSIGNED_BYTE, pixels.data()); diff --git a/src/emulator.cpp b/src/emulator.cpp index 0f9c8c54..d58635fb 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -1,8 +1,6 @@ #include "emulator.hpp" -#include - -#if PANDA3DS_ENABLE_OPENGL +#ifdef PANDA3DS_ENABLE_OPENGL #include #endif @@ -27,7 +25,7 @@ Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory Helpers::warn("Failed to initialize SDL2 GameController: %s", SDL_GetError()); } -#if PANDA3DS_ENABLE_OPENGL +#ifdef PANDA3DS_ENABLE_OPENGL // Request OpenGL 4.1 Core (Max available on MacOS) // MacOS gets mad if we don't explicitly demand a core profile SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); From a601686cb1406f02c61cbfd92efc158bc986fa7e Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 16 Jul 2023 00:09:00 +0300 Subject: [PATCH 14/14] Pretend to know CMake --- CMakeLists.txt | 65 +++++++++++++++------------- src/core/renderer_gl/renderer_gl.cpp | 5 ++- 2 files changed, 39 insertions(+), 31 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 06079482..c023e008 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ endif() 
option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" OFF) option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF) -option(PANDA3DS_ENABLE_OPENGL "Enable OpenGL rendering backend" ON) +option(ENABLE_OPENGL "Enable OpenGL rendering backend" ON) option(ENABLE_LTO "Enable link-time optimization" OFF) option(ENABLE_USER_BUILD "Make a user-facing build. These builds have various assertions disabled, LTO, and more" OFF) option(ENABLE_HTTP_SERVER "Enable HTTP server. Used for Discord bot support" OFF) @@ -93,7 +93,7 @@ endif() set(SOURCE_FILES src/main.cpp src/emulator.cpp src/io_file.cpp src/config.cpp src/core/CPU/cpu_dynarmic.cpp src/core/CPU/dynarmic_cycles.cpp - src/core/memory.cpp src/renderer.cpp src/httpserver.cpp src/stb_image_write.c src/httpserver.cpp + src/core/memory.cpp src/renderer.cpp src/httpserver.cpp src/stb_image_write.c ) set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp) set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp @@ -158,8 +158,6 @@ set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp third_party/cityhash/cityhash.cpp third_party/xxhash/xxhash.c ) - -source_group("Header Files\\Core" FILES ${HEADER_FILES}) source_group("Source Files\\Core" FILES ${SOURCE_FILES}) source_group("Source Files\\Core\\Crypto" FILES ${CRYPTO_SOURCE_FILES}) source_group("Source Files\\Core\\Filesystem" FILES ${FS_SOURCE_FILES}) @@ -169,10 +167,36 @@ source_group("Source Files\\Core\\Services" FILES ${SERVICE_SOURCE_FILES}) source_group("Source Files\\Core\\PICA" FILES ${PICA_SOURCE_FILES}) source_group("Source Files\\Third Party" FILES ${THIRD_PARTY_SOURCE_FILES}) -add_executable(Alber - ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} ${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES} - ${PICA_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES} ${HEADER_FILES} -) +set(RENDERER_GL_SOURCE_FILES "") # Empty by default unless we are compiling with the GL renderer + 
+if(ENABLE_OPENGL) + add_subdirectory(third_party/glad) + + set(RENDERER_GL_INCLUDE_FILES include/renderer_gl/opengl.hpp + include/renderer_gl/renderer_gl.hpp include/renderer_gl/textures.hpp + include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp + include/renderer_gl/gl_state.hpp + ) + + set(RENDERER_GL_SOURCE_FILES src/core/renderer_gl/renderer_gl.cpp + src/core/renderer_gl/textures.cpp src/core/renderer_gl/etc1.cpp + src/core/renderer_gl/gl_state.cpp + ) + + set(HEADER_FILES ${HEADER_FILES} ${RENDERER_GL_INCLUDE_FILES}) + source_group("Source Files\\Core\\OpenGL Renderer" FILES ${RENDERER_GL_SOURCE_FILES}) +endif() + +source_group("Header Files\\Core" FILES ${HEADER_FILES}) +set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} ${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES} + ${PICA_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES} ${HEADER_FILES}) + +if(ENABLE_OPENGL) + # Add the OpenGL source files to ALL_SOURCES + set(ALL_SOURCES ${ALL_SOURCES} ${RENDERER_GL_SOURCE_FILES}) +endif() + +add_executable(Alber ${ALL_SOURCES}) if(ENABLE_LTO OR ENABLE_USER_BUILD) set_target_properties(Alber PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE) @@ -180,30 +204,13 @@ endif() target_link_libraries(Alber PRIVATE dynarmic SDL2-static cryptopp) -if(PANDA3DS_ENABLE_OPENGL) - add_subdirectory(third_party/glad) - - target_compile_definitions(Alber PUBLIC "PANDA3DS_ENABLE_OPENGL=1") - - set(RENDERER_GL_INCLUDE_FILES include/opengl.hpp - include/renderer_gl/renderer_gl.hpp include/renderer_gl/textures.hpp - include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp - include/renderer_gl/gl_state.hpp - ) - set(RENDERER_GL_SOURCE_FILES src/core/renderer_gl/renderer_gl.cpp - src/core/renderer_gl/textures.cpp src/core/renderer_gl/etc1.cpp - src/core/renderer_gl/gl_state.cpp - ) - source_group("Source Files\\Core\\OpenGL Renderer" FILES ${RENDERER_GL_SOURCE_FILES}) - - target_sources(Alber PRIVATE ${RENDERER_GL_SOURCE_FILES}) 
- - target_link_libraries(Alber PRIVATE glad) - +if(ENABLE_OPENGL) + target_compile_definitions(Alber PUBLIC "PANDA3DS_ENABLE_OPENGL=1") + target_link_libraries(Alber PRIVATE glad) endif() if(GPU_DEBUG_INFO) - target_compile_definitions(Alber PRIVATE GPU_DEBUG_INFO=1) + target_compile_definitions(Alber PRIVATE GPU_DEBUG_INFO=1) endif() if(ENABLE_USER_BUILD) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 631313aa..2c41f83b 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -707,8 +707,9 @@ void RendererGL::setupBlending() { const bool blendingEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; // Map of PICA blending equations to OpenGL blending equations. The unused blending equations are equivalent to equation 0 (add) - static constexpr std::array blendingEquations = {GL_FUNC_ADD, GL_FUNC_SUBTRACT, GL_FUNC_REVERSE_SUBTRACT, GL_MIN, GL_MAX, GL_FUNC_ADD, - GL_FUNC_ADD, GL_FUNC_ADD}; + static constexpr std::array blendingEquations = { + GL_FUNC_ADD, GL_FUNC_SUBTRACT, GL_FUNC_REVERSE_SUBTRACT, GL_MIN, GL_MAX, GL_FUNC_ADD, GL_FUNC_ADD, GL_FUNC_ADD, + }; // Map of PICA blending funcs to OpenGL blending funcs. Func = 15 is undocumented and stubbed to GL_ONE for now static constexpr std::array blendingFuncs = {