From fde93381a5fe86a8807762c7d1578b833014763e Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Fri, 16 Jun 2023 07:24:34 -0700 Subject: [PATCH 1/4] Use `std::span` for CPU register-state Following the trend of #33: `std::span` provides some useful utility functions like `size_bytes()` and `as_bytes()` and serves as a better non-owning "chunk of data"-type over just passing around an `std::array&`. --- include/cpu_dynarmic.hpp | 18 ++++++++---------- include/kernel/kernel.hpp | 6 ++++-- include/services/service_manager.hpp | 12 +++++++----- src/core/kernel/threads.cpp | 24 +++++++++++++----------- src/core/services/service_manager.cpp | 10 ++++++---- 5 files changed, 38 insertions(+), 32 deletions(-) diff --git a/include/cpu_dynarmic.hpp b/include/cpu_dynarmic.hpp index d51a1f88..2682d0ee 100644 --- a/include/cpu_dynarmic.hpp +++ b/include/cpu_dynarmic.hpp @@ -1,5 +1,7 @@ #pragma once +#include + #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/exclusive_monitor.h" @@ -132,17 +134,13 @@ public: return jit->Regs()[index]; } - std::array& regs() { - return jit->Regs(); - } + std::span regs() { return jit->Regs(); } - // Get reference to array of FPRs. This array consists of the FPRs as single precision values - // Hence why its base type is u32 - // Note: Dynarmic keeps 64 VFP registers as VFPv3 extends the VFP register set to 64 registers. - // However the 3DS ARM11 is an ARMv6k processor with VFPv2, so only the first 32 registers are actually used - std::array& fprs() { - return jit->ExtRegs(); - } + // Get reference to array of FPRs. This array consists of the FPRs as single precision values + // Hence why its base type is u32 + // Note: Dynarmic keeps 64 VFP registers as VFPv3 extends the VFP register set to 64 registers. + // However the 3DS ARM11 is an ARMv6k processor with VFPv2, so only the first 32 registers are actually used + std::span fprs() { return jit->ExtRegs(); } void setCPSR(u32 value) { jit->SetCpsr(value); diff --git a/include/kernel/kernel.hpp b/include/kernel/kernel.hpp index b4790056..cc2f4a4c 100644 --- a/include/kernel/kernel.hpp +++ b/include/kernel/kernel.hpp @@ -2,10 +2,12 @@ #include #include #include +#include #include #include -#include "kernel_types.hpp" + #include "helpers.hpp" +#include "kernel_types.hpp" #include "logger.hpp" #include "memory.hpp" #include "resource_limits.hpp" @@ -14,7 +16,7 @@ class CPU; class Kernel { - std::array& regs; + std::span regs; CPU& cpu; Memory& mem; diff --git a/include/services/service_manager.hpp b/include/services/service_manager.hpp index 300e8f2e..e09fd455 100644 --- a/include/services/service_manager.hpp +++ b/include/services/service_manager.hpp @@ -1,6 +1,8 @@ #pragma once #include #include +#include + #include "kernel_types.hpp" #include "logger.hpp" #include "memory.hpp" @@ -14,16 +16,16 @@ #include "services/cfg.hpp" #include "services/dlp_srvr.hpp" #include "services/dsp.hpp" -#include "services/hid.hpp" #include "services/frd.hpp" #include "services/fs.hpp" #include "services/gsp_gpu.hpp" #include "services/gsp_lcd.hpp" +#include "services/hid.hpp" #include "services/ldr_ro.hpp" #include "services/mic.hpp" +#include "services/ndm.hpp" #include "services/nfc.hpp" #include "services/nim.hpp" -#include "services/ndm.hpp" #include "services/ptm.hpp" #include "services/y2r.hpp" @@ -31,7 +33,7 @@ class Kernel; class ServiceManager { - std::array& regs; + std::span regs; Memory& mem; Kernel& kernel; @@ -69,8 +71,8 @@ class ServiceManager { void registerClient(u32 messagePointer); void subscribe(u32 messagePointer); -public: - ServiceManager(std::array& regs, Memory& mem, GPU& gpu, u32& currentPID, Kernel& kernel); + public: + ServiceManager(std::span regs, Memory& mem, GPU& gpu, u32& currentPID, Kernel& kernel); void reset(); void initializeFS() { fs.initializeFilesystem(); } void handleSyncRequest(u32 messagePointer); diff --git a/src/core/kernel/threads.cpp b/src/core/kernel/threads.cpp index 8a87edb7..f41de34b 100644 --- a/src/core/kernel/threads.cpp +++ b/src/core/kernel/threads.cpp @@ -1,8 +1,10 @@ #include #include -#include "kernel.hpp" + #include "arm_defs.hpp" -// This header needs to be included because I did stupid forward decl hack so the kernel and CPU can both access each other +#include "kernel.hpp" +// This header needs to be included because I did stupid forward decl hack so the kernel and CPU can both access each +// other #include "cpu.hpp" #include "resource_limits.hpp" @@ -20,17 +22,17 @@ void Kernel::switchThread(int newThreadIndex) { } // Backup context - std::memcpy(&oldThread.gprs[0], &cpu.regs()[0], 16 * sizeof(u32)); // Backup the 16 GPRs - std::memcpy(&oldThread.fprs[0], &cpu.fprs()[0], 32 * sizeof(u32)); // Backup the 32 FPRs - oldThread.cpsr = cpu.getCPSR(); // Backup CPSR - oldThread.fpscr = cpu.getFPSCR(); // Backup FPSCR + std::memcpy(oldThread.gprs.data(), cpu.regs().data(), cpu.regs().size_bytes()); // Backup the 16 GPRs + std::memcpy(oldThread.fprs.data(), cpu.fprs().data(), cpu.fprs().size_bytes()); // Backup the 32 FPRs + oldThread.cpsr = cpu.getCPSR(); // Backup CPSR + oldThread.fpscr = cpu.getFPSCR(); // Backup FPSCR // Load new context - std::memcpy(&cpu.regs()[0], &newThread.gprs[0], 16 * sizeof(u32)); // Load 16 GPRs - std::memcpy(&cpu.fprs()[0], &newThread.fprs[0], 32 * sizeof(u32)); // Load 32 FPRs - cpu.setCPSR(newThread.cpsr); // Load CPSR - cpu.setFPSCR(newThread.fpscr); // Load FPSCR - cpu.setTLSBase(newThread.tlsBase); // Load CP15 thread-local-storage pointer register + std::memcpy(cpu.regs().data(), newThread.gprs.data(), cpu.regs().size_bytes()); // Load 16 GPRs + std::memcpy(cpu.fprs().data(), newThread.fprs.data(), cpu.fprs().size_bytes()); // Load 32 FPRs + cpu.setCPSR(newThread.cpsr); // Load CPSR + cpu.setFPSCR(newThread.fpscr); // Load FPSCR + cpu.setTLSBase(newThread.tlsBase); // Load CP15 thread-local-storage pointer register currentThreadIndex = newThreadIndex; } diff --git a/src/core/services/service_manager.cpp b/src/core/services/service_manager.cpp index 27b39af9..2b9c5167 100644 --- a/src/core/services/service_manager.cpp +++ b/src/core/services/service_manager.cpp @@ -1,13 +1,15 @@ #include "services/service_manager.hpp" + #include + #include "ipc.hpp" #include "kernel.hpp" -ServiceManager::ServiceManager(std::array& regs, Memory& mem, GPU& gpu, u32& currentPID, Kernel& kernel) +ServiceManager::ServiceManager(std::span regs, Memory& mem, GPU& gpu, u32& currentPID, Kernel& kernel) : regs(regs), mem(mem), kernel(kernel), ac(mem), am(mem), boss(mem), act(mem), apt(mem, kernel), cam(mem), - cecd(mem, kernel), cfg(mem), dlp_srvr(mem), dsp(mem, kernel), hid(mem, kernel), frd(mem), fs(mem, kernel), - gsp_gpu(mem, gpu, kernel, currentPID), gsp_lcd(mem), ldr(mem), mic(mem), nfc(mem, kernel), nim(mem), ndm(mem), - ptm(mem), y2r(mem, kernel) {} + cecd(mem, kernel), cfg(mem), dlp_srvr(mem), dsp(mem, kernel), hid(mem, kernel), frd(mem), fs(mem, kernel), + gsp_gpu(mem, gpu, kernel, currentPID), gsp_lcd(mem), ldr(mem), mic(mem), nfc(mem, kernel), nim(mem), ndm(mem), + ptm(mem), y2r(mem, kernel) {} static constexpr int MAX_NOTIFICATION_COUNT = 16; From 553d23974a32700deb5f7b33bb850d04a33a55c8 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Fri, 16 Jun 2023 07:28:35 -0700 Subject: [PATCH 2/4] Use `std::span` to pass vertex data Starts utilizing [std::span](https://en.cppreference.com/w/cpp/container/span) to indicate a non-owning view of a contiguous array of elements rather than `T* data, usize count`. --- include/opengl.hpp | 23 ++++++++++++----------- include/renderer_gl/renderer_gl.hpp | 16 ++++++++++------ src/core/PICA/gpu.cpp | 9 ++++++--- src/core/PICA/regs.cpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 6 +++--- 5 files changed, 32 insertions(+), 24 deletions(-) diff --git a/include/opengl.hpp b/include/opengl.hpp index 8cb1613e..37e9302c 100644 --- a/include/opengl.hpp +++ b/include/opengl.hpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -388,18 +389,18 @@ namespace OpenGL { void bind() { glBindBuffer(GL_ARRAY_BUFFER, m_handle); } void free() { glDeleteBuffers(1, &m_handle); } - // Reallocates the buffer on every call. Prefer the sub version if possible. - template - void bufferVerts(VertType* vertices, int vertCount, GLenum usage = GL_DYNAMIC_DRAW) { - glBufferData(GL_ARRAY_BUFFER, sizeof(VertType) * vertCount, vertices, usage); - } + // Reallocates the buffer on every call. Prefer the sub version if possible. + template + void bufferVerts(std::span vertices, GLenum usage = GL_DYNAMIC_DRAW) { + glBufferData(GL_ARRAY_BUFFER, sizeof(VertType) * vertices.size(), vertices.data(), usage); + } - // Only use if you used createFixedSize - template - void bufferVertsSub(VertType* vertices, int vertCount, GLintptr offset = 0) { - glBufferSubData(GL_ARRAY_BUFFER, offset, sizeof(VertType) * vertCount, vertices); - } - }; + // Only use if you used createFixedSize + template + void bufferVertsSub(std::span vertices, GLintptr offset = 0) { + glBufferSubData(GL_ARRAY_BUFFER, offset, sizeof(VertType) * vertices.size(), vertices.data()); + } + }; enum DepthFunc { Never = GL_NEVER, // Depth test never passes diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index ac11a813..8182e426 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -1,5 +1,7 @@ #pragma once #include +#include + #include "helpers.hpp" #include "logger.hpp" #include "opengl.hpp" @@ -68,12 +70,14 @@ public: Renderer(GPU& gpu, const std::array& internalRegs) : gpu(gpu), regs(internalRegs) {} void reset(); - void display(); // Display the 3DS screen contents to the window - void initGraphicsContext(); // Initialize graphics context - void getGraphicsContext(); // Set up graphics context for rendering - void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM - void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags); // Perform display transfer - void drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 count); // Draw the given vertices + void display(); // Display the 3DS screen contents to the window + void initGraphicsContext(); // Initialize graphics context + void getGraphicsContext(); // Set up graphics context for rendering + void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM + void displayTransfer( + u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags + ); // Perform display transfer + void drawVertices(OpenGL::Primitives primType, std::span vertices); // Draw the given vertices void setFBSize(u32 width, u32 height) { fbSize.x() = width; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 9cbd17a8..f0e832a2 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -1,7 +1,10 @@ #include "PICA/gpu.hpp" + +#include +#include + #include "PICA/float_types.hpp" #include "PICA/regs.hpp" -#include using namespace Floats; @@ -41,7 +44,7 @@ void GPU::drawArrays(bool indexed) { drawArrays(); } -Vertex* vertices = new Vertex[Renderer::vertexBufferSize]; +static std::array vertices; template void GPU::drawArrays() { @@ -205,7 +208,7 @@ void GPU::drawArrays() { OpenGL::Triangle, OpenGL::TriangleStrip, OpenGL::TriangleFan, OpenGL::Triangle }; const auto shape = primTypes[primType]; - renderer.drawVertices(shape, vertices, vertexCount); + renderer.drawVertices(shape, std::span(vertices).first(vertexCount)); } Vertex GPU::getImmediateModeVertex() { diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index a5e2c6b9..26feaf9d 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -157,7 +157,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { // If we've reached 3 verts, issue a draw call // Handle rendering depending on the primitive type if (immediateModeVertIndex == 3) { - renderer.drawVertices(OpenGL::Triangle, &immediateModeVertices[0], 3); + renderer.drawVertices(OpenGL::Triangle, immediateModeVertices); switch (primType) { // Triangle or geometry primitive. Draw a triangle and discard all vertices diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 6da0dd70..7fbb9e94 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -265,7 +265,7 @@ void Renderer::setupBlending() { } } -void Renderer::drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 count) { +void Renderer::drawVertices(OpenGL::Primitives primType, std::span vertices) { // Adjust alpha test if necessary const u32 alphaControl = regs[PICAInternalRegs::AlphaTestConfig]; if (alphaControl != oldAlphaControl) { @@ -352,8 +352,8 @@ void Renderer::drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 c } } - vbo.bufferVertsSub(vertices, count); - OpenGL::draw(primType, count); + vbo.bufferVertsSub(vertices); + OpenGL::draw(primType, vertices.size()); } constexpr u32 topScreenBuffer = 0x1f000000; From 936302da2a029f8ce25046aa5cc36a0cb95825dd Mon Sep 17 00:00:00 2001 From: wheremyfoodat Date: Fri, 16 Jun 2023 22:06:59 +0300 Subject: [PATCH 3/4] Raise clang-format column size, make std::span support in opengl.hpp version-dependent --- .clang-format | 2 +- include/opengl.hpp | 31 +++++++++++++++++++++++++--- include/renderer_gl/renderer_gl.hpp | 28 ++++++++++++------------- src/core/renderer_gl/renderer_gl.cpp | 1 - 4 files changed, 42 insertions(+), 20 deletions(-) diff --git a/.clang-format b/.clang-format index 8251730f..dc371319 100644 --- a/.clang-format +++ b/.clang-format @@ -1,6 +1,6 @@ BasedOnStyle: Google IndentWidth: 4 -ColumnLimit: 120 +ColumnLimit: 150 AccessModifierOffset: -2 TabWidth: 4 NamespaceIndentation: All diff --git a/include/opengl.hpp b/include/opengl.hpp index 37e9302c..f23be4d6 100644 --- a/include/opengl.hpp +++ b/include/opengl.hpp @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -31,6 +30,19 @@ #include "gl3w.h" +// Check if we have C++20. If yes, we can add C++20 std::span support +#ifdef _MSVC_LANG // MSVC does not properly define __cplusplus without a compiler flag... +#if _MSVC_LANG >= 202002L +#define OPENGL_HAVE_CPP20 +#endif +#elif __cplusplus >= 202002L +#define OPENGL_HAVE_CPP20 +#endif // MSVC_LANG + +#ifdef OPENGL_HAVE_CPP20 +#include +#endif + // Uncomment the following define if you want GL objects to automatically free themselves when their lifetime ends // #define OPENGL_DESTRUCTORS @@ -389,17 +401,30 @@ namespace OpenGL { void bind() { glBindBuffer(GL_ARRAY_BUFFER, m_handle); } void free() { glDeleteBuffers(1, &m_handle); } - // Reallocates the buffer on every call. Prefer the sub version if possible. + // Reallocates the buffer on every call. Prefer the sub version if possible. + template + void bufferVerts(VertType* vertices, int vertCount, GLenum usage = GL_DYNAMIC_DRAW) { + glBufferData(GL_ARRAY_BUFFER, sizeof(VertType) * vertCount, vertices, usage); + } + + // Only use if you used createFixedSize + template + void bufferVertsSub(VertType* vertices, int vertCount, GLintptr offset = 0) { + glBufferSubData(GL_ARRAY_BUFFER, offset, sizeof(VertType) * vertCount, vertices); + } + + // If C++20 is available, add overloads that take std::span instead of raw pointers +#ifdef OPENGL_HAVE_CPP20 template void bufferVerts(std::span vertices, GLenum usage = GL_DYNAMIC_DRAW) { glBufferData(GL_ARRAY_BUFFER, sizeof(VertType) * vertices.size(), vertices.data(), usage); } - // Only use if you used createFixedSize template void bufferVertsSub(std::span vertices, GLintptr offset = 0) { glBufferSubData(GL_ARRAY_BUFFER, offset, sizeof(VertType) * vertices.size(), vertices.data()); } +#endif }; enum DepthFunc { diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 8182e426..f387cb69 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -26,7 +26,7 @@ class Renderer { OpenGL::VertexBuffer vbo; GLint alphaControlLoc = -1; GLint texUnitConfigLoc = -1; - + // Depth configuration uniform locations GLint depthOffsetLoc = -1; GLint depthScaleLoc = -1; @@ -43,11 +43,11 @@ class Renderer { SurfaceCache colourBufferCache; SurfaceCache textureCache; - OpenGL::uvec2 fbSize; // The size of the framebuffer (ie both the colour and depth buffer)' + OpenGL::uvec2 fbSize; // The size of the framebuffer (ie both the colour and depth buffer)' + + u32 colourBufferLoc; // Location in 3DS VRAM for the colour buffer + ColourBuffer::Formats colourBufferFormat; // Format of the colours stored in the colour buffer - u32 colourBufferLoc; // Location in 3DS VRAM for the colour buffer - ColourBuffer::Formats colourBufferFormat; // Format of the colours stored in the colour buffer - // Same for the depth/stencil buffer u32 depthBufferLoc; DepthBuffer::Formats depthBufferFormat; @@ -56,7 +56,7 @@ class Renderer { OpenGL::VertexArray dummyVAO; OpenGL::VertexBuffer dummyVBO; - static constexpr u32 regNum = 0x300; // Number of internal PICA registers + static constexpr u32 regNum = 0x300; // Number of internal PICA registers const std::array& regs; OpenGL::Framebuffer getColourFBO(); @@ -66,18 +66,16 @@ class Renderer { void setupBlending(); void bindDepthBuffer(); -public: + public: Renderer(GPU& gpu, const std::array& internalRegs) : gpu(gpu), regs(internalRegs) {} void reset(); - void display(); // Display the 3DS screen contents to the window - void initGraphicsContext(); // Initialize graphics context - void getGraphicsContext(); // Set up graphics context for rendering - void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM - void displayTransfer( - u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags - ); // Perform display transfer - void drawVertices(OpenGL::Primitives primType, std::span vertices); // Draw the given vertices + void display(); // Display the 3DS screen contents to the window + void initGraphicsContext(); // Initialize graphics context + void getGraphicsContext(); // Set up graphics context for rendering + void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM + void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags); // Perform display transfer + void drawVertices(OpenGL::Primitives primType, std::span vertices); // Draw the given vertices void setFBSize(u32 width, u32 height) { fbSize.x() = width; diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 7fbb9e94..9265e2f4 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -213,7 +213,6 @@ void Renderer::initGraphicsContext() { void Renderer::getGraphicsContext() { OpenGL::disableScissor(); - OpenGL::setViewport(400, 240); vbo.bind(); vao.bind(); From 2f3bc5d38e490d1d2fe25b515bd5a006ba788eb1 Mon Sep 17 00:00:00 2001 From: wheremyfoodat Date: Fri, 16 Jun 2023 22:20:59 +0300 Subject: [PATCH 4/4] [CPU] fprs() should return std::span (Fixes memory corruption bug in threads.cpp) --- include/cpu_dynarmic.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/cpu_dynarmic.hpp b/include/cpu_dynarmic.hpp index 2682d0ee..daa25d03 100644 --- a/include/cpu_dynarmic.hpp +++ b/include/cpu_dynarmic.hpp @@ -138,9 +138,7 @@ public: // Get reference to array of FPRs. This array consists of the FPRs as single precision values // Hence why its base type is u32 - // Note: Dynarmic keeps 64 VFP registers as VFPv3 extends the VFP register set to 64 registers. - // However the 3DS ARM11 is an ARMv6k processor with VFPv2, so only the first 32 registers are actually used - std::span fprs() { return jit->ExtRegs(); } + std::span fprs() { return std::span(jit->ExtRegs()).first<32>(); } void setCPSR(u32 value) { jit->SetCpsr(value);