From e13ef42b654a8dd0e8122e6f78fb7713ca84e8c2 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 21 Aug 2024 00:47:57 +0300 Subject: [PATCH] PICA: Start implementing GPU vertex fetch --- CMakeLists.txt | 4 +- include/PICA/draw_acceleration.hpp | 19 ++++++++ include/PICA/gpu.hpp | 2 + include/renderer.hpp | 5 +- include/renderer_gl/renderer_gl.hpp | 4 +- src/core/PICA/draw_acceleration.cpp | 71 ++++++++++++++++++++++++++++ src/core/PICA/gpu.cpp | 10 +++- src/core/PICA/regs.cpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 8 +++- 9 files changed, 117 insertions(+), 8 deletions(-) create mode 100644 include/PICA/draw_acceleration.hpp create mode 100644 src/core/PICA/draw_acceleration.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 043bb084..643e48e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -241,7 +241,7 @@ set(PICA_SOURCE_FILES src/core/PICA/gpu.cpp src/core/PICA/regs.cpp src/core/PICA src/core/PICA/shader_interpreter.cpp src/core/PICA/dynapica/shader_rec.cpp src/core/PICA/dynapica/shader_rec_emitter_x64.cpp src/core/PICA/pica_hash.cpp src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp src/core/PICA/shader_gen_glsl.cpp - src/core/PICA/shader_decompiler.cpp + src/core/PICA/shader_decompiler.cpp src/core/PICA/draw_acceleration.cpp ) set(LOADER_SOURCE_FILES src/core/loader/elf.cpp src/core/loader/ncsd.cpp src/core/loader/ncch.cpp src/core/loader/3dsx.cpp src/core/loader/lz77.cpp) @@ -293,7 +293,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp - include/PICA/pica_vert_config.hpp include/sdl_sensors.hpp + include/PICA/pica_vert_config.hpp 
include/sdl_sensors.hpp include/PICA/draw_acceleration.hpp ) cmrc_add_resource_library( diff --git a/include/PICA/draw_acceleration.hpp b/include/PICA/draw_acceleration.hpp new file mode 100644 index 00000000..eec76b87 --- /dev/null +++ b/include/PICA/draw_acceleration.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include + +#include "helpers.hpp" + +namespace PICA { + struct DrawAcceleration { + u8* vertexBuffer; + u8* indexBuffer; + + // Minimum and maximum index in the index buffer for a draw call + u16 minimumIndex, maximumIndex; + u32 vertexDataSize; + + bool canBeAccelerated; + bool indexed; + }; +} // namespace PICA \ No newline at end of file diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 1e1d3c4b..c168a9bf 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include "PICA/draw_acceleration.hpp" #include "PICA/dynapica/shader_rec.hpp" #include "PICA/float_types.hpp" #include "PICA/pica_vertex.hpp" @@ -87,6 +88,7 @@ class GPU { std::unique_ptr renderer; PICA::Vertex getImmediateModeVertex(); + void getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed); public: // 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT // Encoded in PICA native format diff --git a/include/renderer.hpp b/include/renderer.hpp index 721364c1..94a0b0f3 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -1,9 +1,10 @@ #pragma once #include +#include #include #include -#include +#include "PICA/draw_acceleration.hpp" #include "PICA/pica_vertex.hpp" #include "PICA/regs.hpp" #include "helpers.hpp" @@ -83,7 +84,7 @@ class Renderer { // It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between // ubershaders and shadergen, and so on. 
// Returns whether this draw is eligible for using hardware-accelerated shaders or if shaders should run on the CPU - virtual bool prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) { return false; } + virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel, bool isImmediateMode) { return false; } // Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window #ifdef PANDA3DS_FRONTEND_QT diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 73b52cc5..397aaf53 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -135,6 +135,8 @@ class RendererGL final : public Renderer { void updateFogLUT(); void initGraphicsContextInternal(); + void accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel); + public: RendererGL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) : Renderer(gpu, internalRegs, externalRegs), fragShaderGen(PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL) {} @@ -152,7 +154,7 @@ class RendererGL final : public Renderer { virtual bool supportsShaderReload() override { return true; } virtual std::string getUbershader() override; virtual void setUbershader(const std::string& shader) override; - virtual bool prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) override; + virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel, bool isImmediateMode) override; std::optional getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); diff --git a/src/core/PICA/draw_acceleration.cpp b/src/core/PICA/draw_acceleration.cpp new file mode 100644 index 00000000..4f3e5bdd --- /dev/null +++ b/src/core/PICA/draw_acceleration.cpp @@ -0,0 +1,71 @@ +#include "PICA/draw_acceleration.hpp" + +#include + +#include "PICA/gpu.hpp" +#include "PICA/regs.hpp" + +void 
GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) { + accel.indexed = indexed; + const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16; + const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer + + accel.vertexBuffer = getPointerPhys(vertexBase); + if (indexed) { + u32 indexBufferConfig = regs[PICA::InternalRegs::IndexBufferConfig]; + u32 indexBufferPointer = vertexBase + (indexBufferConfig & 0xfffffff); + + u8* indexBuffer = getPointerPhys(indexBufferPointer); + u16 minimumIndex = std::numeric_limits::max(); + u16 maximumIndex = 0; + + // Check whether the index buffer uses u16 indices or u8 + bool shortIndex = Helpers::getBit<31>(indexBufferConfig); // Indicates whether vert indices are 16-bit or 8-bit + + // Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them + if (shortIndex) { + u16* indexBuffer16 = reinterpret_cast(indexBuffer); + for (int i = 0; i < vertexCount; i++) { + u16 index = indexBuffer16[i]; + minimumIndex = std::min(minimumIndex, index); + maximumIndex = std::max(maximumIndex, index); + } + } else { + for (int i = 0; i < vertexCount; i++) { + u16 index = u16(indexBuffer[i]); + minimumIndex = std::min(minimumIndex, index); + maximumIndex = std::max(maximumIndex, index); + } + } + + accel.indexBuffer = indexBuffer; + accel.minimumIndex = minimumIndex; + accel.maximumIndex = maximumIndex; + } else { + accel.indexBuffer = nullptr; + accel.minimumIndex = regs[PICA::InternalRegs::VertexOffsetReg]; + accel.maximumIndex = accel.minimumIndex + vertexCount - 1; + } + + int buffer = 0; + accel.vertexDataSize = 0; + + for (int attrCount = 0; attrCount < totalAttribCount; attrCount++) { + bool fixedAttribute = (fixedAttribMask & (1 << attrCount)) != 0; + + if (!fixedAttribute) { + auto& attr = attributeInfo[buffer]; // Get information for this attribute + + if (attr.componentCount != 0) { + // Size of the attribute 
in bytes multiplied by the total number of vertices + const u32 bytes = attr.size * vertexCount; + // Add it to the total vertex data size, aligned to 4 bytes. + accel.vertexDataSize += (bytes + 3) & ~3; + } + + buffer++; + } + } + + accel.canBeAccelerated = true; +} \ No newline at end of file diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 6cbdb100..7e9be005 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -123,7 +123,15 @@ void GPU::reset() { // Call the correct version of drawArrays based on whether this is an indexed draw (first template parameter) // And whether we are going to use the shader JIT (second template parameter) void GPU::drawArrays(bool indexed) { - const bool hwShaders = renderer->prepareForDraw(shaderUnit, false); + PICA::DrawAcceleration accel; + + if (config.accelerateShaders) { + // If we are potentially going to use hw shaders, gather necessary to do vertex fetch, index buffering, etc on the GPU + // This includes parsing which vertices to upload, getting pointers to the index buffer data & vertex data, and so on + getAcceleratedDrawInfo(accel, indexed); + } + + const bool hwShaders = renderer->prepareForDraw(shaderUnit, &accel, false); if (hwShaders) { if (indexed) { diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index 0c5f4adb..091bd377 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -249,7 +249,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { // If we've reached 3 verts, issue a draw call // Handle rendering depending on the primitive type if (immediateModeVertIndex == 3) { - renderer->prepareForDraw(shaderUnit, true); + renderer->prepareForDraw(shaderUnit, nullptr, true); renderer->drawVertices(PICA::PrimType::TriangleList, immediateModeVertices); switch (primType) { diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 6e50f77b..d0ecf443 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ 
// Placeholder for uploading vertex/index data described by a PICA::DrawAcceleration to the host GPU.
// Currently does nothing. It is called from RendererGL::prepareForDraw with the accel pointer that
// prepareForDraw received.
//
// shaderUnit: The shader unit passed to prepareForDraw (unused for now).
// accel:      Draw description gathered by the GPU. Immediate-mode draws call prepareForDraw with nullptr,
//             so an implementation must not dereference this without checking it first.
void RendererGL::accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) {

}