From e80679fe77e89d23d85a0e9e7b680dc3a1717f9d Mon Sep 17 00:00:00 2001 From: wheremyfoodat Date: Sun, 26 Feb 2023 20:41:33 +0200 Subject: [PATCH] Geometry pipeline v2 Co-Authored-By: Sky --- include/PICA/gpu.hpp | 7 ++ include/PICA/regs.hpp | 5 ++ include/PICA/shader.hpp | 2 +- include/helpers.hpp | 4 - include/renderer_gl/renderer_gl.hpp | 1 + src/core/PICA/gpu.cpp | 122 ++++++++++++++++++--------- src/core/PICA/shader_interpreter.cpp | 3 +- src/core/PICA/shader_unit.cpp | 2 +- src/core/renderer_gl/renderer_gl.cpp | 16 +++- 9 files changed, 114 insertions(+), 48 deletions(-) diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 040292a9..9605a177 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -4,6 +4,7 @@ #include "logger.hpp" #include "memory.hpp" #include "PICA/float_types.hpp" +#include "PICA/regs.hpp" #include "PICA/shader_unit.hpp" #include "renderer_gl/renderer_gl.hpp" @@ -19,6 +20,7 @@ class GPU { static constexpr u32 regNum = 0x300; static constexpr u32 vramSize = 6_MB; std::array regs; // GPU internal registers + std::array currentAttributes; // Vertex attributes before being passed to the shader template void drawArrays(); @@ -31,12 +33,17 @@ class GPU { int size = 0; // Bytes per vertex u32 config1 = 0; u32 config2 = 0; + u32 componentCount = 0; // Number of components for the attribute u64 getConfigFull() { return u64(config1) | (u64(config2) << 32); } }; + u64 getVertexShaderInputConfig() { + return u64(regs[PICAInternalRegs::VertexShaderInputCfgLow]) | (u64(regs[PICAInternalRegs::VertexShaderInputCfgHigh]) << 32); + } + std::array attributeInfo; // Info for each of the 12 attributes u32 totalAttribCount = 0; // Number of vertex attributes to send to VS u32 fixedAttribMask = 0; // Which attributes are fixed? diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index dff9c819..9ece8c3f 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -10,6 +10,7 @@ namespace PICAInternalRegs { DepthScale = 0x4D, DepthOffset = 0x4E, + ShaderOutputCount = 0x4F, // Framebuffer registers AlphaTestConfig = 0x104, @@ -85,6 +86,10 @@ namespace PICAInternalRegs { VertexFloatUniformData6 = 0x2C7, VertexFloatUniformData7 = 0x2C8, + VertexShaderInputBufferCfg = 0x2B9, + VertexShaderInputCfgLow = 0x2BB, + VertexShaderInputCfgHigh = 0x2BC, + VertexShaderTransferIndex = 0x2CB, VertexShaderData0 = 0x2CC, VertexShaderData1 = 0x2CD, diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index 4f556fe1..03b2970e 100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -163,7 +163,7 @@ public: std::array floatUniforms; std::array fixedAttributes; // Fixed vertex attributes - std::array attributes; // Attributes passed to the shader + std::array inputs; // Attributes passed to the shader std::array outputs; PICAShader(ShaderType type) : type(type) {} diff --git a/include/helpers.hpp b/include/helpers.hpp index 45824966..8164dae4 100644 --- a/include/helpers.hpp +++ b/include/helpers.hpp @@ -120,10 +120,6 @@ namespace Helpers { static_for_impl( std::forward(f), std::make_integer_sequence{ } ); } - static constexpr inline u8 get8BitColor (u8 colorRGB555) { - return (colorRGB555 << 3) | (colorRGB555 >> 2); - } - // For values < 0x99 static constexpr inline u8 incBCDByte(u8 value) { return ((value & 0xf) == 0x9) ? value + 7 : value + 1; diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index a5452b9d..03f3453d 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -12,6 +12,7 @@ class GPU; struct Vertex { OpenGL::vec4 position; OpenGL::vec4 colour; + OpenGL::vec2 UVs; }; class Renderer { diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index a8da36b7..0943de3b 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -72,6 +72,10 @@ void GPU::drawArrays() { log("PICA::DrawElements(vertex count = %d, index buffer config = %08X)\n", vertexCount, indexBufferConfig); } + // Total number of input attributes to shader. Differs between GS and VS. Currently stubbed to the VS one, as we don't have geometry shaders. + const u32 inputAttrCount = (regs[PICAInternalRegs::VertexShaderInputBufferCfg] & 0xf) + 1; + const u64 inputAttrCfg = getVertexShaderInputConfig(); + for (u32 i = 0; i < vertexCount; i++) { u32 vertexIndex; // Index of the vertex in the VBO @@ -89,72 +93,112 @@ void GPU::drawArrays() { } } - int attrCount = 0; // Number of attributes we've passed to the shader - for (int attrCount = 0; attrCount < totalAttribCount; attrCount++) { + int attrCount = 0; + int buffer = 0; // Vertex buffer index for non-fixed attributes + + while (attrCount < totalAttribCount) { // Check if attribute is fixed or not if (fixedAttribMask & (1 << attrCount)) { // Fixed attribute vec4f& fixedAttr = shaderUnit.vs.fixedAttributes[attrCount]; // TODO: Is this how it works? - vec4f& inputAttr = shaderUnit.vs.attributes[attrCount]; + vec4f& inputAttr = currentAttributes[attrCount]; std::memcpy(&inputAttr, &fixedAttr, sizeof(vec4f)); // Copy fixed attr to input attr + attrCount++; } else { // Non-fixed attribute - auto& attr = attributeInfo[attrCount]; // Get information for this attribute + auto& attr = attributeInfo[buffer]; // Get information for this attribute u64 attrCfg = attr.getConfigFull(); // Get config1 | (config2 << 32) - uint index = (attrCfg >> (attrCount * 4)) & 0xf; // Get index of attribute in vertexCfg - - if (index >= 12) Helpers::panic("[PICA] Vertex attribute used as padding"); - - u32 attribInfo = (vertexCfg >> (index * 4)) & 0xf; - u32 attribType = attribInfo & 0x3; // Type of attribute(sbyte/ubyte/short/float) - u32 componentCount = (attribInfo >> 2) + 1; // Total number of components - - // Address to fetch the attribute from u32 attrAddress = vertexBase + attr.offset + (vertexIndex * attr.size); - vec4f& attribute = shaderUnit.vs.attributes[attrCount]; - uint component; // Current component - switch (attribType) { - case 2: { // Short - s16* ptr = getPointerPhys(attrAddress); - for (component = 0; component < componentCount; component++) { - float val = static_cast(*ptr++); - attribute[component] = f24::fromFloat32(val); + for (int j = 0; j < attr.componentCount; j++) { + uint index = (attrCfg >> (j * 4)) & 0xf; // Get index of attribute in vertexCfg + if (index >= 12) Helpers::panic("[PICA] Vertex attribute used as padding"); + + u32 attribInfo = (vertexCfg >> (index * 4)) & 0xf; + u32 attribType = attribInfo & 0x3; // Type of attribute(sbyte/ubyte/short/float) + u32 size = (attribInfo >> 2) + 1; // Total number of components + + //printf("vertex_attribute_strides[%d] = %d\n", attrCount, attr.size); + vec4f& attribute = currentAttributes[attrCount]; + uint component; // Current component + + switch (attribType) { + case 0: { // Signed byte + s8* ptr = getPointerPhys(attrAddress); + for (component = 0; component < size; component++) { + float val = static_cast(*ptr++); + attribute[component] = f24::fromFloat32(val); + } + attrAddress += size * sizeof(s8); + break; } - break; + + case 1: { // Unsigned byte + u8* ptr = getPointerPhys(attrAddress); + for (component = 0; component < size; component++) { + float val = static_cast(*ptr++); + attribute[component] = f24::fromFloat32(val); + } + attrAddress += size * sizeof(u8); + break; + } + + case 2: { // Short + s16* ptr = getPointerPhys(attrAddress); + for (component = 0; component < size; component++) { + float val = static_cast(*ptr++); + attribute[component] = f24::fromFloat32(val); + } + attrAddress += size * sizeof(s16); + break; + } + + case 3: { // Float + float* ptr = getPointerPhys(attrAddress); + for (component = 0; component < size; component++) { + float val = *ptr++; + attribute[component] = f24::fromFloat32(val); + } + attrAddress += size * sizeof(float); + break; + } + + default: Helpers::panic("[PICA] Unimplemented attribute type %d", attribType); } - case 3: { // Float - float* ptr = getPointerPhys(attrAddress); - for (component = 0; component < componentCount; component++) { - float val = *ptr++; - attribute[component] = f24::fromFloat32(val); - } - break; + // Fill the remaining attribute lanes with default parameters (1.0 for alpha/w, 0.0) for everything else + // Corgi does this although I'm not sure if it's actually needed for anything. + // TODO: Find out + while (component < 4) { + attribute[component] = (component == 3) ? f24::fromFloat32(1.0) : f24::fromFloat32(0.0); + component++; } - default: Helpers::panic("[PICA] Unimplemented attribute type %d", attribType); - } - - // Fill the remaining attribute lanes with default parameters (1.0 for alpha/w, 0.0) for everything else - // Corgi does this although I'm not sure if it's actually needed for anything. - // TODO: Find out - while (component < 4) { - attribute[component] = (component == 3) ? f24::fromFloat32(1.0) : f24::fromFloat32(0.0); - component++; + attrCount++; } + buffer++; } } + // Before running the shader, the PICA maps the fetched attributes from the attribute registers to the shader input registers + // Based on the SH_ATTRIBUTES_PERMUTATION registers. + // Ie it might attribute #0 to v2, #1 to v7, etc + for (int j = 0; j < totalAttribCount; j++) { + const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf; + std::memcpy(&shaderUnit.vs.inputs[mapping], ¤tAttributes[j], sizeof(vec4f)); + } + shaderUnit.vs.run(); std::memcpy(&vertices[i].position, &shaderUnit.vs.outputs[0], sizeof(vec4f)); std::memcpy(&vertices[i].colour, &shaderUnit.vs.outputs[1], sizeof(vec4f)); + std::memcpy(&vertices[i].UVs, &shaderUnit.vs.outputs[2], 2 * sizeof(f24)); //printf("(x, y, z, w) = (%f, %f, %f, %f)\n", (double)vertices[i].position.x(), (double)vertices[i].position.y(), (double)vertices[i].position.z(), (double)vertices[i].position.w()); //printf("(r, g, b, a) = (%f, %f, %f, %f)\n", (double)vertices[i].colour.r(), (double)vertices[i].colour.g(), (double)vertices[i].colour.b(), (double)vertices[i].colour.a()); + //printf("U: %f, V: %f\n", vertices[i].UVs.u(), vertices[i].UVs.v()); } // The fourth type is meant to be "Geometry primitive". TODO: Find out what that is static constexpr std::array primTypes = { - OpenGL::Triangle, OpenGL::TriangleStrip, OpenGL::TriangleFan, OpenGL::LineStrip + OpenGL::Triangle, OpenGL::TriangleStrip, OpenGL::TriangleFan, OpenGL::Triangle }; const auto shape = primTypes[primType]; renderer.drawVertices(shape, vertices, vertexCount); diff --git a/src/core/PICA/shader_interpreter.cpp b/src/core/PICA/shader_interpreter.cpp index 41659d6c..c08f714d 100644 --- a/src/core/PICA/shader_interpreter.cpp +++ b/src/core/PICA/shader_interpreter.cpp @@ -94,7 +94,7 @@ u8 PICAShader::getIndexedSource(u32 source, u32 index) { PICAShader::vec4f PICAShader::getSource(u32 source) { if (source < 0x10) - return attributes[source]; + return inputs[source]; else if (source < 0x20) return tempRegisters[source - 0x10]; else if (source <= 0x7f) @@ -237,7 +237,6 @@ void PICAShader::mova(u32 instruction) { const u32 operandDescriptor = operandDescriptors[instruction & 0x7f]; const u32 src = (instruction >> 12) & 0x7f; const u32 idx = (instruction >> 19) & 3; - const u32 dest = (instruction >> 21) & 0x1f; if (idx) Helpers::panic("[PICA] MOVA: idx != 0"); vec4f srcVector = getSourceSwizzled<1>(src, operandDescriptor); diff --git a/src/core/PICA/shader_unit.cpp b/src/core/PICA/shader_unit.cpp index 9d2406cc..6e9ca9b6 100644 --- a/src/core/PICA/shader_unit.cpp +++ b/src/core/PICA/shader_unit.cpp @@ -18,7 +18,7 @@ void PICAShader::reset() { f32UniformTransfer = false; const vec4f zero = vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() }); - attributes.fill(zero); + inputs.fill(zero); floatUniforms.fill(zero); outputs.fill(zero); tempRegisters.fill(zero); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index eafc7284..0f15a9a0 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -12,12 +12,15 @@ const char* vertexShader = R"( layout (location = 0) in vec4 coords; layout (location = 1) in vec4 vertexColour; + layout (location = 2) in vec2 inUVs; out vec4 colour; + out vec2 UVs; void main() { gl_Position = coords * vec4(1.0, 1.0, -1.0, 1.0); colour = vertexColour; + UVs = inUVs; } )"; @@ -25,12 +28,16 @@ const char* fragmentShader = R"( #version 420 core in vec4 colour; + in vec2 UVs; + out vec4 fragColour; uniform uint u_alphaControl; + uniform sampler2D u_tex0; void main() { - fragColour = colour; + //fragColour = colour; + fragColour = texture(u_tex0, UVs); if ((u_alphaControl & 1u) != 0u) { // Check if alpha test is on uint func = (u_alphaControl >> 4u) & 7u; @@ -127,6 +134,7 @@ void Renderer::initGraphicsContext() { alphaControlLoc = OpenGL::uniformLocation(triangleProgram, "u_alphaControl"); glUniform1ui(alphaControlLoc, 0); // Default alpha control to 0 + glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0); OpenGL::Shader vertDisplay(displayVertexShader, OpenGL::Vertex); OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment); @@ -143,6 +151,9 @@ void Renderer::initGraphicsContext() { // Colour attribute vao.setAttributeFloat(1, 4, sizeof(Vertex), offsetof(Vertex, colour)); vao.enableAttribute(1); + // UV attribute + vao.setAttributeFloat(2, 2, sizeof(Vertex), offsetof(Vertex, UVs)); + vao.enableAttribute(2); dummyVBO.create(); dummyVAO.create(); @@ -182,6 +193,8 @@ void Renderer::drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 c f24 depthScale = f24::fromRaw(regs[PICAInternalRegs::DepthScale] & 0xffffff); f24 depthOffset = f24::fromRaw(regs[PICAInternalRegs::DepthOffset] & 0xffffff); printf("Depth enable: %d, func: %d, writeEnable: %d\n", depthEnable, depthFunc, depthWriteEnable); + printf("Blending enabled: %d\n", (regs[0x100] >> 8) & 1); + printf("Blend func: %08X\n", regs[0x101]); //if (depthScale.toFloat32() != -1.0 || depthOffset.toFloat32() != 0.0) // Helpers::panic("TODO: Implement depth scale/offset. Remove the depth *= -1.0 from vertex shader"); @@ -194,6 +207,7 @@ void Renderer::drawVertices(OpenGL::Primitives primType, Vertex* vertices, u32 c Texture targetTex(addr, static_cast(format), width, height); OpenGL::Texture tex = getTexture(targetTex); + tex.bind(); } // TODO: Actually use this