diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 0b3246db..89f13913 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -12,6 +12,7 @@ namespace PICAInternalRegs { DepthScale = 0x4D, DepthOffset = 0x4E, ShaderOutputCount = 0x4F, + ShaderOutmap0 = 0x50, DepthmapEnable = 0x6D, TexUnitCfg = 0x80, diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index f0e832a2..2605ba69 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -2,12 +2,50 @@ #include #include +#include #include "PICA/float_types.hpp" #include "PICA/regs.hpp" using namespace Floats; +// A representation of the output vertex as it comes out of the vertex shader, with padding and all +struct OutputVertex { + using vec2f = OpenGL::Vector; + using vec3f = OpenGL::Vector; + using vec4f = OpenGL::Vector; + + union { + struct { + vec4f positions; // Vertex position + vec4f quaternion; // Quaternion specifying the normal/tangent frame (for fragment lighting) + vec4f colour; // Vertex color + vec2f texcoord0; // Texcoords for texture unit 0 (Only U and V, W is stored separately for 3D textures!) + vec2f texcoord1; // Texcoords for TU 1 + f24 texcoord0_w; // W component for texcoord 0 if using a 3D texture + u32 padding; // Unused + + vec3f view; // View vector (for fragment lighting) + u32 padding2; // Unused + vec2f texcoord2; // Texcoords for TU 2 + }; + + // The software, non-accelerated vertex loader writes here and then reads specific components from the above struct + f24 raw[0x20]; + }; + OutputVertex() {} +}; +#define ASSERT_POS(member, pos) static_assert(offsetof(OutputVertex, member) == pos * sizeof(f24), "OutputVertex struct is broken!"); + +ASSERT_POS(positions, 0) +ASSERT_POS(quaternion, 4) +ASSERT_POS(colour, 8) +ASSERT_POS(texcoord0, 12) +ASSERT_POS(texcoord1, 14) +ASSERT_POS(texcoord0_w, 16) +ASSERT_POS(view, 18) +ASSERT_POS(texcoord2, 22) + GPU::GPU(Memory& mem) : mem(mem), renderer(*this, regs) { vram = new u8[vramSize]; mem.setVRAM(vram); // Give the bus a pointer to our VRAM @@ -194,9 +232,22 @@ void GPU::drawArrays() { } shaderUnit.vs.run(); - std::memcpy(&vertices[i].position, &shaderUnit.vs.outputs[0], sizeof(vec4f)); - std::memcpy(&vertices[i].colour, &shaderUnit.vs.outputs[1], sizeof(vec4f)); - std::memcpy(&vertices[i].UVs, &shaderUnit.vs.outputs[2], 2 * sizeof(f24)); + OutputVertex out; + + // Map shader outputs to fixed function properties + const u32 totalShaderOutputs = regs[PICAInternalRegs::ShaderOutputCount] & 7; + for (int i = 0; i < totalShaderOutputs; i++) { + const u32 config = regs[PICAInternalRegs::ShaderOutmap0 + i]; + + for (int j = 0; j < 4; j++) { // pls unroll + const u32 mapping = (config >> (j * 8)) & 0x1F; + out.raw[mapping] = shaderUnit.vs.outputs[i][j]; + } + } + + std::memcpy(&vertices[i].position, &out.positions, sizeof(vec4f)); + std::memcpy(&vertices[i].colour, &out.colour, sizeof(vec4f)); + std::memcpy(&vertices[i].UVs, &out.texcoord0, 2 * sizeof(f24)); //printf("(x, y, z, w) = (%f, %f, %f, %f)\n", (double)vertices[i].position.x(), (double)vertices[i].position.y(), (double)vertices[i].position.z(), (double)vertices[i].position.w()); //printf("(r, g, b, a) = (%f, %f, %f, %f)\n", (double)vertices[i].colour.r(), (double)vertices[i].colour.g(), (double)vertices[i].colour.b(), (double)vertices[i].colour.a());