diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp
index eacafcc9..ac3ce98b 100644
--- a/include/PICA/regs.hpp
+++ b/include/PICA/regs.hpp
@@ -58,5 +58,14 @@ namespace PICAInternalRegs {
 		VertexShaderData5 = 0x2D1,
 		VertexShaderData6 = 0x2D2,
 		VertexShaderData7 = 0x2D3,
+		VertexShaderOpDescriptorIndex = 0x2D5,
+		VertexShaderOpDescriptorData0 = 0x2D6,
+		VertexShaderOpDescriptorData1 = 0x2D7,
+		VertexShaderOpDescriptorData2 = 0x2D8,
+		VertexShaderOpDescriptorData3 = 0x2D9,
+		VertexShaderOpDescriptorData4 = 0x2DA,
+		VertexShaderOpDescriptorData5 = 0x2DB,
+		VertexShaderOpDescriptorData6 = 0x2DC,
+		VertexShaderOpDescriptorData7 = 0x2DD,
 	};
 }
\ No newline at end of file
diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp
index e521b057..6a233e92 100644
--- a/include/PICA/shader.hpp
+++ b/include/PICA/shader.hpp
@@ -10,13 +10,67 @@ enum class ShaderType {
 	Vertex, Geometry
 };
 
+namespace ShaderOpcodes {
+	enum : u32 {
+		MOV = 0x13
+	};
+}
+
 class PICAShader {
 	using f24 = Floats::f24;
 	using vec4f = OpenGL::Vector<f24, 4>;
 
 	int bufferIndex; // Index of the next instruction to overwrite for shader uploads
+	int opDescriptorIndex; // Index of the next operand descriptor we'll overwrite
+	std::array<u32, 128> operandDescriptors; // Operand descriptor table, looked up with the low 7 bits of an instruction
+	std::array<vec4f, 16> tempRegisters; // Temporary registers, addressed by destination encodings 0x10-0x1F
 	ShaderType type;
 
+	// Shader opcodes
+	void mov(u32 instruction);
+	vec4f getSource(u32 source);
+	vec4f& getDest(u32 dest);
+
+	// src1, src2 and src3 have different negation & component swizzle bits in the operand descriptor
+	// https://problemkaputt.github.io/gbatek.htm#3dsgpushaderinstructionsetopcodesummary in the
+	// "Shader Operand Descriptors" section
+	template <int sourceIndex>
+	vec4f swizzle(vec4f& source, u32 opDescriptor) {
+		static_assert(sourceIndex >= 1 && sourceIndex <= 3, "sourceIndex must be 1, 2 or 3 (SRC1, SRC2 or SRC3)");
+		vec4f ret;
+		u32 compSwizzle;
+		bool negate;
+
+		if constexpr (sourceIndex == 1) { // SRC1
+			negate = ((opDescriptor >> 4) & 1) != 0;
+			compSwizzle = (opDescriptor >> 5) & 0xff;
+		} else if constexpr (sourceIndex == 2) { // SRC2
+			negate = ((opDescriptor >> 13) & 1) != 0;
+			compSwizzle = (opDescriptor >> 14) & 0xff;
+		} else if constexpr (sourceIndex == 3) { // SRC3
+			negate = ((opDescriptor >> 22) & 1) != 0;
+			compSwizzle = (opDescriptor >> 23) & 0xff;
+		}
+
+		// Iterate through every component of the swizzled vector in reverse order
+		// And get which source component's index to match it with
+		for (int comp = 0; comp < 4; comp++) {
+			int index = compSwizzle & 3; // Get index for this component
+			compSwizzle >>= 2; // Move to next component index
+			ret[3 - comp] = source[index];
+		}
+
+		// Negate result if the negate bit is set
+		if (negate) {
+			ret[0] = -ret[0];
+			ret[1] = -ret[1];
+			ret[2] = -ret[2];
+			ret[3] = -ret[3];
+		}
+
+		return ret;
+	}
+
 public:
 	std::array<u32, 512> loadedShader; // Currently loaded & active shader
 	std::array<u32, 512> bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
@@ -34,15 +88,18 @@ public:
 	void reset() {
 		loadedShader.fill(0);
 		bufferedShader.fill(0);
+		operandDescriptors.fill(0);
 
 		intUniforms.fill(0);
 		boolUniform = 0;
 		bufferIndex = 0;
+		opDescriptorIndex = 0;
 
 		const vec4f zero = vec4f({ f24::fromFloat32(0.0), f24::fromFloat32(0.0), f24::fromFloat32(0.0), f24::fromFloat32(0.0) });
 		attributes.fill(zero);
 		floatUniforms.fill(zero);
 		outputs.fill(zero);
+		tempRegisters.fill(zero);
 	}
 
 	void finalize() {
@@ -51,12 +108,23 @@ public:
 
 	void setBufferIndex(u32 offset) {
 		if (offset != 0) Helpers::panic("Is this register 9 or 11 bit?");
-		bufferIndex = (offset >> 2) & 511;
+		bufferIndex = (offset >> 2) & 0x1ff;
+	}
+
+	// Selects the operand descriptor slot that the next uploadDescriptor call writes to (7-bit index)
+	void setOpDescriptorIndex(u32 offset) {
+		opDescriptorIndex = offset & 0x7f;
 	}
 
 	void uploadWord(u32 word) {
 		bufferedShader[bufferIndex++] = word;
-		bufferIndex &= 511;
+		bufferIndex &= 0x1ff;
+	}
+
+	// Stores one operand descriptor in the current slot, then advances the slot index, wrapping after 128 entries
+	void uploadDescriptor(u32 word) {
+		operandDescriptors[opDescriptorIndex++] = word;
+		opDescriptorIndex &= 0x7f;
 	}
 
 	void run();
diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp
index b8ab7417..7caee60f 100644
--- a/src/core/PICA/regs.cpp
+++ b/src/core/PICA/regs.cpp
@@ -82,6 +82,16 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 
 			break;
 
+		case VertexShaderOpDescriptorIndex:
+			shaderUnit.vs.setOpDescriptorIndex(value);
+			break;
+
+		case VertexShaderOpDescriptorData0: case VertexShaderOpDescriptorData1: case VertexShaderOpDescriptorData2:
+		case VertexShaderOpDescriptorData3: case VertexShaderOpDescriptorData4: case VertexShaderOpDescriptorData5:
+		case VertexShaderOpDescriptorData6: case VertexShaderOpDescriptorData7:
+			shaderUnit.vs.uploadDescriptor(value);
+			break;
+
 		case VertexShaderTransferEnd:
 			if (value != 0) shaderUnit.vs.finalize();
 			break;
diff --git a/src/core/PICA/shader_interpreter.cpp b/src/core/PICA/shader_interpreter.cpp
index f1ca0601..fdd5d214 100644
--- a/src/core/PICA/shader_interpreter.cpp
+++ b/src/core/PICA/shader_interpreter.cpp
@@ -8,7 +8,45 @@ void PICAShader::run() {
 		const u32 opcode = instruction >> 26; // Top 6 bits are the opcode
 
 		switch (opcode) {
+			case ShaderOpcodes::MOV: mov(instruction); break;
 			default:Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode);
 		}
 	}
+}
+
+// Fetches a source operand. Only indices 0-15, which read the attributes array, are handled; anything else panics
+PICAShader::vec4f PICAShader::getSource(u32 source) {
+	if (source < 16)
+		return attributes[source];
+	Helpers::panic("[PICA] Unimplemented source value: %X", source);
+}
+
+// Returns a writable reference to a destination register. Only 0x10-0x1F (temporary registers) are handled; anything else panics
+PICAShader::vec4f& PICAShader::getDest(u32 dest) {
+	if (dest >= 0x10 && dest <= 0x1f) { // Temporary registers
+		return tempRegisters[dest - 0x10];
+	}
+	Helpers::panic("[PICA] Unimplemented dest: %X", dest);
+}
+
+// MOV: copies the SRC1 operand, after applying the operand descriptor's swizzle and negate bit, into the
+// destination register, writing only the lanes enabled by the descriptor's 4-bit component mask
+void PICAShader::mov(u32 instruction) {
+	const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
+	const u32 src = (instruction >> 12) & 0x7f;
+	const u32 idx = (instruction >> 19) & 3;
+	const u32 dest = (instruction >> 21) & 0x1f;
+
+	if (idx) Helpers::panic("[PICA] MOV: idx != 0");
+	vec4f srcVector = getSource(src);
+	srcVector = swizzle<1>(srcVector, operandDescriptor);
+	vec4f& destVector = getDest(dest);
+
+	// Destination component mask. Tells us which lanes of the destination register will be written to
+	u32 componentMask = operandDescriptor & 0xf;
+	for (int i = 0; i < 4; i++) {
+		if (componentMask & (1 << i)) { // Mask bit i gates lane 3 - i
+			destVector[3 - i] = srcVector[3 - i];
+		}
+	}
 }
\ No newline at end of file