PICA shader interpreter: add the DP4 and END opcodes.

* include/PICA/shader.hpp: adds DP4 (0x02) and END (0x22) to ShaderOpcodes,
  declares dp4(), and adds getSourceSwizzled<N>(), which fetches a source
  register with getSource() and then applies swizzle<N>() to it.
* src/core/PICA/shader_interpreter.cpp:
    run()       dispatches DP4 to dp4(); END returns from run().
    getSource() 0x00-0x0F attributes, 0x10-0x1F temporary registers,
                0x20-0x7F float uniforms; anything else panics.
    getDest()   0x00-0x06 outputs, 0x10-0x1F temporary registers;
                anything else panics.
    mov()       now reads its operand through getSourceSwizzled<1>().
    dp4()       sums the four lane products of src1 and src2 (each swizzled)
                and stores the result in every destination lane enabled by
                the low 4 bits of the operand descriptor. Panics if idx != 0.
* src/core/PICA/shader_unit.cpp: reset() builds its zero vector from
  f24::zero() instead of f24::fromFloat32(0.0).

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Indentation is reconstructed, so apply it with whitespace-insensitive
matching. The template parameter list of getSourceSwizzled was missing and
has been restored from its call sites (getSourceSwizzled<1>/<2>, swizzle<1>).
The std::array context lines are also missing their template arguments; those
could not be determined from the patch alone and are left as found - confirm
against the target tree.

diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp
index 9fcf0a4b..c5af3443 100644
--- a/include/PICA/shader.hpp
+++ b/include/PICA/shader.hpp
@@ -12,7 +12,9 @@ enum class ShaderType {

 namespace ShaderOpcodes {
     enum : u32 {
-        MOV = 0x13
+        DP4 = 0x02,
+        MOV = 0x13,
+        END = 0x22
     };
 }

@@ -31,11 +33,13 @@ class PICAShader {
     std::array tempRegisters;
     ShaderType type;

-    // Shader opcodes
-    void mov(u32 instruction);
     vec4f getSource(u32 source);
     vec4f& getDest(u32 dest);

+    // Shader opcodes
+    void dp4(u32 instruction);
+    void mov(u32 instruction);
+
     // src1, src2 and src3 have different negation & component swizzle bits in the operand descriptor
     // https://problemkaputt.github.io/gbatek.htm#3dsgpushaderinstructionsetopcodesummary in the
     // "Shader Operand Descriptors" section
@@ -75,6 +79,14 @@ class PICAShader {
         return ret;
     }

+    template <int sourceIndex>
+    vec4f getSourceSwizzled(u32 source, u32 opDescriptor) {
+        vec4f srcVector = getSource(source);
+        srcVector = swizzle<sourceIndex>(srcVector, opDescriptor);
+
+        return srcVector;
+    }
+
 public:
     std::array loadedShader; // Currently loaded & active shader
     std::array bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
diff --git a/src/core/PICA/shader_interpreter.cpp b/src/core/PICA/shader_interpreter.cpp
index f53eb6d2..b213b1ed 100644
--- a/src/core/PICA/shader_interpreter.cpp
+++ b/src/core/PICA/shader_interpreter.cpp
@@ -8,6 +8,8 @@ void PICAShader::run() {
         const u32 opcode = instruction >> 26; // Top 6 bits are the opcode

         switch (opcode) {
+            case ShaderOpcodes::DP4: dp4(instruction); break;
+            case ShaderOpcodes::END: return; // Stop running shader
             case ShaderOpcodes::MOV: mov(instruction); break;
             default:Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode);
         }
@@ -15,16 +17,20 @@ void PICAShader::run() {
 }

 PICAShader::vec4f PICAShader::getSource(u32 source) {
-    if (source < 16)
+    if (source < 0x10)
         return attributes[source];
-    else if (source >= 0x20 && source <= 0x7f)
+    else if (source < 0x20)
+        return tempRegisters[source - 0x10];
+    else if (source <= 0x7f)
         return floatUniforms[source - 0x20];

     Helpers::panic("[PICA] Unimplemented source value: %X", source);
 }

 PICAShader::vec4f& PICAShader::getDest(u32 dest) {
-    if (dest >= 0x10 && dest <= 0x1f) { // Temporary registers
+    if (dest <= 0x6) {
+        return outputs[dest];
+    } else if (dest >= 0x10 && dest <= 0x1f) { // Temporary registers
         return tempRegisters[dest - 0x10];
     }
     Helpers::panic("[PICA] Unimplemented dest: %X", dest);
@@ -37,8 +43,7 @@ void PICAShader::mov(u32 instruction) {
     const u32 dest = (instruction >> 21) & 0x1f;

     if (idx) Helpers::panic("[PICA] MOV: idx != 0");
-    vec4f srcVector = getSource(src);
-    srcVector = swizzle<1>(srcVector, operandDescriptor);
+    vec4f srcVector = getSourceSwizzled<1>(src, operandDescriptor);
     vec4f& destVector = getDest(dest);

     // Destination component mask. Tells us which lanes of the destination register will be written to
@@ -48,4 +53,27 @@ void PICAShader::mov(u32 instruction) {
             destVector[3 - i] = srcVector[3 - i];
         }
     }
+}
+
+void PICAShader::dp4(u32 instruction) {
+    const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
+    const u32 src1 = (instruction >> 12) & 0x7f;
+    const u32 src2 = (instruction >> 7) & 0x1f; // src2 coming first because PICA moment
+    const u32 idx = (instruction >> 19) & 3;
+    const u32 dest = (instruction >> 21) & 0x1f;
+
+    if (idx) Helpers::panic("[PICA] DP4: idx != 0");
+    vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);
+    vec4f srcVec2 = getSourceSwizzled<2>(src2, operandDescriptor);
+
+    vec4f& destVector = getDest(dest);
+    f24 dot = srcVec1[0] * srcVec2[0] + srcVec1[1] * srcVec2[1] + srcVec1[2] * srcVec2[2] + srcVec1[3] * srcVec2[3];
+
+    // Destination component mask. Tells us which lanes of the destination register will be written to
+    u32 componentMask = operandDescriptor & 0xf;
+    for (int i = 0; i < 4; i++) {
+        if (componentMask & (1 << i)) {
+            destVector[3 - i] = dot;
+        }
+    }
 }
\ No newline at end of file
diff --git a/src/core/PICA/shader_unit.cpp b/src/core/PICA/shader_unit.cpp
index 239fd75e..1e8377e9 100644
--- a/src/core/PICA/shader_unit.cpp
+++ b/src/core/PICA/shader_unit.cpp
@@ -18,7 +18,7 @@ void PICAShader::reset() {
     opDescriptorIndex = 0;
     f32UniformTransfer = false;

-    const vec4f zero = vec4f({ f24::fromFloat32(0.0), f24::fromFloat32(0.0), f24::fromFloat32(0.0), f24::fromFloat32(0.0) });
+    const vec4f zero = vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() });
     attributes.fill(zero);
     floatUniforms.fill(zero);
     outputs.fill(zero);