From 979519f785046e014f460bd5ad3b2aae3aeea797 Mon Sep 17 00:00:00 2001
From: wheremyfoodat
Date: Mon, 26 Sep 2022 01:05:03 +0300
Subject: [PATCH] [PICA interpreter] Implement RSQ, MAD, CALLU and boot SM64

---
 include/PICA/regs.hpp                |  1 +
 include/PICA/shader.hpp              | 17 ++++-
 src/core/PICA/regs.cpp               |  4 ++
 src/core/PICA/shader_interpreter.cpp | 98 ++++++++++++++++++++++++++++
 4 files changed, 119 insertions(+), 1 deletion(-)

diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp
index 95ead4ed..8989d8fe 100644
--- a/include/PICA/regs.hpp
+++ b/include/PICA/regs.hpp
@@ -50,6 +50,7 @@ namespace PICAInternalRegs {
 		PrimitiveConfig = 0x25E,
 
 		// Vertex shader registers
+		VertexBoolUniform = 0x2B0,
 		VertexIntUniform0 = 0x2B1,
 		VertexIntUniform1 = 0x2B2,
 		VertexIntUniform2 = 0x2B3,
diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp
index 2b4e012d..868b807d 100644
--- a/include/PICA/shader.hpp
+++ b/include/PICA/shader.hpp
@@ -17,14 +17,18 @@ namespace ShaderOpcodes {
 		DP4 = 0x02,
 		MUL = 0x08,
 		MIN = 0x0D,
+		RSQ = 0x0F,
 		MOVA = 0x12,
 		MOV = 0x13,
 		NOP = 0x21,
 		END = 0x22,
+		CALLU = 0x26,
+		IFU = 0x27,
 		IFC = 0x28,
 		LOOP = 0x29,
 		CMP1 = 0x2E, // Both of these instructions are CMP
-		CMP2 = 0x2F
+		CMP2 = 0x2F,
+		MAD = 0x38 // Opcodes 0x38-0x3F all encode MAD; the interpreter's switch matches the whole range
 	};
 }
 
@@ -45,6 +49,11 @@ class PICAShader {
 		u32 newPC; // PC after the if block is done executing (= DST + NUM)
 	};
 
+	struct CallInfo {
+		u32 endingPC; // PC at the end of the function
+		u32 returnPC; // PC to return to after the function ends
+	};
+
 	int bufferIndex; // Index of the next instruction to overwrite for shader uploads
 	int opDescriptorIndex; // Index of the next operand descriptor we'll overwrite
 	u32 floatUniformIndex = 0; // Which float uniform are we writing to? ([0, 95] range)
@@ -61,9 +70,11 @@ class PICAShader {
 	u32 pc = 0; // Program counter: Index of the next instruction we're going to execute
 	u32 loopIndex = 0; // The index of our loop stack (0 = empty, 4 = full)
 	u32 ifIndex = 0; // The index of our IF stack
+	u32 callIndex = 0; // The index of our CALL stack
 
 	std::array loopInfo;
 	std::array conditionalInfo;
+	std::array<CallInfo, 4> callInfo;
 
 	ShaderType type;
 
@@ -72,15 +83,19 @@ class PICAShader {
 
 	// Shader opcodes
 	void add(u32 instruction);
+	void callu(u32 instruction);
 	void cmp(u32 instruction);
 	void dp3(u32 instruction);
 	void dp4(u32 instruction);
 	void ifc(u32 instruction);
+	void ifu(u32 instruction);
 	void loop(u32 instruction);
+	void mad(u32 instruction);
 	void min(u32 instruction);
 	void mov(u32 instruction);
 	void mova(u32 instruction);
 	void mul(u32 instruction);
+	void rsq(u32 instruction);
 
 	// src1, src2 and src3 have different negation & component swizzle bits in the operand descriptor
 	// https://problemkaputt.github.io/gbatek.htm#3dsgpushaderinstructionsetopcodesummary in the
diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp
index 3fcff134..40b36b98 100644
--- a/src/core/PICA/regs.cpp
+++ b/src/core/PICA/regs.cpp
@@ -100,6 +100,10 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 			shaderUnit.vs.uploadDescriptor(value);
 			break;
 
+		case VertexBoolUniform:
+			shaderUnit.vs.boolUniform = value & 0xffff;
+			break;
+
 		case VertexIntUniform0: case VertexIntUniform1: case VertexIntUniform2: case VertexIntUniform3:
 			shaderUnit.vs.uploadIntUniform(index - VertexIntUniform0, value);
 			break;
diff --git a/src/core/PICA/shader_interpreter.cpp b/src/core/PICA/shader_interpreter.cpp
index bb47d2a9..cf299547 100644
--- a/src/core/PICA/shader_interpreter.cpp
+++ b/src/core/PICA/shader_interpreter.cpp
@@ -1,9 +1,11 @@
 #include "PICA/shader.hpp"
+#include <cmath>
 
 void PICAShader::run() {
 	pc = 0;
 	loopIndex = 0;
 	ifIndex = 0;
+	callIndex = 0;
 
 	while (true) {
 		const u32 instruction = loadedShader[pc++];
@@ -11,6 +13,7 @@ void PICAShader::run() {
 
 		switch (opcode) {
 			case ShaderOpcodes::ADD: add(instruction); break;
+			case ShaderOpcodes::CALLU: callu(instruction); break;
 			case ShaderOpcodes::CMP1: case ShaderOpcodes::CMP2:
 				cmp(instruction);
 				break;
@@ -18,12 +21,19 @@ void PICAShader::run() {
 			case ShaderOpcodes::DP4: dp4(instruction); break;
 			case ShaderOpcodes::END: return; // Stop running shader
 			case ShaderOpcodes::IFC: ifc(instruction); break;
+			case ShaderOpcodes::IFU: ifu(instruction); break;
 			case ShaderOpcodes::LOOP: loop(instruction); break;
 			case ShaderOpcodes::MIN: min(instruction); break;
 			case ShaderOpcodes::MOV: mov(instruction); break;
 			case ShaderOpcodes::MOVA: mova(instruction); break;
 			case ShaderOpcodes::MUL: mul(instruction); break;
 			case ShaderOpcodes::NOP: break; // Do nothing
+			case ShaderOpcodes::RSQ: rsq(instruction); break;
+
+			case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F:
+				mad(instruction);
+				break;
+
 			default:Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode);
 		}
 
@@ -49,6 +59,15 @@ void PICAShader::run() {
 				ifIndex -= 1;
 			}
 		}
+
+		// Handle calls
+		if (callIndex != 0) {
+			auto& info = callInfo[callIndex - 1];
+			if (pc == info.endingPC) { // Check if the called function ended; if so, pop it and resume at the caller
+				pc = info.returnPC;
+				callIndex -= 1;
+			}
+		}
 	}
 }
 
@@ -249,6 +268,49 @@ void PICAShader::dp4(u32 instruction) {
 	}
 }
 
+void PICAShader::rsq(u32 instruction) { // RSQ: 1 / sqrt of the first swizzled src1 component, written to every enabled dest component
+	const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
+	const u32 src1 = (instruction >> 12) & 0x7f;
+	const u32 idx = (instruction >> 19) & 3;
+	const u32 dest = (instruction >> 21) & 0x1f;
+
+	if (idx) Helpers::panic("[PICA] RSQ: idx != 0");
+	vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);
+
+	vec4f& destVector = getDest(dest);
+	f24 res = f24::fromFloat32(1.0f / std::sqrt(srcVec1[0].toFloat32()));
+
+	u32 componentMask = operandDescriptor & 0xf;
+	for (int i = 0; i < 4; i++) {
+		if (componentMask & (1 << i)) {
+			destVector[3 - i] = res; // Mask bit i selects vector component 3 - i
+		}
+	}
+}
+
+void PICAShader::mad(u32 instruction) { // MAD: dest = src1 * src2 + src3 for every enabled dest component
+	const u32 operandDescriptor = operandDescriptors[instruction & 0x1f];
+	const u32 src1 = (instruction >> 17) & 0x1f;
+	u32 src2 = (instruction >> 10) & 0x7f;
+	const u32 src3 = (instruction >> 5) & 0x1f;
+	const u32 idx = (instruction >> 22) & 3;
+	const u32 dest = (instruction >> 24) & 0x1f;
+
+	src2 = getIndexedSource(src2, idx);
+
+	auto src1Vec = getSourceSwizzled<1>(src1, operandDescriptor);
+	auto src2Vec = getSourceSwizzled<2>(src2, operandDescriptor);
+	auto src3Vec = getSourceSwizzled<3>(src3, operandDescriptor);
+	auto& destVector = getDest(dest);
+
+	u32 componentMask = operandDescriptor & 0xf;
+	for (int i = 0; i < 4; i++) {
+		if (componentMask & (1 << i)) {
+			destVector[3 - i] = src1Vec[3 - i] * src2Vec[3 - i] + src3Vec[3 - i]; // Mask bit i selects vector component 3 - i
+		}
+	}
+}
+
 void PICAShader::cmp(u32 instruction) {
 	const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
 	const u32 src1 = (instruction >> 12) & 0x7f;
@@ -312,6 +374,42 @@ void PICAShader::ifc(u32 instruction) {
 	}
 }
 
+void PICAShader::callu(u32 instruction) { // CALLU: if the selected bool uniform bit is set, jump to DST and return once PC reaches DST + NUM
+	const u32 dest = (instruction >> 10) & 0xfff;
+	const u32 bit = (instruction >> 22) & 0xf; // Bit of the bool uniform to check
+
+	if (boolUniform & (1 << bit)) {
+		if (callIndex >= 4) [[unlikely]]
+			Helpers::panic("[PICA] Overflowed CALL stack");
+
+		const u32 num = instruction & 0xff;
+
+		auto& block = callInfo[callIndex++];
+		block.endingPC = dest + num;
+		block.returnPC = pc;
+
+		pc = dest;
+	}
+}
+
+void PICAShader::ifu(u32 instruction) { // IFU: IF block whose condition is the selected bool uniform bit; jumps to DST when the bit is clear
+	const u32 dest = (instruction >> 10) & 0xfff;
+	const u32 bit = (instruction >> 22) & 0xf; // Bit of the bool uniform to check
+
+	if (boolUniform & (1 << bit)) {
+		if (ifIndex >= 8) [[unlikely]]
+			Helpers::panic("[PICA] Overflowed IF stack");
+
+		const u32 num = instruction & 0xff;
+
+		auto& block = conditionalInfo[ifIndex++];
+		block.endingPC = dest;
+		block.newPC = dest + num;
+	} else {
+		pc = dest;
+	}
+}
+
 void PICAShader::loop(u32 instruction) {
 	if (loopIndex >= 4) [[unlikely]]
 		Helpers::panic("[PICA] Overflowed loop stack");