[PICA interpreter] Implement RSQ, MAD, CALLU and boot SM64

This commit is contained in:
wheremyfoodat 2022-09-26 01:05:03 +03:00
parent f4fabdae40
commit 979519f785
4 changed files with 119 additions and 1 deletions

View file

@ -50,6 +50,7 @@ namespace PICAInternalRegs {
PrimitiveConfig = 0x25E,
// Vertex shader registers
VertexBoolUniform = 0x2B0,
VertexIntUniform0 = 0x2B1,
VertexIntUniform1 = 0x2B2,
VertexIntUniform2 = 0x2B3,

View file

@ -17,14 +17,18 @@ namespace ShaderOpcodes {
DP4 = 0x02,
MUL = 0x08,
MIN = 0x0D,
RSQ = 0x0F,
MOVA = 0x12,
MOV = 0x13,
NOP = 0x21,
END = 0x22,
CALLU = 0x26,
IFU = 0x27,
IFC = 0x28,
LOOP = 0x29,
CMP1 = 0x2E, // Both of these instructions are CMP
CMP2 = 0x2F
CMP2 = 0x2F,
MAD = 0x38 // Opcodes 0x38-0x3F all encode MAD; only the first one is named here
};
}
@ -45,6 +49,11 @@ class PICAShader {
u32 newPC; // PC after the if block is done executing (= DST + NUM)
};
struct CallInfo {
u32 endingPC; // PC at the end of the function
u32 returnPC; // PC to return to after the function ends
};
int bufferIndex; // Index of the next instruction to overwrite for shader uploads
int opDescriptorIndex; // Index of the next operand descriptor we'll overwrite
u32 floatUniformIndex = 0; // Which float uniform are we writing to? ([0, 95] range)
@ -61,9 +70,11 @@ class PICAShader {
u32 pc = 0; // Program counter: Index of the next instruction we're going to execute
u32 loopIndex = 0; // The index of our loop stack (0 = empty, 4 = full)
u32 ifIndex = 0; // The index of our IF stack
u32 callIndex = 0; // The index of our CALL stack
std::array<Loop, 4> loopInfo;
std::array<ConditionalInfo, 8> conditionalInfo;
std::array<CallInfo, 8> callInfo;
ShaderType type;
@ -72,15 +83,19 @@ class PICAShader {
// Shader opcodes
void add(u32 instruction);
void callu(u32 instruction);
void cmp(u32 instruction);
void dp3(u32 instruction);
void dp4(u32 instruction);
void ifc(u32 instruction);
void ifu(u32 instruction);
void loop(u32 instruction);
void mad(u32 instruction);
void min(u32 instruction);
void mov(u32 instruction);
void mova(u32 instruction);
void mul(u32 instruction);
void rsq(u32 instruction);
// src1, src2 and src3 have different negation & component swizzle bits in the operand descriptor
// https://problemkaputt.github.io/gbatek.htm#3dsgpushaderinstructionsetopcodesummary in the

View file

@ -100,6 +100,10 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
shaderUnit.vs.uploadDescriptor(value);
break;
case VertexBoolUniform:
shaderUnit.vs.boolUniform = value & 0xffff;
break;
case VertexIntUniform0: case VertexIntUniform1: case VertexIntUniform2: case VertexIntUniform3:
shaderUnit.vs.uploadIntUniform(index - VertexIntUniform0, value);
break;

View file

@ -1,9 +1,11 @@
#include "PICA/shader.hpp"
#include <cmath>
void PICAShader::run() {
pc = 0;
loopIndex = 0;
ifIndex = 0;
callIndex = 0;
while (true) {
const u32 instruction = loadedShader[pc++];
@ -11,6 +13,7 @@ void PICAShader::run() {
switch (opcode) {
case ShaderOpcodes::ADD: add(instruction); break;
case ShaderOpcodes::CALLU: callu(instruction); break;
case ShaderOpcodes::CMP1: case ShaderOpcodes::CMP2:
cmp(instruction);
break;
@ -18,12 +21,19 @@ void PICAShader::run() {
case ShaderOpcodes::DP4: dp4(instruction); break;
case ShaderOpcodes::END: return; // Stop running shader
case ShaderOpcodes::IFC: ifc(instruction); break;
case ShaderOpcodes::IFU: ifu(instruction); break;
case ShaderOpcodes::LOOP: loop(instruction); break;
case ShaderOpcodes::MIN: min(instruction); break;
case ShaderOpcodes::MOV: mov(instruction); break;
case ShaderOpcodes::MOVA: mova(instruction); break;
case ShaderOpcodes::MUL: mul(instruction); break;
case ShaderOpcodes::NOP: break; // Do nothing
case ShaderOpcodes::RSQ: rsq(instruction); break;
case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F:
mad(instruction);
break;
default:Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode);
}
@ -49,6 +59,15 @@ void PICAShader::run() {
ifIndex -= 1;
}
}
// Handle calls: once the PC reaches the end of the innermost active function, return from it
if (callIndex != 0) {
auto& info = callInfo[callIndex - 1]; // Most recently pushed call frame
if (pc == info.endingPC) { // Check if the called function ended
pc = info.returnPC; // Resume at the PC that was saved when the call was made
callIndex -= 1; // Pop the frame off the CALL stack
}
}
}
}
@ -249,6 +268,49 @@ void PICAShader::dp4(u32 instruction) {
}
}
// RSQ: computes 1 / sqrt(v), where v is component 0 of the swizzled src1 operand, and writes
// that single result to every destination component enabled in the operand descriptor's
// write mask (low 4 bits; mask bit i selects component 3 - i).
// Fields as decoded here: bits 0-6 = operand descriptor index, bits 12-18 = src1,
// bits 19-20 = idx (relative addressing selector), bits 21-25 = dest.
void PICAShader::rsq(u32 instruction) {
	const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
	u32 src1 = (instruction >> 12) & 0x7f;
	const u32 idx = (instruction >> 19) & 3;
	const u32 dest = (instruction >> 21) & 0x1f;

	// Resolve relative addressing through the same helper mad() uses, instead of aborting
	// whenever a shader uses idx != 0 with RSQ
	src1 = getIndexedSource(src1, idx);

	vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);
	vec4f& destVector = getDest(dest);
	const f24 res = f24::fromFloat32(1.0f / std::sqrt(srcVec1[0].toFloat32()));

	const u32 componentMask = operandDescriptor & 0xf;
	for (int i = 0; i < 4; i++) {
		if (componentMask & (1 << i)) {
			destVector[3 - i] = res;
		}
	}
}
// MAD: per-component multiply-add, dest = src1 * src2 + src3, written only to the destination
// components enabled in the operand descriptor's write mask (low 4 bits; mask bit i selects
// component 3 - i).
// Fields as decoded here: bits 0-4 = operand descriptor index, bits 5-9 = src3,
// bits 10-16 = src2, bits 17-21 = src1, bits 22-23 = idx, bits 24-28 = dest.
void PICAShader::mad(u32 instruction) {
	const u32 descriptor = operandDescriptors[instruction & 0x1f];
	const u32 destReg = (instruction >> 24) & 0x1f;
	const u32 addrIdx = (instruction >> 22) & 3;
	const u32 regA = (instruction >> 17) & 0x1f;
	// Only src2 goes through the indexed-source helper
	const u32 regB = getIndexedSource((instruction >> 10) & 0x7f, addrIdx);
	const u32 regC = (instruction >> 5) & 0x1f;

	auto a = getSourceSwizzled<1>(regA, descriptor);
	auto b = getSourceSwizzled<2>(regB, descriptor);
	auto c = getSourceSwizzled<3>(regC, descriptor);
	auto& out = getDest(destReg);

	const u32 writeMask = descriptor & 0xf;
	// Walk components from 3 down to 0; component `lane` is guarded by mask bit (3 - lane)
	for (int lane = 3; lane >= 0; lane--) {
		const int maskBit = 3 - lane;
		if ((writeMask & (1 << maskBit)) == 0) {
			continue;
		}
		out[lane] = a[lane] * b[lane] + c[lane];
	}
}
void PICAShader::cmp(u32 instruction) {
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
const u32 src1 = (instruction >> 12) & 0x7f;
@ -312,6 +374,42 @@ void PICAShader::ifc(u32 instruction) {
}
}
// CALLU: conditional function call. If the selected bit of the bool uniform is set, pushes a
// frame onto the CALL stack and jumps to the destination; otherwise falls through.
// Fields as decoded here: bits 0-7 = NUM (instruction count), bits 10-21 = DST,
// bits 22-25 = index of the bool uniform bit to test.
void PICAShader::callu(u32 instruction) {
	const u32 uniformBit = (instruction >> 22) & 0xf;
	if ((boolUniform & (1 << uniformBit)) == 0) {
		return; // Condition not met: nothing is pushed and the PC is left alone
	}

	if (callIndex >= 4) [[unlikely]]
		Helpers::panic("[PICA] Overflowed CALL stack");

	const u32 target = (instruction >> 10) & 0xfff;
	const u32 count = instruction & 0xff;

	auto& frame = callInfo[callIndex];
	callIndex += 1;
	frame.endingPC = target + count; // The function ends once the PC reaches DST + NUM
	frame.returnPC = pc;             // Where execution resumes after the function ends
	pc = target;
}
// IFU: conditional on one bit of the bool uniform. When the bit is set, execution continues
// into the following instructions and an entry is pushed onto the IF stack so that reaching
// DST later redirects the PC to DST + NUM. When the bit is clear, the PC jumps to DST.
// Fields as decoded here: bits 0-7 = NUM, bits 10-21 = DST, bits 22-25 = bool uniform bit index.
void PICAShader::ifu(u32 instruction) {
	const u32 target = (instruction >> 10) & 0xfff;
	const u32 uniformBit = (instruction >> 22) & 0xf;
	const bool conditionMet = (boolUniform & (1 << uniformBit)) != 0;

	if (!conditionMet) {
		pc = target;
		return;
	}

	if (ifIndex >= 8) [[unlikely]]
		Helpers::panic("[PICA] Overflowed IF stack");

	const u32 count = instruction & 0xff;
	auto& entry = conditionalInfo[ifIndex];
	ifIndex += 1;
	entry.endingPC = target;
	entry.newPC = target + count;
}
void PICAShader::loop(u32 instruction) {
if (loopIndex >= 4) [[unlikely]]
Helpers::panic("[PICA] Overflowed loop stack");