mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-06 22:25:41 +12:00
[PICA interpreter] Implement RSQ, MAD, CALLU and boot SM64
This commit is contained in:
parent
f4fabdae40
commit
979519f785
4 changed files with 119 additions and 1 deletions
|
@ -50,6 +50,7 @@ namespace PICAInternalRegs {
|
|||
PrimitiveConfig = 0x25E,
|
||||
|
||||
// Vertex shader registers
|
||||
VertexBoolUniform = 0x2B0,
|
||||
VertexIntUniform0 = 0x2B1,
|
||||
VertexIntUniform1 = 0x2B2,
|
||||
VertexIntUniform2 = 0x2B3,
|
||||
|
|
|
@ -17,14 +17,18 @@ namespace ShaderOpcodes {
|
|||
DP4 = 0x02,
|
||||
MUL = 0x08,
|
||||
MIN = 0x0D,
|
||||
RSQ = 0x0F,
|
||||
MOVA = 0x12,
|
||||
MOV = 0x13,
|
||||
NOP = 0x21,
|
||||
END = 0x22,
|
||||
CALLU = 0x26,
|
||||
IFU = 0x27,
|
||||
IFC = 0x28,
|
||||
LOOP = 0x29,
|
||||
CMP1 = 0x2E, // Both of these instructions are CMP
|
||||
CMP2 = 0x2F
|
||||
CMP2 = 0x2F,
|
||||
MAD = 0x38 // Opcodes 0x38-0x3F all encode MAD; only the first one is named here
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -45,6 +49,11 @@ class PICAShader {
|
|||
u32 newPC; // PC after the if block is done executing (= DST + NUM)
|
||||
};
|
||||
|
||||
// One entry of the shader CALL stack (stored in callInfo, indexed by callIndex).
// An entry is pushed when a call is taken and popped by run() once pc reaches endingPC.
struct CallInfo {
|
||||
u32 endingPC; // PC at the end of the function (callu sets this to DST + NUM); reaching it triggers the return
|
||||
u32 returnPC; // PC to return to after the function ends (callu stores the pc value at the time of the call)
|
||||
};
|
||||
|
||||
int bufferIndex; // Index of the next instruction to overwrite for shader uploads
|
||||
int opDescriptorIndex; // Index of the next operand descriptor we'll overwrite
|
||||
u32 floatUniformIndex = 0; // Which float uniform are we writing to? ([0, 95] range)
|
||||
|
@ -61,9 +70,11 @@ class PICAShader {
|
|||
u32 pc = 0; // Program counter: Index of the next instruction we're going to execute
|
||||
u32 loopIndex = 0; // The index of our loop stack (0 = empty, 4 = full)
|
||||
u32 ifIndex = 0; // The index of our IF stack
|
||||
u32 callIndex = 0; // The index of our CALL stack
|
||||
|
||||
std::array<Loop, 4> loopInfo;
|
||||
std::array<ConditionalInfo, 8> conditionalInfo;
|
||||
std::array<CallInfo, 8> callInfo;
|
||||
|
||||
ShaderType type;
|
||||
|
||||
|
@ -72,15 +83,19 @@ class PICAShader {
|
|||
|
||||
// Shader opcodes
|
||||
void add(u32 instruction);
|
||||
void callu(u32 instruction);
|
||||
void cmp(u32 instruction);
|
||||
void dp3(u32 instruction);
|
||||
void dp4(u32 instruction);
|
||||
void ifc(u32 instruction);
|
||||
void ifu(u32 instruction);
|
||||
void loop(u32 instruction);
|
||||
void mad(u32 instruction);
|
||||
void min(u32 instruction);
|
||||
void mov(u32 instruction);
|
||||
void mova(u32 instruction);
|
||||
void mul(u32 instruction);
|
||||
void rsq(u32 instruction);
|
||||
|
||||
// src1, src2 and src3 have different negation & component swizzle bits in the operand descriptor
|
||||
// https://problemkaputt.github.io/gbatek.htm#3dsgpushaderinstructionsetopcodesummary in the
|
||||
|
|
|
@ -100,6 +100,10 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
shaderUnit.vs.uploadDescriptor(value);
|
||||
break;
|
||||
|
||||
case VertexBoolUniform:
|
||||
shaderUnit.vs.boolUniform = value & 0xffff;
|
||||
break;
|
||||
|
||||
case VertexIntUniform0: case VertexIntUniform1: case VertexIntUniform2: case VertexIntUniform3:
|
||||
shaderUnit.vs.uploadIntUniform(index - VertexIntUniform0, value);
|
||||
break;
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
#include "PICA/shader.hpp"
|
||||
#include <cmath>
|
||||
|
||||
void PICAShader::run() {
|
||||
pc = 0;
|
||||
loopIndex = 0;
|
||||
ifIndex = 0;
|
||||
callIndex = 0;
|
||||
|
||||
while (true) {
|
||||
const u32 instruction = loadedShader[pc++];
|
||||
|
@ -11,6 +13,7 @@ void PICAShader::run() {
|
|||
|
||||
switch (opcode) {
|
||||
case ShaderOpcodes::ADD: add(instruction); break;
|
||||
case ShaderOpcodes::CALLU: callu(instruction); break;
|
||||
case ShaderOpcodes::CMP1: case ShaderOpcodes::CMP2:
|
||||
cmp(instruction);
|
||||
break;
|
||||
|
@ -18,12 +21,19 @@ void PICAShader::run() {
|
|||
case ShaderOpcodes::DP4: dp4(instruction); break;
|
||||
case ShaderOpcodes::END: return; // Stop running shader
|
||||
case ShaderOpcodes::IFC: ifc(instruction); break;
|
||||
case ShaderOpcodes::IFU: ifu(instruction); break;
|
||||
case ShaderOpcodes::LOOP: loop(instruction); break;
|
||||
case ShaderOpcodes::MIN: min(instruction); break;
|
||||
case ShaderOpcodes::MOV: mov(instruction); break;
|
||||
case ShaderOpcodes::MOVA: mova(instruction); break;
|
||||
case ShaderOpcodes::MUL: mul(instruction); break;
|
||||
case ShaderOpcodes::NOP: break; // Do nothing
|
||||
case ShaderOpcodes::RSQ: rsq(instruction); break;
|
||||
|
||||
case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F:
|
||||
mad(instruction);
|
||||
break;
|
||||
|
||||
default:Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode);
|
||||
}
|
||||
|
||||
|
@ -49,6 +59,15 @@ void PICAShader::run() {
|
|||
ifIndex -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle calls
|
||||
if (callIndex != 0) {
|
||||
auto& info = callInfo[callIndex - 1];
|
||||
if (pc == info.endingPC) { // Check if the called function ended
|
||||
pc = info.returnPC;
|
||||
callIndex -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -249,6 +268,49 @@ void PICAShader::dp4(u32 instruction) {
|
|||
}
|
||||
}
|
||||
|
||||
// RSQ: computes 1 / sqrt(v), where v is element 0 of the swizzled source operand, and writes
// that single result to every destination component enabled by the operand descriptor's
// write mask (low 4 bits of the descriptor).
//
// Instruction fields decoded here:
//   bits  0-6  : operand descriptor index
//   bits 12-18 : source register
//   bits 19-20 : index register selector used for relative addressing of the source
//   bits 21-25 : destination register
void PICAShader::rsq(u32 instruction) {
	const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
	const u32 idx = (instruction >> 19) & 3;
	const u32 dest = (instruction >> 21) & 0x1f;

	// Resolve relative addressing through the same helper MAD uses for its indexed operand,
	// rather than aborting emulation whenever a shader uses idx != 0.
	const u32 src1 = getIndexedSource((instruction >> 12) & 0x7f, idx);

	const vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);
	vec4f& destVector = getDest(dest);

	// Only element 0 of the swizzled source takes part in the computation
	const f24 res = f24::fromFloat32(1.0f / std::sqrt(srcVec1[0].toFloat32()));

	// Mask bit i guards destination element (3 - i)
	const u32 componentMask = operandDescriptor & 0xf;
	for (int i = 0; i < 4; i++) {
		if (componentMask & (1 << i)) {
			destVector[3 - i] = res;
		}
	}
}
|
||||
|
||||
// MAD: component-wise multiply-add, dest = src1 * src2 + src3.
// Only the components enabled in the operand descriptor's write mask (its low 4 bits) are
// written; the remaining destination components keep their previous value.
//
// Instruction fields decoded here:
//   bits  0-4  : operand descriptor index
//   bits  5-9  : src3 register
//   bits 10-16 : src2 register (the operand that goes through getIndexedSource)
//   bits 17-21 : src1 register
//   bits 22-23 : index register selector passed to getIndexedSource
//   bits 24-28 : destination register
void PICAShader::mad(u32 instruction) {
	const u32 descriptor = operandDescriptors[instruction & 0x1f];

	const u32 destReg = (instruction >> 24) & 0x1f;
	const u32 indexSelect = (instruction >> 22) & 3;
	const u32 firstReg = (instruction >> 17) & 0x1f;
	const u32 secondReg = getIndexedSource((instruction >> 10) & 0x7f, indexSelect);
	const u32 thirdReg = (instruction >> 5) & 0x1f;

	// Each source has its own swizzle/negate bits in the descriptor, selected by the template index
	const auto multiplicand = getSourceSwizzled<1>(firstReg, descriptor);
	const auto multiplier = getSourceSwizzled<2>(secondReg, descriptor);
	const auto addend = getSourceSwizzled<3>(thirdReg, descriptor);
	auto& output = getDest(destReg);

	// Write-mask bit (3 - lane) guards element `lane`; walk from lane 3 down to lane 0
	const u32 writeMask = descriptor & 0xf;
	for (int lane = 3; lane >= 0; lane--) {
		const bool laneEnabled = ((writeMask >> (3 - lane)) & 1) != 0;
		if (!laneEnabled) {
			continue;
		}

		output[lane] = multiplicand[lane] * multiplier[lane] + addend[lane];
	}
}
|
||||
|
||||
void PICAShader::cmp(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
const u32 src1 = (instruction >> 12) & 0x7f;
|
||||
|
@ -312,6 +374,42 @@ void PICAShader::ifc(u32 instruction) {
|
|||
}
|
||||
}
|
||||
|
||||
// CALLU: conditional function call controlled by one bit of the bool uniform.
// If the selected bit is clear this is a no-op and execution continues with the next instruction.
// If it is set, a CallInfo entry is pushed (panicking once 4 calls are already active), and
// execution jumps to DST. run() pops the entry and resumes at returnPC when pc reaches DST + NUM.
//
// Instruction fields decoded here:
//   bits  0-7  : NUM, number of instructions in the called block
//   bits 10-21 : DST, address of the first instruction of the called block
//   bits 22-25 : index of the bool uniform bit to test
void PICAShader::callu(u32 instruction) {
	const u32 bit = (instruction >> 22) & 0xf;
	const bool callTaken = (boolUniform & (1 << bit)) != 0;
	if (!callTaken) {
		return;
	}

	if (callIndex >= 4) [[unlikely]] {
		Helpers::panic("[PICA] Overflowed CALL stack");
	}

	const u32 target = (instruction >> 10) & 0xfff;
	const u32 length = instruction & 0xff;

	auto& frame = callInfo[callIndex];
	callIndex += 1;

	frame.endingPC = target + length;
	frame.returnPC = pc;  // Current pc at the time of the call: where execution resumes afterwards
	pc = target;
}
|
||||
|
||||
// IFU: conditional block controlled by one bit of the bool uniform.
// If the selected bit is clear, execution jumps straight to DST.
// If it is set, execution simply continues with the next instruction, and a ConditionalInfo
// entry is pushed recording that the block ends at DST and that the PC should become
// DST + NUM afterwards. Panics if 8 IF blocks are already active.
//
// Instruction fields decoded here:
//   bits  0-7  : NUM
//   bits 10-21 : DST
//   bits 22-25 : index of the bool uniform bit to test
void PICAShader::ifu(u32 instruction) {
	const u32 target = (instruction >> 10) & 0xfff;
	const u32 bit = (instruction >> 22) & 0xf;

	const bool conditionHolds = (boolUniform & (1 << bit)) != 0;
	if (!conditionHolds) {
		pc = target;
		return;
	}

	if (ifIndex >= 8) [[unlikely]] {
		Helpers::panic("[PICA] Overflowed IF stack");
	}

	const u32 length = instruction & 0xff;

	auto& entry = conditionalInfo[ifIndex];
	ifIndex += 1;

	entry.endingPC = target;
	entry.newPC = target + length;
}
|
||||
|
||||
void PICAShader::loop(u32 instruction) {
|
||||
if (loopIndex >= 4) [[unlikely]]
|
||||
Helpers::panic("[PICA] Overflowed loop stack");
|
||||
|
|
Loading…
Add table
Reference in a new issue