mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-08 07:05:40 +12:00
Remove OpenGL-specific vector-types
Removes dependency on the OpenGL header and rendering backend for its `OpenGL::Vector` type in favor of a more standard array.
This commit is contained in:
parent
2a1683ba62
commit
9e32b6d4bf
5 changed files with 221 additions and 224 deletions
|
@ -2,17 +2,17 @@
|
|||
|
||||
// Only do anything if we're on an x64 target with JIT support enabled
|
||||
#if defined(PANDA3DS_DYNAPICA_SUPPORTED) && defined(PANDA3DS_X64_HOST)
|
||||
#include "helpers.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "PICA/shader.hpp"
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
#include "x64_regs.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "PICA/shader.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "x64_regs.hpp"
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
|
||||
class ShaderEmitter : public Xbyak::CodeGenerator {
|
||||
static constexpr size_t executableMemorySize = PICAShader::maxInstructionCount * 96; // How much executable memory to alloc for each shader
|
||||
static constexpr size_t executableMemorySize = PICAShader::maxInstructionCount * 96; // How much executable memory to alloc for each shader
|
||||
// Allocate some extra space as padding for security purposes in the extremely unlikely occasion we manage to overflow the above size
|
||||
static constexpr size_t allocSize = executableMemorySize + 0x1000;
|
||||
|
||||
|
@ -20,7 +20,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
static constexpr uint noSwizzle = 0x1B;
|
||||
|
||||
using f24 = Floats::f24;
|
||||
using vec4f = OpenGL::Vector<f24, 4>;
|
||||
using vec4f = std::array<f24, 4>;
|
||||
|
||||
// An array of labels (incl pointers) to each compiled (to x64) PICA instruction
|
||||
std::array<Xbyak::Label, PICAShader::maxInstructionCount> instructionLabels;
|
||||
|
@ -33,8 +33,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
// Vector value of (1.0, 1.0, 1.0, 1.0) for SLT(i)/SGE(i)
|
||||
Label onesVector;
|
||||
|
||||
u32 recompilerPC = 0; // PC the recompiler is currently recompiling @
|
||||
u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop)
|
||||
u32 recompilerPC = 0; // PC the recompiler is currently recompiling @
|
||||
u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop)
|
||||
|
||||
bool haveSSE4_1 = false; // Shows if the CPU supports SSE4.1
|
||||
bool haveAVX = false; // Shows if the CPU supports AVX (NOT AVX2, NOT AVX512. Regular AVX)
|
||||
|
@ -105,10 +105,10 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
|
||||
MAKE_LOG_FUNCTION(log, shaderJITLogger)
|
||||
|
||||
public:
|
||||
using InstructionCallback = const void(*)(PICAShader& shaderUnit); // Callback type used for instructions
|
||||
public:
|
||||
using InstructionCallback = const void (*)(PICAShader& shaderUnit); // Callback type used for instructions
|
||||
// Callback type used for the JIT prologue. This is what the caller will call
|
||||
using PrologueCallback = const void(*)(PICAShader& shaderUnit, InstructionCallback cb);
|
||||
using PrologueCallback = const void (*)(PICAShader& shaderUnit, InstructionCallback cb);
|
||||
PrologueCallback prologueCb = nullptr;
|
||||
|
||||
// Initialize our emitter with "allocSize" bytes of RWX memory
|
||||
|
@ -123,7 +123,7 @@ public:
|
|||
Helpers::panic("This CPU does not support SSE3. Please use the shader interpreter instead");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void compile(const PICAShader& shaderUnit);
|
||||
|
||||
// PC must be a valid entrypoint here. It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does
|
||||
|
@ -133,9 +133,7 @@ public:
|
|||
return reinterpret_cast<InstructionCallback>(ptr);
|
||||
}
|
||||
|
||||
PrologueCallback getPrologueCallback() {
|
||||
return prologueCb;
|
||||
}
|
||||
PrologueCallback getPrologueCallback() { return prologueCb; }
|
||||
};
|
||||
|
||||
#endif // x64 recompiler check
|
||||
#endif // x64 recompiler check
|
|
@ -2,14 +2,12 @@
|
|||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include "helpers.hpp"
|
||||
#include "opengl.hpp"
|
||||
|
||||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/pica_hash.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
enum class ShaderType {
|
||||
Vertex, Geometry
|
||||
};
|
||||
enum class ShaderType { Vertex, Geometry };
|
||||
|
||||
namespace ShaderOpcodes {
|
||||
enum : u32 {
|
||||
|
@ -46,66 +44,66 @@ namespace ShaderOpcodes {
|
|||
SETEMIT = 0x2B,
|
||||
JMPC = 0x2C,
|
||||
JMPU = 0x2D,
|
||||
CMP1 = 0x2E, // Both of these instructions are CMP
|
||||
CMP1 = 0x2E, // Both of these instructions are CMP
|
||||
CMP2 = 0x2F,
|
||||
MAD = 0x38 // Everything between 0x38-0x3F is a MAD but fuck it
|
||||
MAD = 0x38 // Everything between 0x38-0x3F is a MAD but fuck it
|
||||
};
|
||||
}
|
||||
|
||||
// Note: All PICA f24 vec4 registers must have the alignas(16) specifier to make them easier to access in SSE/NEON code in the JIT
|
||||
class PICAShader {
|
||||
using f24 = Floats::f24;
|
||||
using vec4f = OpenGL::Vector<f24, 4>;
|
||||
using vec4f = std::array<f24, 4>;
|
||||
|
||||
struct Loop {
|
||||
u32 startingPC; // PC at the start of the loop
|
||||
u32 endingPC; // PC at the end of the loop
|
||||
u32 iterations; // How many iterations of the loop to run
|
||||
u32 increment; // How much to increment the loop counter after each iteration
|
||||
u32 startingPC; // PC at the start of the loop
|
||||
u32 endingPC; // PC at the end of the loop
|
||||
u32 iterations; // How many iterations of the loop to run
|
||||
u32 increment; // How much to increment the loop counter after each iteration
|
||||
};
|
||||
|
||||
// Info for ifc/ifu stack
|
||||
struct ConditionalInfo {
|
||||
u32 endingPC; // PC at the end of the if block (= DST)
|
||||
u32 newPC; // PC after the if block is done executing (= DST + NUM)
|
||||
u32 endingPC; // PC at the end of the if block (= DST)
|
||||
u32 newPC; // PC after the if block is done executing (= DST + NUM)
|
||||
};
|
||||
|
||||
struct CallInfo {
|
||||
u32 endingPC; // PC at the end of the function
|
||||
u32 returnPC; // PC to return to after the function ends
|
||||
u32 endingPC; // PC at the end of the function
|
||||
u32 returnPC; // PC to return to after the function ends
|
||||
};
|
||||
|
||||
int bufferIndex; // Index of the next instruction to overwrite for shader uploads
|
||||
int opDescriptorIndex; // Index of the next operand descriptor we'll overwrite
|
||||
u32 floatUniformIndex = 0; // Which float uniform are we writing to? ([0, 95] range)
|
||||
u32 floatUniformWordCount = 0; // How many words have we buffered for the current uniform transfer?
|
||||
bool f32UniformTransfer = false; // Are we transferring an f32 uniform or an f24 uniform?
|
||||
int bufferIndex; // Index of the next instruction to overwrite for shader uploads
|
||||
int opDescriptorIndex; // Index of the next operand descriptor we'll overwrite
|
||||
u32 floatUniformIndex = 0; // Which float uniform are we writing to? ([0, 95] range)
|
||||
u32 floatUniformWordCount = 0; // How many words have we buffered for the current uniform transfer?
|
||||
bool f32UniformTransfer = false; // Are we transferring an f32 uniform or an f24 uniform?
|
||||
|
||||
std::array<u32, 4> floatUniformBuffer; // Buffer for temporarily caching float uniform data
|
||||
std::array<u32, 4> floatUniformBuffer; // Buffer for temporarily caching float uniform data
|
||||
|
||||
public:
|
||||
public:
|
||||
// These are placed close to the temp registers and co because it helps the JIT generate better code
|
||||
u32 entrypoint = 0; // Initial shader PC
|
||||
u32 entrypoint = 0; // Initial shader PC
|
||||
u32 boolUniform;
|
||||
std::array<OpenGL::Vector<u8, 4>, 4> intUniforms;
|
||||
std::array<std::array<u8, 4>, 4> intUniforms;
|
||||
alignas(16) std::array<vec4f, 96> floatUniforms;
|
||||
|
||||
alignas(16) std::array<vec4f, 16> fixedAttributes; // Fixed vertex attributes
|
||||
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
|
||||
alignas(16) std::array<vec4f, 16> fixedAttributes; // Fixed vertex attributes
|
||||
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
|
||||
alignas(16) std::array<vec4f, 16> outputs;
|
||||
alignas(16) vec4f dummy = vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() }); // Dummy register used by the JIT
|
||||
alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); // Dummy register used by the JIT
|
||||
|
||||
protected:
|
||||
protected:
|
||||
std::array<u32, 128> operandDescriptors;
|
||||
alignas(16) std::array<vec4f, 16> tempRegisters; // General purpose registers the shader can use for temp values
|
||||
OpenGL::Vector<s32, 2> addrRegister; // Address register
|
||||
bool cmpRegister[2]; // Comparison registers where the result of CMP is stored in
|
||||
alignas(16) std::array<vec4f, 16> tempRegisters; // General purpose registers the shader can use for temp values
|
||||
std::array<s32, 2> addrRegister; // Address register
|
||||
bool cmpRegister[2]; // Comparison registers where the result of CMP is stored in
|
||||
u32 loopCounter;
|
||||
|
||||
u32 pc = 0; // Program counter: Index of the next instruction we're going to execute
|
||||
u32 loopIndex = 0; // The index of our loop stack (0 = empty, 4 = full)
|
||||
u32 ifIndex = 0; // The index of our IF stack
|
||||
u32 callIndex = 0; // The index of our CALL stack
|
||||
u32 pc = 0; // Program counter: Index of the next instruction we're going to execute
|
||||
u32 loopIndex = 0; // The index of our loop stack (0 = empty, 4 = full)
|
||||
u32 ifIndex = 0; // The index of our IF stack
|
||||
u32 callIndex = 0; // The index of our CALL stack
|
||||
|
||||
std::array<Loop, 4> loopInfo;
|
||||
std::array<ConditionalInfo, 8> conditionalInfo;
|
||||
|
@ -117,7 +115,7 @@ protected:
|
|||
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
|
||||
using Hash = PICAHash::HashType;
|
||||
|
||||
Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism)
|
||||
Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism)
|
||||
Hash lastOpdescHash = 0; // Last hash computed for the operand descriptors (Also used for the JIT)
|
||||
|
||||
bool codeHashDirty = false;
|
||||
|
@ -130,7 +128,7 @@ protected:
|
|||
vec4f getSource(u32 source);
|
||||
vec4f& getDest(u32 dest);
|
||||
|
||||
private:
|
||||
private:
|
||||
// Interpreter functions for the various shader functions
|
||||
void add(u32 instruction);
|
||||
void call(u32 instruction);
|
||||
|
@ -171,13 +169,13 @@ private:
|
|||
bool negate;
|
||||
|
||||
using namespace Helpers;
|
||||
if constexpr (sourceIndex == 1) { // SRC1
|
||||
if constexpr (sourceIndex == 1) { // SRC1
|
||||
negate = (getBit<4>(opDescriptor)) != 0;
|
||||
compSwizzle = getBits<5, 8>(opDescriptor);
|
||||
} else if constexpr (sourceIndex == 2) { // SRC2
|
||||
} else if constexpr (sourceIndex == 2) { // SRC2
|
||||
negate = (getBit<13>(opDescriptor)) != 0;
|
||||
compSwizzle = getBits<14, 8>(opDescriptor);
|
||||
} else if constexpr (sourceIndex == 3) { // SRC3
|
||||
} else if constexpr (sourceIndex == 3) { // SRC3
|
||||
negate = (getBit<22>(opDescriptor)) != 0;
|
||||
compSwizzle = getBits<23, 8>(opDescriptor);
|
||||
}
|
||||
|
@ -185,8 +183,8 @@ private:
|
|||
// Iterate through every component of the swizzled vector in reverse order
|
||||
// And get which source component's index to match it with
|
||||
for (int comp = 0; comp < 4; comp++) {
|
||||
int index = compSwizzle & 3; // Get index for this component
|
||||
compSwizzle >>= 2; // Move to next component index
|
||||
int index = compSwizzle & 3; // Get index for this component
|
||||
compSwizzle >>= 2; // Move to next component index
|
||||
ret[3 - comp] = source[index];
|
||||
}
|
||||
|
||||
|
@ -212,39 +210,33 @@ private:
|
|||
u8 getIndexedSource(u32 source, u32 index);
|
||||
bool isCondTrue(u32 instruction);
|
||||
|
||||
public:
|
||||
public:
|
||||
static constexpr size_t maxInstructionCount = 4096;
|
||||
std::array<u32, maxInstructionCount> loadedShader; // Currently loaded & active shader
|
||||
std::array<u32, maxInstructionCount> bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
|
||||
std::array<u32, maxInstructionCount> loadedShader; // Currently loaded & active shader
|
||||
std::array<u32, maxInstructionCount> bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
|
||||
|
||||
PICAShader(ShaderType type) : type(type) {}
|
||||
|
||||
// These functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them
|
||||
void finalize() {
|
||||
std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32));
|
||||
}
|
||||
void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); }
|
||||
|
||||
void setBufferIndex(u32 index) {
|
||||
bufferIndex = index & 0xfff;
|
||||
}
|
||||
void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; }
|
||||
|
||||
void setOpDescriptorIndex(u32 index) {
|
||||
opDescriptorIndex = index & 0x7f;
|
||||
}
|
||||
void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; }
|
||||
|
||||
void uploadWord(u32 word) {
|
||||
if (bufferIndex >= 4095) Helpers::panic("o no, shader upload overflew");
|
||||
bufferedShader[bufferIndex++] = word;
|
||||
bufferIndex &= 0xfff;
|
||||
|
||||
codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
|
||||
codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
|
||||
}
|
||||
|
||||
void uploadDescriptor(u32 word) {
|
||||
operandDescriptors[opDescriptorIndex++] = word;
|
||||
opDescriptorIndex &= 0x7f;
|
||||
|
||||
opdescHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
|
||||
opdescHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
|
||||
}
|
||||
|
||||
void setFloatUniformIndex(u32 word) {
|
||||
|
@ -255,23 +247,22 @@ public:
|
|||
|
||||
void uploadFloatUniform(u32 word) {
|
||||
floatUniformBuffer[floatUniformWordCount++] = word;
|
||||
if (floatUniformIndex >= 96)
|
||||
Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex);
|
||||
if (floatUniformIndex >= 96) Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex);
|
||||
|
||||
if ((f32UniformTransfer && floatUniformWordCount >= 4) || (!f32UniformTransfer && floatUniformWordCount >= 3)) {
|
||||
vec4f& uniform = floatUniforms[floatUniformIndex++];
|
||||
floatUniformWordCount = 0;
|
||||
|
||||
if (f32UniformTransfer) {
|
||||
uniform.x() = f24::fromFloat32(*(float*)&floatUniformBuffer[3]);
|
||||
uniform.y() = f24::fromFloat32(*(float*)&floatUniformBuffer[2]);
|
||||
uniform.z() = f24::fromFloat32(*(float*)&floatUniformBuffer[1]);
|
||||
uniform.w() = f24::fromFloat32(*(float*)&floatUniformBuffer[0]);
|
||||
uniform[0] = f24::fromFloat32(*(float*)&floatUniformBuffer[3]);
|
||||
uniform[1] = f24::fromFloat32(*(float*)&floatUniformBuffer[2]);
|
||||
uniform[2] = f24::fromFloat32(*(float*)&floatUniformBuffer[1]);
|
||||
uniform[3] = f24::fromFloat32(*(float*)&floatUniformBuffer[0]);
|
||||
} else {
|
||||
uniform.x() = f24::fromRaw(floatUniformBuffer[2] & 0xffffff);
|
||||
uniform.y() = f24::fromRaw(((floatUniformBuffer[1] & 0xffff) << 8) | (floatUniformBuffer[2] >> 24));
|
||||
uniform.z() = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
|
||||
uniform.w() = f24::fromRaw(floatUniformBuffer[0] >> 8);
|
||||
uniform[0] = f24::fromRaw(floatUniformBuffer[2] & 0xffffff);
|
||||
uniform[1] = f24::fromRaw(((floatUniformBuffer[1] & 0xffff) << 8) | (floatUniformBuffer[2] >> 24));
|
||||
uniform[2] = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
|
||||
uniform[3] = f24::fromRaw(floatUniformBuffer[0] >> 8);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -280,10 +271,10 @@ public:
|
|||
using namespace Helpers;
|
||||
|
||||
auto& u = intUniforms[index];
|
||||
u.x() = word & 0xff;
|
||||
u.y() = getBits<8, 8>(word);
|
||||
u.z() = getBits<16, 8>(word);
|
||||
u.w() = getBits<24, 8>(word);
|
||||
u[0] = word & 0xff;
|
||||
u[1] = getBits<8, 8>(word);
|
||||
u[2] = getBits<16, 8>(word);
|
||||
u[3] = getBits<24, 8>(word);
|
||||
}
|
||||
|
||||
void run();
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
#include "PICA/gpu.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
|
||||
#include "PICA/gpu.hpp"
|
||||
|
||||
using namespace Floats;
|
||||
using namespace Helpers;
|
||||
|
||||
u32 GPU::readReg(u32 address) {
|
||||
if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers
|
||||
if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers
|
||||
const u32 index = (address - 0x1EF01000) / sizeof(u32);
|
||||
return readInternalReg(index);
|
||||
} else {
|
||||
|
@ -15,7 +16,7 @@ u32 GPU::readReg(u32 address) {
|
|||
}
|
||||
|
||||
void GPU::writeReg(u32 address, u32 value) {
|
||||
if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers
|
||||
if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers
|
||||
const u32 index = (address - 0x1EF01000) / sizeof(u32);
|
||||
writeInternalReg(index, value, 0xffffffff);
|
||||
} else {
|
||||
|
@ -59,7 +60,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
}
|
||||
|
||||
u32 currentValue = regs[index];
|
||||
u32 newValue = (currentValue & ~mask) | (value & mask); // Only overwrite the bits specified by "mask"
|
||||
u32 newValue = (currentValue & ~mask) | (value & mask); // Only overwrite the bits specified by "mask"
|
||||
regs[index] = newValue;
|
||||
|
||||
// TODO: Figure out if things like the shader index use the unmasked value or the masked one
|
||||
|
@ -74,38 +75,38 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
break;
|
||||
|
||||
case AttribFormatHigh:
|
||||
totalAttribCount = (value >> 28) + 1; // Total number of vertex attributes
|
||||
fixedAttribMask = getBits<16, 12>(value); // Determines which vertex attributes are fixed for all vertices
|
||||
totalAttribCount = (value >> 28) + 1; // Total number of vertex attributes
|
||||
fixedAttribMask = getBits<16, 12>(value); // Determines which vertex attributes are fixed for all vertices
|
||||
break;
|
||||
|
||||
case ColourBufferLoc: {
|
||||
u32 loc = (value & 0x0fffffff) << 3;
|
||||
renderer.setColourBufferLoc(loc);
|
||||
renderer->setColourBufferLoc(loc);
|
||||
break;
|
||||
};
|
||||
|
||||
case ColourBufferFormat: {
|
||||
u32 format = getBits<16, 3>(value);
|
||||
renderer.setColourFormat(static_cast<PICA::ColorFmt>(format));
|
||||
renderer->setColourFormat(static_cast<PICA::ColorFmt>(format));
|
||||
break;
|
||||
}
|
||||
|
||||
case DepthBufferLoc: {
|
||||
u32 loc = (value & 0x0fffffff) << 3;
|
||||
renderer.setDepthBufferLoc(loc);
|
||||
renderer->setDepthBufferLoc(loc);
|
||||
break;
|
||||
}
|
||||
|
||||
case DepthBufferFormat: {
|
||||
u32 format = value & 0x3;
|
||||
renderer.setDepthFormat(static_cast<PICA::DepthFmt>(format));
|
||||
renderer->setDepthFormat(static_cast<PICA::DepthFmt>(format));
|
||||
break;
|
||||
}
|
||||
|
||||
case FramebufferSize: {
|
||||
const u32 width = value & 0x7ff;
|
||||
const u32 height = getBits<12, 10>(value) + 1;
|
||||
renderer.setFBSize(width, height);
|
||||
renderer->setFBSize(width, height);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -116,7 +117,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
case LightingLUTData4:
|
||||
case LightingLUTData5:
|
||||
case LightingLUTData6:
|
||||
case LightingLUTData7:{
|
||||
case LightingLUTData7: {
|
||||
const uint32_t index = regs[LightingLUTIndex]; // Get full LUT index register
|
||||
const uint32_t lutID = getBits<8, 5>(index); // Get which LUT we're actually writing to
|
||||
uint32_t lutIndex = getBits<0, 8>(index); // And get the index inside the LUT we're writing to
|
||||
|
@ -133,15 +134,16 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
break;
|
||||
}
|
||||
|
||||
case VertexFloatUniformIndex:
|
||||
shaderUnit.vs.setFloatUniformIndex(value);
|
||||
break;
|
||||
case VertexFloatUniformIndex: shaderUnit.vs.setFloatUniformIndex(value); break;
|
||||
|
||||
case VertexFloatUniformData0: case VertexFloatUniformData1: case VertexFloatUniformData2:
|
||||
case VertexFloatUniformData3: case VertexFloatUniformData4: case VertexFloatUniformData5:
|
||||
case VertexFloatUniformData6: case VertexFloatUniformData7:
|
||||
shaderUnit.vs.uploadFloatUniform(value);
|
||||
break;
|
||||
case VertexFloatUniformData0:
|
||||
case VertexFloatUniformData1:
|
||||
case VertexFloatUniformData2:
|
||||
case VertexFloatUniformData3:
|
||||
case VertexFloatUniformData4:
|
||||
case VertexFloatUniformData5:
|
||||
case VertexFloatUniformData6:
|
||||
case VertexFloatUniformData7: shaderUnit.vs.uploadFloatUniform(value); break;
|
||||
|
||||
case FixedAttribIndex:
|
||||
fixedAttribCount = 0;
|
||||
|
@ -162,7 +164,9 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
}
|
||||
break;
|
||||
|
||||
case FixedAttribData0: case FixedAttribData1: case FixedAttribData2:
|
||||
case FixedAttribData0:
|
||||
case FixedAttribData1:
|
||||
case FixedAttribData2:
|
||||
fixedAttrBuff[fixedAttribCount++] = value;
|
||||
|
||||
if (fixedAttribCount == 3) {
|
||||
|
@ -170,15 +174,15 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
|
||||
vec4f attr;
|
||||
// These are stored in the reverse order anyone would expect them to be in
|
||||
attr.x() = f24::fromRaw(fixedAttrBuff[2] & 0xffffff);
|
||||
attr.y() = f24::fromRaw(((fixedAttrBuff[1] & 0xffff) << 8) | (fixedAttrBuff[2] >> 24));
|
||||
attr.z() = f24::fromRaw(((fixedAttrBuff[0] & 0xff) << 16) | (fixedAttrBuff[1] >> 16));
|
||||
attr.w() = f24::fromRaw(fixedAttrBuff[0] >> 8);
|
||||
attr[0] = f24::fromRaw(fixedAttrBuff[2] & 0xffffff);
|
||||
attr[1] = f24::fromRaw(((fixedAttrBuff[1] & 0xffff) << 8) | (fixedAttrBuff[2] >> 24));
|
||||
attr[2] = f24::fromRaw(((fixedAttrBuff[0] & 0xff) << 16) | (fixedAttrBuff[1] >> 16));
|
||||
attr[3] = f24::fromRaw(fixedAttrBuff[0] >> 8);
|
||||
|
||||
// If the fixed attribute index is < 12, we're just writing to one of the fixed attributes
|
||||
if (fixedAttribIndex < 12) [[likely]] {
|
||||
shaderUnit.vs.fixedAttributes[fixedAttribIndex++] = attr;
|
||||
} else if (fixedAttribIndex == 15) { // Otherwise if it's 15, we're submitting an immediate mode vertex
|
||||
} else if (fixedAttribIndex == 15) { // Otherwise if it's 15, we're submitting an immediate mode vertex
|
||||
const uint totalAttrCount = (regs[PICA::InternalRegs::VertexShaderAttrNum] & 0xf) + 1;
|
||||
if (totalAttrCount <= immediateModeAttrIndex) {
|
||||
printf("Broken state in the immediate mode vertex submission pipeline. Failing silently\n");
|
||||
|
@ -199,13 +203,12 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
// If we've reached 3 verts, issue a draw call
|
||||
// Handle rendering depending on the primitive type
|
||||
if (immediateModeVertIndex == 3) {
|
||||
renderer.drawVertices(PICA::PrimType::TriangleList, immediateModeVertices);
|
||||
renderer->drawVertices(PICA::PrimType::TriangleList, immediateModeVertices);
|
||||
|
||||
switch (primType) {
|
||||
// Triangle or geometry primitive. Draw a triangle and discard all vertices
|
||||
case 0: case 3:
|
||||
immediateModeVertIndex = 0;
|
||||
break;
|
||||
case 0:
|
||||
case 3: immediateModeVertIndex = 0; break;
|
||||
|
||||
// Triangle strip. Draw triangle, discard first vertex and keep the last 2
|
||||
case 1:
|
||||
|
@ -223,54 +226,54 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
}
|
||||
}
|
||||
}
|
||||
} else { // Writing to fixed attributes 13 and 14 probably does nothing, but we'll see
|
||||
} else { // Writing to fixed attributes 13 and 14 probably does nothing, but we'll see
|
||||
log("Wrote to invalid fixed vertex attribute %d\n", fixedAttribIndex);
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case VertexShaderOpDescriptorIndex:
|
||||
shaderUnit.vs.setOpDescriptorIndex(value);
|
||||
break;
|
||||
case VertexShaderOpDescriptorIndex: shaderUnit.vs.setOpDescriptorIndex(value); break;
|
||||
|
||||
case VertexShaderOpDescriptorData0: case VertexShaderOpDescriptorData1: case VertexShaderOpDescriptorData2:
|
||||
case VertexShaderOpDescriptorData3: case VertexShaderOpDescriptorData4: case VertexShaderOpDescriptorData5:
|
||||
case VertexShaderOpDescriptorData6: case VertexShaderOpDescriptorData7:
|
||||
shaderUnit.vs.uploadDescriptor(value);
|
||||
break;
|
||||
case VertexShaderOpDescriptorData0:
|
||||
case VertexShaderOpDescriptorData1:
|
||||
case VertexShaderOpDescriptorData2:
|
||||
case VertexShaderOpDescriptorData3:
|
||||
case VertexShaderOpDescriptorData4:
|
||||
case VertexShaderOpDescriptorData5:
|
||||
case VertexShaderOpDescriptorData6:
|
||||
case VertexShaderOpDescriptorData7: shaderUnit.vs.uploadDescriptor(value); break;
|
||||
|
||||
case VertexBoolUniform:
|
||||
shaderUnit.vs.boolUniform = value & 0xffff;
|
||||
break;
|
||||
case VertexBoolUniform: shaderUnit.vs.boolUniform = value & 0xffff; break;
|
||||
|
||||
case VertexIntUniform0: case VertexIntUniform1: case VertexIntUniform2: case VertexIntUniform3:
|
||||
shaderUnit.vs.uploadIntUniform(index - VertexIntUniform0, value);
|
||||
break;
|
||||
case VertexIntUniform0:
|
||||
case VertexIntUniform1:
|
||||
case VertexIntUniform2:
|
||||
case VertexIntUniform3: shaderUnit.vs.uploadIntUniform(index - VertexIntUniform0, value); break;
|
||||
|
||||
case VertexShaderData0: case VertexShaderData1: case VertexShaderData2: case VertexShaderData3:
|
||||
case VertexShaderData4: case VertexShaderData5: case VertexShaderData6: case VertexShaderData7:
|
||||
shaderUnit.vs.uploadWord(value);
|
||||
break;
|
||||
case VertexShaderData0:
|
||||
case VertexShaderData1:
|
||||
case VertexShaderData2:
|
||||
case VertexShaderData3:
|
||||
case VertexShaderData4:
|
||||
case VertexShaderData5:
|
||||
case VertexShaderData6:
|
||||
case VertexShaderData7: shaderUnit.vs.uploadWord(value); break;
|
||||
|
||||
case VertexShaderEntrypoint:
|
||||
shaderUnit.vs.entrypoint = value & 0xffff;
|
||||
break;
|
||||
case VertexShaderEntrypoint: shaderUnit.vs.entrypoint = value & 0xffff; break;
|
||||
|
||||
case VertexShaderTransferEnd:
|
||||
if (value != 0) shaderUnit.vs.finalize();
|
||||
break;
|
||||
|
||||
case VertexShaderTransferIndex:
|
||||
shaderUnit.vs.setBufferIndex(value);
|
||||
break;
|
||||
case VertexShaderTransferIndex: shaderUnit.vs.setBufferIndex(value); break;
|
||||
|
||||
// Command lists can write to the command processor registers and change the command list stream
|
||||
// Several games are known to do this, including New Super Mario Bros 2 and Super Mario 3D Land
|
||||
case CmdBufTrigger0:
|
||||
case CmdBufTrigger1: {
|
||||
if (value != 0) { // A non-zero value triggers command list processing
|
||||
int bufferIndex = index - CmdBufTrigger0; // Index of the command buffer to execute (0 or 1)
|
||||
if (value != 0) { // A non-zero value triggers command list processing
|
||||
int bufferIndex = index - CmdBufTrigger0; // Index of the command buffer to execute (0 or 1)
|
||||
u32 addr = (regs[CmdBufAddr0 + bufferIndex] & 0xfffffff) << 3;
|
||||
u32 size = (regs[CmdBufSize0 + bufferIndex] & 0xfffff) << 3;
|
||||
|
||||
|
@ -285,15 +288,13 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
default:
|
||||
// Vertex attribute registers
|
||||
if (index >= AttribInfoStart && index <= AttribInfoEnd) {
|
||||
uint attributeIndex = (index - AttribInfoStart) / 3; // Which attribute are we writing to
|
||||
uint reg = (index - AttribInfoStart) % 3; // Which of this attribute's registers are we writing to?
|
||||
uint attributeIndex = (index - AttribInfoStart) / 3; // Which attribute are we writing to
|
||||
uint reg = (index - AttribInfoStart) % 3; // Which of this attribute's registers are we writing to?
|
||||
auto& attr = attributeInfo[attributeIndex];
|
||||
|
||||
switch (reg) {
|
||||
case 0: attr.offset = value & 0xfffffff; break; // Attribute offset
|
||||
case 1:
|
||||
attr.config1 = value;
|
||||
break;
|
||||
case 0: attr.offset = value & 0xfffffff; break; // Attribute offset
|
||||
case 1: attr.config1 = value; break;
|
||||
case 2:
|
||||
attr.config2 = value;
|
||||
attr.size = getBits<16, 8>(value);
|
||||
|
@ -339,13 +340,13 @@ void GPU::startCommandList(u32 addr, u32 size) {
|
|||
|
||||
u32 id = header & 0xffff;
|
||||
u32 paramMaskIndex = getBits<16, 4>(header);
|
||||
u32 paramCount = getBits<20, 8>(header); // Number of additional parameters
|
||||
u32 paramCount = getBits<20, 8>(header); // Number of additional parameters
|
||||
// Bit 31 tells us whether this command is going to write to multiple sequential registers (if the bit is 1)
|
||||
// Or if all written values will go to the same register (If the bit is 0). It's essentially the value that
|
||||
// gets added to the "id" field after each register write
|
||||
bool consecutiveWritingMode = (header >> 31) != 0;
|
||||
|
||||
u32 mask = maskLUT[paramMaskIndex]; // Actual parameter mask
|
||||
u32 mask = maskLUT[paramMaskIndex]; // Actual parameter mask
|
||||
// Increment the ID by 1 after each write if we're in consecutive mode, or 0 otherwise
|
||||
u32 idIncrement = (consecutiveWritingMode) ? 1 : 0;
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include "PICA/shader.hpp"
|
||||
#include <cmath>
|
||||
|
||||
#include "PICA/shader.hpp"
|
||||
|
||||
using namespace Helpers;
|
||||
|
||||
void PICAShader::run() {
|
||||
|
@ -11,20 +12,19 @@ void PICAShader::run() {
|
|||
|
||||
while (true) {
|
||||
const u32 instruction = loadedShader[pc++];
|
||||
const u32 opcode = instruction >> 26; // Top 6 bits are the opcode
|
||||
const u32 opcode = instruction >> 26; // Top 6 bits are the opcode
|
||||
|
||||
switch (opcode) {
|
||||
case ShaderOpcodes::ADD: add(instruction); break;
|
||||
case ShaderOpcodes::CALL: call(instruction); break;
|
||||
case ShaderOpcodes::CALLC: callc(instruction); break;
|
||||
case ShaderOpcodes::CALLU: callu(instruction); break;
|
||||
case ShaderOpcodes::CMP1: case ShaderOpcodes::CMP2:
|
||||
cmp(instruction);
|
||||
break;
|
||||
case ShaderOpcodes::CMP1:
|
||||
case ShaderOpcodes::CMP2: cmp(instruction); break;
|
||||
case ShaderOpcodes::DP3: dp3(instruction); break;
|
||||
case ShaderOpcodes::DP4: dp4(instruction); break;
|
||||
case ShaderOpcodes::DPHI: dphi(instruction); break;
|
||||
case ShaderOpcodes::END: return; // Stop running shader
|
||||
case ShaderOpcodes::END: return; // Stop running shader
|
||||
case ShaderOpcodes::EX2: ex2(instruction); break;
|
||||
case ShaderOpcodes::FLR: flr(instruction); break;
|
||||
case ShaderOpcodes::IFC: ifc(instruction); break;
|
||||
|
@ -38,31 +38,41 @@ void PICAShader::run() {
|
|||
case ShaderOpcodes::MOV: mov(instruction); break;
|
||||
case ShaderOpcodes::MOVA: mova(instruction); break;
|
||||
case ShaderOpcodes::MUL: mul(instruction); break;
|
||||
case ShaderOpcodes::NOP: break; // Do nothing
|
||||
case ShaderOpcodes::NOP: break; // Do nothing
|
||||
case ShaderOpcodes::RCP: rcp(instruction); break;
|
||||
case ShaderOpcodes::RSQ: rsq(instruction); break;
|
||||
case ShaderOpcodes::SGEI: sgei(instruction); break;
|
||||
case ShaderOpcodes::SLT: slt(instruction); break;
|
||||
case ShaderOpcodes::SLTI: slti(instruction); break;
|
||||
|
||||
case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37:
|
||||
madi(instruction);
|
||||
break;
|
||||
case 0x30:
|
||||
case 0x31:
|
||||
case 0x32:
|
||||
case 0x33:
|
||||
case 0x34:
|
||||
case 0x35:
|
||||
case 0x36:
|
||||
case 0x37: madi(instruction); break;
|
||||
|
||||
case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F:
|
||||
mad(instruction);
|
||||
break;
|
||||
case 0x38:
|
||||
case 0x39:
|
||||
case 0x3A:
|
||||
case 0x3B:
|
||||
case 0x3C:
|
||||
case 0x3D:
|
||||
case 0x3E:
|
||||
case 0x3F: mad(instruction); break;
|
||||
|
||||
default:Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode);
|
||||
default: Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode);
|
||||
}
|
||||
|
||||
// Handle control flow statements. The ordering is important as the priority goes: LOOP > IF > CALL
|
||||
// Handle loop
|
||||
if (loopIndex != 0) {
|
||||
auto& loop = loopInfo[loopIndex - 1];
|
||||
if (pc == loop.endingPC) { // Check if the loop needs to start over
|
||||
if (pc == loop.endingPC) { // Check if the loop needs to start over
|
||||
loop.iterations -= 1;
|
||||
if (loop.iterations == 0) // If the loop ended, go one level down on the loop stack
|
||||
if (loop.iterations == 0) // If the loop ended, go one level down on the loop stack
|
||||
loopIndex -= 1;
|
||||
|
||||
loopCounter += loop.increment;
|
||||
|
@ -73,7 +83,7 @@ void PICAShader::run() {
|
|||
// Handle ifs
|
||||
if (ifIndex != 0) {
|
||||
auto& info = conditionalInfo[ifIndex - 1];
|
||||
if (pc == info.endingPC) { // Check if the IF block ended
|
||||
if (pc == info.endingPC) { // Check if the IF block ended
|
||||
pc = info.newPC;
|
||||
ifIndex -= 1;
|
||||
}
|
||||
|
@ -82,7 +92,7 @@ void PICAShader::run() {
|
|||
// Handle calls
|
||||
if (callIndex != 0) {
|
||||
auto& info = callInfo[callIndex - 1];
|
||||
if (pc == info.endingPC) { // Check if the CALL block ended
|
||||
if (pc == info.endingPC) { // Check if the CALL block ended
|
||||
pc = info.returnPC;
|
||||
callIndex -= 1;
|
||||
}
|
||||
|
@ -92,15 +102,15 @@ void PICAShader::run() {
|
|||
|
||||
// Calculate the actual source value using an instruction's source field and its respective index value
|
||||
// The index value is used to apply relative addressing when index != 0 by adding one of the 3 addr registers to the
|
||||
// source field, but only when the original source field is pointing at a vector uniform register
|
||||
// source field, but only when the original source field is pointing at a vector uniform register
|
||||
u8 PICAShader::getIndexedSource(u32 source, u32 index) {
|
||||
if (source < 0x20) // No offset is applied if the source isn't pointing to a vector uniform reg
|
||||
if (source < 0x20) // No offset is applied if the source isn't pointing to a vector uniform reg
|
||||
return source;
|
||||
|
||||
switch (index) {
|
||||
case 0: [[likely]] return u8(source); // No offset applied
|
||||
case 1: return u8(source + addrRegister.x());
|
||||
case 2: return u8(source + addrRegister.y());
|
||||
case 0: [[likely]] return u8(source); // No offset applied
|
||||
case 1: return u8(source + addrRegister[0]);
|
||||
case 2: return u8(source + addrRegister[1]);
|
||||
case 3: return u8(source + loopCounter);
|
||||
}
|
||||
|
||||
|
@ -117,7 +127,7 @@ PICAShader::vec4f PICAShader::getSource(u32 source) {
|
|||
return floatUniforms[source - 0x20];
|
||||
else {
|
||||
Helpers::warn("[PICA] Unimplemented source value: %X\n", source);
|
||||
return vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() });
|
||||
return vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -136,13 +146,13 @@ bool PICAShader::isCondTrue(u32 instruction) {
|
|||
bool refX = (getBit<25>(instruction)) != 0;
|
||||
|
||||
switch (condition) {
|
||||
case 0: // Either cmp register matches
|
||||
case 0: // Either cmp register matches
|
||||
return cmpRegister[0] == refX || cmpRegister[1] == refY;
|
||||
case 1: // Both cmp registers match
|
||||
case 1: // Both cmp registers match
|
||||
return cmpRegister[0] == refX && cmpRegister[1] == refY;
|
||||
case 2: // At least cmp.x matches
|
||||
case 2: // At least cmp.x matches
|
||||
return cmpRegister[0] == refX;
|
||||
default: // At least cmp.y matches
|
||||
default: // At least cmp.y matches
|
||||
return cmpRegister[1] == refY;
|
||||
}
|
||||
}
|
||||
|
@ -150,7 +160,7 @@ bool PICAShader::isCondTrue(u32 instruction) {
|
|||
void PICAShader::add(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
|
@ -171,7 +181,7 @@ void PICAShader::add(u32 instruction) {
|
|||
void PICAShader::mul(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
|
@ -210,7 +220,7 @@ void PICAShader::flr(u32 instruction) {
|
|||
void PICAShader::max(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
const u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
|
@ -232,7 +242,7 @@ void PICAShader::max(u32 instruction) {
|
|||
void PICAShader::min(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
const u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
|
@ -278,16 +288,16 @@ void PICAShader::mova(u32 instruction) {
|
|||
vec4f srcVector = getSourceSwizzled<1>(src, operandDescriptor);
|
||||
|
||||
u32 componentMask = operandDescriptor & 0xf;
|
||||
if (componentMask & 0b1000) // x component
|
||||
addrRegister.x() = static_cast<s32>(srcVector.x().toFloat32());
|
||||
if (componentMask & 0b0100) // y component
|
||||
addrRegister.y() = static_cast<s32>(srcVector.y().toFloat32());
|
||||
if (componentMask & 0b1000) // x component
|
||||
addrRegister[0] = static_cast<s32>(srcVector[0].toFloat32());
|
||||
if (componentMask & 0b0100) // y component
|
||||
addrRegister[1] = static_cast<s32>(srcVector[1].toFloat32());
|
||||
}
|
||||
|
||||
void PICAShader::dp3(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
|
@ -309,7 +319,7 @@ void PICAShader::dp3(u32 instruction) {
|
|||
void PICAShader::dp4(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
|
@ -480,7 +490,7 @@ void PICAShader::madi(u32 instruction) {
|
|||
void PICAShader::slt(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
|
@ -542,11 +552,11 @@ void PICAShader::slti(u32 instruction) {
|
|||
void PICAShader::cmp(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
const u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 cmpY = getBits<21, 3>(instruction);
|
||||
const u32 cmpX = getBits<24, 3>(instruction);
|
||||
const u32 cmpOperations[2] = { cmpX, cmpY };
|
||||
const u32 cmpOperations[2] = {cmpX, cmpY};
|
||||
|
||||
if (idx) Helpers::panic("[PICA] CMP: idx != 0");
|
||||
vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);
|
||||
|
@ -554,33 +564,31 @@ void PICAShader::cmp(u32 instruction) {
|
|||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
switch (cmpOperations[i]) {
|
||||
case 0: // Equal
|
||||
case 0: // Equal
|
||||
cmpRegister[i] = srcVec1[i] == srcVec2[i];
|
||||
break;
|
||||
|
||||
case 1: // Not equal
|
||||
case 1: // Not equal
|
||||
cmpRegister[i] = srcVec1[i] != srcVec2[i];
|
||||
break;
|
||||
|
||||
case 2: // Less than
|
||||
case 2: // Less than
|
||||
cmpRegister[i] = srcVec1[i] < srcVec2[i];
|
||||
break;
|
||||
|
||||
case 3: // Less than or equal
|
||||
case 3: // Less than or equal
|
||||
cmpRegister[i] = srcVec1[i] <= srcVec2[i];
|
||||
break;
|
||||
|
||||
case 4: // Greater than
|
||||
case 4: // Greater than
|
||||
cmpRegister[i] = srcVec1[i] > srcVec2[i];
|
||||
break;
|
||||
|
||||
case 5: // Greater than or equal
|
||||
case 5: // Greater than or equal
|
||||
cmpRegister[i] = srcVec1[i] >= srcVec2[i];
|
||||
break;
|
||||
|
||||
default:
|
||||
cmpRegister[i] = true;
|
||||
break;
|
||||
default: cmpRegister[i] = true; break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -604,7 +612,7 @@ void PICAShader::ifc(u32 instruction) {
|
|||
|
||||
void PICAShader::ifu(u32 instruction) {
|
||||
const u32 dest = getBits<10, 12>(instruction);
|
||||
const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check
|
||||
const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check
|
||||
|
||||
if (boolUniform & (1 << bit)) {
|
||||
if (ifIndex >= 8) [[unlikely]]
|
||||
|
@ -615,8 +623,7 @@ void PICAShader::ifu(u32 instruction) {
|
|||
auto& block = conditionalInfo[ifIndex++];
|
||||
block.endingPC = dest;
|
||||
block.newPC = dest + num;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
pc = dest;
|
||||
}
|
||||
}
|
||||
|
@ -637,12 +644,12 @@ void PICAShader::call(u32 instruction) {
|
|||
|
||||
// CALLC handler: forwards to call(instruction) only when isCondTrue(instruction) is true; otherwise does nothing.
void PICAShader::callc(u32 instruction) {
|
||||
if (isCondTrue(instruction)) {
|
||||
call(instruction); // Pls inline
|
||||
call(instruction);  // Pls inline
|
||||
}
|
||||
}
|
||||
|
||||
void PICAShader::callu(u32 instruction) {
|
||||
const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check
|
||||
const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check
|
||||
|
||||
if (boolUniform & (1 << bit)) {
|
||||
if (callIndex >= 4) [[unlikely]]
|
||||
|
@ -664,26 +671,25 @@ void PICAShader::loop(u32 instruction) {
|
|||
Helpers::panic("[PICA] Overflowed loop stack");
|
||||
|
||||
u32 dest = getBits<10, 12>(instruction);
|
||||
auto& uniform = intUniforms[getBits<22, 2>(instruction)]; // The uniform we'll get loop info from
|
||||
loopCounter = uniform.y();
|
||||
auto& uniform = intUniforms[getBits<22, 2>(instruction)]; // The uniform we'll get loop info from
|
||||
loopCounter = uniform[1];
|
||||
auto& loop = loopInfo[loopIndex++];
|
||||
|
||||
loop.startingPC = pc;
|
||||
loop.endingPC = dest + 1; // Loop is inclusive so we need + 1 here
|
||||
loop.iterations = uniform.x() + 1;
|
||||
loop.increment = uniform.z();
|
||||
loop.endingPC = dest + 1; // Loop is inclusive so we need + 1 here
|
||||
loop.iterations = uniform[0] + 1;
|
||||
loop.increment = uniform[2];
|
||||
}
|
||||
|
||||
// Conditional jump: when isCondTrue(instruction) is true, pc is overwritten with getBits<10, 12>(instruction);
// otherwise pc is left untouched.
void PICAShader::jmpc(u32 instruction) {
|
||||
if (isCondTrue(instruction))
|
||||
pc = getBits<10, 12>(instruction);
|
||||
if (isCondTrue(instruction)) pc = getBits<10, 12>(instruction);
|
||||
}
|
||||
|
||||
// Jump on bool uniform: sets pc to dest when the selected bit of boolUniform equals `test`
// (test is 1 when the instruction's LSB is 0, and 0 when the LSB is 1). No jump happens otherwise.
void PICAShader::jmpu(u32 instruction) {
|
||||
const u32 test = (instruction & 1) ^ 1; // If the LSB is 0 we want to compare to true, otherwise compare to false
|
||||
const u32 test = (instruction & 1) ^ 1;  // If the LSB is 0 we want to compare to true, otherwise compare to false
|
||||
const u32 dest = getBits<10, 12>(instruction);
|
||||
const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check
|
||||
const u32 bit = getBits<22, 4>(instruction);  // Bit of the bool uniform to check
|
||||
|
||||
if (((boolUniform >> bit) & 1) == test) // Jump if the bool uniform is the value we want
|
||||
if (((boolUniform >> bit) & 1) == test)  // Jump if the bool uniform is the value we want
|
||||
pc = dest;
|
||||
}
|
|
@ -1,4 +1,5 @@
|
|||
#include "PICA/shader_unit.hpp"
|
||||
|
||||
#include "cityhash.hpp"
|
||||
|
||||
void ShaderUnit::reset() {
|
||||
|
@ -18,18 +19,18 @@ void PICAShader::reset() {
|
|||
opDescriptorIndex = 0;
|
||||
f32UniformTransfer = false;
|
||||
|
||||
const vec4f zero = vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() });
|
||||
const vec4f zero = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()});
|
||||
inputs.fill(zero);
|
||||
floatUniforms.fill(zero);
|
||||
outputs.fill(zero);
|
||||
tempRegisters.fill(zero);
|
||||
|
||||
for (auto& e : intUniforms) {
|
||||
e.x() = e.y() = e.z() = e.w() = 0;
|
||||
e[0] = e[1] = e[2] = e[3] = 0;
|
||||
}
|
||||
|
||||
addrRegister.x() = 0;
|
||||
addrRegister.y() = 0;
|
||||
addrRegister[0] = 0;
|
||||
addrRegister[1] = 0;
|
||||
loopCounter = 0;
|
||||
|
||||
codeHashDirty = true;
|
||||
|
|
Loading…
Add table
Reference in a new issue