mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-08 23:25:40 +12:00
Remove OpenGL-specific vector-types
Removes the dependency on the OpenGL header and rendering backend for its `OpenGL::Vector` type in favor of the standard `std::array`.
This commit is contained in:
parent
2a1683ba62
commit
9e32b6d4bf
5 changed files with 221 additions and 224 deletions
|
@ -2,14 +2,14 @@
|
||||||
|
|
||||||
// Only do anything if we're on an x64 target with JIT support enabled
|
// Only do anything if we're on an x64 target with JIT support enabled
|
||||||
#if defined(PANDA3DS_DYNAPICA_SUPPORTED) && defined(PANDA3DS_X64_HOST)
|
#if defined(PANDA3DS_DYNAPICA_SUPPORTED) && defined(PANDA3DS_X64_HOST)
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "PICA/shader.hpp"
|
||||||
#include "helpers.hpp"
|
#include "helpers.hpp"
|
||||||
#include "logger.hpp"
|
#include "logger.hpp"
|
||||||
#include "PICA/shader.hpp"
|
#include "x64_regs.hpp"
|
||||||
#include "xbyak/xbyak.h"
|
#include "xbyak/xbyak.h"
|
||||||
#include "xbyak/xbyak_util.h"
|
#include "xbyak/xbyak_util.h"
|
||||||
#include "x64_regs.hpp"
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
class ShaderEmitter : public Xbyak::CodeGenerator {
|
class ShaderEmitter : public Xbyak::CodeGenerator {
|
||||||
static constexpr size_t executableMemorySize = PICAShader::maxInstructionCount * 96; // How much executable memory to alloc for each shader
|
static constexpr size_t executableMemorySize = PICAShader::maxInstructionCount * 96; // How much executable memory to alloc for each shader
|
||||||
|
@ -20,7 +20,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
||||||
static constexpr uint noSwizzle = 0x1B;
|
static constexpr uint noSwizzle = 0x1B;
|
||||||
|
|
||||||
using f24 = Floats::f24;
|
using f24 = Floats::f24;
|
||||||
using vec4f = OpenGL::Vector<f24, 4>;
|
using vec4f = std::array<f24, 4>;
|
||||||
|
|
||||||
// An array of labels (incl pointers) to each compiled (to x64) PICA instruction
|
// An array of labels (incl pointers) to each compiled (to x64) PICA instruction
|
||||||
std::array<Xbyak::Label, PICAShader::maxInstructionCount> instructionLabels;
|
std::array<Xbyak::Label, PICAShader::maxInstructionCount> instructionLabels;
|
||||||
|
@ -105,10 +105,10 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
||||||
|
|
||||||
MAKE_LOG_FUNCTION(log, shaderJITLogger)
|
MAKE_LOG_FUNCTION(log, shaderJITLogger)
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using InstructionCallback = const void(*)(PICAShader& shaderUnit); // Callback type used for instructions
|
using InstructionCallback = const void (*)(PICAShader& shaderUnit); // Callback type used for instructions
|
||||||
// Callback type used for the JIT prologue. This is what the caller will call
|
// Callback type used for the JIT prologue. This is what the caller will call
|
||||||
using PrologueCallback = const void(*)(PICAShader& shaderUnit, InstructionCallback cb);
|
using PrologueCallback = const void (*)(PICAShader& shaderUnit, InstructionCallback cb);
|
||||||
PrologueCallback prologueCb = nullptr;
|
PrologueCallback prologueCb = nullptr;
|
||||||
|
|
||||||
// Initialize our emitter with "allocSize" bytes of RWX memory
|
// Initialize our emitter with "allocSize" bytes of RWX memory
|
||||||
|
@ -133,9 +133,7 @@ public:
|
||||||
return reinterpret_cast<InstructionCallback>(ptr);
|
return reinterpret_cast<InstructionCallback>(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
PrologueCallback getPrologueCallback() {
|
PrologueCallback getPrologueCallback() { return prologueCb; }
|
||||||
return prologueCb;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // x64 recompiler check
|
#endif // x64 recompiler check
|
|
@ -2,14 +2,12 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include "helpers.hpp"
|
|
||||||
#include "opengl.hpp"
|
|
||||||
#include "PICA/float_types.hpp"
|
#include "PICA/float_types.hpp"
|
||||||
#include "PICA/pica_hash.hpp"
|
#include "PICA/pica_hash.hpp"
|
||||||
|
#include "helpers.hpp"
|
||||||
|
|
||||||
enum class ShaderType {
|
enum class ShaderType { Vertex, Geometry };
|
||||||
Vertex, Geometry
|
|
||||||
};
|
|
||||||
|
|
||||||
namespace ShaderOpcodes {
|
namespace ShaderOpcodes {
|
||||||
enum : u32 {
|
enum : u32 {
|
||||||
|
@ -55,7 +53,7 @@ namespace ShaderOpcodes {
|
||||||
// Note: All PICA f24 vec4 registers must have the alignas(16) specifier to make them easier to access in SSE/NEON code in the JIT
|
// Note: All PICA f24 vec4 registers must have the alignas(16) specifier to make them easier to access in SSE/NEON code in the JIT
|
||||||
class PICAShader {
|
class PICAShader {
|
||||||
using f24 = Floats::f24;
|
using f24 = Floats::f24;
|
||||||
using vec4f = OpenGL::Vector<f24, 4>;
|
using vec4f = std::array<f24, 4>;
|
||||||
|
|
||||||
struct Loop {
|
struct Loop {
|
||||||
u32 startingPC; // PC at the start of the loop
|
u32 startingPC; // PC at the start of the loop
|
||||||
|
@ -83,22 +81,22 @@ class PICAShader {
|
||||||
|
|
||||||
std::array<u32, 4> floatUniformBuffer; // Buffer for temporarily caching float uniform data
|
std::array<u32, 4> floatUniformBuffer; // Buffer for temporarily caching float uniform data
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// These are placed close to the temp registers and co because it helps the JIT generate better code
|
// These are placed close to the temp registers and co because it helps the JIT generate better code
|
||||||
u32 entrypoint = 0; // Initial shader PC
|
u32 entrypoint = 0; // Initial shader PC
|
||||||
u32 boolUniform;
|
u32 boolUniform;
|
||||||
std::array<OpenGL::Vector<u8, 4>, 4> intUniforms;
|
std::array<std::array<u8, 4>, 4> intUniforms;
|
||||||
alignas(16) std::array<vec4f, 96> floatUniforms;
|
alignas(16) std::array<vec4f, 96> floatUniforms;
|
||||||
|
|
||||||
alignas(16) std::array<vec4f, 16> fixedAttributes; // Fixed vertex attributes
|
alignas(16) std::array<vec4f, 16> fixedAttributes; // Fixed vertex attributes
|
||||||
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
|
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
|
||||||
alignas(16) std::array<vec4f, 16> outputs;
|
alignas(16) std::array<vec4f, 16> outputs;
|
||||||
alignas(16) vec4f dummy = vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() }); // Dummy register used by the JIT
|
alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); // Dummy register used by the JIT
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::array<u32, 128> operandDescriptors;
|
std::array<u32, 128> operandDescriptors;
|
||||||
alignas(16) std::array<vec4f, 16> tempRegisters; // General purpose registers the shader can use for temp values
|
alignas(16) std::array<vec4f, 16> tempRegisters; // General purpose registers the shader can use for temp values
|
||||||
OpenGL::Vector<s32, 2> addrRegister; // Address register
|
std::array<s32, 2> addrRegister; // Address register
|
||||||
bool cmpRegister[2]; // Comparison registers where the result of CMP is stored in
|
bool cmpRegister[2]; // Comparison registers where the result of CMP is stored in
|
||||||
u32 loopCounter;
|
u32 loopCounter;
|
||||||
|
|
||||||
|
@ -130,7 +128,7 @@ protected:
|
||||||
vec4f getSource(u32 source);
|
vec4f getSource(u32 source);
|
||||||
vec4f& getDest(u32 dest);
|
vec4f& getDest(u32 dest);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Interpreter functions for the various shader functions
|
// Interpreter functions for the various shader functions
|
||||||
void add(u32 instruction);
|
void add(u32 instruction);
|
||||||
void call(u32 instruction);
|
void call(u32 instruction);
|
||||||
|
@ -212,7 +210,7 @@ private:
|
||||||
u8 getIndexedSource(u32 source, u32 index);
|
u8 getIndexedSource(u32 source, u32 index);
|
||||||
bool isCondTrue(u32 instruction);
|
bool isCondTrue(u32 instruction);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static constexpr size_t maxInstructionCount = 4096;
|
static constexpr size_t maxInstructionCount = 4096;
|
||||||
std::array<u32, maxInstructionCount> loadedShader; // Currently loaded & active shader
|
std::array<u32, maxInstructionCount> loadedShader; // Currently loaded & active shader
|
||||||
std::array<u32, maxInstructionCount> bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
|
std::array<u32, maxInstructionCount> bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
|
||||||
|
@ -220,17 +218,11 @@ public:
|
||||||
PICAShader(ShaderType type) : type(type) {}
|
PICAShader(ShaderType type) : type(type) {}
|
||||||
|
|
||||||
// These functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them
|
// These functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them
|
||||||
void finalize() {
|
void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); }
|
||||||
std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32));
|
|
||||||
}
|
|
||||||
|
|
||||||
void setBufferIndex(u32 index) {
|
void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; }
|
||||||
bufferIndex = index & 0xfff;
|
|
||||||
}
|
|
||||||
|
|
||||||
void setOpDescriptorIndex(u32 index) {
|
void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; }
|
||||||
opDescriptorIndex = index & 0x7f;
|
|
||||||
}
|
|
||||||
|
|
||||||
void uploadWord(u32 word) {
|
void uploadWord(u32 word) {
|
||||||
if (bufferIndex >= 4095) Helpers::panic("o no, shader upload overflew");
|
if (bufferIndex >= 4095) Helpers::panic("o no, shader upload overflew");
|
||||||
|
@ -255,23 +247,22 @@ public:
|
||||||
|
|
||||||
void uploadFloatUniform(u32 word) {
|
void uploadFloatUniform(u32 word) {
|
||||||
floatUniformBuffer[floatUniformWordCount++] = word;
|
floatUniformBuffer[floatUniformWordCount++] = word;
|
||||||
if (floatUniformIndex >= 96)
|
if (floatUniformIndex >= 96) Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex);
|
||||||
Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex);
|
|
||||||
|
|
||||||
if ((f32UniformTransfer && floatUniformWordCount >= 4) || (!f32UniformTransfer && floatUniformWordCount >= 3)) {
|
if ((f32UniformTransfer && floatUniformWordCount >= 4) || (!f32UniformTransfer && floatUniformWordCount >= 3)) {
|
||||||
vec4f& uniform = floatUniforms[floatUniformIndex++];
|
vec4f& uniform = floatUniforms[floatUniformIndex++];
|
||||||
floatUniformWordCount = 0;
|
floatUniformWordCount = 0;
|
||||||
|
|
||||||
if (f32UniformTransfer) {
|
if (f32UniformTransfer) {
|
||||||
uniform.x() = f24::fromFloat32(*(float*)&floatUniformBuffer[3]);
|
uniform[0] = f24::fromFloat32(*(float*)&floatUniformBuffer[3]);
|
||||||
uniform.y() = f24::fromFloat32(*(float*)&floatUniformBuffer[2]);
|
uniform[1] = f24::fromFloat32(*(float*)&floatUniformBuffer[2]);
|
||||||
uniform.z() = f24::fromFloat32(*(float*)&floatUniformBuffer[1]);
|
uniform[2] = f24::fromFloat32(*(float*)&floatUniformBuffer[1]);
|
||||||
uniform.w() = f24::fromFloat32(*(float*)&floatUniformBuffer[0]);
|
uniform[3] = f24::fromFloat32(*(float*)&floatUniformBuffer[0]);
|
||||||
} else {
|
} else {
|
||||||
uniform.x() = f24::fromRaw(floatUniformBuffer[2] & 0xffffff);
|
uniform[0] = f24::fromRaw(floatUniformBuffer[2] & 0xffffff);
|
||||||
uniform.y() = f24::fromRaw(((floatUniformBuffer[1] & 0xffff) << 8) | (floatUniformBuffer[2] >> 24));
|
uniform[1] = f24::fromRaw(((floatUniformBuffer[1] & 0xffff) << 8) | (floatUniformBuffer[2] >> 24));
|
||||||
uniform.z() = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
|
uniform[2] = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
|
||||||
uniform.w() = f24::fromRaw(floatUniformBuffer[0] >> 8);
|
uniform[3] = f24::fromRaw(floatUniformBuffer[0] >> 8);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -280,10 +271,10 @@ public:
|
||||||
using namespace Helpers;
|
using namespace Helpers;
|
||||||
|
|
||||||
auto& u = intUniforms[index];
|
auto& u = intUniforms[index];
|
||||||
u.x() = word & 0xff;
|
u[0] = word & 0xff;
|
||||||
u.y() = getBits<8, 8>(word);
|
u[1] = getBits<8, 8>(word);
|
||||||
u.z() = getBits<16, 8>(word);
|
u[2] = getBits<16, 8>(word);
|
||||||
u.w() = getBits<24, 8>(word);
|
u[3] = getBits<24, 8>(word);
|
||||||
}
|
}
|
||||||
|
|
||||||
void run();
|
void run();
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#include "PICA/gpu.hpp"
|
|
||||||
#include "PICA/regs.hpp"
|
#include "PICA/regs.hpp"
|
||||||
|
|
||||||
|
#include "PICA/gpu.hpp"
|
||||||
|
|
||||||
using namespace Floats;
|
using namespace Floats;
|
||||||
using namespace Helpers;
|
using namespace Helpers;
|
||||||
|
|
||||||
|
@ -80,32 +81,32 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
||||||
|
|
||||||
case ColourBufferLoc: {
|
case ColourBufferLoc: {
|
||||||
u32 loc = (value & 0x0fffffff) << 3;
|
u32 loc = (value & 0x0fffffff) << 3;
|
||||||
renderer.setColourBufferLoc(loc);
|
renderer->setColourBufferLoc(loc);
|
||||||
break;
|
break;
|
||||||
};
|
};
|
||||||
|
|
||||||
case ColourBufferFormat: {
|
case ColourBufferFormat: {
|
||||||
u32 format = getBits<16, 3>(value);
|
u32 format = getBits<16, 3>(value);
|
||||||
renderer.setColourFormat(static_cast<PICA::ColorFmt>(format));
|
renderer->setColourFormat(static_cast<PICA::ColorFmt>(format));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case DepthBufferLoc: {
|
case DepthBufferLoc: {
|
||||||
u32 loc = (value & 0x0fffffff) << 3;
|
u32 loc = (value & 0x0fffffff) << 3;
|
||||||
renderer.setDepthBufferLoc(loc);
|
renderer->setDepthBufferLoc(loc);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case DepthBufferFormat: {
|
case DepthBufferFormat: {
|
||||||
u32 format = value & 0x3;
|
u32 format = value & 0x3;
|
||||||
renderer.setDepthFormat(static_cast<PICA::DepthFmt>(format));
|
renderer->setDepthFormat(static_cast<PICA::DepthFmt>(format));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case FramebufferSize: {
|
case FramebufferSize: {
|
||||||
const u32 width = value & 0x7ff;
|
const u32 width = value & 0x7ff;
|
||||||
const u32 height = getBits<12, 10>(value) + 1;
|
const u32 height = getBits<12, 10>(value) + 1;
|
||||||
renderer.setFBSize(width, height);
|
renderer->setFBSize(width, height);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -116,7 +117,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
||||||
case LightingLUTData4:
|
case LightingLUTData4:
|
||||||
case LightingLUTData5:
|
case LightingLUTData5:
|
||||||
case LightingLUTData6:
|
case LightingLUTData6:
|
||||||
case LightingLUTData7:{
|
case LightingLUTData7: {
|
||||||
const uint32_t index = regs[LightingLUTIndex]; // Get full LUT index register
|
const uint32_t index = regs[LightingLUTIndex]; // Get full LUT index register
|
||||||
const uint32_t lutID = getBits<8, 5>(index); // Get which LUT we're actually writing to
|
const uint32_t lutID = getBits<8, 5>(index); // Get which LUT we're actually writing to
|
||||||
uint32_t lutIndex = getBits<0, 8>(index); // And get the index inside the LUT we're writing to
|
uint32_t lutIndex = getBits<0, 8>(index); // And get the index inside the LUT we're writing to
|
||||||
|
@ -133,15 +134,16 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case VertexFloatUniformIndex:
|
case VertexFloatUniformIndex: shaderUnit.vs.setFloatUniformIndex(value); break;
|
||||||
shaderUnit.vs.setFloatUniformIndex(value);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case VertexFloatUniformData0: case VertexFloatUniformData1: case VertexFloatUniformData2:
|
case VertexFloatUniformData0:
|
||||||
case VertexFloatUniformData3: case VertexFloatUniformData4: case VertexFloatUniformData5:
|
case VertexFloatUniformData1:
|
||||||
case VertexFloatUniformData6: case VertexFloatUniformData7:
|
case VertexFloatUniformData2:
|
||||||
shaderUnit.vs.uploadFloatUniform(value);
|
case VertexFloatUniformData3:
|
||||||
break;
|
case VertexFloatUniformData4:
|
||||||
|
case VertexFloatUniformData5:
|
||||||
|
case VertexFloatUniformData6:
|
||||||
|
case VertexFloatUniformData7: shaderUnit.vs.uploadFloatUniform(value); break;
|
||||||
|
|
||||||
case FixedAttribIndex:
|
case FixedAttribIndex:
|
||||||
fixedAttribCount = 0;
|
fixedAttribCount = 0;
|
||||||
|
@ -162,7 +164,9 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case FixedAttribData0: case FixedAttribData1: case FixedAttribData2:
|
case FixedAttribData0:
|
||||||
|
case FixedAttribData1:
|
||||||
|
case FixedAttribData2:
|
||||||
fixedAttrBuff[fixedAttribCount++] = value;
|
fixedAttrBuff[fixedAttribCount++] = value;
|
||||||
|
|
||||||
if (fixedAttribCount == 3) {
|
if (fixedAttribCount == 3) {
|
||||||
|
@ -170,10 +174,10 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
||||||
|
|
||||||
vec4f attr;
|
vec4f attr;
|
||||||
// These are stored in the reverse order anyone would expect them to be in
|
// These are stored in the reverse order anyone would expect them to be in
|
||||||
attr.x() = f24::fromRaw(fixedAttrBuff[2] & 0xffffff);
|
attr[0] = f24::fromRaw(fixedAttrBuff[2] & 0xffffff);
|
||||||
attr.y() = f24::fromRaw(((fixedAttrBuff[1] & 0xffff) << 8) | (fixedAttrBuff[2] >> 24));
|
attr[1] = f24::fromRaw(((fixedAttrBuff[1] & 0xffff) << 8) | (fixedAttrBuff[2] >> 24));
|
||||||
attr.z() = f24::fromRaw(((fixedAttrBuff[0] & 0xff) << 16) | (fixedAttrBuff[1] >> 16));
|
attr[2] = f24::fromRaw(((fixedAttrBuff[0] & 0xff) << 16) | (fixedAttrBuff[1] >> 16));
|
||||||
attr.w() = f24::fromRaw(fixedAttrBuff[0] >> 8);
|
attr[3] = f24::fromRaw(fixedAttrBuff[0] >> 8);
|
||||||
|
|
||||||
// If the fixed attribute index is < 12, we're just writing to one of the fixed attributes
|
// If the fixed attribute index is < 12, we're just writing to one of the fixed attributes
|
||||||
if (fixedAttribIndex < 12) [[likely]] {
|
if (fixedAttribIndex < 12) [[likely]] {
|
||||||
|
@ -199,13 +203,12 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
||||||
// If we've reached 3 verts, issue a draw call
|
// If we've reached 3 verts, issue a draw call
|
||||||
// Handle rendering depending on the primitive type
|
// Handle rendering depending on the primitive type
|
||||||
if (immediateModeVertIndex == 3) {
|
if (immediateModeVertIndex == 3) {
|
||||||
renderer.drawVertices(PICA::PrimType::TriangleList, immediateModeVertices);
|
renderer->drawVertices(PICA::PrimType::TriangleList, immediateModeVertices);
|
||||||
|
|
||||||
switch (primType) {
|
switch (primType) {
|
||||||
// Triangle or geometry primitive. Draw a triangle and discard all vertices
|
// Triangle or geometry primitive. Draw a triangle and discard all vertices
|
||||||
case 0: case 3:
|
case 0:
|
||||||
immediateModeVertIndex = 0;
|
case 3: immediateModeVertIndex = 0; break;
|
||||||
break;
|
|
||||||
|
|
||||||
// Triangle strip. Draw triangle, discard first vertex and keep the last 2
|
// Triangle strip. Draw triangle, discard first vertex and keep the last 2
|
||||||
case 1:
|
case 1:
|
||||||
|
@ -230,40 +233,40 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VertexShaderOpDescriptorIndex:
|
case VertexShaderOpDescriptorIndex: shaderUnit.vs.setOpDescriptorIndex(value); break;
|
||||||
shaderUnit.vs.setOpDescriptorIndex(value);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case VertexShaderOpDescriptorData0: case VertexShaderOpDescriptorData1: case VertexShaderOpDescriptorData2:
|
case VertexShaderOpDescriptorData0:
|
||||||
case VertexShaderOpDescriptorData3: case VertexShaderOpDescriptorData4: case VertexShaderOpDescriptorData5:
|
case VertexShaderOpDescriptorData1:
|
||||||
case VertexShaderOpDescriptorData6: case VertexShaderOpDescriptorData7:
|
case VertexShaderOpDescriptorData2:
|
||||||
shaderUnit.vs.uploadDescriptor(value);
|
case VertexShaderOpDescriptorData3:
|
||||||
break;
|
case VertexShaderOpDescriptorData4:
|
||||||
|
case VertexShaderOpDescriptorData5:
|
||||||
|
case VertexShaderOpDescriptorData6:
|
||||||
|
case VertexShaderOpDescriptorData7: shaderUnit.vs.uploadDescriptor(value); break;
|
||||||
|
|
||||||
case VertexBoolUniform:
|
case VertexBoolUniform: shaderUnit.vs.boolUniform = value & 0xffff; break;
|
||||||
shaderUnit.vs.boolUniform = value & 0xffff;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case VertexIntUniform0: case VertexIntUniform1: case VertexIntUniform2: case VertexIntUniform3:
|
case VertexIntUniform0:
|
||||||
shaderUnit.vs.uploadIntUniform(index - VertexIntUniform0, value);
|
case VertexIntUniform1:
|
||||||
break;
|
case VertexIntUniform2:
|
||||||
|
case VertexIntUniform3: shaderUnit.vs.uploadIntUniform(index - VertexIntUniform0, value); break;
|
||||||
|
|
||||||
case VertexShaderData0: case VertexShaderData1: case VertexShaderData2: case VertexShaderData3:
|
case VertexShaderData0:
|
||||||
case VertexShaderData4: case VertexShaderData5: case VertexShaderData6: case VertexShaderData7:
|
case VertexShaderData1:
|
||||||
shaderUnit.vs.uploadWord(value);
|
case VertexShaderData2:
|
||||||
break;
|
case VertexShaderData3:
|
||||||
|
case VertexShaderData4:
|
||||||
|
case VertexShaderData5:
|
||||||
|
case VertexShaderData6:
|
||||||
|
case VertexShaderData7: shaderUnit.vs.uploadWord(value); break;
|
||||||
|
|
||||||
case VertexShaderEntrypoint:
|
case VertexShaderEntrypoint: shaderUnit.vs.entrypoint = value & 0xffff; break;
|
||||||
shaderUnit.vs.entrypoint = value & 0xffff;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case VertexShaderTransferEnd:
|
case VertexShaderTransferEnd:
|
||||||
if (value != 0) shaderUnit.vs.finalize();
|
if (value != 0) shaderUnit.vs.finalize();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VertexShaderTransferIndex:
|
case VertexShaderTransferIndex: shaderUnit.vs.setBufferIndex(value); break;
|
||||||
shaderUnit.vs.setBufferIndex(value);
|
|
||||||
break;
|
|
||||||
|
|
||||||
// Command lists can write to the command processor registers and change the command list stream
|
// Command lists can write to the command processor registers and change the command list stream
|
||||||
// Several games are known to do this, including New Super Mario Bros 2 and Super Mario 3D Land
|
// Several games are known to do this, including New Super Mario Bros 2 and Super Mario 3D Land
|
||||||
|
@ -291,9 +294,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
||||||
|
|
||||||
switch (reg) {
|
switch (reg) {
|
||||||
case 0: attr.offset = value & 0xfffffff; break; // Attribute offset
|
case 0: attr.offset = value & 0xfffffff; break; // Attribute offset
|
||||||
case 1:
|
case 1: attr.config1 = value; break;
|
||||||
attr.config1 = value;
|
|
||||||
break;
|
|
||||||
case 2:
|
case 2:
|
||||||
attr.config2 = value;
|
attr.config2 = value;
|
||||||
attr.size = getBits<16, 8>(value);
|
attr.size = getBits<16, 8>(value);
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#include "PICA/shader.hpp"
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
|
#include "PICA/shader.hpp"
|
||||||
|
|
||||||
using namespace Helpers;
|
using namespace Helpers;
|
||||||
|
|
||||||
void PICAShader::run() {
|
void PICAShader::run() {
|
||||||
|
@ -18,9 +19,8 @@ void PICAShader::run() {
|
||||||
case ShaderOpcodes::CALL: call(instruction); break;
|
case ShaderOpcodes::CALL: call(instruction); break;
|
||||||
case ShaderOpcodes::CALLC: callc(instruction); break;
|
case ShaderOpcodes::CALLC: callc(instruction); break;
|
||||||
case ShaderOpcodes::CALLU: callu(instruction); break;
|
case ShaderOpcodes::CALLU: callu(instruction); break;
|
||||||
case ShaderOpcodes::CMP1: case ShaderOpcodes::CMP2:
|
case ShaderOpcodes::CMP1:
|
||||||
cmp(instruction);
|
case ShaderOpcodes::CMP2: cmp(instruction); break;
|
||||||
break;
|
|
||||||
case ShaderOpcodes::DP3: dp3(instruction); break;
|
case ShaderOpcodes::DP3: dp3(instruction); break;
|
||||||
case ShaderOpcodes::DP4: dp4(instruction); break;
|
case ShaderOpcodes::DP4: dp4(instruction); break;
|
||||||
case ShaderOpcodes::DPHI: dphi(instruction); break;
|
case ShaderOpcodes::DPHI: dphi(instruction); break;
|
||||||
|
@ -45,15 +45,25 @@ void PICAShader::run() {
|
||||||
case ShaderOpcodes::SLT: slt(instruction); break;
|
case ShaderOpcodes::SLT: slt(instruction); break;
|
||||||
case ShaderOpcodes::SLTI: slti(instruction); break;
|
case ShaderOpcodes::SLTI: slti(instruction); break;
|
||||||
|
|
||||||
case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37:
|
case 0x30:
|
||||||
madi(instruction);
|
case 0x31:
|
||||||
break;
|
case 0x32:
|
||||||
|
case 0x33:
|
||||||
|
case 0x34:
|
||||||
|
case 0x35:
|
||||||
|
case 0x36:
|
||||||
|
case 0x37: madi(instruction); break;
|
||||||
|
|
||||||
case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F:
|
case 0x38:
|
||||||
mad(instruction);
|
case 0x39:
|
||||||
break;
|
case 0x3A:
|
||||||
|
case 0x3B:
|
||||||
|
case 0x3C:
|
||||||
|
case 0x3D:
|
||||||
|
case 0x3E:
|
||||||
|
case 0x3F: mad(instruction); break;
|
||||||
|
|
||||||
default:Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode);
|
default: Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle control flow statements. The ordering is important as the priority goes: LOOP > IF > CALL
|
// Handle control flow statements. The ordering is important as the priority goes: LOOP > IF > CALL
|
||||||
|
@ -99,8 +109,8 @@ u8 PICAShader::getIndexedSource(u32 source, u32 index) {
|
||||||
|
|
||||||
switch (index) {
|
switch (index) {
|
||||||
case 0: [[likely]] return u8(source); // No offset applied
|
case 0: [[likely]] return u8(source); // No offset applied
|
||||||
case 1: return u8(source + addrRegister.x());
|
case 1: return u8(source + addrRegister[0]);
|
||||||
case 2: return u8(source + addrRegister.y());
|
case 2: return u8(source + addrRegister[1]);
|
||||||
case 3: return u8(source + loopCounter);
|
case 3: return u8(source + loopCounter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -117,7 +127,7 @@ PICAShader::vec4f PICAShader::getSource(u32 source) {
|
||||||
return floatUniforms[source - 0x20];
|
return floatUniforms[source - 0x20];
|
||||||
else {
|
else {
|
||||||
Helpers::warn("[PICA] Unimplemented source value: %X\n", source);
|
Helpers::warn("[PICA] Unimplemented source value: %X\n", source);
|
||||||
return vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() });
|
return vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -279,9 +289,9 @@ void PICAShader::mova(u32 instruction) {
|
||||||
|
|
||||||
u32 componentMask = operandDescriptor & 0xf;
|
u32 componentMask = operandDescriptor & 0xf;
|
||||||
if (componentMask & 0b1000) // x component
|
if (componentMask & 0b1000) // x component
|
||||||
addrRegister.x() = static_cast<s32>(srcVector.x().toFloat32());
|
addrRegister[0] = static_cast<s32>(srcVector[0].toFloat32());
|
||||||
if (componentMask & 0b0100) // y component
|
if (componentMask & 0b0100) // y component
|
||||||
addrRegister.y() = static_cast<s32>(srcVector.y().toFloat32());
|
addrRegister[1] = static_cast<s32>(srcVector[1].toFloat32());
|
||||||
}
|
}
|
||||||
|
|
||||||
void PICAShader::dp3(u32 instruction) {
|
void PICAShader::dp3(u32 instruction) {
|
||||||
|
@ -546,7 +556,7 @@ void PICAShader::cmp(u32 instruction) {
|
||||||
const u32 idx = getBits<19, 2>(instruction);
|
const u32 idx = getBits<19, 2>(instruction);
|
||||||
const u32 cmpY = getBits<21, 3>(instruction);
|
const u32 cmpY = getBits<21, 3>(instruction);
|
||||||
const u32 cmpX = getBits<24, 3>(instruction);
|
const u32 cmpX = getBits<24, 3>(instruction);
|
||||||
const u32 cmpOperations[2] = { cmpX, cmpY };
|
const u32 cmpOperations[2] = {cmpX, cmpY};
|
||||||
|
|
||||||
if (idx) Helpers::panic("[PICA] CMP: idx != 0");
|
if (idx) Helpers::panic("[PICA] CMP: idx != 0");
|
||||||
vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);
|
vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);
|
||||||
|
@ -578,9 +588,7 @@ void PICAShader::cmp(u32 instruction) {
|
||||||
cmpRegister[i] = srcVec1[i] >= srcVec2[i];
|
cmpRegister[i] = srcVec1[i] >= srcVec2[i];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default: cmpRegister[i] = true; break;
|
||||||
cmpRegister[i] = true;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -615,8 +623,7 @@ void PICAShader::ifu(u32 instruction) {
|
||||||
auto& block = conditionalInfo[ifIndex++];
|
auto& block = conditionalInfo[ifIndex++];
|
||||||
block.endingPC = dest;
|
block.endingPC = dest;
|
||||||
block.newPC = dest + num;
|
block.newPC = dest + num;
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
pc = dest;
|
pc = dest;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -665,18 +672,17 @@ void PICAShader::loop(u32 instruction) {
|
||||||
|
|
||||||
u32 dest = getBits<10, 12>(instruction);
|
u32 dest = getBits<10, 12>(instruction);
|
||||||
auto& uniform = intUniforms[getBits<22, 2>(instruction)]; // The uniform we'll get loop info from
|
auto& uniform = intUniforms[getBits<22, 2>(instruction)]; // The uniform we'll get loop info from
|
||||||
loopCounter = uniform.y();
|
loopCounter = uniform[1];
|
||||||
auto& loop = loopInfo[loopIndex++];
|
auto& loop = loopInfo[loopIndex++];
|
||||||
|
|
||||||
loop.startingPC = pc;
|
loop.startingPC = pc;
|
||||||
loop.endingPC = dest + 1; // Loop is inclusive so we need + 1 here
|
loop.endingPC = dest + 1; // Loop is inclusive so we need + 1 here
|
||||||
loop.iterations = uniform.x() + 1;
|
loop.iterations = uniform[0] + 1;
|
||||||
loop.increment = uniform.z();
|
loop.increment = uniform[2];
|
||||||
}
|
}
|
||||||
|
|
||||||
void PICAShader::jmpc(u32 instruction) {
|
void PICAShader::jmpc(u32 instruction) {
|
||||||
if (isCondTrue(instruction))
|
if (isCondTrue(instruction)) pc = getBits<10, 12>(instruction);
|
||||||
pc = getBits<10, 12>(instruction);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void PICAShader::jmpu(u32 instruction) {
|
void PICAShader::jmpu(u32 instruction) {
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#include "PICA/shader_unit.hpp"
|
#include "PICA/shader_unit.hpp"
|
||||||
|
|
||||||
#include "cityhash.hpp"
|
#include "cityhash.hpp"
|
||||||
|
|
||||||
void ShaderUnit::reset() {
|
void ShaderUnit::reset() {
|
||||||
|
@ -18,18 +19,18 @@ void PICAShader::reset() {
|
||||||
opDescriptorIndex = 0;
|
opDescriptorIndex = 0;
|
||||||
f32UniformTransfer = false;
|
f32UniformTransfer = false;
|
||||||
|
|
||||||
const vec4f zero = vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() });
|
const vec4f zero = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()});
|
||||||
inputs.fill(zero);
|
inputs.fill(zero);
|
||||||
floatUniforms.fill(zero);
|
floatUniforms.fill(zero);
|
||||||
outputs.fill(zero);
|
outputs.fill(zero);
|
||||||
tempRegisters.fill(zero);
|
tempRegisters.fill(zero);
|
||||||
|
|
||||||
for (auto& e : intUniforms) {
|
for (auto& e : intUniforms) {
|
||||||
e.x() = e.y() = e.z() = e.w() = 0;
|
e[0] = e[1] = e[2] = e[3] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
addrRegister.x() = 0;
|
addrRegister[0] = 0;
|
||||||
addrRegister.y() = 0;
|
addrRegister[1] = 0;
|
||||||
loopCounter = 0;
|
loopCounter = 0;
|
||||||
|
|
||||||
codeHashDirty = true;
|
codeHashDirty = true;
|
||||||
|
|
Loading…
Add table
Reference in a new issue