Remove OpenGL-specific vector-types

Removes the dependency on the OpenGL header and rendering backend for its `OpenGL::Vector` type in favor of a more standard `std::array`.
2025-07-16 04:07:08 +12:00 · 2023-07-10 08:55:23 -07:00 · 2023-07-10 08:55:23 -07:00 · 9e32b6d4bf
commit 9e32b6d4bf
parent 2a1683ba62
5 changed files with 221 additions and 224 deletions
--- a/include/PICA/dynapica/shader_rec_emitter_x64.hpp
+++ b/include/PICA/dynapica/shader_rec_emitter_x64.hpp
@ -2,14 +2,14 @@
 // Only do anything if we're on an x64 target with JIT support enabled
 #if defined(PANDA3DS_DYNAPICA_SUPPORTED) && defined(PANDA3DS_X64_HOST)
 #include <vector>
 #include "PICA/shader.hpp"
 #include "helpers.hpp"
 #include "logger.hpp"
-#include "PICA/shader.hpp"
+#include "x64_regs.hpp"
 #include "xbyak/xbyak.h"
 #include "xbyak/xbyak_util.h"
 #include "x64_regs.hpp"
 #include <vector>
 class ShaderEmitter : public Xbyak::CodeGenerator {
 	static constexpr size_t executableMemorySize = PICAShader::maxInstructionCount * 96;  // How much executable memory to alloc for each shader
@ -20,7 +20,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
 	static constexpr uint noSwizzle = 0x1B;
 	using f24 = Floats::f24;
-	using vec4f = OpenGL::Vector<f24, 4>;
+	using vec4f = std::array<f24, 4>;
 	// An array of labels (incl pointers) to each compiled (to x64) PICA instruction
 	std::array<Xbyak::Label, PICAShader::maxInstructionCount> instructionLabels;
@ -105,10 +105,10 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
 	MAKE_LOG_FUNCTION(log, shaderJITLogger)
-public:
+  public:
-	using InstructionCallback = const void(*)(PICAShader& shaderUnit); // Callback type used for instructions
+	using InstructionCallback = const void (*)(PICAShader& shaderUnit);  // Callback type used for instructions
 	// Callback type used for the JIT prologue. This is what the caller will call
-	using PrologueCallback = const void(*)(PICAShader& shaderUnit, InstructionCallback cb);
+	using PrologueCallback = const void (*)(PICAShader& shaderUnit, InstructionCallback cb);
 	PrologueCallback prologueCb = nullptr;
 	// Initialize our emitter with "allocSize" bytes of RWX memory
@ -133,9 +133,7 @@ public:
 		return reinterpret_cast<InstructionCallback>(ptr);
 	}
-	PrologueCallback getPrologueCallback() {
+	PrologueCallback getPrologueCallback() { return prologueCb; }
 		return prologueCb;
 	}
 };
 #endif  // x64 recompiler check
--- a/include/PICA/shader.hpp
+++ b/include/PICA/shader.hpp
@ -2,14 +2,12 @@
 #include <algorithm>
 #include <array>
 #include <cstring>
-#include "helpers.hpp"
+
 #include "opengl.hpp"
 #include "PICA/float_types.hpp"
 #include "PICA/pica_hash.hpp"
 #include "helpers.hpp"
-enum class ShaderType {
+enum class ShaderType { Vertex, Geometry };
 	Vertex, Geometry
 };
 namespace ShaderOpcodes {
 	enum : u32 {
@ -55,7 +53,7 @@ namespace ShaderOpcodes {
 // Note: All PICA f24 vec4 registers must have the alignas(16) specifier to make them easier to access in SSE/NEON code in the JIT
 class PICAShader {
 	using f24 = Floats::f24;
-	using vec4f = OpenGL::Vector<f24, 4>;
+	using vec4f = std::array<f24, 4>;
 	struct Loop {
 		u32 startingPC;  // PC at the start of the loop
@ -83,22 +81,22 @@ class PICAShader {
 	std::array<u32, 4> floatUniformBuffer;  // Buffer for temporarily caching float uniform data
-public:
+  public:
 	// These are placed close to the temp registers and co because it helps the JIT generate better code
 	u32 entrypoint = 0;  // Initial shader PC
 	u32 boolUniform;
-	std::array<OpenGL::Vector<u8, 4>, 4> intUniforms;
+	std::array<std::array<u8, 4>, 4> intUniforms;
 	alignas(16) std::array<vec4f, 96> floatUniforms;
 	alignas(16) std::array<vec4f, 16> fixedAttributes;  // Fixed vertex attributes
 	alignas(16) std::array<vec4f, 16> inputs;           // Attributes passed to the shader
 	alignas(16) std::array<vec4f, 16> outputs;
-	alignas(16) vec4f dummy = vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() }); // Dummy register used by the JIT
+	alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()});  // Dummy register used by the JIT
-protected:
+  protected:
 	std::array<u32, 128> operandDescriptors;
 	alignas(16) std::array<vec4f, 16> tempRegisters;  // General purpose registers the shader can use for temp values
-	OpenGL::Vector<s32, 2> addrRegister; // Address register
+	std::array<s32, 2> addrRegister;                  // Address register
 	bool cmpRegister[2];                              // Comparison registers where the result of CMP is stored in
 	u32 loopCounter;
@ -130,7 +128,7 @@ protected:
 	vec4f getSource(u32 source);
 	vec4f& getDest(u32 dest);
-private:
+  private:
 	// Interpreter functions for the various shader functions
 	void add(u32 instruction);
 	void call(u32 instruction);
@ -212,7 +210,7 @@ private:
 	u8 getIndexedSource(u32 source, u32 index);
 	bool isCondTrue(u32 instruction);
-public:
+  public:
 	static constexpr size_t maxInstructionCount = 4096;
 	std::array<u32, maxInstructionCount> loadedShader;    // Currently loaded & active shader
 	std::array<u32, maxInstructionCount> bufferedShader;  // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
@ -220,17 +218,11 @@ public:
 	PICAShader(ShaderType type) : type(type) {}
 	// Theese functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them
-	void finalize() {
+	void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); }
 		std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32));
 	}
-	void setBufferIndex(u32 index) {
+	void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; }
 		bufferIndex = index & 0xfff;
 	}
-	void setOpDescriptorIndex(u32 index) {
+	void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; }
 		opDescriptorIndex = index & 0x7f;
 	}
 	void uploadWord(u32 word) {
 		if (bufferIndex >= 4095) Helpers::panic("o no, shader upload overflew");
@ -255,23 +247,22 @@ public:
 	void uploadFloatUniform(u32 word) {
 		floatUniformBuffer[floatUniformWordCount++] = word;
-		if (floatUniformIndex >= 96)
+		if (floatUniformIndex >= 96) Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex);
 			Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex);
 		if ((f32UniformTransfer && floatUniformWordCount >= 4) || (!f32UniformTransfer && floatUniformWordCount >= 3)) {
 			vec4f& uniform = floatUniforms[floatUniformIndex++];
 			floatUniformWordCount = 0;
 			if (f32UniformTransfer) {
-				uniform.x() = f24::fromFloat32(*(float*)&floatUniformBuffer[3]);
+				uniform[0] = f24::fromFloat32(*(float*)&floatUniformBuffer[3]);
-				uniform.y() = f24::fromFloat32(*(float*)&floatUniformBuffer[2]);
+				uniform[1] = f24::fromFloat32(*(float*)&floatUniformBuffer[2]);
-				uniform.z() = f24::fromFloat32(*(float*)&floatUniformBuffer[1]);
+				uniform[2] = f24::fromFloat32(*(float*)&floatUniformBuffer[1]);
-				uniform.w() = f24::fromFloat32(*(float*)&floatUniformBuffer[0]);
+				uniform[3] = f24::fromFloat32(*(float*)&floatUniformBuffer[0]);
 			} else {
-				uniform.x() = f24::fromRaw(floatUniformBuffer[2] & 0xffffff);
+				uniform[0] = f24::fromRaw(floatUniformBuffer[2] & 0xffffff);
-				uniform.y() = f24::fromRaw(((floatUniformBuffer[1] & 0xffff) << 8) | (floatUniformBuffer[2] >> 24));
+				uniform[1] = f24::fromRaw(((floatUniformBuffer[1] & 0xffff) << 8) | (floatUniformBuffer[2] >> 24));
-				uniform.z() = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
+				uniform[2] = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
-				uniform.w() = f24::fromRaw(floatUniformBuffer[0] >> 8);
+				uniform[3] = f24::fromRaw(floatUniformBuffer[0] >> 8);
 			}
 		}
 	}
@ -280,10 +271,10 @@ public:
 		using namespace Helpers;
 		auto& u = intUniforms[index];
-		u.x() = word & 0xff;
+		u[0] = word & 0xff;
-		u.y() = getBits<8, 8>(word);
+		u[1] = getBits<8, 8>(word);
-		u.z() = getBits<16, 8>(word);
+		u[2] = getBits<16, 8>(word);
-		u.w() = getBits<24, 8>(word);
+		u[3] = getBits<24, 8>(word);
 	}
 	void run();
--- a/src/core/PICA/regs.cpp
+++ b/src/core/PICA/regs.cpp
@ -1,6 +1,7 @@
 #include "PICA/gpu.hpp"
 #include "PICA/regs.hpp"
 #include "PICA/gpu.hpp"
 using namespace Floats;
 using namespace Helpers;
@ -80,32 +81,32 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 		case ColourBufferLoc: {
 			u32 loc = (value & 0x0fffffff) << 3;
-			renderer.setColourBufferLoc(loc);
+			renderer->setColourBufferLoc(loc);
 			break;
 		};
 		case ColourBufferFormat: {
 			u32 format = getBits<16, 3>(value);
-			renderer.setColourFormat(static_cast<PICA::ColorFmt>(format));
+			renderer->setColourFormat(static_cast<PICA::ColorFmt>(format));
 			break;
 		}
 		case DepthBufferLoc: {
 			u32 loc = (value & 0x0fffffff) << 3;
-			renderer.setDepthBufferLoc(loc);
+			renderer->setDepthBufferLoc(loc);
 			break;
 		}
 		case DepthBufferFormat: {
 			u32 format = value & 0x3;
-			renderer.setDepthFormat(static_cast<PICA::DepthFmt>(format));
+			renderer->setDepthFormat(static_cast<PICA::DepthFmt>(format));
 			break;
 		}
 		case FramebufferSize: {
 			const u32 width = value & 0x7ff;
 			const u32 height = getBits<12, 10>(value) + 1;
-			renderer.setFBSize(width, height);
+			renderer->setFBSize(width, height);
 			break;
 		}
@ -116,7 +117,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 		case LightingLUTData4:
 		case LightingLUTData5:
 		case LightingLUTData6:
-		case LightingLUTData7:{
+		case LightingLUTData7: {
 			const uint32_t index = regs[LightingLUTIndex];  // Get full LUT index register
 			const uint32_t lutID = getBits<8, 5>(index);    // Get which LUT we're actually writing to
 			uint32_t lutIndex = getBits<0, 8>(index);       // And get the index inside the LUT we're writing to
@ -133,15 +134,16 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 			break;
 		}
-		case VertexFloatUniformIndex:
+		case VertexFloatUniformIndex: shaderUnit.vs.setFloatUniformIndex(value); break;
 			shaderUnit.vs.setFloatUniformIndex(value);
 			break;
-		case VertexFloatUniformData0: case VertexFloatUniformData1: case VertexFloatUniformData2:
+		case VertexFloatUniformData0:
-		case VertexFloatUniformData3: case VertexFloatUniformData4: case VertexFloatUniformData5:
+		case VertexFloatUniformData1:
-		case VertexFloatUniformData6: case VertexFloatUniformData7:
+		case VertexFloatUniformData2:
-			shaderUnit.vs.uploadFloatUniform(value);
+		case VertexFloatUniformData3:
-			break;
+		case VertexFloatUniformData4:
 		case VertexFloatUniformData5:
 		case VertexFloatUniformData6:
 		case VertexFloatUniformData7: shaderUnit.vs.uploadFloatUniform(value); break;
 		case FixedAttribIndex:
 			fixedAttribCount = 0;
@ -162,7 +164,9 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 			}
 			break;
-		case FixedAttribData0: case FixedAttribData1: case FixedAttribData2:
+		case FixedAttribData0:
 		case FixedAttribData1:
 		case FixedAttribData2:
 			fixedAttrBuff[fixedAttribCount++] = value;
 			if (fixedAttribCount == 3) {
@ -170,10 +174,10 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 				vec4f attr;
 				// These are stored in the reverse order anyone would expect them to be in
-				attr.x() = f24::fromRaw(fixedAttrBuff[2] & 0xffffff);
+				attr[0] = f24::fromRaw(fixedAttrBuff[2] & 0xffffff);
-				attr.y() = f24::fromRaw(((fixedAttrBuff[1] & 0xffff) << 8) | (fixedAttrBuff[2] >> 24));
+				attr[1] = f24::fromRaw(((fixedAttrBuff[1] & 0xffff) << 8) | (fixedAttrBuff[2] >> 24));
-				attr.z() = f24::fromRaw(((fixedAttrBuff[0] & 0xff) << 16) | (fixedAttrBuff[1] >> 16));
+				attr[2] = f24::fromRaw(((fixedAttrBuff[0] & 0xff) << 16) | (fixedAttrBuff[1] >> 16));
-				attr.w() = f24::fromRaw(fixedAttrBuff[0] >> 8);
+				attr[3] = f24::fromRaw(fixedAttrBuff[0] >> 8);
 				// If the fixed attribute index is < 12, we're just writing to one of the fixed attributes
 				if (fixedAttribIndex < 12) [[likely]] {
@ -199,13 +203,12 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 						// If we've reached 3 verts, issue a draw call
 						// Handle rendering depending on the primitive type
 						if (immediateModeVertIndex == 3) {
-							renderer.drawVertices(PICA::PrimType::TriangleList, immediateModeVertices);
+							renderer->drawVertices(PICA::PrimType::TriangleList, immediateModeVertices);
 							switch (primType) {
 								// Triangle or geometry primitive. Draw a triangle and discard all vertices
-								case 0: case 3:
+								case 0:
-									immediateModeVertIndex = 0;
+								case 3: immediateModeVertIndex = 0; break;
 									break;
 								// Triangle strip. Draw triangle, discard first vertex and keep the last 2
 								case 1:
@ -230,40 +233,40 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 			break;
-		case VertexShaderOpDescriptorIndex:
+		case VertexShaderOpDescriptorIndex: shaderUnit.vs.setOpDescriptorIndex(value); break;
 			shaderUnit.vs.setOpDescriptorIndex(value);
 			break;
-		case VertexShaderOpDescriptorData0: case VertexShaderOpDescriptorData1: case VertexShaderOpDescriptorData2:
+		case VertexShaderOpDescriptorData0:
-		case VertexShaderOpDescriptorData3: case VertexShaderOpDescriptorData4: case VertexShaderOpDescriptorData5:
+		case VertexShaderOpDescriptorData1:
-		case VertexShaderOpDescriptorData6: case VertexShaderOpDescriptorData7:
+		case VertexShaderOpDescriptorData2:
-			shaderUnit.vs.uploadDescriptor(value);
+		case VertexShaderOpDescriptorData3:
-			break;
+		case VertexShaderOpDescriptorData4:
 		case VertexShaderOpDescriptorData5:
 		case VertexShaderOpDescriptorData6:
 		case VertexShaderOpDescriptorData7: shaderUnit.vs.uploadDescriptor(value); break;
-		case VertexBoolUniform:
+		case VertexBoolUniform: shaderUnit.vs.boolUniform = value & 0xffff; break;
 			shaderUnit.vs.boolUniform = value & 0xffff;
 			break;
-		case VertexIntUniform0: case VertexIntUniform1: case VertexIntUniform2: case VertexIntUniform3:
+		case VertexIntUniform0:
-			shaderUnit.vs.uploadIntUniform(index - VertexIntUniform0, value);
+		case VertexIntUniform1:
-			break;
+		case VertexIntUniform2:
 		case VertexIntUniform3: shaderUnit.vs.uploadIntUniform(index - VertexIntUniform0, value); break;
-		case VertexShaderData0: case VertexShaderData1: case VertexShaderData2: case VertexShaderData3:
+		case VertexShaderData0:
-		case VertexShaderData4: case VertexShaderData5: case VertexShaderData6: case VertexShaderData7:
+		case VertexShaderData1:
-			shaderUnit.vs.uploadWord(value);
+		case VertexShaderData2:
-			break;
+		case VertexShaderData3:
 		case VertexShaderData4:
 		case VertexShaderData5:
 		case VertexShaderData6:
 		case VertexShaderData7: shaderUnit.vs.uploadWord(value); break;
-		case VertexShaderEntrypoint:
+		case VertexShaderEntrypoint: shaderUnit.vs.entrypoint = value & 0xffff; break;
 			shaderUnit.vs.entrypoint = value & 0xffff;
 			break;
 		case VertexShaderTransferEnd:
 			if (value != 0) shaderUnit.vs.finalize();
 			break;
-		case VertexShaderTransferIndex:
+		case VertexShaderTransferIndex: shaderUnit.vs.setBufferIndex(value); break;
 			shaderUnit.vs.setBufferIndex(value);
 			break;
 		// Command lists can write to the command processor registers and change the command list stream
 		// Several games are known to do this, including New Super Mario Bros 2 and Super Mario 3D Land
@ -291,9 +294,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 				switch (reg) {
 					case 0: attr.offset = value & 0xfffffff; break;  // Attribute offset
-					case 1: 
+					case 1: attr.config1 = value; break;
 						attr.config1 = value;
 						break;
 					case 2:
 						attr.config2 = value;
 						attr.size = getBits<16, 8>(value);
--- a/src/core/PICA/shader_interpreter.cpp
+++ b/src/core/PICA/shader_interpreter.cpp
@ -1,6 +1,7 @@
 #include "PICA/shader.hpp"
 #include <cmath>
 #include "PICA/shader.hpp"
 using namespace Helpers;
 void PICAShader::run() {
@ -18,9 +19,8 @@ void PICAShader::run() {
 			case ShaderOpcodes::CALL: call(instruction); break;
 			case ShaderOpcodes::CALLC: callc(instruction); break;
 			case ShaderOpcodes::CALLU: callu(instruction); break;
-			case ShaderOpcodes::CMP1: case ShaderOpcodes::CMP2: 
+			case ShaderOpcodes::CMP1:
-				cmp(instruction);
+			case ShaderOpcodes::CMP2: cmp(instruction); break;
 				break;
 			case ShaderOpcodes::DP3: dp3(instruction); break;
 			case ShaderOpcodes::DP4: dp4(instruction); break;
 			case ShaderOpcodes::DPHI: dphi(instruction); break;
@ -45,15 +45,25 @@ void PICAShader::run() {
 			case ShaderOpcodes::SLT: slt(instruction); break;
 			case ShaderOpcodes::SLTI: slti(instruction); break;
-			case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37:
+			case 0x30:
-				madi(instruction);
+			case 0x31:
-				break;
+			case 0x32:
 			case 0x33:
 			case 0x34:
 			case 0x35:
 			case 0x36:
 			case 0x37: madi(instruction); break;
-			case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F:
+			case 0x38:
-				mad(instruction);
+			case 0x39:
-				break;
+			case 0x3A:
 			case 0x3B:
 			case 0x3C:
 			case 0x3D:
 			case 0x3E:
 			case 0x3F: mad(instruction); break;
-			default:Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode);
+			default: Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode);
 		}
 		// Handle control flow statements. The ordering is important as the priority goes: LOOP > IF > CALL
@ -99,8 +109,8 @@ u8 PICAShader::getIndexedSource(u32 source, u32 index) {
 	switch (index) {
 		case 0: [[likely]] return u8(source);  // No offset applied
-		case 1: return u8(source + addrRegister.x());
+		case 1: return u8(source + addrRegister[0]);
-		case 2: return u8(source + addrRegister.y());
+		case 2: return u8(source + addrRegister[1]);
 		case 3: return u8(source + loopCounter);
 	}
@ -117,7 +127,7 @@ PICAShader::vec4f PICAShader::getSource(u32 source) {
 		return floatUniforms[source - 0x20];
 	else {
 		Helpers::warn("[PICA] Unimplemented source value: %X\n", source);
-		return vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() });
+		return vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()});
 	}
 }
@ -279,9 +289,9 @@ void PICAShader::mova(u32 instruction) {
 	u32 componentMask = operandDescriptor & 0xf;
 	if (componentMask & 0b1000)  // x component
-		addrRegister.x() = static_cast<s32>(srcVector.x().toFloat32());
+		addrRegister[0] = static_cast<s32>(srcVector[0].toFloat32());
 	if (componentMask & 0b0100)  // y component
-		addrRegister.y() = static_cast<s32>(srcVector.y().toFloat32());
+		addrRegister[1] = static_cast<s32>(srcVector[1].toFloat32());
 }
 void PICAShader::dp3(u32 instruction) {
@ -546,7 +556,7 @@ void PICAShader::cmp(u32 instruction) {
 	const u32 idx = getBits<19, 2>(instruction);
 	const u32 cmpY = getBits<21, 3>(instruction);
 	const u32 cmpX = getBits<24, 3>(instruction);
-	const u32 cmpOperations[2] = { cmpX, cmpY };
+	const u32 cmpOperations[2] = {cmpX, cmpY};
 	if (idx) Helpers::panic("[PICA] CMP: idx != 0");
 	vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);
@ -578,9 +588,7 @@ void PICAShader::cmp(u32 instruction) {
 				cmpRegister[i] = srcVec1[i] >= srcVec2[i];
 				break;
-			default:
+			default: cmpRegister[i] = true; break;
 				cmpRegister[i] = true;
 				break;
 		}
 	}
 }
@ -615,8 +623,7 @@ void PICAShader::ifu(u32 instruction) {
 		auto& block = conditionalInfo[ifIndex++];
 		block.endingPC = dest;
 		block.newPC = dest + num;
-	}
+	} else {
 	else {
 		pc = dest;
 	}
 }
@ -665,18 +672,17 @@ void PICAShader::loop(u32 instruction) {
 	u32 dest = getBits<10, 12>(instruction);
 	auto& uniform = intUniforms[getBits<22, 2>(instruction)];  // The uniform we'll get loop info from
-	loopCounter = uniform.y();
+	loopCounter = uniform[1];
 	auto& loop = loopInfo[loopIndex++];
 	loop.startingPC = pc;
 	loop.endingPC = dest + 1;  // Loop is inclusive so we need + 1 here
-	loop.iterations = uniform.x() + 1;
+	loop.iterations = uniform[0] + 1;
-	loop.increment = uniform.z();
+	loop.increment = uniform[2];
 }
 void PICAShader::jmpc(u32 instruction) {
-	if (isCondTrue(instruction))
+	if (isCondTrue(instruction)) pc = getBits<10, 12>(instruction);
 		pc = getBits<10, 12>(instruction);
 }
 void PICAShader::jmpu(u32 instruction) {
--- a/src/core/PICA/shader_unit.cpp
+++ b/src/core/PICA/shader_unit.cpp
@ -1,4 +1,5 @@
 #include "PICA/shader_unit.hpp"
 #include "cityhash.hpp"
 void ShaderUnit::reset() {
@ -18,18 +19,18 @@ void PICAShader::reset() {
 	opDescriptorIndex = 0;
 	f32UniformTransfer = false;
-	const vec4f zero = vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() });
+	const vec4f zero = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()});
 	inputs.fill(zero);
 	floatUniforms.fill(zero);
 	outputs.fill(zero);
 	tempRegisters.fill(zero);
 	for (auto& e : intUniforms) {
-		e.x() = e.y() = e.z() = e.w() = 0;
+		e[0] = e[1] = e[2] = e[3] = 0;
 	}
-	addrRegister.x() = 0;
+	addrRegister[0] = 0;
-	addrRegister.y() = 0;
+	addrRegister[1] = 0;
 	loopCounter = 0;
 	codeHashDirty = true;