From 1fe44c009860632072901db81489e122077f3f7f Mon Sep 17 00:00:00 2001
From: wheremyfoodat <gponiris2004@gmail.com>
Date: Fri, 9 Jun 2023 22:31:06 +0300
Subject: [PATCH] [ShaderJIT] Add cmp

---
 .../PICA/dynapica/shader_rec_emitter_x64.cpp  | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp
index bbf835e6..55bef778 100644
--- a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp
+++ b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp
@@ -3,6 +3,7 @@
 
 #include <algorithm>
 #include <bit>
+#include <cassert>
 #include <cstddef>
 
 using namespace Xbyak;
@@ -75,6 +76,9 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
 
 	switch (opcode) {
 		case ShaderOpcodes::ADD: recADD(shaderUnit, instruction); break;
+		case ShaderOpcodes::CMP1: case ShaderOpcodes::CMP2:
+			recCMP(shaderUnit, instruction);
+			break;
 		case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break;
 		case ShaderOpcodes::END: recEND(shaderUnit, instruction); break;
 		case ShaderOpcodes::MOV: recMOV(shaderUnit, instruction); break;
@@ -240,4 +244,71 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) {
 	storeRegister(src1_xmm, shader, dest, operandDescriptor);
 }
 
+// Emits x64 code for the PICA CMP instruction. The x and y lanes of src1 are each compared against the
+// matching lane of src2 with their own compare op (cmpX / cmpY), and the two boolean results are written
+// to the two bytes of shader.cmpRegister (x result first, y result second) through statePointer.
+void ShaderEmitter::recCMP(const PICAShader& shader, u32 instruction) {
+	const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
+	const u32 src1 = (instruction >> 12) & 0x7f;
+	const u32 src2 = (instruction >> 7) & 0x1f; // src2 coming first because PICA moment
+	const u32 idx = (instruction >> 19) & 3;
+	const u32 cmpY = (instruction >> 21) & 7;
+	const u32 cmpX = (instruction >> 24) & 7;
+
+	loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor);
+	loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor);
+
+	// Condition codes for cmpps
+	enum : u8 {
+		CMP_EQ = 0,
+		CMP_LT = 1,
+		CMP_LE = 2,
+		CMP_UNORD = 3,
+		CMP_NEQ = 4,
+		CMP_NLT = 5,
+		CMP_NLE = 6,
+		CMP_ORD = 7,
+		CMP_TRUE = 15
+	};
+
+	// Map from PICA condition codes (used as index) to x86 condition codes
+	// NOTE(review): CMP_TRUE (15) is a VEX-only predicate; legacy SSE cmpps documents only imm8 0-7 - confirm PICA codes 6/7 behave as intended
+	static constexpr std::array<u8, 8> conditionCodes = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE, CMP_TRUE, CMP_TRUE };
+
+	// SSE does not offer GT or GE comparisons in the cmpps instruction, so we need to flip the left and right operands in that case and use LT/LE
+	const bool invertX = (cmpX == 4 || cmpX == 5);
+	const bool invertY = (cmpY == 4 || cmpY == 5);
+	Xmm lhs_x = invertX ? src2_xmm : src1_xmm;
+	Xmm rhs_x = invertX ? src1_xmm : src2_xmm;
+	Xmm lhs_y = invertY ? src2_xmm : src1_xmm;
+	Xmm rhs_y = invertY ? src1_xmm : src2_xmm;
+	const u8 compareFuncX = conditionCodes[cmpX];
+	const u8 compareFuncY = conditionCodes[cmpY];
+
+	static_assert(sizeof(bool) == 1 && sizeof(shader.cmpRegister) == 2); // The code below relies on bool being 1 byte exactly
+	const size_t cmpRegXOffset = uintptr_t(&shader.cmpRegister) - uintptr_t(&shader);
+	const size_t cmpRegYOffset = cmpRegXOffset + 1;
+
+	// Cmp x and y are the same compare function, we can use a single cmp instruction
+	if (cmpX == cmpY) {
+		cmpps(lhs_x, rhs_x, compareFuncX);
+		movq(rax, lhs_x);                          // Low 64 bits of the mask: x result in bits 0-31, y result in bits 32-63
+		test(eax, eax);                            // Check the x lane first
+		setne(byte[statePointer + cmpRegXOffset]); // setne leaves the flags untouched
+		shr(rax, 32);                              // Isolate the y lane; shr sets the zero flag from its result
+		setne(byte[statePointer + cmpRegYOffset]);
+	} else {
+		movaps(scratch1, lhs_x); // Copy the left hand operands to temp registers
+		movaps(scratch2, lhs_y);
+		cmpps(scratch1, rhs_x, compareFuncX); // Perform the compares
+		cmpps(scratch2, rhs_y, compareFuncY);
+		movd(eax, scratch1); // x lane of the X compare goes to eax
+		movq(rdx, scratch2); // Low 64 bits of the Y compare go to rdx, the y lane sits in bits 32-63
+		test(eax, eax);      // Write back results with setne
+		setne(byte[statePointer + cmpRegXOffset]);
+		shr(rdx, 32);        // Drop the x lane so only the y lane result decides the zero flag
+		setne(byte[statePointer + cmpRegYOffset]);
+	}
+}
+
 #endif
\ No newline at end of file