From 3476d336ca00b0ff585b7890f71fd0104a130e5a Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Fri, 30 Jun 2023 14:17:35 +0300
Subject: [PATCH] [Shader JIT] Implement SLT/SLTI/MADI

---
 .../PICA/dynapica/shader_rec_emitter_x64.hpp  |  4 +--
 .../PICA/dynapica/shader_rec_emitter_x64.cpp  | 35 ++++++++++++++++---
 2 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/include/PICA/dynapica/shader_rec_emitter_x64.hpp b/include/PICA/dynapica/shader_rec_emitter_x64.hpp
index 3f33da06..252285c6 100644
--- a/include/PICA/dynapica/shader_rec_emitter_x64.hpp
+++ b/include/PICA/dynapica/shader_rec_emitter_x64.hpp
@@ -30,6 +30,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
 
 	// Vector value of (-0.0, -0.0, -0.0, -0.0) for negating vectors via pxor
 	Label negateVector;
+	// Vector value of (1.0, 1.0, 1.0, 1.0) for SLT(i)/SGE(i)
+	Label onesVector;
 
 	u32 recompilerPC = 0; // PC the recompiler is currently recompiling @
 	u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop)
@@ -86,7 +88,6 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
 	void recJMPU(const PICAShader& shader, u32 instruction);
 	void recLOOP(const PICAShader& shader, u32 instruction);
 	void recMAD(const PICAShader& shader, u32 instruction);
-	void recMADI(const PICAShader& shader, u32 instruction);
 	void recMAX(const PICAShader& shader, u32 instruction);
 	void recMIN(const PICAShader& shader, u32 instruction);
 	void recMOVA(const PICAShader& shader, u32 instruction);
@@ -97,7 +98,6 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
 	void recSGE(const PICAShader& shader, u32 instruction);
 	void recSGEI(const PICAShader& shader, u32 instruction);
 	void recSLT(const PICAShader& shader, u32 instruction);
-	void recSLTI(const PICAShader& shader, u32 instruction);
 
 	MAKE_LOG_FUNCTION(log, shaderJITLogger)
 
diff --git a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp
index 8ccc4838..b2868af5 100644
--- a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp
+++ b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp
@@ -42,6 +42,8 @@ void ShaderEmitter::compile(const PICAShader& shaderUnit) {
 	align(16);
 	L(negateVector);
 	dd(0x80000000); dd(0x80000000); dd(0x80000000); dd(0x80000000); // -0.0 4 times
+	L(onesVector);
+	dd(0x3f800000); dd(0x3f800000); dd(0x3f800000); dd(0x3f800000); // 1.0 4 times
 
 	// Emit prologue first
 	align(16);
@@ -148,9 +150,16 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
 		case ShaderOpcodes::RCP: recRCP(shaderUnit, instruction); break;
 		case ShaderOpcodes::RSQ: recRSQ(shaderUnit, instruction); break;
 
+		// We consider both MAD and MADI to be the same instruction and decode which one we actually have in recMAD
+		case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37:
 		case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F:
 			recMAD(shaderUnit, instruction);
 			break;
+
+		case ShaderOpcodes::SLT:
+		case ShaderOpcodes::SLTI:
+			recSLT(shaderUnit, instruction); break;
+
 		default:
 			Helpers::panic("Shader JIT: Unimplemented PICA opcode %X", opcode);
 	}
@@ -568,16 +577,18 @@ void ShaderEmitter::recRSQ(const PICAShader& shader, u32 instruction) {
 }
 
 void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) {
+	const bool isMADI = getBit<29>(instruction) == 0; // Opcode bit 3 (instruction bit 29) is clear for MADI (0x30-0x37) and set for MAD (0x38-0x3F)
+
 	const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x1f];
 	const u32 src1 = getBits<17, 5>(instruction);
-	const u32 src2 = getBits<10, 7>(instruction);
-	const u32 src3 = getBits<5, 5>(instruction);
+	const u32 src2 = isMADI ? getBits<12, 5>(instruction) : getBits<10, 7>(instruction);
+	const u32 src3 = isMADI ? getBits<5, 7>(instruction) : getBits<5, 5>(instruction);
 	const u32 idx = getBits<22, 2>(instruction);
 	const u32 dest = getBits<24, 5>(instruction);
 
 	loadRegister<1>(src1_xmm, shader, src1, 0, operandDescriptor);
-	loadRegister<2>(src2_xmm, shader, src2, idx, operandDescriptor);
-	loadRegister<3>(src3_xmm, shader, src3, 0, operandDescriptor);
+	loadRegister<2>(src2_xmm, shader, src2, isMADI ? 0 : idx, operandDescriptor);
+	loadRegister<3>(src3_xmm, shader, src3, isMADI ? idx : 0, operandDescriptor);
 
 	// TODO: Implement safe PICA mul
 	// If we have FMA3, optimize MAD to use FMA
@@ -602,6 +613,22 @@ void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) {
 	}
 }
 
+void ShaderEmitter::recSLT(const PICAShader& shader, u32 instruction) {
+	const bool isSLTI = (instruction >> 26) == ShaderOpcodes::SLTI;
+	const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
+
+	const u32 src1 = isSLTI ? getBits<14, 5>(instruction) : getBits<12, 7>(instruction);
+	const u32 src2 = isSLTI ? getBits<7, 7>(instruction) : getBits<7, 5>(instruction);
+	const u32 idx = getBits<19, 2>(instruction);
+	const u32 dest = getBits<21, 5>(instruction);
+
+	loadRegister<1>(src1_xmm, shader, src1, isSLTI ? 0 : idx, operandDescriptor);
+	loadRegister<2>(src2_xmm, shader, src2, isSLTI ? idx : 0, operandDescriptor);
+	cmpltps(src1_xmm, src2_xmm);
+	andps(src1_xmm, xword[rip + onesVector]);
+	storeRegister(src1_xmm, shader, dest, operandDescriptor);
+}
+
 void ShaderEmitter::recCMP(const PICAShader& shader, u32 instruction) {
 	const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
 	const u32 src1 = getBits<12, 7>(instruction);