[Shader JIT] Implement SLT/SLTI/MADI

This commit is contained in:
wheremyfoodat 2023-06-30 14:17:35 +03:00
parent 8a13b8c878
commit 3476d336ca
2 changed files with 33 additions and 6 deletions

View file

@ -30,6 +30,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
// Vector value of (-0.0, -0.0, -0.0, -0.0) for negating vectors via pxor
Label negateVector;
// Vector value of (1.0, 1.0, 1.0, 1.0) for SLT(i)/SGE(i)
Label onesVector;
u32 recompilerPC = 0; // PC the recompiler is currently recompiling @
u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop)
@ -86,7 +88,6 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
void recJMPU(const PICAShader& shader, u32 instruction);
void recLOOP(const PICAShader& shader, u32 instruction);
void recMAD(const PICAShader& shader, u32 instruction);
void recMADI(const PICAShader& shader, u32 instruction);
void recMAX(const PICAShader& shader, u32 instruction);
void recMIN(const PICAShader& shader, u32 instruction);
void recMOVA(const PICAShader& shader, u32 instruction);
@ -97,7 +98,6 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
void recSGE(const PICAShader& shader, u32 instruction);
void recSGEI(const PICAShader& shader, u32 instruction);
void recSLT(const PICAShader& shader, u32 instruction);
void recSLTI(const PICAShader& shader, u32 instruction);
MAKE_LOG_FUNCTION(log, shaderJITLogger)

View file

@ -42,6 +42,8 @@ void ShaderEmitter::compile(const PICAShader& shaderUnit) {
align(16);
L(negateVector);
dd(0x80000000); dd(0x80000000); dd(0x80000000); dd(0x80000000); // -0.0 4 times
L(onesVector);
dd(0x3f800000); dd(0x3f800000); dd(0x3f800000); dd(0x3f800000); // 1.0 4 times
// Emit prologue first
align(16);
@ -148,9 +150,16 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
case ShaderOpcodes::RCP: recRCP(shaderUnit, instruction); break;
case ShaderOpcodes::RSQ: recRSQ(shaderUnit, instruction); break;
// We consider both MAD and MADI to be the same instruction and decode which one we actually have in recMAD
case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37:
case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F:
recMAD(shaderUnit, instruction);
break;
case ShaderOpcodes::SLT:
case ShaderOpcodes::SLTI:
recSLT(shaderUnit, instruction); break;
default:
Helpers::panic("Shader JIT: Unimplemented PICA opcode %X", opcode);
}
@ -568,16 +577,18 @@ void ShaderEmitter::recRSQ(const PICAShader& shader, u32 instruction) {
}
// Recompiles the PICA MAD and MADI instructions (dest = src1 * src2 + src3), which share this handler.
//
// Both encodings keep the operand descriptor index in bits 0-4, src1 in bits 17-21, the address
// register index selector in bits 22-23 and the destination in bits 24-28. They differ in which of
// src2/src3 is the wide (7-bit, relatively addressable) operand:
//   MAD  (opcodes 0x38-0x3F, instruction bit 29 set):   src2 = bits 10-16 (7 bits), src3 = bits 5-9  (5 bits)
//   MADI (opcodes 0x30-0x37, instruction bit 29 clear): src2 = bits 12-16 (5 bits), src3 = bits 5-11 (7 bits)
// The index selector is only forwarded to loadRegister for the wide operand.
void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) {
    // Bit 29 is the lowest bit of the 3-bit opcode field: it is CLEAR for MADI and SET for MAD.
    // Testing the raw bit (as opposed to its negation) would swap the two decodings.
    const bool isMADI = getBit<29>(instruction) == 0;
    const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x1f];
    const u32 src1 = getBits<17, 5>(instruction);
    const u32 src2 = isMADI ? getBits<12, 5>(instruction) : getBits<10, 7>(instruction);
    const u32 src3 = isMADI ? getBits<5, 7>(instruction) : getBits<5, 5>(instruction);
    const u32 idx = getBits<22, 2>(instruction);
    const u32 dest = getBits<24, 5>(instruction);

    // src1 is never indexed; idx goes to src2 for MAD and to src3 for MADI
    loadRegister<1>(src1_xmm, shader, src1, 0, operandDescriptor);
    loadRegister<2>(src2_xmm, shader, src2, isMADI ? 0 : idx, operandDescriptor);
    loadRegister<3>(src3_xmm, shader, src3, isMADI ? idx : 0, operandDescriptor);

    // TODO: Implement safe PICA mul
    // If we have FMA3, optimize MAD to use FMA
@ -602,6 +613,22 @@ void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) {
}
}
void ShaderEmitter::recSLT(const PICAShader& shader, u32 instruction) {
const bool isSLTI = (instruction >> 26) == ShaderOpcodes::SLTI;
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
const u32 src1 = isSLTI ? getBits<14, 5>(instruction) : getBits<12, 7>(instruction);
const u32 src2 = isSLTI ? getBits<7, 7>(instruction) : getBits<7, 5>(instruction);
const u32 idx = getBits<19, 2>(instruction);
const u32 dest = getBits<21, 5>(instruction);
loadRegister<1>(src1_xmm, shader, src1, isSLTI ? 0 : idx, operandDescriptor);
loadRegister<2>(src2_xmm, shader, src2, isSLTI ? idx : 0, operandDescriptor);
cmpltps(src1_xmm, src2_xmm);
andps(src1_xmm, xword[rip + onesVector]);
storeRegister(src1_xmm, shader, dest, operandDescriptor);
}
void ShaderEmitter::recCMP(const PICAShader& shader, u32 instruction) {
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
const u32 src1 = getBits<12, 7>(instruction);