From 0e4079f30457a28f3ba5fe60fb775cd089e781cd Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Wed, 3 Jul 2024 16:23:43 +0300 Subject: [PATCH] a64 shader recompiler: Add DPH/DPHI --- .../dynapica/shader_rec_emitter_arm64.cpp | 37 ++++++++++++++++++- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp index d6358070..15200e76 100644 --- a/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp +++ b/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp @@ -144,8 +144,8 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) { case ShaderOpcodes::CMP2: recCMP(shaderUnit, instruction); break; case ShaderOpcodes::DP3: recDP3(shaderUnit, instruction); break; case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break; - // case ShaderOpcodes::DPH: - // case ShaderOpcodes::DPHI: recDPH(shaderUnit, instruction); break; + case ShaderOpcodes::DPH: + case ShaderOpcodes::DPHI: recDPH(shaderUnit, instruction); break; case ShaderOpcodes::END: recEND(shaderUnit, instruction); break; case ShaderOpcodes::EX2: recEX2(shaderUnit, instruction); break; case ShaderOpcodes::FLR: recFLR(shaderUnit, instruction); break; @@ -533,6 +533,39 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) { storeRegister(src1Vec, shader, dest, operandDescriptor); } +void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) { + const bool isDPHI = (instruction >> 26) == ShaderOpcodes::DPHI; + + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; + const u32 src1 = isDPHI ? getBits<14, 5>(instruction) : getBits<12, 7>(instruction); + const u32 src2 = isDPHI ? 
getBits<7, 7>(instruction) : getBits<7, 5>(instruction); + const u32 idx = getBits<19, 2>(instruction); + const u32 dest = getBits<21, 5>(instruction); + const u32 writeMask = getBits<0, 4>(operandDescriptor); + + // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) + loadRegister<1>(src1Vec, shader, src1, isDPHI ? 0 : idx, operandDescriptor); + loadRegister<2>(src2Vec, shader, src2, isDPHI ? idx : 0, operandDescriptor); + // Attach 1.0 to the w component of src1 + MOV(src1Vec.Selem()[3], onesVector.Selem()[0]); + + // Now perform a DP4 between (src1.xyz, 1.0) and src2 + // Do a piecewise multiplication of the vectors first + if constexpr (useSafeMUL) { + emitSafeMUL(src1Vec, src2Vec, scratch1Vec); + } else { + FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4()); + } + FADDP(src1Vec.S4(), src1Vec.S4(), src1Vec.S4()); // Now add the adjacent components together + FADDP(src1Vec.toS(), src1Vec.toD().S2()); // Again for the bottom 2 lanes. Now the bottom lane contains the dot product + + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + DUP(src1Vec.S4(), src1Vec.Selem()[0]); // src1Vec = src1Vec.xxxx + } + + storeRegister(src1Vec, shader, dest, operandDescriptor); +} + oaknut::Label ShaderEmitter::emitLog2Func() { oaknut::Label funcStart;