mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-11 08:39:48 +12:00
[Shader JIT] Add DPH
This commit is contained in:
parent
65de2637ae
commit
5aa2e3c1d2
2 changed files with 26 additions and 0 deletions
|
@ -91,6 +91,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
void recCMP(const PICAShader& shader, u32 instruction);
|
||||
void recDP3(const PICAShader& shader, u32 instruction);
|
||||
void recDP4(const PICAShader& shader, u32 instruction);
|
||||
void recDPH(const PICAShader& shader, u32 instruction);
|
||||
void recEMIT(const PICAShader& shader, u32 instruction);
|
||||
void recEND(const PICAShader& shader, u32 instruction);
|
||||
void recEX2(const PICAShader& shader, u32 instruction);
|
||||
|
|
|
@ -143,6 +143,7 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
|
|||
break;
|
||||
case ShaderOpcodes::DP3: recDP3(shaderUnit, instruction); break;
|
||||
case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break;
|
||||
case ShaderOpcodes::DPH: recDPH(shaderUnit, instruction); break;
|
||||
case ShaderOpcodes::END: recEND(shaderUnit, instruction); break;
|
||||
case ShaderOpcodes::EX2: recEX2(shaderUnit, instruction); break;
|
||||
case ShaderOpcodes::FLR: recFLR(shaderUnit, instruction); break;
|
||||
|
@ -525,6 +526,30 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) {
|
|||
storeRegister(src1_xmm, shader, dest, operandDescriptor);
|
||||
}
|
||||
|
||||
void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) {
|
||||
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
|
||||
const u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
// TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA)
|
||||
loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor);
|
||||
loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor);
|
||||
|
||||
// Attach 1.0 to the w component of src1
|
||||
if (haveSSE4_1) {
|
||||
blendps(src1_xmm, xword[rip + onesVector], 0b1000);
|
||||
} else {
|
||||
movaps(scratch1, src1_xmm);
|
||||
unpckhps(scratch1, xword[rip + onesVector]);
|
||||
unpcklpd(src1_xmm, scratch1);
|
||||
}
|
||||
|
||||
dpps(src1_xmm, src2_xmm, 0b11111111); // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA
|
||||
storeRegister(src1_xmm, shader, dest, operandDescriptor);
|
||||
}
|
||||
|
||||
void ShaderEmitter::recMAX(const PICAShader& shader, u32 instruction) {
|
||||
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
|
||||
const u32 src1 = getBits<12, 7>(instruction);
|
||||
|
|
Loading…
Add table
Reference in a new issue