mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-17 11:11:30 +12:00
[Shader JIT] Compile MAD to FMA when possible
This commit is contained in:
parent
cd04ed3770
commit
8a13b8c878
2 changed files with 20 additions and 9 deletions
|
@@ -36,6 +36,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
||||||
|
|
||||||
bool haveSSE4_1 = false; // Shows if the CPU supports SSE4.1
|
bool haveSSE4_1 = false; // Shows if the CPU supports SSE4.1
|
||||||
bool haveAVX = false; // Shows if the CPU supports AVX (NOT AVX2, NOT AVX512. Regular AVX)
|
bool haveAVX = false; // Shows if the CPU supports AVX (NOT AVX2, NOT AVX512. Regular AVX)
|
||||||
|
bool haveFMA3 = false; // Shows if the CPU supports FMA3
|
||||||
|
|
||||||
// Compile all instructions from [current recompiler PC, end)
|
// Compile all instructions from [current recompiler PC, end)
|
||||||
void compileUntil(const PICAShader& shaderUnit, u32 endPC);
|
void compileUntil(const PICAShader& shaderUnit, u32 endPC);
|
||||||
|
@@ -112,6 +113,7 @@ public:
|
||||||
|
|
||||||
haveSSE4_1 = cpu.has(Xbyak::util::Cpu::tSSE41);
|
haveSSE4_1 = cpu.has(Xbyak::util::Cpu::tSSE41);
|
||||||
haveAVX = cpu.has(Xbyak::util::Cpu::tAVX);
|
haveAVX = cpu.has(Xbyak::util::Cpu::tAVX);
|
||||||
|
haveFMA3 = cpu.has(Xbyak::util::Cpu::tFMA);
|
||||||
|
|
||||||
if (!cpu.has(Xbyak::util::Cpu::tSSE3)) {
|
if (!cpu.has(Xbyak::util::Cpu::tSSE3)) {
|
||||||
Helpers::panic("This CPU does not support SSE3. Please use the shader interpreter instead");
|
Helpers::panic("This CPU does not support SSE3. Please use the shader interpreter instead");
|
||||||
|
|
|
@@ -580,17 +580,26 @@ void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) {
|
||||||
loadRegister<3>(src3_xmm, shader, src3, 0, operandDescriptor);
|
loadRegister<3>(src3_xmm, shader, src3, 0, operandDescriptor);
|
||||||
|
|
||||||
// TODO: Implement safe PICA mul
|
// TODO: Implement safe PICA mul
|
||||||
// Multiply src1 * src2
|
// If we have FMA3, optimize MAD to use FMA
|
||||||
if (haveAVX) {
|
if (haveFMA3) {
|
||||||
vmulps(scratch1, src1_xmm, src2_xmm);
|
vfmadd213ps(src1_xmm, src2_xmm, src3_xmm);
|
||||||
} else {
|
storeRegister(src1_xmm, shader, dest, operandDescriptor);
|
||||||
movaps(scratch1, src1_xmm);
|
|
||||||
mulps(scratch1, src2_xmm);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we don't have FMA3, do a multiplication and addition
|
||||||
|
else {
|
||||||
|
// Multiply src1 * src2
|
||||||
|
if (haveAVX) {
|
||||||
|
vmulps(scratch1, src1_xmm, src2_xmm);
|
||||||
|
} else {
|
||||||
|
movaps(scratch1, src1_xmm);
|
||||||
|
mulps(scratch1, src2_xmm);
|
||||||
|
}
|
||||||
|
|
||||||
// Add src3
|
// Add src3
|
||||||
addps(scratch1, src3_xmm);
|
addps(scratch1, src3_xmm);
|
||||||
storeRegister(scratch1, shader, dest, operandDescriptor);
|
storeRegister(scratch1, shader, dest, operandDescriptor);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ShaderEmitter::recCMP(const PICAShader& shader, u32 instruction) {
|
void ShaderEmitter::recCMP(const PICAShader& shader, u32 instruction) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue