mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-18 03:31:31 +12:00
[ShaderJIT] Fix storeRegister kinda hackily
This commit is contained in:
parent
121653b806
commit
cd04ed3770
1 changed file with 18 additions and 6 deletions
|
@ -312,18 +312,29 @@ void ShaderEmitter::storeRegister(Xmm source, const PICAShader& shader, u32 dest
|
||||||
} else if (haveSSE4_1) {
|
} else if (haveSSE4_1) {
|
||||||
// Bit reverse the write mask because that is what blendps expects
|
// Bit reverse the write mask because that is what blendps expects
|
||||||
u32 adjustedMask = ((writeMask >> 3) & 0b1) | ((writeMask >> 1) & 0b10) | ((writeMask << 1) & 0b100) | ((writeMask << 3) & 0b1000);
|
u32 adjustedMask = ((writeMask >> 3) & 0b1) | ((writeMask >> 1) & 0b10) | ((writeMask << 1) & 0b100) | ((writeMask << 3) & 0b1000);
|
||||||
movaps(scratch1, xword[statePointer + offset]); // Read current value of dest
|
// Don't accidentally overwrite scratch1 if that is what we're writing derp
|
||||||
blendps(scratch1, source, adjustedMask); // Blend with source
|
Xmm temp = (source == scratch1) ? scratch2 : scratch1;
|
||||||
movaps(xword[statePointer + offset], scratch1); // Write back
|
|
||||||
|
movaps(temp, xword[statePointer + offset]); // Read current value of dest
|
||||||
|
blendps(temp, source, adjustedMask); // Blend with source
|
||||||
|
movaps(xword[statePointer + offset], temp); // Write back
|
||||||
} else {
|
} else {
|
||||||
// Blend algo referenced from Citra
|
// Blend algo referenced from Citra
|
||||||
const u8 selector = (((writeMask & 0b1000) ? 1 : 0) << 0) |
|
const u8 selector = (((writeMask & 0b1000) ? 1 : 0) << 0) |
|
||||||
(((writeMask & 0b0100) ? 3 : 2) << 2) |
|
(((writeMask & 0b0100) ? 3 : 2) << 2) |
|
||||||
(((writeMask & 0b0010) ? 0 : 1) << 4) |
|
(((writeMask & 0b0010) ? 0 : 1) << 4) |
|
||||||
(((writeMask & 0b0001) ? 2 : 3) << 6);
|
(((writeMask & 0b0001) ? 2 : 3) << 6);
|
||||||
|
|
||||||
movaps(scratch1, xword[statePointer + offset]);
|
// Reorder instructions based on whether the source == scratch1. This is to avoid overwriting scratch1 if it's the source,
|
||||||
movaps(scratch2, source);
|
// While also having the memory load come first to mitigate execution hazards and give the load more time to complete before reading if possible
|
||||||
|
if (source != scratch1) {
|
||||||
|
movaps(scratch1, xword[statePointer + offset]);
|
||||||
|
movaps(scratch2, source);
|
||||||
|
} else {
|
||||||
|
movaps(scratch2, source);
|
||||||
|
movaps(scratch1, xword[statePointer + offset]);
|
||||||
|
}
|
||||||
|
|
||||||
unpckhps(scratch2, scratch1); // Unpack X/Y components of source and destination
|
unpckhps(scratch2, scratch1); // Unpack X/Y components of source and destination
|
||||||
unpcklps(scratch1, source); // Unpack Z/W components of source and destination
|
unpcklps(scratch1, source); // Unpack Z/W components of source and destination
|
||||||
shufps(scratch1, scratch2, selector); // "merge-shuffle" dest and source using selector
|
shufps(scratch1, scratch2, selector); // "merge-shuffle" dest and source using selector
|
||||||
|
@ -844,6 +855,7 @@ void ShaderEmitter::emitPrintLog(const PICAShader& shaderUnit) {
|
||||||
and_(rsp, ~0xF);
|
and_(rsp, ~0xF);
|
||||||
|
|
||||||
// Call function
|
// Call function
|
||||||
|
mov(arg1, statePointer);
|
||||||
mov(rax, uintptr_t(printLog));
|
mov(rax, uintptr_t(printLog));
|
||||||
call(rax);
|
call(rax);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue