Fix scratch1 overwriting (#761)
Some checks failed
Android Build / x64 (release) (push) Has been cancelled
Android Build / arm64 (release) (push) Has been cancelled
HTTP Server Build / build (push) Has been cancelled
Hydra Core Build / Windows (push) Has been cancelled
Hydra Core Build / MacOS (push) Has been cancelled
Hydra Core Build / Linux (push) Has been cancelled
Hydra Core Build / Android-x64 (push) Has been cancelled
Hydra Core Build / ARM-Libretro (push) Has been cancelled
Linux AppImage Build / build (push) Has been cancelled
Linux Build / build (push) Has been cancelled
MacOS Build / MacOS-arm64 (push) Has been cancelled
MacOS Build / MacOS-x86_64 (push) Has been cancelled
Qt Build / Windows (push) Has been cancelled
Qt Build / MacOS-arm64 (push) Has been cancelled
Qt Build / MacOS-x86_64 (push) Has been cancelled
Qt Build / Linux (push) Has been cancelled
Windows Build / build (push) Has been cancelled
iOS Simulator Build / build (push) Has been cancelled
MacOS Build / MacOS-Universal (push) Has been cancelled
Qt Build / MacOS-Universal (push) Has been cancelled

This commit is contained in:
Paris Oplopoios 2025-06-25 03:25:48 +03:00 committed by GitHub
parent c99ff4f4b1
commit 8d33dcaa8e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -27,6 +27,7 @@ static constexpr Xmm scratch2 = xmm1;
// Fixed XMM registers for the source operands (presumably the up-to-three sources of the shader instruction being compiled — confirm against users)
static constexpr Xmm src1_xmm = xmm2;
static constexpr Xmm src2_xmm = xmm3;
static constexpr Xmm src3_xmm = xmm4;
// Additional temporary. storeRegister loads the current destination value into this register for the write-masked merge,
// so that scratch1 is not overwritten when scratch1 itself is passed in as the source
static constexpr Xmm scratch3 = xmm5;
#if defined(PANDA3DS_MS_ABI)
// Register that points to PICA state. Must be volatile for the aforementioned reasons
@ -382,20 +383,12 @@ void ShaderEmitter::storeRegister(Xmm source, const PICAShader& shader, u32 dest
(((writeMask & 0b0010) ? 0 : 1) << 4) |
(((writeMask & 0b0001) ? 2 : 3) << 6);
// Reorder instructions based on whether source == scratch1. This avoids overwriting scratch1 when it is the source,
// while still issuing the memory load first whenever possible, to mitigate execution hazards and give the load more time to complete before its result is read
if (source != scratch1) {
movaps(scratch1, xword[statePointer + offset]);
movaps(scratch2, source);
} else {
movaps(scratch2, source);
movaps(scratch1, xword[statePointer + offset]);
}
unpckhps(scratch2, scratch1); // Unpack X/Y components of source and destination
unpcklps(scratch1, source); // Unpack Z/W components of source and destination
shufps(scratch1, scratch2, selector); // "merge-shuffle" dest and source using selector
movaps(xword[statePointer + offset], scratch1); // Write back
movaps(scratch3, xword[statePointer + offset]);
movaps(scratch2, source);
unpckhps(scratch2, scratch3); // Unpack X/Y components of source and destination
unpcklps(scratch3, source); // Unpack Z/W components of source and destination
shufps(scratch3, scratch2, selector); // "merge-shuffle" dest and source using selector
movaps(xword[statePointer + offset], scratch3); // Write back
}
}