Merge branch 'master' into specialized-shaders-2

This commit is contained in:
wheremyfoodat 2024-07-17 00:10:09 +03:00
commit 219a560cbe
11 changed files with 26 additions and 18 deletions

View file

@ -16,7 +16,7 @@ void ShaderJIT::prepare(PICAShader& shaderUnit) {
auto it = cache.find(hash);
if (it == cache.end()) { // Block has not been compiled yet
auto emitter = std::make_unique<ShaderEmitter>();
auto emitter = std::make_unique<ShaderEmitter>(accurateMul);
emitter->compile(shaderUnit);
// Get pointer to callbacks
entrypointCallback = emitter->getInstructionCallback(shaderUnit.entrypoint);

View file

@ -7,9 +7,6 @@ using namespace Helpers;
using namespace oaknut;
using namespace oaknut::util;
// TODO: Expose safe/unsafe optimizations to the user
constexpr bool useSafeMUL = true;
// Similar to the x64 recompiler, we use an odd internal ABI, which abuses the fact that we'll very rarely be calling C++ functions
// So to avoid pushing and popping, we'll be making use of volatile registers as much as possible
static constexpr QReg src1Vec = Q1;
@ -491,7 +488,7 @@ void ShaderEmitter::recDP3(const PICAShader& shader, u32 instruction) {
// Now do a full DP4
// Do a piecewise multiplication of the vectors first
if constexpr (useSafeMUL) {
if (useSafeMUL) {
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
} else {
FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4());
@ -518,7 +515,7 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) {
loadRegister<2>(src2Vec, shader, src2, 0, operandDescriptor);
// Do a piecewise multiplication of the vectors first
if constexpr (useSafeMUL) {
if (useSafeMUL) {
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
} else {
FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4());
@ -551,7 +548,7 @@ void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) {
// Now perform a DP4
// Do a piecewise multiplication of the vectors first
if constexpr (useSafeMUL) {
if (useSafeMUL) {
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
} else {
FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4());
@ -834,7 +831,7 @@ void ShaderEmitter::recMUL(const PICAShader& shader, u32 instruction) {
loadRegister<1>(src1Vec, shader, src1, idx, operandDescriptor);
loadRegister<2>(src2Vec, shader, src2, 0, operandDescriptor);
if constexpr (useSafeMUL) {
if (useSafeMUL) {
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
} else {
FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4());
@ -907,7 +904,7 @@ void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) {
loadRegister<2>(src2Vec, shader, src2, isMADI ? 0 : idx, operandDescriptor);
loadRegister<3>(src3Vec, shader, src3, isMADI ? idx : 0, operandDescriptor);
if constexpr (useSafeMUL) {
if (useSafeMUL) {
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
FADD(src3Vec.S4(), src3Vec.S4(), src1Vec.S4());
} else {

View file

@ -12,9 +12,6 @@ using namespace Xbyak;
using namespace Xbyak::util;
using namespace Helpers;
// TODO: Expose safe/unsafe optimizations to the user
constexpr bool useSafeMUL = false;
// The shader recompiler uses quite an odd internal ABI
// We make use of the fact that in regular conditions, we should pretty much never be calling C++ code from recompiled shader code
// This allows us to establish an ABI that's optimized for this sort of workflow, statically allocating volatile host registers

View file

@ -64,6 +64,8 @@ void GPU::reset() {
regs.fill(0);
shaderUnit.reset();
shaderJIT.reset();
shaderJIT.setAccurateMul(config.accurateShaderMul);
std::memset(vram, 0, vramSize);
lightingLUT.fill(0);
lightingLUTDirty = true;