Add accurate shader multiplication option

This commit is contained in:
wheremyfoodat 2024-07-16 22:14:01 +03:00
parent 61e2e71f68
commit 0ecdf00e64
10 changed files with 20 additions and 12 deletions

View file

@ -22,8 +22,11 @@ class ShaderJIT {
ShaderCache cache;
#endif
bool accurateMul = false;
public:
// Stores the requested accurate-multiplication setting in the accurateMul member.
// NOTE(review): how this flag is consumed by the JIT is not visible in this hunk — confirm against the implementation.
void setAccurateMul(bool value) {
	accurateMul = value;
}
#ifdef PANDA3DS_SHADER_JIT_SUPPORTED
// Call this before starting to process a batch of vertices
// This will read the PICA config (uploaded shader and shader operand descriptors) and check whether we've already compiled this shader
@ -36,11 +39,11 @@ class ShaderJIT {
// Compile-time query: this variant unconditionally reports the shader JIT as available.
// NOTE(review): presumably belongs to the PANDA3DS_SHADER_JIT_SUPPORTED branch, since it directly precedes the #else — confirm.
static constexpr bool isAvailable() {
	return true;
}
#else
// Stub for builds without shader JIT support: it does no work and only calls Helpers::panic with an explanatory message.
// shaderUnit is unused here.
// NOTE(review): the two panic calls below look like the before/after sides of a diff ("Vertex Loader JIT" -> "Shader JIT")
// flattened together; presumably only the "Shader JIT" message should remain — confirm against the real header.
void prepare(PICAShader& shaderUnit) {
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
}
// Stub for builds without shader JIT support: it does no work and only calls Helpers::panic with an explanatory message.
// shaderUnit is unused here.
// NOTE(review): the two panic calls below look like the before/after sides of a diff ("Vertex Loader JIT" -> "Shader JIT")
// flattened together; presumably only the "Shader JIT" message should remain — confirm against the real header.
void run(PICAShader& shaderUnit) {
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
}
// Define dummy callback. This should never be called if the shader JIT is not supported

View file

@ -37,6 +37,8 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
// Shows whether the loaded shader has any log2 and exp2 instructions
bool codeHasLog2 = false;
bool codeHasExp2 = false;
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
bool useSafeMUL = false;
oaknut::Label log2Func, exp2Func;
oaknut::Label emitLog2Func();
@ -123,7 +125,7 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
PrologueCallback prologueCb = nullptr;
// Initialize our emitter with "allocSize" bytes of memory allocated for the code buffer
// NOTE(review): the default constructor and the useSafeMUL overload below look like the before/after sides of a diff
// shown together; confirm which of the two the real header keeps.
ShaderEmitter() : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {}
// Same base-class initialization as above, additionally copying the useSafeMUL argument into the useSafeMUL member
ShaderEmitter(bool useSafeMUL) : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()), useSafeMUL(useSafeMUL) {}
// Returns the callback pointer for the instruction label at "pc".
// pc has to be a valid entrypoint: the lookup goes through std::array<>::at() so that this is asserted,
// and the extra cost of the check is small in this case.
InstructionCallback getInstructionCallback(u32 pc) {
	auto& entryLabel = instructionLabels.at(pc);
	return getLabelPointer<InstructionCallback>(entryLabel);
}

View file

@ -45,6 +45,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
// Shows whether the loaded shader has any log2 and exp2 instructions
bool codeHasLog2 = false;
bool codeHasExp2 = false;
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
bool useSafeMUL = false;
Xbyak::Label log2Func, exp2Func;
Xbyak::Label emitLog2Func();
@ -130,7 +132,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
PrologueCallback prologueCb = nullptr;
// Initialize our emitter with "allocSize" bytes of RWX memory
ShaderEmitter() : Xbyak::CodeGenerator(allocSize) {
ShaderEmitter(bool useSafeMUL) : Xbyak::CodeGenerator(allocSize), useSafeMUL(useSafeMUL) {
cpuCaps = Xbyak::util::Cpu();
haveSSE4_1 = cpuCaps.has(Xbyak::util::Cpu::tSSE41);

View file

@ -15,6 +15,7 @@ struct EmulatorConfig {
bool shaderJitEnabled = shaderJitDefault;
bool discordRpcEnabled = false;
bool accurateShaderMul = false;
RendererType rendererType = RendererType::OpenGL;
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null;