mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-06 14:15:41 +12:00
Merge pull request #537 from wheremyfoodat/accurate-mul
Add accurate shader multiplication option
This commit is contained in:
commit
71b8031d5b
10 changed files with 25 additions and 17 deletions
|
@ -22,8 +22,11 @@ class ShaderJIT {
|
|||
|
||||
ShaderCache cache;
|
||||
#endif
|
||||
bool accurateMul = false;
|
||||
|
||||
public:
|
||||
void setAccurateMul(bool value) { accurateMul = value; }
|
||||
|
||||
#ifdef PANDA3DS_SHADER_JIT_SUPPORTED
|
||||
// Call this before starting to process a batch of vertices
|
||||
// This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader
|
||||
|
@ -36,11 +39,11 @@ class ShaderJIT {
|
|||
static constexpr bool isAvailable() { return true; }
|
||||
#else
|
||||
void prepare(PICAShader& shaderUnit) {
|
||||
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
|
||||
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
|
||||
}
|
||||
|
||||
void run(PICAShader& shaderUnit) {
|
||||
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
|
||||
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
|
||||
}
|
||||
|
||||
// Define dummy callback. This should never be called if the shader JIT is not supported
|
||||
|
|
|
@ -37,6 +37,8 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
|
|||
// Shows whether the loaded shader has any log2 and exp2 instructions
|
||||
bool codeHasLog2 = false;
|
||||
bool codeHasExp2 = false;
|
||||
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
|
||||
bool useSafeMUL = false;
|
||||
|
||||
oaknut::Label log2Func, exp2Func;
|
||||
oaknut::Label emitLog2Func();
|
||||
|
@ -123,7 +125,7 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
|
|||
PrologueCallback prologueCb = nullptr;
|
||||
|
||||
// Initialize our emitter with "allocSize" bytes of memory allocated for the code buffer
|
||||
ShaderEmitter() : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {}
|
||||
ShaderEmitter(bool useSafeMUL) : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()), useSafeMUL(useSafeMUL) {}
|
||||
|
||||
// PC must be a valid entrypoint here. It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does
|
||||
InstructionCallback getInstructionCallback(u32 pc) { return getLabelPointer<InstructionCallback>(instructionLabels.at(pc)); }
|
||||
|
|
|
@ -45,6 +45,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
// Shows whether the loaded shader has any log2 and exp2 instructions
|
||||
bool codeHasLog2 = false;
|
||||
bool codeHasExp2 = false;
|
||||
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
|
||||
bool useSafeMUL = false;
|
||||
|
||||
Xbyak::Label log2Func, exp2Func;
|
||||
Xbyak::Label emitLog2Func();
|
||||
|
@ -130,7 +132,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
PrologueCallback prologueCb = nullptr;
|
||||
|
||||
// Initialize our emitter with "allocSize" bytes of RWX memory
|
||||
ShaderEmitter() : Xbyak::CodeGenerator(allocSize) {
|
||||
ShaderEmitter(bool useSafeMUL) : Xbyak::CodeGenerator(allocSize), useSafeMUL(useSafeMUL) {
|
||||
cpuCaps = Xbyak::util::Cpu();
|
||||
|
||||
haveSSE4_1 = cpuCaps.has(Xbyak::util::Cpu::tSSE41);
|
||||
|
|
|
@ -15,6 +15,7 @@ struct EmulatorConfig {
|
|||
|
||||
bool shaderJitEnabled = shaderJitDefault;
|
||||
bool discordRpcEnabled = false;
|
||||
bool accurateShaderMul = false;
|
||||
RendererType rendererType = RendererType::OpenGL;
|
||||
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null;
|
||||
|
||||
|
|
|
@ -62,6 +62,7 @@ void EmulatorConfig::load() {
|
|||
|
||||
shaderJitEnabled = toml::find_or<toml::boolean>(gpu, "EnableShaderJIT", shaderJitDefault);
|
||||
vsyncEnabled = toml::find_or<toml::boolean>(gpu, "EnableVSync", true);
|
||||
accurateShaderMul = toml::find_or<toml::boolean>(gpu, "AccurateShaderMultiplication", false);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -125,6 +126,7 @@ void EmulatorConfig::save() {
|
|||
data["GPU"]["EnableShaderJIT"] = shaderJitEnabled;
|
||||
data["GPU"]["Renderer"] = std::string(Renderer::typeToString(rendererType));
|
||||
data["GPU"]["EnableVSync"] = vsyncEnabled;
|
||||
data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul;
|
||||
data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType));
|
||||
data["Audio"]["EnableAudio"] = audioEnabled;
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ void ShaderJIT::prepare(PICAShader& shaderUnit) {
|
|||
auto it = cache.find(hash);
|
||||
|
||||
if (it == cache.end()) { // Block has not been compiled yet
|
||||
auto emitter = std::make_unique<ShaderEmitter>();
|
||||
auto emitter = std::make_unique<ShaderEmitter>(accurateMul);
|
||||
emitter->compile(shaderUnit);
|
||||
// Get pointer to callbacks
|
||||
entrypointCallback = emitter->getInstructionCallback(shaderUnit.entrypoint);
|
||||
|
|
|
@ -7,9 +7,6 @@ using namespace Helpers;
|
|||
using namespace oaknut;
|
||||
using namespace oaknut::util;
|
||||
|
||||
// TODO: Expose safe/unsafe optimizations to the user
|
||||
constexpr bool useSafeMUL = true;
|
||||
|
||||
// Similar to the x64 recompiler, we use an odd internal ABI, which abuses the fact that we'll very rarely be calling C++ functions
|
||||
// So to avoid pushing and popping, we'll be making use of volatile registers as much as possible
|
||||
static constexpr QReg src1Vec = Q1;
|
||||
|
@ -491,7 +488,7 @@ void ShaderEmitter::recDP3(const PICAShader& shader, u32 instruction) {
|
|||
|
||||
// Now do a full DP4
|
||||
// Do a piecewise multiplication of the vectors first
|
||||
if constexpr (useSafeMUL) {
|
||||
if (useSafeMUL) {
|
||||
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
|
||||
} else {
|
||||
FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4());
|
||||
|
@ -518,7 +515,7 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) {
|
|||
loadRegister<2>(src2Vec, shader, src2, 0, operandDescriptor);
|
||||
|
||||
// Do a piecewise multiplication of the vectors first
|
||||
if constexpr (useSafeMUL) {
|
||||
if (useSafeMUL) {
|
||||
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
|
||||
} else {
|
||||
FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4());
|
||||
|
@ -551,7 +548,7 @@ void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) {
|
|||
|
||||
// Now perform a DP4
|
||||
// Do a piecewise multiplication of the vectors first
|
||||
if constexpr (useSafeMUL) {
|
||||
if (useSafeMUL) {
|
||||
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
|
||||
} else {
|
||||
FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4());
|
||||
|
@ -834,7 +831,7 @@ void ShaderEmitter::recMUL(const PICAShader& shader, u32 instruction) {
|
|||
loadRegister<1>(src1Vec, shader, src1, idx, operandDescriptor);
|
||||
loadRegister<2>(src2Vec, shader, src2, 0, operandDescriptor);
|
||||
|
||||
if constexpr (useSafeMUL) {
|
||||
if (useSafeMUL) {
|
||||
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
|
||||
} else {
|
||||
FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4());
|
||||
|
@ -907,7 +904,7 @@ void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) {
|
|||
loadRegister<2>(src2Vec, shader, src2, isMADI ? 0 : idx, operandDescriptor);
|
||||
loadRegister<3>(src3Vec, shader, src3, isMADI ? idx : 0, operandDescriptor);
|
||||
|
||||
if constexpr (useSafeMUL) {
|
||||
if (useSafeMUL) {
|
||||
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
|
||||
FADD(src3Vec.S4(), src3Vec.S4(), src1Vec.S4());
|
||||
} else {
|
||||
|
|
|
@ -12,9 +12,6 @@ using namespace Xbyak;
|
|||
using namespace Xbyak::util;
|
||||
using namespace Helpers;
|
||||
|
||||
// TODO: Expose safe/unsafe optimizations to the user
|
||||
constexpr bool useSafeMUL = false;
|
||||
|
||||
// The shader recompiler uses quite an odd internal ABI
|
||||
// We make use of the fact that in regular conditions, we should pretty much never be calling C++ code from recompiled shader code
|
||||
// This allows us to establish an ABI that's optimized for this sort of workflow, statically allocating volatile host registers
|
||||
|
|
|
@ -64,6 +64,8 @@ void GPU::reset() {
|
|||
regs.fill(0);
|
||||
shaderUnit.reset();
|
||||
shaderJIT.reset();
|
||||
shaderJIT.setAccurateMul(config.accurateShaderMul);
|
||||
|
||||
std::memset(vram, 0, vramSize);
|
||||
lightingLUT.fill(0);
|
||||
lightingLUTDirty = true;
|
||||
|
|
|
@ -146,6 +146,7 @@ static bool FetchVariableBool(std::string key, bool def) {
|
|||
static void configInit() {
|
||||
static const retro_variable values[] = {
|
||||
{"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"},
|
||||
{"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"},
|
||||
{"panda3ds_use_vsync", "Enable VSync; enabled|disabled"},
|
||||
{"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"},
|
||||
{"panda3ds_use_audio", "Enable audio; disabled|enabled"},
|
||||
|
@ -153,7 +154,7 @@ static void configInit() {
|
|||
{"panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled"},
|
||||
{"panda3ds_battery_level", "Battery percentage; 5|10|20|30|50|70|90|100"},
|
||||
{"panda3ds_use_charger", "Charger plugged; enabled|disabled"},
|
||||
{nullptr, nullptr}
|
||||
{nullptr, nullptr},
|
||||
};
|
||||
|
||||
envCallbacks(RETRO_ENVIRONMENT_SET_VARIABLES, (void*)values);
|
||||
|
@ -171,6 +172,7 @@ static void configUpdate() {
|
|||
config.audioEnabled = FetchVariableBool("panda3ds_use_audio", false);
|
||||
config.sdCardInserted = FetchVariableBool("panda3ds_use_virtual_sd", true);
|
||||
config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false);
|
||||
config.accurateShaderMul = FetchVariableBool("panda3ds_accurate_shader_mul", false);
|
||||
config.discordRpcEnabled = false;
|
||||
|
||||
config.save();
|
||||
|
|
Loading…
Add table
Reference in a new issue