Merge branch 'master' into specialized-shaders-2

This commit is contained in:
wheremyfoodat 2024-07-17 00:10:09 +03:00
commit 219a560cbe
11 changed files with 26 additions and 18 deletions

View file

@ -22,8 +22,11 @@ class ShaderJIT {
ShaderCache cache;
#endif
bool accurateMul = false;
public:
void setAccurateMul(bool value) { accurateMul = value; }
#ifdef PANDA3DS_SHADER_JIT_SUPPORTED
// Call this before starting to process a batch of vertices
// This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader
@ -36,11 +39,11 @@ class ShaderJIT {
static constexpr bool isAvailable() { return true; }
#else
void prepare(PICAShader& shaderUnit) {
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
}
void run(PICAShader& shaderUnit) {
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
}
// Define dummy callback. This should never be called if the shader JIT is not supported

View file

@ -37,6 +37,8 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
// Shows whether the loaded shader has any log2 and exp2 instructions
bool codeHasLog2 = false;
bool codeHasExp2 = false;
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
bool useSafeMUL = false;
oaknut::Label log2Func, exp2Func;
oaknut::Label emitLog2Func();
@ -123,7 +125,7 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
PrologueCallback prologueCb = nullptr;
// Initialize our emitter with "allocSize" bytes of memory allocated for the code buffer
ShaderEmitter() : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {}
ShaderEmitter(bool useSafeMUL) : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()), useSafeMUL(useSafeMUL) {}
// PC must be a valid entrypoint here. It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does
InstructionCallback getInstructionCallback(u32 pc) { return getLabelPointer<InstructionCallback>(instructionLabels.at(pc)); }

View file

@ -45,6 +45,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
// Shows whether the loaded shader has any log2 and exp2 instructions
bool codeHasLog2 = false;
bool codeHasExp2 = false;
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
bool useSafeMUL = false;
Xbyak::Label log2Func, exp2Func;
Xbyak::Label emitLog2Func();
@ -130,7 +132,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
PrologueCallback prologueCb = nullptr;
// Initialize our emitter with "allocSize" bytes of RWX memory
ShaderEmitter() : Xbyak::CodeGenerator(allocSize) {
ShaderEmitter(bool useSafeMUL) : Xbyak::CodeGenerator(allocSize), useSafeMUL(useSafeMUL) {
cpuCaps = Xbyak::util::Cpu();
haveSSE4_1 = cpuCaps.has(Xbyak::util::Cpu::tSSE41);