Merge branch 'master' into specialized-shaders-2

2025-07-18 21:11:44 +12:00 · 2024-07-17 00:10:09 +03:00 · 2024-07-17 00:10:09 +03:00 · 219a560cbe
commit 219a560cbe
parent 441aa2346c 27ddb1272a
11 changed files with 26 additions and 18 deletions
--- a/include/PICA/dynapica/shader_rec.hpp
+++ b/include/PICA/dynapica/shader_rec.hpp
@@ -22,8 +22,11 @@ class ShaderJIT {

 	ShaderCache cache;
 #endif
+	bool accurateMul = false;

  public:
+	void setAccurateMul(bool value) { accurateMul = value; }
+
 #ifdef PANDA3DS_SHADER_JIT_SUPPORTED
 	// Call this before starting to process a batch of vertices
 	// This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader
@@ -36,11 +39,11 @@ class ShaderJIT {
 	static constexpr bool isAvailable() { return true; }
 #else
 	void prepare(PICAShader& shaderUnit) {
-		Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
+		Helpers::panic("Shader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
 	}

 	void run(PICAShader& shaderUnit) {
-		Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
+		Helpers::panic("Shader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
 	}

 	// Define dummy callback. This should never be called if the shader JIT is not supported
--- a/include/PICA/dynapica/shader_rec_emitter_arm64.hpp
+++ b/include/PICA/dynapica/shader_rec_emitter_arm64.hpp
@@ -37,6 +37,8 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
 	// Shows whether the loaded shader has any log2 and exp2 instructions
 	bool codeHasLog2 = false;
 	bool codeHasExp2 = false;
+	// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
+	bool useSafeMUL = false;

 	oaknut::Label log2Func, exp2Func;
 	oaknut::Label emitLog2Func();
@@ -123,7 +125,7 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
 	PrologueCallback prologueCb = nullptr;

 	// Initialize our emitter with "allocSize" bytes of memory allocated for the code buffer
-	ShaderEmitter() : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {}
+	ShaderEmitter(bool useSafeMUL) : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()), useSafeMUL(useSafeMUL) {}

 	// PC must be a valid entrypoint here. It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does
 	InstructionCallback getInstructionCallback(u32 pc) { return getLabelPointer<InstructionCallback>(instructionLabels.at(pc)); }
--- a/include/PICA/dynapica/shader_rec_emitter_x64.hpp
+++ b/include/PICA/dynapica/shader_rec_emitter_x64.hpp
@@ -45,6 +45,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
 	// Shows whether the loaded shader has any log2 and exp2 instructions
 	bool codeHasLog2 = false;
 	bool codeHasExp2 = false;
+	// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
+	bool useSafeMUL = false;
 	
 	Xbyak::Label log2Func, exp2Func;
 	Xbyak::Label emitLog2Func();
@@ -130,7 +132,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
 	PrologueCallback prologueCb = nullptr;

 	// Initialize our emitter with "allocSize" bytes of RWX memory
-	ShaderEmitter() : Xbyak::CodeGenerator(allocSize) {
+	ShaderEmitter(bool useSafeMUL) : Xbyak::CodeGenerator(allocSize), useSafeMUL(useSafeMUL) {
 		cpuCaps = Xbyak::util::Cpu();

 		haveSSE4_1 = cpuCaps.has(Xbyak::util::Cpu::tSSE41);