diff --git a/CMakeLists.txt b/CMakeLists.txt index f3e80c04..95ea1066 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,7 @@ include_directories(third_party/gl3w/) include_directories(third_party/imgui/) include_directories(third_party/dynarmic/src) include_directories(third_party/cryptopp/) +include_directories(third_party/cityhash/include) include_directories(third_party/result/include/) add_compile_definitions(NOMINMAX) diff --git a/include/PICA/dynapica/shader_rec.hpp b/include/PICA/dynapica/shader_rec.hpp index 48104364..2dbb8300 100644 --- a/include/PICA/dynapica/shader_rec.hpp +++ b/include/PICA/dynapica/shader_rec.hpp @@ -1,22 +1,35 @@ #pragma once #include "PICA/shader.hpp" +#if defined(PANDA3DS_DYNAPICA_SUPPORTED) && defined(PANDA3DS_X64_HOST) +#define PANDA3DS_SHADER_JIT_SUPPORTED +#include +#endif + class ShaderJIT { +#ifdef PANDA3DS_SHADER_JIT_SUPPORTED + using Hash = PICAShader::Hash; + using ShaderCache = std::unordered_map; + + ShaderCache cache; void compileShader(PICAShader& shaderUnit); +#endif public: -#if defined(PANDA3DS_DYNAPICA_SUPPORTED) && defined(PANDA3DS_X64_HOST) - #define PANDA3DS_SHADER_JIT_SUPPORTED - +#ifdef PANDA3DS_SHADER_JIT_SUPPORTED // Call this before starting to process a batch of vertices // This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader // If yes, it sets it as the active shader. 
if not, then it compiles it, adds it to the cache, and sets it as active, void prepare(PICAShader& shaderUnit); + void reset(); // Empties the compiled shader cache (called from GPU::reset) + static constexpr bool isAvailable() { return true; } #else void prepare(PICAShader& shaderUnit) { Helpers::panic("Vertex Loader JIT: Tried to load vertices with JIT on platform that does not support vertex loader jit"); } + + void reset() {} static constexpr bool isAvailable() { return false; } #endif diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 347dd946..4d3d4d99 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -16,6 +16,8 @@ class GPU { Memory& mem; ShaderUnit shaderUnit; + ShaderJIT shaderJIT; // Doesn't do anything if JIT is disabled or not supported + u8* vram = nullptr; MAKE_LOG_FUNCTION(log, gpuLogger) diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index 65b8da10..6f3d62f8 100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -89,6 +89,17 @@ protected: std::array callInfo; ShaderType type; + // We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT + // We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal + // Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first + using Hash = u64; + + Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism) + Hash lastOpdescHash = 0; // Last hash computed for the operand descriptors (Also used for the JIT) + + bool codeHashDirty = false; // True when lastCodeHash must be recomputed: set by uploadWord() and reset(), cleared by getCodeHash() + bool opdescHashDirty = false; // True when lastOpdescHash must be recomputed: set by uploadDescriptor() and reset(), cleared by getOpdescHash() + friend class ShaderJIT; private: @@ -204,11 +215,15 @@ public: if (bufferIndex >= 4095) Helpers::panic("o no, shader upload overflew"); bufferedShader[bufferIndex++] = word; bufferIndex &= 0xfff; + + codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed } 
void uploadDescriptor(u32 word) { operandDescriptors[opDescriptorIndex++] = word; opDescriptorIndex &= 0x7f; + + opdescHashDirty = true; // Signal the JIT if necessary that the operand descriptor hash has potentially changed } void setFloatUniformIndex(u32 word) { @@ -250,4 +265,7 @@ public: void run(); void reset(); + + Hash getCodeHash(); + Hash getOpdescHash(); }; \ No newline at end of file diff --git a/src/core/PICA/dynapica/shader_rec.cpp b/src/core/PICA/dynapica/shader_rec.cpp index 8f04d1f2..63ea504b 100644 --- a/src/core/PICA/dynapica/shader_rec.cpp +++ b/src/core/PICA/dynapica/shader_rec.cpp @@ -1,7 +1,14 @@ #include "PICA/dynapica/shader_rec.hpp" +#include "cityhash.hpp" #ifdef PANDA3DS_SHADER_JIT_SUPPORTED +void ShaderJIT::reset() { + cache.clear(); +} + void ShaderJIT::prepare(PICAShader& shaderUnit) { - printf("HAPPY HAPPY HAPPY\n"); + // We construct a shader hash from both the code and operand descriptor hashes + // This is so that if only one of them changes, we still properly recompile the shader. NOTE(review): the hash is only computed here so far, nothing in this function looks it up in the cache yet + Hash hash = shaderUnit.getCodeHash() ^ shaderUnit.getOpdescHash(); } #endif // PANDA3DS_SHADER_JIT_SUPPORTED \ No newline at end of file diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index e11b07a2..d7fa8ce8 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -12,6 +12,7 @@ GPU::GPU(Memory& mem) : mem(mem), renderer(*this, regs) { void GPU::reset() { regs.fill(0); shaderUnit.reset(); + shaderJIT.reset(); std::memset(vram, 0, vramSize); totalAttribCount = 0; @@ -84,6 +85,10 @@ void GPU::drawArrays() { log("PICA::DrawElements(vertex count = %d, index buffer config = %08X)\n", vertexCount, indexBufferConfig); } + if constexpr (useShaderJIT) { + shaderJIT.prepare(shaderUnit.vs); + } + // Total number of input attributes to shader. Differs between GS and VS. Currently stubbed to the VS one, as we don't have geometry shaders. 
const u32 inputAttrCount = (regs[PICAInternalRegs::VertexShaderInputBufferCfg] & 0xf) + 1; const u64 inputAttrCfg = getVertexShaderInputConfig(); diff --git a/src/core/PICA/shader_unit.cpp b/src/core/PICA/shader_unit.cpp index 6e9ca9b6..29541bd0 100644 --- a/src/core/PICA/shader_unit.cpp +++ b/src/core/PICA/shader_unit.cpp @@ -1,4 +1,5 @@ #include "PICA/shader_unit.hpp" +#include "cityhash.hpp" void ShaderUnit::reset() { vs.reset(); @@ -30,4 +31,29 @@ void PICAShader::reset() { addrRegister.x() = 0; addrRegister.y() = 0; loopCounter = 0; + + codeHashDirty = true; + opdescHashDirty = true; +} + +PICAShader::Hash PICAShader::getCodeHash() { + // Hash the code again if the code changed. NOTE(review): the dirty flag is raised by uploadWord() writing bufferedShader, but the hash is taken over loadedShader - confirm loadedShader is up to date whenever this is called + if (codeHashDirty) { + codeHashDirty = false; + lastCodeHash = CityHash::CityHash64((const char*)&loadedShader[0], loadedShader.size() * sizeof(loadedShader[0])); + } + + // Return the code hash + return lastCodeHash; +} + +PICAShader::Hash PICAShader::getOpdescHash() { + // Hash the operand descriptors again if they changed + if (opdescHashDirty) { + opdescHashDirty = false; + lastOpdescHash = CityHash::CityHash64((const char*)&operandDescriptors[0], operandDescriptors.size() * sizeof(operandDescriptors[0])); + } + + // Return the operand descriptor hash + return lastOpdescHash; } \ No newline at end of file diff --git a/third_party/cityhash/cityhash.cpp b/third_party/cityhash/cityhash.cpp index a9d5406b..366d7524 100644 --- a/third_party/cityhash/cityhash.cpp +++ b/third_party/cityhash/cityhash.cpp @@ -29,7 +29,7 @@ #include #include // for memcpy and memset -#include "cityhash.hpp" +#include "include/cityhash.hpp" #include "swap.hpp" // #include "config.h" diff --git a/third_party/cityhash/cityhash.hpp b/third_party/cityhash/include/cityhash.hpp similarity index 100% rename from third_party/cityhash/cityhash.hpp rename to third_party/cityhash/include/cityhash.hpp