mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-20 20:49:12 +12:00
[Shader JIT] Add prologue & some more compilation stuffs
This commit is contained in:
parent
415e276ef9
commit
77cba3110d
5 changed files with 116 additions and 7 deletions
|
@ -17,9 +17,15 @@ void ShaderJIT::prepare(PICAShader& shaderUnit) {
|
|||
if (it == cache.end()) { // Block has not been compiled yet
|
||||
auto emitter = std::make_unique<ShaderEmitter>();
|
||||
emitter->compile(shaderUnit);
|
||||
// Get pointer to callbacks
|
||||
entrypointCallback = emitter->getInstructionCallback(shaderUnit.entrypoint);
|
||||
prologueCallback = emitter->getPrologueCallback();
|
||||
|
||||
cache.emplace_hint(it, hash, std::move(emitter));
|
||||
} else { // Block has been compiled and found, use it
|
||||
auto emitter = it->second.get();
|
||||
entrypointCallback = emitter->getInstructionCallback(shaderUnit.entrypoint);
|
||||
prologueCallback = emitter->getPrologueCallback();
|
||||
}
|
||||
}
|
||||
#endif // PANDA3DS_SHADER_JIT_SUPPORTED
|
|
@ -4,8 +4,70 @@
|
|||
using namespace Xbyak;
|
||||
using namespace Xbyak::util;
|
||||
|
||||
void ShaderEmitter::compile(const PICAShader& shaderUnit) {
|
||||
// Register that points to PICA state
|
||||
static constexpr Reg64 statePointer = rbp;
|
||||
|
||||
void ShaderEmitter::compile(const PICAShader& shaderUnit) {
|
||||
// Emit prologue first
|
||||
align(16);
|
||||
prologueCb = getCurr<PrologueCallback>();
|
||||
|
||||
// We assume arg1 contains the pointer to the PICA state and arg2 a pointer to the code for the entrypoint
|
||||
push(statePointer); // Back up state pointer to stack. This also aligns rsp to 16 bytes for calls
|
||||
mov(statePointer, (uintptr_t)&shaderUnit); // Set state pointer to the proper pointer
|
||||
|
||||
// If we add integer register allocations they should be pushed here, and the rsp should be properly fixed up
|
||||
// However most of the PICA is floating point so yeah
|
||||
|
||||
// Allocate shadow stack on Windows
|
||||
if constexpr (isWindows()) {
|
||||
sub(rsp, 32);
|
||||
}
|
||||
// Tail call to shader code entrypoint
|
||||
jmp(arg2);
|
||||
align(16);
|
||||
// Scan the shader code for call instructions and add them to the list of possible return PCs. We need to do this because the PICA callstack works
|
||||
// Pretty weirdly
|
||||
scanForCalls(shaderUnit);
|
||||
|
||||
// Compile every instruction in the shader
|
||||
// This sounds horrible but the PICA instruction memory is tiny, and most of the time it's padded wtih nops that compile to nothing
|
||||
recompilerPC = 0;
|
||||
compileUntil(shaderUnit, PICAShader::maxInstructionCount);
|
||||
}
|
||||
|
||||
void ShaderEmitter::scanForCalls(const PICAShader& shaderUnit) {
|
||||
returnPCs.clear();
|
||||
|
||||
for (u32 i = 0; i < PICAShader::maxInstructionCount; i++) {
|
||||
const u32 instruction = shaderUnit.loadedShader[i];
|
||||
if (isCall(instruction)) {
|
||||
const u32 num = instruction & 0xff; // Num of instructions to execute
|
||||
const u32 dest = (instruction >> 10) & 0xfff; // Starting subroutine address
|
||||
const u32 returnPC = num + dest; // Add them to get the return PC
|
||||
|
||||
returnPCs.push_back(returnPC);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderEmitter::compileUntil(const PICAShader& shaderUnit, u32 end) {
|
||||
while (recompilerPC < end) {
|
||||
compileInstruction(shaderUnit);
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
|
||||
// Write current location to label for this instruction
|
||||
L(instructionLabels[recompilerPC]);
|
||||
// Fetch instruction and inc PC
|
||||
const u32 instruction = shaderUnit.loadedShader[recompilerPC++];
|
||||
const u32 opcode = instruction >> 26;
|
||||
|
||||
switch (opcode) {
|
||||
default:
|
||||
Helpers::panic("ShaderJIT: Unimplemented PICA opcode %X", opcode);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
|
@ -203,7 +203,12 @@ void GPU::drawArrays() {
|
|||
std::memcpy(&shaderUnit.vs.inputs[mapping], &currentAttributes[j], sizeof(vec4f));
|
||||
}
|
||||
|
||||
shaderUnit.vs.run();
|
||||
if constexpr (useShaderJIT) {
|
||||
shaderJIT.run(shaderUnit.vs);
|
||||
} else {
|
||||
shaderUnit.vs.run();
|
||||
}
|
||||
|
||||
std::memcpy(&vertices[i].position, &shaderUnit.vs.outputs[0], sizeof(vec4f));
|
||||
std::memcpy(&vertices[i].colour, &shaderUnit.vs.outputs[1], sizeof(vec4f));
|
||||
std::memcpy(&vertices[i].UVs, &shaderUnit.vs.outputs[2], 2 * sizeof(f24));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue