mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-23 13:55:50 +12:00
[ShaderJIT] Add MOV
This commit is contained in:
parent
fb11fd4440
commit
9bb1f31fc9
3 changed files with 131 additions and 20 deletions
|
@ -5,6 +5,7 @@
|
|||
#include "helpers.hpp"
|
||||
#include "PICA/shader.hpp"
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
#include "x64_regs.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
@ -14,13 +15,20 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
// Allocate some extra space as padding for security purposes in the extremely unlikely occasion we manage to overflow the above size
|
||||
static constexpr size_t allocSize = executableMemorySize + 0x1000;
|
||||
|
||||
// If the swizzle field is this value then the swizzle pattern is .xyzw so we don't need a shuffle
|
||||
static constexpr uint noSwizzle = 0x1B;
|
||||
|
||||
using f24 = Floats::f24;
|
||||
using vec4f = OpenGL::Vector<f24, 4>;
|
||||
|
||||
// An array of labels (incl pointers) to each compiled (to x64) PICA instruction
|
||||
std::array<Xbyak::Label, PICAShader::maxInstructionCount> instructionLabels;
|
||||
// A vector of PCs that can potentially return based on the state of the PICA callstack.
|
||||
// Filled before compiling a shader by scanning the code for call instructions
|
||||
std::vector<u32> returnPCs;
|
||||
|
||||
u32 recompilerPC; // PC the recompiler is currently recompiling @
|
||||
u32 recompilerPC = 0; // PC the recompiler is currently recompiling @
|
||||
bool haveSSE4_1 = false; // Shows if the CPU supports SSE4.1
|
||||
|
||||
// Compile all instructions from [current recompiler PC, end)
|
||||
void compileUntil(const PICAShader& shaderUnit, u32 endPC);
|
||||
|
@ -35,7 +43,12 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
void scanForCalls(const PICAShader& shaderUnit);
|
||||
|
||||
// Load register with number "srcReg" indexed by index "idx" into the xmm register "reg"
|
||||
void loadRegister(Xmm dest, const PICAShader& shader, u32 srcReg, u32 idx);
|
||||
template <int sourceIndex>
|
||||
void loadRegister(Xmm dest, const PICAShader& shader, u32 src, u32 idx, u32 operandDescriptor);
|
||||
void storeRegister(Xmm source, const PICAShader& shader, u32 dest, u32 operandDescriptor);
|
||||
|
||||
const vec4f& getSourceRef(const PICAShader& shader, u32 src);
|
||||
const vec4f& getDestRef(const PICAShader& shader, u32 dest);
|
||||
|
||||
// Instruction recompilation functions
|
||||
void recMOV(const PICAShader& shader, u32 instruction);
|
||||
|
@ -44,15 +57,22 @@ public:
|
|||
using InstructionCallback = const void(*)(PICAShader& shaderUnit); // Callback type used for instructions
|
||||
// Callback type used for the JIT prologue. This is what the caller will call
|
||||
using PrologueCallback = const void(*)(PICAShader& shaderUnit, InstructionCallback cb);
|
||||
PrologueCallback prologueCb;
|
||||
PrologueCallback prologueCb = nullptr;
|
||||
|
||||
// Initialize our emitter with "allocSize" bytes of RWX memory
|
||||
ShaderEmitter() : Xbyak::CodeGenerator(allocSize) {}
|
||||
ShaderEmitter() : Xbyak::CodeGenerator(allocSize) {
|
||||
const auto cpu = Xbyak::util::Cpu();
|
||||
|
||||
haveSSE4_1 = cpu.has(Xbyak::util::Cpu::tSSE41);
|
||||
}
|
||||
|
||||
void compile(const PICAShader& shaderUnit);
|
||||
|
||||
// PC must be a valid entrypoint here. It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does
|
||||
InstructionCallback getInstructionCallback(u32 pc) {
|
||||
return reinterpret_cast<InstructionCallback>(instructionLabels.at(pc).getAddress());
|
||||
// Cast away the constness because casting to a function pointer is hard otherwise. Legal as long as we don't write to *ptr
|
||||
uint8_t* ptr = const_cast<uint8_t*>(instructionLabels.at(pc).getAddress());
|
||||
return reinterpret_cast<InstructionCallback>(ptr);
|
||||
}
|
||||
|
||||
PrologueCallback getPrologueCallback() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue