mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-18 03:31:31 +12:00
Merge pull request #87 from wheremyfoodat/superskyler
Make colour/depth buffers to also do ringing for now, add exp2/log2 approximations in x87 to the shader JIT
This commit is contained in:
commit
482233f601
6 changed files with 90 additions and 16 deletions
|
@ -82,11 +82,13 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
||||||
void recDP4(const PICAShader& shader, u32 instruction);
|
void recDP4(const PICAShader& shader, u32 instruction);
|
||||||
void recEMIT(const PICAShader& shader, u32 instruction);
|
void recEMIT(const PICAShader& shader, u32 instruction);
|
||||||
void recEND(const PICAShader& shader, u32 instruction);
|
void recEND(const PICAShader& shader, u32 instruction);
|
||||||
|
void recEX2(const PICAShader& shader, u32 instruction);
|
||||||
void recFLR(const PICAShader& shader, u32 instruction);
|
void recFLR(const PICAShader& shader, u32 instruction);
|
||||||
void recIFC(const PICAShader& shader, u32 instruction);
|
void recIFC(const PICAShader& shader, u32 instruction);
|
||||||
void recIFU(const PICAShader& shader, u32 instruction);
|
void recIFU(const PICAShader& shader, u32 instruction);
|
||||||
void recJMPC(const PICAShader& shader, u32 instruction);
|
void recJMPC(const PICAShader& shader, u32 instruction);
|
||||||
void recJMPU(const PICAShader& shader, u32 instruction);
|
void recJMPU(const PICAShader& shader, u32 instruction);
|
||||||
|
void recLG2(const PICAShader& shader, u32 instruction);
|
||||||
void recLOOP(const PICAShader& shader, u32 instruction);
|
void recLOOP(const PICAShader& shader, u32 instruction);
|
||||||
void recMAD(const PICAShader& shader, u32 instruction);
|
void recMAD(const PICAShader& shader, u32 instruction);
|
||||||
void recMAX(const PICAShader& shader, u32 instruction);
|
void recMAX(const PICAShader& shader, u32 instruction);
|
||||||
|
|
|
@ -43,8 +43,8 @@ class Renderer {
|
||||||
float oldDepthOffset = 0.0;
|
float oldDepthOffset = 0.0;
|
||||||
bool oldDepthmapEnable = false;
|
bool oldDepthmapEnable = false;
|
||||||
|
|
||||||
SurfaceCache<DepthBuffer, 10> depthBufferCache;
|
SurfaceCache<DepthBuffer, 10, true> depthBufferCache;
|
||||||
SurfaceCache<ColourBuffer, 10> colourBufferCache;
|
SurfaceCache<ColourBuffer, 10, true> colourBufferCache;
|
||||||
SurfaceCache<Texture, 256, true> textureCache;
|
SurfaceCache<Texture, 256, true> textureCache;
|
||||||
|
|
||||||
OpenGL::uvec2 fbSize; // The size of the framebuffer (i.e. both the colour and depth buffer)
|
OpenGL::uvec2 fbSize; // The size of the framebuffer (i.e. both the colour and depth buffer)
|
||||||
|
|
|
@ -46,7 +46,7 @@ public:
|
||||||
|
|
||||||
OptionalRef findFromAddress(u32 address) {
|
OptionalRef findFromAddress(u32 address) {
|
||||||
for (auto& e : buffer) {
|
for (auto& e : buffer) {
|
||||||
if (e.location == address && e.valid)
|
if (e.location <= address && e.location + e.sizeInBytes() > address && e.valid)
|
||||||
return e;
|
return e;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -57,6 +57,10 @@ public:
|
||||||
SurfaceType& add(const SurfaceType& surface) {
|
SurfaceType& add(const SurfaceType& surface) {
|
||||||
if (size >= capacity) {
|
if (size >= capacity) {
|
||||||
if constexpr (evictOnOverflow) { // Do a ring buffer if evictOnOverflow is true
|
if constexpr (evictOnOverflow) { // Do a ring buffer if evictOnOverflow is true
|
||||||
|
if constexpr (std::is_same<SurfaceType, ColourBuffer>() || std::is_same<SurfaceType, DepthBuffer>()) {
|
||||||
|
Helpers::panicDev("Colour/Depth buffer cache overflowed, currently stubbed to do a ring-buffer. This might snap in half");
|
||||||
|
}
|
||||||
|
|
||||||
auto& e = buffer[evictionIndex];
|
auto& e = buffer[evictionIndex];
|
||||||
evictionIndex = (evictionIndex + 1) % capacity;
|
evictionIndex = (evictionIndex + 1) % capacity;
|
||||||
|
|
||||||
|
|
|
@ -58,11 +58,13 @@ struct ColourBuffer {
|
||||||
}
|
}
|
||||||
|
|
||||||
void free() {
|
void free() {
|
||||||
valid = false;
|
valid = false;
|
||||||
|
|
||||||
if (texture.exists() || fbo.exists())
|
if (texture.exists() || fbo.exists()) {
|
||||||
Helpers::panic("Make this buffer free itself");
|
texture.free();
|
||||||
}
|
fbo.free();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool matches(ColourBuffer& other) {
|
bool matches(ColourBuffer& other) {
|
||||||
return location == other.location && format == other.format &&
|
return location == other.location && format == other.format &&
|
||||||
|
@ -128,9 +130,11 @@ struct DepthBuffer {
|
||||||
}
|
}
|
||||||
|
|
||||||
void free() {
|
void free() {
|
||||||
valid = false;
|
valid = false;
|
||||||
printf("Make this depth buffer free itself\n");
|
if (texture.exists()) {
|
||||||
}
|
texture.free();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool matches(DepthBuffer& other) {
|
bool matches(DepthBuffer& other) {
|
||||||
return location == other.location && format == other.format &&
|
return location == other.location && format == other.format &&
|
||||||
|
|
|
@ -135,11 +135,13 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
|
||||||
case ShaderOpcodes::DP3: recDP3(shaderUnit, instruction); break;
|
case ShaderOpcodes::DP3: recDP3(shaderUnit, instruction); break;
|
||||||
case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break;
|
case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break;
|
||||||
case ShaderOpcodes::END: recEND(shaderUnit, instruction); break;
|
case ShaderOpcodes::END: recEND(shaderUnit, instruction); break;
|
||||||
|
case ShaderOpcodes::EX2: recEX2(shaderUnit, instruction); break;
|
||||||
case ShaderOpcodes::FLR: recFLR(shaderUnit, instruction); break;
|
case ShaderOpcodes::FLR: recFLR(shaderUnit, instruction); break;
|
||||||
case ShaderOpcodes::IFC: recIFC(shaderUnit, instruction); break;
|
case ShaderOpcodes::IFC: recIFC(shaderUnit, instruction); break;
|
||||||
case ShaderOpcodes::IFU: recIFU(shaderUnit, instruction); break;
|
case ShaderOpcodes::IFU: recIFU(shaderUnit, instruction); break;
|
||||||
case ShaderOpcodes::JMPC: recJMPC(shaderUnit, instruction); break;
|
case ShaderOpcodes::JMPC: recJMPC(shaderUnit, instruction); break;
|
||||||
case ShaderOpcodes::JMPU: recJMPU(shaderUnit, instruction); break;
|
case ShaderOpcodes::JMPU: recJMPU(shaderUnit, instruction); break;
|
||||||
|
case ShaderOpcodes::LG2: recLG2(shaderUnit, instruction); break;
|
||||||
case ShaderOpcodes::LOOP: recLOOP(shaderUnit, instruction); break;
|
case ShaderOpcodes::LOOP: recLOOP(shaderUnit, instruction); break;
|
||||||
case ShaderOpcodes::MOV: recMOV(shaderUnit, instruction); break;
|
case ShaderOpcodes::MOV: recMOV(shaderUnit, instruction); break;
|
||||||
case ShaderOpcodes::MOVA: recMOVA(shaderUnit, instruction); break;
|
case ShaderOpcodes::MOVA: recMOVA(shaderUnit, instruction); break;
|
||||||
|
@ -152,8 +154,6 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
|
||||||
|
|
||||||
// Unimplemented opcodes that don't seem to actually be used but exist in the binary
|
// Unimplemented opcodes that don't seem to actually be used but exist in the binary
|
||||||
// EMIT/SETEMIT are used in geometry shaders, however are sometimes found in vertex shaders?
|
// EMIT/SETEMIT are used in geometry shaders, however are sometimes found in vertex shaders?
|
||||||
case ShaderOpcodes::EX2:
|
|
||||||
case ShaderOpcodes::LG2:
|
|
||||||
case ShaderOpcodes::EMIT:
|
case ShaderOpcodes::EMIT:
|
||||||
case ShaderOpcodes::SETEMIT:
|
case ShaderOpcodes::SETEMIT:
|
||||||
log("[ShaderJIT] Unknown PICA opcode: %02X\n", opcode);
|
log("[ShaderJIT] Unknown PICA opcode: %02X\n", opcode);
|
||||||
|
@ -877,6 +877,74 @@ void ShaderEmitter::recLOOP(const PICAShader& shader, u32 instruction) {
|
||||||
loopLevel--;
|
loopLevel--;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits code for the LG2 opcode. SSE does not have a log2 instruction, so for now this is emulated with the x87 FPU (fyl2x), operating on the bottom lane of the swizzled source
|
||||||
|
void ShaderEmitter::recLG2(const PICAShader& shader, u32 instruction) {
|
||||||
|
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
|
||||||
|
const u32 src = getBits<12, 7>(instruction);
|
||||||
|
const u32 idx = getBits<19, 2>(instruction);
|
||||||
|
const u32 dest = getBits<21, 5>(instruction);
|
||||||
|
const u32 writeMask = getBits<0, 4>(operandDescriptor);
|
|
||||||
|
// Load the swizzled source, then push 1.0 to the x87 stack; it ends up in st1 and acts as the multiplier for fyl2x below
|
||||||
|
loadRegister<1>(src1_xmm, shader, src, idx, operandDescriptor);
|
||||||
|
fld1();
|
|
||||||
|
// Push the bottom lane of the source to the x87 stack by bouncing it through eax and a temporary 8-byte stack slot
|
||||||
|
movd(eax, src1_xmm);
|
||||||
|
push(rax);
|
||||||
|
fld(dword[rsp]);
|
|
||||||
|
// fyl2x computes st1 * log2(st0) and pops, ie 1.0 * log2(src). Store the result to the stack slot, reload it into src1_xmm and undo the previous push rax
|
||||||
|
fyl2x();
|
||||||
|
fstp(dword[rsp]);
|
||||||
|
movss(src1_xmm, dword[rsp]);
|
||||||
|
add(rsp, 8);
|
|
||||||
|
// If only the x component of the result is written back, the value is already in the bottom lane and no shuffle is needed
|
||||||
|
// Otherwise broadcast it so that res = res.xxxx
|
||||||
|
if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x
|
||||||
|
shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx
|
||||||
|
}
|
||||||
|
storeRegister(src1_xmm, shader, dest, operandDescriptor);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits code for the EX2 opcode. SSE does not have an exp2 instruction, so for now this is emulated with the x87 FPU (frndint/f2xm1/fscale), operating on the bottom lane of the swizzled source
|
||||||
|
void ShaderEmitter::recEX2(const PICAShader& shader, u32 instruction) {
|
||||||
|
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
|
||||||
|
const u32 src = getBits<12, 7>(instruction);
|
||||||
|
const u32 idx = getBits<19, 2>(instruction);
|
||||||
|
const u32 dest = getBits<21, 5>(instruction);
|
||||||
|
const u32 writeMask = getBits<0, 4>(operandDescriptor);
|
|
||||||
|
loadRegister<1>(src1_xmm, shader, src, idx, operandDescriptor);
|
|
||||||
|
// Push the bottom lane of the source to the x87 stack, then compute 2^src as 2^frac * 2^int: frndint/fsub split src into a rounded part and a remainder, f2xm1 plus the constant at onesVector (presumably 1.0 - TODO confirm) gives 2^frac, and fscale multiplies by 2^int
|
||||||
|
movd(eax, src1_xmm);
|
||||||
|
push(rax);
|
||||||
|
fld(dword[rsp]);
|
|
||||||
|
fld(st0);
|
||||||
|
frndint();
|
||||||
|
fsub(st1, st0);
|
||||||
|
fxch(st1);
|
||||||
|
f2xm1();
|
||||||
|
fadd(dword[rip + onesVector]);
|
||||||
|
fscale();
|
|
||||||
|
// fstp(st1) drops the leftover rounded value while keeping the result on top of the x87 stack. Then store the result to the stack slot, reload it into src1_xmm and undo the previous push rax
|
||||||
|
fstp(st1);
|
||||||
|
fstp(dword[rsp]);
|
||||||
|
movss(src1_xmm, dword[rsp]);
|
||||||
|
add(rsp, 8);
|
|
||||||
|
// If only the x component of the result is written back, the value is already in the bottom lane and no shuffle is needed
|
||||||
|
// Otherwise broadcast it so that res = res.xxxx
|
||||||
|
if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x
|
||||||
|
shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx
|
||||||
|
}
|
||||||
|
storeRegister(src1_xmm, shader, dest, operandDescriptor);
|
||||||
|
}
|
||||||
|
|
||||||
void ShaderEmitter::printLog(const PICAShader& shaderUnit) {
|
void ShaderEmitter::printLog(const PICAShader& shaderUnit) {
|
||||||
printf("PC: %04X\n", shaderUnit.pc);
|
printf("PC: %04X\n", shaderUnit.pc);
|
||||||
|
|
||||||
|
|
|
@ -216,10 +216,6 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
|
||||||
printf("RomFS offset: %08llX, size: %08llX\n", romFS.offset, romFS.size);
|
printf("RomFS offset: %08llX, size: %08llX\n", romFS.offset, romFS.size);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stackSize != 0 && stackSize != VirtualAddrs::DefaultStackSize) {
|
|
||||||
Helpers::warn("Requested stack size is %08X bytes. Temporarily emulated as 0x4000 until adjustable sizes are added\n", stackSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
initialized = true;
|
initialized = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue