// Review notes for this patch (unified git diff, C++ sources). Everything before the first
// diff header is commentary only; the patch text below is unchanged.
//
// Files touched and what each hunk shows:
//  * shader_rec_emitter_x64.hpp: declares ShaderEmitter::recEX2 and ShaderEmitter::recLG2.
//  * renderer_gl.hpp: replaces the depthBufferCache and colourBufferCache member declarations.
//    NOTE(review): the removed and added lines read identically here, and the later
//    std::is_same() calls carry no template arguments - presumably angle-bracket arguments
//    were lost when this text was extracted. Confirm against the original commit.
//  * surface_cache.hpp: findFromAddress now returns the first valid entry whose byte range
//    [location, location + sizeInBytes()) contains the address, instead of requiring an
//    exact location match. add() now calls Helpers::panicDev ahead of the ring-buffer
//    eviction when the is_same checks hold (the message refers to the colour/depth caches).
//  * surfaces.hpp: ColourBuffer::free releases texture and fbo instead of panicking;
//    DepthBuffer::free releases texture instead of printing a reminder.
//  * shader_rec_emitter_x64.cpp: EX2 and LG2 are dispatched to the new recompiler functions
//    and removed from the group of opcodes that log an unknown-opcode message.
//      - recLG2 loads the swizzled source, pushes 1.0 (fld1) and then the low lane of the
//        source onto the x87 stack and issues fyl2x, so 1.0 is the multiplier of log2(src).
//      - recEX2 separates the rounded part of the source (frndint, fsub), runs f2xm1 on the
//        remainder, adds a value read from onesVector (presumably 1.0f - TODO confirm) and
//        rescales with fscale.
//      - Both move the value between SSE and x87 through the stack slot made by push(rax),
//        release it with add(rsp, 8), and broadcast lane x with shufps unless writeMask is 0x8.
//  * ncch.cpp: drops the warning emitted when stackSize is nonzero and differs from
//    VirtualAddrs::DefaultStackSize.
diff --git a/include/PICA/dynapica/shader_rec_emitter_x64.hpp b/include/PICA/dynapica/shader_rec_emitter_x64.hpp index ce9d992b..ba37595a 100644 --- a/include/PICA/dynapica/shader_rec_emitter_x64.hpp +++ b/include/PICA/dynapica/shader_rec_emitter_x64.hpp @@ -82,11 +82,13 @@ class ShaderEmitter : public Xbyak::CodeGenerator { void recDP4(const PICAShader& shader, u32 instruction); void recEMIT(const PICAShader& shader, u32 instruction); void recEND(const PICAShader& shader, u32 instruction); + void recEX2(const PICAShader& shader, u32 instruction); void recFLR(const PICAShader& shader, u32 instruction); void recIFC(const PICAShader& shader, u32 instruction); void recIFU(const PICAShader& shader, u32 instruction); void recJMPC(const PICAShader& shader, u32 instruction); void recJMPU(const PICAShader& shader, u32 instruction); + void recLG2(const PICAShader& shader, u32 instruction); void recLOOP(const PICAShader& shader, u32 instruction); void recMAD(const PICAShader& shader, u32 instruction); void recMAX(const PICAShader& shader, u32 instruction); diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 3ede19ff..8258c4c7 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -43,8 +43,8 @@ class Renderer { float oldDepthOffset = 0.0; bool oldDepthmapEnable = false; - SurfaceCache depthBufferCache; - SurfaceCache colourBufferCache; + SurfaceCache depthBufferCache; + SurfaceCache colourBufferCache; SurfaceCache textureCache; OpenGL::uvec2 fbSize; // The size of the framebuffer (ie both the colour and depth buffer)' diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp index 1a7b87f8..b2e5cc29 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -46,7 +46,7 @@ public: OptionalRef findFromAddress(u32 address) { for (auto& e : buffer) { - if (e.location == address && e.valid) + if (e.location <= address && 
e.location + e.sizeInBytes() > address && e.valid) return e; } @@ -57,6 +57,10 @@ public: SurfaceType& add(const SurfaceType& surface) { if (size >= capacity) { if constexpr (evictOnOverflow) { // Do a ring buffer if evictOnOverflow is true + if constexpr (std::is_same() || std::is_same()) { + Helpers::panicDev("Colour/Depth buffer cache overflowed, currently stubbed to do a ring-buffer. This might snap in half"); + } + auto& e = buffer[evictionIndex]; evictionIndex = (evictionIndex + 1) % capacity; diff --git a/include/renderer_gl/surfaces.hpp b/include/renderer_gl/surfaces.hpp index 7ff36c6d..1d46e28e 100644 --- a/include/renderer_gl/surfaces.hpp +++ b/include/renderer_gl/surfaces.hpp @@ -58,11 +58,13 @@ struct ColourBuffer { } void free() { - valid = false; + valid = false; - if (texture.exists() || fbo.exists()) - Helpers::panic("Make this buffer free itself"); - } + if (texture.exists() || fbo.exists()) { + texture.free(); + fbo.free(); + } + } bool matches(ColourBuffer& other) { return location == other.location && format == other.format && @@ -128,9 +130,11 @@ struct DepthBuffer { } void free() { - valid = false; - printf("Make this depth buffer free itself\n"); - } + valid = false; + if (texture.exists()) { + texture.free(); + } + } bool matches(DepthBuffer& other) { return location == other.location && format == other.format && diff --git a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp index cce9b3de..06247950 100644 --- a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp +++ b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp @@ -135,11 +135,13 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) { case ShaderOpcodes::DP3: recDP3(shaderUnit, instruction); break; case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break; case ShaderOpcodes::END: recEND(shaderUnit, instruction); break; + case ShaderOpcodes::EX2: recEX2(shaderUnit, instruction); break; case ShaderOpcodes::FLR: 
recFLR(shaderUnit, instruction); break; case ShaderOpcodes::IFC: recIFC(shaderUnit, instruction); break; case ShaderOpcodes::IFU: recIFU(shaderUnit, instruction); break; case ShaderOpcodes::JMPC: recJMPC(shaderUnit, instruction); break; case ShaderOpcodes::JMPU: recJMPU(shaderUnit, instruction); break; + case ShaderOpcodes::LG2: recLG2(shaderUnit, instruction); break; case ShaderOpcodes::LOOP: recLOOP(shaderUnit, instruction); break; case ShaderOpcodes::MOV: recMOV(shaderUnit, instruction); break; case ShaderOpcodes::MOVA: recMOVA(shaderUnit, instruction); break; @@ -152,8 +154,6 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) { // Unimplemented opcodes that don't seem to actually be used but exist in the binary // EMIT/SETEMIT are used in geometry shaders, however are sometimes found in vertex shaders? - case ShaderOpcodes::EX2: - case ShaderOpcodes::LG2: case ShaderOpcodes::EMIT: case ShaderOpcodes::SETEMIT: log("[ShaderJIT] Unknown PICA opcode: %02X\n", opcode); @@ -877,6 +877,74 @@ void ShaderEmitter::recLOOP(const PICAShader& shader, u32 instruction) { loopLevel--; } +// SSE does not have a log2 instruction so we temporarily emulate this using x87 FPU +void ShaderEmitter::recLG2(const PICAShader& shader, u32 instruction) { + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; + const u32 src = getBits<12, 7>(instruction); + const u32 idx = getBits<19, 2>(instruction); + const u32 dest = getBits<21, 5>(instruction); + const u32 writeMask = getBits<0, 4>(operandDescriptor); + + // Load swizzled source, push 1.0 to the x87 stack + loadRegister<1>(src1_xmm, shader, src, idx, operandDescriptor); + fld1(); + + // Push source to the x87 stack + movd(eax, src1_xmm); + push(rax); + fld(dword[rsp]); + + // Perform log2, load result to src1_xmm, write it back and undo the previous push rax + fyl2x(); + fstp(dword[rsp]); + movss(src1_xmm, dword[rsp]); + add(rsp, 8); + + // If we only write back the x component to the 
result, we needn't perform a shuffle to do res = res.xxxx + // Otherwise we do + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); +} + +// SSE does not have an exp2 instruction so we temporarily emulate this using x87 FPU +void ShaderEmitter::recEX2(const PICAShader& shader, u32 instruction) { + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; + const u32 src = getBits<12, 7>(instruction); + const u32 idx = getBits<19, 2>(instruction); + const u32 dest = getBits<21, 5>(instruction); + const u32 writeMask = getBits<0, 4>(operandDescriptor); + + loadRegister<1>(src1_xmm, shader, src, idx, operandDescriptor); + + // Push source to the x87 stack, then do some insane compiler-generated x87 math + movd(eax, src1_xmm); + push(rax); + fld(dword[rsp]); + + fld(st0); + frndint(); + fsub(st1, st0); + fxch(st1); + f2xm1(); + fadd(dword[rip + onesVector]); + fscale(); + + // Load result to src1_xmm, write it back and undo the previous push rax + fstp(st1); + fstp(dword[rsp]); + movss(src1_xmm, dword[rsp]); + add(rsp, 8); + + // If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx + // Otherwise we do + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); +} + void ShaderEmitter::printLog(const PICAShader& shaderUnit) { printf("PC: %04X\n", shaderUnit.pc); diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 75e0196a..bbc025cc 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -216,10 +216,6 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn printf("RomFS offset: %08llX, size: %08llX\n", romFS.offset, 
romFS.size); } - if (stackSize != 0 && stackSize != VirtualAddrs::DefaultStackSize) { - Helpers::warn("Requested stack size is %08X bytes. Temporarily emulated as 0x4000 until adjustable sizes are added\n", stackSize); - } - initialized = true; return true; }