From 17b08a25fabe70e72e3662a57defe7084555b115 Mon Sep 17 00:00:00 2001 From: Sky Date: Thu, 6 Jul 2023 11:18:14 -0700 Subject: [PATCH 1/6] [GPU] Converted Depth/Color Surfaces to a ring buffer Additionally made the surface cache search hit for any address that lies in the surface. This should allow multiple races to be done in Mario Kart and fixes the intro video. --- include/renderer_gl/renderer_gl.hpp | 4 ++-- include/renderer_gl/surface_cache.hpp | 2 +- include/renderer_gl/surfaces.hpp | 10 +++++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 3ede19ff..8258c4c7 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -43,8 +43,8 @@ class Renderer { float oldDepthOffset = 0.0; bool oldDepthmapEnable = false; - SurfaceCache depthBufferCache; - SurfaceCache colourBufferCache; + SurfaceCache depthBufferCache; + SurfaceCache colourBufferCache; SurfaceCache textureCache; OpenGL::uvec2 fbSize; // The size of the framebuffer (ie both the colour and depth buffer)' diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp index fb0a469d..e12befeb 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -46,7 +46,7 @@ public: OptionalRef findFromAddress(u32 address) { for (auto& e : buffer) { - if (e.location == address && e.valid) + if (e.location <= address && e.location+e.sizeInBytes() > address && e.valid) return e; } diff --git a/include/renderer_gl/surfaces.hpp b/include/renderer_gl/surfaces.hpp index 7ff36c6d..e5458aae 100644 --- a/include/renderer_gl/surfaces.hpp +++ b/include/renderer_gl/surfaces.hpp @@ -60,8 +60,10 @@ struct ColourBuffer { void free() { valid = false; - if (texture.exists() || fbo.exists()) - Helpers::panic("Make this buffer free itself"); + if (texture.exists() || fbo.exists()){ + texture.free(); + fbo.free(); + } } bool matches(ColourBuffer& other) { @@ -128,8 +130,10 @@ struct DepthBuffer { } void free() { + if(texture.exists()){ + texture.free(); + } valid = false; - printf("Make this depth buffer free itself\n"); } bool matches(DepthBuffer& other) { From 5e8f324ff50666b4d65e86dfa14a778b1d59c5a8 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 9 Jul 2023 00:50:16 +0300 Subject: [PATCH 2/6] Formatting --- include/renderer_gl/surface_cache.hpp | 2 +- include/renderer_gl/surfaces.hpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp index df998b6b..8d3ae5e5 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -46,7 +46,7 @@ public: OptionalRef findFromAddress(u32 address) { for (auto& e : buffer) { - if (e.location <= address && e.location+e.sizeInBytes() > address && e.valid) + if (e.location <= address && e.location + e.sizeInBytes() > address && e.valid) return e; } diff --git a/include/renderer_gl/surfaces.hpp b/include/renderer_gl/surfaces.hpp index e5458aae..ac4aa5b6 100644 --- a/include/renderer_gl/surfaces.hpp +++ b/include/renderer_gl/surfaces.hpp @@ -60,7 +60,7 @@ struct ColourBuffer { void free() { valid = false; - if (texture.exists() || fbo.exists()){ + if (texture.exists() || fbo.exists()) { texture.free(); fbo.free(); } @@ -130,10 +130,10 @@ struct DepthBuffer { } void free() { - if(texture.exists()){ + valid = false; + if (texture.exists()) { texture.free(); } - valid = false; } bool matches(DepthBuffer& other) { From 15ede3bd6ece9318de8c0aee504bc4a31d26c6a4 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 9 Jul 2023 01:29:51 +0300 Subject: [PATCH 3/6] [Shader JIT] Temporary EXP2/LOG2 implementations in x87 --- .../PICA/dynapica/shader_rec_emitter_x64.hpp | 2 + .../PICA/dynapica/shader_rec_emitter_x64.cpp | 72 ++++++++++++++++++- 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/include/PICA/dynapica/shader_rec_emitter_x64.hpp b/include/PICA/dynapica/shader_rec_emitter_x64.hpp index ce9d992b..ba37595a 100644 --- a/include/PICA/dynapica/shader_rec_emitter_x64.hpp +++ b/include/PICA/dynapica/shader_rec_emitter_x64.hpp @@ -82,11 +82,13 @@ class ShaderEmitter : public Xbyak::CodeGenerator { void recDP4(const PICAShader& shader, u32 instruction); void recEMIT(const PICAShader& shader, u32 instruction); void recEND(const PICAShader& shader, u32 instruction); + void recEX2(const PICAShader& shader, u32 instruction); void recFLR(const PICAShader& shader, u32 instruction); void recIFC(const PICAShader& shader, u32 instruction); void recIFU(const PICAShader& shader, u32 instruction); void recJMPC(const PICAShader& shader, u32 instruction); void recJMPU(const PICAShader& shader, u32 instruction); + void recLG2(const PICAShader& shader, u32 instruction); void recLOOP(const PICAShader& shader, u32 instruction); void recMAD(const PICAShader& shader, u32 instruction); void recMAX(const PICAShader& shader, u32 instruction); diff --git a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp index cce9b3de..06247950 100644 --- a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp +++ b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp @@ -135,11 +135,13 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) { case ShaderOpcodes::DP3: recDP3(shaderUnit, instruction); break; case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break; case ShaderOpcodes::END: recEND(shaderUnit, instruction); break; + case ShaderOpcodes::EX2: recEX2(shaderUnit, instruction); break; case ShaderOpcodes::FLR: recFLR(shaderUnit, instruction); break; case ShaderOpcodes::IFC: recIFC(shaderUnit, instruction); break; case ShaderOpcodes::IFU: recIFU(shaderUnit, instruction); break; case ShaderOpcodes::JMPC: recJMPC(shaderUnit, instruction); break; case ShaderOpcodes::JMPU: recJMPU(shaderUnit, instruction); break; + case ShaderOpcodes::LG2: recLG2(shaderUnit, instruction); break; case ShaderOpcodes::LOOP: recLOOP(shaderUnit, instruction); break; case ShaderOpcodes::MOV: recMOV(shaderUnit, instruction); break; case ShaderOpcodes::MOVA: recMOVA(shaderUnit, instruction); break; @@ -152,8 +154,6 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) { // Unimplemented opcodes that don't seem to actually be used but exist in the binary // EMIT/SETEMIT are used in geometry shaders, however are sometimes found in vertex shaders? - case ShaderOpcodes::EX2: - case ShaderOpcodes::LG2: case ShaderOpcodes::EMIT: case ShaderOpcodes::SETEMIT: log("[ShaderJIT] Unknown PICA opcode: %02X\n", opcode); @@ -877,6 +877,74 @@ void ShaderEmitter::recLOOP(const PICAShader& shader, u32 instruction) { loopLevel--; } +// SSE does not have a log2 instruction so we temporarily emulate this using x87 FPU +void ShaderEmitter::recLG2(const PICAShader& shader, u32 instruction) { + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; + const u32 src = getBits<12, 7>(instruction); + const u32 idx = getBits<19, 2>(instruction); + const u32 dest = getBits<21, 5>(instruction); + const u32 writeMask = getBits<0, 4>(operandDescriptor); + + // Load swizzled source, push 1.0 to the x87 stack + loadRegister<1>(src1_xmm, shader, src, idx, operandDescriptor); + fld1(); + + // Push source to the x87 stack + movd(eax, src1_xmm); + push(rax); + fld(dword[rsp]); + + // Perform log2, load result to src1_xmm, write it back and undo the previous push rax + fyl2x(); + fstp(dword[rsp]); + movss(src1_xmm, dword[rsp]); + add(rsp, 8); + + // If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx + // Otherwise we do + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); +} + +// SSE does not have an exp2 instruction so we temporarily emulate this using x87 FPU +void ShaderEmitter::recEX2(const PICAShader& shader, u32 instruction) { + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; + const u32 src = getBits<12, 7>(instruction); + const u32 idx = getBits<19, 2>(instruction); + const u32 dest = getBits<21, 5>(instruction); + const u32 writeMask = getBits<0, 4>(operandDescriptor); + + loadRegister<1>(src1_xmm, shader, src, idx, operandDescriptor); + + // Push source to the x87 stack, then do some insane compiler-generated x87 math + movd(eax, src1_xmm); + push(rax); + fld(dword[rsp]); + + fld(st0); + frndint(); + fsub(st1, st0); + fxch(st1); + f2xm1(); + fadd(dword[rip + onesVector]); + fscale(); + + // Load result to src1_xmm, write it back and undo the previous push rax + fstp(st1); + fstp(dword[rsp]); + movss(src1_xmm, dword[rsp]); + add(rsp, 8); + + // If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx + // Otherwise we do + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); +} + void ShaderEmitter::printLog(const PICAShader& shaderUnit) { printf("PC: %04X\n", shaderUnit.pc); From 2bbcdfade2ec9ea316a8f980d36824ff4039e56f Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 9 Jul 2023 01:40:30 +0300 Subject: [PATCH 4/6] Add proper warning to surface cache --- include/renderer_gl/surface_cache.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp index 8d3ae5e5..44580fef 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -57,6 +57,10 @@ public: SurfaceType& add(const SurfaceType& surface) { if (size >= capacity) { if constexpr (evictOnOverflow) { // Do a ring buffer if evictOnOverflow is true + if constexpr (std::is_same() || std::is_same()) { + Helpers::panicDev("Colour/Depth buffer cache overflowed, currently stubbed to do a ring-buffer. This might snap in half"); + } + auto& e = buffer[evictionIndex]; evictionIndex = (evictionIndex + 1) % capacity; From e41076522c46a66edafe3d3be7dab180c725348d Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 9 Jul 2023 01:41:09 +0300 Subject: [PATCH 5/6] Remove outdated warning --- src/core/loader/ncch.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 75e0196a..bbc025cc 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -216,10 +216,6 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn printf("RomFS offset: %08llX, size: %08llX\n", romFS.offset, romFS.size); } - if (stackSize != 0 && stackSize != VirtualAddrs::DefaultStackSize) { - Helpers::warn("Requested stack size is %08X bytes. Temporarily emulated as 0x4000 until adjustable sizes are added\n", stackSize); - } - initialized = true; return true; } From c88ab423d3c2b393b72503b4e1aaece6bf9668bb Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sun, 9 Jul 2023 01:44:25 +0300 Subject: [PATCH 6/6] doormat --- include/renderer_gl/surface_cache.hpp | 2 +- include/renderer_gl/surfaces.hpp | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp index 44580fef..b2e5cc29 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -59,7 +59,7 @@ public: if constexpr (evictOnOverflow) { // Do a ring buffer if evictOnOverflow is true if constexpr (std::is_same() || std::is_same()) { Helpers::panicDev("Colour/Depth buffer cache overflowed, currently stubbed to do a ring-buffer. This might snap in half"); - } + } auto& e = buffer[evictionIndex]; evictionIndex = (evictionIndex + 1) % capacity; diff --git a/include/renderer_gl/surfaces.hpp b/include/renderer_gl/surfaces.hpp index ac4aa5b6..1d46e28e 100644 --- a/include/renderer_gl/surfaces.hpp +++ b/include/renderer_gl/surfaces.hpp @@ -58,13 +58,13 @@ struct ColourBuffer { } void free() { - valid = false; + valid = false; - if (texture.exists() || fbo.exists()) { - texture.free(); - fbo.free(); - } - } + if (texture.exists() || fbo.exists()) { + texture.free(); + fbo.free(); + } + } bool matches(ColourBuffer& other) { return location == other.location && format == other.format && @@ -131,10 +131,10 @@ struct DepthBuffer { void free() { valid = false; - if (texture.exists()) { - texture.free(); - } - } + if (texture.exists()) { + texture.free(); + } + } bool matches(DepthBuffer& other) { return location == other.location && format == other.format &&