From 158be432fcf36ca4165bca1868850b3339a7dace Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 31 Oct 2024 09:25:01 +0100 Subject: [PATCH] rework the lut system --- CMakeLists.txt | 4 +- .../renderer_mtl/mtl_draw_pipeline_cache.hpp | 5 +- include/renderer_mtl/mtl_lut_texture.hpp | 25 ++ include/renderer_mtl/mtl_texture.hpp | 1 + include/renderer_mtl/renderer_mtl.hpp | 29 +- src/core/renderer_mtl/mtl_lut_texture.cpp | 32 +++ src/core/renderer_mtl/renderer_mtl.cpp | 269 +++++++++++------- .../metal_copy_to_lut_texture.metal | 2 +- src/host_shaders/metal_shaders.metal | 58 ++-- 9 files changed, 273 insertions(+), 152 deletions(-) create mode 100644 include/renderer_mtl/mtl_lut_texture.hpp create mode 100644 src/core/renderer_mtl/mtl_lut_texture.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fac11cbc..854b9b9e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -482,6 +482,7 @@ if(ENABLE_METAL AND APPLE) include/renderer_mtl/mtl_render_target.hpp include/renderer_mtl/mtl_texture.hpp include/renderer_mtl/mtl_vertex_buffer_cache.hpp + include/renderer_mtl/mtl_lut_texture.hpp include/renderer_mtl/pica_to_mtl.hpp include/renderer_mtl/objc_helper.hpp ) @@ -490,6 +491,7 @@ if(ENABLE_METAL AND APPLE) src/core/renderer_mtl/renderer_mtl.cpp src/core/renderer_mtl/mtl_texture.cpp src/core/renderer_mtl/mtl_etc1.cpp + src/core/renderer_mtl/mtl_lut_texture.cpp src/core/renderer_mtl/objc_helper.mm src/host_shaders/metal_shaders.metal src/host_shaders/metal_copy_to_lut_texture.metal @@ -587,7 +589,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) if(NOT ENABLE_OPENGL) message(FATAL_ERROR "Qt frontend requires OpenGL") endif() - + option(GENERATE_QT_TRANSLATION "Generate Qt translation file" OFF) set(QT_LANGUAGES docs/translations) diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp index 8bfea636..c5105a13 100644 --- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -135,7 +135,10 @@ public: colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); } - desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt); + desc->setDepthAttachmentPixelFormat(depthFormat); + if (hash.depthFmt == DepthFmt::Depth24Stencil8) + desc->setStencilAttachmentPixelFormat(depthFormat); NS::Error* error = nullptr; desc->setLabel(toNSString("Draw pipeline")); diff --git a/include/renderer_mtl/mtl_lut_texture.hpp b/include/renderer_mtl/mtl_lut_texture.hpp new file mode 100644 index 00000000..162bfe25 --- /dev/null +++ b/include/renderer_mtl/mtl_lut_texture.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace Metal { + +class LutTexture { +public: + LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name); + ~LutTexture(); + + u32 getNextIndex(); + + // Getters + MTL::Texture* getTexture() { return texture; } + + u32 getCurrentIndex() { return currentIndex; } + +private: + MTL::Texture* texture; + + u32 currentIndex = 0; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp index 590132bd..9cec268d 100644 --- a/include/renderer_mtl/mtl_texture.hpp +++ b/include/renderer_mtl/mtl_texture.hpp @@ -1,4 +1,5 @@ #pragma once + #include #include #include diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp index 9ba0937a..e28b63b4 100644 --- a/include/renderer_mtl/renderer_mtl.hpp +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -1,3 +1,5 @@ +#pragma once + #include #include @@ -8,6 +10,8 @@ #include "mtl_draw_pipeline_cache.hpp" #include "mtl_depth_stencil_cache.hpp" #include "mtl_vertex_buffer_cache.hpp" +#include "mtl_lut_texture.hpp" + // HACK: use the OpenGL cache #include "../renderer_gl/surface_cache.hpp" @@ -54,12 +58,15 @@ class RendererMTL final : public Renderer { Metal::DepthStencilCache depthStencilCache; Metal::VertexBufferCache vertexBufferCache; - // Objects + // Resources MTL::SamplerState* nearestSampler; MTL::SamplerState* linearSampler; - MTL::Texture* lutTexture; + MTL::Texture* nullTexture; MTL::DepthStencilState* defaultDepthStencilState; + Metal::LutTexture* lutLightingTexture; + Metal::LutTexture* lutFogTexture; + // Pipelines MTL::RenderPipelineState* displayPipeline; MTL::RenderPipelineState* copyToLutTexturePipeline; @@ -91,21 +98,7 @@ class RendererMTL final : public Renderer { } } - void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr) { - createCommandBufferIfNeeded(); - - if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { - endRenderPass(); - - renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); - renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); - - lastColorTexture = colorTexture; - lastDepthTexture = depthTexture; - } - - renderPassDescriptor->release(); - } + void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr); void commitCommandBuffer() { if (renderCommandEncoder) { @@ -115,6 +108,8 @@ class RendererMTL final : public Renderer { } if (commandBuffer) { commandBuffer->commit(); + // HACK + commandBuffer->waitUntilCompleted(); commandBuffer->release(); commandBuffer = nullptr; } diff --git a/src/core/renderer_mtl/mtl_lut_texture.cpp b/src/core/renderer_mtl/mtl_lut_texture.cpp new file mode 100644 index 00000000..ac4ff6d9 --- /dev/null +++ b/src/core/renderer_mtl/mtl_lut_texture.cpp @@ -0,0 +1,32 @@ +#include "renderer_mtl/renderer_mtl.hpp" + +namespace Metal { + +constexpr u32 LAYER_COUNT = 1024; + +LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) { + MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); + desc->setTextureType(type); + desc->setPixelFormat(pixelFormat); + desc->setWidth(width); + desc->setHeight(height); + desc->setArrayLength(LAYER_COUNT); + desc->setUsage(MTL::TextureUsageShaderRead/* | MTL::TextureUsageShaderWrite*/); + desc->setStorageMode(MTL::StorageModeShared); + + texture = device->newTexture(desc); + texture->setLabel(toNSString(name)); + desc->release(); +} + +LutTexture::~LutTexture() { + texture->release(); +} + +u32 LutTexture::getNextIndex() { + currentIndex = (currentIndex + 1) % LAYER_COUNT; + + return currentIndex; +} + +} // namespace Metal diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index bdb5390d..bf2cdab1 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -1,17 +1,21 @@ -#include "PICA/gpu.hpp" #include "renderer_mtl/renderer_mtl.hpp" -#include "renderer_mtl/objc_helper.hpp" #include #include +#include "renderer_mtl/mtl_lut_texture.hpp" +// HACK +#undef NO + +#include "PICA/gpu.hpp" #include "SDL_metal.h" using namespace PICA; CMRC_DECLARE(RendererMTL); -const u16 LIGHT_LUT_TEXTURE_WIDTH = 256; +const u16 LIGHTING_LUT_TEXTURE_WIDTH = 256; +const u32 FOG_LUT_TEXTURE_WIDTH = 128; // HACK: redefinition... PICA::ColorFmt ToColorFormat(u32 format) { @@ -23,10 +27,10 @@ PICA::ColorFmt ToColorFormat(u32 format) { } MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) { - //MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init(); + // MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init(); NS::Error* error = nullptr; MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error); - //MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error); + // MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error); if (error) { Helpers::panic("Error loading shaders: %s", error->description()->cString(NS::ASCIIStringEncoding)); } @@ -39,19 +43,19 @@ RendererMTL::RendererMTL(GPU& gpu, const std::array& internalRegs, RendererMTL::~RendererMTL() {} void RendererMTL::reset() { - vertexBufferCache.reset(); - depthStencilCache.reset(); - drawPipelineCache.reset(); - blitPipelineCache.reset(); - textureCache.reset(); - depthStencilRenderTargetCache.reset(); + vertexBufferCache.reset(); + depthStencilCache.reset(); + drawPipelineCache.reset(); + blitPipelineCache.reset(); + textureCache.reset(); + depthStencilRenderTargetCache.reset(); colorRenderTargetCache.reset(); } void RendererMTL::display() { CA::MetalDrawable* drawable = metalLayer->nextDrawable(); if (!drawable) { - return; + return; } using namespace PICA::ExternalRegs; @@ -62,7 +66,7 @@ void RendererMTL::display() { auto topScreen = colorRenderTargetCache.findFromAddress(topScreenAddr); if (topScreen) { - clearColor(nullptr, topScreen->get().texture); + clearColor(nullptr, topScreen->get().texture); } // Bottom screen @@ -71,7 +75,7 @@ void RendererMTL::display() { auto bottomScreen = colorRenderTargetCache.findFromAddress(bottomScreenAddr); if (bottomScreen) { - clearColor(nullptr, bottomScreen->get().texture); + clearColor(nullptr, bottomScreen->get().texture); } // -------- Draw -------- @@ -131,14 +135,14 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { // Textures MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); textureDescriptor->setTextureType(MTL::TextureType2D); - textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA32Float); - textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH); - textureDescriptor->setHeight(Lights::LUT_Count + 1); - textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); + textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA8Unorm); + textureDescriptor->setWidth(1); + textureDescriptor->setHeight(1); textureDescriptor->setStorageMode(MTL::StorageModePrivate); + textureDescriptor->setUsage(MTL::TextureUsageShaderRead); - lutTexture = device->newTexture(textureDescriptor); - lutTexture->setLabel(toNSString("LUT texture")); + nullTexture = device->newTexture(textureDescriptor); + nullTexture->setLabel(toNSString("Null texture")); textureDescriptor->release(); // Samplers @@ -153,6 +157,9 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { samplerDescriptor->release(); + lutLightingTexture = new Metal::LutTexture(device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture"); + lutFogTexture = new Metal::LutTexture(device, MTL::TextureType1DArray, MTL::PixelFormatRG32Float, FOG_LUT_TEXTURE_WIDTH, 1, "Fog LUT texture"); + // -------- Pipelines -------- // Load shaders @@ -249,14 +256,15 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { // Copy to LUT texture MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); - constants->setConstantValue(&LIGHT_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0)); + constants->setConstantValue(&LIGHTING_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0)); - error = nullptr; - MTL::Function* vertexCopyToLutTextureFunction = copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error); - if (error) { - Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding)); - } - constants->release(); + error = nullptr; + MTL::Function* vertexCopyToLutTextureFunction = + copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); MTL::RenderPipelineDescriptor* copyToLutTexturePipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); copyToLutTexturePipelineDescriptor->setVertexFunction(vertexCopyToLutTextureFunction); @@ -314,8 +322,8 @@ void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 c depthClearOps[depth->get().texture] = depthVal; if (format == DepthFmt::Depth24Stencil8) { - const u8 stencilVal = value >> 24; - stencilClearOps[depth->get().texture] = stencilVal; + const u8 stencilVal = value >> 24; + stencilClearOps[depth->get().texture] = stencilVal; } return; @@ -365,7 +373,7 @@ void RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, } void RendererMTL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { - // Texture copy size is aligned to 16 byte units + // Texture copy size is aligned to 16 byte units const u32 copySize = totalBytes & ~0xf; if (copySize == 0) { Helpers::warn("TextureCopy total bytes less than 16!\n"); @@ -463,33 +471,33 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spanformat, DepthFmt::Unknown1}; if (depthStencilRenderTarget) { - pipelineHash.depthFmt = depthStencilRenderTarget->format; - } - pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1; - pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7; - pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u]; - pipelineHash.fragHash.alphaControl = regs[0x104]; + pipelineHash.depthFmt = depthStencilRenderTarget->format; + } + pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1; + pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7; + pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u]; + pipelineHash.fragHash.alphaControl = regs[0x104]; // Blending and logic op pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; pipelineHash.colorWriteMask = colorMask; - u8 logicOp = 3; // Copy, which doesn't do anything + u8 logicOp = 3; // Copy if (pipelineHash.blendEnabled) { - pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc]; + pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc]; } else { - logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]); + logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]); } MTL::RenderPipelineState* pipeline = drawPipelineCache.get(pipelineHash); @@ -500,25 +508,25 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spaninit(); bool doesClear = clearColor(renderPassDescriptor, colorRenderTarget->texture); - if (depthStencilRenderTarget) { - if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture)) - doesClear = true; - if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) { - if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture)) - doesClear = true; - } - } + if (depthStencilRenderTarget) { + if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture)) doesClear = true; + if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) { + if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture)) doesClear = true; + } + } - nextRenderPassName = "Draw vertices"; - beginRenderPassIfNeeded(renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? depthStencilRenderTarget->texture : nullptr)); + nextRenderPassName = "Draw vertices"; + beginRenderPassIfNeeded( + renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? depthStencilRenderTarget->texture : nullptr) + ); // Update the LUT texture if necessary if (gpu.lightingLUTDirty) { updateLightingLUT(renderCommandEncoder); } if (gpu.fogLUTDirty) { - updateFogLUT(renderCommandEncoder); - } + updateFogLUT(renderCommandEncoder); + } renderCommandEncoder->setRenderPipelineState(pipeline); renderCommandEncoder->setDepthStencilState(depthStencilState); @@ -526,7 +534,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX); } else { - Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes()); + Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes()); renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX); } @@ -541,20 +549,20 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span(constantColor); - const u8 b = Helpers::getBits<16, 8>(constantColor); - const u8 a = Helpers::getBits<24, 8>(constantColor); + u32 constantColor = regs[PICA::InternalRegs::BlendColour]; + const u8 r = constantColor & 0xff; + const u8 g = Helpers::getBits<8, 8>(constantColor); + const u8 b = Helpers::getBits<16, 8>(constantColor); + const u8 a = Helpers::getBits<24, 8>(constantColor); - renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f); + renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f); } // Stencil reference if (stencilEnable) { - const s8 reference = s8(Helpers::getBits<16, 8>(depthStencilHash.stencilConfig)); // Signed reference value - renderCommandEncoder->setStencilReferenceValue(reference); - } + const s8 reference = s8(Helpers::getBits<16, 8>(depthStencilHash.stencilConfig)); // Signed reference value + renderCommandEncoder->setStencilReferenceValue(reference); + } // Bind resources setupTextureEnvState(renderCommandEncoder); @@ -563,6 +571,8 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetFragmentBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2); renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2); + u32 lutSlices[2] = {lutLightingTexture->getCurrentIndex(), lutFogTexture->getCurrentIndex()}; + renderCommandEncoder->setFragmentBytes(&lutSlices, sizeof(lutSlices), 3); renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size())); } @@ -575,11 +585,14 @@ void RendererMTL::screenshot(const std::string& name) { void RendererMTL::deinitGraphicsContext() { reset(); + delete lutLightingTexture; + delete lutFogTexture; + // Release copyToLutTexturePipeline->release(); displayPipeline->release(); defaultDepthStencilState->release(); - lutTexture->release(); + nullTexture->release(); linearSampler->release(); nearestSampler->release(); library->release(); @@ -607,10 +620,10 @@ std::optional RendererMTL::getColorRenderTarget( auto& colorBuffer = colorRenderTargetCache.add(sampleBuffer); - // Clear the color buffer - colorClearOps[colorBuffer.texture] = {0, 0, 0, 0}; + // Clear the color buffer + colorClearOps[colorBuffer.texture] = {0, 0, 0, 0}; - return colorBuffer; + return colorBuffer; } Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { @@ -622,13 +635,13 @@ Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { } else { auto& depthBuffer = depthStencilRenderTargetCache.add(sampleBuffer); - // Clear the depth buffer - depthClearOps[depthBuffer.texture] = 0.0f; - if (depthBuffer.format == DepthFmt::Depth24Stencil8) { - stencilClearOps[depthBuffer.texture] = 0; - } + // Clear the depth buffer + depthClearOps[depthBuffer.texture] = 0.0f; + if (depthBuffer.format == DepthFmt::Depth24Stencil8) { + stencilClearOps[depthBuffer.texture] = 0; + } - return depthBuffer; + return depthBuffer; } } @@ -683,7 +696,9 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { for (int i = 0; i < 3; i++) { if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { - continue; + encoder->setFragmentTexture(nullTexture, i); + encoder->setFragmentSamplerState(nearestSampler, i); + continue; } const size_t ioBase = ioBases[i]; @@ -701,42 +716,55 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { encoder->setFragmentTexture(tex.texture, i); encoder->setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); } else { - // TODO: bind a dummy texture? + // TODO: log } } - - // LUT texture - encoder->setFragmentTexture(lutTexture, 3); - encoder->setFragmentSamplerState(linearSampler, 3); } void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { gpu.lightingLUTDirty = false; - std::array lightingLut = {0.0f}; - for (int i = 0; i < gpu.lightingLUT.size(); i += 2) { - uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF; - lightingLut[i] = (float)(value << 4) / 65535.0f; + std::array lightingLut; + + for (int i = 0; i < gpu.lightingLUT.size(); i++) { + uint64_t value = gpu.lightingLUT[i] & 0xFFF; + lightingLut[i] = (value << 4); } - //for (int i = 0; i < Lights::LUT_Count; i++) { - // lutTexture->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0); - //} + u32 index = lutLightingTexture->getNextIndex(); + lutLightingTexture->getTexture()->replaceRegion(MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0); + /* + endRenderPass(); + + Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); + + auto blitCommandEncoder = commandBuffer->blitCommandEncoder(); + blitCommandEncoder->copyFromBuffer(buffer.buffer, buffer.offset, LIGHT_LUT_TEXTURE_WIDTH * 2 * 4, 0, MTL::Size(LIGHT_LUT_TEXTURE_WIDTH, + Lights::LUT_Count, 1), lutLightingTexture, 0, 0, MTL::Origin(0, 0, 0)); + + blitCommandEncoder->endEncoding(); + */ + + /* renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); - renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); - renderCommandEncoder->setVertexTexture(lutTexture, 0); + renderCommandEncoder->setVertexTexture(lutLightingTexture, 0); Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); u32 arrayOffset = 0; renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); - renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), GPU::LightingLutSize); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), GPU::LightingLutSize); + + MTL::Resource* barrierResources[] = {lutLightingTexture}; + renderCommandEncoder->memoryBarrier(barrierResources, 1, MTL::RenderStageVertex, MTL::RenderStageFragment); + */ } void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { gpu.fogLUTDirty = false; - std::array fogLut = {0.0f}; + + std::array fogLut = {0.0f}; for (int i = 0; i < fogLut.size(); i += 2) { const uint32_t value = gpu.fogLUT[i >> 1]; @@ -749,20 +777,31 @@ void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { fogLut[i + 1] = fogDifference; } + u32 index = lutFogTexture->getNextIndex(); + lutFogTexture->getTexture()->replaceRegion(MTL::Region(0, 0, FOG_LUT_TEXTURE_WIDTH, 1), 0, index, fogLut.data(), 0, 0); + + /* renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); - renderCommandEncoder->setVertexTexture(lutTexture, 0); - //Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); - //renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); + renderCommandEncoder->setVertexTexture(lutLightingTexture, 0); + // Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); + // renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0); u32 arrayOffset = (u32)Lights::LUT_Count; renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); - renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(128)); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(128)); + + MTL::Resource* barrierResources[] = {lutLightingTexture}; + renderCommandEncoder->memoryBarrier(barrierResources, 1, MTL::RenderStageVertex, MTL::RenderStageFragment); + */ } -void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect) { - nextRenderPassName = "Texture copy"; +void RendererMTL::textureCopyImpl( + Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, + const Math::Rect& destRect +) { + nextRenderPassName = "Texture copy"; MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole texture bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture); @@ -775,8 +814,13 @@ void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Meta renderCommandEncoder->setRenderPipelineState(blitPipeline); // Viewport - renderCommandEncoder->setViewport(MTL::Viewport{double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0}); - float srcRectNDC[4] = {srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(), (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v()}; + renderCommandEncoder->setViewport(MTL::Viewport{ + double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0 + }); + float srcRectNDC[4] = { + srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(), + (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v() + }; // Bind resources renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0); @@ -785,3 +829,26 @@ void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Meta renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); } + +void RendererMTL::beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture) { + createCommandBufferIfNeeded(); + + if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { + endRenderPass(); + + renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); + renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); + + // Bind persistent resources + + // LUT texture + renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3); + renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4); + renderCommandEncoder->setFragmentSamplerState(linearSampler, 3); + + lastColorTexture = colorTexture; + lastDepthTexture = depthTexture; + } + + renderPassDescriptor->release(); +} diff --git a/src/host_shaders/metal_copy_to_lut_texture.metal b/src/host_shaders/metal_copy_to_lut_texture.metal index 40a7f50d..c21246f1 100644 --- a/src/host_shaders/metal_copy_to_lut_texture.metal +++ b/src/host_shaders/metal_copy_to_lut_texture.metal @@ -4,6 +4,6 @@ using namespace metal; constant ushort lutTextureWidth [[function_constant(0)]]; // The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass -vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d out [[texture(0)]], constant float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { +vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d out [[texture(0)]], device float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { out.write(float4(data[vid], 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth)); } diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal index 95f417c7..c2c1799f 100644 --- a/src/host_shaders/metal_shaders.metal +++ b/src/host_shaders/metal_shaders.metal @@ -406,13 +406,11 @@ uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) { #define RG_LUT 5u #define RR_LUT 6u -#define FOG_INDEX 24 - -float lutLookup(texture2d texLut, uint lut, uint index) { - return texLut.read(uint2(index, lut)).r; +float lutLookup(texture2d_array texLut, uint slice, uint lut, uint index) { + return texLut.read(uint2(index, lut), slice).r; } -float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d texLut, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) { +float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array texLut, uint slice, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) { uint lut_index; int bit_in_config1; if (lut_id == SP_LUT) { @@ -498,12 +496,12 @@ float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant delta = abs(delta); } int index = int(clamp(floor(delta * 255.0), 0.f, 255.f)); - return lutLookup(texLut, lut_index, index) * scale; + return lutLookup(texLut, slice, lut_index, index) * scale; } else { // Range is [-1, 1] so we need to map it to [0, 1] int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); if (index < 0) index += 256; - return lutLookup(texLut, lut_index, index) * scale; + return lutLookup(texLut, slice, lut_index, index) * scale; } } @@ -515,7 +513,7 @@ float3 regToColor(uint reg) { } // Implements the following algorthm: https://mathb.in/26766 -void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d texLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { +void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array texLut, uint slice, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { // Quaternions describe a transformation from surface-local space to eye space. // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). @@ -566,10 +564,10 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi globals.GPUREG_LIGHTi_CONFIG = picaRegs.read(0x0149u + (lightId << 4u)); float lightDistance; - float3 lightPosition = normalize(float3( + float3 lightPosition = float3( decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) - )); + ); // Positional Light if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { @@ -613,23 +611,23 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi float delta = lightDistance * distanceAttenuationScale + distanceAttenuationBias; delta = clamp(delta, 0.0, 1.0); int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); - distanceAttenuation = lutLookup(texLut, 16u + lightId, index); + distanceAttenuation = lutLookup(texLut, slice, 16u + lightId, index); } - float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, environmentId, SP_LUT, lightId, lightVector, halfVector); - float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D0_LUT, lightId, lightVector, halfVector); - float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D1_LUT, lightId, lightVector, halfVector); + float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, SP_LUT, lightId, lightVector, halfVector); + float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, D0_LUT, lightId, lightVector, halfVector); + float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, D1_LUT, lightId, lightVector, halfVector); float3 reflectedColor; - reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RR_LUT, lightId, lightVector, halfVector); + reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RR_LUT, lightId, lightVector, halfVector); if (isSamplerEnabled(environmentId, RG_LUT)) { - reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RG_LUT, lightId, lightVector, halfVector); + reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RG_LUT, lightId, lightVector, halfVector); } else { reflectedColor.g = reflectedColor.r; } if (isSamplerEnabled(environmentId, RB_LUT)) { - reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RB_LUT, lightId, lightVector, halfVector); + reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RB_LUT, lightId, lightVector, halfVector); } else { reflectedColor.b = reflectedColor.r; } @@ -655,7 +653,7 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi float fresnelFactor; if (fresnelOutput1 == 1u || fresnelOutput2 == 1u) { - fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, environmentId, FR_LUT, lightId, lightVector, halfVector); + fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, FR_LUT, lightId, lightVector, halfVector); } if (fresnelOutput1 == 1u) { @@ -676,9 +674,7 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) { return as_type(performLogicOpU(logicOp, as_type(s), as_type(d))); } -fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], - texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture2d texLut [[texture(3)]], - sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { +fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture2d_array texLightingLut [[texture(3)]], texture1d_array texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { Globals globals; // HACK @@ -689,7 +685,7 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c globals.tevSources[0] = in.color; if (lightingEnabled) { - calcLighting(globals, in, picaRegs, texLut, linearSampler, globals.tevSources[1], globals.tevSources[2]); + calcLighting(globals, in, picaRegs, texLightingLut, lutSlices.x, linearSampler, globals.tevSources[1], globals.tevSources[2]); } else { globals.tevSources[1] = float4(0.0); globals.tevSources[2] = float4(0.0); @@ -729,13 +725,13 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u; if (enable_fog) { - bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; - float fog_index = flip_depth ? 1.0 - in.position.z : in.position.z; - fog_index *= 128.0; - float clamped_index = clamp(floor(fog_index), 0.0, 127.0); - float delta = fog_index - clamped_index; - float2 value = texLut.read(uint2(clamped_index, FOG_INDEX)).rg; - float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0); + bool flipDepth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; + float fogIndex = flipDepth ? 1.0 - in.position.z : in.position.z; + fogIndex *= 128.0; + float clampedIndex = clamp(floor(fogIndex), 0.0, 127.0); + float delta = fogIndex - clampedIndex; + float2 value = texFogLut.read(clampedIndex, lutSlices.y).rg; + float fogFactor = clamp(value.r + value.g * delta, 0.0, 1.0); uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u); @@ -743,9 +739,9 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0; float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0; float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0; - float3 fog_color = float3(r, g, b); + float3 fogColor = float3(r, g, b); - color.rgb = mix(fog_color, color.rgb, fog_factor); + color.rgb = mix(fogColor, color.rgb, fogFactor); } // Perform alpha test