From c5bdb28ca1a825d1fc5c7ec88c5d437a0aab3d16 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 4 Jul 2024 09:37:28 +0200 Subject: [PATCH] fix: lighting & use lut texture --- include/renderer_mtl/pica_to_mtl.hpp | 2 +- include/renderer_mtl/renderer_mtl.hpp | 7 ++- src/core/renderer_mtl/renderer_mtl.cpp | 59 +++++++++++++++++++++++--- src/host_shaders/metal_shaders.metal | 16 +++---- 4 files changed, 65 insertions(+), 19 deletions(-) diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp index 81cd831a..05b1edb9 100644 --- a/include/renderer_mtl/pica_to_mtl.hpp +++ b/include/renderer_mtl/pica_to_mtl.hpp @@ -105,7 +105,7 @@ inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) { Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); return MTL::PrimitiveTypeTriangle; case PrimType::GeometryPrimitive: - Helpers::warn("Geometry primitives are not yet, using triangles instead"); + //Helpers::warn("Geometry primitives are not yet, using triangles instead"); return MTL::PrimitiveTypeTriangle; } } diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp index 48b978e8..8f89fec1 100644 --- a/include/renderer_mtl/renderer_mtl.hpp +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -46,8 +46,10 @@ class RendererMTL final : public Renderer { Metal::DepthStencilCache depthStencilCache; Metal::VertexBufferCache vertexBufferCache; - // Helpers - MTL::SamplerState* basicSampler; + // Objects + MTL::SamplerState* nearestSampler; + MTL::SamplerState* linearSampler; + MTL::Texture* lightLUTTextureArray; // Pipelines MTL::RenderPipelineState* displayPipeline; @@ -100,4 +102,5 @@ class RendererMTL final : public Renderer { Metal::Texture& getTexture(Metal::Texture& tex); void setupTextureEnvState(MTL::RenderCommandEncoder* encoder); void bindTexturesToSlots(MTL::RenderCommandEncoder* encoder); + void updateLightingLUT(); }; diff --git a/src/core/renderer_mtl/renderer_mtl.cpp 
b/src/core/renderer_mtl/renderer_mtl.cpp index 98ba3cbf..e449cc87 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -10,6 +10,8 @@ using namespace PICA; CMRC_DECLARE(RendererMTL); +#define LIGHT_LUT_TEXTURE_WIDTH 256 + // HACK: redefinition... PICA::ColorFmt ToColorFormat(u32 format) { switch (format) { @@ -47,7 +49,7 @@ void RendererMTL::display() { beginRenderPassIfNeeded(renderPassDescriptor, drawable->texture()); renderCommandEncoder->setRenderPipelineState(displayPipeline); - renderCommandEncoder->setFragmentSamplerState(basicSampler, 0); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0); using namespace PICA::ExternalRegs; @@ -94,9 +96,29 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { metalLayer->setDevice(device); commandQueue = device->newCommandQueue(); - // Helpers + // -------- Objects -------- + + // Textures + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); + textureDescriptor->setTextureType(MTL::TextureType1DArray); + textureDescriptor->setPixelFormat(MTL::PixelFormatR16Unorm); + textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH); + textureDescriptor->setArrayLength(Lights::LUT_Count); + textureDescriptor->setUsage(MTL::TextureUsageShaderRead); + textureDescriptor->setStorageMode(MTL::StorageModeShared); + + lightLUTTextureArray = device->newTexture(textureDescriptor); + textureDescriptor->release(); + + // Samplers MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); - basicSampler = device->newSamplerState(samplerDescriptor); + nearestSampler = device->newSamplerState(samplerDescriptor); + + samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear); + samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear); + linearSampler = device->newSamplerState(samplerDescriptor); + + samplerDescriptor->release(); // -------- Pipelines -------- @@ -304,7 +326,7 @@ void 
RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, beginRenderPassIfNeeded(renderPassDescriptor, destFramebuffer->texture); renderCommandEncoder->setRenderPipelineState(blitPipeline); renderCommandEncoder->setFragmentTexture(srcFramebuffer->texture, 0); - renderCommandEncoder->setFragmentSamplerState(basicSampler, 0); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); } @@ -404,11 +426,16 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX); } + // Update the LUT texture if necessary + if (gpu.lightingLUTDirty) { + updateLightingLUT(); + } + // Bind resources setupTextureEnvState(renderCommandEncoder); bindTexturesToSlots(renderCommandEncoder); - renderCommandEncoder->setVertexBytes(&regs[0x48], 0x200 - 0x48, 0); - renderCommandEncoder->setFragmentBytes(&regs[0x48], 0x200 - 0x48, 0); + renderCommandEncoder->setVertexBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); + renderCommandEncoder->setFragmentBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2); renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size())); @@ -527,9 +554,27 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { Metal::Texture targetTex(device, addr, static_cast(format), width, height, config); auto tex = getTexture(targetTex); encoder->setFragmentTexture(tex.texture, i); - encoder->setFragmentSamplerState(tex.sampler ? tex.sampler : basicSampler, i); + encoder->setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); } else { // TODO: bind a dummy texture? 
} } + + // LUT texture + encoder->setFragmentTexture(lightLUTTextureArray, 3); + encoder->setFragmentSamplerState(linearSampler, 3); +} + +void RendererMTL::updateLightingLUT() { + gpu.lightingLUTDirty = false; + std::array u16_lightinglut; + + for (int i = 0; i < gpu.lightingLUT.size(); i++) { + uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); + u16_lightinglut[i] = value * 65535 / 4095; + } + + for (int i = 0; i < Lights::LUT_Count; i++) { + lightLUTTextureArray->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0); + } } diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal index 7da1ff39..159f7219 100644 --- a/src/host_shaders/metal_shaders.metal +++ b/src/host_shaders/metal_shaders.metal @@ -337,13 +337,11 @@ uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) { #define RG_LUT 5u #define RR_LUT 6u -float lutLookup(uint lut, uint light, float value) { +float lutLookup(texture1d_array texLightingLut, sampler linearSampler, uint lut, uint light, float value) { if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; if (lut == SP_LUT) lut = light + 8; - // TODO: uncomment - //return texLightingLut.sample(, vec2(value, lut)).r; - return 1.0; + return texLightingLut.sample(linearSampler, value, lut).r; } float3 regToColor(uint reg) { @@ -354,7 +352,7 @@ float3 regToColor(uint reg) { } // Implements the following algorthm: https://mathb.in/26766 -void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, thread float4& primaryColor, thread float4& secondaryColor) { +void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture1d_array texLightingLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { // Quaternions describe a transformation from surface-local space to eye space. 
// In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). @@ -447,7 +445,7 @@ void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, thread d[c] = 1.0; } - d[c] = lutLookup(uint(c), lightID, d[c] * 0.5 + 0.5) * scale; + d[c] = lutLookup(texLightingLut, linearSampler, uint(c), lightID, d[c] * 0.5 + 0.5) * scale; if (extract_bits(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); } else { d[c] = 1.0; @@ -512,11 +510,11 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) { } fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], - texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], - sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]]) { + texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture1d_array texLightingLut [[texture(3)]], + sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { Globals globals; globals.tevSources[0] = in.color; - calcLighting(in, picaRegs, globals.tevSources[1], globals.tevSources[2]); + calcLighting(in, picaRegs, texLightingLut, linearSampler, globals.tevSources[1], globals.tevSources[2]); uint textureConfig = picaRegs.read(0x80u); float2 texCoord2 = (textureConfig & (1u << 13)) != 0u ? in.texCoord1 : in.texCoord2;