diff --git a/CMakeLists.txt b/CMakeLists.txt index 31fdd9f2..24cffec6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -445,7 +445,7 @@ if(ENABLE_METAL AND APPLE) # TODO: only include sources in debug builds add_custom_command( OUTPUT ${SHADER_IR} - COMMAND xcrun -sdk macosx metal -gline-tables-only -frecord-sources -o ${SHADER_IR} -c ${SHADER_SOURCE} + COMMAND xcrun -sdk macosx metal -gline-tables-only -frecord-sources -fno-fast-math -o ${SHADER_IR} -c ${SHADER_SOURCE} DEPENDS ${SHADER_SOURCE} VERBATIM) add_custom_command( diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp index 5129a446..1760cdfa 100644 --- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -58,12 +58,14 @@ public: void reset() { endFrame(); - buffer->release(); - create(); + if (buffer) { + buffer->release(); + create(); + } } private: - MTL::Buffer* buffer; + MTL::Buffer* buffer = nullptr; size_t ptr = 0; std::vector additionalAllocations; diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp index fdc41ecf..de76dc3b 100644 --- a/include/renderer_mtl/pica_to_mtl.hpp +++ b/include/renderer_mtl/pica_to_mtl.hpp @@ -35,8 +35,8 @@ inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) { switch (format) { case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm; case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm; - case ColorFmt::RGBA5551: return MTL::PixelFormatBGR5A1Unorm; - case ColorFmt::RGB565: return MTL::PixelFormatB5G6R5Unorm; + case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm? + case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm? 
case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm; } } diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index e280e1af..10bca5dd 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -563,7 +563,6 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetFragmentBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2); renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2); - renderCommandEncoder->setFragmentBytes(&depthUniforms, sizeof(depthUniforms), 3); renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size())); } @@ -719,7 +718,7 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { u32 arrayOffset = 0; renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); - renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), GPU::LightingLutSize); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), GPU::LightingLutSize); } void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { @@ -740,12 +739,13 @@ void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); renderCommandEncoder->setVertexTexture(lutTexture, 0); - Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); - renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); + //Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); + //renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); + renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0); u32 arrayOffset = (u32)Lights::LUT_Count; 
renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); - renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(128)); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(128)); } void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect) { diff --git a/src/host_shaders/metal_copy_to_lut_texture.metal b/src/host_shaders/metal_copy_to_lut_texture.metal index 5eb87320..40a7f50d 100644 --- a/src/host_shaders/metal_copy_to_lut_texture.metal +++ b/src/host_shaders/metal_copy_to_lut_texture.metal @@ -4,6 +4,6 @@ using namespace metal; constant ushort lutTextureWidth [[function_constant(0)]]; // The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass -vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d out [[texture(0)]], device float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { - out.write(float4(data[vid].x, 0.0, 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth)); +vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d out [[texture(0)]], constant float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { + out.write(float4(data[vid], 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth)); } diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal index f38d2958..95f417c7 100644 --- a/src/host_shaders/metal_shaders.metal +++ b/src/host_shaders/metal_shaders.metal @@ -102,27 +102,6 @@ struct EnvColor { } }; -struct DrawVertexOut { - float4 position [[position]]; - float depth; - float4 quaternion; - float4 color; - float3 texCoord0; - float2 texCoord1; - float2 texCoord2; - float3 view; - float3 normal; - float3 tangent; - float3 bitangent; - 
EnvColor textureEnvColor [[flat]]; - float4 textureEnvBufferColor [[flat]]; -}; - -struct DrawVertexOutWithClip { - DrawVertexOut out; - float clipDistance [[clip_distance]] [2]; -}; - float3 rotateFloat3ByQuaternion(float3 v, float4 q) { float3 u = q.xyz; float s = q.w; @@ -157,6 +136,26 @@ struct DepthUniforms { bool depthMapEnable; }; +struct DrawVertexOut { + float4 position [[position]]; + float4 quaternion; + float4 color; + float3 texCoord0; + float2 texCoord1; + float2 texCoord2; + float3 view; + float3 normal; + float3 tangent; + float3 bitangent; + EnvColor textureEnvColor [[flat]]; + float4 textureEnvBufferColor [[flat]]; +}; + +struct DrawVertexOutWithClip { + DrawVertexOut out; + float clipDistance [[clip_distance]] [2]; +}; + // TODO: check this float transformZ(float z, float w, constant DepthUniforms& depthUniforms) { z = z / w * depthUniforms.depthScale + depthUniforms.depthOffset; @@ -177,7 +176,6 @@ vertex DrawVertexOutWithClip vertexDraw(DrawVertexIn in [[stage_in]], constant P // Apply depth uniforms out.position.z = transformZ(out.position.z, out.position.w, depthUniforms); - out.depth = out.position.z; // Color out.color = min(abs(in.color), 1.0); @@ -678,7 +676,7 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) { return as_type(performLogicOpU(logicOp, as_type(s), as_type(d))); } -fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant DepthUniforms& depthUniforms [[buffer(3)]], +fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture2d texLut [[texture(3)]], sampler samplr0 [[sampler(0)]], sampler samplr1 
[[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { Globals globals; @@ -727,19 +725,12 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c float4 color = globals.tevSources[15]; - // Depth - float z_over_w = in.position.z; - float depth = z_over_w * depthUniforms.depthScale + depthUniforms.depthOffset; - - if (!depthUniforms.depthMapEnable) // Divide z by w if depthmap enable == 0 (ie using W-buffering) - depth /= in.position.w; - // Fog bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u; if (enable_fog) { bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; - float fog_index = flip_depth ? 1.0 - depth : depth; + float fog_index = flip_depth ? 1.0 - in.position.z : in.position.z; fog_index *= 128.0; float clamped_index = clamp(floor(fog_index), 0.0, 127.0); float delta = fog_index - clamped_index;