From 272c24d8e44c0a46bd1f63bf5d5a9d30bcc5a663 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 31 Oct 2024 13:45:05 +0100 Subject: [PATCH] don't bind resources unnecessarily --- CMakeLists.txt | 11 +++- include/renderer_mtl/mtl_command_encoder.hpp | 58 ++++++++++++++++++++ include/renderer_mtl/mtl_common.hpp | 6 ++ include/renderer_mtl/objc_helper.hpp | 2 +- include/renderer_mtl/renderer_mtl.hpp | 7 ++- src/core/renderer_mtl/renderer_mtl.cpp | 37 +++++++------ src/host_shaders/metal_blit.metal | 29 ++++++++++ src/host_shaders/metal_shaders.metal | 19 ------- 8 files changed, 128 insertions(+), 41 deletions(-) create mode 100644 include/renderer_mtl/mtl_command_encoder.hpp create mode 100644 include/renderer_mtl/mtl_common.hpp create mode 100644 src/host_shaders/metal_blit.metal diff --git a/CMakeLists.txt b/CMakeLists.txt index 854b9b9e..d25973c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -483,6 +483,8 @@ if(ENABLE_METAL AND APPLE) include/renderer_mtl/mtl_texture.hpp include/renderer_mtl/mtl_vertex_buffer_cache.hpp include/renderer_mtl/mtl_lut_texture.hpp + include/renderer_mtl/mtl_command_encoder.hpp + include/renderer_mtl/mtl_common.hpp include/renderer_mtl/pica_to_mtl.hpp include/renderer_mtl/objc_helper.hpp ) @@ -494,7 +496,8 @@ if(ENABLE_METAL AND APPLE) src/core/renderer_mtl/mtl_lut_texture.cpp src/core/renderer_mtl/objc_helper.mm src/host_shaders/metal_shaders.metal - src/host_shaders/metal_copy_to_lut_texture.metal + src/host_shaders/metal_blit.metal + #src/host_shaders/metal_copy_to_lut_texture.metal ) set(HEADER_FILES ${HEADER_FILES} ${RENDERER_MTL_INCLUDE_FILES}) @@ -520,7 +523,8 @@ if(ENABLE_METAL AND APPLE) endfunction() add_metal_shader(metal_shaders) - add_metal_shader(metal_copy_to_lut_texture) + add_metal_shader(metal_blit) + #add_metal_shader(metal_copy_to_lut_texture) add_custom_target( compile_msl_shaders @@ -532,7 +536,8 @@ if(ENABLE_METAL AND APPLE) NAMESPACE RendererMTL WHENCE "src/host_shaders/" "src/host_shaders/metal_shaders.metallib" - "src/host_shaders/metal_copy_to_lut_texture.metallib" + "src/host_shaders/metal_blit.metallib" + #"src/host_shaders/metal_copy_to_lut_texture.metallib" ) add_dependencies(resources_renderer_mtl compile_msl_shaders) diff --git a/include/renderer_mtl/mtl_command_encoder.hpp b/include/renderer_mtl/mtl_command_encoder.hpp new file mode 100644 index 00000000..be66699d --- /dev/null +++ b/include/renderer_mtl/mtl_command_encoder.hpp @@ -0,0 +1,58 @@ +#pragma once + +#include + +namespace Metal { + +struct RenderState { + MTL::RenderPipelineState* renderPipelineState = nullptr; + MTL::DepthStencilState* depthStencilState = nullptr; + MTL::Texture* textures[3] = {nullptr}; + MTL::SamplerState* samplerStates[3] = {nullptr}; +}; + +class CommandEncoder { +public: + void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) { + renderCommandEncoder = rce; + + // Reset the render state + renderState = RenderState{}; + } + + // Resource binding + void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) { + if (renderPipelineState != renderState.renderPipelineState) { + renderCommandEncoder->setRenderPipelineState(renderPipelineState); + renderState.renderPipelineState = renderPipelineState; + } + } + + void setDepthStencilState(MTL::DepthStencilState* depthStencilState) { + if (depthStencilState != renderState.depthStencilState) { + renderCommandEncoder->setDepthStencilState(depthStencilState); + renderState.depthStencilState = depthStencilState; + } + } + + void setFragmentTexture(MTL::Texture* texture, u32 index) { + if (texture != renderState.textures[index]) { + renderCommandEncoder->setFragmentTexture(texture, index); + renderState.textures[index] = texture; + } + } + + void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) { + if (samplerState != renderState.samplerStates[index]) { + renderCommandEncoder->setFragmentSamplerState(samplerState, index); + renderState.samplerStates[index] = samplerState; + } + } + +private: + MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; + + RenderState renderState; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_common.hpp b/include/renderer_mtl/mtl_common.hpp new file mode 100644 index 00000000..a148520f --- /dev/null +++ b/include/renderer_mtl/mtl_common.hpp @@ -0,0 +1,6 @@ +#pragma once + +#include + +#define GET_HELPER_TEXTURE_BINDING(binding) (30 - binding) +#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - binding) diff --git a/include/renderer_mtl/objc_helper.hpp b/include/renderer_mtl/objc_helper.hpp index 91756d24..7d0e8646 100644 --- a/include/renderer_mtl/objc_helper.hpp +++ b/include/renderer_mtl/objc_helper.hpp @@ -2,7 +2,7 @@ #include -#include +#include "mtl_common.hpp" namespace Metal { diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp index e28b63b4..6b356896 100644 --- a/include/renderer_mtl/renderer_mtl.hpp +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -11,6 +11,7 @@ #include "mtl_depth_stencil_cache.hpp" #include "mtl_vertex_buffer_cache.hpp" #include "mtl_lut_texture.hpp" +#include "mtl_command_encoder.hpp" // HACK: use the OpenGL cache #include "../renderer_gl/surface_cache.hpp" @@ -46,6 +47,8 @@ class RendererMTL final : public Renderer { MTL::Device* device; MTL::CommandQueue* commandQueue; + Metal::CommandEncoder commandEncoder; + // Libraries MTL::Library* library; @@ -69,7 +72,7 @@ class RendererMTL final : public Renderer { // Pipelines MTL::RenderPipelineState* displayPipeline; - MTL::RenderPipelineState* copyToLutTexturePipeline; + //MTL::RenderPipelineState* copyToLutTexturePipeline; // Clears std::map colorClearOps; @@ -177,7 +180,7 @@ class RendererMTL final : public Renderer { Metal::DepthStencilRenderTarget& getDepthRenderTarget(); Metal::Texture& getTexture(Metal::Texture& tex); void setupTextureEnvState(MTL::RenderCommandEncoder* encoder); - void bindTexturesToSlots(MTL::RenderCommandEncoder* encoder); + void bindTexturesToSlots(); void updateLightingLUT(MTL::RenderCommandEncoder* encoder); void updateFogLUT(MTL::RenderCommandEncoder* encoder); void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect); diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index bf2cdab1..8401eecb 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -165,7 +165,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { // Load shaders auto mtlResources = cmrc::RendererMTL::get_filesystem(); library = loadLibrary(device, mtlResources.open("metal_shaders.metallib")); - MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); + MTL::Library* blitLibrary = loadLibrary(device, mtlResources.open("metal_blit.metallib")); + //MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); // Display MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding)); @@ -188,8 +189,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { fragmentDisplayFunction->release(); // Blit - MTL::Function* vertexBlitFunction = library->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding)); - MTL::Function* fragmentBlitFunction = library->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding)); + MTL::Function* vertexBlitFunction = blitLibrary->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding)); + MTL::Function* fragmentBlitFunction = blitLibrary->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding)); blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction); @@ -255,6 +256,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor); // Copy to LUT texture + /* MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); constants->setConstantValue(&LIGHTING_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0)); @@ -279,6 +281,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { } copyToLutTexturePipelineDescriptor->release(); vertexCopyToLutTextureFunction->release(); + */ // Depth stencil cache depthStencilCache.set(device); @@ -293,7 +296,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { depthStencilDescriptor->release(); // Release - copyToLutTextureLibrary->release(); + blitLibrary->release(); + //copyToLutTextureLibrary->release(); } void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { @@ -528,8 +532,8 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetRenderPipelineState(pipeline); - renderCommandEncoder->setDepthStencilState(depthStencilState); + commandEncoder.setRenderPipelineState(pipeline); + commandEncoder.setDepthStencilState(depthStencilState); // If size is < 4KB, use inline vertex data, otherwise use a buffer if (vertices.size_bytes() < 4 * 1024) { renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX); @@ -566,7 +570,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetVertexBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); renderCommandEncoder->setFragmentBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2); @@ -589,7 +593,7 @@ void RendererMTL::deinitGraphicsContext() { delete lutFogTexture; // Release - copyToLutTexturePipeline->release(); + //copyToLutTexturePipeline->release(); displayPipeline->release(); defaultDepthStencilState->release(); nullTexture->release(); @@ -687,7 +691,7 @@ void RendererMTL::setupTextureEnvState(MTL::RenderCommandEncoder* encoder) { encoder->setFragmentBytes(&envState, sizeof(envState), 1); } -void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { +void RendererMTL::bindTexturesToSlots() { static constexpr std::array ioBases = { PICA::InternalRegs::Tex0BorderColor, PICA::InternalRegs::Tex1BorderColor, @@ -696,8 +700,8 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { for (int i = 0; i < 3; i++) { if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { - encoder->setFragmentTexture(nullTexture, i); - encoder->setFragmentSamplerState(nearestSampler, i); + commandEncoder.setFragmentTexture(nullTexture, i); + commandEncoder.setFragmentSamplerState(nearestSampler, i); continue; } @@ -713,8 +717,8 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { if (addr != 0) [[likely]] { Metal::Texture targetTex(device, addr, static_cast(format), width, height, config); auto tex = getTexture(targetTex); - encoder->setFragmentTexture(tex.texture, i); - encoder->setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); + commandEncoder.setFragmentTexture(tex.texture, i); + commandEncoder.setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); } else { // TODO: log } @@ -811,7 +815,7 @@ void RendererMTL::textureCopyImpl( Metal::BlitPipelineHash hash{destFramebuffer.format, DepthFmt::Unknown1}; auto blitPipeline = blitPipelineCache.get(hash); - renderCommandEncoder->setRenderPipelineState(blitPipeline); + commandEncoder.setRenderPipelineState(blitPipeline); // Viewport renderCommandEncoder->setViewport(MTL::Viewport{ @@ -824,8 +828,8 @@ void RendererMTL::textureCopyImpl( // Bind resources renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0); - renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, 0); - renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0); + renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, GET_HELPER_TEXTURE_BINDING(0)); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, GET_HELPER_SAMPLER_STATE_BINDING(0)); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); } @@ -838,6 +842,7 @@ void RendererMTL::beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassD renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); + commandEncoder.newRenderCommandEncoder(renderCommandEncoder); // Bind persistent resources diff --git a/src/host_shaders/metal_blit.metal b/src/host_shaders/metal_blit.metal new file mode 100644 index 00000000..31b94ec4 --- /dev/null +++ b/src/host_shaders/metal_blit.metal @@ -0,0 +1,29 @@ +#include +using namespace metal; + +#define GET_HELPER_TEXTURE_BINDING(binding) (30 - binding) +#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - binding) + +struct BasicVertexOut { + float4 position [[position]]; + float2 uv; +}; + +struct NDCViewport { + float2 offset; + float2 scale; +}; + +vertex BasicVertexOut vertexBlit(uint vid [[vertex_id]], constant NDCViewport& viewport [[buffer(0)]]) { + BasicVertexOut out; + out.uv = float2((vid << 1) & 2, vid & 2); + out.position = float4(out.uv * 2.0 - 1.0, 0.0, 1.0); + out.position.y = -out.position.y; + out.uv = out.uv * viewport.scale + viewport.offset; + + return out; +} + +fragment float4 fragmentBlit(BasicVertexOut in [[stage_in]], texture2d tex [[texture(GET_HELPER_TEXTURE_BINDING(0))]], sampler samplr [[sampler(GET_HELPER_SAMPLER_STATE_BINDING(0))]]) { + return tex.sample(samplr, in.uv); +} diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal index c2c1799f..18c310f7 100644 --- a/src/host_shaders/metal_shaders.metal +++ b/src/host_shaders/metal_shaders.metal @@ -32,25 +32,6 @@ fragment float4 fragmentDisplay(BasicVertexOut in [[stage_in]], texture2d return tex.sample(samplr, in.uv); } -struct NDCViewport { - float2 offset; - float2 scale; -}; - -vertex BasicVertexOut vertexBlit(uint vid [[vertex_id]], constant NDCViewport& viewport [[buffer(0)]]) { - BasicVertexOut out; - out.uv = float2((vid << 1) & 2, vid & 2); - out.position = float4(out.uv * 2.0 - 1.0, 0.0, 1.0); - out.position.y = -out.position.y; - out.uv = out.uv * viewport.scale + viewport.offset; - - return out; -} - -fragment float4 fragmentBlit(BasicVertexOut in [[stage_in]], texture2d tex [[texture(0)]], sampler samplr [[sampler(0)]]) { - return tex.sample(samplr, in.uv); -} - struct PicaRegs { uint regs[0x200 - 0x48];