don't bind resources unnecessarily

This commit is contained in:
Samuliak 2024-10-31 13:45:05 +01:00
parent 158be432fc
commit 272c24d8e4
No known key found for this signature in database
8 changed files with 128 additions and 41 deletions

View file

@ -483,6 +483,8 @@ if(ENABLE_METAL AND APPLE)
include/renderer_mtl/mtl_texture.hpp
include/renderer_mtl/mtl_vertex_buffer_cache.hpp
include/renderer_mtl/mtl_lut_texture.hpp
include/renderer_mtl/mtl_command_encoder.hpp
include/renderer_mtl/mtl_common.hpp
include/renderer_mtl/pica_to_mtl.hpp
include/renderer_mtl/objc_helper.hpp
)
@ -494,7 +496,8 @@ if(ENABLE_METAL AND APPLE)
src/core/renderer_mtl/mtl_lut_texture.cpp
src/core/renderer_mtl/objc_helper.mm
src/host_shaders/metal_shaders.metal
src/host_shaders/metal_copy_to_lut_texture.metal
src/host_shaders/metal_blit.metal
#src/host_shaders/metal_copy_to_lut_texture.metal
)
set(HEADER_FILES ${HEADER_FILES} ${RENDERER_MTL_INCLUDE_FILES})
@ -520,7 +523,8 @@ if(ENABLE_METAL AND APPLE)
endfunction()
add_metal_shader(metal_shaders)
add_metal_shader(metal_copy_to_lut_texture)
add_metal_shader(metal_blit)
#add_metal_shader(metal_copy_to_lut_texture)
add_custom_target(
compile_msl_shaders
@ -532,7 +536,8 @@ if(ENABLE_METAL AND APPLE)
NAMESPACE RendererMTL
WHENCE "src/host_shaders/"
"src/host_shaders/metal_shaders.metallib"
"src/host_shaders/metal_copy_to_lut_texture.metallib"
"src/host_shaders/metal_blit.metallib"
#"src/host_shaders/metal_copy_to_lut_texture.metallib"
)
add_dependencies(resources_renderer_mtl compile_msl_shaders)

View file

@ -0,0 +1,58 @@
#pragma once
#include <Metal/Metal.hpp>
namespace Metal {
// Record of the objects most recently bound through CommandEncoder.
// CommandEncoder compares incoming binds against these fields to decide
// whether a bind call has to be issued at all.
struct RenderState {
    // Last pipeline / depth-stencil objects set on the encoder (nullptr = none yet)
    MTL::RenderPipelineState* renderPipelineState{nullptr};
    MTL::DepthStencilState* depthStencilState{nullptr};

    // Last fragment texture / sampler bound per slot (all nullptr initially)
    MTL::Texture* textures[3]{};
    MTL::SamplerState* samplerStates[3]{};
};
class CommandEncoder {
public:
void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) {
renderCommandEncoder = rce;
// Reset the render state
renderState = RenderState{};
}
// Resource binding
void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) {
if (renderPipelineState != renderState.renderPipelineState) {
renderCommandEncoder->setRenderPipelineState(renderPipelineState);
renderState.renderPipelineState = renderPipelineState;
}
}
void setDepthStencilState(MTL::DepthStencilState* depthStencilState) {
if (depthStencilState != renderState.depthStencilState) {
renderCommandEncoder->setDepthStencilState(depthStencilState);
renderState.depthStencilState = depthStencilState;
}
}
void setFragmentTexture(MTL::Texture* texture, u32 index) {
if (texture != renderState.textures[index]) {
renderCommandEncoder->setFragmentTexture(texture, index);
renderState.textures[index] = texture;
}
}
void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) {
if (samplerState != renderState.samplerStates[index]) {
renderCommandEncoder->setFragmentSamplerState(samplerState, index);
renderState.samplerStates[index] = samplerState;
}
}
private:
MTL::RenderCommandEncoder* renderCommandEncoder = nullptr;
RenderState renderState;
};
} // namespace Metal

View file

@ -0,0 +1,6 @@
#pragma once
#include <Metal/Metal.hpp>
// Binding slots for helper passes (e.g. blits), counted downwards from 30
// (textures) and 15 (samplers). The argument is parenthesized so that
// compound expressions such as `a + b` expand correctly.
// Keep in sync with the identical macros in the Metal shader sources.
#define GET_HELPER_TEXTURE_BINDING(binding) (30 - (binding))
#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - (binding))

View file

@ -2,7 +2,7 @@
#include <string>
#include <Metal/Metal.hpp>
#include "mtl_common.hpp"
namespace Metal {

View file

@ -11,6 +11,7 @@
#include "mtl_depth_stencil_cache.hpp"
#include "mtl_vertex_buffer_cache.hpp"
#include "mtl_lut_texture.hpp"
#include "mtl_command_encoder.hpp"
// HACK: use the OpenGL cache
#include "../renderer_gl/surface_cache.hpp"
@ -46,6 +47,8 @@ class RendererMTL final : public Renderer {
MTL::Device* device;
MTL::CommandQueue* commandQueue;
Metal::CommandEncoder commandEncoder;
// Libraries
MTL::Library* library;
@ -69,7 +72,7 @@ class RendererMTL final : public Renderer {
// Pipelines
MTL::RenderPipelineState* displayPipeline;
MTL::RenderPipelineState* copyToLutTexturePipeline;
//MTL::RenderPipelineState* copyToLutTexturePipeline;
// Clears
std::map<MTL::Texture*, Color4> colorClearOps;
@ -177,7 +180,7 @@ class RendererMTL final : public Renderer {
Metal::DepthStencilRenderTarget& getDepthRenderTarget();
Metal::Texture& getTexture(Metal::Texture& tex);
void setupTextureEnvState(MTL::RenderCommandEncoder* encoder);
void bindTexturesToSlots(MTL::RenderCommandEncoder* encoder);
void bindTexturesToSlots();
void updateLightingLUT(MTL::RenderCommandEncoder* encoder);
void updateFogLUT(MTL::RenderCommandEncoder* encoder);
void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect, const Math::Rect<u32>& destRect);

View file

@ -165,7 +165,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
// Load shaders
auto mtlResources = cmrc::RendererMTL::get_filesystem();
library = loadLibrary(device, mtlResources.open("metal_shaders.metallib"));
MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib"));
MTL::Library* blitLibrary = loadLibrary(device, mtlResources.open("metal_blit.metallib"));
//MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib"));
// Display
MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding));
@ -188,8 +189,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
fragmentDisplayFunction->release();
// Blit
MTL::Function* vertexBlitFunction = library->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding));
MTL::Function* fragmentBlitFunction = library->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding));
MTL::Function* vertexBlitFunction = blitLibrary->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding));
MTL::Function* fragmentBlitFunction = blitLibrary->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding));
blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction);
@ -255,6 +256,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor);
// Copy to LUT texture
/*
MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init();
constants->setConstantValue(&LIGHTING_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0));
@ -279,6 +281,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
}
copyToLutTexturePipelineDescriptor->release();
vertexCopyToLutTextureFunction->release();
*/
// Depth stencil cache
depthStencilCache.set(device);
@ -293,7 +296,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
depthStencilDescriptor->release();
// Release
copyToLutTextureLibrary->release();
blitLibrary->release();
//copyToLutTextureLibrary->release();
}
void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {
@ -528,8 +532,8 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
updateFogLUT(renderCommandEncoder);
}
renderCommandEncoder->setRenderPipelineState(pipeline);
renderCommandEncoder->setDepthStencilState(depthStencilState);
commandEncoder.setRenderPipelineState(pipeline);
commandEncoder.setDepthStencilState(depthStencilState);
// If size is < 4KB, use inline vertex data, otherwise use a buffer
if (vertices.size_bytes() < 4 * 1024) {
renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX);
@ -566,7 +570,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
// Bind resources
setupTextureEnvState(renderCommandEncoder);
bindTexturesToSlots(renderCommandEncoder);
bindTexturesToSlots();
renderCommandEncoder->setVertexBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0);
renderCommandEncoder->setFragmentBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0);
renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2);
@ -589,7 +593,7 @@ void RendererMTL::deinitGraphicsContext() {
delete lutFogTexture;
// Release
copyToLutTexturePipeline->release();
//copyToLutTexturePipeline->release();
displayPipeline->release();
defaultDepthStencilState->release();
nullTexture->release();
@ -687,7 +691,7 @@ void RendererMTL::setupTextureEnvState(MTL::RenderCommandEncoder* encoder) {
encoder->setFragmentBytes(&envState, sizeof(envState), 1);
}
void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) {
void RendererMTL::bindTexturesToSlots() {
static constexpr std::array<u32, 3> ioBases = {
PICA::InternalRegs::Tex0BorderColor,
PICA::InternalRegs::Tex1BorderColor,
@ -696,8 +700,8 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) {
for (int i = 0; i < 3; i++) {
if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) {
encoder->setFragmentTexture(nullTexture, i);
encoder->setFragmentSamplerState(nearestSampler, i);
commandEncoder.setFragmentTexture(nullTexture, i);
commandEncoder.setFragmentSamplerState(nearestSampler, i);
continue;
}
@ -713,8 +717,8 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) {
if (addr != 0) [[likely]] {
Metal::Texture targetTex(device, addr, static_cast<PICA::TextureFmt>(format), width, height, config);
auto tex = getTexture(targetTex);
encoder->setFragmentTexture(tex.texture, i);
encoder->setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i);
commandEncoder.setFragmentTexture(tex.texture, i);
commandEncoder.setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i);
} else {
// TODO: log
}
@ -811,7 +815,7 @@ void RendererMTL::textureCopyImpl(
Metal::BlitPipelineHash hash{destFramebuffer.format, DepthFmt::Unknown1};
auto blitPipeline = blitPipelineCache.get(hash);
renderCommandEncoder->setRenderPipelineState(blitPipeline);
commandEncoder.setRenderPipelineState(blitPipeline);
// Viewport
renderCommandEncoder->setViewport(MTL::Viewport{
@ -824,8 +828,8 @@ void RendererMTL::textureCopyImpl(
// Bind resources
renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0);
renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, 0);
renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0);
renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, GET_HELPER_TEXTURE_BINDING(0));
renderCommandEncoder->setFragmentSamplerState(nearestSampler, GET_HELPER_SAMPLER_STATE_BINDING(0));
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
}
@ -838,6 +842,7 @@ void RendererMTL::beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassD
renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor);
renderCommandEncoder->setLabel(toNSString(nextRenderPassName));
commandEncoder.newRenderCommandEncoder(renderCommandEncoder);
// Bind persistent resources

View file

@ -0,0 +1,29 @@
#include <metal_stdlib>
using namespace metal;
// Binding slots for helper passes, counted downwards from 30 (textures) and
// 15 (samplers). The argument is parenthesized so that compound expressions
// expand correctly. Keep in sync with the host-side definitions.
#define GET_HELPER_TEXTURE_BINDING(binding) (30 - (binding))
#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - (binding))
// Data passed from vertexBlit to fragmentBlit (received there as stage_in).
struct BasicVertexOut {
// Vertex position output of the vertex stage
float4 position [[position]];
// Texture coordinate at which fragmentBlit samples the source texture
float2 uv;
};
// Parameters read by vertexBlit from buffer(0).
// vertexBlit maps its generated UV as: uv * scale + offset.
// NOTE(review): the field layout presumably has to match the struct the host
// uploads to vertex buffer 0 — confirm against the caller.
struct NDCViewport {
float2 offset;
float2 scale;
};
// Vertex stage of the blit pass. Generates the position and UV purely from
// the vertex id (no vertex buffer), then remaps the UV with the viewport
// parameters supplied in buffer(0).
vertex BasicVertexOut vertexBlit(uint vid [[vertex_id]], constant NDCViewport& viewport [[buffer(0)]]) {
    // Each component is either 0 or 2, selected by one bit of the vertex id
    const float2 corner = float2((vid << 1) & 2, vid & 2);
    const float2 clip = corner * 2.0 - 1.0;

    BasicVertexOut result;
    // Y is negated relative to the UV-derived position
    result.position = float4(clip.x, -clip.y, 0.0, 1.0);
    result.uv = corner * viewport.scale + viewport.offset;
    return result;
}
// Fragment stage of the blit pass: samples the source texture at the
// interpolated UV. The texture and sampler live in the helper binding slots.
fragment float4 fragmentBlit(
    BasicVertexOut in [[stage_in]],
    texture2d<float> tex [[texture(GET_HELPER_TEXTURE_BINDING(0))]],
    sampler samplr [[sampler(GET_HELPER_SAMPLER_STATE_BINDING(0))]]
) {
    const float4 texel = tex.sample(samplr, in.uv);
    return texel;
}

View file

@ -32,25 +32,6 @@ fragment float4 fragmentDisplay(BasicVertexOut in [[stage_in]], texture2d<float>
return tex.sample(samplr, in.uv);
}
struct NDCViewport {
float2 offset;
float2 scale;
};
vertex BasicVertexOut vertexBlit(uint vid [[vertex_id]], constant NDCViewport& viewport [[buffer(0)]]) {
BasicVertexOut out;
out.uv = float2((vid << 1) & 2, vid & 2);
out.position = float4(out.uv * 2.0 - 1.0, 0.0, 1.0);
out.position.y = -out.position.y;
out.uv = out.uv * viewport.scale + viewport.offset;
return out;
}
fragment float4 fragmentBlit(BasicVertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) {
return tex.sample(samplr, in.uv);
}
struct PicaRegs {
uint regs[0x200 - 0x48];