specialize shader

This commit is contained in:
Samuliak 2024-07-05 20:29:05 +02:00
parent 7e8582d985
commit 0c19f5a3ea
6 changed files with 125 additions and 27 deletions

View file

@ -404,7 +404,8 @@ endif()
if(ENABLE_METAL AND APPLE)
set(RENDERER_MTL_INCLUDE_FILES include/renderer_mtl/renderer_mtl.hpp
include/renderer_mtl/mtl_depth_stencil_cache.hpp
include/renderer_mtl/mtl_pipeline_cache.hpp
include/renderer_mtl/mtl_blit_pipeline_cache.hpp
include/renderer_mtl/mtl_draw_pipeline_cache.hpp
include/renderer_mtl/mtl_render_target.hpp
include/renderer_mtl/mtl_texture.hpp
include/renderer_mtl/mtl_vertex_buffer_cache.hpp

View file

@ -0,0 +1,72 @@
#pragma once
#include "pica_to_mtl.hpp"
using namespace PICA;
namespace Metal {
// Key describing one blit pipeline variant. BlitPipelineCache::get() packs both fields into its
// cache key and uses them to pick the colour and depth attachment pixel formats of the pipeline.
struct BlitPipelineHash {
// Attachment formats (colour attachment 0 and the depth attachment)
ColorFmt colorFmt;
DepthFmt depthFmt;
};
// Caches the blit render pipeline states, one per colour/depth attachment format combination.
// The destructor releases the vertex/fragment functions handed to set() as well as every
// pipeline state created by get(), so the caller must not release them itself.
class BlitPipelineCache {
  public:
	BlitPipelineCache() = default;

	~BlitPipelineCache() {
		clear();

		// set() may never have been called (e.g. the renderer was torn down before the graphics
		// context was initialised), in which case there is nothing to release.
		if (vertexFunction) {
			vertexFunction->release();
		}
		if (fragmentFunction) {
			fragmentFunction->release();
		}
	}

	// Provides the device used to build pipelines and the shader functions shared by all of them.
	// Must be called before the first get().
	void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) {
		device = dev;
		vertexFunction = vert;
		fragmentFunction = frag;
	}

	// Returns the pipeline state matching the attachment formats in `hash`, creating and caching
	// it on first use. Panics if Metal reports an error while creating the pipeline.
	MTL::RenderPipelineState* get(BlitPipelineHash hash) {
		// Pack both formats into one key: depth format in the low 3 bits, colour format above it
		const u8 intHash = static_cast<u8>((static_cast<u8>(hash.colorFmt) << 3) | static_cast<u8>(hash.depthFmt));

		// operator[] inserts a null entry on a miss, which is then filled in below
		auto& pipeline = pipelineCache[intHash];
		if (!pipeline) {
			MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
			desc->setVertexFunction(vertexFunction);
			desc->setFragmentFunction(fragmentFunction);

			auto colorAttachment = desc->colorAttachments()->object(0);
			colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));
			desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt));

			NS::Error* error = nullptr;
			pipeline = device->newRenderPipelineState(desc, &error);
			if (error) {
				Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
			}

			desc->release();
		}

		return pipeline;
	}

	// Releases every cached pipeline state and empties the cache. The shader functions are kept.
	void clear() {
		for (auto& pair : pipelineCache) {
			// An entry can still be null if pipeline creation failed after get() inserted it
			if (pair.second) {
				pair.second->release();
			}
		}
		pipelineCache.clear();
	}

  private:
	std::unordered_map<u8, MTL::RenderPipelineState*> pipelineCache;

	// Null until set() is called, so the destructor can tell whether there is anything to release
	MTL::Device* device = nullptr;
	MTL::Function* vertexFunction = nullptr;
	MTL::Function* fragmentFunction = nullptr;
};
} // namespace Metal

View file

@ -6,7 +6,7 @@ using namespace PICA;
namespace Metal {
struct PipelineHash {
struct DrawPipelineHash {
// Formats
ColorFmt colorFmt;
DepthFmt depthFmt;
@ -14,34 +14,51 @@ struct PipelineHash {
// Blending
bool blendEnabled;
u32 blendControl;
// Specialization constants
bool lightingEnabled;
};
// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices
#define VERTEX_BUFFER_BINDING_INDEX 30
// This pipeline only caches the pipeline with all of its color and depth attachment variations
class PipelineCache {
class DrawPipelineCache {
public:
PipelineCache() = default;
DrawPipelineCache() = default;
~PipelineCache() {
~DrawPipelineCache() {
clear();
vertexDescriptor->release();
vertexFunction->release();
fragmentFunction->release();
}
void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag, MTL::VertexDescriptor* vertDesc) {
void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) {
device = dev;
library = lib;
vertexFunction = vert;
fragmentFunction = frag;
vertexDescriptor = vertDesc;
}
MTL::RenderPipelineState* get(PipelineHash hash) {
u64 intHash = ((u64)hash.colorFmt << 36) | ((u64)hash.depthFmt << 33) | ((u64)hash.blendEnabled << 32) | (u64)hash.blendControl;
auto& pipeline = pipelineCache[intHash];
MTL::RenderPipelineState* get(DrawPipelineHash hash) {
u64 pipelineHash = ((u64)hash.colorFmt << 37) | ((u64)hash.depthFmt << 34) | ((u64)hash.blendEnabled << 33) | ((u64)hash.blendControl << 1) | (u64)hash.lightingEnabled;
auto& pipeline = pipelineCache[pipelineHash];
if (!pipeline) {
u8 fragmentFunctionHash = (u8)hash.lightingEnabled;
auto& fragmentFunction = fragmentFunctionCache[fragmentFunctionHash];
if (!fragmentFunction) {
MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init();
constants->setConstantValue(&hash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0));
NS::Error* error = nullptr;
fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error);
if (error) {
Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
constants->release();
fragmentFunctionCache[fragmentFunctionHash] = fragmentFunction;
}
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
desc->setVertexFunction(vertexFunction);
desc->setFragmentFunction(fragmentFunction);
@ -87,14 +104,19 @@ public:
pair.second->release();
}
pipelineCache.clear();
for (auto& pair : fragmentFunctionCache) {
pair.second->release();
}
fragmentFunctionCache.clear();
}
private:
std::unordered_map<u64, MTL::RenderPipelineState*> pipelineCache;
std::unordered_map<u8, MTL::Function*> fragmentFunctionCache;
MTL::Device* device;
MTL::Library* library;
MTL::Function* vertexFunction;
MTL::Function* fragmentFunction;
MTL::VertexDescriptor* vertexDescriptor;
};

View file

@ -4,7 +4,8 @@
#include "renderer.hpp"
#include "mtl_texture.hpp"
#include "mtl_render_target.hpp"
#include "mtl_pipeline_cache.hpp"
#include "mtl_blit_pipeline_cache.hpp"
#include "mtl_draw_pipeline_cache.hpp"
#include "mtl_depth_stencil_cache.hpp"
#include "mtl_vertex_buffer_cache.hpp"
// HACK: use the OpenGL cache
@ -41,8 +42,8 @@ class RendererMTL final : public Renderer {
SurfaceCache<Metal::ColorRenderTarget, 16, true> colorRenderTargetCache;
SurfaceCache<Metal::DepthStencilRenderTarget, 16, true> depthStencilRenderTargetCache;
SurfaceCache<Metal::Texture, 256, true> textureCache;
Metal::PipelineCache blitPipelineCache;
Metal::PipelineCache drawPipelineCache;
Metal::BlitPipelineCache blitPipelineCache;
Metal::DrawPipelineCache drawPipelineCache;
Metal::DepthStencilCache depthStencilCache;
Metal::VertexBufferCache vertexBufferCache;

View file

@ -154,11 +154,10 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
MTL::Function* vertexBlitFunction = library->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding));
MTL::Function* fragmentBlitFunction = library->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding));
blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction, nullptr);
blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction);
// Draw
MTL::Function* vertexDrawFunction = library->newFunction(NS::String::string("vertexDraw", NS::ASCIIStringEncoding));
MTL::Function* fragmentDrawFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding));
// -------- Vertex descriptor --------
MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init();
@ -216,7 +215,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
vertexBufferLayout->setStepFunction(MTL::VertexStepFunctionPerVertex);
vertexBufferLayout->setStepRate(1);
drawPipelineCache.set(device, vertexDrawFunction, fragmentDrawFunction, vertexDescriptor);
drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor);
// Depth stencil cache
depthStencilCache.set(device);
@ -321,7 +320,7 @@ void RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize,
colorAttachment->setStoreAction(MTL::StoreActionStore);
// Pipeline
Metal::PipelineHash hash{destFramebuffer->format, DepthFmt::Unknown1};
Metal::BlitPipelineHash hash{destFramebuffer->format, DepthFmt::Unknown1};
auto blitPipeline = blitPipelineCache.get(hash);
beginRenderPassIfNeeded(renderPassDescriptor, destFramebuffer->texture);
@ -381,10 +380,11 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
depthUniforms.depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
// -------- Pipeline --------
Metal::PipelineHash pipelineHash{colorRenderTarget->format, DepthFmt::Unknown1};
Metal::DrawPipelineHash pipelineHash{colorRenderTarget->format, DepthFmt::Unknown1};
if (depthStencilRenderTarget) {
pipelineHash.depthFmt = depthStencilRenderTarget->format;
}
pipelineHash.lightingEnabled = regs[0x008F] & 1;
// Blending and logic op
pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0;

View file

@ -393,12 +393,6 @@ void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture
float3 bitangent = normalize(in.bitangent);
float3 view = normalize(in.view);
uint GPUREG_LIGHTING_ENABLE = picaRegs.read(0x008Fu);
if (extract_bits(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
primaryColor = secondaryColor = float4(1.0);
return;
}
uint GPUREG_LIGHTING_AMBIENT = picaRegs.read(0x01C0u);
uint GPUREG_LIGHTING_NUM_LIGHTS = (picaRegs.read(0x01C2u) & 0x7u) + 1u;
uint GPUREG_LIGHTING_LIGHT_PERMUTATION = picaRegs.read(0x01D9u);
@ -541,12 +535,20 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) {
return as_type<float4>(performLogicOpU(logicOp, as_type<uint4>(s), as_type<uint4>(d)));
}
constant int LIGHTING_ENABLED_INDEX = 0;
constant bool lightingEnabled [[function_constant(LIGHTING_ENABLED_INDEX)]];
fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]],
texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture1d_array<float> texLightingLut [[texture(3)]],
sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
Globals globals;
globals.tevSources[0] = in.color;
calcLighting(in, picaRegs, texLightingLut, linearSampler, globals.tevSources[1], globals.tevSources[2]);
if (lightingEnabled) {
calcLighting(in, picaRegs, texLightingLut, linearSampler, globals.tevSources[1], globals.tevSources[2]);
} else {
globals.tevSources[1] = float4(0.0);
globals.tevSources[2] = float4(0.0);
}
uint textureConfig = picaRegs.read(0x80u);
float2 texCoord2 = (textureConfig & (1u << 13)) != 0u ? in.texCoord1 : in.texCoord2;