mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-12 09:09:47 +12:00
Merge pull request #1 from SamoZ256/metal-specialized-shaders
Metal specialized shaders
This commit is contained in:
commit
fca03fe2b8
6 changed files with 143 additions and 36 deletions
|
@ -404,7 +404,8 @@ endif()
|
|||
if(ENABLE_METAL AND APPLE)
|
||||
set(RENDERER_MTL_INCLUDE_FILES include/renderer_mtl/renderer_mtl.hpp
|
||||
include/renderer_mtl/mtl_depth_stencil_cache.hpp
|
||||
include/renderer_mtl/mtl_pipeline_cache.hpp
|
||||
include/renderer_mtl/mtl_blit_pipeline_cache.hpp
|
||||
include/renderer_mtl/mtl_draw_pipeline_cache.hpp
|
||||
include/renderer_mtl/mtl_render_target.hpp
|
||||
include/renderer_mtl/mtl_texture.hpp
|
||||
include/renderer_mtl/mtl_vertex_buffer_cache.hpp
|
||||
|
|
72
include/renderer_mtl/mtl_blit_pipeline_cache.hpp
Normal file
72
include/renderer_mtl/mtl_blit_pipeline_cache.hpp
Normal file
|
@ -0,0 +1,72 @@
|
|||
#pragma once
|
||||
|
||||
#include "pica_to_mtl.hpp"
|
||||
|
||||
using namespace PICA;
|
||||
|
||||
namespace Metal {
|
||||
|
||||
struct BlitPipelineHash {
|
||||
// Formats
|
||||
ColorFmt colorFmt;
|
||||
DepthFmt depthFmt;
|
||||
};
|
||||
|
||||
// This pipeline only caches the pipeline with all of its color and depth attachment variations
|
||||
class BlitPipelineCache {
|
||||
public:
|
||||
BlitPipelineCache() = default;
|
||||
|
||||
~BlitPipelineCache() {
|
||||
clear();
|
||||
vertexFunction->release();
|
||||
fragmentFunction->release();
|
||||
}
|
||||
|
||||
void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) {
|
||||
device = dev;
|
||||
vertexFunction = vert;
|
||||
fragmentFunction = frag;
|
||||
}
|
||||
|
||||
MTL::RenderPipelineState* get(BlitPipelineHash hash) {
|
||||
u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt;
|
||||
auto& pipeline = pipelineCache[intHash];
|
||||
if (!pipeline) {
|
||||
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
|
||||
desc->setVertexFunction(vertexFunction);
|
||||
desc->setFragmentFunction(fragmentFunction);
|
||||
|
||||
auto colorAttachment = desc->colorAttachments()->object(0);
|
||||
colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));
|
||||
|
||||
desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt));
|
||||
|
||||
NS::Error* error = nullptr;
|
||||
pipeline = device->newRenderPipelineState(desc, &error);
|
||||
if (error) {
|
||||
Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
|
||||
}
|
||||
|
||||
desc->release();
|
||||
}
|
||||
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
void clear() {
|
||||
for (auto& pair : pipelineCache) {
|
||||
pair.second->release();
|
||||
}
|
||||
pipelineCache.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<u8, MTL::RenderPipelineState*> pipelineCache;
|
||||
|
||||
MTL::Device* device;
|
||||
MTL::Function* vertexFunction;
|
||||
MTL::Function* fragmentFunction;
|
||||
};
|
||||
|
||||
} // namespace Metal
|
|
@ -6,42 +6,66 @@ using namespace PICA;
|
|||
|
||||
namespace Metal {
|
||||
|
||||
struct PipelineHash {
|
||||
struct DrawPipelineHash { // 62 bits
|
||||
// Formats
|
||||
ColorFmt colorFmt;
|
||||
DepthFmt depthFmt;
|
||||
ColorFmt colorFmt; // 3 bits
|
||||
DepthFmt depthFmt; // 3 bits
|
||||
|
||||
// Blending
|
||||
bool blendEnabled;
|
||||
u32 blendControl;
|
||||
bool blendEnabled; // 1 bit
|
||||
u32 blendControl; // 32 bits
|
||||
|
||||
// Specialization constants (23 bits)
|
||||
bool lightingEnabled; // 1 bit
|
||||
u8 lightingNumLights; // 3 bits
|
||||
u8 lightingConfig1; // 7 bits
|
||||
// | ref | func | on |
|
||||
u16 alphaControl; // 12 bits (mask: 11111111 0111 0001)
|
||||
};
|
||||
|
||||
// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices
|
||||
#define VERTEX_BUFFER_BINDING_INDEX 30
|
||||
|
||||
// This cache stores one pipeline per combination of attachment formats, blend state and fragment specialization constants
|
||||
class PipelineCache {
|
||||
class DrawPipelineCache {
|
||||
public:
|
||||
PipelineCache() = default;
|
||||
DrawPipelineCache() = default;
|
||||
|
||||
~PipelineCache() {
|
||||
~DrawPipelineCache() {
|
||||
clear();
|
||||
vertexDescriptor->release();
|
||||
vertexFunction->release();
|
||||
fragmentFunction->release();
|
||||
}
|
||||
|
||||
void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag, MTL::VertexDescriptor* vertDesc) {
|
||||
void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) {
|
||||
device = dev;
|
||||
library = lib;
|
||||
vertexFunction = vert;
|
||||
fragmentFunction = frag;
|
||||
vertexDescriptor = vertDesc;
|
||||
}
|
||||
|
||||
MTL::RenderPipelineState* get(PipelineHash hash) {
|
||||
u64 intHash = ((u64)hash.colorFmt << 36) | ((u64)hash.depthFmt << 33) | ((u64)hash.blendEnabled << 32) | (u64)hash.blendControl;
|
||||
auto& pipeline = pipelineCache[intHash];
|
||||
MTL::RenderPipelineState* get(DrawPipelineHash hash) {
|
||||
u32 fragmentFunctionHash = ((u32)hash.lightingEnabled << 22) | ((u32)hash.lightingNumLights << 19) | ((u32)hash.lightingConfig1 << 12) | ((((u32)hash.alphaControl & 0b1111111100000000) >> 8) << 4) | ((((u32)hash.alphaControl & 0b01110000) >> 4) << 1) | ((u32)hash.alphaControl & 0b0001);
|
||||
u64 pipelineHash = ((u64)hash.colorFmt << 59) | ((u64)hash.depthFmt << 56) | ((u64)hash.blendEnabled << 55) | ((u64)hash.blendControl << 23) | fragmentFunctionHash;
|
||||
auto& pipeline = pipelineCache[pipelineHash];
|
||||
if (!pipeline) {
|
||||
auto& fragmentFunction = fragmentFunctionCache[fragmentFunctionHash];
|
||||
if (!fragmentFunction) {
|
||||
MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init();
|
||||
constants->setConstantValue(&hash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0));
|
||||
constants->setConstantValue(&hash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1));
|
||||
constants->setConstantValue(&hash.lightingConfig1, MTL::DataTypeUChar, NS::UInteger(2));
|
||||
constants->setConstantValue(&hash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3));
|
||||
|
||||
NS::Error* error = nullptr;
|
||||
fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error);
|
||||
if (error) {
|
||||
Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding));
|
||||
}
|
||||
constants->release();
|
||||
fragmentFunctionCache[fragmentFunctionHash] = fragmentFunction;
|
||||
}
|
||||
|
||||
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
|
||||
desc->setVertexFunction(vertexFunction);
|
||||
desc->setFragmentFunction(fragmentFunction);
|
||||
|
@ -87,14 +111,19 @@ public:
|
|||
pair.second->release();
|
||||
}
|
||||
pipelineCache.clear();
|
||||
for (auto& pair : fragmentFunctionCache) {
|
||||
pair.second->release();
|
||||
}
|
||||
fragmentFunctionCache.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<u64, MTL::RenderPipelineState*> pipelineCache;
|
||||
std::unordered_map<u32, MTL::Function*> fragmentFunctionCache;
|
||||
|
||||
MTL::Device* device;
|
||||
MTL::Library* library;
|
||||
MTL::Function* vertexFunction;
|
||||
MTL::Function* fragmentFunction;
|
||||
MTL::VertexDescriptor* vertexDescriptor;
|
||||
};
|
||||
|
|
@ -4,7 +4,8 @@
|
|||
#include "renderer.hpp"
|
||||
#include "mtl_texture.hpp"
|
||||
#include "mtl_render_target.hpp"
|
||||
#include "mtl_pipeline_cache.hpp"
|
||||
#include "mtl_blit_pipeline_cache.hpp"
|
||||
#include "mtl_draw_pipeline_cache.hpp"
|
||||
#include "mtl_depth_stencil_cache.hpp"
|
||||
#include "mtl_vertex_buffer_cache.hpp"
|
||||
// HACK: use the OpenGL cache
|
||||
|
@ -41,8 +42,8 @@ class RendererMTL final : public Renderer {
|
|||
SurfaceCache<Metal::ColorRenderTarget, 16, true> colorRenderTargetCache;
|
||||
SurfaceCache<Metal::DepthStencilRenderTarget, 16, true> depthStencilRenderTargetCache;
|
||||
SurfaceCache<Metal::Texture, 256, true> textureCache;
|
||||
Metal::PipelineCache blitPipelineCache;
|
||||
Metal::PipelineCache drawPipelineCache;
|
||||
Metal::BlitPipelineCache blitPipelineCache;
|
||||
Metal::DrawPipelineCache drawPipelineCache;
|
||||
Metal::DepthStencilCache depthStencilCache;
|
||||
Metal::VertexBufferCache vertexBufferCache;
|
||||
|
||||
|
|
|
@ -154,11 +154,10 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
|
|||
MTL::Function* vertexBlitFunction = library->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding));
|
||||
MTL::Function* fragmentBlitFunction = library->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding));
|
||||
|
||||
blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction, nullptr);
|
||||
blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction);
|
||||
|
||||
// Draw
|
||||
MTL::Function* vertexDrawFunction = library->newFunction(NS::String::string("vertexDraw", NS::ASCIIStringEncoding));
|
||||
MTL::Function* fragmentDrawFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding));
|
||||
|
||||
// -------- Vertex descriptor --------
|
||||
MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init();
|
||||
|
@ -216,7 +215,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
|
|||
vertexBufferLayout->setStepFunction(MTL::VertexStepFunctionPerVertex);
|
||||
vertexBufferLayout->setStepRate(1);
|
||||
|
||||
drawPipelineCache.set(device, vertexDrawFunction, fragmentDrawFunction, vertexDescriptor);
|
||||
drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor);
|
||||
|
||||
// Depth stencil cache
|
||||
depthStencilCache.set(device);
|
||||
|
@ -321,7 +320,7 @@ void RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize,
|
|||
colorAttachment->setStoreAction(MTL::StoreActionStore);
|
||||
|
||||
// Pipeline
|
||||
Metal::PipelineHash hash{destFramebuffer->format, DepthFmt::Unknown1};
|
||||
Metal::BlitPipelineHash hash{destFramebuffer->format, DepthFmt::Unknown1};
|
||||
auto blitPipeline = blitPipelineCache.get(hash);
|
||||
|
||||
beginRenderPassIfNeeded(renderPassDescriptor, destFramebuffer->texture);
|
||||
|
@ -381,10 +380,14 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
|
|||
depthUniforms.depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
|
||||
|
||||
// -------- Pipeline --------
|
||||
Metal::PipelineHash pipelineHash{colorRenderTarget->format, DepthFmt::Unknown1};
|
||||
Metal::DrawPipelineHash pipelineHash{colorRenderTarget->format, DepthFmt::Unknown1};
|
||||
if (depthStencilRenderTarget) {
|
||||
pipelineHash.depthFmt = depthStencilRenderTarget->format;
|
||||
}
|
||||
pipelineHash.lightingEnabled = regs[0x008F] & 1;
|
||||
pipelineHash.lightingNumLights = regs[0x01C2] & 0x7;
|
||||
pipelineHash.lightingConfig1 = regs[0x01C4u] >> 16; // Last 16 bits are unused, so skip them
|
||||
pipelineHash.alphaControl = regs[0x104];
|
||||
|
||||
// Blending and logic op
|
||||
pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0;
|
||||
|
|
|
@ -383,6 +383,11 @@ float3 regToColor(uint reg) {
|
|||
return scale * float3(float(extract_bits(reg, 20, 8)), float(extract_bits(reg, 10, 8)), float(extract_bits(reg, 00, 8)));
|
||||
}
|
||||
|
||||
constant bool lightingEnabled [[function_constant(0)]];
|
||||
constant uint8_t lightingNumLights [[function_constant(1)]];
|
||||
constant uint8_t lightingConfig1 [[function_constant(2)]];
|
||||
constant uint16_t alphaControl [[function_constant(3)]];
|
||||
|
||||
// Implements the following algorithm: https://mathb.in/26766
|
||||
void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture1d_array<float> texLightingLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) {
|
||||
// Quaternions describe a transformation from surface-local space to eye space.
|
||||
|
@ -393,14 +398,7 @@ void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture
|
|||
float3 bitangent = normalize(in.bitangent);
|
||||
float3 view = normalize(in.view);
|
||||
|
||||
uint GPUREG_LIGHTING_ENABLE = picaRegs.read(0x008Fu);
|
||||
if (extract_bits(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
|
||||
primaryColor = secondaryColor = float4(1.0);
|
||||
return;
|
||||
}
|
||||
|
||||
uint GPUREG_LIGHTING_AMBIENT = picaRegs.read(0x01C0u);
|
||||
uint GPUREG_LIGHTING_NUM_LIGHTS = (picaRegs.read(0x01C2u) & 0x7u) + 1u;
|
||||
uint GPUREG_LIGHTING_LIGHT_PERMUTATION = picaRegs.read(0x01D9u);
|
||||
|
||||
primaryColor = float4(float3(0.0), 1.0);
|
||||
|
@ -411,13 +409,12 @@ void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture
|
|||
uint GPUREG_LIGHTING_LUTINPUT_ABS = picaRegs.read(0x01D0u);
|
||||
uint GPUREG_LIGHTING_LUTINPUT_SELECT = picaRegs.read(0x01D1u);
|
||||
uint GPUREG_LIGHTING_CONFIG0 = picaRegs.read(0x01C3u);
|
||||
uint GPUREG_LIGHTING_CONFIG1 = picaRegs.read(0x01C4u);
|
||||
uint GPUREG_LIGHTING_LUTINPUT_SCALE = picaRegs.read(0x01D2u);
|
||||
float d[7];
|
||||
|
||||
bool errorUnimpl = false;
|
||||
|
||||
for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) {
|
||||
for (uint i = 0u; i < lightingNumLights + 1; i++) {
|
||||
uint lightID = extract_bits(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3);
|
||||
|
||||
uint GPUREG_LIGHTi_SPECULAR0 = picaRegs.read(0x0140u + 0x10u * lightID);
|
||||
|
@ -447,7 +444,7 @@ void calcLighting(thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture
|
|||
}
|
||||
|
||||
for (int c = 0; c < 7; c++) {
|
||||
if (extract_bits(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
|
||||
if (extract_bits(lightingConfig1, c, 1) == 0u) {
|
||||
uint scaleID = extract_bits(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
|
||||
float scale = float(1u << scaleID);
|
||||
if (scaleID >= 6u) scale /= 256.0;
|
||||
|
@ -546,7 +543,12 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
|
|||
sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
|
||||
Globals globals;
|
||||
globals.tevSources[0] = in.color;
|
||||
calcLighting(in, picaRegs, texLightingLut, linearSampler, globals.tevSources[1], globals.tevSources[2]);
|
||||
if (lightingEnabled) {
|
||||
calcLighting(in, picaRegs, texLightingLut, linearSampler, globals.tevSources[1], globals.tevSources[2]);
|
||||
} else {
|
||||
globals.tevSources[1] = float4(1.0);
|
||||
globals.tevSources[2] = float4(1.0);
|
||||
}
|
||||
|
||||
uint textureConfig = picaRegs.read(0x80u);
|
||||
float2 texCoord2 = (textureConfig & (1u << 13)) != 0u ? in.texCoord1 : in.texCoord2;
|
||||
|
@ -579,7 +581,6 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
|
|||
float4 color = performLogicOp(logicOp, globals.tevSources[15], prevColor);
|
||||
|
||||
// Perform alpha test
|
||||
uint alphaControl = picaRegs.read(0x104u);
|
||||
if ((alphaControl & 1u) != 0u) { // Check if alpha test is on
|
||||
uint func = (alphaControl >> 4u) & 7u;
|
||||
float reference = float((alphaControl >> 8u) & 0xffu) / 255.0;
|
||||
|
|
Loading…
Add table
Reference in a new issue