From 49b65242b900c9463eefca1077e2f627b43969af Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 9 Nov 2024 13:09:12 +0200 Subject: [PATCH] First Metal cleanup & formatting pass --- include/renderer_gl/surface_cache.hpp | 2 - .../renderer_mtl/mtl_blit_pipeline_cache.hpp | 106 ++-- include/renderer_mtl/mtl_command_encoder.hpp | 88 ++-- .../renderer_mtl/mtl_depth_stencil_cache.hpp | 120 ++--- .../renderer_mtl/mtl_draw_pipeline_cache.hpp | 268 +++++----- include/renderer_mtl/mtl_lut_texture.hpp | 5 - include/renderer_mtl/mtl_render_target.hpp | 131 +++-- include/renderer_mtl/mtl_texture.hpp | 100 ++-- .../renderer_mtl/mtl_vertex_buffer_cache.hpp | 116 ++-- include/renderer_mtl/objc_helper.hpp | 10 +- include/renderer_mtl/pica_to_mtl.hpp | 267 +++++----- include/renderer_mtl/renderer_mtl.hpp | 164 +++--- src/core/renderer_gl/etc1.cpp | 3 +- src/core/renderer_mtl/mtl_etc1.cpp | 196 ++++--- src/core/renderer_mtl/mtl_lut_texture.cpp | 45 +- src/core/renderer_mtl/mtl_texture.cpp | 498 +++++++++--------- src/core/renderer_mtl/renderer_mtl.cpp | 80 +-- 17 files changed, 1084 insertions(+), 1115 deletions(-) diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp index 7346fd11..fb7c71a5 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -19,8 +19,6 @@ template class SurfaceCache { // Vanilla std::optional can't hold actual references using OptionalRef = std::optional>; - //static_assert(std::is_same() || std::is_same() || - // std::is_same(), "Invalid surface type"); size_t size; size_t evictionIndex; diff --git a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp index 26422635..02e075b2 100644 --- a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp @@ -7,69 +7,67 @@ using namespace PICA; namespace Metal { + struct BlitPipelineHash { + // Formats + ColorFmt colorFmt; + DepthFmt depthFmt; + }; -struct BlitPipelineHash { - // Formats - ColorFmt colorFmt; - DepthFmt depthFmt; -}; + // This pipeline only caches the pipeline with all of its color and depth attachment variations + class BlitPipelineCache { + public: + BlitPipelineCache() = default; -// This pipeline only caches the pipeline with all of its color and depth attachment variations -class BlitPipelineCache { -public: - BlitPipelineCache() = default; + ~BlitPipelineCache() { + reset(); + vertexFunction->release(); + fragmentFunction->release(); + } - ~BlitPipelineCache() { - reset(); - vertexFunction->release(); - fragmentFunction->release(); - } + void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) { + device = dev; + vertexFunction = vert; + fragmentFunction = frag; + } - void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) { - device = dev; - vertexFunction = vert; - fragmentFunction = frag; - } + MTL::RenderPipelineState* get(BlitPipelineHash hash) { + u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt; + auto& pipeline = pipelineCache[intHash]; + if (!pipeline) { + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); - MTL::RenderPipelineState* get(BlitPipelineHash hash) { - u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt; - auto& pipeline = pipelineCache[intHash]; - if (!pipeline) { - MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); - desc->setVertexFunction(vertexFunction); - desc->setFragmentFunction(fragmentFunction); + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); - auto colorAttachment = desc->colorAttachments()->object(0); - colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); - desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + NS::Error* error = nullptr; + desc->setLabel(toNSString("Blit pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } - NS::Error* error = nullptr; - desc->setLabel(toNSString("Blit pipeline")); - pipeline = device->newRenderPipelineState(desc, &error); - if (error) { - Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); - } + desc->release(); + } - desc->release(); - } + return pipeline; + } - return pipeline; - } + void reset() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + pipelineCache.clear(); + } - void reset() { - for (auto& pair : pipelineCache) { - pair.second->release(); - } - pipelineCache.clear(); - } + private: + std::map pipelineCache; -private: - std::map pipelineCache; - - MTL::Device* device; - MTL::Function* vertexFunction; - MTL::Function* fragmentFunction; -}; - -} // namespace Metal + MTL::Device* device; + MTL::Function* vertexFunction; + MTL::Function* fragmentFunction; + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_command_encoder.hpp b/include/renderer_mtl/mtl_command_encoder.hpp index be66699d..562e6b79 100644 --- a/include/renderer_mtl/mtl_command_encoder.hpp +++ b/include/renderer_mtl/mtl_command_encoder.hpp @@ -3,56 +3,54 @@ #include namespace Metal { + struct RenderState { + MTL::RenderPipelineState* renderPipelineState = nullptr; + MTL::DepthStencilState* depthStencilState = nullptr; + MTL::Texture* textures[3] = {nullptr}; + MTL::SamplerState* samplerStates[3] = {nullptr}; + }; -struct RenderState { - MTL::RenderPipelineState* renderPipelineState = nullptr; - MTL::DepthStencilState* depthStencilState = nullptr; - MTL::Texture* textures[3] = {nullptr}; - MTL::SamplerState* samplerStates[3] = {nullptr}; -}; + class CommandEncoder { + public: + void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) { + renderCommandEncoder = rce; -class CommandEncoder { -public: - void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) { - renderCommandEncoder = rce; + // Reset the render state + renderState = RenderState{}; + } - // Reset the render state - renderState = RenderState{}; - } + // Resource binding + void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) { + if (renderPipelineState != renderState.renderPipelineState) { + renderCommandEncoder->setRenderPipelineState(renderPipelineState); + renderState.renderPipelineState = renderPipelineState; + } + } - // Resource binding - void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) { - if (renderPipelineState != renderState.renderPipelineState) { - renderCommandEncoder->setRenderPipelineState(renderPipelineState); - renderState.renderPipelineState = renderPipelineState; - } - } + void setDepthStencilState(MTL::DepthStencilState* depthStencilState) { + if (depthStencilState != renderState.depthStencilState) { + renderCommandEncoder->setDepthStencilState(depthStencilState); + renderState.depthStencilState = depthStencilState; + } + } - void setDepthStencilState(MTL::DepthStencilState* depthStencilState) { - if (depthStencilState != renderState.depthStencilState) { - renderCommandEncoder->setDepthStencilState(depthStencilState); - renderState.depthStencilState = depthStencilState; - } - } + void setFragmentTexture(MTL::Texture* texture, u32 index) { + if (texture != renderState.textures[index]) { + renderCommandEncoder->setFragmentTexture(texture, index); + renderState.textures[index] = texture; + } + } - void setFragmentTexture(MTL::Texture* texture, u32 index) { - if (texture != renderState.textures[index]) { - renderCommandEncoder->setFragmentTexture(texture, index); - renderState.textures[index] = texture; - } - } + void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) { + if (samplerState != renderState.samplerStates[index]) { + renderCommandEncoder->setFragmentSamplerState(samplerState, index); + renderState.samplerStates[index] = samplerState; + } + } - void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) { - if (samplerState != renderState.samplerStates[index]) { - renderCommandEncoder->setFragmentSamplerState(samplerState, index); - renderState.samplerStates[index] = samplerState; - } - } + private: + MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; -private: - MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; - - RenderState renderState; -}; - -} // namespace Metal + RenderState renderState; + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_depth_stencil_cache.hpp b/include/renderer_mtl/mtl_depth_stencil_cache.hpp index 90721b70..8f7256a9 100644 --- a/include/renderer_mtl/mtl_depth_stencil_cache.hpp +++ b/include/renderer_mtl/mtl_depth_stencil_cache.hpp @@ -7,80 +7,74 @@ using namespace PICA; namespace Metal { + struct DepthStencilHash { + u32 stencilConfig; + u16 stencilOpConfig; + bool depthStencilWrite; + u8 depthFunc; + }; -struct DepthStencilHash { - bool depthStencilWrite; - u8 depthFunc; - u32 stencilConfig; - u16 stencilOpConfig; -}; + class DepthStencilCache { + public: + DepthStencilCache() = default; -class DepthStencilCache { -public: - DepthStencilCache() = default; + ~DepthStencilCache() { reset(); } - ~DepthStencilCache() { - reset(); - } + void set(MTL::Device* dev) { device = dev; } - void set(MTL::Device* dev) { - device = dev; - } + MTL::DepthStencilState* get(DepthStencilHash hash) { + u64 intHash = + ((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig; + auto& depthStencilState = depthStencilCache[intHash]; + if (!depthStencilState) { + MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init(); + desc->setDepthWriteEnabled(hash.depthStencilWrite); + desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc)); - MTL::DepthStencilState* get(DepthStencilHash hash) { - u64 intHash = ((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig; - auto& depthStencilState = depthStencilCache[intHash]; - if (!depthStencilState) { - MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init(); - desc->setDepthWriteEnabled(hash.depthStencilWrite); - desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc)); + const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig); + MTL::StencilDescriptor* stencilDesc = nullptr; + if (stencilEnable) { + const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig); + const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig); - const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig); - MTL::StencilDescriptor* stencilDesc = nullptr; - if (stencilEnable) { - const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig); - const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig); + const u32 stencilBufferMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0; - const u32 stencilBufferMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0; + const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig); + const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig); + const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig); - const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig); - const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig); - const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig); + stencilDesc = MTL::StencilDescriptor::alloc()->init(); + stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp)); + stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp)); + stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp)); + stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc)); + stencilDesc->setReadMask(stencilRefMask); + stencilDesc->setWriteMask(stencilBufferMask); - stencilDesc = MTL::StencilDescriptor::alloc()->init(); - stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp)); - stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp)); - stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp)); - stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc)); - stencilDesc->setReadMask(stencilRefMask); - stencilDesc->setWriteMask(stencilBufferMask); + desc->setFrontFaceStencil(stencilDesc); + desc->setBackFaceStencil(stencilDesc); + } - desc->setFrontFaceStencil(stencilDesc); - desc->setBackFaceStencil(stencilDesc); - } + depthStencilState = device->newDepthStencilState(desc); - depthStencilState = device->newDepthStencilState(desc); + desc->release(); + if (stencilDesc) { + stencilDesc->release(); + } + } - desc->release(); - if (stencilDesc) { - stencilDesc->release(); - } - } + return depthStencilState; + } - return depthStencilState; - } + void reset() { + for (auto& pair : depthStencilCache) { + pair.second->release(); + } + depthStencilCache.clear(); + } - void reset() { - for (auto& pair : depthStencilCache) { - pair.second->release(); - } - depthStencilCache.clear(); - } - -private: - std::map depthStencilCache; - - MTL::Device* device; -}; - -} // namespace Metal + private: + std::map depthStencilCache; + MTL::Device* device; + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp index c5105a13..ace324fe 100644 --- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -7,171 +7,155 @@ using namespace PICA; namespace Metal { + struct DrawFragmentFunctionHash { + bool lightingEnabled; // 1 bit + u8 lightingNumLights; // 3 bits + u32 lightingConfig1; // 32 bits (TODO: check this) + // | ref | func | on | + u16 alphaControl; // 12 bits (mask: 11111111 0111 0001) + }; -struct DrawFragmentFunctionHash { - bool lightingEnabled; // 1 bit - u8 lightingNumLights; // 3 bits - u32 lightingConfig1; // 32 bits (TODO: check this) - // | ref | func | on | - u16 alphaControl; // 12 bits (mask: 11111111 0111 0001) -}; + inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { + if (!l.lightingEnabled && r.lightingEnabled) return true; + if (l.lightingNumLights < r.lightingNumLights) return true; + if (l.lightingConfig1 < r.lightingConfig1) return true; + if (l.alphaControl < r.alphaControl) return true; -//bool operator==(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { -// return ((l.lightingEnabled == r.lightingEnabled) && (l.lightingNumLights == r.lightingNumLights) && -// (l.lightingConfig1 == r.lightingConfig1) && (l.alphaControl == r.alphaControl)); -//} + return false; + } -inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { - if (!l.lightingEnabled && r.lightingEnabled) return true; - if (l.lightingNumLights < r.lightingNumLights) return true; - if (l.lightingConfig1 < r.lightingConfig1) return true; - if (l.alphaControl < r.alphaControl) return true; + struct DrawPipelineHash { // 56 bits + // Formats + ColorFmt colorFmt; // 3 bits + DepthFmt depthFmt; // 3 bits - return false; -} + // Blending + bool blendEnabled; // 1 bit + // | functions | aeq | ceq | + u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111) + u8 colorWriteMask; // 4 bits -struct DrawPipelineHash { // 56 bits - // Formats - ColorFmt colorFmt; // 3 bits - DepthFmt depthFmt; // 3 bits + DrawFragmentFunctionHash fragHash; + }; - // Blending - bool blendEnabled; // 1 bit - // | functions | aeq | ceq | - u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111) - u8 colorWriteMask; // 4 bits + inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) { + if ((u32)l.colorFmt < (u32)r.colorFmt) return true; + if ((u32)l.depthFmt < (u32)r.depthFmt) return true; + if (!l.blendEnabled && r.blendEnabled) return true; + if (l.blendControl < r.blendControl) return true; + if (l.colorWriteMask < r.colorWriteMask) return true; + if (l.fragHash < r.fragHash) return true; - DrawFragmentFunctionHash fragHash; -}; + return false; + } -//bool operator==(const DrawPipelineHash& l, const DrawPipelineHash& r) { -// return (((u32)l.colorFmt == (u32)r.colorFmt) && ((u32)l.depthFmt == (u32)r.depthFmt) && -// (l.blendEnabled == r.blendEnabled) && (l.blendControl == r.blendControl) && -// (l.colorWriteMask == r.colorWriteMask) && (l.fragHash == r.fragHash)); -//} + // This pipeline only caches the pipeline with all of its color and depth attachment variations + class DrawPipelineCache { + public: + DrawPipelineCache() = default; -inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) { - if ((u32)l.colorFmt < (u32)r.colorFmt) return true; - if ((u32)l.depthFmt < (u32)r.depthFmt) return true; - if (!l.blendEnabled && r.blendEnabled) return true; - if (l.blendControl < r.blendControl) return true; - if (l.colorWriteMask < r.colorWriteMask) return true; - if (l.fragHash < r.fragHash) return true; + ~DrawPipelineCache() { + reset(); + vertexDescriptor->release(); + vertexFunction->release(); + } - return false; -} + void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) { + device = dev; + library = lib; + vertexFunction = vert; + vertexDescriptor = vertDesc; + } -// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices -#define VERTEX_BUFFER_BINDING_INDEX 30 + MTL::RenderPipelineState* get(DrawPipelineHash hash) { + auto& pipeline = pipelineCache[hash]; -// This pipeline only caches the pipeline with all of its color and depth attachment variations -class DrawPipelineCache { -public: - DrawPipelineCache() = default; + if (!pipeline) { + auto& fragmentFunction = fragmentFunctionCache[hash.fragHash]; + if (!fragmentFunction) { + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); + constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); + constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); + constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2)); + constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3)); - ~DrawPipelineCache() { - reset(); - vertexDescriptor->release(); - vertexFunction->release(); - } + NS::Error* error = nullptr; + fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); + } - void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) { - device = dev; - library = lib; - vertexFunction = vert; - vertexDescriptor = vertDesc; - } + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + desc->setVertexDescriptor(vertexDescriptor); - MTL::RenderPipelineState* get(DrawPipelineHash hash) { - //u32 fragmentFunctionHash = ((u32)hash.lightingEnabled << 22) | ((u32)hash.lightingNumLights << 19) | ((u32)hash.lightingConfig1 << 12) | ((((u32)hash.alphaControl & 0b1111111100000000) >> 8) << 4) | ((((u32)hash.alphaControl & 0b01110000) >> 4) << 1) | ((u32)hash.alphaControl & 0b0001); - //u64 pipelineHash = ((u64)hash.colorFmt << 53) | ((u64)hash.depthFmt << 50) | ((u64)hash.blendEnabled << 49) | ((u64)hash.colorWriteMask << 45) | ((((u64)hash.blendControl & 0b11111111111111110000000000000000) >> 16) << 29) | ((((u64)hash.blendControl & 0b0000011100000000) >> 8) << 26) | (((u64)hash.blendControl & 0b00000111) << 23) | fragmentFunctionHash; - auto& pipeline = pipelineCache[hash]; - if (!pipeline) { - auto& fragmentFunction = fragmentFunctionCache[hash.fragHash]; - if (!fragmentFunction) { - MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); - constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); - constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); - constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2)); - constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3)); + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + MTL::ColorWriteMask writeMask = 0; + if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed; + if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen; + if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue; + if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha; + colorAttachment->setWriteMask(writeMask); + if (hash.blendEnabled) { + const u8 rgbEquation = hash.blendControl & 0x7; + const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl); - NS::Error* error = nullptr; - fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); - if (error) { - Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding)); - } - constants->release(); - } + // Get blending functions + const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl); + const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl); + const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl); + const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl); - MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); - desc->setVertexFunction(vertexFunction); - desc->setFragmentFunction(fragmentFunction); - desc->setVertexDescriptor(vertexDescriptor); + colorAttachment->setBlendingEnabled(true); + colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation)); + colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation)); + colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc)); + colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc)); + colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc)); + colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); + } - auto colorAttachment = desc->colorAttachments()->object(0); - colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); - MTL::ColorWriteMask writeMask = 0; - if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed; - if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen; - if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue; - if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha; - colorAttachment->setWriteMask(writeMask); - if (hash.blendEnabled) { - const u8 rgbEquation = hash.blendControl & 0x7; - const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl); + MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt); + desc->setDepthAttachmentPixelFormat(depthFormat); + if (hash.depthFmt == DepthFmt::Depth24Stencil8) desc->setStencilAttachmentPixelFormat(depthFormat); - // Get blending functions - const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl); - const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl); - const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl); - const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl); + NS::Error* error = nullptr; + desc->setLabel(toNSString("Draw pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } - colorAttachment->setBlendingEnabled(true); - colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation)); - colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation)); - colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc)); - colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc)); - colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc)); - colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); - } + desc->release(); + } - MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt); - desc->setDepthAttachmentPixelFormat(depthFormat); - if (hash.depthFmt == DepthFmt::Depth24Stencil8) - desc->setStencilAttachmentPixelFormat(depthFormat); + return pipeline; + } - NS::Error* error = nullptr; - desc->setLabel(toNSString("Draw pipeline")); - pipeline = device->newRenderPipelineState(desc, &error); - if (error) { - Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); - } + void reset() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + pipelineCache.clear(); - desc->release(); - } + for (auto& pair : fragmentFunctionCache) { + pair.second->release(); + } + fragmentFunctionCache.clear(); + } - return pipeline; - } + private: + std::map pipelineCache; + std::map fragmentFunctionCache; - void reset() { - for (auto& pair : pipelineCache) { - pair.second->release(); - } - pipelineCache.clear(); - for (auto& pair : fragmentFunctionCache) { - pair.second->release(); - } - fragmentFunctionCache.clear(); - } + MTL::Device* device; + MTL::Library* library; + MTL::Function* vertexFunction; + MTL::VertexDescriptor* vertexDescriptor; + }; -private: - std::map pipelineCache; - std::map fragmentFunctionCache; - - MTL::Device* device; - MTL::Library* library; - MTL::Function* vertexFunction; - MTL::VertexDescriptor* vertexDescriptor; -}; - -} // namespace Metal +} // namespace Metal diff --git a/include/renderer_mtl/mtl_lut_texture.hpp b/include/renderer_mtl/mtl_lut_texture.hpp index 162bfe25..531dc73c 100644 --- a/include/renderer_mtl/mtl_lut_texture.hpp +++ b/include/renderer_mtl/mtl_lut_texture.hpp @@ -8,17 +8,12 @@ class LutTexture { public: LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name); ~LutTexture(); - u32 getNextIndex(); - // Getters MTL::Texture* getTexture() { return texture; } - u32 getCurrentIndex() { return currentIndex; } - private: MTL::Texture* texture; - u32 currentIndex = 0; }; diff --git a/include/renderer_mtl/mtl_render_target.hpp b/include/renderer_mtl/mtl_render_target.hpp index 73be45f4..8f80ea64 100644 --- a/include/renderer_mtl/mtl_render_target.hpp +++ b/include/renderer_mtl/mtl_render_target.hpp @@ -1,92 +1,91 @@ #pragma once +#include #include #include -#include + #include "boost/icl/interval.hpp" #include "helpers.hpp" #include "math_util.hpp" +#include "objc_helper.hpp" #include "opengl.hpp" #include "pica_to_mtl.hpp" -#include "objc_helper.hpp" template using Interval = boost::icl::right_open_interval; namespace Metal { + template + struct RenderTarget { + MTL::Device* device; -template -struct RenderTarget { - MTL::Device* device; + u32 location; + Format_t format; + OpenGL::uvec2 size; + bool valid; - u32 location; - Format_t format; - OpenGL::uvec2 size; - bool valid; + // Range of VRAM taken up by buffer + Interval range; - // Range of VRAM taken up by buffer - Interval range; + MTL::Texture* texture = nullptr; - MTL::Texture* texture = nullptr; + RenderTarget() : valid(false) {} - RenderTarget() : valid(false) {} + RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } - RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true) - : device(dev), location(loc), format(format), size({x, y}), valid(valid) { - u64 endLoc = (u64)loc + sizeInBytes(); - // Check if start and end are valid here - range = Interval(loc, (u32)endLoc); - } + Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { + const u32 startOffset = (inputAddress - location) / sizePerPixel(format); + const u32 x0 = (startOffset % (size.x() * 8)) / 8; + const u32 y0 = (startOffset / (size.x() * 8)) * 8; + return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; + } - Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { - const u32 startOffset = (inputAddress - location) / sizePerPixel(format); - const u32 x0 = (startOffset % (size.x() * 8)) / 8; - const u32 y0 = (startOffset / (size.x() * 8)) * 8; - return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; - } + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture + bool matches(RenderTarget& other) { + return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y(); + } - // For 2 textures to "match" we only care about their locations, formats, and dimensions to match - // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture - bool matches(RenderTarget& other) { - return location == other.location && format == other.format && - size.x() == other.size.x() && size.y() == other.size.y(); - } + void allocate() { + MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid; + if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format); + } else if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format); + } else { + panic("Invalid format type"); + } - void allocate() { - MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid; - if (std::is_same::value) { - pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format); - } else if (std::is_same::value) { - pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format); - } else { - panic("Invalid format type"); - } + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModePrivate); + texture = device->newTexture(descriptor); + texture->setLabel(toNSString( + std::string(std::is_same::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" + + std::to_string(size.v()) + )); + descriptor->release(); + } - MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); - descriptor->setTextureType(MTL::TextureType2D); - descriptor->setPixelFormat(pixelFormat); - descriptor->setWidth(size.u()); - descriptor->setHeight(size.v()); - descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); - descriptor->setStorageMode(MTL::StorageModePrivate); - texture = device->newTexture(descriptor); - texture->setLabel(toNSString(std::string(std::is_same::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" + std::to_string(size.v()))); - descriptor->release(); - } + void free() { + valid = false; - void free() { - valid = false; + if (texture) { + texture->release(); + } + } - if (texture) { - texture->release(); - } - } + u64 sizeInBytes() { return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); } + }; - u64 sizeInBytes() { - return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); - } -}; - -typedef RenderTarget ColorRenderTarget; -typedef RenderTarget DepthStencilRenderTarget; - -} // namespace Metal + using ColorRenderTarget = RenderTarget; + using DepthStencilRenderTarget = RenderTarget; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp index 9cec268d..51cb4c4b 100644 --- a/include/renderer_mtl/mtl_texture.hpp +++ b/include/renderer_mtl/mtl_texture.hpp @@ -1,8 +1,9 @@ #pragma once +#include #include #include -#include + #include "PICA/regs.hpp" #include "boost/icl/interval.hpp" #include "helpers.hpp" @@ -10,69 +11,64 @@ #include "opengl.hpp" #include "renderer_mtl/pica_to_mtl.hpp" + template using Interval = boost::icl::right_open_interval; namespace Metal { + struct Texture { + MTL::Device* device; -struct Texture { - MTL::Device* device; + u32 location; + u32 config; // Magnification/minification filter, wrapping configs, etc + PICA::TextureFmt format; + OpenGL::uvec2 size; + bool valid; - u32 location; - u32 config; // Magnification/minification filter, wrapping configs, etc - PICA::TextureFmt format; - OpenGL::uvec2 size; - bool valid; + // Range of VRAM taken up by buffer + Interval range; - // Range of VRAM taken up by buffer - Interval range; + PICA::PixelFormatInfo formatInfo; + MTL::Texture* texture = nullptr; + MTL::SamplerState* sampler = nullptr; - PICA::PixelFormatInfo formatInfo; - MTL::Texture* texture = nullptr; - MTL::SamplerState* sampler = nullptr; + Texture() : valid(false) {} - Texture() : valid(false) {} + Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } - Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true) - : device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) { + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture + bool matches(Texture& other) { + return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y(); + } - u64 endLoc = (u64)loc + sizeInBytes(); - // Check if start and end are valid here - range = Interval(loc, (u32)endLoc); - } + void allocate(); + void setNewConfig(u32 newConfig); + void decodeTexture(std::span data); + void free(); + u64 sizeInBytes(); - // For 2 textures to "match" we only care about their locations, formats, and dimensions to match - // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture - bool matches(Texture& other) { - return location == other.location && format == other.format && - size.x() == other.size.x() && size.y() == other.size.y(); - } + u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); - void allocate(); - void setNewConfig(u32 newConfig); - void decodeTexture(std::span data); - void free(); - u64 sizeInBytes(); + // Get the morton interleave offset of a texel based on its U and V values + static u32 mortonInterleave(u32 u, u32 v); + // Get the byte offset of texel (u, v) in the texture + static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel); + static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); - u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); - u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); - u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + // Returns the format of this texture as a string + std::string_view formatToString() { return PICA::textureFormatToString(format); } - // Get the morton interleave offset of a texel based on its U and V values - static u32 mortonInterleave(u32 u, u32 v); - // Get the byte offset of texel (u, v) in the texture - static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel); - static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); - - // Returns the format of this texture as a string - std::string_view formatToString() { - return PICA::textureFormatToString(format); - } - - // Returns the texel at coordinates (u, v) of an ETC1(A4) texture - // TODO: Make hasAlpha a template parameter - u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data); - u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData); -}; - -} // namespace Metal + // Returns the texel at coordinates (u, v) of an ETC1(A4) texture + // TODO: Make hasAlpha a template parameter + u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data); + u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData); + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp index cc552477..d53af283 100644 --- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -5,76 +5,74 @@ using namespace PICA; namespace Metal { + struct BufferHandle { + MTL::Buffer* buffer; + size_t offset; + }; -struct BufferHandle { - MTL::Buffer* buffer; - size_t offset; -}; + class VertexBufferCache { + // 128MB buffer for caching vertex data + static constexpr usize CACHE_BUFFER_SIZE = 128 * 1024 * 1024; -// 128MB buffer for caching vertex data -#define CACHE_BUFFER_SIZE 128 * 1024 * 1024 + public: + VertexBufferCache() = default; -class VertexBufferCache { -public: - VertexBufferCache() = default; + ~VertexBufferCache() { + endFrame(); + buffer->release(); + } - ~VertexBufferCache() { - endFrame(); - buffer->release(); - } + void set(MTL::Device* dev) { + device = dev; + create(); + } - void set(MTL::Device* dev) { - device = dev; - create(); - } + void endFrame() { + ptr = 0; + for (auto buffer : additionalAllocations) { + buffer->release(); + } + additionalAllocations.clear(); + } - void endFrame() { - ptr = 0; - for (auto buffer : additionalAllocations) { - buffer->release(); - } - additionalAllocations.clear(); - } + BufferHandle get(const void* data, size_t size) { + // If the vertex buffer is too large, just create a new one + if (ptr + size > CACHE_BUFFER_SIZE) { + MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared); + newBuffer->setLabel(toNSString("Additional vertex buffer")); + additionalAllocations.push_back(newBuffer); + Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer"); - BufferHandle get(const void* data, size_t size) { - // If the vertex buffer is too large, just create a new one - if (ptr + size > CACHE_BUFFER_SIZE) { - MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared); - newBuffer->setLabel(toNSString("Additional vertex buffer")); - additionalAllocations.push_back(newBuffer); - Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer"); + return BufferHandle{newBuffer, 0}; + } - return BufferHandle{newBuffer, 0}; - } + // Copy the data into the buffer + memcpy((char*)buffer->contents() + ptr, data, size); - // Copy the data into the buffer - memcpy((char*)buffer->contents() + ptr, data, size); + size_t oldPtr = ptr; + ptr += size; - size_t oldPtr = ptr; - ptr += size; + return BufferHandle{buffer, oldPtr}; + } - return BufferHandle{buffer, oldPtr}; - } + void reset() { + endFrame(); + if (buffer) { + buffer->release(); + create(); + } + } - void reset() { - endFrame(); - if (buffer) { - buffer->release(); - create(); - } - } + private: + MTL::Buffer* buffer = nullptr; + size_t ptr = 0; + std::vector additionalAllocations; -private: - MTL::Buffer* buffer = nullptr; - size_t ptr = 0; - std::vector additionalAllocations; + MTL::Device* device; - MTL::Device* device; - - void create() { - buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared); - buffer->setLabel(toNSString("Shared vertex buffer")); - } -}; - -} // namespace Metal + void create() { + buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared); + buffer->setLabel(toNSString("Shared vertex buffer")); + } + }; +} // namespace Metal diff --git a/include/renderer_mtl/objc_helper.hpp b/include/renderer_mtl/objc_helper.hpp index 7d0e8646..86992f1d 100644 --- a/include/renderer_mtl/objc_helper.hpp +++ b/include/renderer_mtl/objc_helper.hpp @@ -5,12 +5,8 @@ #include "mtl_common.hpp" namespace Metal { - -dispatch_data_t createDispatchData(const void* data, size_t size); - -} // namespace Metal + dispatch_data_t createDispatchData(const void* data, size_t size); +} // namespace Metal // Cast from std::string to NS::String* -inline NS::String* toNSString(const std::string& str) { - return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); -} +inline NS::String* toNSString(const std::string& str) { return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); } \ No newline at end of file diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp index de76dc3b..9234c748 100644 --- a/include/renderer_mtl/pica_to_mtl.hpp +++ b/include/renderer_mtl/pica_to_mtl.hpp @@ -1,155 +1,154 @@ #pragma once #include + #include "PICA/regs.hpp" + namespace PICA { + struct PixelFormatInfo { + MTL::PixelFormat pixelFormat; + size_t bytesPerTexel; + }; -struct PixelFormatInfo { - MTL::PixelFormat pixelFormat; - size_t bytesPerTexel; -}; + constexpr PixelFormatInfo pixelFormatInfos[14] = { + {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8 + {MTL::PixelFormatRGBA8Unorm, 4}, // RGB8 + {MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551 + {MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565 + {MTL::PixelFormatABGR4Unorm, 2}, // RGBA4 + {MTL::PixelFormatRGBA8Unorm, 4}, // IA8 + {MTL::PixelFormatRG8Unorm, 2}, // RG8 + {MTL::PixelFormatRGBA8Unorm, 4}, // I8 + {MTL::PixelFormatA8Unorm, 1}, // A8 + {MTL::PixelFormatABGR4Unorm, 2}, // IA4 + {MTL::PixelFormatABGR4Unorm, 2}, // I4 + {MTL::PixelFormatA8Unorm, 1}, // A4 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4 + }; -constexpr PixelFormatInfo pixelFormatInfos[14] = { - {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8 - {MTL::PixelFormatRGBA8Unorm, 4}, // RGB8 - {MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551 - {MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565 - {MTL::PixelFormatABGR4Unorm, 2}, // RGBA4 - {MTL::PixelFormatRGBA8Unorm, 4}, // IA8 - {MTL::PixelFormatRG8Unorm, 2}, // RG8 - {MTL::PixelFormatRGBA8Unorm, 4}, // I8 - {MTL::PixelFormatA8Unorm, 1}, // A8 - {MTL::PixelFormatABGR4Unorm, 2}, // IA4 - {MTL::PixelFormatABGR4Unorm, 2}, // I4 - {MTL::PixelFormatA8Unorm, 1}, // A4 - {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1 - {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4 -}; + inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast(format)]; } -inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { - return pixelFormatInfos[static_cast(format)]; -} + inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) { + switch (format) { + case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm? + case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm? + case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm; + } + } -inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) { - switch (format) { - case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm; - case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm; - case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm? - case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm? - case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm; - } -} + inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) { + switch (format) { + case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm; + case DepthFmt::Unknown1: return MTL::PixelFormatInvalid; + case DepthFmt::Depth24: + return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats + // Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead + case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8; + } + } -inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) { - switch (format) { - case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm; - case DepthFmt::Unknown1: return MTL::PixelFormatInvalid; - case DepthFmt::Depth24: return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats - // Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead - case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8; - } -} + inline MTL::CompareFunction toMTLCompareFunc(u8 func) { + switch (func) { + case 0: return MTL::CompareFunctionNever; + case 1: return MTL::CompareFunctionAlways; + case 2: return MTL::CompareFunctionEqual; + case 3: return MTL::CompareFunctionNotEqual; + case 4: return MTL::CompareFunctionLess; + case 5: return MTL::CompareFunctionLessEqual; + case 6: return MTL::CompareFunctionGreater; + case 7: return MTL::CompareFunctionGreaterEqual; + default: panic("Unknown compare function %u", func); + } -inline MTL::CompareFunction toMTLCompareFunc(u8 func) { - switch (func) { - case 0: return MTL::CompareFunctionNever; - case 1: return MTL::CompareFunctionAlways; - case 2: return MTL::CompareFunctionEqual; - case 3: return MTL::CompareFunctionNotEqual; - case 4: return MTL::CompareFunctionLess; - case 5: return MTL::CompareFunctionLessEqual; - case 6: return MTL::CompareFunctionGreater; - case 7: return MTL::CompareFunctionGreaterEqual; - default: panic("Unknown compare function %u", func); - } + return MTL::CompareFunctionAlways; + } - return MTL::CompareFunctionAlways; -} + inline MTL::BlendOperation toMTLBlendOperation(u8 op) { + switch (op) { + case 0: return MTL::BlendOperationAdd; + case 1: return MTL::BlendOperationSubtract; + case 2: return MTL::BlendOperationReverseSubtract; + case 3: return MTL::BlendOperationMin; + case 4: return MTL::BlendOperationMax; + case 5: return MTL::BlendOperationAdd; // Unused (same as 0) + case 6: return MTL::BlendOperationAdd; // Unused (same as 0) + case 7: return MTL::BlendOperationAdd; // Unused (same as 0) + default: panic("Unknown blend operation %u", op); + } -inline MTL::BlendOperation toMTLBlendOperation(u8 op) { - switch (op) { - case 0: return MTL::BlendOperationAdd; - case 1: return MTL::BlendOperationSubtract; - case 2: return MTL::BlendOperationReverseSubtract; - case 3: return MTL::BlendOperationMin; - case 4: return MTL::BlendOperationMax; - case 5: return MTL::BlendOperationAdd; // Unused (same as 0) - case 6: return MTL::BlendOperationAdd; // Unused (same as 0) - case 7: return MTL::BlendOperationAdd; // Unused (same as 0) - default: panic("Unknown blend operation %u", op); - } + return MTL::BlendOperationAdd; + } - return MTL::BlendOperationAdd; -} + inline MTL::BlendFactor toMTLBlendFactor(u8 factor) { + switch (factor) { + case 0: return MTL::BlendFactorZero; + case 1: return MTL::BlendFactorOne; + case 2: return MTL::BlendFactorSourceColor; + case 3: return MTL::BlendFactorOneMinusSourceColor; + case 4: return MTL::BlendFactorDestinationColor; + case 5: return MTL::BlendFactorOneMinusDestinationColor; + case 6: return MTL::BlendFactorSourceAlpha; + case 7: return MTL::BlendFactorOneMinusSourceAlpha; + case 8: return MTL::BlendFactorDestinationAlpha; + case 9: return MTL::BlendFactorOneMinusDestinationAlpha; + case 10: return MTL::BlendFactorBlendColor; + case 11: return MTL::BlendFactorOneMinusBlendColor; + case 12: return MTL::BlendFactorBlendAlpha; + case 13: return MTL::BlendFactorOneMinusBlendAlpha; + case 14: return MTL::BlendFactorSourceAlphaSaturated; + case 15: return MTL::BlendFactorOne; // Undocumented + default: panic("Unknown blend factor %u", factor); + } -inline MTL::BlendFactor toMTLBlendFactor(u8 factor) { - switch (factor) { - case 0: return MTL::BlendFactorZero; - case 1: return MTL::BlendFactorOne; - case 2: return MTL::BlendFactorSourceColor; - case 3: return MTL::BlendFactorOneMinusSourceColor; - case 4: return MTL::BlendFactorDestinationColor; - case 5: return MTL::BlendFactorOneMinusDestinationColor; - case 6: return MTL::BlendFactorSourceAlpha; - case 7: return MTL::BlendFactorOneMinusSourceAlpha; - case 8: return MTL::BlendFactorDestinationAlpha; - case 9: return MTL::BlendFactorOneMinusDestinationAlpha; - case 10: return MTL::BlendFactorBlendColor; - case 11: return MTL::BlendFactorOneMinusBlendColor; - case 12: return MTL::BlendFactorBlendAlpha; - case 13: return MTL::BlendFactorOneMinusBlendAlpha; - case 14: return MTL::BlendFactorSourceAlphaSaturated; - case 15: return MTL::BlendFactorOne; // Undocumented - default: panic("Unknown blend factor %u", factor); - } + return MTL::BlendFactorOne; + } - return MTL::BlendFactorOne; -} + inline MTL::StencilOperation toMTLStencilOperation(u8 op) { + switch (op) { + case 0: return MTL::StencilOperationKeep; + case 1: return MTL::StencilOperationZero; + case 2: return MTL::StencilOperationReplace; + case 3: return MTL::StencilOperationIncrementClamp; + case 4: return MTL::StencilOperationDecrementClamp; + case 5: return MTL::StencilOperationInvert; + case 6: return MTL::StencilOperationIncrementWrap; + case 7: return MTL::StencilOperationDecrementWrap; + default: panic("Unknown stencil operation %u", op); + } -inline MTL::StencilOperation toMTLStencilOperation(u8 op) { - switch (op) { - case 0: return MTL::StencilOperationKeep; - case 1: return MTL::StencilOperationZero; - case 2: return MTL::StencilOperationReplace; - case 3: return MTL::StencilOperationIncrementClamp; - case 4: return MTL::StencilOperationDecrementClamp; - case 5: return MTL::StencilOperationInvert; - case 6: return MTL::StencilOperationIncrementWrap; - case 7: return MTL::StencilOperationDecrementWrap; - default: panic("Unknown stencil operation %u", op); - } + return MTL::StencilOperationKeep; + } - return MTL::StencilOperationKeep; -} + inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) { + switch (primType) { + case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle; + case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip; + case PrimType::TriangleFan: + Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); + return MTL::PrimitiveTypeTriangle; + case PrimType::GeometryPrimitive: + // Helpers::warn("Geometry primitives are not yet, using triangles instead"); + return MTL::PrimitiveTypeTriangle; + } + } -inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) { - switch (primType) { - case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle; - case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip; - case PrimType::TriangleFan: - Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); - return MTL::PrimitiveTypeTriangle; - case PrimType::GeometryPrimitive: - //Helpers::warn("Geometry primitives are not yet, using triangles instead"); - return MTL::PrimitiveTypeTriangle; - } -} + inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) { + switch (addrMode) { + case 0: return MTL::SamplerAddressModeClampToEdge; + case 1: return MTL::SamplerAddressModeClampToBorderColor; + case 2: return MTL::SamplerAddressModeRepeat; + case 3: return MTL::SamplerAddressModeMirrorRepeat; + case 4: return MTL::SamplerAddressModeClampToEdge; + case 5: return MTL::SamplerAddressModeClampToBorderColor; + case 6: return MTL::SamplerAddressModeRepeat; + case 7: return MTL::SamplerAddressModeRepeat; + default: panic("Unknown sampler address mode %u", addrMode); + } -inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) { - switch (addrMode) { - case 0: return MTL::SamplerAddressModeClampToEdge; - case 1: return MTL::SamplerAddressModeClampToBorderColor; - case 2: return MTL::SamplerAddressModeRepeat; - case 3: return MTL::SamplerAddressModeMirrorRepeat; - case 4: return MTL::SamplerAddressModeClampToEdge; - case 5: return MTL::SamplerAddressModeClampToBorderColor; - case 6: return MTL::SamplerAddressModeRepeat; - case 7: return MTL::SamplerAddressModeRepeat; - default: panic("Unknown sampler address mode %u", addrMode); - } - - return MTL::SamplerAddressModeClampToEdge; -} - -} // namespace PICA + return MTL::SamplerAddressModeClampToEdge; + } +} // namespace PICA diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp index 6b356896..bd5c3bf1 100644 --- a/include/renderer_mtl/renderer_mtl.hpp +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -3,15 +3,16 @@ #include #include -#include "renderer.hpp" -#include "mtl_texture.hpp" -#include "mtl_render_target.hpp" #include "mtl_blit_pipeline_cache.hpp" -#include "mtl_draw_pipeline_cache.hpp" -#include "mtl_depth_stencil_cache.hpp" -#include "mtl_vertex_buffer_cache.hpp" -#include "mtl_lut_texture.hpp" #include "mtl_command_encoder.hpp" +#include "mtl_depth_stencil_cache.hpp" +#include "mtl_draw_pipeline_cache.hpp" +#include "mtl_lut_texture.hpp" +#include "mtl_render_target.hpp" +#include "mtl_texture.hpp" +#include "mtl_vertex_buffer_cache.hpp" +#include "renderer.hpp" + // HACK: use the OpenGL cache #include "../renderer_gl/surface_cache.hpp" @@ -19,7 +20,7 @@ class GPU; struct Color4 { - float r, g, b, a; + float r, g, b, a; }; class RendererMTL final : public Renderer { @@ -72,7 +73,7 @@ class RendererMTL final : public Renderer { // Pipelines MTL::RenderPipelineState* displayPipeline; - //MTL::RenderPipelineState* copyToLutTexturePipeline; + // MTL::RenderPipelineState* copyToLutTexturePipeline; // Clears std::map colorClearOps; @@ -95,93 +96,112 @@ class RendererMTL final : public Renderer { } void endRenderPass() { - if (renderCommandEncoder) { - renderCommandEncoder->endEncoding(); - renderCommandEncoder = nullptr; - } + if (renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder = nullptr; + } } - void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr); + void beginRenderPassIfNeeded( + MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr + ); void commitCommandBuffer() { - if (renderCommandEncoder) { - renderCommandEncoder->endEncoding(); - renderCommandEncoder->release(); - renderCommandEncoder = nullptr; - } - if (commandBuffer) { - commandBuffer->commit(); - // HACK - commandBuffer->waitUntilCompleted(); - commandBuffer->release(); - commandBuffer = nullptr; - } - } + if (renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder->release(); + renderCommandEncoder = nullptr; + } + if (commandBuffer) { + commandBuffer->commit(); + // HACK + commandBuffer->waitUntilCompleted(); + commandBuffer->release(); + commandBuffer = nullptr; + } + } - template - inline void clearAttachment(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment, SetClearDataT setClearData) { - bool beginRenderPass = (renderPassDescriptor == nullptr); - if (!renderPassDescriptor) { - renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); - } + template + inline void clearAttachment( + MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment, + SetClearDataT setClearData + ) { + bool beginRenderPass = (renderPassDescriptor == nullptr); + if (!renderPassDescriptor) { + renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + } - AttachmentT* attachment = getAttachment(renderPassDescriptor); + AttachmentT* attachment = getAttachment(renderPassDescriptor); attachment->setTexture(texture); setClearData(attachment, clearData); attachment->setLoadAction(MTL::LoadActionClear); attachment->setStoreAction(MTL::StoreActionStore); if (beginRenderPass) { - if (std::is_same::value) - beginRenderPassIfNeeded(renderPassDescriptor, true, texture); - else - beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture); + if (std::is_same::value) + beginRenderPassIfNeeded(renderPassDescriptor, true, texture); + else + beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture); } - } + } - template - inline bool clearAttachment(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map& clearOps, GetAttachmentT getAttachment, SetClearDataT setClearData) { - auto it = clearOps.find(texture); - if (it != clearOps.end()) { - clearAttachment(renderPassDescriptor, texture, it->second, getAttachment, setClearData); - clearOps.erase(it); - return true; - } + template + inline bool clearAttachment( + MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map& clearOps, + GetAttachmentT getAttachment, SetClearDataT setClearData + ) { + auto it = clearOps.find(texture); + if (it != clearOps.end()) { + clearAttachment(renderPassDescriptor, texture, it->second, getAttachment, setClearData); + clearOps.erase(it); + return true; + } - if (renderPassDescriptor) { - AttachmentT* attachment = getAttachment(renderPassDescriptor); - attachment->setTexture(texture); - attachment->setLoadAction(MTL::LoadActionLoad); - attachment->setStoreAction(MTL::StoreActionStore); - } + if (renderPassDescriptor) { + AttachmentT* attachment = getAttachment(renderPassDescriptor); + attachment->setTexture(texture); + attachment->setLoadAction(MTL::LoadActionLoad); + attachment->setStoreAction(MTL::StoreActionStore); + } - return false; - } + return false; + } - bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { - return clearAttachment(renderPassDescriptor, texture, colorClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); }, [](auto attachment, auto& color) { - attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); - }); - } + bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, colorClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); }, + [](auto attachment, auto& color) { attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); } + ); + } - bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { - return clearAttachment(renderPassDescriptor, texture, depthClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); }, [](auto attachment, auto& depth) { - attachment->setClearDepth(depth); - }); - } + bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, depthClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); }, + [](auto attachment, auto& depth) { attachment->setClearDepth(depth); } + ); + } - bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { - return clearAttachment(renderPassDescriptor, texture, stencilClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); }, [](auto attachment, auto& stencil) { - attachment->setClearStencil(stencil); - }); - } + bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, stencilClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); }, + [](auto attachment, auto& stencil) { attachment->setClearStencil(stencil); } + ); + } - std::optional getColorRenderTarget(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); + std::optional getColorRenderTarget( + u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true + ); Metal::DepthStencilRenderTarget& getDepthRenderTarget(); Metal::Texture& getTexture(Metal::Texture& tex); void setupTextureEnvState(MTL::RenderCommandEncoder* encoder); void bindTexturesToSlots(); void updateLightingLUT(MTL::RenderCommandEncoder* encoder); void updateFogLUT(MTL::RenderCommandEncoder* encoder); - void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect); + void textureCopyImpl( + Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, + const Math::Rect& destRect + ); }; diff --git a/src/core/renderer_gl/etc1.cpp b/src/core/renderer_gl/etc1.cpp index 8aefd622..0b4ed1a5 100644 --- a/src/core/renderer_gl/etc1.cpp +++ b/src/core/renderer_gl/etc1.cpp @@ -12,8 +12,9 @@ static constexpr u32 signExtend3To32(u32 val) { u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { // Pixel offset of the 8x8 tile based on u, v and the width of the texture u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); - if (!hasAlpha) + if (!hasAlpha) { offs >>= 1; + } // In-tile offsets for u/v u &= 7; diff --git a/src/core/renderer_mtl/mtl_etc1.cpp b/src/core/renderer_mtl/mtl_etc1.cpp index a414df3c..420a60ca 100644 --- a/src/core/renderer_mtl/mtl_etc1.cpp +++ b/src/core/renderer_mtl/mtl_etc1.cpp @@ -1,124 +1,116 @@ #include + #include "colour.hpp" -#include "renderer_mtl/renderer_mtl.hpp" #include "renderer_mtl/mtl_texture.hpp" +#include "renderer_mtl/renderer_mtl.hpp" + using namespace Helpers; namespace Metal { - -static constexpr u32 signExtend3To32(u32 val) { - return (u32)(s32(val) << 29 >> 29); -} - -u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { - // Pixel offset of the 8x8 tile based on u, v and the width of the texture - u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); - if (!hasAlpha) - offs >>= 1; - - // In-tile offsets for u/v - u &= 7; - v &= 7; - - // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles - // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes - const u32 subTileSize = hasAlpha ? 16 : 8; - const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in? - - // In-subtile offsets for u/v - u &= 3; - v &= 3; - offs += subTileSize * subTileIndex; - - u32 alpha; - const u64* ptr = reinterpret_cast(data.data() + offs); // Cast to u64* - - if (hasAlpha) { - // First 64 bits of the 4x4 subtile are alpha data - const u64 alphaData = *ptr++; - alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf); - } - else { - alpha = 0xff; // ETC1 without alpha uses ff for every pixel + static constexpr u32 signExtend3To32(u32 val) { + return (u32)(s32(val) << 29 >> 29); } - // Next 64 bits of the subtile are colour data - u64 colourData = *ptr; - return decodeETC(alpha, u, v, colourData); -} + u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { + // Pixel offset of the 8x8 tile based on u, v and the width of the texture + u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); + if (!hasAlpha) { + offs >>= 1; + } -u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) { - static constexpr u32 modifiers[8][2] = { - { 2, 8 }, - { 5, 17 }, - { 9, 29 }, - { 13, 42 }, - { 18, 60 }, - { 24, 80 }, - { 33, 106 }, - { 47, 183 }, - }; + // In-tile offsets for u/v + u &= 7; + v &= 7; - // Parse colour data for 4x4 block - const u32 subindices = getBits<0, 16, u32>(colourData); - const u32 negationFlags = getBits<16, 16, u32>(colourData); - const bool flip = getBit<32>(colourData); - const bool diffMode = getBit<33>(colourData); + // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles + // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes + const u32 subTileSize = hasAlpha ? 16 : 8; + const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in? - // Note: index1 is indeed stored on the higher bits, with index2 in the lower bits - const u32 tableIndex1 = getBits<37, 3, u32>(colourData); - const u32 tableIndex2 = getBits<34, 3, u32>(colourData); - const u32 texelIndex = u * 4 + v; // Index of the texel in the block + // In-subtile offsets for u/v + u &= 3; + v &= 3; + offs += subTileSize * subTileIndex; - if (flip) - std::swap(u, v); + u32 alpha; + const u64* ptr = reinterpret_cast(data.data() + offs); // Cast to u64* - s32 r, g, b; - if (diffMode) { - r = getBits<59, 5, s32>(colourData); - g = getBits<51, 5, s32>(colourData); - b = getBits<43, 5, s32>(colourData); + if (hasAlpha) { + // First 64 bits of the 4x4 subtile are alpha data + const u64 alphaData = *ptr++; + alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf); + } else { + alpha = 0xff; // ETC1 without alpha uses ff for every pixel + } - if (u >= 2) { - r += signExtend3To32(getBits<56, 3, u32>(colourData)); - g += signExtend3To32(getBits<48, 3, u32>(colourData)); - b += signExtend3To32(getBits<40, 3, u32>(colourData)); - } + // Next 64 bits of the subtile are colour data + u64 colourData = *ptr; + return decodeETC(alpha, u, v, colourData); + } - // Expand from 5 to 8 bits per channel - r = Colour::convert5To8Bit(r); - g = Colour::convert5To8Bit(g); - b = Colour::convert5To8Bit(b); - } else { - if (u < 2) { - r = getBits<60, 4, s32>(colourData); - g = getBits<52, 4, s32>(colourData); - b = getBits<44, 4, s32>(colourData); - } else { - r = getBits<56, 4, s32>(colourData); - g = getBits<48, 4, s32>(colourData); - b = getBits<40, 4, s32>(colourData); - } + u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) { + static constexpr u32 modifiers[8][2] = { + {2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}, + }; - // Expand from 4 to 8 bits per channel - r = Colour::convert4To8Bit(r); - g = Colour::convert4To8Bit(g); - b = Colour::convert4To8Bit(b); - } + // Parse colour data for 4x4 block + const u32 subindices = getBits<0, 16, u32>(colourData); + const u32 negationFlags = getBits<16, 16, u32>(colourData); + const bool flip = getBit<32>(colourData); + const bool diffMode = getBit<33>(colourData); - const u32 index = (u < 2) ? tableIndex1 : tableIndex2; - s32 modifier = modifiers[index][(subindices >> texelIndex) & 1]; + // Note: index1 is indeed stored on the higher bits, with index2 in the lower bits + const u32 tableIndex1 = getBits<37, 3, u32>(colourData); + const u32 tableIndex2 = getBits<34, 3, u32>(colourData); + const u32 texelIndex = u * 4 + v; // Index of the texel in the block - if (((negationFlags >> texelIndex) & 1) != 0) { - modifier = -modifier; - } + if (flip) std::swap(u, v); - r = std::clamp(r + modifier, 0, 255); - g = std::clamp(g + modifier, 0, 255); - b = std::clamp(b + modifier, 0, 255); + s32 r, g, b; + if (diffMode) { + r = getBits<59, 5, s32>(colourData); + g = getBits<51, 5, s32>(colourData); + b = getBits<43, 5, s32>(colourData); - return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r); -} + if (u >= 2) { + r += signExtend3To32(getBits<56, 3, u32>(colourData)); + g += signExtend3To32(getBits<48, 3, u32>(colourData)); + b += signExtend3To32(getBits<40, 3, u32>(colourData)); + } -} // namespace Metal + // Expand from 5 to 8 bits per channel + r = Colour::convert5To8Bit(r); + g = Colour::convert5To8Bit(g); + b = Colour::convert5To8Bit(b); + } else { + if (u < 2) { + r = getBits<60, 4, s32>(colourData); + g = getBits<52, 4, s32>(colourData); + b = getBits<44, 4, s32>(colourData); + } else { + r = getBits<56, 4, s32>(colourData); + g = getBits<48, 4, s32>(colourData); + b = getBits<40, 4, s32>(colourData); + } + + // Expand from 4 to 8 bits per channel + r = Colour::convert4To8Bit(r); + g = Colour::convert4To8Bit(g); + b = Colour::convert4To8Bit(b); + } + + const u32 index = (u < 2) ? tableIndex1 : tableIndex2; + s32 modifier = modifiers[index][(subindices >> texelIndex) & 1]; + + if (((negationFlags >> texelIndex) & 1) != 0) { + modifier = -modifier; + } + + r = std::clamp(r + modifier, 0, 255); + g = std::clamp(g + modifier, 0, 255); + b = std::clamp(b + modifier, 0, 255); + + return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r); + } +} // namespace Metal diff --git a/src/core/renderer_mtl/mtl_lut_texture.cpp b/src/core/renderer_mtl/mtl_lut_texture.cpp index ac4ff6d9..8486a50c 100644 --- a/src/core/renderer_mtl/mtl_lut_texture.cpp +++ b/src/core/renderer_mtl/mtl_lut_texture.cpp @@ -1,32 +1,27 @@ #include "renderer_mtl/renderer_mtl.hpp" namespace Metal { + static constexpr u32 LAYER_COUNT = 1024; -constexpr u32 LAYER_COUNT = 1024; + LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) { + MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); + desc->setTextureType(type); + desc->setPixelFormat(pixelFormat); + desc->setWidth(width); + desc->setHeight(height); + desc->setArrayLength(LAYER_COUNT); + desc->setUsage(MTL::TextureUsageShaderRead /* | MTL::TextureUsageShaderWrite*/); + desc->setStorageMode(MTL::StorageModeShared); -LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) { - MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); - desc->setTextureType(type); - desc->setPixelFormat(pixelFormat); - desc->setWidth(width); - desc->setHeight(height); - desc->setArrayLength(LAYER_COUNT); - desc->setUsage(MTL::TextureUsageShaderRead/* | MTL::TextureUsageShaderWrite*/); - desc->setStorageMode(MTL::StorageModeShared); + texture = device->newTexture(desc); + texture->setLabel(toNSString(name)); + desc->release(); + } - texture = device->newTexture(desc); - texture->setLabel(toNSString(name)); - desc->release(); -} + LutTexture::~LutTexture() { texture->release(); } -LutTexture::~LutTexture() { - texture->release(); -} - -u32 LutTexture::getNextIndex() { - currentIndex = (currentIndex + 1) % LAYER_COUNT; - - return currentIndex; -} - -} // namespace Metal + u32 LutTexture::getNextIndex() { + currentIndex = (currentIndex + 1) % LAYER_COUNT; + return currentIndex; + } +} // namespace Metal diff --git a/src/core/renderer_mtl/mtl_texture.cpp b/src/core/renderer_mtl/mtl_texture.cpp index b61c5502..149fea26 100644 --- a/src/core/renderer_mtl/mtl_texture.cpp +++ b/src/core/renderer_mtl/mtl_texture.cpp @@ -1,312 +1,308 @@ #include "renderer_mtl/mtl_texture.hpp" -#include "renderer_mtl/objc_helper.hpp" -#include "colour.hpp" + #include +#include "colour.hpp" +#include "renderer_mtl/objc_helper.hpp" + + using namespace Helpers; namespace Metal { + void Texture::allocate() { + formatInfo = PICA::getPixelFormatInfo(format); -void Texture::allocate() { - formatInfo = PICA::getPixelFormatInfo(format); + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(formatInfo.pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers? + texture = device->newTexture(descriptor); + texture->setLabel(toNSString( + "Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v()) + )); + descriptor->release(); - MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); - descriptor->setTextureType(MTL::TextureType2D); - descriptor->setPixelFormat(formatInfo.pixelFormat); - descriptor->setWidth(size.u()); - descriptor->setHeight(size.v()); - descriptor->setUsage(MTL::TextureUsageShaderRead); - descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers? - texture = device->newTexture(descriptor); - texture->setLabel(toNSString("Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v()))); - descriptor->release(); - - setNewConfig(config); -} - -// Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on -void Texture::setNewConfig(u32 cfg) { - config = cfg; - - if (sampler) { - sampler->release(); - } - - const auto magFilter = (cfg & 0x2) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; - const auto minFilter = (cfg & 0x4) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; - const auto wrapT = PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg)); - const auto wrapS = PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg)); - - MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); - samplerDescriptor->setMinFilter(minFilter); - samplerDescriptor->setMagFilter(magFilter); - samplerDescriptor->setSAddressMode(wrapS); - samplerDescriptor->setTAddressMode(wrapT); - - samplerDescriptor->setLabel(toNSString("Sampler")); - sampler = device->newSamplerState(samplerDescriptor); - samplerDescriptor->release(); -} - -void Texture::free() { - valid = false; - - if (texture) { - texture->release(); + setNewConfig(config); } - if (sampler) { - sampler->release(); - } -} -u64 Texture::sizeInBytes() { - u64 pixelCount = u64(size.x()) * u64(size.y()); + // Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on + void Texture::setNewConfig(u32 cfg) { + config = cfg; - switch (format) { - case PICA::TextureFmt::RGBA8: // 4 bytes per pixel - return pixelCount * 4; + if (sampler) { + sampler->release(); + } - case PICA::TextureFmt::RGB8: // 3 bytes per pixel - return pixelCount * 3; + const auto magFilter = (cfg & 0x2) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto minFilter = (cfg & 0x4) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto wrapT = PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg)); + const auto wrapS = PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg)); - case PICA::TextureFmt::RGBA5551: // 2 bytes per pixel - case PICA::TextureFmt::RGB565: - case PICA::TextureFmt::RGBA4: - case PICA::TextureFmt::RG8: - case PICA::TextureFmt::IA8: - return pixelCount * 2; + MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + samplerDescriptor->setMinFilter(minFilter); + samplerDescriptor->setMagFilter(magFilter); + samplerDescriptor->setSAddressMode(wrapS); + samplerDescriptor->setTAddressMode(wrapT); - case PICA::TextureFmt::A8: // 1 byte per pixel - case PICA::TextureFmt::I8: - case PICA::TextureFmt::IA4: - return pixelCount; + samplerDescriptor->setLabel(toNSString("Sampler")); + sampler = device->newSamplerState(samplerDescriptor); + samplerDescriptor->release(); + } - case PICA::TextureFmt::I4: // 4 bits per pixel - case PICA::TextureFmt::A4: - return pixelCount / 2; + void Texture::free() { + valid = false; - case PICA::TextureFmt::ETC1: // Compressed formats - case PICA::TextureFmt::ETC1A4: { - // Number of 4x4 tiles - const u64 tileCount = pixelCount / 16; - // Tiles are 8 bytes each on ETC1 and 16 bytes each on ETC1A4 - const u64 tileSize = format == PICA::TextureFmt::ETC1 ? 8 : 16; - return tileCount * tileSize; - } + if (texture) { + texture->release(); + } + if (sampler) { + sampler->release(); + } + } - default: - Helpers::panic("[PICA] Attempted to get size of invalid texture type"); - } -} + u64 Texture::sizeInBytes() { + u64 pixelCount = u64(size.x()) * u64(size.y()); -// u and v are the UVs of the relevant texel -// Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here -// https://en.wikipedia.org/wiki/Z-order_curve -// Textures are split into 8x8 tiles.This function returns the in - tile offset depending on the u & v of the texel -// The in - tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of y % 8 -// As documented in this picture https ://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg -u32 Texture::mortonInterleave(u32 u, u32 v) { - static constexpr u32 xOffsets[] = { 0, 1, 4, 5, 16, 17, 20, 21 }; - static constexpr u32 yOffsets[] = { 0, 2, 8, 10, 32, 34, 40, 42 }; + switch (format) { + case PICA::TextureFmt::RGBA8: // 4 bytes per pixel + return pixelCount * 4; - return xOffsets[u & 7] + yOffsets[v & 7]; -} + case PICA::TextureFmt::RGB8: // 3 bytes per pixel + return pixelCount * 3; -// Get the byte offset of texel (u, v) in the texture -u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) { - u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to - offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + case PICA::TextureFmt::RGBA5551: // 2 bytes per pixel + case PICA::TextureFmt::RGB565: + case PICA::TextureFmt::RGBA4: + case PICA::TextureFmt::RG8: + case PICA::TextureFmt::IA8: return pixelCount * 2; - return offset * bytesPerPixel; -} + case PICA::TextureFmt::A8: // 1 byte per pixel + case PICA::TextureFmt::I8: + case PICA::TextureFmt::IA4: return pixelCount; -// Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte -u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) { - u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to - offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + case PICA::TextureFmt::I4: // 4 bits per pixel + case PICA::TextureFmt::A4: return pixelCount / 2; - return offset / 2; -} + case PICA::TextureFmt::ETC1: // Compressed formats + case PICA::TextureFmt::ETC1A4: { + // Number of 4x4 tiles + const u64 tileCount = pixelCount / 16; + // Tiles are 8 bytes each on ETC1 and 16 bytes each on ETC1A4 + const u64 tileSize = format == PICA::TextureFmt::ETC1 ? 8 : 16; + return tileCount * tileSize; + } -u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { - switch (fmt) { - case PICA::TextureFmt::A4: { - const u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + default: Helpers::panic("[PICA] Attempted to get size of invalid texture type"); + } + } - // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates - u8 alpha = data[offset] >> ((u % 2) ? 4 : 0); - alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha)); + // u and v are the UVs of the relevant texel + // Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here + // https://en.wikipedia.org/wiki/Z-order_curve + // Textures are split into 8x8 tiles.This function returns the in - tile offset depending on the u & v of the texel + // The in - tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of y % 8 + // As documented in this picture https ://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg + u32 Texture::mortonInterleave(u32 u, u32 v) { + static constexpr u32 xOffsets[] = {0, 1, 4, 5, 16, 17, 20, 21}; + static constexpr u32 yOffsets[] = {0, 2, 8, 10, 32, 34, 40, 42}; - // A8 - return alpha; - } + return xOffsets[u & 7] + yOffsets[v & 7]; + } - case PICA::TextureFmt::A8: { - u32 offset = getSwizzledOffset(u, v, size.u(), 1); - const u8 alpha = data[offset]; + // Get the byte offset of texel (u, v) in the texture + u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel - // A8 - return alpha; - } + return offset * bytesPerPixel; + } - default: - Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); - } -} + // Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte + u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel -u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { - switch (fmt) { - case PICA::TextureFmt::RG8: { - u32 offset = getSwizzledOffset(u, v, size.u(), 2); - constexpr u8 b = 0; - const u8 g = data[offset]; - const u8 r = data[offset + 1]; + return offset / 2; + } - // RG8 - return (g << 8) | r; - } + u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::A4: { + const u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); - case PICA::TextureFmt::RGBA4: { - u32 offset = getSwizzledOffset(u, v, size.u(), 2); - u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 alpha = data[offset] >> ((u % 2) ? 4 : 0); + alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha)); - u8 alpha = getBits<0, 4, u8>(texel); - u8 b = getBits<4, 4, u8>(texel); - u8 g = getBits<8, 4, u8>(texel); - u8 r = getBits<12, 4, u8>(texel); + // A8 + return alpha; + } - // ABGR4 - return (r << 12) | (g << 8) | (b << 4) | alpha; - } + case PICA::TextureFmt::A8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 alpha = data[offset]; - case PICA::TextureFmt::RGBA5551: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 2); - const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + // A8 + return alpha; + } - u8 alpha = getBit<0>(texel) ? 0xff : 0; - u8 b = getBits<1, 5, u8>(texel); - u8 g = getBits<6, 5, u8>(texel); - u8 r = getBits<11, 5, u8>(texel); + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } - // BGR5A1 - return (alpha << 15) | (r << 10) | (g << 5) | b; - } + u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RG8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + constexpr u8 b = 0; + const u8 g = data[offset]; + const u8 r = data[offset + 1]; - case PICA::TextureFmt::RGB565: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 2); - const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + // RG8 + return (g << 8) | r; + } - const u8 b = getBits<0, 5, u8>(texel); - const u8 g = getBits<5, 6, u8>(texel); - const u8 r = getBits<11, 5, u8>(texel); + case PICA::TextureFmt::RGBA4: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); - // B5G6R5 - return (r << 11) | (g << 5) | b; - } + u8 alpha = getBits<0, 4, u8>(texel); + u8 b = getBits<4, 4, u8>(texel); + u8 g = getBits<8, 4, u8>(texel); + u8 r = getBits<12, 4, u8>(texel); - case PICA::TextureFmt::IA4: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 1); - const u8 texel = data[offset]; - const u8 alpha = texel & 0xf; - const u8 intensity = texel >> 4; + // ABGR4 + return (r << 12) | (g << 8) | (b << 4) | alpha; + } - // ABGR4 - return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha; - } + case PICA::TextureFmt::RGBA5551: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); - case PICA::TextureFmt::I4: { - u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + u8 alpha = getBit<0>(texel) ? 0xff : 0; + u8 b = getBits<1, 5, u8>(texel); + u8 g = getBits<6, 5, u8>(texel); + u8 r = getBits<11, 5, u8>(texel); - // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates - u8 intensity = data[offset] >> ((u % 2) ? 4 : 0); - intensity = getBits<0, 4>(intensity); + // BGR5A1 + return (alpha << 15) | (r << 10) | (g << 5) | b; + } - // ABGR4 - return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xff; - } + case PICA::TextureFmt::RGB565: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); - default: - Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); - } -} + const u8 b = getBits<0, 5, u8>(texel); + const u8 g = getBits<5, 6, u8>(texel); + const u8 r = getBits<11, 5, u8>(texel); -u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { - switch (fmt) { - case PICA::TextureFmt::RGB8: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 3); - const u8 b = data[offset]; - const u8 g = data[offset + 1]; - const u8 r = data[offset + 2]; + // B5G6R5 + return (r << 11) | (g << 5) | b; + } - // RGBA8 - return (0xff << 24) | (b << 16) | (g << 8) | r; - } + case PICA::TextureFmt::IA4: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 texel = data[offset]; + const u8 alpha = texel & 0xf; + const u8 intensity = texel >> 4; - case PICA::TextureFmt::RGBA8: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 4); - const u8 alpha = data[offset]; - const u8 b = data[offset + 1]; - const u8 g = data[offset + 2]; - const u8 r = data[offset + 3]; + // ABGR4 + return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha; + } - // RGBA8 - return (alpha << 24) | (b << 16) | (g << 8) | r; - } + case PICA::TextureFmt::I4: { + u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); - case PICA::TextureFmt::I8: { - u32 offset = getSwizzledOffset(u, v, size.u(), 1); - const u8 intensity = data[offset]; + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 intensity = data[offset] >> ((u % 2) ? 4 : 0); + intensity = getBits<0, 4>(intensity); - // RGBA8 - return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity; - } + // ABGR4 + return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xff; + } - case PICA::TextureFmt::IA8: { - u32 offset = getSwizzledOffset(u, v, size.u(), 2); + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } - // Same as I8 except each pixel gets its own alpha value too - const u8 alpha = data[offset]; - const u8 intensity = data[offset + 1]; + u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RGB8: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 3); + const u8 b = data[offset]; + const u8 g = data[offset + 1]; + const u8 r = data[offset + 2]; - // RGBA8 - return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity; - } + // RGBA8 + return (0xff << 24) | (b << 16) | (g << 8) | r; + } - case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data); - case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data); + case PICA::TextureFmt::RGBA8: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 4); + const u8 alpha = data[offset]; + const u8 b = data[offset + 1]; + const u8 g = data[offset + 2]; + const u8 r = data[offset + 3]; - default: - Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); - } -} + // RGBA8 + return (alpha << 24) | (b << 16) | (g << 8) | r; + } -void Texture::decodeTexture(std::span data) { - std::vector decoded; - decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel); + case PICA::TextureFmt::I8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 intensity = data[offset]; - // Decode texels line by line - for (u32 v = 0; v < size.v(); v++) { - for (u32 u = 0; u < size.u(); u++) { - if (formatInfo.bytesPerTexel == 1) { - u8 texel = decodeTexelU8(u, v, format, data); - decoded.push_back(texel); - } else if (formatInfo.bytesPerTexel == 2) { - u16 texel = decodeTexelU16(u, v, format, data); - decoded.push_back((texel & 0x00ff) >> 0); - decoded.push_back((texel & 0xff00) >> 8); - } else if (formatInfo.bytesPerTexel == 4) { - u32 texel = decodeTexelU32(u, v, format, data); - decoded.push_back((texel & 0x000000ff) >> 0); - decoded.push_back((texel & 0x0000ff00) >> 8); - decoded.push_back((texel & 0x00ff0000) >> 16); - decoded.push_back((texel & 0xff000000) >> 24); - } else { - Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel); - } - } - } + // RGBA8 + return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity; + } - texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0); -} + case PICA::TextureFmt::IA8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); -} // namespace Metal + // Same as I8 except each pixel gets its own alpha value too + const u8 alpha = data[offset]; + const u8 intensity = data[offset + 1]; + + // RGBA8 + return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity; + } + + case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data); + case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data); + + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } + + void Texture::decodeTexture(std::span data) { + std::vector decoded; + decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel); + + // Decode texels line by line + for (u32 v = 0; v < size.v(); v++) { + for (u32 u = 0; u < size.u(); u++) { + if (formatInfo.bytesPerTexel == 1) { + u8 texel = decodeTexelU8(u, v, format, data); + decoded.push_back(texel); + } else if (formatInfo.bytesPerTexel == 2) { + u16 texel = decodeTexelU16(u, v, format, data); + decoded.push_back((texel & 0x00ff) >> 0); + decoded.push_back((texel & 0xff00) >> 8); + } else if (formatInfo.bytesPerTexel == 4) { + u32 texel = decodeTexelU32(u, v, format, data); + decoded.push_back((texel & 0x000000ff) >> 0); + decoded.push_back((texel & 0x0000ff00) >> 8); + decoded.push_back((texel & 0x00ff0000) >> 16); + decoded.push_back((texel & 0xff000000) >> 24); + } else { + Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel); + } + } + } + + texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0); + } +} // namespace Metal diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index 8401eecb..a0c1888a 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -2,9 +2,10 @@ #include #include + #include "renderer_mtl/mtl_lut_texture.hpp" -// HACK +// Hack: Apple annoyingly defines a global "NO" macro which ends up conflicting with our own code... #undef NO #include "PICA/gpu.hpp" @@ -14,8 +15,10 @@ using namespace PICA; CMRC_DECLARE(RendererMTL); -const u16 LIGHTING_LUT_TEXTURE_WIDTH = 256; -const u32 FOG_LUT_TEXTURE_WIDTH = 128; +static constexpr u16 LIGHTING_LUT_TEXTURE_WIDTH = 256; +static constexpr u32 FOG_LUT_TEXTURE_WIDTH = 128; +// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices +static constexpr uint VERTEX_BUFFER_BINDING_INDEX = 30; // HACK: redefinition... PICA::ColorFmt ToColorFormat(u32 format) { @@ -40,6 +43,7 @@ MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) { RendererMTL::RendererMTL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) : Renderer(gpu, internalRegs, externalRegs) {} + RendererMTL::~RendererMTL() {} void RendererMTL::reset() { @@ -78,7 +82,7 @@ void RendererMTL::display() { clearColor(nullptr, bottomScreen->get().texture); } - // -------- Draw -------- + // Draw commandBuffer->pushDebugGroup(toNSString("Display")); MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); @@ -130,8 +134,6 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { metalLayer->setDevice(device); commandQueue = device->newCommandQueue(); - // -------- Objects -------- - // Textures MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); textureDescriptor->setTextureType(MTL::TextureType2D); @@ -157,7 +159,9 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { samplerDescriptor->release(); - lutLightingTexture = new Metal::LutTexture(device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture"); + lutLightingTexture = new Metal::LutTexture( + device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture" + ); lutFogTexture = new Metal::LutTexture(device, MTL::TextureType1DArray, MTL::PixelFormatRG32Float, FOG_LUT_TEXTURE_WIDTH, 1, "Fog LUT texture"); // -------- Pipelines -------- @@ -166,7 +170,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { auto mtlResources = cmrc::RendererMTL::get_filesystem(); library = loadLibrary(device, mtlResources.open("metal_shaders.metallib")); MTL::Library* blitLibrary = loadLibrary(device, mtlResources.open("metal_blit.metallib")); - //MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); + // MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); // Display MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding)); @@ -295,9 +299,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { defaultDepthStencilState = device->newDepthStencilState(depthStencilDescriptor); depthStencilDescriptor->release(); - // Release blitLibrary->release(); - //copyToLutTextureLibrary->release(); + // copyToLutTextureLibrary->release(); } void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { @@ -592,8 +595,7 @@ void RendererMTL::deinitGraphicsContext() { delete lutLightingTexture; delete lutFogTexture; - // Release - //copyToLutTexturePipeline->release(); + // copyToLutTexturePipeline->release(); displayPipeline->release(); defaultDepthStencilState->release(); nullTexture->release(); @@ -700,9 +702,9 @@ void RendererMTL::bindTexturesToSlots() { for (int i = 0; i < 3; i++) { if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { - commandEncoder.setFragmentTexture(nullTexture, i); - commandEncoder.setFragmentSamplerState(nearestSampler, i); - continue; + commandEncoder.setFragmentTexture(nullTexture, i); + commandEncoder.setFragmentSamplerState(nearestSampler, i); + continue; } const size_t ioBase = ioBases[i]; @@ -736,7 +738,9 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { } u32 index = lutLightingTexture->getNextIndex(); - lutLightingTexture->getTexture()->replaceRegion(MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0); + lutLightingTexture->getTexture()->replaceRegion( + MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0 + ); /* endRenderPass(); @@ -768,7 +772,7 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { gpu.fogLUTDirty = false; - std::array fogLut = {0.0f}; + std::array fogLut = {0.0f}; for (int i = 0; i < fogLut.size(); i += 2) { const uint32_t value = gpu.fogLUT[i >> 1]; @@ -807,7 +811,8 @@ void RendererMTL::textureCopyImpl( ) { nextRenderPassName = "Texture copy"; MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); - // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole texture + // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole + // texture bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture); beginRenderPassIfNeeded(renderPassDescriptor, doesClear, destFramebuffer.texture); @@ -819,11 +824,13 @@ void RendererMTL::textureCopyImpl( // Viewport renderCommandEncoder->setViewport(MTL::Viewport{ - double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0 - }); + double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0}); + float srcRectNDC[4] = { - srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(), - (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v() + srcRect.left / (float)srcFramebuffer.size.u(), + srcRect.bottom / (float)srcFramebuffer.size.v(), + (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), + (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v(), }; // Bind resources @@ -834,25 +841,28 @@ void RendererMTL::textureCopyImpl( renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); } -void RendererMTL::beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture) { +void RendererMTL::beginRenderPassIfNeeded( + MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture +) { createCommandBufferIfNeeded(); - if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { - endRenderPass(); + if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || + (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { + endRenderPass(); - renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); - renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); - commandEncoder.newRenderCommandEncoder(renderCommandEncoder); + renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); + renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); + commandEncoder.newRenderCommandEncoder(renderCommandEncoder); - // Bind persistent resources + // Bind persistent resources - // LUT texture - renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3); - renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4); - renderCommandEncoder->setFragmentSamplerState(linearSampler, 3); + // LUT texture + renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3); + renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4); + renderCommandEncoder->setFragmentSamplerState(linearSampler, 3); - lastColorTexture = colorTexture; - lastDepthTexture = depthTexture; + lastColorTexture = colorTexture; + lastDepthTexture = depthTexture; } renderPassDescriptor->release();