First Metal cleanup & formatting pass

This commit is contained in:
wheremyfoodat 2024-11-09 13:09:12 +02:00
parent 4cc62d4870
commit 49b65242b9
17 changed files with 1084 additions and 1115 deletions

View file

@ -19,8 +19,6 @@ template <typename SurfaceType, size_t capacity, bool evictOnOverflow = false>
class SurfaceCache { class SurfaceCache {
// Vanilla std::optional can't hold actual references // Vanilla std::optional can't hold actual references
using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>; using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>;
//static_assert(std::is_same<SurfaceType, ColourBuffer>() || std::is_same<SurfaceType, DepthBuffer>() ||
// std::is_same<SurfaceType, Texture>(), "Invalid surface type");
size_t size; size_t size;
size_t evictionIndex; size_t evictionIndex;

View file

@ -7,69 +7,67 @@
using namespace PICA; using namespace PICA;
namespace Metal { namespace Metal {
// Key describing one blit pipeline variant: the pixel formats of the colour and depth attachments.
// BlitPipelineCache::get() folds both fields into a single u8 map key as (colorFmt << 3) | depthFmt,
// so that packing is only collision-free if depthFmt fits in the low 3 bits.
// NOTE(review): presumably both enums have at most 8 values - confirm against PICA::ColorFmt / PICA::DepthFmt.
struct BlitPipelineHash {
// Formats
// Colour attachment format; converted with toMTLPixelFormatColor() when the pipeline is built
ColorFmt colorFmt;
// Depth attachment format; converted with toMTLPixelFormatDepth() when the pipeline is built
DepthFmt depthFmt;
};
struct BlitPipelineHash { // This pipeline only caches the pipeline with all of its color and depth attachment variations
// Formats class BlitPipelineCache {
ColorFmt colorFmt; public:
DepthFmt depthFmt; BlitPipelineCache() = default;
};
// This pipeline only caches the pipeline with all of its color and depth attachment variations ~BlitPipelineCache() {
class BlitPipelineCache { reset();
public: vertexFunction->release();
BlitPipelineCache() = default; fragmentFunction->release();
}
~BlitPipelineCache() { void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) {
reset(); device = dev;
vertexFunction->release(); vertexFunction = vert;
fragmentFunction->release(); fragmentFunction = frag;
} }
void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) { MTL::RenderPipelineState* get(BlitPipelineHash hash) {
device = dev; u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt;
vertexFunction = vert; auto& pipeline = pipelineCache[intHash];
fragmentFunction = frag; if (!pipeline) {
} MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
desc->setVertexFunction(vertexFunction);
desc->setFragmentFunction(fragmentFunction);
MTL::RenderPipelineState* get(BlitPipelineHash hash) { auto colorAttachment = desc->colorAttachments()->object(0);
u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt; colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));
auto& pipeline = pipelineCache[intHash];
if (!pipeline) {
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
desc->setVertexFunction(vertexFunction);
desc->setFragmentFunction(fragmentFunction);
auto colorAttachment = desc->colorAttachments()->object(0); desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt));
colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));
desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); NS::Error* error = nullptr;
desc->setLabel(toNSString("Blit pipeline"));
pipeline = device->newRenderPipelineState(desc, &error);
if (error) {
Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
NS::Error* error = nullptr; desc->release();
desc->setLabel(toNSString("Blit pipeline")); }
pipeline = device->newRenderPipelineState(desc, &error);
if (error) {
Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
desc->release(); return pipeline;
} }
return pipeline; void reset() {
} for (auto& pair : pipelineCache) {
pair.second->release();
}
pipelineCache.clear();
}
void reset() { private:
for (auto& pair : pipelineCache) { std::map<u8, MTL::RenderPipelineState*> pipelineCache;
pair.second->release();
}
pipelineCache.clear();
}
private: MTL::Device* device;
std::map<u8, MTL::RenderPipelineState*> pipelineCache; MTL::Function* vertexFunction;
MTL::Function* fragmentFunction;
MTL::Device* device; };
MTL::Function* vertexFunction; } // namespace Metal
MTL::Function* fragmentFunction;
};
} // namespace Metal

View file

@ -3,56 +3,54 @@
#include <Metal/Metal.hpp> #include <Metal/Metal.hpp>
namespace Metal { namespace Metal {
// Shadow copy of the state most recently bound through CommandEncoder.
// CommandEncoder compares each incoming pointer against the matching field and only forwards the
// call to the MTL::RenderCommandEncoder when the pointer differs. The whole struct is reset to its
// defaults (all nullptr) whenever newRenderCommandEncoder() installs a new encoder.
struct RenderState {
// Last pipeline state passed to setRenderPipelineState()
MTL::RenderPipelineState* renderPipelineState = nullptr;
// Last depth/stencil state passed to setDepthStencilState()
MTL::DepthStencilState* depthStencilState = nullptr;
// Last fragment texture bound per index; indexed directly by the caller-supplied index,
// so only indices 0-2 are valid (no bounds check is performed by CommandEncoder)
MTL::Texture* textures[3] = {nullptr};
// Last fragment sampler state bound per index; same 0-2 index range as textures
MTL::SamplerState* samplerStates[3] = {nullptr};
};
struct RenderState { class CommandEncoder {
MTL::RenderPipelineState* renderPipelineState = nullptr; public:
MTL::DepthStencilState* depthStencilState = nullptr; void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) {
MTL::Texture* textures[3] = {nullptr}; renderCommandEncoder = rce;
MTL::SamplerState* samplerStates[3] = {nullptr};
};
class CommandEncoder { // Reset the render state
public: renderState = RenderState{};
void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) { }
renderCommandEncoder = rce;
// Reset the render state // Resource binding
renderState = RenderState{}; void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) {
} if (renderPipelineState != renderState.renderPipelineState) {
renderCommandEncoder->setRenderPipelineState(renderPipelineState);
renderState.renderPipelineState = renderPipelineState;
}
}
// Resource binding void setDepthStencilState(MTL::DepthStencilState* depthStencilState) {
void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) { if (depthStencilState != renderState.depthStencilState) {
if (renderPipelineState != renderState.renderPipelineState) { renderCommandEncoder->setDepthStencilState(depthStencilState);
renderCommandEncoder->setRenderPipelineState(renderPipelineState); renderState.depthStencilState = depthStencilState;
renderState.renderPipelineState = renderPipelineState; }
} }
}
void setDepthStencilState(MTL::DepthStencilState* depthStencilState) { void setFragmentTexture(MTL::Texture* texture, u32 index) {
if (depthStencilState != renderState.depthStencilState) { if (texture != renderState.textures[index]) {
renderCommandEncoder->setDepthStencilState(depthStencilState); renderCommandEncoder->setFragmentTexture(texture, index);
renderState.depthStencilState = depthStencilState; renderState.textures[index] = texture;
} }
} }
void setFragmentTexture(MTL::Texture* texture, u32 index) { void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) {
if (texture != renderState.textures[index]) { if (samplerState != renderState.samplerStates[index]) {
renderCommandEncoder->setFragmentTexture(texture, index); renderCommandEncoder->setFragmentSamplerState(samplerState, index);
renderState.textures[index] = texture; renderState.samplerStates[index] = samplerState;
} }
} }
void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) { private:
if (samplerState != renderState.samplerStates[index]) { MTL::RenderCommandEncoder* renderCommandEncoder = nullptr;
renderCommandEncoder->setFragmentSamplerState(samplerState, index);
renderState.samplerStates[index] = samplerState;
}
}
private: RenderState renderState;
MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; };
} // namespace Metal
RenderState renderState;
};
} // namespace Metal

View file

@ -7,80 +7,74 @@
using namespace PICA; using namespace PICA;
namespace Metal { namespace Metal {
// Key describing one depth/stencil state variant.
// DepthStencilCache::get() packs the fields into a u64 map key:
//   depthStencilWrite << 56 | depthFunc << 48 | stencilConfig << 16 | stencilOpConfig
// NOTE(review): the bit positions below assume Helpers::getBits<a, b> means "b bits starting at bit a" - confirm.
struct DepthStencilHash {
// Stencil test configuration as read by DepthStencilCache::get():
// bit 0 = stencil enable, getBits<4, 3> = compare function,
// getBits<8, 8> = write mask (forced to 0 when depthStencilWrite is false), getBits<24, 8> = read mask
u32 stencilConfig;
// Stencil operations: getBits<0, 3> = stencil-fail op, getBits<4, 3> = depth-fail op, getBits<8, 3> = pass op
u16 stencilOpConfig;
// Passed to setDepthWriteEnabled(); also gates the stencil write mask
bool depthStencilWrite;
// Depth compare function, converted with toMTLCompareFunc()
u8 depthFunc;
};
struct DepthStencilHash { class DepthStencilCache {
bool depthStencilWrite; public:
u8 depthFunc; DepthStencilCache() = default;
u32 stencilConfig;
u16 stencilOpConfig;
};
class DepthStencilCache { ~DepthStencilCache() { reset(); }
public:
DepthStencilCache() = default;
~DepthStencilCache() { void set(MTL::Device* dev) { device = dev; }
reset();
}
void set(MTL::Device* dev) { MTL::DepthStencilState* get(DepthStencilHash hash) {
device = dev; u64 intHash =
} ((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig;
auto& depthStencilState = depthStencilCache[intHash];
if (!depthStencilState) {
MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init();
desc->setDepthWriteEnabled(hash.depthStencilWrite);
desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc));
MTL::DepthStencilState* get(DepthStencilHash hash) { const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig);
u64 intHash = ((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig; MTL::StencilDescriptor* stencilDesc = nullptr;
auto& depthStencilState = depthStencilCache[intHash]; if (stencilEnable) {
if (!depthStencilState) { const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig);
MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init(); const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig);
desc->setDepthWriteEnabled(hash.depthStencilWrite);
desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc));
const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig); const u32 stencilBufferMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0;
MTL::StencilDescriptor* stencilDesc = nullptr;
if (stencilEnable) {
const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig);
const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig);
const u32 stencilBufferMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0; const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig);
const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig);
const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig);
const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig); stencilDesc = MTL::StencilDescriptor::alloc()->init();
const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig); stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp));
const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig); stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp));
stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp));
stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc));
stencilDesc->setReadMask(stencilRefMask);
stencilDesc->setWriteMask(stencilBufferMask);
stencilDesc = MTL::StencilDescriptor::alloc()->init(); desc->setFrontFaceStencil(stencilDesc);
stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp)); desc->setBackFaceStencil(stencilDesc);
stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp)); }
stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp));
stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc));
stencilDesc->setReadMask(stencilRefMask);
stencilDesc->setWriteMask(stencilBufferMask);
desc->setFrontFaceStencil(stencilDesc); depthStencilState = device->newDepthStencilState(desc);
desc->setBackFaceStencil(stencilDesc);
}
depthStencilState = device->newDepthStencilState(desc); desc->release();
if (stencilDesc) {
stencilDesc->release();
}
}
desc->release(); return depthStencilState;
if (stencilDesc) { }
stencilDesc->release();
}
}
return depthStencilState; void reset() {
} for (auto& pair : depthStencilCache) {
pair.second->release();
}
depthStencilCache.clear();
}
void reset() { private:
for (auto& pair : depthStencilCache) { std::map<u64, MTL::DepthStencilState*> depthStencilCache;
pair.second->release(); MTL::Device* device;
} };
depthStencilCache.clear(); } // namespace Metal
}
private:
std::map<u64, MTL::DepthStencilState*> depthStencilCache;
MTL::Device* device;
};
} // namespace Metal

View file

@ -7,171 +7,155 @@
using namespace PICA; using namespace PICA;
namespace Metal { namespace Metal {
// Key describing one specialisation of the "fragmentDraw" fragment function.
// DrawPipelineCache::get() uses it as the std::map key of fragmentFunctionCache and passes each field,
// by address, as a Metal function constant (indices 0-3 in declaration order). The field types must
// therefore stay in sync with the MTL::DataType given for each constant.
struct DrawFragmentFunctionHash {
// Function constant 0 (MTL::DataTypeBool)
bool lightingEnabled; // 1 bit
// Function constant 1 (MTL::DataTypeUChar)
u8 lightingNumLights; // 3 bits
// Function constant 2 (MTL::DataTypeUInt)
u32 lightingConfig1; // 32 bits (TODO: check this)
// | ref | func | on |
// Function constant 3 (MTL::DataTypeUShort)
u16 alphaControl; // 12 bits (mask: 11111111 0111 0001)
};
struct DrawFragmentFunctionHash { inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) {
bool lightingEnabled; // 1 bit if (!l.lightingEnabled && r.lightingEnabled) return true;
u8 lightingNumLights; // 3 bits if (l.lightingNumLights < r.lightingNumLights) return true;
u32 lightingConfig1; // 32 bits (TODO: check this) if (l.lightingConfig1 < r.lightingConfig1) return true;
// | ref | func | on | if (l.alphaControl < r.alphaControl) return true;
u16 alphaControl; // 12 bits (mask: 11111111 0111 0001)
};
//bool operator==(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { return false;
// return ((l.lightingEnabled == r.lightingEnabled) && (l.lightingNumLights == r.lightingNumLights) && }
// (l.lightingConfig1 == r.lightingConfig1) && (l.alphaControl == r.alphaControl));
//}
inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { struct DrawPipelineHash { // 56 bits
if (!l.lightingEnabled && r.lightingEnabled) return true; // Formats
if (l.lightingNumLights < r.lightingNumLights) return true; ColorFmt colorFmt; // 3 bits
if (l.lightingConfig1 < r.lightingConfig1) return true; DepthFmt depthFmt; // 3 bits
if (l.alphaControl < r.alphaControl) return true;
return false; // Blending
} bool blendEnabled; // 1 bit
// | functions | aeq | ceq |
u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111)
u8 colorWriteMask; // 4 bits
struct DrawPipelineHash { // 56 bits DrawFragmentFunctionHash fragHash;
// Formats };
ColorFmt colorFmt; // 3 bits
DepthFmt depthFmt; // 3 bits
// Blending inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) {
bool blendEnabled; // 1 bit if ((u32)l.colorFmt < (u32)r.colorFmt) return true;
// | functions | aeq | ceq | if ((u32)l.depthFmt < (u32)r.depthFmt) return true;
u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111) if (!l.blendEnabled && r.blendEnabled) return true;
u8 colorWriteMask; // 4 bits if (l.blendControl < r.blendControl) return true;
if (l.colorWriteMask < r.colorWriteMask) return true;
if (l.fragHash < r.fragHash) return true;
DrawFragmentFunctionHash fragHash; return false;
}; }
//bool operator==(const DrawPipelineHash& l, const DrawPipelineHash& r) { // This pipeline only caches the pipeline with all of its color and depth attachment variations
// return (((u32)l.colorFmt == (u32)r.colorFmt) && ((u32)l.depthFmt == (u32)r.depthFmt) && class DrawPipelineCache {
// (l.blendEnabled == r.blendEnabled) && (l.blendControl == r.blendControl) && public:
// (l.colorWriteMask == r.colorWriteMask) && (l.fragHash == r.fragHash)); DrawPipelineCache() = default;
//}
inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) { ~DrawPipelineCache() {
if ((u32)l.colorFmt < (u32)r.colorFmt) return true; reset();
if ((u32)l.depthFmt < (u32)r.depthFmt) return true; vertexDescriptor->release();
if (!l.blendEnabled && r.blendEnabled) return true; vertexFunction->release();
if (l.blendControl < r.blendControl) return true; }
if (l.colorWriteMask < r.colorWriteMask) return true;
if (l.fragHash < r.fragHash) return true;
return false; void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) {
} device = dev;
library = lib;
vertexFunction = vert;
vertexDescriptor = vertDesc;
}
// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices MTL::RenderPipelineState* get(DrawPipelineHash hash) {
#define VERTEX_BUFFER_BINDING_INDEX 30 auto& pipeline = pipelineCache[hash];
// This pipeline only caches the pipeline with all of its color and depth attachment variations if (!pipeline) {
class DrawPipelineCache { auto& fragmentFunction = fragmentFunctionCache[hash.fragHash];
public: if (!fragmentFunction) {
DrawPipelineCache() = default; MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init();
constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0));
constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1));
constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2));
constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3));
~DrawPipelineCache() { NS::Error* error = nullptr;
reset(); fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error);
vertexDescriptor->release(); if (error) {
vertexFunction->release(); Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding));
} }
constants->release();
}
void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) { MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
device = dev; desc->setVertexFunction(vertexFunction);
library = lib; desc->setFragmentFunction(fragmentFunction);
vertexFunction = vert; desc->setVertexDescriptor(vertexDescriptor);
vertexDescriptor = vertDesc;
}
MTL::RenderPipelineState* get(DrawPipelineHash hash) { auto colorAttachment = desc->colorAttachments()->object(0);
//u32 fragmentFunctionHash = ((u32)hash.lightingEnabled << 22) | ((u32)hash.lightingNumLights << 19) | ((u32)hash.lightingConfig1 << 12) | ((((u32)hash.alphaControl & 0b1111111100000000) >> 8) << 4) | ((((u32)hash.alphaControl & 0b01110000) >> 4) << 1) | ((u32)hash.alphaControl & 0b0001); colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));
//u64 pipelineHash = ((u64)hash.colorFmt << 53) | ((u64)hash.depthFmt << 50) | ((u64)hash.blendEnabled << 49) | ((u64)hash.colorWriteMask << 45) | ((((u64)hash.blendControl & 0b11111111111111110000000000000000) >> 16) << 29) | ((((u64)hash.blendControl & 0b0000011100000000) >> 8) << 26) | (((u64)hash.blendControl & 0b00000111) << 23) | fragmentFunctionHash; MTL::ColorWriteMask writeMask = 0;
auto& pipeline = pipelineCache[hash]; if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed;
if (!pipeline) { if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen;
auto& fragmentFunction = fragmentFunctionCache[hash.fragHash]; if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue;
if (!fragmentFunction) { if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha;
MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); colorAttachment->setWriteMask(writeMask);
constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); if (hash.blendEnabled) {
constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); const u8 rgbEquation = hash.blendControl & 0x7;
constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2)); const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl);
constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3));
NS::Error* error = nullptr; // Get blending functions
fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl);
if (error) { const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl);
Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding)); const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl);
} const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl);
constants->release();
}
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); colorAttachment->setBlendingEnabled(true);
desc->setVertexFunction(vertexFunction); colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation));
desc->setFragmentFunction(fragmentFunction); colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation));
desc->setVertexDescriptor(vertexDescriptor); colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc));
colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc));
colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc));
colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc));
}
auto colorAttachment = desc->colorAttachments()->object(0); MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt);
colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); desc->setDepthAttachmentPixelFormat(depthFormat);
MTL::ColorWriteMask writeMask = 0; if (hash.depthFmt == DepthFmt::Depth24Stencil8) desc->setStencilAttachmentPixelFormat(depthFormat);
if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed;
if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen;
if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue;
if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha;
colorAttachment->setWriteMask(writeMask);
if (hash.blendEnabled) {
const u8 rgbEquation = hash.blendControl & 0x7;
const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl);
// Get blending functions NS::Error* error = nullptr;
const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl); desc->setLabel(toNSString("Draw pipeline"));
const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl); pipeline = device->newRenderPipelineState(desc, &error);
const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl); if (error) {
const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl); Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
colorAttachment->setBlendingEnabled(true); desc->release();
colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation)); }
colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation));
colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc));
colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc));
colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc));
colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc));
}
MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt); return pipeline;
desc->setDepthAttachmentPixelFormat(depthFormat); }
if (hash.depthFmt == DepthFmt::Depth24Stencil8)
desc->setStencilAttachmentPixelFormat(depthFormat);
NS::Error* error = nullptr; void reset() {
desc->setLabel(toNSString("Draw pipeline")); for (auto& pair : pipelineCache) {
pipeline = device->newRenderPipelineState(desc, &error); pair.second->release();
if (error) { }
Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); pipelineCache.clear();
}
desc->release(); for (auto& pair : fragmentFunctionCache) {
} pair.second->release();
}
fragmentFunctionCache.clear();
}
return pipeline; private:
} std::map<DrawPipelineHash, MTL::RenderPipelineState*> pipelineCache;
std::map<DrawFragmentFunctionHash, MTL::Function*> fragmentFunctionCache;
void reset() { MTL::Device* device;
for (auto& pair : pipelineCache) { MTL::Library* library;
pair.second->release(); MTL::Function* vertexFunction;
} MTL::VertexDescriptor* vertexDescriptor;
pipelineCache.clear(); };
for (auto& pair : fragmentFunctionCache) {
pair.second->release();
}
fragmentFunctionCache.clear();
}
private: } // namespace Metal
std::map<DrawPipelineHash, MTL::RenderPipelineState*> pipelineCache;
std::map<DrawFragmentFunctionHash, MTL::Function*> fragmentFunctionCache;
MTL::Device* device;
MTL::Library* library;
MTL::Function* vertexFunction;
MTL::VertexDescriptor* vertexDescriptor;
};
} // namespace Metal

View file

@ -8,17 +8,12 @@ class LutTexture {
public: public:
LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name); LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name);
~LutTexture(); ~LutTexture();
u32 getNextIndex(); u32 getNextIndex();
// Getters
MTL::Texture* getTexture() { return texture; } MTL::Texture* getTexture() { return texture; }
u32 getCurrentIndex() { return currentIndex; } u32 getCurrentIndex() { return currentIndex; }
private: private:
MTL::Texture* texture; MTL::Texture* texture;
u32 currentIndex = 0; u32 currentIndex = 0;
}; };

View file

@ -1,92 +1,91 @@
#pragma once #pragma once
#include <Metal/Metal.hpp>
#include <array> #include <array>
#include <string> #include <string>
#include <Metal/Metal.hpp>
#include "boost/icl/interval.hpp" #include "boost/icl/interval.hpp"
#include "helpers.hpp" #include "helpers.hpp"
#include "math_util.hpp" #include "math_util.hpp"
#include "objc_helper.hpp"
#include "opengl.hpp" #include "opengl.hpp"
#include "pica_to_mtl.hpp" #include "pica_to_mtl.hpp"
#include "objc_helper.hpp"
template <typename T> template <typename T>
using Interval = boost::icl::right_open_interval<T>; using Interval = boost::icl::right_open_interval<T>;
namespace Metal { namespace Metal {
template <typename Format_t>
struct RenderTarget {
MTL::Device* device;
template <typename Format_t> u32 location;
struct RenderTarget { Format_t format;
MTL::Device* device; OpenGL::uvec2 size;
bool valid;
u32 location; // Range of VRAM taken up by buffer
Format_t format; Interval<u32> range;
OpenGL::uvec2 size;
bool valid;
// Range of VRAM taken up by buffer MTL::Texture* texture = nullptr;
Interval<u32> range;
MTL::Texture* texture = nullptr; RenderTarget() : valid(false) {}
RenderTarget() : valid(false) {} RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true)
: device(dev), location(loc), format(format), size({x, y}), valid(valid) {
u64 endLoc = (u64)loc + sizeInBytes();
// Check if start and end are valid here
range = Interval<u32>(loc, (u32)endLoc);
}
RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true) Math::Rect<u32> getSubRect(u32 inputAddress, u32 width, u32 height) {
: device(dev), location(loc), format(format), size({x, y}), valid(valid) { const u32 startOffset = (inputAddress - location) / sizePerPixel(format);
u64 endLoc = (u64)loc + sizeInBytes(); const u32 x0 = (startOffset % (size.x() * 8)) / 8;
// Check if start and end are valid here const u32 y0 = (startOffset / (size.x() * 8)) * 8;
range = Interval<u32>(loc, (u32)endLoc); return Math::Rect<u32>{x0, size.y() - y0, x0 + width, size.y() - height - y0};
} }
Math::Rect<u32> getSubRect(u32 inputAddress, u32 width, u32 height) { // For 2 textures to "match" we only care about their locations, formats, and dimensions to match
const u32 startOffset = (inputAddress - location) / sizePerPixel(format); // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture
const u32 x0 = (startOffset % (size.x() * 8)) / 8; bool matches(RenderTarget& other) {
const u32 y0 = (startOffset / (size.x() * 8)) * 8; return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y();
return Math::Rect<u32>{x0, size.y() - y0, x0 + width, size.y() - height - y0}; }
}
// For 2 textures to "match" we only care about their locations, formats, and dimensions to match void allocate() {
// For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid;
bool matches(RenderTarget& other) { if (std::is_same<Format_t, PICA::ColorFmt>::value) {
return location == other.location && format == other.format && pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format);
size.x() == other.size.x() && size.y() == other.size.y(); } else if (std::is_same<Format_t, PICA::DepthFmt>::value) {
} pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format);
} else {
panic("Invalid format type");
}
void allocate() { MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();
MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid; descriptor->setTextureType(MTL::TextureType2D);
if (std::is_same<Format_t, PICA::ColorFmt>::value) { descriptor->setPixelFormat(pixelFormat);
pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format); descriptor->setWidth(size.u());
} else if (std::is_same<Format_t, PICA::DepthFmt>::value) { descriptor->setHeight(size.v());
pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format); descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead);
} else { descriptor->setStorageMode(MTL::StorageModePrivate);
panic("Invalid format type"); texture = device->newTexture(descriptor);
} texture->setLabel(toNSString(
std::string(std::is_same<Format_t, PICA::ColorFmt>::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" +
std::to_string(size.v())
));
descriptor->release();
}
MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); void free() {
descriptor->setTextureType(MTL::TextureType2D); valid = false;
descriptor->setPixelFormat(pixelFormat);
descriptor->setWidth(size.u());
descriptor->setHeight(size.v());
descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead);
descriptor->setStorageMode(MTL::StorageModePrivate);
texture = device->newTexture(descriptor);
texture->setLabel(toNSString(std::string(std::is_same<Format_t, PICA::ColorFmt>::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" + std::to_string(size.v())));
descriptor->release();
}
void free() { if (texture) {
valid = false; texture->release();
}
}
if (texture) { u64 sizeInBytes() { return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); }
texture->release(); };
}
}
u64 sizeInBytes() { using ColorRenderTarget = RenderTarget<PICA::ColorFmt>;
return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); using DepthStencilRenderTarget = RenderTarget<PICA::DepthFmt>;
} } // namespace Metal
};
typedef RenderTarget<PICA::ColorFmt> ColorRenderTarget;
typedef RenderTarget<PICA::DepthFmt> DepthStencilRenderTarget;
} // namespace Metal

View file

@ -1,8 +1,9 @@
#pragma once #pragma once
#include <Metal/Metal.hpp>
#include <array> #include <array>
#include <string> #include <string>
#include <Metal/Metal.hpp>
#include "PICA/regs.hpp" #include "PICA/regs.hpp"
#include "boost/icl/interval.hpp" #include "boost/icl/interval.hpp"
#include "helpers.hpp" #include "helpers.hpp"
@ -10,69 +11,64 @@
#include "opengl.hpp" #include "opengl.hpp"
#include "renderer_mtl/pica_to_mtl.hpp" #include "renderer_mtl/pica_to_mtl.hpp"
template <typename T> template <typename T>
using Interval = boost::icl::right_open_interval<T>; using Interval = boost::icl::right_open_interval<T>;
namespace Metal { namespace Metal {
struct Texture {
MTL::Device* device;
struct Texture { u32 location;
MTL::Device* device; u32 config; // Magnification/minification filter, wrapping configs, etc
PICA::TextureFmt format;
OpenGL::uvec2 size;
bool valid;
u32 location; // Range of VRAM taken up by buffer
u32 config; // Magnification/minification filter, wrapping configs, etc Interval<u32> range;
PICA::TextureFmt format;
OpenGL::uvec2 size;
bool valid;
// Range of VRAM taken up by buffer PICA::PixelFormatInfo formatInfo;
Interval<u32> range; MTL::Texture* texture = nullptr;
MTL::SamplerState* sampler = nullptr;
PICA::PixelFormatInfo formatInfo; Texture() : valid(false) {}
MTL::Texture* texture = nullptr;
MTL::SamplerState* sampler = nullptr;
Texture() : valid(false) {} Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true)
: device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) {
u64 endLoc = (u64)loc + sizeInBytes();
// Check if start and end are valid here
range = Interval<u32>(loc, (u32)endLoc);
}
Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true) // For 2 textures to "match" we only care about their locations, formats, and dimensions to match
: device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) { // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture
bool matches(Texture& other) {
return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y();
}
u64 endLoc = (u64)loc + sizeInBytes(); void allocate();
// Check if start and end are valid here void setNewConfig(u32 newConfig);
range = Interval<u32>(loc, (u32)endLoc); void decodeTexture(std::span<const u8> data);
} void free();
u64 sizeInBytes();
// For 2 textures to "match" we only care about their locations, formats, and dimensions to match u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
// For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
bool matches(Texture& other) { u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
return location == other.location && format == other.format &&
size.x() == other.size.x() && size.y() == other.size.y();
}
void allocate(); // Get the morton interleave offset of a texel based on its U and V values
void setNewConfig(u32 newConfig); static u32 mortonInterleave(u32 u, u32 v);
void decodeTexture(std::span<const u8> data); // Get the byte offset of texel (u, v) in the texture
void free(); static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel);
u64 sizeInBytes(); static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width);
u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data); // Returns the format of this texture as a string
u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data); std::string_view formatToString() { return PICA::textureFormatToString(format); }
u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
// Get the morton interleave offset of a texel based on its U and V values // Returns the texel at coordinates (u, v) of an ETC1(A4) texture
static u32 mortonInterleave(u32 u, u32 v); // TODO: Make hasAlpha a template parameter
// Get the byte offset of texel (u, v) in the texture u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data);
static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel); u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); };
} // namespace Metal
// Returns the format of this texture as a string
std::string_view formatToString() {
return PICA::textureFormatToString(format);
}
// Returns the texel at coordinates (u, v) of an ETC1(A4) texture
// TODO: Make hasAlpha a template parameter
u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data);
u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
};
} // namespace Metal

View file

@ -5,76 +5,74 @@
using namespace PICA; using namespace PICA;
namespace Metal { namespace Metal {
struct BufferHandle {
MTL::Buffer* buffer;
size_t offset;
};
struct BufferHandle { class VertexBufferCache {
MTL::Buffer* buffer; // 128MB buffer for caching vertex data
size_t offset; static constexpr usize CACHE_BUFFER_SIZE = 128 * 1024 * 1024;
};
// 128MB buffer for caching vertex data public:
#define CACHE_BUFFER_SIZE 128 * 1024 * 1024 VertexBufferCache() = default;
class VertexBufferCache { ~VertexBufferCache() {
public: endFrame();
VertexBufferCache() = default; buffer->release();
}
~VertexBufferCache() { void set(MTL::Device* dev) {
endFrame(); device = dev;
buffer->release(); create();
} }
void set(MTL::Device* dev) { void endFrame() {
device = dev; ptr = 0;
create(); for (auto buffer : additionalAllocations) {
} buffer->release();
}
additionalAllocations.clear();
}
void endFrame() { BufferHandle get(const void* data, size_t size) {
ptr = 0; // If the vertex buffer is too large, just create a new one
for (auto buffer : additionalAllocations) { if (ptr + size > CACHE_BUFFER_SIZE) {
buffer->release(); MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared);
} newBuffer->setLabel(toNSString("Additional vertex buffer"));
additionalAllocations.clear(); additionalAllocations.push_back(newBuffer);
} Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer");
BufferHandle get(const void* data, size_t size) { return BufferHandle{newBuffer, 0};
// If the vertex buffer is too large, just create a new one }
if (ptr + size > CACHE_BUFFER_SIZE) {
MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared);
newBuffer->setLabel(toNSString("Additional vertex buffer"));
additionalAllocations.push_back(newBuffer);
Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer");
return BufferHandle{newBuffer, 0}; // Copy the data into the buffer
} memcpy((char*)buffer->contents() + ptr, data, size);
// Copy the data into the buffer size_t oldPtr = ptr;
memcpy((char*)buffer->contents() + ptr, data, size); ptr += size;
size_t oldPtr = ptr; return BufferHandle{buffer, oldPtr};
ptr += size; }
return BufferHandle{buffer, oldPtr}; void reset() {
} endFrame();
if (buffer) {
buffer->release();
create();
}
}
void reset() { private:
endFrame(); MTL::Buffer* buffer = nullptr;
if (buffer) { size_t ptr = 0;
buffer->release(); std::vector<MTL::Buffer*> additionalAllocations;
create();
}
}
private: MTL::Device* device;
MTL::Buffer* buffer = nullptr;
size_t ptr = 0;
std::vector<MTL::Buffer*> additionalAllocations;
MTL::Device* device; void create() {
buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared);
void create() { buffer->setLabel(toNSString("Shared vertex buffer"));
buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared); }
buffer->setLabel(toNSString("Shared vertex buffer")); };
} } // namespace Metal
};
} // namespace Metal

View file

@ -5,12 +5,8 @@
#include "mtl_common.hpp" #include "mtl_common.hpp"
namespace Metal {
	dispatch_data_t createDispatchData(const void* data, size_t size);
}  // namespace Metal

// Cast from std::string to NS::String*
// The bytes are interpreted as UTF-8, which is identical to ASCII for pure-ASCII input but also accepts
// strings containing non-ASCII characters.
inline NS::String* toNSString(const std::string& str) { return NS::String::string(str.c_str(), NS::UTF8StringEncoding); }

View file

@ -1,155 +1,154 @@
#pragma once #pragma once
#include <Metal/Metal.hpp> #include <Metal/Metal.hpp>
#include "PICA/regs.hpp" #include "PICA/regs.hpp"
namespace PICA { namespace PICA {
struct PixelFormatInfo {
MTL::PixelFormat pixelFormat;
size_t bytesPerTexel;
};
struct PixelFormatInfo { constexpr PixelFormatInfo pixelFormatInfos[14] = {
MTL::PixelFormat pixelFormat; {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8
size_t bytesPerTexel; {MTL::PixelFormatRGBA8Unorm, 4}, // RGB8
}; {MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551
{MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565
{MTL::PixelFormatABGR4Unorm, 2}, // RGBA4
{MTL::PixelFormatRGBA8Unorm, 4}, // IA8
{MTL::PixelFormatRG8Unorm, 2}, // RG8
{MTL::PixelFormatRGBA8Unorm, 4}, // I8
{MTL::PixelFormatA8Unorm, 1}, // A8
{MTL::PixelFormatABGR4Unorm, 2}, // IA4
{MTL::PixelFormatABGR4Unorm, 2}, // I4
{MTL::PixelFormatA8Unorm, 1}, // A4
{MTL::PixelFormatRGBA8Unorm, 4}, // ETC1
{MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4
};
constexpr PixelFormatInfo pixelFormatInfos[14] = { inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast<int>(format)]; }
{MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8
{MTL::PixelFormatRGBA8Unorm, 4}, // RGB8
{MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551
{MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565
{MTL::PixelFormatABGR4Unorm, 2}, // RGBA4
{MTL::PixelFormatRGBA8Unorm, 4}, // IA8
{MTL::PixelFormatRG8Unorm, 2}, // RG8
{MTL::PixelFormatRGBA8Unorm, 4}, // I8
{MTL::PixelFormatA8Unorm, 1}, // A8
{MTL::PixelFormatABGR4Unorm, 2}, // IA4
{MTL::PixelFormatABGR4Unorm, 2}, // I4
{MTL::PixelFormatA8Unorm, 1}, // A4
{MTL::PixelFormatRGBA8Unorm, 4}, // ETC1
{MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4
};
inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) {
return pixelFormatInfos[static_cast<int>(format)]; switch (format) {
} case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm;
case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm;
case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm?
case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm?
case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm;
}
}
inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) { inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) {
switch (format) { switch (format) {
case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm; case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm;
case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm; case DepthFmt::Unknown1: return MTL::PixelFormatInvalid;
case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm? case DepthFmt::Depth24:
case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm? return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats
case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm; // Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead
} case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8;
} }
}
inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) { inline MTL::CompareFunction toMTLCompareFunc(u8 func) {
switch (format) { switch (func) {
case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm; case 0: return MTL::CompareFunctionNever;
case DepthFmt::Unknown1: return MTL::PixelFormatInvalid; case 1: return MTL::CompareFunctionAlways;
case DepthFmt::Depth24: return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats case 2: return MTL::CompareFunctionEqual;
// Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead case 3: return MTL::CompareFunctionNotEqual;
case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8; case 4: return MTL::CompareFunctionLess;
} case 5: return MTL::CompareFunctionLessEqual;
} case 6: return MTL::CompareFunctionGreater;
case 7: return MTL::CompareFunctionGreaterEqual;
default: panic("Unknown compare function %u", func);
}
inline MTL::CompareFunction toMTLCompareFunc(u8 func) { return MTL::CompareFunctionAlways;
switch (func) { }
case 0: return MTL::CompareFunctionNever;
case 1: return MTL::CompareFunctionAlways;
case 2: return MTL::CompareFunctionEqual;
case 3: return MTL::CompareFunctionNotEqual;
case 4: return MTL::CompareFunctionLess;
case 5: return MTL::CompareFunctionLessEqual;
case 6: return MTL::CompareFunctionGreater;
case 7: return MTL::CompareFunctionGreaterEqual;
default: panic("Unknown compare function %u", func);
}
return MTL::CompareFunctionAlways; inline MTL::BlendOperation toMTLBlendOperation(u8 op) {
} switch (op) {
case 0: return MTL::BlendOperationAdd;
case 1: return MTL::BlendOperationSubtract;
case 2: return MTL::BlendOperationReverseSubtract;
case 3: return MTL::BlendOperationMin;
case 4: return MTL::BlendOperationMax;
case 5: return MTL::BlendOperationAdd; // Unused (same as 0)
case 6: return MTL::BlendOperationAdd; // Unused (same as 0)
case 7: return MTL::BlendOperationAdd; // Unused (same as 0)
default: panic("Unknown blend operation %u", op);
}
inline MTL::BlendOperation toMTLBlendOperation(u8 op) { return MTL::BlendOperationAdd;
switch (op) { }
case 0: return MTL::BlendOperationAdd;
case 1: return MTL::BlendOperationSubtract;
case 2: return MTL::BlendOperationReverseSubtract;
case 3: return MTL::BlendOperationMin;
case 4: return MTL::BlendOperationMax;
case 5: return MTL::BlendOperationAdd; // Unused (same as 0)
case 6: return MTL::BlendOperationAdd; // Unused (same as 0)
case 7: return MTL::BlendOperationAdd; // Unused (same as 0)
default: panic("Unknown blend operation %u", op);
}
return MTL::BlendOperationAdd; inline MTL::BlendFactor toMTLBlendFactor(u8 factor) {
} switch (factor) {
case 0: return MTL::BlendFactorZero;
case 1: return MTL::BlendFactorOne;
case 2: return MTL::BlendFactorSourceColor;
case 3: return MTL::BlendFactorOneMinusSourceColor;
case 4: return MTL::BlendFactorDestinationColor;
case 5: return MTL::BlendFactorOneMinusDestinationColor;
case 6: return MTL::BlendFactorSourceAlpha;
case 7: return MTL::BlendFactorOneMinusSourceAlpha;
case 8: return MTL::BlendFactorDestinationAlpha;
case 9: return MTL::BlendFactorOneMinusDestinationAlpha;
case 10: return MTL::BlendFactorBlendColor;
case 11: return MTL::BlendFactorOneMinusBlendColor;
case 12: return MTL::BlendFactorBlendAlpha;
case 13: return MTL::BlendFactorOneMinusBlendAlpha;
case 14: return MTL::BlendFactorSourceAlphaSaturated;
case 15: return MTL::BlendFactorOne; // Undocumented
default: panic("Unknown blend factor %u", factor);
}
inline MTL::BlendFactor toMTLBlendFactor(u8 factor) { return MTL::BlendFactorOne;
switch (factor) { }
case 0: return MTL::BlendFactorZero;
case 1: return MTL::BlendFactorOne;
case 2: return MTL::BlendFactorSourceColor;
case 3: return MTL::BlendFactorOneMinusSourceColor;
case 4: return MTL::BlendFactorDestinationColor;
case 5: return MTL::BlendFactorOneMinusDestinationColor;
case 6: return MTL::BlendFactorSourceAlpha;
case 7: return MTL::BlendFactorOneMinusSourceAlpha;
case 8: return MTL::BlendFactorDestinationAlpha;
case 9: return MTL::BlendFactorOneMinusDestinationAlpha;
case 10: return MTL::BlendFactorBlendColor;
case 11: return MTL::BlendFactorOneMinusBlendColor;
case 12: return MTL::BlendFactorBlendAlpha;
case 13: return MTL::BlendFactorOneMinusBlendAlpha;
case 14: return MTL::BlendFactorSourceAlphaSaturated;
case 15: return MTL::BlendFactorOne; // Undocumented
default: panic("Unknown blend factor %u", factor);
}
return MTL::BlendFactorOne; inline MTL::StencilOperation toMTLStencilOperation(u8 op) {
} switch (op) {
case 0: return MTL::StencilOperationKeep;
case 1: return MTL::StencilOperationZero;
case 2: return MTL::StencilOperationReplace;
case 3: return MTL::StencilOperationIncrementClamp;
case 4: return MTL::StencilOperationDecrementClamp;
case 5: return MTL::StencilOperationInvert;
case 6: return MTL::StencilOperationIncrementWrap;
case 7: return MTL::StencilOperationDecrementWrap;
default: panic("Unknown stencil operation %u", op);
}
inline MTL::StencilOperation toMTLStencilOperation(u8 op) { return MTL::StencilOperationKeep;
switch (op) { }
case 0: return MTL::StencilOperationKeep;
case 1: return MTL::StencilOperationZero;
case 2: return MTL::StencilOperationReplace;
case 3: return MTL::StencilOperationIncrementClamp;
case 4: return MTL::StencilOperationDecrementClamp;
case 5: return MTL::StencilOperationInvert;
case 6: return MTL::StencilOperationIncrementWrap;
case 7: return MTL::StencilOperationDecrementWrap;
default: panic("Unknown stencil operation %u", op);
}
return MTL::StencilOperationKeep; inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) {
} switch (primType) {
case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle;
case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip;
case PrimType::TriangleFan:
Helpers::warn("Triangle fans are not supported on Metal, using triangles instead");
return MTL::PrimitiveTypeTriangle;
case PrimType::GeometryPrimitive:
// Helpers::warn("Geometry primitives are not yet, using triangles instead");
return MTL::PrimitiveTypeTriangle;
}
}
inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) { inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) {
switch (primType) { switch (addrMode) {
case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle; case 0: return MTL::SamplerAddressModeClampToEdge;
case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip; case 1: return MTL::SamplerAddressModeClampToBorderColor;
case PrimType::TriangleFan: case 2: return MTL::SamplerAddressModeRepeat;
Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); case 3: return MTL::SamplerAddressModeMirrorRepeat;
return MTL::PrimitiveTypeTriangle; case 4: return MTL::SamplerAddressModeClampToEdge;
case PrimType::GeometryPrimitive: case 5: return MTL::SamplerAddressModeClampToBorderColor;
//Helpers::warn("Geometry primitives are not yet, using triangles instead"); case 6: return MTL::SamplerAddressModeRepeat;
return MTL::PrimitiveTypeTriangle; case 7: return MTL::SamplerAddressModeRepeat;
} default: panic("Unknown sampler address mode %u", addrMode);
} }
inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) { return MTL::SamplerAddressModeClampToEdge;
switch (addrMode) { }
case 0: return MTL::SamplerAddressModeClampToEdge; } // namespace PICA
case 1: return MTL::SamplerAddressModeClampToBorderColor;
case 2: return MTL::SamplerAddressModeRepeat;
case 3: return MTL::SamplerAddressModeMirrorRepeat;
case 4: return MTL::SamplerAddressModeClampToEdge;
case 5: return MTL::SamplerAddressModeClampToBorderColor;
case 6: return MTL::SamplerAddressModeRepeat;
case 7: return MTL::SamplerAddressModeRepeat;
default: panic("Unknown sampler address mode %u", addrMode);
}
return MTL::SamplerAddressModeClampToEdge;
}
} // namespace PICA

View file

@ -3,15 +3,16 @@
#include <Metal/Metal.hpp> #include <Metal/Metal.hpp>
#include <QuartzCore/QuartzCore.hpp> #include <QuartzCore/QuartzCore.hpp>
#include "renderer.hpp"
#include "mtl_texture.hpp"
#include "mtl_render_target.hpp"
#include "mtl_blit_pipeline_cache.hpp" #include "mtl_blit_pipeline_cache.hpp"
#include "mtl_draw_pipeline_cache.hpp"
#include "mtl_depth_stencil_cache.hpp"
#include "mtl_vertex_buffer_cache.hpp"
#include "mtl_lut_texture.hpp"
#include "mtl_command_encoder.hpp" #include "mtl_command_encoder.hpp"
#include "mtl_depth_stencil_cache.hpp"
#include "mtl_draw_pipeline_cache.hpp"
#include "mtl_lut_texture.hpp"
#include "mtl_render_target.hpp"
#include "mtl_texture.hpp"
#include "mtl_vertex_buffer_cache.hpp"
#include "renderer.hpp"
// HACK: use the OpenGL cache // HACK: use the OpenGL cache
#include "../renderer_gl/surface_cache.hpp" #include "../renderer_gl/surface_cache.hpp"
@ -19,7 +20,7 @@
class GPU; class GPU;
// RGBA colour with one float per channel, used as the clear value of colour attachments
struct Color4 {
	float r;
	float g;
	float b;
	float a;
};
class RendererMTL final : public Renderer { class RendererMTL final : public Renderer {
@ -72,7 +73,7 @@ class RendererMTL final : public Renderer {
// Pipelines // Pipelines
MTL::RenderPipelineState* displayPipeline; MTL::RenderPipelineState* displayPipeline;
//MTL::RenderPipelineState* copyToLutTexturePipeline; // MTL::RenderPipelineState* copyToLutTexturePipeline;
// Clears // Clears
std::map<MTL::Texture*, Color4> colorClearOps; std::map<MTL::Texture*, Color4> colorClearOps;
@ -95,93 +96,112 @@ class RendererMTL final : public Renderer {
} }
void endRenderPass() { void endRenderPass() {
if (renderCommandEncoder) { if (renderCommandEncoder) {
renderCommandEncoder->endEncoding(); renderCommandEncoder->endEncoding();
renderCommandEncoder = nullptr; renderCommandEncoder = nullptr;
} }
} }
void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr); void beginRenderPassIfNeeded(
MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr
);
void commitCommandBuffer() { void commitCommandBuffer() {
if (renderCommandEncoder) { if (renderCommandEncoder) {
renderCommandEncoder->endEncoding(); renderCommandEncoder->endEncoding();
renderCommandEncoder->release(); renderCommandEncoder->release();
renderCommandEncoder = nullptr; renderCommandEncoder = nullptr;
} }
if (commandBuffer) { if (commandBuffer) {
commandBuffer->commit(); commandBuffer->commit();
// HACK // HACK
commandBuffer->waitUntilCompleted(); commandBuffer->waitUntilCompleted();
commandBuffer->release(); commandBuffer->release();
commandBuffer = nullptr; commandBuffer = nullptr;
} }
} }
template<typename AttachmentT, typename ClearDataT, typename GetAttachmentT, typename SetClearDataT> template <typename AttachmentT, typename ClearDataT, typename GetAttachmentT, typename SetClearDataT>
inline void clearAttachment(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment, SetClearDataT setClearData) { inline void clearAttachment(
bool beginRenderPass = (renderPassDescriptor == nullptr); MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment,
if (!renderPassDescriptor) { SetClearDataT setClearData
renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); ) {
} bool beginRenderPass = (renderPassDescriptor == nullptr);
if (!renderPassDescriptor) {
renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
}
AttachmentT* attachment = getAttachment(renderPassDescriptor); AttachmentT* attachment = getAttachment(renderPassDescriptor);
attachment->setTexture(texture); attachment->setTexture(texture);
setClearData(attachment, clearData); setClearData(attachment, clearData);
attachment->setLoadAction(MTL::LoadActionClear); attachment->setLoadAction(MTL::LoadActionClear);
attachment->setStoreAction(MTL::StoreActionStore); attachment->setStoreAction(MTL::StoreActionStore);
if (beginRenderPass) { if (beginRenderPass) {
if (std::is_same<AttachmentT, MTL::RenderPassColorAttachmentDescriptor>::value) if (std::is_same<AttachmentT, MTL::RenderPassColorAttachmentDescriptor>::value)
beginRenderPassIfNeeded(renderPassDescriptor, true, texture); beginRenderPassIfNeeded(renderPassDescriptor, true, texture);
else else
beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture); beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture);
} }
} }
template<typename AttachmentT, typename ClearDataT, typename GetAttachmentT, typename SetClearDataT> template <typename AttachmentT, typename ClearDataT, typename GetAttachmentT, typename SetClearDataT>
inline bool clearAttachment(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map<MTL::Texture*, ClearDataT>& clearOps, GetAttachmentT getAttachment, SetClearDataT setClearData) { inline bool clearAttachment(
auto it = clearOps.find(texture); MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map<MTL::Texture*, ClearDataT>& clearOps,
if (it != clearOps.end()) { GetAttachmentT getAttachment, SetClearDataT setClearData
clearAttachment<AttachmentT>(renderPassDescriptor, texture, it->second, getAttachment, setClearData); ) {
clearOps.erase(it); auto it = clearOps.find(texture);
return true; if (it != clearOps.end()) {
} clearAttachment<AttachmentT>(renderPassDescriptor, texture, it->second, getAttachment, setClearData);
clearOps.erase(it);
return true;
}
if (renderPassDescriptor) { if (renderPassDescriptor) {
AttachmentT* attachment = getAttachment(renderPassDescriptor); AttachmentT* attachment = getAttachment(renderPassDescriptor);
attachment->setTexture(texture); attachment->setTexture(texture);
attachment->setLoadAction(MTL::LoadActionLoad); attachment->setLoadAction(MTL::LoadActionLoad);
attachment->setStoreAction(MTL::StoreActionStore); attachment->setStoreAction(MTL::StoreActionStore);
} }
return false; return false;
} }
// Applies the pending colour clear for texture (if any) to colour attachment 0 of the render pass
bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
	auto colorAttachment = [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); };
	auto applyClearColor = [](auto attachment, auto& color) {
		attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a));
	};

	return clearAttachment<MTL::RenderPassColorAttachmentDescriptor, Color4>(
		renderPassDescriptor, texture, colorClearOps, colorAttachment, applyClearColor
	);
}
// Applies the pending depth clear for texture (if any) to the depth attachment of the render pass
bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
	auto depthAttachment = [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); };
	auto applyClearDepth = [](auto attachment, auto& depth) { attachment->setClearDepth(depth); };

	return clearAttachment<MTL::RenderPassDepthAttachmentDescriptor, float>(
		renderPassDescriptor, texture, depthClearOps, depthAttachment, applyClearDepth
	);
}
// Applies the pending stencil clear for texture (if any) to the stencil attachment of the render pass
bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
	auto stencilAttachment = [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); };
	auto applyClearStencil = [](auto attachment, auto& stencil) { attachment->setClearStencil(stencil); };

	return clearAttachment<MTL::RenderPassStencilAttachmentDescriptor, u8>(
		renderPassDescriptor, texture, stencilClearOps, stencilAttachment, applyClearStencil
	);
}
std::optional<Metal::ColorRenderTarget> getColorRenderTarget(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); std::optional<Metal::ColorRenderTarget> getColorRenderTarget(
u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true
);
Metal::DepthStencilRenderTarget& getDepthRenderTarget(); Metal::DepthStencilRenderTarget& getDepthRenderTarget();
Metal::Texture& getTexture(Metal::Texture& tex); Metal::Texture& getTexture(Metal::Texture& tex);
void setupTextureEnvState(MTL::RenderCommandEncoder* encoder); void setupTextureEnvState(MTL::RenderCommandEncoder* encoder);
void bindTexturesToSlots(); void bindTexturesToSlots();
void updateLightingLUT(MTL::RenderCommandEncoder* encoder); void updateLightingLUT(MTL::RenderCommandEncoder* encoder);
void updateFogLUT(MTL::RenderCommandEncoder* encoder); void updateFogLUT(MTL::RenderCommandEncoder* encoder);
void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect, const Math::Rect<u32>& destRect); void textureCopyImpl(
Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect,
const Math::Rect<u32>& destRect
);
}; };

View file

@ -12,8 +12,9 @@ static constexpr u32 signExtend3To32(u32 val) {
u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data) { u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data) {
// Pixel offset of the 8x8 tile based on u, v and the width of the texture // Pixel offset of the 8x8 tile based on u, v and the width of the texture
u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); u32 offs = ((u & ~7) * 8) + ((v & ~7) * width);
if (!hasAlpha) if (!hasAlpha) {
offs >>= 1; offs >>= 1;
}
// In-tile offsets for u/v // In-tile offsets for u/v
u &= 7; u &= 7;

View file

@ -1,124 +1,116 @@
#include <algorithm> #include <algorithm>
#include "colour.hpp" #include "colour.hpp"
#include "renderer_mtl/renderer_mtl.hpp"
#include "renderer_mtl/mtl_texture.hpp" #include "renderer_mtl/mtl_texture.hpp"
#include "renderer_mtl/renderer_mtl.hpp"
using namespace Helpers; using namespace Helpers;
namespace Metal { namespace Metal {
static constexpr u32 signExtend3To32(u32 val) {
static constexpr u32 signExtend3To32(u32 val) { return (u32)(s32(val) << 29 >> 29);
return (u32)(s32(val) << 29 >> 29);
}
u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data) {
// Pixel offset of the 8x8 tile based on u, v and the width of the texture
u32 offs = ((u & ~7) * 8) + ((v & ~7) * width);
if (!hasAlpha)
offs >>= 1;
// In-tile offsets for u/v
u &= 7;
v &= 7;
// ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles
// Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes
const u32 subTileSize = hasAlpha ? 16 : 8;
const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in?
// In-subtile offsets for u/v
u &= 3;
v &= 3;
offs += subTileSize * subTileIndex;
u32 alpha;
const u64* ptr = reinterpret_cast<const u64*>(data.data() + offs); // Cast to u64*
if (hasAlpha) {
// First 64 bits of the 4x4 subtile are alpha data
const u64 alphaData = *ptr++;
alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf);
}
else {
alpha = 0xff; // ETC1 without alpha uses ff for every pixel
} }
// Next 64 bits of the subtile are colour data u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data) {
u64 colourData = *ptr; // Pixel offset of the 8x8 tile based on u, v and the width of the texture
return decodeETC(alpha, u, v, colourData); u32 offs = ((u & ~7) * 8) + ((v & ~7) * width);
} if (!hasAlpha) {
offs >>= 1;
}
u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) { // In-tile offsets for u/v
static constexpr u32 modifiers[8][2] = { u &= 7;
{ 2, 8 }, v &= 7;
{ 5, 17 },
{ 9, 29 },
{ 13, 42 },
{ 18, 60 },
{ 24, 80 },
{ 33, 106 },
{ 47, 183 },
};
// Parse colour data for 4x4 block // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles
const u32 subindices = getBits<0, 16, u32>(colourData); // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes
const u32 negationFlags = getBits<16, 16, u32>(colourData); const u32 subTileSize = hasAlpha ? 16 : 8;
const bool flip = getBit<32>(colourData); const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in?
const bool diffMode = getBit<33>(colourData);
// Note: index1 is indeed stored on the higher bits, with index2 in the lower bits // In-subtile offsets for u/v
const u32 tableIndex1 = getBits<37, 3, u32>(colourData); u &= 3;
const u32 tableIndex2 = getBits<34, 3, u32>(colourData); v &= 3;
const u32 texelIndex = u * 4 + v; // Index of the texel in the block offs += subTileSize * subTileIndex;
if (flip) u32 alpha;
std::swap(u, v); const u64* ptr = reinterpret_cast<const u64*>(data.data() + offs); // Cast to u64*
s32 r, g, b; if (hasAlpha) {
if (diffMode) { // First 64 bits of the 4x4 subtile are alpha data
r = getBits<59, 5, s32>(colourData); const u64 alphaData = *ptr++;
g = getBits<51, 5, s32>(colourData); alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf);
b = getBits<43, 5, s32>(colourData); } else {
alpha = 0xff; // ETC1 without alpha uses ff for every pixel
}
if (u >= 2) { // Next 64 bits of the subtile are colour data
r += signExtend3To32(getBits<56, 3, u32>(colourData)); u64 colourData = *ptr;
g += signExtend3To32(getBits<48, 3, u32>(colourData)); return decodeETC(alpha, u, v, colourData);
b += signExtend3To32(getBits<40, 3, u32>(colourData)); }
}
// Expand from 5 to 8 bits per channel u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) {
r = Colour::convert5To8Bit(r); static constexpr u32 modifiers[8][2] = {
g = Colour::convert5To8Bit(g); {2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183},
b = Colour::convert5To8Bit(b); };
} else {
if (u < 2) {
r = getBits<60, 4, s32>(colourData);
g = getBits<52, 4, s32>(colourData);
b = getBits<44, 4, s32>(colourData);
} else {
r = getBits<56, 4, s32>(colourData);
g = getBits<48, 4, s32>(colourData);
b = getBits<40, 4, s32>(colourData);
}
// Expand from 4 to 8 bits per channel // Parse colour data for 4x4 block
r = Colour::convert4To8Bit(r); const u32 subindices = getBits<0, 16, u32>(colourData);
g = Colour::convert4To8Bit(g); const u32 negationFlags = getBits<16, 16, u32>(colourData);
b = Colour::convert4To8Bit(b); const bool flip = getBit<32>(colourData);
} const bool diffMode = getBit<33>(colourData);
const u32 index = (u < 2) ? tableIndex1 : tableIndex2; // Note: index1 is indeed stored on the higher bits, with index2 in the lower bits
s32 modifier = modifiers[index][(subindices >> texelIndex) & 1]; const u32 tableIndex1 = getBits<37, 3, u32>(colourData);
const u32 tableIndex2 = getBits<34, 3, u32>(colourData);
const u32 texelIndex = u * 4 + v; // Index of the texel in the block
if (((negationFlags >> texelIndex) & 1) != 0) { if (flip) std::swap(u, v);
modifier = -modifier;
}
r = std::clamp(r + modifier, 0, 255); s32 r, g, b;
g = std::clamp(g + modifier, 0, 255); if (diffMode) {
b = std::clamp(b + modifier, 0, 255); r = getBits<59, 5, s32>(colourData);
g = getBits<51, 5, s32>(colourData);
b = getBits<43, 5, s32>(colourData);
return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r); if (u >= 2) {
} r += signExtend3To32(getBits<56, 3, u32>(colourData));
g += signExtend3To32(getBits<48, 3, u32>(colourData));
b += signExtend3To32(getBits<40, 3, u32>(colourData));
}
} // namespace Metal // Expand from 5 to 8 bits per channel
r = Colour::convert5To8Bit(r);
g = Colour::convert5To8Bit(g);
b = Colour::convert5To8Bit(b);
} else {
if (u < 2) {
r = getBits<60, 4, s32>(colourData);
g = getBits<52, 4, s32>(colourData);
b = getBits<44, 4, s32>(colourData);
} else {
r = getBits<56, 4, s32>(colourData);
g = getBits<48, 4, s32>(colourData);
b = getBits<40, 4, s32>(colourData);
}
// Expand from 4 to 8 bits per channel
r = Colour::convert4To8Bit(r);
g = Colour::convert4To8Bit(g);
b = Colour::convert4To8Bit(b);
}
const u32 index = (u < 2) ? tableIndex1 : tableIndex2;
s32 modifier = modifiers[index][(subindices >> texelIndex) & 1];
if (((negationFlags >> texelIndex) & 1) != 0) {
modifier = -modifier;
}
r = std::clamp(r + modifier, 0, 255);
g = std::clamp(g + modifier, 0, 255);
b = std::clamp(b + modifier, 0, 255);
return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r);
}
} // namespace Metal

View file

@ -1,32 +1,27 @@
#include "renderer_mtl/renderer_mtl.hpp" #include "renderer_mtl/renderer_mtl.hpp"
namespace Metal { namespace Metal {
static constexpr u32 LAYER_COUNT = 1024;
constexpr u32 LAYER_COUNT = 1024; LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) {
MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init();
desc->setTextureType(type);
desc->setPixelFormat(pixelFormat);
desc->setWidth(width);
desc->setHeight(height);
desc->setArrayLength(LAYER_COUNT);
desc->setUsage(MTL::TextureUsageShaderRead /* | MTL::TextureUsageShaderWrite*/);
desc->setStorageMode(MTL::StorageModeShared);
LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) { texture = device->newTexture(desc);
MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); texture->setLabel(toNSString(name));
desc->setTextureType(type); desc->release();
desc->setPixelFormat(pixelFormat); }
desc->setWidth(width);
desc->setHeight(height);
desc->setArrayLength(LAYER_COUNT);
desc->setUsage(MTL::TextureUsageShaderRead/* | MTL::TextureUsageShaderWrite*/);
desc->setStorageMode(MTL::StorageModeShared);
texture = device->newTexture(desc); LutTexture::~LutTexture() { texture->release(); }
texture->setLabel(toNSString(name));
desc->release();
}
LutTexture::~LutTexture() { u32 LutTexture::getNextIndex() {
texture->release(); currentIndex = (currentIndex + 1) % LAYER_COUNT;
} return currentIndex;
}
u32 LutTexture::getNextIndex() { } // namespace Metal
currentIndex = (currentIndex + 1) % LAYER_COUNT;
return currentIndex;
}
} // namespace Metal

View file

@ -1,312 +1,308 @@
#include "renderer_mtl/mtl_texture.hpp" #include "renderer_mtl/mtl_texture.hpp"
#include "renderer_mtl/objc_helper.hpp"
#include "colour.hpp"
#include <array> #include <array>
#include "colour.hpp"
#include "renderer_mtl/objc_helper.hpp"
using namespace Helpers; using namespace Helpers;
namespace Metal { namespace Metal {
void Texture::allocate() {
formatInfo = PICA::getPixelFormatInfo(format);
void Texture::allocate() { MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();
formatInfo = PICA::getPixelFormatInfo(format); descriptor->setTextureType(MTL::TextureType2D);
descriptor->setPixelFormat(formatInfo.pixelFormat);
descriptor->setWidth(size.u());
descriptor->setHeight(size.v());
descriptor->setUsage(MTL::TextureUsageShaderRead);
descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers?
texture = device->newTexture(descriptor);
texture->setLabel(toNSString(
"Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v())
));
descriptor->release();
MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); setNewConfig(config);
descriptor->setTextureType(MTL::TextureType2D);
descriptor->setPixelFormat(formatInfo.pixelFormat);
descriptor->setWidth(size.u());
descriptor->setHeight(size.v());
descriptor->setUsage(MTL::TextureUsageShaderRead);
descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers?
texture = device->newTexture(descriptor);
texture->setLabel(toNSString("Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v())));
descriptor->release();
setNewConfig(config);
}
// Store a new texture configuration word and rebuild the sampler state that mirrors it.
// The word carries the min/mag filter selection and the S/T wrapping modes.
void Texture::setNewConfig(u32 cfg) {
    config = cfg;

    // Drop the sampler that was built for the previous configuration, if any
    if (sampler != nullptr) {
        sampler->release();
    }

    // Bit 1 picks the magnification filter, bit 2 the minification filter (set = linear, clear = nearest)
    const bool linearMag = (cfg & 0x2) != 0;
    const bool linearMin = (cfg & 0x4) != 0;
    // Wrapping modes are the 3-bit fields starting at bit 8 (T) and bit 12 (S)
    const auto addressModeT = PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg));
    const auto addressModeS = PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg));

    MTL::SamplerDescriptor* desc = MTL::SamplerDescriptor::alloc()->init();
    desc->setMinFilter(linearMin ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest);
    desc->setMagFilter(linearMag ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest);
    desc->setSAddressMode(addressModeS);
    desc->setTAddressMode(addressModeT);
    desc->setLabel(toNSString("Sampler"));

    // The descriptor is only needed to create the sampler state, so release it straight after
    sampler = device->newSamplerState(desc);
    desc->release();
}
void Texture::free() {
valid = false;
if (texture) {
texture->release();
} }
if (sampler) {
sampler->release();
}
}
u64 Texture::sizeInBytes() { // Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on
u64 pixelCount = u64(size.x()) * u64(size.y()); void Texture::setNewConfig(u32 cfg) {
config = cfg;
switch (format) { if (sampler) {
case PICA::TextureFmt::RGBA8: // 4 bytes per pixel sampler->release();
return pixelCount * 4; }
case PICA::TextureFmt::RGB8: // 3 bytes per pixel const auto magFilter = (cfg & 0x2) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest;
return pixelCount * 3; const auto minFilter = (cfg & 0x4) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest;
const auto wrapT = PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg));
const auto wrapS = PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg));
case PICA::TextureFmt::RGBA5551: // 2 bytes per pixel MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init();
case PICA::TextureFmt::RGB565: samplerDescriptor->setMinFilter(minFilter);
case PICA::TextureFmt::RGBA4: samplerDescriptor->setMagFilter(magFilter);
case PICA::TextureFmt::RG8: samplerDescriptor->setSAddressMode(wrapS);
case PICA::TextureFmt::IA8: samplerDescriptor->setTAddressMode(wrapT);
return pixelCount * 2;
case PICA::TextureFmt::A8: // 1 byte per pixel samplerDescriptor->setLabel(toNSString("Sampler"));
case PICA::TextureFmt::I8: sampler = device->newSamplerState(samplerDescriptor);
case PICA::TextureFmt::IA4: samplerDescriptor->release();
return pixelCount; }
case PICA::TextureFmt::I4: // 4 bits per pixel void Texture::free() {
case PICA::TextureFmt::A4: valid = false;
return pixelCount / 2;
case PICA::TextureFmt::ETC1: // Compressed formats if (texture) {
case PICA::TextureFmt::ETC1A4: { texture->release();
// Number of 4x4 tiles }
const u64 tileCount = pixelCount / 16; if (sampler) {
// Tiles are 8 bytes each on ETC1 and 16 bytes each on ETC1A4 sampler->release();
const u64 tileSize = format == PICA::TextureFmt::ETC1 ? 8 : 16; }
return tileCount * tileSize; }
}
default: u64 Texture::sizeInBytes() {
Helpers::panic("[PICA] Attempted to get size of invalid texture type"); u64 pixelCount = u64(size.x()) * u64(size.y());
}
}
// u and v are the UVs of the relevant texel switch (format) {
// Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here case PICA::TextureFmt::RGBA8: // 4 bytes per pixel
// https://en.wikipedia.org/wiki/Z-order_curve return pixelCount * 4;
// Textures are split into 8x8 tiles. This function returns the in-tile offset depending on the u & v of the texel
// The in-tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of v % 8
// As documented in this picture https://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg
u32 Texture::mortonInterleave(u32 u, u32 v) {
static constexpr u32 xOffsets[] = { 0, 1, 4, 5, 16, 17, 20, 21 };
static constexpr u32 yOffsets[] = { 0, 2, 8, 10, 32, 34, 40, 42 };
return xOffsets[u & 7] + yOffsets[v & 7]; case PICA::TextureFmt::RGB8: // 3 bytes per pixel
} return pixelCount * 3;
// Get the byte offset of texel (u, v) in the texture case PICA::TextureFmt::RGBA5551: // 2 bytes per pixel
u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) { case PICA::TextureFmt::RGB565:
u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to case PICA::TextureFmt::RGBA4:
offset += mortonInterleave(u, v); // Add the in-tile offset of the texel case PICA::TextureFmt::RG8:
case PICA::TextureFmt::IA8: return pixelCount * 2;
return offset * bytesPerPixel; case PICA::TextureFmt::A8: // 1 byte per pixel
} case PICA::TextureFmt::I8:
case PICA::TextureFmt::IA4: return pixelCount;
// Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte case PICA::TextureFmt::I4: // 4 bits per pixel
u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) { case PICA::TextureFmt::A4: return pixelCount / 2;
u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to
offset += mortonInterleave(u, v); // Add the in-tile offset of the texel
return offset / 2; case PICA::TextureFmt::ETC1: // Compressed formats
} case PICA::TextureFmt::ETC1A4: {
// Number of 4x4 tiles
const u64 tileCount = pixelCount / 16;
// Tiles are 8 bytes each on ETC1 and 16 bytes each on ETC1A4
const u64 tileSize = format == PICA::TextureFmt::ETC1 ? 8 : 16;
return tileCount * tileSize;
}
u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) { default: Helpers::panic("[PICA] Attempted to get size of invalid texture type");
switch (fmt) { }
case PICA::TextureFmt::A4: { }
const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates // u and v are the UVs of the relevant texel
u8 alpha = data[offset] >> ((u % 2) ? 4 : 0); // Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here
alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha)); // https://en.wikipedia.org/wiki/Z-order_curve
// Textures are split into 8x8 tiles. This function returns the in-tile offset depending on the u & v of the texel
// The in-tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of v % 8
// As documented in this picture https://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg
u32 Texture::mortonInterleave(u32 u, u32 v) {
static constexpr u32 xOffsets[] = {0, 1, 4, 5, 16, 17, 20, 21};
static constexpr u32 yOffsets[] = {0, 2, 8, 10, 32, 34, 40, 42};
// A8 return xOffsets[u & 7] + yOffsets[v & 7];
return alpha; }
}
case PICA::TextureFmt::A8: { // Get the byte offset of texel (u, v) in the texture
u32 offset = getSwizzledOffset(u, v, size.u(), 1); u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) {
const u8 alpha = data[offset]; u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to
offset += mortonInterleave(u, v); // Add the in-tile offset of the texel
// A8 return offset * bytesPerPixel;
return alpha; }
}
default: // Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte
Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt)); u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) {
} u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to
} offset += mortonInterleave(u, v); // Add the in-tile offset of the texel
u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) { return offset / 2;
switch (fmt) { }
case PICA::TextureFmt::RG8: {
u32 offset = getSwizzledOffset(u, v, size.u(), 2);
constexpr u8 b = 0;
const u8 g = data[offset];
const u8 r = data[offset + 1];
// RG8 u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
return (g << 8) | r; switch (fmt) {
} case PICA::TextureFmt::A4: {
const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
case PICA::TextureFmt::RGBA4: { // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
u32 offset = getSwizzledOffset(u, v, size.u(), 2); u8 alpha = data[offset] >> ((u % 2) ? 4 : 0);
u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha));
u8 alpha = getBits<0, 4, u8>(texel); // A8
u8 b = getBits<4, 4, u8>(texel); return alpha;
u8 g = getBits<8, 4, u8>(texel); }
u8 r = getBits<12, 4, u8>(texel);
// ABGR4 case PICA::TextureFmt::A8: {
return (r << 12) | (g << 8) | (b << 4) | alpha; u32 offset = getSwizzledOffset(u, v, size.u(), 1);
} const u8 alpha = data[offset];
case PICA::TextureFmt::RGBA5551: { // A8
const u32 offset = getSwizzledOffset(u, v, size.u(), 2); return alpha;
const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); }
u8 alpha = getBit<0>(texel) ? 0xff : 0; default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
u8 b = getBits<1, 5, u8>(texel); }
u8 g = getBits<6, 5, u8>(texel); }
u8 r = getBits<11, 5, u8>(texel);
// BGR5A1 u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
return (alpha << 15) | (r << 10) | (g << 5) | b; switch (fmt) {
} case PICA::TextureFmt::RG8: {
u32 offset = getSwizzledOffset(u, v, size.u(), 2);
constexpr u8 b = 0;
const u8 g = data[offset];
const u8 r = data[offset + 1];
case PICA::TextureFmt::RGB565: { // RG8
const u32 offset = getSwizzledOffset(u, v, size.u(), 2); return (g << 8) | r;
const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); }
const u8 b = getBits<0, 5, u8>(texel); case PICA::TextureFmt::RGBA4: {
const u8 g = getBits<5, 6, u8>(texel); u32 offset = getSwizzledOffset(u, v, size.u(), 2);
const u8 r = getBits<11, 5, u8>(texel); u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
// B5G6R5 u8 alpha = getBits<0, 4, u8>(texel);
return (r << 11) | (g << 5) | b; u8 b = getBits<4, 4, u8>(texel);
} u8 g = getBits<8, 4, u8>(texel);
u8 r = getBits<12, 4, u8>(texel);
case PICA::TextureFmt::IA4: { // ABGR4
const u32 offset = getSwizzledOffset(u, v, size.u(), 1); return (r << 12) | (g << 8) | (b << 4) | alpha;
const u8 texel = data[offset]; }
const u8 alpha = texel & 0xf;
const u8 intensity = texel >> 4;
// ABGR4 case PICA::TextureFmt::RGBA5551: {
return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha; const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
} const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
case PICA::TextureFmt::I4: { u8 alpha = getBit<0>(texel) ? 0xff : 0;
u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); u8 b = getBits<1, 5, u8>(texel);
u8 g = getBits<6, 5, u8>(texel);
u8 r = getBits<11, 5, u8>(texel);
// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates // BGR5A1
u8 intensity = data[offset] >> ((u % 2) ? 4 : 0); return (alpha << 15) | (r << 10) | (g << 5) | b;
intensity = getBits<0, 4>(intensity); }
// ABGR4 case PICA::TextureFmt::RGB565: {
return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xff; const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
} const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
default: const u8 b = getBits<0, 5, u8>(texel);
Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt)); const u8 g = getBits<5, 6, u8>(texel);
} const u8 r = getBits<11, 5, u8>(texel);
}
u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) { // B5G6R5
switch (fmt) { return (r << 11) | (g << 5) | b;
case PICA::TextureFmt::RGB8: { }
const u32 offset = getSwizzledOffset(u, v, size.u(), 3);
const u8 b = data[offset];
const u8 g = data[offset + 1];
const u8 r = data[offset + 2];
// RGBA8 case PICA::TextureFmt::IA4: {
return (0xff << 24) | (b << 16) | (g << 8) | r; const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
} const u8 texel = data[offset];
const u8 alpha = texel & 0xf;
const u8 intensity = texel >> 4;
case PICA::TextureFmt::RGBA8: { // ABGR4
const u32 offset = getSwizzledOffset(u, v, size.u(), 4); return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha;
const u8 alpha = data[offset]; }
const u8 b = data[offset + 1];
const u8 g = data[offset + 2];
const u8 r = data[offset + 3];
// RGBA8 case PICA::TextureFmt::I4: {
return (alpha << 24) | (b << 16) | (g << 8) | r; u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
}
case PICA::TextureFmt::I8: { // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
u32 offset = getSwizzledOffset(u, v, size.u(), 1); u8 intensity = data[offset] >> ((u % 2) ? 4 : 0);
const u8 intensity = data[offset]; intensity = getBits<0, 4>(intensity);
// RGBA8 // ABGR4
return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity; return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xff;
} }
case PICA::TextureFmt::IA8: { default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
u32 offset = getSwizzledOffset(u, v, size.u(), 2); }
}
// Same as I8 except each pixel gets its own alpha value too u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
const u8 alpha = data[offset]; switch (fmt) {
const u8 intensity = data[offset + 1]; case PICA::TextureFmt::RGB8: {
const u32 offset = getSwizzledOffset(u, v, size.u(), 3);
const u8 b = data[offset];
const u8 g = data[offset + 1];
const u8 r = data[offset + 2];
// RGBA8 // RGBA8
return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity; return (0xff << 24) | (b << 16) | (g << 8) | r;
} }
case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data); case PICA::TextureFmt::RGBA8: {
case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data); const u32 offset = getSwizzledOffset(u, v, size.u(), 4);
const u8 alpha = data[offset];
const u8 b = data[offset + 1];
const u8 g = data[offset + 2];
const u8 r = data[offset + 3];
default: // RGBA8
Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt)); return (alpha << 24) | (b << 16) | (g << 8) | r;
} }
}
void Texture::decodeTexture(std::span<const u8> data) { case PICA::TextureFmt::I8: {
std::vector<u8> decoded; u32 offset = getSwizzledOffset(u, v, size.u(), 1);
decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel); const u8 intensity = data[offset];
// Decode texels line by line // RGBA8
for (u32 v = 0; v < size.v(); v++) { return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity;
for (u32 u = 0; u < size.u(); u++) { }
if (formatInfo.bytesPerTexel == 1) {
u8 texel = decodeTexelU8(u, v, format, data);
decoded.push_back(texel);
} else if (formatInfo.bytesPerTexel == 2) {
u16 texel = decodeTexelU16(u, v, format, data);
decoded.push_back((texel & 0x00ff) >> 0);
decoded.push_back((texel & 0xff00) >> 8);
} else if (formatInfo.bytesPerTexel == 4) {
u32 texel = decodeTexelU32(u, v, format, data);
decoded.push_back((texel & 0x000000ff) >> 0);
decoded.push_back((texel & 0x0000ff00) >> 8);
decoded.push_back((texel & 0x00ff0000) >> 16);
decoded.push_back((texel & 0xff000000) >> 24);
} else {
Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel);
}
}
}
texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0); case PICA::TextureFmt::IA8: {
} u32 offset = getSwizzledOffset(u, v, size.u(), 2);
} // namespace Metal // Same as I8 except each pixel gets its own alpha value too
const u8 alpha = data[offset];
const u8 intensity = data[offset + 1];
// RGBA8
return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
}
case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data);
case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data);
default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
}
}
// Decode the raw texture bytes in `data` texel by texel into a linear buffer and upload it to the Metal texture.
// Each texel is decoded through decodeTexelU8/U16/U32 according to formatInfo.bytesPerTexel and is stored
// least-significant byte first.
void Texture::decodeTexture(std::span<const u8> data) {
    std::vector<u8> decoded;
    decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel);

    // Append the lowest `byteCount` bytes of `value` to the output, least-significant byte first
    auto appendLittleEndian = [&decoded](u32 value, u32 byteCount) {
        for (u32 i = 0; i < byteCount; i++) {
            decoded.push_back(u8(value >> (8 * i)));
        }
    };

    // Walk the texture row by row; the texel width is checked per texel, so an empty texture never panics
    for (u32 v = 0; v < size.v(); v++) {
        for (u32 u = 0; u < size.u(); u++) {
            switch (formatInfo.bytesPerTexel) {
                case 1: decoded.push_back(decodeTexelU8(u, v, format, data)); break;
                case 2: appendLittleEndian(decodeTexelU16(u, v, format, data), 2); break;
                case 4: appendLittleEndian(decodeTexelU32(u, v, format, data), 4); break;
                default: Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel);
            }
        }
    }

    // Upload the whole decoded image to mip level 0, slice 0; the row pitch is one full row of texels
    texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0);
}
} // namespace Metal

View file

@ -2,9 +2,10 @@
#include <cmrc/cmrc.hpp> #include <cmrc/cmrc.hpp>
#include <cstddef> #include <cstddef>
#include "renderer_mtl/mtl_lut_texture.hpp" #include "renderer_mtl/mtl_lut_texture.hpp"
// HACK // Hack: Apple annoyingly defines a global "NO" macro which ends up conflicting with our own code...
#undef NO #undef NO
#include "PICA/gpu.hpp" #include "PICA/gpu.hpp"
@ -14,8 +15,10 @@ using namespace PICA;
CMRC_DECLARE(RendererMTL); CMRC_DECLARE(RendererMTL);
const u16 LIGHTING_LUT_TEXTURE_WIDTH = 256; static constexpr u16 LIGHTING_LUT_TEXTURE_WIDTH = 256;
const u32 FOG_LUT_TEXTURE_WIDTH = 128; static constexpr u32 FOG_LUT_TEXTURE_WIDTH = 128;
// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices
static constexpr uint VERTEX_BUFFER_BINDING_INDEX = 30;
// HACK: redefinition... // HACK: redefinition...
PICA::ColorFmt ToColorFormat(u32 format) { PICA::ColorFmt ToColorFormat(u32 format) {
@ -40,6 +43,7 @@ MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) {
RendererMTL::RendererMTL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs) RendererMTL::RendererMTL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs)
: Renderer(gpu, internalRegs, externalRegs) {} : Renderer(gpu, internalRegs, externalRegs) {}
RendererMTL::~RendererMTL() {} RendererMTL::~RendererMTL() {}
void RendererMTL::reset() { void RendererMTL::reset() {
@ -78,7 +82,7 @@ void RendererMTL::display() {
clearColor(nullptr, bottomScreen->get().texture); clearColor(nullptr, bottomScreen->get().texture);
} }
// -------- Draw -------- // Draw
commandBuffer->pushDebugGroup(toNSString("Display")); commandBuffer->pushDebugGroup(toNSString("Display"));
MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
@ -130,8 +134,6 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
metalLayer->setDevice(device); metalLayer->setDevice(device);
commandQueue = device->newCommandQueue(); commandQueue = device->newCommandQueue();
// -------- Objects --------
// Textures // Textures
MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
textureDescriptor->setTextureType(MTL::TextureType2D); textureDescriptor->setTextureType(MTL::TextureType2D);
@ -157,7 +159,9 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
samplerDescriptor->release(); samplerDescriptor->release();
lutLightingTexture = new Metal::LutTexture(device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture"); lutLightingTexture = new Metal::LutTexture(
device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture"
);
lutFogTexture = new Metal::LutTexture(device, MTL::TextureType1DArray, MTL::PixelFormatRG32Float, FOG_LUT_TEXTURE_WIDTH, 1, "Fog LUT texture"); lutFogTexture = new Metal::LutTexture(device, MTL::TextureType1DArray, MTL::PixelFormatRG32Float, FOG_LUT_TEXTURE_WIDTH, 1, "Fog LUT texture");
// -------- Pipelines -------- // -------- Pipelines --------
@ -166,7 +170,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
auto mtlResources = cmrc::RendererMTL::get_filesystem(); auto mtlResources = cmrc::RendererMTL::get_filesystem();
library = loadLibrary(device, mtlResources.open("metal_shaders.metallib")); library = loadLibrary(device, mtlResources.open("metal_shaders.metallib"));
MTL::Library* blitLibrary = loadLibrary(device, mtlResources.open("metal_blit.metallib")); MTL::Library* blitLibrary = loadLibrary(device, mtlResources.open("metal_blit.metallib"));
//MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); // MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib"));
// Display // Display
MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding)); MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding));
@ -295,9 +299,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
defaultDepthStencilState = device->newDepthStencilState(depthStencilDescriptor); defaultDepthStencilState = device->newDepthStencilState(depthStencilDescriptor);
depthStencilDescriptor->release(); depthStencilDescriptor->release();
// Release
blitLibrary->release(); blitLibrary->release();
//copyToLutTextureLibrary->release(); // copyToLutTextureLibrary->release();
} }
void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {
@ -592,8 +595,7 @@ void RendererMTL::deinitGraphicsContext() {
delete lutLightingTexture; delete lutLightingTexture;
delete lutFogTexture; delete lutFogTexture;
// Release // copyToLutTexturePipeline->release();
//copyToLutTexturePipeline->release();
displayPipeline->release(); displayPipeline->release();
defaultDepthStencilState->release(); defaultDepthStencilState->release();
nullTexture->release(); nullTexture->release();
@ -700,9 +702,9 @@ void RendererMTL::bindTexturesToSlots() {
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) {
commandEncoder.setFragmentTexture(nullTexture, i); commandEncoder.setFragmentTexture(nullTexture, i);
commandEncoder.setFragmentSamplerState(nearestSampler, i); commandEncoder.setFragmentSamplerState(nearestSampler, i);
continue; continue;
} }
const size_t ioBase = ioBases[i]; const size_t ioBase = ioBases[i];
@ -736,7 +738,9 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
} }
u32 index = lutLightingTexture->getNextIndex(); u32 index = lutLightingTexture->getNextIndex();
lutLightingTexture->getTexture()->replaceRegion(MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0); lutLightingTexture->getTexture()->replaceRegion(
MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0
);
/* /*
endRenderPass(); endRenderPass();
@ -768,7 +772,7 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) {
gpu.fogLUTDirty = false; gpu.fogLUTDirty = false;
std::array<float, FOG_LUT_TEXTURE_WIDTH * 2> fogLut = {0.0f}; std::array<float, FOG_LUT_TEXTURE_WIDTH* 2> fogLut = {0.0f};
for (int i = 0; i < fogLut.size(); i += 2) { for (int i = 0; i < fogLut.size(); i += 2) {
const uint32_t value = gpu.fogLUT[i >> 1]; const uint32_t value = gpu.fogLUT[i >> 1];
@ -807,7 +811,8 @@ void RendererMTL::textureCopyImpl(
) { ) {
nextRenderPassName = "Texture copy"; nextRenderPassName = "Texture copy";
MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
// TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole texture // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole
// texture
bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture); bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture);
beginRenderPassIfNeeded(renderPassDescriptor, doesClear, destFramebuffer.texture); beginRenderPassIfNeeded(renderPassDescriptor, doesClear, destFramebuffer.texture);
@ -819,11 +824,13 @@ void RendererMTL::textureCopyImpl(
// Viewport // Viewport
renderCommandEncoder->setViewport(MTL::Viewport{ renderCommandEncoder->setViewport(MTL::Viewport{
double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0 double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0});
});
float srcRectNDC[4] = { float srcRectNDC[4] = {
srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(), srcRect.left / (float)srcFramebuffer.size.u(),
(srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v() srcRect.bottom / (float)srcFramebuffer.size.v(),
(srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(),
(srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v(),
}; };
// Bind resources // Bind resources
@ -834,25 +841,28 @@ void RendererMTL::textureCopyImpl(
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
} }
void RendererMTL::beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture) { void RendererMTL::beginRenderPassIfNeeded(
MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture
) {
createCommandBufferIfNeeded(); createCommandBufferIfNeeded();
if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture ||
endRenderPass(); (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) {
endRenderPass();
renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor);
renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); renderCommandEncoder->setLabel(toNSString(nextRenderPassName));
commandEncoder.newRenderCommandEncoder(renderCommandEncoder); commandEncoder.newRenderCommandEncoder(renderCommandEncoder);
// Bind persistent resources // Bind persistent resources
// LUT texture // LUT texture
renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3); renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3);
renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4); renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4);
renderCommandEncoder->setFragmentSamplerState(linearSampler, 3); renderCommandEncoder->setFragmentSamplerState(linearSampler, 3);
lastColorTexture = colorTexture; lastColorTexture = colorTexture;
lastDepthTexture = depthTexture; lastDepthTexture = depthTexture;
} }
renderPassDescriptor->release(); renderPassDescriptor->release();