From 1948bea209cb545e34f59af175ca6916a3999cc6 Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Thu, 6 Mar 2025 17:04:52 +0200
Subject: [PATCH 01/19] ios: Add simulator build option and iOS driver stub

---
 CMakeLists.txt                         | 28 ++++++++++++++++++---
 include/ios_driver.h                   |  3 +++
 src/core/renderer_mtl/renderer_mtl.cpp | 10 ++++++++
 src/ios_driver.mm                      | 34 ++++++++++++++++++++++++++
 4 files changed, 72 insertions(+), 3 deletions(-)
 create mode 100644 include/ios_driver.h
 create mode 100644 src/ios_driver.mm

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3ddbd3e4..b6d8fb4c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -65,6 +65,7 @@ option(BUILD_LIBRETRO_CORE "Build a Libretro core" OFF)
 option(ENABLE_RENDERDOC_API "Build with support for Renderdoc's capture API for graphics debugging" ON)
 option(DISABLE_SSE4 "Build with SSE4 instructions disabled, may reduce performance" OFF)
 option(USE_LIBRETRO_AUDIO "Enable to use the LR audio device with the LR core. Otherwise our own device is used" OFF)
+option(IOS_SIMULATOR_BUILD "Compiling for IOS simulator (Set to off if compiling for a real iPhone)" ON)
 
 # Discord RPC & LuaJIT are currently not supported on iOS
 if(IOS)
@@ -419,6 +420,10 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
 if(IOS)
     set(SOURCE_FILES ${SOURCE_FILES} src/miniaudio/miniaudio.m)
     target_compile_definitions(AlberCore PUBLIC "PANDA3DS_IOS=1")
+    
+    if (IOS_SIMULATOR_BUILD)
+        target_compile_definitions(AlberCore PUBLIC "PANDA3DS_IOS_SIMULATOR=1")
+    endif()
 endif()
 
 cmrc_add_resource_library(
@@ -620,15 +625,26 @@ if(ENABLE_METAL AND APPLE)
         set(SHADER_SOURCE "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metal")
         set(SHADER_IR "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.ir")
         set(SHADER_METALLIB "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metallib")
+
+        # MacOS, iOS and the iOS simulator all use different compilation options for shaders
+        set(MetalSDK "macosx")
+        if(IOS)
+            if (IOS_SIMULATOR_BUILD)
+                set(MetalSDK "iphonesimulator")
+            else()
+                set(MetalSDK "iphoneos")
+            endif()
+        endif()
+
         # TODO: only include sources in debug builds
         add_custom_command(
             OUTPUT ${SHADER_IR}
-            COMMAND xcrun -sdk macosx metal -gline-tables-only -frecord-sources -o ${SHADER_IR} -c ${SHADER_SOURCE}
+            COMMAND xcrun -sdk ${MetalSDK} metal -gline-tables-only -frecord-sources -o ${SHADER_IR} -c ${SHADER_SOURCE}
             DEPENDS ${SHADER_SOURCE}
             VERBATIM)
         add_custom_command(
             OUTPUT ${SHADER_METALLIB}
-            COMMAND xcrun -sdk macosx metallib -o ${SHADER_METALLIB} ${SHADER_IR}
+            COMMAND xcrun -sdk ${MetalSDK} metallib -o ${SHADER_METALLIB} ${SHADER_IR}
             DEPENDS ${SHADER_IR}
             VERBATIM)
         set(RENDERER_MTL_HOST_SHADERS_SOURCES ${RENDERER_MTL_HOST_SHADERS_SOURCES} ${SHADER_METALLIB})
@@ -792,7 +808,13 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE)
 elseif(BUILD_HYDRA_CORE)
     target_compile_definitions(AlberCore PRIVATE PANDA3DS_HYDRA_CORE=1)
     include_directories(third_party/hydra_core/include)
-    add_library(Alber SHARED src/hydra_core.cpp)
+    
+    set(SHARED_SOURCE_FILES src/hydra_core.cpp)
+    if(IOS)
+        set(SHARED_SOURCE_FILES ${SHARED_SOURCE_FILES} src/ios_driver.mm)
+    endif()
+
+    add_library(Alber SHARED ${SHARED_SOURCE_FILES})
     target_link_libraries(Alber PUBLIC AlberCore)
 elseif(BUILD_LIBRETRO_CORE)
     include_directories(third_party/libretro/include)
diff --git a/include/ios_driver.h b/include/ios_driver.h
new file mode 100644
index 00000000..c5c690d4
--- /dev/null
+++ b/include/ios_driver.h
@@ -0,0 +1,3 @@
+#pragma once
+
+void iosCreateEmulator();
\ No newline at end of file
diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index 14bca4d2..969b2aa2 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -57,6 +57,10 @@ void RendererMTL::reset() {
 }
 
 void RendererMTL::display() {
+#ifdef PANDA3DS_IOS
+	return;
+#endif
+
 	CA::MetalDrawable* drawable = metalLayer->nextDrawable();
 	if (!drawable) {
 		return;
@@ -126,11 +130,17 @@ void RendererMTL::display() {
 
 void RendererMTL::initGraphicsContext(SDL_Window* window) {
 	// TODO: what should be the type of the view?
+
+#ifdef PANDA3DS_IOS
+	// On iOS, the SwiftUI side handles device<->MTKView interaction
+	device = MTL::CreateSystemDefaultDevice();
+#else
 	void* view = SDL_Metal_CreateView(window);
 	metalLayer = (CA::MetalLayer*)SDL_Metal_GetLayer(view);
 	device = MTL::CreateSystemDefaultDevice();
 	metalLayer->setDevice(device);
 	commandQueue = device->newCommandQueue();
+#endif
 
 	// Textures
 	MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
diff --git a/src/ios_driver.mm b/src/ios_driver.mm
new file mode 100644
index 00000000..19766d38
--- /dev/null
+++ b/src/ios_driver.mm
@@ -0,0 +1,34 @@
+#import <Foundation/Foundation.h>
+
+extern "C" {
+#include "ios_driver.h"
+}
+
+#undef ABS
+#undef NO
+
+#include <memory>
+#include "emulator.hpp"
+
+#define IOS_EXPORT extern "C" __attribute__((visibility("default")))
+
+std::unique_ptr<Emulator> emulator = nullptr;
+HIDService* hidService = nullptr;
+
+extern "C" __attribute__((visibility("default"))) void iosCreateEmulator() {
+	printf("Creating emulator\n");
+
+	emulator = std::make_unique<Emulator>();
+	hidService = &emulator->getServiceManager().getHID();
+	emulator->initGraphicsContext(nullptr);
+
+	// auto path = emulator->getAppDataRoot() / "Kirb Demo.3ds";
+	auto path = emulator->getAppDataRoot() / "SimplerTri.elf";
+	emulator->loadROM(path);
+
+	while (1) {
+		emulator->runFrame();
+	}
+
+	printf("Created emulator\n");
+}
\ No newline at end of file

From 9bc50a4b9c6a6f7e3c48a19a57163645d013050c Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Thu, 6 Mar 2025 23:42:12 +0200
Subject: [PATCH 02/19] ios: Add iosRunFrame and pass MTKView drawable to Metal

---
 include/ios_driver.h                   |  3 +-
 include/renderer.hpp                   |  4 ++
 include/renderer_mtl/renderer_mtl.hpp  | 13 ++++--
 src/core/renderer_mtl/renderer_mtl.cpp | 55 ++++++++++++++++++++------
 src/host_shaders/metal_shaders.metal   |  4 +-
 src/ios_driver.mm                      | 14 ++++---
 6 files changed, 67 insertions(+), 26 deletions(-)

diff --git a/include/ios_driver.h b/include/ios_driver.h
index c5c690d4..2d745ed5 100644
--- a/include/ios_driver.h
+++ b/include/ios_driver.h
@@ -1,3 +1,4 @@
 #pragma once
 
-void iosCreateEmulator();
\ No newline at end of file
+void iosCreateEmulator();
+void iosRunFrame(void* drawable, void* drawableTexture);
\ No newline at end of file
diff --git a/include/renderer.hpp b/include/renderer.hpp
index ca28455f..583057aa 100644
--- a/include/renderer.hpp
+++ b/include/renderer.hpp
@@ -86,6 +86,10 @@ class Renderer {
 	// Called to notify the core to use OpenGL ES and not desktop GL
 	virtual void setupGLES() {}
 
+	// Only relevant for Metal renderer on iOS
+	// Passes a SwiftUI MTKView Drawable & its texture to the renderer
+	virtual void setMTKDrawable(void* drawable, void* drawableTexture) {};
+
 	// This function is called on every draw call before parsing vertex data.
 	// It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between
 	// ubershaders and shadergen, and so on.
diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp
index bd5c3bf1..3f842466 100644
--- a/include/renderer_mtl/renderer_mtl.hpp
+++ b/include/renderer_mtl/renderer_mtl.hpp
@@ -42,11 +42,16 @@ class RendererMTL final : public Renderer {
 	virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {}
 #endif
 
-  private:
-	CA::MetalLayer* metalLayer;
+	virtual void setMTKDrawable(void* drawable, void* drawableTexture) override;
 
-	MTL::Device* device;
-	MTL::CommandQueue* commandQueue;
+  private:
+	CA::MetalLayer* metalLayer = nullptr;
+
+	CA::MetalDrawable* metalDrawable = nullptr;
+	MTL::Texture* drawableTexture = nullptr;
+
+	MTL::Device* device = nullptr;
+	MTL::CommandQueue* commandQueue = nullptr;
 
 	Metal::CommandEncoder commandEncoder;
 
diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index 969b2aa2..87769dee 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -30,7 +30,6 @@ PICA::ColorFmt ToColorFormat(u32 format) {
 }
 
 MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) {
-	// MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init();
 	NS::Error* error = nullptr;
 	MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error);
 	// MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error);
@@ -56,16 +55,38 @@ void RendererMTL::reset() {
 	colorRenderTargetCache.reset();
 }
 
-void RendererMTL::display() {
-#ifdef PANDA3DS_IOS
-	return;
-#endif
+void RendererMTL::setMTKDrawable(void* drawable, void* tex) {
+	this->metalDrawable = (CA::MetalDrawable*)drawable;
+	this->drawableTexture = (MTL::Texture*)tex;
+}
 
+void RendererMTL::display() {
+	static int frameCount = 0;
+	frameCount++;
+
+	auto manager = MTL::CaptureManager::sharedCaptureManager();
+	auto captureDescriptor = MTL::CaptureDescriptor::alloc()->init();
+	if (frameCount == 200) {
+		captureDescriptor->setCaptureObject(device);
+		manager->startCapture(captureDescriptor, nullptr);
+	}
+
+#ifdef PANDA3DS_IOS
+	CA::MetalDrawable* drawable = metalDrawable;
+	if (!drawable) {
+		return;
+	}
+
+	MTL::Texture* texture = drawableTexture;
+#else
 	CA::MetalDrawable* drawable = metalLayer->nextDrawable();
 	if (!drawable) {
 		return;
 	}
 
+	MTL::Texture* texture = drawable->getTexture();
+#endif
+
 	using namespace PICA::ExternalRegs;
 
 	// Top screen
@@ -91,13 +112,13 @@ void RendererMTL::display() {
 
 	MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
 	MTL::RenderPassColorAttachmentDescriptor* colorAttachment = renderPassDescriptor->colorAttachments()->object(0);
-	colorAttachment->setTexture(drawable->texture());
+	colorAttachment->setTexture(texture);
 	colorAttachment->setLoadAction(MTL::LoadActionClear);
 	colorAttachment->setClearColor(MTL::ClearColor{0.0f, 0.0f, 0.0f, 1.0f});
 	colorAttachment->setStoreAction(MTL::StoreActionStore);
 
 	nextRenderPassName = "Display";
-	beginRenderPassIfNeeded(renderPassDescriptor, false, drawable->texture());
+	beginRenderPassIfNeeded(renderPassDescriptor, false, texture);
 	renderCommandEncoder->setRenderPipelineState(displayPipeline);
 	renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0);
 
@@ -124,24 +145,32 @@ void RendererMTL::display() {
 	// Inform the vertex buffer cache that the frame ended
 	vertexBufferCache.endFrame();
 
-	// Release
+	// Release the drawable (not on iOS cause SwiftUI handles it there)
+#ifndef PANDA3DS_IOS
 	drawable->release();
+#endif
+
+	if (frameCount == 200) {
+		manager->stopCapture();
+	}
+	captureDescriptor->release();
 }
 
 void RendererMTL::initGraphicsContext(SDL_Window* window) {
-	// TODO: what should be the type of the view?
-
+	// On iOS, the SwiftUI side handles MetalLayer & the CommandQueue
 #ifdef PANDA3DS_IOS
-	// On iOS, the SwiftUI side handles device<->MTKView interaction
 	device = MTL::CreateSystemDefaultDevice();
 #else
+	// TODO: what should be the type of the view?
 	void* view = SDL_Metal_CreateView(window);
 	metalLayer = (CA::MetalLayer*)SDL_Metal_GetLayer(view);
 	device = MTL::CreateSystemDefaultDevice();
 	metalLayer->setDevice(device);
-	commandQueue = device->newCommandQueue();
 #endif
 
+	commandQueue = device->newCommandQueue();
+	printf("C++ device pointer: %p\n", device);
+
 	// Textures
 	MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
 	textureDescriptor->setTextureType(MTL::TextureType2D);
@@ -816,7 +845,7 @@ void RendererMTL::beginRenderPassIfNeeded(
 ) {
 	createCommandBufferIfNeeded();
 
-	if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture ||
+	if (1 ||doesClears || !renderCommandEncoder || colorTexture != lastColorTexture ||
 		(depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) {
 		endRenderPass();
 
diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal
index 18c310f7..5cd6b643 100644
--- a/src/host_shaders/metal_shaders.metal
+++ b/src/host_shaders/metal_shaders.metal
@@ -655,7 +655,7 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) {
     return as_type<float4>(performLogicOpU(logicOp, as_type<uint4>(s), as_type<uint4>(d)));
 }
 
-fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture2d_array<float> texLightingLut [[texture(3)]], texture1d_array<float> texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
+fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture2d_array<float> texLightingLut [[texture(3)]], texture1d_array<float> texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
     Globals globals;
 
     // HACK
@@ -755,5 +755,5 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
 		}
 	}
 
-	return performLogicOp(logicOp, color, prevColor);
+	return performLogicOp(logicOp, color, float4(1.0, 0.0, 0.0, 1.0));
 }
diff --git a/src/ios_driver.mm b/src/ios_driver.mm
index 19766d38..cd2ac655 100644
--- a/src/ios_driver.mm
+++ b/src/ios_driver.mm
@@ -15,7 +15,7 @@ extern "C" {
 std::unique_ptr<Emulator> emulator = nullptr;
 HIDService* hidService = nullptr;
 
-extern "C" __attribute__((visibility("default"))) void iosCreateEmulator() {
+IOS_EXPORT void iosCreateEmulator() {
 	printf("Creating emulator\n");
 
 	emulator = std::make_unique<Emulator>();
@@ -23,12 +23,14 @@ extern "C" __attribute__((visibility("default"))) void iosCreateEmulator() {
 	emulator->initGraphicsContext(nullptr);
 
 	// auto path = emulator->getAppDataRoot() / "Kirb Demo.3ds";
+	// auto path = emulator->getAppDataRoot() / "Kirb Demo.3ds";
+
 	auto path = emulator->getAppDataRoot() / "SimplerTri.elf";
 	emulator->loadROM(path);
-
-	while (1) {
-		emulator->runFrame();
-	}
-
 	printf("Created emulator\n");
+}
+
+IOS_EXPORT void iosRunFrame(void* drawable, void* drawableTexture) {
+	emulator->getRenderer()->setMTKDrawable(drawable, drawableTexture);
+	emulator->runFrame();
 }
\ No newline at end of file

From 432eb0d2b392196ae663e2ae3541ac44efc9474a Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Thu, 6 Mar 2025 23:45:14 +0200
Subject: [PATCH 03/19] Metal: Remove forced render pass restart debug hack

---
 src/core/renderer_mtl/renderer_mtl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index 87769dee..f6361709 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -845,7 +845,7 @@ void RendererMTL::beginRenderPassIfNeeded(
 ) {
 	createCommandBufferIfNeeded();
 
-	if (1 ||doesClears || !renderCommandEncoder || colorTexture != lastColorTexture ||
+	if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture ||
 		(depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) {
 		endRenderPass();
 

From fb5932082971216b670d429589483603bde6c97d Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Fri, 7 Mar 2025 17:36:09 +0200
Subject: [PATCH 04/19] ios: Get texture from drawable, remove capture debug code

---
 include/ios_driver.h                   |  2 +-
 include/renderer.hpp                   |  4 ++--
 include/renderer_mtl/renderer_mtl.hpp  |  5 ++---
 src/core/renderer_mtl/renderer_mtl.cpp | 25 +++++--------------------
 src/ios_driver.mm                      |  6 ++++--
 5 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/include/ios_driver.h b/include/ios_driver.h
index 2d745ed5..5158fead 100644
--- a/include/ios_driver.h
+++ b/include/ios_driver.h
@@ -1,4 +1,4 @@
 #pragma once
 
 void iosCreateEmulator();
-void iosRunFrame(void* drawable, void* drawableTexture);
\ No newline at end of file
+void iosRunFrame(void* drawable);
\ No newline at end of file
diff --git a/include/renderer.hpp b/include/renderer.hpp
index 583057aa..ef1d27d4 100644
--- a/include/renderer.hpp
+++ b/include/renderer.hpp
@@ -87,8 +87,8 @@ class Renderer {
 	virtual void setupGLES() {}
 
 	// Only relevant for Metal renderer on iOS
-	// Passes a SwiftUI MTKView Drawable & its texture to the renderer
-	virtual void setMTKDrawable(void* drawable, void* drawableTexture) {};
+	// Passes a SwiftUI MTKView Drawable to the renderer
+	virtual void setMTKDrawable(void* drawable) {};
 
 	// This function is called on every draw call before parsing vertex data.
 	// It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between
diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp
index 3f842466..e11485bd 100644
--- a/include/renderer_mtl/renderer_mtl.hpp
+++ b/include/renderer_mtl/renderer_mtl.hpp
@@ -42,13 +42,11 @@ class RendererMTL final : public Renderer {
 	virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {}
 #endif
 
-	virtual void setMTKDrawable(void* drawable, void* drawableTexture) override;
+	virtual void setMTKDrawable(void* drawable) override;
 
   private:
 	CA::MetalLayer* metalLayer = nullptr;
-
 	CA::MetalDrawable* metalDrawable = nullptr;
-	MTL::Texture* drawableTexture = nullptr;
 
 	MTL::Device* device = nullptr;
 	MTL::CommandQueue* commandQueue = nullptr;
@@ -103,6 +101,7 @@ class RendererMTL final : public Renderer {
 	void endRenderPass() {
 		if (renderCommandEncoder) {
 			renderCommandEncoder->endEncoding();
+			renderCommandEncoder->release();
 			renderCommandEncoder = nullptr;
 		}
 	}
diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index f6361709..4c40f064 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -55,39 +55,29 @@ void RendererMTL::reset() {
 	colorRenderTargetCache.reset();
 }
 
-void RendererMTL::setMTKDrawable(void* drawable, void* tex) {
-	this->metalDrawable = (CA::MetalDrawable*)drawable;
-	this->drawableTexture = (MTL::Texture*)tex;
+void RendererMTL::setMTKDrawable(void* drawable) {
+	metalDrawable = (CA::MetalDrawable*)drawable;
 }
 
 void RendererMTL::display() {
-	static int frameCount = 0;
-	frameCount++;
-
-	auto manager = MTL::CaptureManager::sharedCaptureManager();
-	auto captureDescriptor = MTL::CaptureDescriptor::alloc()->init();
-	if (frameCount == 200) {
-		captureDescriptor->setCaptureObject(device);
-		manager->startCapture(captureDescriptor, nullptr);
-	}
-
 #ifdef PANDA3DS_IOS
 	CA::MetalDrawable* drawable = metalDrawable;
 	if (!drawable) {
 		return;
 	}
 
-	MTL::Texture* texture = drawableTexture;
+	MTL::Texture* texture = drawable->texture();
 #else
 	CA::MetalDrawable* drawable = metalLayer->nextDrawable();
 	if (!drawable) {
 		return;
 	}
 
-	MTL::Texture* texture = drawable->getTexture();
+	MTL::Texture* texture = drawable->texture();
 #endif
 
 	using namespace PICA::ExternalRegs;
+	printf("Device pointer: %p\nDrawable pointer: %p\nTexture pointer: %p\n", device, drawable, texture);
 
 	// Top screen
 	const u32 topActiveFb = externalRegs[Framebuffer0Select] & 1;
@@ -149,11 +139,6 @@ void RendererMTL::display() {
 #ifndef PANDA3DS_IOS
 	drawable->release();
 #endif
-
-	if (frameCount == 200) {
-		manager->stopCapture();
-	}
-	captureDescriptor->release();
 }
 
 void RendererMTL::initGraphicsContext(SDL_Window* window) {
diff --git a/src/ios_driver.mm b/src/ios_driver.mm
index cd2ac655..c09768c7 100644
--- a/src/ios_driver.mm
+++ b/src/ios_driver.mm
@@ -30,7 +30,9 @@ IOS_EXPORT void iosCreateEmulator() {
 	printf("Created emulator\n");
 }
 
-IOS_EXPORT void iosRunFrame(void* drawable, void* drawableTexture) {
-	emulator->getRenderer()->setMTKDrawable(drawable, drawableTexture);
+IOS_EXPORT void iosRunFrame(void* drawable) {
+	printf("Running a frame\n");
+	emulator->getRenderer()->setMTKDrawable(drawable);
 	emulator->runFrame();
+	printf("Ran a frame\n");
 }
\ No newline at end of file

From e378a52b5a40557a94f88638ab2f8e454d3012e8 Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Sat, 8 Mar 2025 20:32:42 +0200
Subject: [PATCH 05/19] ios: Simplify MTKView interface (still doesn't work
 though)

---
 include/renderer.hpp                   |  4 ++--
 include/renderer_mtl/renderer_mtl.hpp  |  3 +--
 src/core/renderer_mtl/renderer_mtl.cpp | 21 ++++++---------------
 src/ios_driver.mm                      |  4 ++--
 4 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/include/renderer.hpp b/include/renderer.hpp
index ef1d27d4..d32077f6 100644
--- a/include/renderer.hpp
+++ b/include/renderer.hpp
@@ -87,8 +87,8 @@ class Renderer {
 	virtual void setupGLES() {}
 
 	// Only relevant for Metal renderer on iOS
-	// Passes a SwiftUI MTKView Drawable to the renderer
-	virtual void setMTKDrawable(void* drawable) {};
+	// Passes a SwiftUI MTKView's layer (CAMetalLayer) to the renderer
+	virtual void setMTKLayer(void* layer) {};
 
 	// This function is called on every draw call before parsing vertex data.
 	// It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between
diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp
index e11485bd..10fac7cd 100644
--- a/include/renderer_mtl/renderer_mtl.hpp
+++ b/include/renderer_mtl/renderer_mtl.hpp
@@ -42,11 +42,10 @@ class RendererMTL final : public Renderer {
 	virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {}
 #endif
 
-	virtual void setMTKDrawable(void* drawable) override;
+	virtual void setMTKLayer(void* layer) override;
 
   private:
 	CA::MetalLayer* metalLayer = nullptr;
-	CA::MetalDrawable* metalDrawable = nullptr;
 
 	MTL::Device* device = nullptr;
 	MTL::CommandQueue* commandQueue = nullptr;
diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index 4c40f064..211fa265 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -55,29 +55,21 @@ void RendererMTL::reset() {
 	colorRenderTargetCache.reset();
 }
 
-void RendererMTL::setMTKDrawable(void* drawable) {
-	metalDrawable = (CA::MetalDrawable*)drawable;
+void RendererMTL::setMTKLayer(void* layer) {
+	metalLayer = (CA::MetalLayer*)layer;
+    // metalLayer->retain();
 }
 
 void RendererMTL::display() {
-#ifdef PANDA3DS_IOS
-	CA::MetalDrawable* drawable = metalDrawable;
-	if (!drawable) {
-		return;
-	}
-
-	MTL::Texture* texture = drawable->texture();
-#else
 	CA::MetalDrawable* drawable = metalLayer->nextDrawable();
 	if (!drawable) {
 		return;
 	}
 
 	MTL::Texture* texture = drawable->texture();
-#endif
 
 	using namespace PICA::ExternalRegs;
-	printf("Device pointer: %p\nDrawable pointer: %p\nTexture pointer: %p\n", device, drawable, texture);
+	printf("Layer pointer: %p\nDevice pointer: %p\nDrawable pointer: %p\nTexture pointer: %p\n", metalLayer, device, drawable, texture);
 
 	// Top screen
 	const u32 topActiveFb = externalRegs[Framebuffer0Select] & 1;
@@ -134,10 +126,9 @@ void RendererMTL::display() {
 
 	// Inform the vertex buffer cache that the frame ended
 	vertexBufferCache.endFrame();
-
-	// Release the drawable (not on iOS cause SwiftUI handles it there)
-#ifndef PANDA3DS_IOS
 	drawable->release();
+#ifdef PANDA3DS_IOS
+    // metalLayer->release();
 #endif
 }
 
diff --git a/src/ios_driver.mm b/src/ios_driver.mm
index c09768c7..8e965d41 100644
--- a/src/ios_driver.mm
+++ b/src/ios_driver.mm
@@ -30,9 +30,9 @@ IOS_EXPORT void iosCreateEmulator() {
 	printf("Created emulator\n");
 }
 
-IOS_EXPORT void iosRunFrame(void* drawable) {
+IOS_EXPORT void iosRunFrame(void* layer) {
 	printf("Running a frame\n");
-	emulator->getRenderer()->setMTKDrawable(drawable);
+	emulator->getRenderer()->setMTKLayer(layer);
 	emulator->runFrame();
 	printf("Ran a frame\n");
 }
\ No newline at end of file

From 90279e6f9ef82e4cb7d14a307978393d4b2cf253 Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Sat, 8 Mar 2025 22:11:56 +0200
Subject: [PATCH 06/19] ios: Pass CAMetalLayer instead of void* to Obj-C++
 bridging header

---
 CMakeLists.txt                         |  2 +-
 include/ios_driver.h                   |  4 +++-
 src/core/renderer_mtl/renderer_mtl.cpp |  4 ++--
 src/ios_driver.mm                      | 10 ++++++++--
 4 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b6d8fb4c..2362ffb2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -673,7 +673,7 @@ if(ENABLE_METAL AND APPLE)
     target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_METAL=1")
     target_include_directories(AlberCore PRIVATE third_party/metal-cpp)
     # TODO: check if all of them are needed
-    target_link_libraries(AlberCore PRIVATE "-framework Metal" "-framework Foundation" "-framework QuartzCore" resources_renderer_mtl)
+    target_link_libraries(AlberCore PUBLIC "-framework Metal" "-framework Foundation" "-framework QuartzCore" resources_renderer_mtl)
 endif()
 
 source_group("Header Files\\Core" FILES ${HEADER_FILES})
diff --git a/include/ios_driver.h b/include/ios_driver.h
index 5158fead..5be62943 100644
--- a/include/ios_driver.h
+++ b/include/ios_driver.h
@@ -1,4 +1,6 @@
 #pragma once
+#include <Foundation/Foundation.h>
+#include <QuartzCore/QuartzCore.h>
 
 void iosCreateEmulator();
-void iosRunFrame(void* drawable);
\ No newline at end of file
+void iosRunFrame(CAMetalLayer* layer);
\ No newline at end of file
diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index 211fa265..2e6b0d1f 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -57,7 +57,6 @@ void RendererMTL::reset() {
 
 void RendererMTL::setMTKLayer(void* layer) {
 	metalLayer = (CA::MetalLayer*)layer;
-    // metalLayer->retain();
 }
 
 void RendererMTL::display() {
@@ -127,8 +126,9 @@ void RendererMTL::display() {
 	// Inform the vertex buffer cache that the frame ended
 	vertexBufferCache.endFrame();
 	drawable->release();
+
 #ifdef PANDA3DS_IOS
-    // metalLayer->release();
+    // metalLayer->autorelease();
 #endif
 }
 
diff --git a/src/ios_driver.mm b/src/ios_driver.mm
index 8e965d41..050b222b 100644
--- a/src/ios_driver.mm
+++ b/src/ios_driver.mm
@@ -30,9 +30,15 @@ IOS_EXPORT void iosCreateEmulator() {
 	printf("Created emulator\n");
 }
 
-IOS_EXPORT void iosRunFrame(void* layer) {
+IOS_EXPORT void iosRunFrame(CAMetalLayer* layer) {
 	printf("Running a frame\n");
-	emulator->getRenderer()->setMTKLayer(layer);
+	// void* layerBridged = (void*)CFBridgingRetain(layer);
+	void* layerBridged = (__bridge void*)layer;
+
+	emulator->getRenderer()->setMTKLayer(layerBridged);
 	emulator->runFrame();
+	CFRelease(layerBridged);
+
+	// CFBridgingAutorelease(layerBridged);
 	printf("Ran a frame\n");
 }
\ No newline at end of file

From 88e986ca537f2eec19ac8b3ec5b0257cc5b5764c Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Mon, 10 Mar 2025 00:03:35 +0200
Subject: [PATCH 07/19] Fix bridging cast

---
 src/ios_driver.mm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ios_driver.mm b/src/ios_driver.mm
index 050b222b..b4b171b0 100644
--- a/src/ios_driver.mm
+++ b/src/ios_driver.mm
@@ -37,7 +37,7 @@ IOS_EXPORT void iosRunFrame(CAMetalLayer* layer) {
 
 	emulator->getRenderer()->setMTKLayer(layerBridged);
 	emulator->runFrame();
-	CFRelease(layerBridged);
+	// CFRelease(layerBridged);
 
 	// CFBridgingAutorelease(layerBridged);
 	printf("Ran a frame\n");

From 1bd00a87f9a7f63481972db4e58c6ca7cecb0c55 Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Mon, 10 Mar 2025 02:08:19 +0200
Subject: [PATCH 08/19] FINALLY IOS GRAPHICS

---
 src/core/renderer_mtl/renderer_mtl.cpp | 4 ----
 src/ios_driver.mm                      | 6 +-----
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index 2e6b0d1f..608d355d 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -126,10 +126,6 @@ void RendererMTL::display() {
 	// Inform the vertex buffer cache that the frame ended
 	vertexBufferCache.endFrame();
 	drawable->release();
-
-#ifdef PANDA3DS_IOS
-    // metalLayer->autorelease();
-#endif
 }
 
 void RendererMTL::initGraphicsContext(SDL_Window* window) {
diff --git a/src/ios_driver.mm b/src/ios_driver.mm
index b4b171b0..ed0d46a2 100644
--- a/src/ios_driver.mm
+++ b/src/ios_driver.mm
@@ -25,20 +25,16 @@ IOS_EXPORT void iosCreateEmulator() {
 	// auto path = emulator->getAppDataRoot() / "Kirb Demo.3ds";
 	// auto path = emulator->getAppDataRoot() / "Kirb Demo.3ds";
 
-	auto path = emulator->getAppDataRoot() / "SimplerTri.elf";
+	auto path = emulator->getAppDataRoot() / "toon_shading.elf";
 	emulator->loadROM(path);
 	printf("Created emulator\n");
 }
 
 IOS_EXPORT void iosRunFrame(CAMetalLayer* layer) {
 	printf("Running a frame\n");
-	// void* layerBridged = (void*)CFBridgingRetain(layer);
 	void* layerBridged = (__bridge void*)layer;
 
 	emulator->getRenderer()->setMTKLayer(layerBridged);
 	emulator->runFrame();
-	// CFRelease(layerBridged);
-
-	// CFBridgingAutorelease(layerBridged);
 	printf("Ran a frame\n");
 }
\ No newline at end of file

From 5990cb3b0295a013ab576c6586ac5f8b7cfc56f8 Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Mon, 10 Mar 2025 02:22:16 +0200
Subject: [PATCH 09/19] ios: Remove printf spam

---
 src/core/renderer_mtl/renderer_mtl.cpp | 8 +++++---
 src/ios_driver.mm                      | 2 --
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index 608d355d..308ad715 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -68,7 +68,6 @@ void RendererMTL::display() {
 	MTL::Texture* texture = drawable->texture();
 
 	using namespace PICA::ExternalRegs;
-	printf("Layer pointer: %p\nDevice pointer: %p\nDrawable pointer: %p\nTexture pointer: %p\n", metalLayer, device, drawable, texture);
 
 	// Top screen
 	const u32 topActiveFb = externalRegs[Framebuffer0Select] & 1;
@@ -103,16 +102,19 @@ void RendererMTL::display() {
 	renderCommandEncoder->setRenderPipelineState(displayPipeline);
 	renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0);
 
+    const int xMultiplier = 2;
+    const int yMultiplier = 2;
+
 	// Top screen
 	if (topScreen) {
-		renderCommandEncoder->setViewport(MTL::Viewport{0, 0, 400, 240, 0.0f, 1.0f});
+		renderCommandEncoder->setViewport(MTL::Viewport{0, 0, 400 * xMultiplier, 240 * yMultiplier, 0.0f, 1.0f});
 		renderCommandEncoder->setFragmentTexture(topScreen->get().texture, 0);
 		renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
 	}
 
 	// Bottom screen
 	if (bottomScreen) {
-		renderCommandEncoder->setViewport(MTL::Viewport{40, 240, 320, 240, 0.0f, 1.0f});
+		renderCommandEncoder->setViewport(MTL::Viewport{40 * xMultiplier, 240 * yMultiplier, 320 * xMultiplier, 240 * yMultiplier, 0.0f, 1.0f});
 		renderCommandEncoder->setFragmentTexture(bottomScreen->get().texture, 0);
 		renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
 	}
diff --git a/src/ios_driver.mm b/src/ios_driver.mm
index ed0d46a2..e3fd9342 100644
--- a/src/ios_driver.mm
+++ b/src/ios_driver.mm
@@ -31,10 +31,8 @@ IOS_EXPORT void iosCreateEmulator() {
 }
 
 IOS_EXPORT void iosRunFrame(CAMetalLayer* layer) {
-	printf("Running a frame\n");
 	void* layerBridged = (__bridge void*)layer;
 
 	emulator->getRenderer()->setMTKLayer(layerBridged);
 	emulator->runFrame();
-	printf("Ran a frame\n");
 }
\ No newline at end of file

From c59ee99364333d481cab3b1814a7c051b43cbea8 Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Mon, 10 Mar 2025 02:47:41 +0200
Subject: [PATCH 10/19] Metal: Reimplement some texture formats on iOS

---
 include/renderer_mtl/pica_to_mtl.hpp  | 24 +++++++++++
 src/core/renderer_mtl/mtl_texture.cpp | 59 ++++++++++++++++++++++++++-
 src/ios_driver.mm                     |  4 +-
 3 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp
index 715088b4..beb63b17 100644
--- a/include/renderer_mtl/pica_to_mtl.hpp
+++ b/include/renderer_mtl/pica_to_mtl.hpp
@@ -10,6 +10,8 @@ namespace PICA {
 		size_t bytesPerTexel;
 	};
 
+// iOS, at least on simulator, doesn't support a lot of more "exotic" texture formats, so we avoid them tehre
+#ifndef PANDA3DS_IOS
 	constexpr PixelFormatInfo pixelFormatInfos[14] = {
 		{MTL::PixelFormatRGBA8Unorm, 4},   // RGBA8
 		{MTL::PixelFormatRGBA8Unorm, 4},   // RGB8
@@ -26,6 +28,24 @@ namespace PICA {
 		{MTL::PixelFormatRGBA8Unorm, 4},   // ETC1
 		{MTL::PixelFormatRGBA8Unorm, 4},   // ETC1A4
 	};
+#else
+	constexpr PixelFormatInfo pixelFormatInfos[14] = {
+		{MTL::PixelFormatRGBA8Unorm, 4},   // RGBA8
+		{MTL::PixelFormatRGBA8Unorm, 4},   // RGB8
+		{MTL::PixelFormatBGR5A1Unorm, 2},  // RGBA5551
+		{MTL::PixelFormatRGBA8Unorm, 4},   // RGB565
+		{MTL::PixelFormatRGBA8Unorm, 4},   // RGBA4
+		{MTL::PixelFormatRGBA8Unorm, 4},   // IA8
+		{MTL::PixelFormatRG8Unorm, 2},     // RG8
+		{MTL::PixelFormatRGBA8Unorm, 4},   // I8
+		{MTL::PixelFormatA8Unorm, 1},      // A8
+		{MTL::PixelFormatRGBA8Unorm, 4},   // IA4
+		{MTL::PixelFormatRGBA8Unorm, 4},   // I4
+		{MTL::PixelFormatA8Unorm, 1},      // A4
+		{MTL::PixelFormatRGBA8Unorm, 4},   // ETC1
+		{MTL::PixelFormatRGBA8Unorm, 4},   // ETC1A4
+	};
+#endif
 
 	inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast<int>(format)]; }
 
@@ -35,7 +55,11 @@ namespace PICA {
 			case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm;
 			case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm;  // TODO: use MTL::PixelFormatBGR5A1Unorm?
 			case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm;    // TODO: use MTL::PixelFormatB5G6R5Unorm?
+#ifdef PANDA3DS_IOS
+			case ColorFmt::RGBA4: return MTL::PixelFormatRGBA8Unorm; // iOS + Metal doesn't support ABGR4 properly, at least on simulator
+#else
 			case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm;
+#endif
 		}
 	}
 
diff --git a/src/core/renderer_mtl/mtl_texture.cpp b/src/core/renderer_mtl/mtl_texture.cpp
index 149fea26..a110b88f 100644
--- a/src/core/renderer_mtl/mtl_texture.cpp
+++ b/src/core/renderer_mtl/mtl_texture.cpp
@@ -251,6 +251,53 @@ namespace Metal {
 				return (alpha << 24) | (b << 16) | (g << 8) | r;
 			}
 
+			case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data);
+			case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data);
+
+			case PICA::TextureFmt::RGBA4: {
+				u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+				u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
+
+				u8 alpha = Colour::convert4To8Bit(getBits<0, 4, u8>(texel));
+				u8 b = Colour::convert4To8Bit(getBits<4, 4, u8>(texel));
+				u8 g = Colour::convert4To8Bit(getBits<8, 4, u8>(texel));
+				u8 r = Colour::convert4To8Bit(getBits<12, 4, u8>(texel));
+
+				return (alpha << 24) | (b << 16) | (g << 8) | r;
+			}
+
+			case PICA::TextureFmt::I4: {
+				u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
+
+				// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
+				u8 intensity = data[offset] >> ((u % 2) ? 4 : 0);
+				intensity = Colour::convert4To8Bit(getBits<0, 4>(intensity));
+
+				// Intensity formats just copy the intensity value to every colour channel
+				return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity;
+			}
+
+			case PICA::TextureFmt::IA4: {
+				const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
+				const u8 texel = data[offset];
+				const u8 alpha = Colour::convert4To8Bit(texel & 0xf);
+				const u8 intensity = Colour::convert4To8Bit(texel >> 4);
+
+				// Intensity formats just copy the intensity value to every colour channel
+				return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
+			}
+
+			case PICA::TextureFmt::A4: {
+				const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
+
+				// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
+				u8 alpha = data[offset] >> ((u % 2) ? 4 : 0);
+				alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha));
+
+				// A8 sets RGB to 0
+				return (alpha << 24) | (0 << 16) | (0 << 8) | 0;
+			}
+
 			case PICA::TextureFmt::I8: {
 				u32 offset = getSwizzledOffset(u, v, size.u(), 1);
 				const u8 intensity = data[offset];
@@ -270,8 +317,16 @@ namespace Metal {
 				return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
 			}
 
-			case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data);
-			case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data);
+			case PICA::TextureFmt::RGB565: {
+				const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+				const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
+
+				const u8 b = Colour::convert5To8Bit(getBits<0, 5, u8>(texel));
+				const u8 g = Colour::convert6To8Bit(getBits<5, 6, u8>(texel));
+				const u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
+
+				return (0xff << 24) | (b << 16) | (g << 8) | r;
+			}
 
 			default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
 		}
diff --git a/src/ios_driver.mm b/src/ios_driver.mm
index e3fd9342..d9a0d544 100644
--- a/src/ios_driver.mm
+++ b/src/ios_driver.mm
@@ -23,9 +23,9 @@ IOS_EXPORT void iosCreateEmulator() {
 	emulator->initGraphicsContext(nullptr);
 
 	// auto path = emulator->getAppDataRoot() / "Kirb Demo.3ds";
-	// auto path = emulator->getAppDataRoot() / "Kirb Demo.3ds";
+	auto path = emulator->getAppDataRoot() / "Kirb Demo.3ds";
 
-	auto path = emulator->getAppDataRoot() / "toon_shading.elf";
+	//auto path = emulator->getAppDataRoot() / "toon_shading.elf";
 	emulator->loadROM(path);
 	printf("Created emulator\n");
 }

From 3a654b36092f62b49715c85299b6131cfea02060 Mon Sep 17 00:00:00 2001
From: Samuliak <samuliak77@gmail.com>
Date: Tue, 11 Mar 2025 08:11:39 +0100
Subject: [PATCH 11/19] metal: implement texture decoder

---
 CMakeLists.txt                            |   8 +-
 include/renderer_mtl/mtl_texture.hpp      |  28 +-
 include/renderer_mtl/pica_to_mtl.hpp      |  44 +--
 include/renderer_mtl/texture_decoder.hpp  |  23 ++
 src/core/renderer_mtl/mtl_etc1.cpp        | 116 --------
 src/core/renderer_mtl/mtl_texture.cpp     |  58 +---
 src/core/renderer_mtl/pica_to_mtl.cpp     |  33 +++
 src/core/renderer_mtl/renderer_mtl.cpp    |   2 +-
 src/core/renderer_mtl/texture_decoder.cpp | 326 ++++++++++++++++++++++
 third_party/LuaJIT                        |   2 +-
 third_party/oaknut                        |   2 +-
 11 files changed, 419 insertions(+), 223 deletions(-)
 create mode 100644 include/renderer_mtl/texture_decoder.hpp
 delete mode 100644 src/core/renderer_mtl/mtl_etc1.cpp
 create mode 100644 src/core/renderer_mtl/pica_to_mtl.cpp
 create mode 100644 src/core/renderer_mtl/texture_decoder.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2362ffb2..fa33dede 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -420,7 +420,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
 if(IOS)
     set(SOURCE_FILES ${SOURCE_FILES} src/miniaudio/miniaudio.m)
     target_compile_definitions(AlberCore PUBLIC "PANDA3DS_IOS=1")
-    
+
     if (IOS_SIMULATOR_BUILD)
         target_compile_definitions(AlberCore PUBLIC "PANDA3DS_IOS_SIMULATOR=1")
     endif()
@@ -604,14 +604,16 @@ if(ENABLE_METAL AND APPLE)
         include/renderer_mtl/mtl_common.hpp
         include/renderer_mtl/pica_to_mtl.hpp
         include/renderer_mtl/objc_helper.hpp
+        include/renderer_mtl/texture_decoder.hpp
     )
 
     set(RENDERER_MTL_SOURCE_FILES src/core/renderer_mtl/metal_cpp_impl.cpp
         src/core/renderer_mtl/renderer_mtl.cpp
         src/core/renderer_mtl/mtl_texture.cpp
-        src/core/renderer_mtl/mtl_etc1.cpp
         src/core/renderer_mtl/mtl_lut_texture.cpp
+        src/core/renderer_mtl/pica_to_mtl.cpp
         src/core/renderer_mtl/objc_helper.mm
+        src/core/renderer_mtl/texture_decoder.cpp
         src/host_shaders/metal_shaders.metal
         src/host_shaders/metal_blit.metal
         #src/host_shaders/metal_copy_to_lut_texture.metal
@@ -808,7 +810,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE)
 elseif(BUILD_HYDRA_CORE)
     target_compile_definitions(AlberCore PRIVATE PANDA3DS_HYDRA_CORE=1)
     include_directories(third_party/hydra_core/include)
-    
+
     set(SHARED_SOURCE_FILES src/hydra_core.cpp)
     if(IOS)
         set(SHARED_SOURCE_FILES ${SHARED_SOURCE_FILES} src/ios_driver.mm)
diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp
index 93103091..0e78b13c 100644
--- a/include/renderer_mtl/mtl_texture.hpp
+++ b/include/renderer_mtl/mtl_texture.hpp
@@ -8,8 +8,9 @@
 #include "boost/icl/interval.hpp"
 #include "helpers.hpp"
 #include "math_util.hpp"
-#include "opengl.hpp"
 #include "renderer_mtl/pica_to_mtl.hpp"
+// TODO: remove dependency on OpenGL
+#include "opengl.hpp"
 
 template <typename T>
 using Interval = boost::icl::right_open_interval<T>;
@@ -52,22 +53,19 @@ namespace Metal {
 		void free();
 		u64 sizeInBytes();
 
-		u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
-		u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
-		u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
-
-		// Get the morton interleave offset of a texel based on its U and V values
-		static u32 mortonInterleave(u32 u, u32 v);
-		// Get the byte offset of texel (u, v) in the texture
-		static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel);
-		static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width);
+		u8 decodeTexelBGR8ToRGBA8(u32 u, u32 v, std::span<const u8> data);
+		u8 decodeTexelA1BGR5ToRGBA8(u32 u, u32 v, std::span<const u8> data);
+		u8 decodeTexelB5G6R5ToRGBA8(u32 u, u32 v, std::span<const u8> data);
+		u8 decodeTexelABGR4ToRGBA8(u32 u, u32 v, std::span<const u8> data);
+		u8 decodeTexelAI8ToRGBA8(u32 u, u32 v, std::span<const u8> data);
+		u8 decodeTexelI8ToRGBA8(u32 u, u32 v, std::span<const u8> data);
+		u8 decodeTexelAI4ToRGBA4(u32 u, u32 v, std::span<const u8> data);
+		u8 decodeTexelAI4ToRGBA8(u32 u, u32 v, std::span<const u8> data);
+		u8 decodeTexelI4ToRGBA4(u32 u, u32 v, std::span<const u8> data);
+		u8 decodeTexelI4ToRGBA8(u32 u, u32 v, std::span<const u8> data);
+		u8 decodeTexelA4ToA8(u32 u, u32 v, std::span<const u8> data);
 
 		// Returns the format of this texture as a string
 		std::string_view formatToString() { return PICA::textureFormatToString(format); }
-
-		// Returns the texel at coordinates (u, v) of an ETC1(A4) texture
-		// TODO: Make hasAlpha a template parameter
-		u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data);
-		u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
 	};
 }  // namespace Metal
diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp
index beb63b17..a0874a65 100644
--- a/include/renderer_mtl/pica_to_mtl.hpp
+++ b/include/renderer_mtl/pica_to_mtl.hpp
@@ -3,49 +3,21 @@
 #include <Metal/Metal.hpp>
 
 #include "PICA/regs.hpp"
+// TODO: remove dependency on OpenGL
+#include "opengl.hpp"
 
 namespace PICA {
 	struct PixelFormatInfo {
 		MTL::PixelFormat pixelFormat;
 		size_t bytesPerTexel;
+		void (*decoder)(OpenGL::uvec2, u32, u32, std::span<const u8>, std::vector<u8>&);
+		bool needsSwizzle{false};
+		// TODO: swizzle
 	};
 
-// iOS, at least on simulator, doesn't support a lot of more "exotic" texture formats, so we avoid them tehre
-#ifndef PANDA3DS_IOS
-	constexpr PixelFormatInfo pixelFormatInfos[14] = {
-		{MTL::PixelFormatRGBA8Unorm, 4},   // RGBA8
-		{MTL::PixelFormatRGBA8Unorm, 4},   // RGB8
-		{MTL::PixelFormatBGR5A1Unorm, 2},  // RGBA5551
-		{MTL::PixelFormatB5G6R5Unorm, 2},  // RGB565
-		{MTL::PixelFormatABGR4Unorm, 2},   // RGBA4
-		{MTL::PixelFormatRGBA8Unorm, 4},   // IA8
-		{MTL::PixelFormatRG8Unorm, 2},     // RG8
-		{MTL::PixelFormatRGBA8Unorm, 4},   // I8
-		{MTL::PixelFormatA8Unorm, 1},      // A8
-		{MTL::PixelFormatABGR4Unorm, 2},   // IA4
-		{MTL::PixelFormatABGR4Unorm, 2},   // I4
-		{MTL::PixelFormatA8Unorm, 1},      // A4
-		{MTL::PixelFormatRGBA8Unorm, 4},   // ETC1
-		{MTL::PixelFormatRGBA8Unorm, 4},   // ETC1A4
-	};
-#else
-	constexpr PixelFormatInfo pixelFormatInfos[14] = {
-		{MTL::PixelFormatRGBA8Unorm, 4},   // RGBA8
-		{MTL::PixelFormatRGBA8Unorm, 4},   // RGB8
-		{MTL::PixelFormatBGR5A1Unorm, 2},  // RGBA5551
-		{MTL::PixelFormatRGBA8Unorm, 4},   // RGB565
-		{MTL::PixelFormatRGBA8Unorm, 4},   // RGBA4
-		{MTL::PixelFormatRGBA8Unorm, 4},   // IA8
-		{MTL::PixelFormatRG8Unorm, 2},     // RG8
-		{MTL::PixelFormatRGBA8Unorm, 4},   // I8
-		{MTL::PixelFormatA8Unorm, 1},      // A8
-		{MTL::PixelFormatRGBA8Unorm, 4},   // IA4
-		{MTL::PixelFormatRGBA8Unorm, 4},   // I4
-		{MTL::PixelFormatA8Unorm, 1},      // A4
-		{MTL::PixelFormatRGBA8Unorm, 4},   // ETC1
-		{MTL::PixelFormatRGBA8Unorm, 4},   // ETC1A4
-	};
-#endif
+	extern PixelFormatInfo pixelFormatInfos[14];
+
+	void checkForPixelFormatSupport(MTL::Device* device);
 
 	inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast<int>(format)]; }
 
diff --git a/include/renderer_mtl/texture_decoder.hpp b/include/renderer_mtl/texture_decoder.hpp
new file mode 100644
index 00000000..29f88695
--- /dev/null
+++ b/include/renderer_mtl/texture_decoder.hpp
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "helpers.hpp"
+// TODO: remove dependency on OpenGL
+#include "opengl.hpp"
+
+void decodeTexelABGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelBGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelA1BGR5ToBGR5A1(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelA1BGR5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelB5G6R5ToB5G6R5(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelB5G6R5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelABGR4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelABGR4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelAI8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelGR8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelI8ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelA8ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelAI4ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelI4ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelA4ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelETC1ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelETC1A4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
diff --git a/src/core/renderer_mtl/mtl_etc1.cpp b/src/core/renderer_mtl/mtl_etc1.cpp
deleted file mode 100644
index 420a60ca..00000000
--- a/src/core/renderer_mtl/mtl_etc1.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-#include <algorithm>
-
-#include "colour.hpp"
-#include "renderer_mtl/mtl_texture.hpp"
-#include "renderer_mtl/renderer_mtl.hpp"
-
-
-using namespace Helpers;
-
-namespace Metal {
-	static constexpr u32 signExtend3To32(u32 val) {
-        return (u32)(s32(val) << 29 >> 29);
-    }
-
-	u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data) {
-		// Pixel offset of the 8x8 tile based on u, v and the width of the texture
-		u32 offs = ((u & ~7) * 8) + ((v & ~7) * width);
-		if (!hasAlpha) {
-			offs >>= 1;
-		}
-
-		// In-tile offsets for u/v
-		u &= 7;
-		v &= 7;
-
-		// ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles
-		// Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes
-		const u32 subTileSize = hasAlpha ? 16 : 8;
-		const u32 subTileIndex = (u / 4) + 2 * (v / 4);  // Which of the 4 subtiles is this texel in?
-
-		// In-subtile offsets for u/v
-		u &= 3;
-		v &= 3;
-		offs += subTileSize * subTileIndex;
-
-		u32 alpha;
-		const u64* ptr = reinterpret_cast<const u64*>(data.data() + offs);  // Cast to u64*
-
-		if (hasAlpha) {
-			// First 64 bits of the 4x4 subtile are alpha data
-			const u64 alphaData = *ptr++;
-			alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf);
-		} else {
-			alpha = 0xff;  // ETC1 without alpha uses ff for every pixel
-		}
-
-		// Next 64 bits of the subtile are colour data
-		u64 colourData = *ptr;
-		return decodeETC(alpha, u, v, colourData);
-	}
-
-	u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) {
-		static constexpr u32 modifiers[8][2] = {
-			{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183},
-		};
-
-		// Parse colour data for 4x4 block
-		const u32 subindices = getBits<0, 16, u32>(colourData);
-		const u32 negationFlags = getBits<16, 16, u32>(colourData);
-		const bool flip = getBit<32>(colourData);
-		const bool diffMode = getBit<33>(colourData);
-
-		// Note: index1 is indeed stored on the higher bits, with index2 in the lower bits
-		const u32 tableIndex1 = getBits<37, 3, u32>(colourData);
-		const u32 tableIndex2 = getBits<34, 3, u32>(colourData);
-		const u32 texelIndex = u * 4 + v;  // Index of the texel in the block
-
-		if (flip) std::swap(u, v);
-
-		s32 r, g, b;
-		if (diffMode) {
-			r = getBits<59, 5, s32>(colourData);
-			g = getBits<51, 5, s32>(colourData);
-			b = getBits<43, 5, s32>(colourData);
-
-			if (u >= 2) {
-				r += signExtend3To32(getBits<56, 3, u32>(colourData));
-				g += signExtend3To32(getBits<48, 3, u32>(colourData));
-				b += signExtend3To32(getBits<40, 3, u32>(colourData));
-			}
-
-			// Expand from 5 to 8 bits per channel
-			r = Colour::convert5To8Bit(r);
-			g = Colour::convert5To8Bit(g);
-			b = Colour::convert5To8Bit(b);
-		} else {
-			if (u < 2) {
-				r = getBits<60, 4, s32>(colourData);
-				g = getBits<52, 4, s32>(colourData);
-				b = getBits<44, 4, s32>(colourData);
-			} else {
-				r = getBits<56, 4, s32>(colourData);
-				g = getBits<48, 4, s32>(colourData);
-				b = getBits<40, 4, s32>(colourData);
-			}
-
-			// Expand from 4 to 8 bits per channel
-			r = Colour::convert4To8Bit(r);
-			g = Colour::convert4To8Bit(g);
-			b = Colour::convert4To8Bit(b);
-		}
-
-		const u32 index = (u < 2) ? tableIndex1 : tableIndex2;
-		s32 modifier = modifiers[index][(subindices >> texelIndex) & 1];
-
-		if (((negationFlags >> texelIndex) & 1) != 0) {
-			modifier = -modifier;
-		}
-
-		r = std::clamp(r + modifier, 0, 255);
-		g = std::clamp(g + modifier, 0, 255);
-		b = std::clamp(b + modifier, 0, 255);
-
-		return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r);
-	}
-}  // namespace Metal
diff --git a/src/core/renderer_mtl/mtl_texture.cpp b/src/core/renderer_mtl/mtl_texture.cpp
index a110b88f..3b4e065d 100644
--- a/src/core/renderer_mtl/mtl_texture.cpp
+++ b/src/core/renderer_mtl/mtl_texture.cpp
@@ -99,35 +99,7 @@ namespace Metal {
 		}
 	}
 
-	// u and v are the UVs of the relevant texel
-	// Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here
-	// https://en.wikipedia.org/wiki/Z-order_curve
-	// Textures are split into 8x8 tiles.This function returns the in - tile offset depending on the u & v of the texel
-	// The in - tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of y % 8
-	// As documented in this picture https ://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg
-	u32 Texture::mortonInterleave(u32 u, u32 v) {
-		static constexpr u32 xOffsets[] = {0, 1, 4, 5, 16, 17, 20, 21};
-		static constexpr u32 yOffsets[] = {0, 2, 8, 10, 32, 34, 40, 42};
-
-		return xOffsets[u & 7] + yOffsets[v & 7];
-	}
-
-	// Get the byte offset of texel (u, v) in the texture
-	u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) {
-		u32 offset = ((u & ~7) * 8) + ((v & ~7) * width);  // Offset of the 8x8 tile the texel belongs to
-		offset += mortonInterleave(u, v);                  // Add the in-tile offset of the texel
-
-		return offset * bytesPerPixel;
-	}
-
-	// Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte
-	u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) {
-		u32 offset = ((u & ~7) * 8) + ((v & ~7) * width);  // Offset of the 8x8 tile the texel belongs to
-		offset += mortonInterleave(u, v);                  // Add the in-tile offset of the texel
-
-		return offset / 2;
-	}
-
+	/*
 	u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
 		switch (fmt) {
 			case PICA::TextureFmt::A4: {
@@ -331,32 +303,18 @@ namespace Metal {
 			default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
 		}
 	}
+	*/
 
 	void Texture::decodeTexture(std::span<const u8> data) {
 		std::vector<u8> decoded;
 		decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel);
 
-		// Decode texels line by line
-		for (u32 v = 0; v < size.v(); v++) {
-			for (u32 u = 0; u < size.u(); u++) {
-				if (formatInfo.bytesPerTexel == 1) {
-					u8 texel = decodeTexelU8(u, v, format, data);
-					decoded.push_back(texel);
-				} else if (formatInfo.bytesPerTexel == 2) {
-					u16 texel = decodeTexelU16(u, v, format, data);
-					decoded.push_back((texel & 0x00ff) >> 0);
-					decoded.push_back((texel & 0xff00) >> 8);
-				} else if (formatInfo.bytesPerTexel == 4) {
-					u32 texel = decodeTexelU32(u, v, format, data);
-					decoded.push_back((texel & 0x000000ff) >> 0);
-					decoded.push_back((texel & 0x0000ff00) >> 8);
-					decoded.push_back((texel & 0x00ff0000) >> 16);
-					decoded.push_back((texel & 0xff000000) >> 24);
-				} else {
-					Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel);
-				}
-			}
-		}
+  		// Decode texels line by line
+  		for (u32 v = 0; v < size.v(); v++) {
+ 			for (u32 u = 0; u < size.u(); u++) {
+                formatInfo.decoder(size, u, v, data, decoded);
+ 			}
+  		}
 
 		texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0);
 	}
diff --git a/src/core/renderer_mtl/pica_to_mtl.cpp b/src/core/renderer_mtl/pica_to_mtl.cpp
new file mode 100644
index 00000000..d527f000
--- /dev/null
+++ b/src/core/renderer_mtl/pica_to_mtl.cpp
@@ -0,0 +1,33 @@
+#include "renderer_mtl/pica_to_mtl.hpp"
+
+#include "renderer_mtl/texture_decoder.hpp"
+
+using namespace Helpers;
+
+namespace PICA {
+
+    PixelFormatInfo pixelFormatInfos[14] = {
+		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelABGR8ToRGBA8},    // RGBA8
+		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelBGR8ToRGBA8},     // RGB8
+		{MTL::PixelFormatBGR5A1Unorm, 2, decodeTexelA1BGR5ToBGR5A1}, // RGBA5551
+		{MTL::PixelFormatB5G6R5Unorm, 2, decodeTexelB5G6R5ToB5G6R5}, // RGB565
+		{MTL::PixelFormatABGR4Unorm, 2, decodeTexelABGR4ToABGR4},    // RGBA4
+		{MTL::PixelFormatRG8Unorm, 2, decodeTexelAI8ToRG8},          // IA8
+		{MTL::PixelFormatRG8Unorm, 2, decodeTexelGR8ToRG8},          // RG8
+		{MTL::PixelFormatR8Unorm, 1, decodeTexelI8ToR8},             // I8
+		{MTL::PixelFormatA8Unorm, 1, decodeTexelA8ToA8},             // A8
+		{MTL::PixelFormatRG8Unorm, 2, decodeTexelAI4ToRG8},          // IA4
+		{MTL::PixelFormatR8Unorm, 1, decodeTexelI4ToR8},             // I4
+		{MTL::PixelFormatA8Unorm, 1, decodeTexelA4ToA8},             // A4
+		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1ToRGBA8},     // ETC1
+		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1A4ToRGBA8},   // ETC1A4
+	};
+
+	void checkForPixelFormatSupport(MTL::Device* device) {
+	    if (!device->supportsFamily(MTL::GPUFamilyApple1)) {
+			// TODO
+			throw;
+		}
+	}
+
+}
diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index 308ad715..1719eaf3 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -141,9 +141,9 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
 	device = MTL::CreateSystemDefaultDevice();
 	metalLayer->setDevice(device);
 #endif
+    checkForPixelFormatSupport(device);
 
 	commandQueue = device->newCommandQueue();
-	printf("C++ device pointer: %p\n", device);
 
 	// Textures
 	MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
diff --git a/src/core/renderer_mtl/texture_decoder.cpp b/src/core/renderer_mtl/texture_decoder.cpp
new file mode 100644
index 00000000..d98eb06b
--- /dev/null
+++ b/src/core/renderer_mtl/texture_decoder.cpp
@@ -0,0 +1,326 @@
+#include "renderer_mtl/texture_decoder.hpp"
+
+#include <array>
+#include <string>
+
+#include "math_util.hpp"
+#include "colour.hpp"
+
+using namespace Helpers;
+
+// u and v are the UVs of the relevant texel
+// Texture data is stored interleaved in Morton order, i.e. along a Z-order curve as shown here:
+// https://en.wikipedia.org/wiki/Z-order_curve
+// Textures are split into 8x8 tiles. This function returns the in-tile offset depending on the u & v of the texel.
+// The in-tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of v % 8,
+// as documented in this picture: https://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg
+u32 mortonInterleave(u32 u, u32 v) {
+	static constexpr u32 xOffsets[] = {0, 1, 4, 5, 16, 17, 20, 21};
+	static constexpr u32 yOffsets[] = {0, 2, 8, 10, 32, 34, 40, 42};
+
+	return xOffsets[u & 7] + yOffsets[v & 7];
+}
+
+// Get the byte offset of texel (u, v) in the texture
+u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) {
+	u32 offset = ((u & ~7) * 8) + ((v & ~7) * width);  // Offset of the 8x8 tile the texel belongs to
+	offset += mortonInterleave(u, v);                  // Add the in-tile offset of the texel
+
+	return offset * bytesPerPixel;
+}
+
+// Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte
+u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) {
+	u32 offset = ((u & ~7) * 8) + ((v & ~7) * width);  // Offset of the 8x8 tile the texel belongs to
+	offset += mortonInterleave(u, v);                  // Add the in-tile offset of the texel
+
+	return offset / 2;
+}
+
+void decodeTexelABGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    const u32 offset = getSwizzledOffset(u, v, size.u(), 4);
+   	const u8 alpha = inData[offset];
+   	const u8 b = inData[offset + 1];
+   	const u8 g = inData[offset + 2];
+   	const u8 r = inData[offset + 3];
+
+    outData.push_back(r);
+    outData.push_back(g);
+    outData.push_back(b);
+    outData.push_back(alpha);
+}
+
+void decodeTexelBGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    const u32 offset = getSwizzledOffset(u, v, size.u(), 3);
+   	const u8 b = inData[offset];
+   	const u8 g = inData[offset + 1];
+   	const u8 r = inData[offset + 2];
+
+    outData.push_back(r);
+    outData.push_back(g);
+    outData.push_back(b);
+    outData.push_back(0xff);
+}
+
+void decodeTexelA1BGR5ToBGR5A1(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+   	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
+
+   	u8 alpha = getBit<0>(texel);
+   	u8 b = getBits<1, 5, u8>(texel);
+   	u8 g = getBits<6, 5, u8>(texel);
+   	u8 r = getBits<11, 5, u8>(texel);
+
+   	u16 outTexel = (alpha << 15) | (r << 10) | (g << 5) | b;
+    outData.push_back(outTexel & 0xff);
+    outData.push_back((outTexel >> 8) & 0xff);
+}
+
+void decodeTexelA1BGR5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+   	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
+
+   	u8 alpha = getBit<0>(texel) ? 0xff : 0;
+   	u8 b = Colour::convert5To8Bit(getBits<1, 5, u8>(texel));
+   	u8 g = Colour::convert5To8Bit(getBits<6, 5, u8>(texel));
+   	u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
+
+    outData.push_back(r);
+    outData.push_back(g);
+    outData.push_back(b);
+    outData.push_back(alpha);
+}
+
+void decodeTexelB5G6R5ToB5G6R5(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+   	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
+
+    outData.push_back(texel & 0xff);
+    outData.push_back((texel >> 8) & 0xff);
+}
+
+void decodeTexelB5G6R5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+   	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
+
+   	const u8 b = Colour::convert5To8Bit(getBits<0, 5, u8>(texel));
+   	const u8 g = Colour::convert6To8Bit(getBits<5, 6, u8>(texel));
+   	const u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
+
+    outData.push_back(r);
+    outData.push_back(g);
+    outData.push_back(b);
+    outData.push_back(0xff);
+}
+
+void decodeTexelABGR4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+   	u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
+
+   	u8 alpha = getBits<0, 4, u8>(texel);
+   	u8 b = getBits<4, 4, u8>(texel);
+   	u8 g = getBits<8, 4, u8>(texel);
+   	u8 r = getBits<12, 4, u8>(texel);
+
+    outData.push_back((b << 4) | alpha);
+    outData.push_back((r << 4) | g);
+}
+
+void decodeTexelABGR4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+   	u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
+
+   	u8 alpha = Colour::convert4To8Bit(getBits<0, 4, u8>(texel));
+   	u8 b = Colour::convert4To8Bit(getBits<4, 4, u8>(texel));
+   	u8 g = Colour::convert4To8Bit(getBits<8, 4, u8>(texel));
+   	u8 r = Colour::convert4To8Bit(getBits<12, 4, u8>(texel));
+
+    outData.push_back(r);
+    outData.push_back(g);
+    outData.push_back(b);
+    outData.push_back(alpha);
+}
+
+void decodeTexelAI8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+
+   	// Same as I8 except each pixel gets its own alpha value too
+   	const u8 alpha = inData[offset];
+   	const u8 intensity = inData[offset + 1];
+
+    outData.push_back(intensity);
+    outData.push_back(alpha);
+}
+
+void decodeTexelGR8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+   	constexpr u8 b = 0;
+   	const u8 g = inData[offset];
+   	const u8 r = inData[offset + 1];
+
+    outData.push_back(r);
+    outData.push_back(g);
+}
+
+void decodeTexelI8ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    u32 offset = getSwizzledOffset(u, v, size.u(), 1);
+   	const u8 intensity = inData[offset];
+
+    outData.push_back(intensity);
+}
+
+void decodeTexelA8ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    u32 offset = getSwizzledOffset(u, v, size.u(), 1);
+   	const u8 alpha = inData[offset];
+
+   	outData.push_back(alpha);
+}
+
+void decodeTexelAI4ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
+   	const u8 texel = inData[offset];
+   	const u8 alpha = Colour::convert4To8Bit(texel & 0xf);
+   	const u8 intensity = Colour::convert4To8Bit(texel >> 4);
+
+   	outData.push_back(intensity);
+   	outData.push_back(alpha);
+}
+
+void decodeTexelI4ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
+
+   	// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
+   	u8 intensity = inData[offset] >> ((u % 2) ? 4 : 0);
+   	intensity = Colour::convert4To8Bit(getBits<0, 4>(intensity));
+
+   	outData.push_back(intensity);
+}
+
+void decodeTexelA4ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
+
+   	// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
+   	u8 alpha = inData[offset] >> ((u % 2) ? 4 : 0);
+   	alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha));
+
+   	outData.push_back(alpha);
+}
+
+static constexpr u32 signExtend3To32(u32 val) {
+    return (u32)(s32(val) << 29 >> 29);
+}
+
+void decodeETC(u32 u, u32 v, u64 colourData, u32 alpha, std::vector<u8>& outData) {
+	static constexpr u32 modifiers[8][2] = {
+		{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183},
+	};
+
+	// Parse colour data for 4x4 block
+	const u32 subindices = getBits<0, 16, u32>(colourData);
+	const u32 negationFlags = getBits<16, 16, u32>(colourData);
+	const bool flip = getBit<32>(colourData);
+	const bool diffMode = getBit<33>(colourData);
+
+	// Note: index1 is indeed stored on the higher bits, with index2 in the lower bits
+	const u32 tableIndex1 = getBits<37, 3, u32>(colourData);
+	const u32 tableIndex2 = getBits<34, 3, u32>(colourData);
+	const u32 texelIndex = u * 4 + v;  // Index of the texel in the block
+
+	if (flip) std::swap(u, v);
+
+	s32 r, g, b;
+	if (diffMode) {
+		r = getBits<59, 5, s32>(colourData);
+		g = getBits<51, 5, s32>(colourData);
+		b = getBits<43, 5, s32>(colourData);
+
+		if (u >= 2) {
+			r += signExtend3To32(getBits<56, 3, u32>(colourData));
+			g += signExtend3To32(getBits<48, 3, u32>(colourData));
+			b += signExtend3To32(getBits<40, 3, u32>(colourData));
+		}
+
+		// Expand from 5 to 8 bits per channel
+		r = Colour::convert5To8Bit(r);
+		g = Colour::convert5To8Bit(g);
+		b = Colour::convert5To8Bit(b);
+	} else {
+		if (u < 2) {
+			r = getBits<60, 4, s32>(colourData);
+			g = getBits<52, 4, s32>(colourData);
+			b = getBits<44, 4, s32>(colourData);
+		} else {
+			r = getBits<56, 4, s32>(colourData);
+			g = getBits<48, 4, s32>(colourData);
+			b = getBits<40, 4, s32>(colourData);
+		}
+
+		// Expand from 4 to 8 bits per channel
+		r = Colour::convert4To8Bit(r);
+		g = Colour::convert4To8Bit(g);
+		b = Colour::convert4To8Bit(b);
+	}
+
+	const u32 index = (u < 2) ? tableIndex1 : tableIndex2;
+	s32 modifier = modifiers[index][(subindices >> texelIndex) & 1];
+
+	if (((negationFlags >> texelIndex) & 1) != 0) {
+		modifier = -modifier;
+	}
+
+	r = std::clamp(r + modifier, 0, 255);
+	g = std::clamp(g + modifier, 0, 255);
+	b = std::clamp(b + modifier, 0, 255);
+
+	outData.push_back(r);
+	outData.push_back(g);
+	outData.push_back(b);
+	outData.push_back(alpha);
+}
+
+template <bool hasAlpha>
+void getTexelETC(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+	// Pixel offset of the 8x8 tile based on u, v and the width of the texture
+	u32 offs = ((u & ~7) * 8) + ((v & ~7) * size.u());
+	if (!hasAlpha) {
+		offs >>= 1;
+	}
+
+	// In-tile offsets for u/v
+	u &= 7;
+	v &= 7;
+
+	// ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles
+	// Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes
+	const u32 subTileSize = hasAlpha ? 16 : 8;
+	const u32 subTileIndex = (u / 4) + 2 * (v / 4);  // Which of the 4 subtiles is this texel in?
+
+	// In-subtile offsets for u/v
+	u &= 3;
+	v &= 3;
+	offs += subTileSize * subTileIndex;
+
+	u32 alpha;
+	const u64* ptr = reinterpret_cast<const u64*>(inData.data() + offs);  // Cast to u64*
+
+	if (hasAlpha) {
+		// First 64 bits of the 4x4 subtile are alpha data
+		const u64 alphaData = *ptr++;
+		alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf);
+	} else {
+		alpha = 0xff;  // ETC1 without alpha uses ff for every pixel
+	}
+
+	// Next 64 bits of the subtile are colour data
+	u64 colourData = *ptr;
+
+	decodeETC(u, v, colourData, alpha, outData);
+}
+
+void decodeTexelETC1ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    getTexelETC<false>(size, u, v, inData, outData);
+}
+
+void decodeTexelETC1A4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    getTexelETC<true>(size, u, v, inData, outData);
+}
diff --git a/third_party/LuaJIT b/third_party/LuaJIT
index 8bf7686d..41edf095 160000
--- a/third_party/LuaJIT
+++ b/third_party/LuaJIT
@@ -1 +1 @@
-Subproject commit 8bf7686d820f868eae1a522c481fee09c18c90b9
+Subproject commit 41edf0959b9504d36dd85f5f16893c004ea7d7ba
diff --git a/third_party/oaknut b/third_party/oaknut
index 790374d7..94c726ce 160000
--- a/third_party/oaknut
+++ b/third_party/oaknut
@@ -1 +1 @@
-Subproject commit 790374d7e66257b1f8ed89d798e5dcfb5363af05
+Subproject commit 94c726ce0338b054eb8cb5ea91de8fe6c19f4392

From 2111c94f1e6d579341feb56a2389a1b976bc73ac Mon Sep 17 00:00:00 2001
From: Samuliak <samuliak77@gmail.com>
Date: Tue, 11 Mar 2025 08:18:55 +0100
Subject: [PATCH 12/19] metal: check for format support

---
 include/renderer_mtl/texture_decoder.hpp  |  1 +
 src/core/renderer_mtl/pica_to_mtl.cpp     |  8 +++++---
 src/core/renderer_mtl/texture_decoder.cpp | 10 ++++++++++
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/include/renderer_mtl/texture_decoder.hpp b/include/renderer_mtl/texture_decoder.hpp
index 29f88695..6fd5cfb5 100644
--- a/include/renderer_mtl/texture_decoder.hpp
+++ b/include/renderer_mtl/texture_decoder.hpp
@@ -16,6 +16,7 @@ void decodeTexelAI8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> i
 void decodeTexelGR8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
 void decodeTexelI8ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
 void decodeTexelA8ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelAI4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
 void decodeTexelAI4ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
 void decodeTexelI4ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
 void decodeTexelA4ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
diff --git a/src/core/renderer_mtl/pica_to_mtl.cpp b/src/core/renderer_mtl/pica_to_mtl.cpp
index d527f000..10b3eda8 100644
--- a/src/core/renderer_mtl/pica_to_mtl.cpp
+++ b/src/core/renderer_mtl/pica_to_mtl.cpp
@@ -16,7 +16,7 @@ namespace PICA {
 		{MTL::PixelFormatRG8Unorm, 2, decodeTexelGR8ToRG8},          // RG8
 		{MTL::PixelFormatR8Unorm, 1, decodeTexelI8ToR8},             // I8
 		{MTL::PixelFormatA8Unorm, 1, decodeTexelA8ToA8},             // A8
-		{MTL::PixelFormatRG8Unorm, 2, decodeTexelAI4ToRG8},          // IA4
+		{MTL::PixelFormatABGR4Unorm, 2, decodeTexelAI4ToABGR4},      // IA4
 		{MTL::PixelFormatR8Unorm, 1, decodeTexelI4ToR8},             // I4
 		{MTL::PixelFormatA8Unorm, 1, decodeTexelA4ToA8},             // A4
 		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1ToRGBA8},     // ETC1
@@ -25,8 +25,10 @@ namespace PICA {
 
 	void checkForPixelFormatSupport(MTL::Device* device) {
 	    if (!device->supportsFamily(MTL::GPUFamilyApple1)) {
-			// TODO
-			throw;
+			pixelFormatInfos[2] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelA1BGR5ToRGBA8};
+			pixelFormatInfos[3] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelB5G6R5ToRGBA8};
+			pixelFormatInfos[4] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelABGR4ToRGBA8};
+			pixelFormatInfos[9] = {MTL::PixelFormatRG8Unorm, 2, decodeTexelAI4ToRG8};
 		}
 	}
 
diff --git a/src/core/renderer_mtl/texture_decoder.cpp b/src/core/renderer_mtl/texture_decoder.cpp
index d98eb06b..c758e8f1 100644
--- a/src/core/renderer_mtl/texture_decoder.cpp
+++ b/src/core/renderer_mtl/texture_decoder.cpp
@@ -176,6 +176,16 @@ void decodeTexelA8ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inD
    	outData.push_back(alpha);
 }
 
+void decodeTexelAI4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+    const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
+   	const u8 texel = inData[offset];
+   	const u8 alpha = texel & 0xf;
+   	const u8 intensity = texel >> 4;
+
+   	outData.push_back((intensity << 4) | intensity);
+   	outData.push_back((alpha << 4) | intensity);
+}
+
 void decodeTexelAI4ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
     const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
    	const u8 texel = inData[offset];

From 1a460d73be72b037c3d6c779db6fde224ce84dd2 Mon Sep 17 00:00:00 2001
From: Samuliak <samuliak77@gmail.com>
Date: Tue, 11 Mar 2025 08:35:35 +0100
Subject: [PATCH 13/19] metal: implement texture swizzling

---
 include/renderer_mtl/mtl_texture.hpp  |   1 +
 include/renderer_mtl/pica_to_mtl.hpp  |   2 +-
 src/core/renderer_mtl/mtl_texture.cpp | 216 ++------------------------
 src/core/renderer_mtl/pica_to_mtl.cpp |  38 ++++-
 4 files changed, 43 insertions(+), 214 deletions(-)

diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp
index 0e78b13c..667fcb33 100644
--- a/include/renderer_mtl/mtl_texture.hpp
+++ b/include/renderer_mtl/mtl_texture.hpp
@@ -29,6 +29,7 @@ namespace Metal {
 		Interval<u32> range;
 
 		PICA::PixelFormatInfo formatInfo;
+		MTL::Texture* base = nullptr;
 		MTL::Texture* texture = nullptr;
 		MTL::SamplerState* sampler = nullptr;
 
diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp
index a0874a65..5cb5947c 100644
--- a/include/renderer_mtl/pica_to_mtl.hpp
+++ b/include/renderer_mtl/pica_to_mtl.hpp
@@ -12,7 +12,7 @@ namespace PICA {
 		size_t bytesPerTexel;
 		void (*decoder)(OpenGL::uvec2, u32, u32, std::span<const u8>, std::vector<u8>&);
 		bool needsSwizzle{false};
-		// TODO: swizzle
+		MTL::TextureSwizzleChannels swizzle{.red = MTL::TextureSwizzleRed, .green = MTL::TextureSwizzleGreen, .blue = MTL::TextureSwizzleBlue, .alpha = MTL::TextureSwizzleAlpha};
 	};
 
 	extern PixelFormatInfo pixelFormatInfos[14];
diff --git a/src/core/renderer_mtl/mtl_texture.cpp b/src/core/renderer_mtl/mtl_texture.cpp
index 3b4e065d..9850cd54 100644
--- a/src/core/renderer_mtl/mtl_texture.cpp
+++ b/src/core/renderer_mtl/mtl_texture.cpp
@@ -21,10 +21,15 @@ namespace Metal {
 		descriptor->setStorageMode(MTL::StorageModeShared);  // TODO: use private + staging buffers?
 		texture = device->newTexture(descriptor);
 		texture->setLabel(toNSString(
-			"Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v())
+			"Base texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v())
 		));
 		descriptor->release();
 
+		if (formatInfo.needsSwizzle) {
+		    base = texture;
+		    texture = base->newTextureView(formatInfo.pixelFormat, MTL::TextureType2D, NS::Range(0, 1), NS::Range(0, 1), formatInfo.swizzle);
+		}
+
 		setNewConfig(config);
 	}
 
@@ -58,6 +63,9 @@ namespace Metal {
 		if (texture) {
 			texture->release();
 		}
+		if (base) {
+		    base->release();
+		}
 		if (sampler) {
 			sampler->release();
 		}
@@ -99,212 +107,6 @@ namespace Metal {
 		}
 	}
 
-	/*
-	u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
-		switch (fmt) {
-			case PICA::TextureFmt::A4: {
-				const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
-
-				// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
-				u8 alpha = data[offset] >> ((u % 2) ? 4 : 0);
-				alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha));
-
-				// A8
-				return alpha;
-			}
-
-			case PICA::TextureFmt::A8: {
-				u32 offset = getSwizzledOffset(u, v, size.u(), 1);
-				const u8 alpha = data[offset];
-
-				// A8
-				return alpha;
-			}
-
-			default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
-		}
-	}
-
-	u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
-		switch (fmt) {
-			case PICA::TextureFmt::RG8: {
-				u32 offset = getSwizzledOffset(u, v, size.u(), 2);
-				constexpr u8 b = 0;
-				const u8 g = data[offset];
-				const u8 r = data[offset + 1];
-
-				// RG8
-				return (g << 8) | r;
-			}
-
-			case PICA::TextureFmt::RGBA4: {
-				u32 offset = getSwizzledOffset(u, v, size.u(), 2);
-				u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
-
-				u8 alpha = getBits<0, 4, u8>(texel);
-				u8 b = getBits<4, 4, u8>(texel);
-				u8 g = getBits<8, 4, u8>(texel);
-				u8 r = getBits<12, 4, u8>(texel);
-
-				// ABGR4
-				return (r << 12) | (g << 8) | (b << 4) | alpha;
-			}
-
-			case PICA::TextureFmt::RGBA5551: {
-				const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
-				const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
-
-				u8 alpha = getBit<0>(texel) ? 0xff : 0;
-				u8 b = getBits<1, 5, u8>(texel);
-				u8 g = getBits<6, 5, u8>(texel);
-				u8 r = getBits<11, 5, u8>(texel);
-
-				// BGR5A1
-				return (alpha << 15) | (r << 10) | (g << 5) | b;
-			}
-
-			case PICA::TextureFmt::RGB565: {
-				const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
-				const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
-
-				const u8 b = getBits<0, 5, u8>(texel);
-				const u8 g = getBits<5, 6, u8>(texel);
-				const u8 r = getBits<11, 5, u8>(texel);
-
-				// B5G6R5
-				return (r << 11) | (g << 5) | b;
-			}
-
-			case PICA::TextureFmt::IA4: {
-				const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
-				const u8 texel = data[offset];
-				const u8 alpha = texel & 0xf;
-				const u8 intensity = texel >> 4;
-
-				// ABGR4
-				return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha;
-			}
-
-			case PICA::TextureFmt::I4: {
-				u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
-
-				// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
-				u8 intensity = data[offset] >> ((u % 2) ? 4 : 0);
-				intensity = getBits<0, 4>(intensity);
-
-				// ABGR4
-				return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xff;
-			}
-
-			default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
-		}
-	}
-
-	u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
-		switch (fmt) {
-			case PICA::TextureFmt::RGB8: {
-				const u32 offset = getSwizzledOffset(u, v, size.u(), 3);
-				const u8 b = data[offset];
-				const u8 g = data[offset + 1];
-				const u8 r = data[offset + 2];
-
-				// RGBA8
-				return (0xff << 24) | (b << 16) | (g << 8) | r;
-			}
-
-			case PICA::TextureFmt::RGBA8: {
-				const u32 offset = getSwizzledOffset(u, v, size.u(), 4);
-				const u8 alpha = data[offset];
-				const u8 b = data[offset + 1];
-				const u8 g = data[offset + 2];
-				const u8 r = data[offset + 3];
-
-				// RGBA8
-				return (alpha << 24) | (b << 16) | (g << 8) | r;
-			}
-
-			case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data);
-			case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data);
-
-			case PICA::TextureFmt::RGBA4: {
-				u32 offset = getSwizzledOffset(u, v, size.u(), 2);
-				u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
-
-				u8 alpha = Colour::convert4To8Bit(getBits<0, 4, u8>(texel));
-				u8 b = Colour::convert4To8Bit(getBits<4, 4, u8>(texel));
-				u8 g = Colour::convert4To8Bit(getBits<8, 4, u8>(texel));
-				u8 r = Colour::convert4To8Bit(getBits<12, 4, u8>(texel));
-
-				return (alpha << 24) | (b << 16) | (g << 8) | r;
-			}
-
-			case PICA::TextureFmt::I4: {
-				u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
-
-				// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
-				u8 intensity = data[offset] >> ((u % 2) ? 4 : 0);
-				intensity = Colour::convert4To8Bit(getBits<0, 4>(intensity));
-
-				// Intensity formats just copy the intensity value to every colour channel
-				return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity;
-			}
-
-			case PICA::TextureFmt::IA4: {
-				const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
-				const u8 texel = data[offset];
-				const u8 alpha = Colour::convert4To8Bit(texel & 0xf);
-				const u8 intensity = Colour::convert4To8Bit(texel >> 4);
-
-				// Intensity formats just copy the intensity value to every colour channel
-				return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
-			}
-
-			case PICA::TextureFmt::A4: {
-				const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
-
-				// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
-				u8 alpha = data[offset] >> ((u % 2) ? 4 : 0);
-				alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha));
-
-				// A8 sets RGB to 0
-				return (alpha << 24) | (0 << 16) | (0 << 8) | 0;
-			}
-
-			case PICA::TextureFmt::I8: {
-				u32 offset = getSwizzledOffset(u, v, size.u(), 1);
-				const u8 intensity = data[offset];
-
-				// RGBA8
-				return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity;
-			}
-
-			case PICA::TextureFmt::IA8: {
-				u32 offset = getSwizzledOffset(u, v, size.u(), 2);
-
-				// Same as I8 except each pixel gets its own alpha value too
-				const u8 alpha = data[offset];
-				const u8 intensity = data[offset + 1];
-
-				// RGBA8
-				return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
-			}
-
-			case PICA::TextureFmt::RGB565: {
-				const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
-				const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
-
-				const u8 b = Colour::convert5To8Bit(getBits<0, 5, u8>(texel));
-				const u8 g = Colour::convert6To8Bit(getBits<5, 6, u8>(texel));
-				const u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
-
-				return (0xff << 24) | (b << 16) | (g << 8) | r;
-			}
-
-			default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
-		}
-	}
-	*/
-
 	void Texture::decodeTexture(std::span<const u8> data) {
 		std::vector<u8> decoded;
 		decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel);
diff --git a/src/core/renderer_mtl/pica_to_mtl.cpp b/src/core/renderer_mtl/pica_to_mtl.cpp
index 10b3eda8..1b421765 100644
--- a/src/core/renderer_mtl/pica_to_mtl.cpp
+++ b/src/core/renderer_mtl/pica_to_mtl.cpp
@@ -5,19 +5,39 @@
 using namespace Helpers;
 
 namespace PICA {
-
     PixelFormatInfo pixelFormatInfos[14] = {
 		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelABGR8ToRGBA8},    // RGBA8
 		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelBGR8ToRGBA8},     // RGB8
 		{MTL::PixelFormatBGR5A1Unorm, 2, decodeTexelA1BGR5ToBGR5A1}, // RGBA5551
 		{MTL::PixelFormatB5G6R5Unorm, 2, decodeTexelB5G6R5ToB5G6R5}, // RGB565
 		{MTL::PixelFormatABGR4Unorm, 2, decodeTexelABGR4ToABGR4},    // RGBA4
-		{MTL::PixelFormatRG8Unorm, 2, decodeTexelAI8ToRG8},          // IA8
+		{MTL::PixelFormatRG8Unorm, 2, decodeTexelAI8ToRG8, true,
+		    {
+				.red = MTL::TextureSwizzleRed,
+				.green = MTL::TextureSwizzleRed,
+				.blue = MTL::TextureSwizzleRed,
+				.alpha = MTL::TextureSwizzleGreen,
+			}
+		},                                                           // IA8
 		{MTL::PixelFormatRG8Unorm, 2, decodeTexelGR8ToRG8},          // RG8
-		{MTL::PixelFormatR8Unorm, 1, decodeTexelI8ToR8},             // I8
+		{MTL::PixelFormatR8Unorm, 1, decodeTexelI8ToR8, true,
+		    {
+				.red = MTL::TextureSwizzleRed,
+				.green = MTL::TextureSwizzleRed,
+				.blue = MTL::TextureSwizzleRed,
+				.alpha = MTL::TextureSwizzleOne
+			}
+		},                                                           // I8
 		{MTL::PixelFormatA8Unorm, 1, decodeTexelA8ToA8},             // A8
 		{MTL::PixelFormatABGR4Unorm, 2, decodeTexelAI4ToABGR4},      // IA4
-		{MTL::PixelFormatR8Unorm, 1, decodeTexelI4ToR8},             // I4
+		{MTL::PixelFormatR8Unorm, 1, decodeTexelI4ToR8, true,
+		    {
+				.red = MTL::TextureSwizzleRed,
+				.green = MTL::TextureSwizzleRed,
+				.blue = MTL::TextureSwizzleRed,
+				.alpha = MTL::TextureSwizzleOne
+			}
+		},                                                           // I4
 		{MTL::PixelFormatA8Unorm, 1, decodeTexelA4ToA8},             // A4
 		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1ToRGBA8},     // ETC1
 		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1A4ToRGBA8},   // ETC1A4
@@ -28,8 +48,14 @@ namespace PICA {
 			pixelFormatInfos[2] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelA1BGR5ToRGBA8};
 			pixelFormatInfos[3] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelB5G6R5ToRGBA8};
 			pixelFormatInfos[4] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelABGR4ToRGBA8};
-			pixelFormatInfos[9] = {MTL::PixelFormatRG8Unorm, 2, decodeTexelAI4ToRG8};
+			pixelFormatInfos[9] = {MTL::PixelFormatRG8Unorm, 2, decodeTexelAI4ToRG8, true,
+			    {
+					.red = MTL::TextureSwizzleRed,
+					.green = MTL::TextureSwizzleRed,
+					.blue = MTL::TextureSwizzleRed,
+					.alpha = MTL::TextureSwizzleGreen,
+				}
+			};
 		}
 	}
-
 }

From 67f0388eae898f6feaa9e0e1025de045e805392a Mon Sep 17 00:00:00 2001
From: Samuliak <samuliak77@gmail.com>
Date: Tue, 11 Mar 2025 08:40:21 +0100
Subject: [PATCH 14/19] metal: remove unused texture functions

---
 include/renderer_mtl/mtl_texture.hpp | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp
index 667fcb33..a2f111e5 100644
--- a/include/renderer_mtl/mtl_texture.hpp
+++ b/include/renderer_mtl/mtl_texture.hpp
@@ -54,18 +54,6 @@ namespace Metal {
 		void free();
 		u64 sizeInBytes();
 
-		u8 decodeTexelBGR8ToRGBA8(u32 u, u32 v, std::span<const u8> data);
-		u8 decodeTexelA1BGR5ToRGBA8(u32 u, u32 v, std::span<const u8> data);
-		u8 decodeTexelB5G6R5ToRGBA8(u32 u, u32 v, std::span<const u8> data);
-		u8 decodeTexelABGR4ToRGBA8(u32 u, u32 v, std::span<const u8> data);
-		u8 decodeTexelAI8ToRGBA8(u32 u, u32 v, std::span<const u8> data);
-		u8 decodeTexelI8ToRGBA8(u32 u, u32 v, std::span<const u8> data);
-		u8 decodeTexelAI4ToRGBA4(u32 u, u32 v, std::span<const u8> data);
-		u8 decodeTexelAI4ToRGBA8(u32 u, u32 v, std::span<const u8> data);
-		u8 decodeTexelI4ToRGBA4(u32 u, u32 v, std::span<const u8> data);
-		u8 decodeTexelI4ToRGBA8(u32 u, u32 v, std::span<const u8> data);
-		u8 decodeTexelA4ToA8(u32 u, u32 v, std::span<const u8> data);
-
 		// Returns the format of this texture as a string
 		std::string_view formatToString() { return PICA::textureFormatToString(format); }
 	};

From c061bb7b47a05168a4a2d789388cd6350bcbb741 Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Fri, 14 Mar 2025 12:38:09 +0200
Subject: [PATCH 15/19] Format

---
 include/renderer_mtl/mtl_texture.hpp      |   2 +-
 include/renderer_mtl/pica_to_mtl.hpp      |  19 +-
 src/core/renderer_mtl/mtl_texture.cpp     |  13 +-
 src/core/renderer_mtl/pica_to_mtl.cpp     |  89 ++++-----
 src/core/renderer_mtl/renderer_mtl.cpp    |   9 +-
 src/core/renderer_mtl/texture_decoder.cpp | 224 +++++++++++-----------
 src/host_shaders/metal_shaders.metal      |   6 +-
 7 files changed, 182 insertions(+), 180 deletions(-)

diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp
index a2f111e5..fd8a6062 100644
--- a/include/renderer_mtl/mtl_texture.hpp
+++ b/include/renderer_mtl/mtl_texture.hpp
@@ -28,7 +28,7 @@ namespace Metal {
 		// Range of VRAM taken up by buffer
 		Interval<u32> range;
 
-		PICA::PixelFormatInfo formatInfo;
+		PICA::MTLPixelFormatInfo formatInfo;
 		MTL::Texture* base = nullptr;
 		MTL::Texture* texture = nullptr;
 		MTL::SamplerState* sampler = nullptr;
diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp
index 5cb5947c..b8911f89 100644
--- a/include/renderer_mtl/pica_to_mtl.hpp
+++ b/include/renderer_mtl/pica_to_mtl.hpp
@@ -7,19 +7,24 @@
 #include "opengl.hpp"
 
 namespace PICA {
-	struct PixelFormatInfo {
+	struct MTLPixelFormatInfo {
 		MTL::PixelFormat pixelFormat;
 		size_t bytesPerTexel;
 		void (*decoder)(OpenGL::uvec2, u32, u32, std::span<const u8>, std::vector<u8>&);
-		bool needsSwizzle{false};
-		MTL::TextureSwizzleChannels swizzle{.red = MTL::TextureSwizzleRed, .green = MTL::TextureSwizzleGreen, .blue = MTL::TextureSwizzleBlue, .alpha = MTL::TextureSwizzleAlpha};
+
+		bool needsSwizzle = false;
+		MTL::TextureSwizzleChannels swizzle{
+			.red = MTL::TextureSwizzleRed,
+			.green = MTL::TextureSwizzleGreen,
+			.blue = MTL::TextureSwizzleBlue,
+			.alpha = MTL::TextureSwizzleAlpha,
+		};
 	};
 
-	extern PixelFormatInfo pixelFormatInfos[14];
+	extern MTLPixelFormatInfo mtlPixelFormatInfos[14];
 
-	void checkForPixelFormatSupport(MTL::Device* device);
-
-	inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast<int>(format)]; }
+	void checkForMTLPixelFormatSupport(MTL::Device* device);
+	inline MTLPixelFormatInfo getMTLPixelFormatInfo(TextureFmt format) { return mtlPixelFormatInfos[static_cast<int>(format)]; }
 
 	inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) {
 		switch (format) {
diff --git a/src/core/renderer_mtl/mtl_texture.cpp b/src/core/renderer_mtl/mtl_texture.cpp
index 9850cd54..49f78a13 100644
--- a/src/core/renderer_mtl/mtl_texture.cpp
+++ b/src/core/renderer_mtl/mtl_texture.cpp
@@ -1,16 +1,17 @@
 #include "renderer_mtl/mtl_texture.hpp"
 
+#include <fmt/format.h>
+
 #include <array>
 
 #include "colour.hpp"
 #include "renderer_mtl/objc_helper.hpp"
 
-
 using namespace Helpers;
 
 namespace Metal {
 	void Texture::allocate() {
-		formatInfo = PICA::getPixelFormatInfo(format);
+		formatInfo = PICA::getMTLPixelFormatInfo(format);
 
 		MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();
 		descriptor->setTextureType(MTL::TextureType2D);
@@ -20,9 +21,7 @@ namespace Metal {
 		descriptor->setUsage(MTL::TextureUsageShaderRead);
 		descriptor->setStorageMode(MTL::StorageModeShared);  // TODO: use private + staging buffers?
 		texture = device->newTexture(descriptor);
-		texture->setLabel(toNSString(
-			"Base texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v())
-		));
+		texture->setLabel(toNSString(fmt::format("Base texture {} {}x{}", std::string(PICA::textureFormatToString(format)), size.u(), size.v())));
 		descriptor->release();
 
 		if (formatInfo.needsSwizzle) {
@@ -63,9 +62,11 @@ namespace Metal {
 		if (texture) {
 			texture->release();
 		}
+
 		if (base) {
-		    base->release();
+			base->release();
 		}
+
 		if (sampler) {
 			sampler->release();
 		}
diff --git a/src/core/renderer_mtl/pica_to_mtl.cpp b/src/core/renderer_mtl/pica_to_mtl.cpp
index 1b421765..538e6d52 100644
--- a/src/core/renderer_mtl/pica_to_mtl.cpp
+++ b/src/core/renderer_mtl/pica_to_mtl.cpp
@@ -5,51 +5,52 @@
 using namespace Helpers;
 
 namespace PICA {
-    PixelFormatInfo pixelFormatInfos[14] = {
-		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelABGR8ToRGBA8},    // RGBA8
-		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelBGR8ToRGBA8},     // RGB8
-		{MTL::PixelFormatBGR5A1Unorm, 2, decodeTexelA1BGR5ToBGR5A1}, // RGBA5551
-		{MTL::PixelFormatB5G6R5Unorm, 2, decodeTexelB5G6R5ToB5G6R5}, // RGB565
-		{MTL::PixelFormatABGR4Unorm, 2, decodeTexelABGR4ToABGR4},    // RGBA4
-		{MTL::PixelFormatRG8Unorm, 2, decodeTexelAI8ToRG8, true,
-		    {
-				.red = MTL::TextureSwizzleRed,
-				.green = MTL::TextureSwizzleRed,
-				.blue = MTL::TextureSwizzleRed,
-				.alpha = MTL::TextureSwizzleGreen,
-			}
-		},                                                           // IA8
-		{MTL::PixelFormatRG8Unorm, 2, decodeTexelGR8ToRG8},          // RG8
-		{MTL::PixelFormatR8Unorm, 1, decodeTexelI8ToR8, true,
-		    {
-				.red = MTL::TextureSwizzleRed,
-				.green = MTL::TextureSwizzleRed,
-				.blue = MTL::TextureSwizzleRed,
-				.alpha = MTL::TextureSwizzleOne
-			}
-		},                                                           // I8
-		{MTL::PixelFormatA8Unorm, 1, decodeTexelA8ToA8},             // A8
-		{MTL::PixelFormatABGR4Unorm, 2, decodeTexelAI4ToABGR4},      // IA4
-		{MTL::PixelFormatR8Unorm, 1, decodeTexelI4ToR8, true,
-		    {
-				.red = MTL::TextureSwizzleRed,
-				.green = MTL::TextureSwizzleRed,
-				.blue = MTL::TextureSwizzleRed,
-				.alpha = MTL::TextureSwizzleOne
-			}
-		},                                                           // I4
-		{MTL::PixelFormatA8Unorm, 1, decodeTexelA4ToA8},             // A4
-		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1ToRGBA8},     // ETC1
-		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1A4ToRGBA8},   // ETC1A4
+	MTLPixelFormatInfo mtlPixelFormatInfos[14] = {
+		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelABGR8ToRGBA8},     // RGBA8
+		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelBGR8ToRGBA8},      // RGB8
+		{MTL::PixelFormatBGR5A1Unorm, 2, decodeTexelA1BGR5ToBGR5A1},  // RGBA5551
+		{MTL::PixelFormatB5G6R5Unorm, 2, decodeTexelB5G6R5ToB5G6R5},  // RGB565
+		{MTL::PixelFormatABGR4Unorm, 2, decodeTexelABGR4ToABGR4},     // RGBA4
+		{MTL::PixelFormatRG8Unorm,
+		 2,
+		 decodeTexelAI8ToRG8,
+		 true,
+		 {
+			 .red = MTL::TextureSwizzleRed,
+			 .green = MTL::TextureSwizzleRed,
+			 .blue = MTL::TextureSwizzleRed,
+			 .alpha = MTL::TextureSwizzleGreen,
+		 }},                                                 // IA8
+		{MTL::PixelFormatRG8Unorm, 2, decodeTexelGR8ToRG8},  // RG8
+		{MTL::PixelFormatR8Unorm,
+		 1,
+		 decodeTexelI8ToR8,
+		 true,
+		 {.red = MTL::TextureSwizzleRed, .green = MTL::TextureSwizzleRed, .blue = MTL::TextureSwizzleRed, .alpha = MTL::TextureSwizzleOne}},  // I8
+		{MTL::PixelFormatA8Unorm, 1, decodeTexelA8ToA8},                                                                                      // A8
+		{MTL::PixelFormatABGR4Unorm, 2, decodeTexelAI4ToABGR4},                                                                               // IA4
+		{MTL::PixelFormatR8Unorm,
+		 1,
+		 decodeTexelI4ToR8,
+		 true,
+		 {.red = MTL::TextureSwizzleRed, .green = MTL::TextureSwizzleRed, .blue = MTL::TextureSwizzleRed, .alpha = MTL::TextureSwizzleOne}},  // I4
+		{MTL::PixelFormatA8Unorm, 1, decodeTexelA4ToA8},                                                                                      // A4
+		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1ToRGBA8},                                                                              // ETC1
+		{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1A4ToRGBA8},  // ETC1A4
 	};
 
-	void checkForPixelFormatSupport(MTL::Device* device) {
-	    if (!device->supportsFamily(MTL::GPUFamilyApple1)) {
-			pixelFormatInfos[2] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelA1BGR5ToRGBA8};
-			pixelFormatInfos[3] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelB5G6R5ToRGBA8};
-			pixelFormatInfos[4] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelABGR4ToRGBA8};
-			pixelFormatInfos[9] = {MTL::PixelFormatRG8Unorm, 2, decodeTexelAI4ToRG8, true,
-			    {
+	void checkForMTLPixelFormatSupport(MTL::Device* device) {
+		if (!device->supportsFamily(MTL::GPUFamilyApple1)) {
+			mtlPixelFormatInfos[2] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelA1BGR5ToRGBA8};
+			mtlPixelFormatInfos[3] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelB5G6R5ToRGBA8};
+			mtlPixelFormatInfos[4] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelABGR4ToRGBA8};
+
+			mtlPixelFormatInfos[9] = {
+				MTL::PixelFormatRG8Unorm,
+				2,
+				decodeTexelAI4ToRG8,
+				true,
+				{
 					.red = MTL::TextureSwizzleRed,
 					.green = MTL::TextureSwizzleRed,
 					.blue = MTL::TextureSwizzleRed,
@@ -58,4 +59,4 @@ namespace PICA {
 			};
 		}
 	}
-}
+}  // namespace PICA
diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index 1719eaf3..6ec11aa3 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -102,19 +102,16 @@ void RendererMTL::display() {
 	renderCommandEncoder->setRenderPipelineState(displayPipeline);
 	renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0);
 
-    const int xMultiplier = 2;
-    const int yMultiplier = 2;
-
 	// Top screen
 	if (topScreen) {
-		renderCommandEncoder->setViewport(MTL::Viewport{0, 0, 400 * xMultiplier, 240 * yMultiplier, 0.0f, 1.0f});
+		renderCommandEncoder->setViewport(MTL::Viewport{0, 0, 400, 240, 0.0f, 1.0f});
 		renderCommandEncoder->setFragmentTexture(topScreen->get().texture, 0);
 		renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
 	}
 
 	// Bottom screen
 	if (bottomScreen) {
-		renderCommandEncoder->setViewport(MTL::Viewport{40 * xMultiplier, 240 * yMultiplier, 320 * xMultiplier, 240 * yMultiplier, 0.0f, 1.0f});
+		renderCommandEncoder->setViewport(MTL::Viewport{40, 240, 320, 240, 0.0f, 1.0f});
 		renderCommandEncoder->setFragmentTexture(bottomScreen->get().texture, 0);
 		renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
 	}
@@ -141,7 +138,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
 	device = MTL::CreateSystemDefaultDevice();
 	metalLayer->setDevice(device);
 #endif
-    checkForPixelFormatSupport(device);
+    checkForMTLPixelFormatSupport(device);
 
 	commandQueue = device->newCommandQueue();
 
diff --git a/src/core/renderer_mtl/texture_decoder.cpp b/src/core/renderer_mtl/texture_decoder.cpp
index c758e8f1..081d4889 100644
--- a/src/core/renderer_mtl/texture_decoder.cpp
+++ b/src/core/renderer_mtl/texture_decoder.cpp
@@ -3,8 +3,8 @@
 #include <array>
 #include <string>
 
-#include "math_util.hpp"
 #include "colour.hpp"
+#include "math_util.hpp"
 
 using namespace Helpers;
 
@@ -38,187 +38,185 @@ u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) {
 }
 
 void decodeTexelABGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    const u32 offset = getSwizzledOffset(u, v, size.u(), 4);
-   	const u8 alpha = inData[offset];
-   	const u8 b = inData[offset + 1];
-   	const u8 g = inData[offset + 2];
-   	const u8 r = inData[offset + 3];
+	const u32 offset = getSwizzledOffset(u, v, size.u(), 4);
+	const u8 alpha = inData[offset];
+	const u8 b = inData[offset + 1];
+	const u8 g = inData[offset + 2];
+	const u8 r = inData[offset + 3];
 
-    outData.push_back(r);
-    outData.push_back(g);
-    outData.push_back(b);
-    outData.push_back(alpha);
+	outData.push_back(r);
+	outData.push_back(g);
+	outData.push_back(b);
+	outData.push_back(alpha);
 }
 
 void decodeTexelBGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    const u32 offset = getSwizzledOffset(u, v, size.u(), 3);
-   	const u8 b = inData[offset];
-   	const u8 g = inData[offset + 1];
-   	const u8 r = inData[offset + 2];
+	const u32 offset = getSwizzledOffset(u, v, size.u(), 3);
+	const u8 b = inData[offset];
+	const u8 g = inData[offset + 1];
+	const u8 r = inData[offset + 2];
 
-    outData.push_back(r);
-    outData.push_back(g);
-    outData.push_back(b);
-    outData.push_back(0xff);
+	outData.push_back(r);
+	outData.push_back(g);
+	outData.push_back(b);
+	outData.push_back(0xff);
 }
 
 void decodeTexelA1BGR5ToBGR5A1(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
-   	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
+	const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
 
-   	u8 alpha = getBit<0>(texel);
-   	u8 b = getBits<1, 5, u8>(texel);
-   	u8 g = getBits<6, 5, u8>(texel);
-   	u8 r = getBits<11, 5, u8>(texel);
+	u8 alpha = getBit<0>(texel);
+	u8 b = getBits<1, 5, u8>(texel);
+	u8 g = getBits<6, 5, u8>(texel);
+	u8 r = getBits<11, 5, u8>(texel);
 
-   	u16 outTexel = (alpha << 15) | (r << 10) | (g << 5) | b;
-    outData.push_back(outTexel & 0xff);
-    outData.push_back((outTexel >> 8) & 0xff);
+	u16 outTexel = (alpha << 15) | (r << 10) | (g << 5) | b;
+	outData.push_back(outTexel & 0xff);
+	outData.push_back((outTexel >> 8) & 0xff);
 }
 
 void decodeTexelA1BGR5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
-   	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
+	const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
 
-   	u8 alpha = getBit<0>(texel) ? 0xff : 0;
-   	u8 b = Colour::convert5To8Bit(getBits<1, 5, u8>(texel));
-   	u8 g = Colour::convert5To8Bit(getBits<6, 5, u8>(texel));
-   	u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
+	u8 alpha = getBit<0>(texel) ? 0xff : 0;
+	u8 b = Colour::convert5To8Bit(getBits<1, 5, u8>(texel));
+	u8 g = Colour::convert5To8Bit(getBits<6, 5, u8>(texel));
+	u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
 
-    outData.push_back(r);
-    outData.push_back(g);
-    outData.push_back(b);
-    outData.push_back(alpha);
+	outData.push_back(r);
+	outData.push_back(g);
+	outData.push_back(b);
+	outData.push_back(alpha);
 }
 
 void decodeTexelB5G6R5ToB5G6R5(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
-   	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
+	const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
 
-    outData.push_back(texel & 0xff);
-    outData.push_back((texel >> 8) & 0xff);
+	outData.push_back(texel & 0xff);
+	outData.push_back((texel >> 8) & 0xff);
 }
 
 void decodeTexelB5G6R5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
-   	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
+	const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
 
-   	const u8 b = Colour::convert5To8Bit(getBits<0, 5, u8>(texel));
-   	const u8 g = Colour::convert6To8Bit(getBits<5, 6, u8>(texel));
-   	const u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
+	const u8 b = Colour::convert5To8Bit(getBits<0, 5, u8>(texel));
+	const u8 g = Colour::convert6To8Bit(getBits<5, 6, u8>(texel));
+	const u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
 
-    outData.push_back(r);
-    outData.push_back(g);
-    outData.push_back(b);
-    outData.push_back(0xff);
+	outData.push_back(r);
+	outData.push_back(g);
+	outData.push_back(b);
+	outData.push_back(0xff);
 }
 
 void decodeTexelABGR4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    u32 offset = getSwizzledOffset(u, v, size.u(), 2);
-   	u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
+	u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+	u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
 
-   	u8 alpha = getBits<0, 4, u8>(texel);
-   	u8 b = getBits<4, 4, u8>(texel);
-   	u8 g = getBits<8, 4, u8>(texel);
-   	u8 r = getBits<12, 4, u8>(texel);
+	u8 alpha = getBits<0, 4, u8>(texel);
+	u8 b = getBits<4, 4, u8>(texel);
+	u8 g = getBits<8, 4, u8>(texel);
+	u8 r = getBits<12, 4, u8>(texel);
 
-    outData.push_back((b << 4) | alpha);
-    outData.push_back((r << 4) | g);
+	outData.push_back((b << 4) | alpha);
+	outData.push_back((r << 4) | g);
 }
 
 void decodeTexelABGR4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    u32 offset = getSwizzledOffset(u, v, size.u(), 2);
-   	u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
+	u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+	u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
 
-   	u8 alpha = Colour::convert4To8Bit(getBits<0, 4, u8>(texel));
-   	u8 b = Colour::convert4To8Bit(getBits<4, 4, u8>(texel));
-   	u8 g = Colour::convert4To8Bit(getBits<8, 4, u8>(texel));
-   	u8 r = Colour::convert4To8Bit(getBits<12, 4, u8>(texel));
+	u8 alpha = Colour::convert4To8Bit(getBits<0, 4, u8>(texel));
+	u8 b = Colour::convert4To8Bit(getBits<4, 4, u8>(texel));
+	u8 g = Colour::convert4To8Bit(getBits<8, 4, u8>(texel));
+	u8 r = Colour::convert4To8Bit(getBits<12, 4, u8>(texel));
 
-    outData.push_back(r);
-    outData.push_back(g);
-    outData.push_back(b);
-    outData.push_back(alpha);
+	outData.push_back(r);
+	outData.push_back(g);
+	outData.push_back(b);
+	outData.push_back(alpha);
 }
 
 void decodeTexelAI8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+	u32 offset = getSwizzledOffset(u, v, size.u(), 2);
 
-   	// Same as I8 except each pixel gets its own alpha value too
-   	const u8 alpha = inData[offset];
-   	const u8 intensity = inData[offset + 1];
+	// Same as I8 except each pixel gets its own alpha value too
+	const u8 alpha = inData[offset];
+	const u8 intensity = inData[offset + 1];
 
-    outData.push_back(intensity);
-    outData.push_back(alpha);
+	outData.push_back(intensity);
+	outData.push_back(alpha);
 }
 
 void decodeTexelGR8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    u32 offset = getSwizzledOffset(u, v, size.u(), 2);
-   	constexpr u8 b = 0;
-   	const u8 g = inData[offset];
-   	const u8 r = inData[offset + 1];
+	u32 offset = getSwizzledOffset(u, v, size.u(), 2);
+	constexpr u8 b = 0;
+	const u8 g = inData[offset];
+	const u8 r = inData[offset + 1];
 
-    outData.push_back(r);
-    outData.push_back(g);
+	outData.push_back(r);
+	outData.push_back(g);
 }
 
 void decodeTexelI8ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    u32 offset = getSwizzledOffset(u, v, size.u(), 1);
-   	const u8 intensity = inData[offset];
+	u32 offset = getSwizzledOffset(u, v, size.u(), 1);
+	const u8 intensity = inData[offset];
 
-    outData.push_back(intensity);
+	outData.push_back(intensity);
 }
 
 void decodeTexelA8ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    u32 offset = getSwizzledOffset(u, v, size.u(), 1);
-   	const u8 alpha = inData[offset];
+	u32 offset = getSwizzledOffset(u, v, size.u(), 1);
+	const u8 alpha = inData[offset];
 
-   	outData.push_back(alpha);
+	outData.push_back(alpha);
 }
 
 void decodeTexelAI4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
-   	const u8 texel = inData[offset];
-   	const u8 alpha = texel & 0xf;
-   	const u8 intensity = texel >> 4;
+	const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
+	const u8 texel = inData[offset];
+	const u8 alpha = texel & 0xf;
+	const u8 intensity = texel >> 4;
 
-   	outData.push_back((intensity << 4) | intensity);
-   	outData.push_back((alpha << 4) | intensity);
+	outData.push_back((intensity << 4) | intensity);
+	outData.push_back((alpha << 4) | intensity);
 }
 
 void decodeTexelAI4ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
-   	const u8 texel = inData[offset];
-   	const u8 alpha = Colour::convert4To8Bit(texel & 0xf);
-   	const u8 intensity = Colour::convert4To8Bit(texel >> 4);
+	const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
+	const u8 texel = inData[offset];
+	const u8 alpha = Colour::convert4To8Bit(texel & 0xf);
+	const u8 intensity = Colour::convert4To8Bit(texel >> 4);
 
-   	outData.push_back(intensity);
-   	outData.push_back(alpha);
+	outData.push_back(intensity);
+	outData.push_back(alpha);
 }
 
 void decodeTexelI4ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
+	u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
 
-   	// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
-   	u8 intensity = inData[offset] >> ((u % 2) ? 4 : 0);
-   	intensity = Colour::convert4To8Bit(getBits<0, 4>(intensity));
+	// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
+	u8 intensity = inData[offset] >> ((u % 2) ? 4 : 0);
+	intensity = Colour::convert4To8Bit(getBits<0, 4>(intensity));
 
-   	outData.push_back(intensity);
+	outData.push_back(intensity);
 }
 
 void decodeTexelA4ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
+	const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
 
-   	// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
-   	u8 alpha = inData[offset] >> ((u % 2) ? 4 : 0);
-   	alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha));
+	// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
+	u8 alpha = inData[offset] >> ((u % 2) ? 4 : 0);
+	alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha));
 
-   	outData.push_back(alpha);
+	outData.push_back(alpha);
 }
 
-static constexpr u32 signExtend3To32(u32 val) {
-    return (u32)(s32(val) << 29 >> 29);
-}
+static constexpr u32 signExtend3To32(u32 val) { return (u32)(s32(val) << 29 >> 29); }
 
 void decodeETC(u32 u, u32 v, u64 colourData, u32 alpha, std::vector<u8>& outData) {
 	static constexpr u32 modifiers[8][2] = {
@@ -328,9 +326,9 @@ void getTexelETC(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, s
 }
 
 void decodeTexelETC1ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    getTexelETC<false>(size, u, v, inData, outData);
+	getTexelETC<false>(size, u, v, inData, outData);
 }
 
 void decodeTexelETC1A4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
-    getTexelETC<true>(size, u, v, inData, outData);
+	getTexelETC<true>(size, u, v, inData, outData);
 }
diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal
index 5cd6b643..6670f650 100644
--- a/src/host_shaders/metal_shaders.metal
+++ b/src/host_shaders/metal_shaders.metal
@@ -655,7 +655,7 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) {
     return as_type<float4>(performLogicOpU(logicOp, as_type<uint4>(s), as_type<uint4>(d)));
 }
 
-fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture2d_array<float> texLightingLut [[texture(3)]], texture1d_array<float> texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
+fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture2d_array<float> texLightingLut [[texture(3)]], texture1d_array<float> texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
     Globals globals;
 
     // HACK
@@ -755,5 +755,5 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], constant PicaRegs& p
 		}
 	}
 
-	return performLogicOp(logicOp, color, float4(1.0, 0.0, 0.0, 1.0));
-}
+	return performLogicOp(logicOp, color, prevColor);
+}
\ No newline at end of file

From fa123cea3f6211eba55a37d6de55f68f6ef8486f Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Fri, 14 Mar 2025 12:42:17 +0200
Subject: [PATCH 16/19] Undo submodule changes

---
 third_party/LuaJIT | 2 +-
 third_party/oaknut | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/third_party/LuaJIT b/third_party/LuaJIT
index 41edf095..8bf7686d 160000
--- a/third_party/LuaJIT
+++ b/third_party/LuaJIT
@@ -1 +1 @@
-Subproject commit 41edf0959b9504d36dd85f5f16893c004ea7d7ba
+Subproject commit 8bf7686d820f868eae1a522c481fee09c18c90b9
diff --git a/third_party/oaknut b/third_party/oaknut
index 94c726ce..790374d7 160000
--- a/third_party/oaknut
+++ b/third_party/oaknut
@@ -1 +1 @@
-Subproject commit 94c726ce0338b054eb8cb5ea91de8fe6c19f4392
+Subproject commit 790374d7e66257b1f8ed89d798e5dcfb5363af05

From e635b9ec8a45b7d612b53fdf2366e69c68b73e4f Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Sun, 16 Mar 2025 13:52:32 +0200
Subject: [PATCH 17/19] Metal: Use std::unique_ptr for texture decode

---
 include/renderer_mtl/pica_to_mtl.hpp      |   2 +-
 include/renderer_mtl/texture_decoder.hpp  |  36 +++----
 src/core/renderer_mtl/mtl_texture.cpp     |  21 ++--
 src/core/renderer_mtl/renderer_mtl.cpp    |   4 +-
 src/core/renderer_mtl/texture_decoder.cpp | 124 +++++++++++-----------
 5 files changed, 95 insertions(+), 92 deletions(-)

diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp
index b8911f89..d4c6dc7c 100644
--- a/include/renderer_mtl/pica_to_mtl.hpp
+++ b/include/renderer_mtl/pica_to_mtl.hpp
@@ -10,7 +10,7 @@ namespace PICA {
 	struct MTLPixelFormatInfo {
 		MTL::PixelFormat pixelFormat;
 		size_t bytesPerTexel;
-		void (*decoder)(OpenGL::uvec2, u32, u32, std::span<const u8>, std::vector<u8>&);
+		void (*decoder)(OpenGL::uvec2, u32, u32, std::span<const u8>, u8*);
 
 		bool needsSwizzle = false;
 		MTL::TextureSwizzleChannels swizzle{
diff --git a/include/renderer_mtl/texture_decoder.hpp b/include/renderer_mtl/texture_decoder.hpp
index 6fd5cfb5..376231c0 100644
--- a/include/renderer_mtl/texture_decoder.hpp
+++ b/include/renderer_mtl/texture_decoder.hpp
@@ -4,21 +4,21 @@
 // TODO: remove dependency on OpenGL
 #include "opengl.hpp"
 
-void decodeTexelABGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelBGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelA1BGR5ToBGR5A1(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelA1BGR5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelB5G6R5ToB5G6R5(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelB5G6R5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelABGR4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelABGR4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelAI8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelGR8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelI8ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelA8ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelAI4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelAI4ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelI4ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelA4ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelETC1ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
-void decodeTexelETC1A4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData);
+void decodeTexelABGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelBGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelA1BGR5ToBGR5A1(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelA1BGR5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelB5G6R5ToB5G6R5(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelB5G6R5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelABGR4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelABGR4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelAI8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelGR8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelI8ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelA8ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelAI4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelAI4ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelI4ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelA4ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelETC1ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
+void decodeTexelETC1A4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
diff --git a/src/core/renderer_mtl/mtl_texture.cpp b/src/core/renderer_mtl/mtl_texture.cpp
index 49f78a13..29d4024a 100644
--- a/src/core/renderer_mtl/mtl_texture.cpp
+++ b/src/core/renderer_mtl/mtl_texture.cpp
@@ -3,6 +3,7 @@
 #include <fmt/format.h>
 
 #include <array>
+#include <memory>
 
 #include "colour.hpp"
 #include "renderer_mtl/objc_helper.hpp"
@@ -109,16 +110,18 @@ namespace Metal {
 	}
 
 	void Texture::decodeTexture(std::span<const u8> data) {
-		std::vector<u8> decoded;
-		decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel);
+		std::unique_ptr<u8[]> decodedData(new u8[u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel]);
+		// Write cursor into decodedData: each decoder call writes one texel here, and the loop below advances it by bytesPerTexel
+		u8* decodePtr = decodedData.get();
 
-  		// Decode texels line by line
-  		for (u32 v = 0; v < size.v(); v++) {
- 			for (u32 u = 0; u < size.u(); u++) {
-                formatInfo.decoder(size, u, v, data, decoded);
- 			}
-  		}
+		// Decode texels line by line
+		for (u32 v = 0; v < size.v(); v++) {
+			for (u32 u = 0; u < size.u(); u++) {
+				formatInfo.decoder(size, u, v, data, decodePtr);
+				decodePtr += formatInfo.bytesPerTexel;
+			}
+		}
 
-		texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0);
+		texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decodedData.get(), formatInfo.bytesPerTexel * size.u(), 0);
 	}
 }  // namespace Metal
diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp
index 6ec11aa3..e6fdf653 100644
--- a/src/core/renderer_mtl/renderer_mtl.cpp
+++ b/src/core/renderer_mtl/renderer_mtl.cpp
@@ -128,7 +128,7 @@ void RendererMTL::display() {
 }
 
 void RendererMTL::initGraphicsContext(SDL_Window* window) {
-	// On iOS, the SwiftUI side handles MetalLayer & the CommandQueue
+	// On iOS, the SwiftUI side handles the MetalLayer
 #ifdef PANDA3DS_IOS
 	device = MTL::CreateSystemDefaultDevice();
 #else
@@ -138,7 +138,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
 	device = MTL::CreateSystemDefaultDevice();
 	metalLayer->setDevice(device);
 #endif
-    checkForMTLPixelFormatSupport(device);
+	checkForMTLPixelFormatSupport(device);
 
 	commandQueue = device->newCommandQueue();
 
diff --git a/src/core/renderer_mtl/texture_decoder.cpp b/src/core/renderer_mtl/texture_decoder.cpp
index 081d4889..06db2d76 100644
--- a/src/core/renderer_mtl/texture_decoder.cpp
+++ b/src/core/renderer_mtl/texture_decoder.cpp
@@ -37,32 +37,32 @@ u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) {
 	return offset / 2;
 }
 
-void decodeTexelABGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelABGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	const u32 offset = getSwizzledOffset(u, v, size.u(), 4);
 	const u8 alpha = inData[offset];
 	const u8 b = inData[offset + 1];
 	const u8 g = inData[offset + 2];
 	const u8 r = inData[offset + 3];
 
-	outData.push_back(r);
-	outData.push_back(g);
-	outData.push_back(b);
-	outData.push_back(alpha);
+	*outData++ = r;
+	*outData++ = g;
+	*outData++ = b;
+	*outData++ = alpha;
 }
 
-void decodeTexelBGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelBGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	const u32 offset = getSwizzledOffset(u, v, size.u(), 3);
 	const u8 b = inData[offset];
 	const u8 g = inData[offset + 1];
 	const u8 r = inData[offset + 2];
 
-	outData.push_back(r);
-	outData.push_back(g);
-	outData.push_back(b);
-	outData.push_back(0xff);
+	*outData++ = r;
+	*outData++ = g;
+	*outData++ = b;
+	*outData++ = 0xff;
 }
 
-void decodeTexelA1BGR5ToBGR5A1(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelA1BGR5ToBGR5A1(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
 	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
 
@@ -72,11 +72,11 @@ void decodeTexelA1BGR5ToBGR5A1(OpenGL::uvec2 size, u32 u, u32 v, std::span<const
 	u8 r = getBits<11, 5, u8>(texel);
 
 	u16 outTexel = (alpha << 15) | (r << 10) | (g << 5) | b;
-	outData.push_back(outTexel & 0xff);
-	outData.push_back((outTexel >> 8) & 0xff);
+	*outData++ = outTexel & 0xff;
+	*outData++ = (outTexel >> 8) & 0xff;
 }
 
-void decodeTexelA1BGR5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelA1BGR5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
 	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
 
@@ -85,21 +85,21 @@ void decodeTexelA1BGR5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const
 	u8 g = Colour::convert5To8Bit(getBits<6, 5, u8>(texel));
 	u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
 
-	outData.push_back(r);
-	outData.push_back(g);
-	outData.push_back(b);
-	outData.push_back(alpha);
+	*outData++ = r;
+	*outData++ = g;
+	*outData++ = b;
+	*outData++ = alpha;
 }
 
-void decodeTexelB5G6R5ToB5G6R5(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelB5G6R5ToB5G6R5(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
 	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
 
-	outData.push_back(texel & 0xff);
-	outData.push_back((texel >> 8) & 0xff);
+	*outData++ = texel & 0xff;
+	*outData++ = (texel >> 8) & 0xff;
 }
 
-void decodeTexelB5G6R5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelB5G6R5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
 	const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
 
@@ -107,13 +107,13 @@ void decodeTexelB5G6R5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const
 	const u8 g = Colour::convert6To8Bit(getBits<5, 6, u8>(texel));
 	const u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
 
-	outData.push_back(r);
-	outData.push_back(g);
-	outData.push_back(b);
-	outData.push_back(0xff);
+	*outData++ = r;
+	*outData++ = g;
+	*outData++ = b;
+	*outData++ = 0xff;
 }
 
-void decodeTexelABGR4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelABGR4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	u32 offset = getSwizzledOffset(u, v, size.u(), 2);
 	u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
 
@@ -122,11 +122,11 @@ void decodeTexelABGR4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u
 	u8 g = getBits<8, 4, u8>(texel);
 	u8 r = getBits<12, 4, u8>(texel);
 
-	outData.push_back((b << 4) | alpha);
-	outData.push_back((r << 4) | g);
+	*outData++ = (b << 4) | alpha;
+	*outData++ = (r << 4) | g;
 }
 
-void decodeTexelABGR4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelABGR4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	u32 offset = getSwizzledOffset(u, v, size.u(), 2);
 	u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8);
 
@@ -135,90 +135,90 @@ void decodeTexelABGR4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u
 	u8 g = Colour::convert4To8Bit(getBits<8, 4, u8>(texel));
 	u8 r = Colour::convert4To8Bit(getBits<12, 4, u8>(texel));
 
-	outData.push_back(r);
-	outData.push_back(g);
-	outData.push_back(b);
-	outData.push_back(alpha);
+	*outData++ = r;
+	*outData++ = g;
+	*outData++ = b;
+	*outData++ = alpha;
 }
 
-void decodeTexelAI8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelAI8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	u32 offset = getSwizzledOffset(u, v, size.u(), 2);
 
 	// Same as I8 except each pixel gets its own alpha value too
 	const u8 alpha = inData[offset];
 	const u8 intensity = inData[offset + 1];
 
-	outData.push_back(intensity);
-	outData.push_back(alpha);
+	*outData++ = intensity;
+	*outData++ = alpha;
 }
 
-void decodeTexelGR8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelGR8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	u32 offset = getSwizzledOffset(u, v, size.u(), 2);
 	constexpr u8 b = 0;
 	const u8 g = inData[offset];
 	const u8 r = inData[offset + 1];
 
-	outData.push_back(r);
-	outData.push_back(g);
+	*outData++ = r;
+	*outData++ = g;
 }
 
-void decodeTexelI8ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelI8ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	u32 offset = getSwizzledOffset(u, v, size.u(), 1);
 	const u8 intensity = inData[offset];
 
-	outData.push_back(intensity);
+	*outData++ = intensity;
 }
 
-void decodeTexelA8ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelA8ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	u32 offset = getSwizzledOffset(u, v, size.u(), 1);
 	const u8 alpha = inData[offset];
 
-	outData.push_back(alpha);
+	*outData++ = alpha;
 }
 
-void decodeTexelAI4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelAI4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
 	const u8 texel = inData[offset];
 	const u8 alpha = texel & 0xf;
 	const u8 intensity = texel >> 4;
 
-	outData.push_back((intensity << 4) | intensity);
-	outData.push_back((alpha << 4) | intensity);
+	*outData++ = (intensity << 4) | intensity;
+	*outData++ = (alpha << 4) | intensity;
 }
 
-void decodeTexelAI4ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelAI4ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
 	const u8 texel = inData[offset];
 	const u8 alpha = Colour::convert4To8Bit(texel & 0xf);
 	const u8 intensity = Colour::convert4To8Bit(texel >> 4);
 
-	outData.push_back(intensity);
-	outData.push_back(alpha);
+	*outData++ = intensity;
+	*outData++ = alpha;
 }
 
-void decodeTexelI4ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelI4ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
 
 	// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
 	u8 intensity = inData[offset] >> ((u % 2) ? 4 : 0);
 	intensity = Colour::convert4To8Bit(getBits<0, 4>(intensity));
 
-	outData.push_back(intensity);
+	*outData++ = intensity;
 }
 
-void decodeTexelA4ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelA4ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
 
 	// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
 	u8 alpha = inData[offset] >> ((u % 2) ? 4 : 0);
 	alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha));
 
-	outData.push_back(alpha);
+	*outData++ = alpha;
 }
 
 static constexpr u32 signExtend3To32(u32 val) { return (u32)(s32(val) << 29 >> 29); }
 
-void decodeETC(u32 u, u32 v, u64 colourData, u32 alpha, std::vector<u8>& outData) {
+void decodeETC(u32 u, u32 v, u64 colourData, u32 alpha, u8* outData) {
 	static constexpr u32 modifiers[8][2] = {
 		{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183},
 	};
@@ -280,14 +280,14 @@ void decodeETC(u32 u, u32 v, u64 colourData, u32 alpha, std::vector<u8>& outData
 	g = std::clamp(g + modifier, 0, 255);
 	b = std::clamp(b + modifier, 0, 255);
 
-	outData.push_back(r);
-	outData.push_back(g);
-	outData.push_back(b);
-	outData.push_back(alpha);
+	*outData++ = r;
+	*outData++ = g;
+	*outData++ = b;
+	*outData++ = alpha;
 }
 
 template <bool hasAlpha>
-void getTexelETC(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void getTexelETC(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	// Pixel offset of the 8x8 tile based on u, v and the width of the texture
 	u32 offs = ((u & ~7) * 8) + ((v & ~7) * size.u());
 	if (!hasAlpha) {
@@ -325,10 +325,10 @@ void getTexelETC(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, s
 	decodeETC(u, v, colourData, alpha, outData);
 }
 
-void decodeTexelETC1ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelETC1ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	getTexelETC<false>(size, u, v, inData, outData);
 }
 
-void decodeTexelETC1A4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, std::vector<u8>& outData) {
+void decodeTexelETC1A4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
 	getTexelETC<true>(size, u, v, inData, outData);
 }

From 6d0479d7c17329c28f0eca2afba6236b98b9ce0d Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Sun, 16 Mar 2025 15:17:17 +0200
Subject: [PATCH 18/19] Metal renderer fixes for iOS

---
 include/config.hpp                   |  9 ++++++++-
 src/config.cpp                       |  4 ++--
 src/host_shaders/metal_shaders.metal | 27 ++++++++++++++-------------
 src/ios_driver.mm                    |  6 +-----
 4 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/include/config.hpp b/include/config.hpp
index d45aa05c..49597214 100644
--- a/include/config.hpp
+++ b/include/config.hpp
@@ -55,6 +55,13 @@ struct EmulatorConfig {
 	static constexpr bool audioEnabledDefault = false;
 #endif
 
+	// We default to OpenGL on all platforms other than iOS
+#if defined(PANDA3DS_IOS)
+	static constexpr RendererType rendererDefault = RendererType::Metal;
+#else
+	static constexpr RendererType rendererDefault = RendererType::OpenGL;
+#endif
+
 	bool shaderJitEnabled = shaderJitDefault;
 	bool useUbershaders = ubershaderDefault;
 	bool accelerateShaders = accelerateShadersDefault;
@@ -65,7 +72,7 @@ struct EmulatorConfig {
 	bool forceShadergenForLights = true;
 	int lightShadergenThreshold = 1;
 
-	RendererType rendererType = RendererType::OpenGL;
+	RendererType rendererType = rendererDefault;
 	Audio::DSPCore::Type dspType = Audio::DSPCore::Type::HLE;
 
 	bool sdCardInserted = true;
diff --git a/src/config.cpp b/src/config.cpp
index 9b262744..3ff83f89 100644
--- a/src/config.cpp
+++ b/src/config.cpp
@@ -72,14 +72,14 @@ void EmulatorConfig::load() {
 			auto gpu = gpuResult.unwrap();
 
 			// Get renderer
-			auto rendererName = toml::find_or<std::string>(gpu, "Renderer", "OpenGL");
+			auto rendererName = toml::find_or<std::string>(gpu, "Renderer", Renderer::typeToString(rendererDefault));
 			auto configRendererType = Renderer::typeFromString(rendererName);
 
 			if (configRendererType.has_value()) {
 				rendererType = configRendererType.value();
 			} else {
 				Helpers::warn("Invalid renderer specified: %s\n", rendererName.c_str());
-				rendererType = RendererType::OpenGL;
+				rendererType = rendererDefault;
 			}
 
 			shaderJitEnabled = toml::find_or<toml::boolean>(gpu, "EnableShaderJIT", shaderJitDefault);
diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal
index 6670f650..b9640816 100644
--- a/src/host_shaders/metal_shaders.metal
+++ b/src/host_shaders/metal_shaders.metal
@@ -1,4 +1,6 @@
 #include <metal_stdlib>
+#include <TargetConditionals.h>
+
 using namespace metal;
 
 struct BasicVertexOut {
@@ -219,12 +221,6 @@ struct Globals {
 	uint GPUREG_LIGHTING_LUTINPUT_SELECT;
 	uint GPUREG_LIGHTi_CONFIG;
 
-	// HACK
-	//bool lightingEnabled;
-    //uint8_t lightingNumLights;
-    //uint32_t lightingConfig1;
-    //uint16_t alphaControl;
-
     float3 normal;
 };
 
@@ -655,14 +651,15 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) {
     return as_type<float4>(performLogicOpU(logicOp, as_type<uint4>(s), as_type<uint4>(d)));
 }
 
-fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture2d_array<float> texLightingLut [[texture(3)]], texture1d_array<float> texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
-    Globals globals;
+// The iOS simulator doesn't support framebuffer fetch, so only take prevColor as an input on other targets. TargetConditionals.h always defines TARGET_OS_SIMULATOR (as 0 or 1), so test its value with #if instead of #ifndef
+#if !TARGET_OS_SIMULATOR
+#define PREVIOUS_COLOR_DECL float4 prevColor [[color(0)]],
+#else
+#define PREVIOUS_COLOR_DECL
+#endif
 
-    // HACK
-    //globals.lightingEnabled = picaRegs.read(0x008Fu) != 0u;
-    //globals.lightingNumLights = picaRegs.read(0x01C2u);
-    //globals.lightingConfig1 = picaRegs.read(0x01C4u);
-    //globals.alphaControl = picaRegs.read(0x104);
+fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], PREVIOUS_COLOR_DECL constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture2d_array<float> texLightingLut [[texture(3)]], texture1d_array<float> texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
+    Globals globals;
 
     globals.tevSources[0] = in.color;
     if (lightingEnabled) {
@@ -755,5 +752,9 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
 		}
 	}
 
+#if !TARGET_OS_SIMULATOR
 	return performLogicOp(logicOp, color, prevColor);
+#else
+	return performLogicOp(logicOp, color, float4(0.0));
+#endif
 }
\ No newline at end of file
diff --git a/src/ios_driver.mm b/src/ios_driver.mm
index d9a0d544..6025b27a 100644
--- a/src/ios_driver.mm
+++ b/src/ios_driver.mm
@@ -22,12 +22,8 @@ IOS_EXPORT void iosCreateEmulator() {
 	hidService = &emulator->getServiceManager().getHID();
 	emulator->initGraphicsContext(nullptr);
 
-	// auto path = emulator->getAppDataRoot() / "Kirb Demo.3ds";
-	auto path = emulator->getAppDataRoot() / "Kirb Demo.3ds";
-
-	//auto path = emulator->getAppDataRoot() / "toon_shading.elf";
+	auto path = emulator->getAppDataRoot() / "toon_shading.elf";
 	emulator->loadROM(path);
-	printf("Created emulator\n");
 }
 
 IOS_EXPORT void iosRunFrame(CAMetalLayer* layer) {

From 449c14093d9ce3f6c3df0b83998b7b9664368128 Mon Sep 17 00:00:00 2001
From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com>
Date: Sun, 16 Mar 2025 16:27:23 +0200
Subject: [PATCH 19/19] iOS driver: Add doc comments

---
 src/ios_driver.mm | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/ios_driver.mm b/src/ios_driver.mm
index 6025b27a..b11c1510 100644
--- a/src/ios_driver.mm
+++ b/src/ios_driver.mm
@@ -4,12 +4,14 @@ extern "C" {
 #include "ios_driver.h"
 }
 
+// Apple's Foundation headers define some macros globally that create issues with our own code, so remove the definitions
 #undef ABS
 #undef NO
 
 #include <memory>
 #include "emulator.hpp"
 
+// The Objective-C++ bridge functions must be exported without name mangling in order for the SwiftUI frontend to be able to call them
 #define IOS_EXPORT extern "C" __attribute__((visibility("default")))
 
 std::unique_ptr<Emulator> emulator = nullptr;
@@ -22,6 +24,7 @@ IOS_EXPORT void iosCreateEmulator() {
 	hidService = &emulator->getServiceManager().getHID();
 	emulator->initGraphicsContext(nullptr);
 
+	// TODO: Add game selection on iOS frontend
 	auto path = emulator->getAppDataRoot() / "toon_shading.elf";
 	emulator->loadROM(path);
 }