From 1fa9ce126b0f5b24707b4ca79111a964827cd787 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 23 Jul 2024 10:54:01 +0200 Subject: [PATCH 01/16] add: period at the end of a sentence --- src/miniaudio.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/miniaudio.cpp b/src/miniaudio.cpp index e42fea68..a61979e0 100644 --- a/src/miniaudio.cpp +++ b/src/miniaudio.cpp @@ -1,5 +1,5 @@ // We do not need the ability to be able to encode or decode audio files for the time being -// So we disable said functionality to make the executable smaller +// So we disable said functionality to make the executable smaller. #define MA_NO_DECODING #define MA_NO_ENCODING #define MINIAUDIO_IMPLEMENTATION From 98b5d560215d5899d12bea2ff0f161263bc71d8b Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 16 Aug 2024 10:06:56 +0200 Subject: [PATCH 02/16] metal: add all the files --- .gitignore | 4 + CMakeLists.txt | 86 +- .../renderer_mtl/mtl_blit_pipeline_cache.hpp | 75 ++ .../renderer_mtl/mtl_depth_stencil_cache.hpp | 86 ++ .../renderer_mtl/mtl_draw_pipeline_cache.hpp | 174 ++++ include/renderer_mtl/mtl_render_target.hpp | 92 +++ include/renderer_mtl/mtl_texture.hpp | 77 ++ .../renderer_mtl/mtl_vertex_buffer_cache.hpp | 80 ++ include/renderer_mtl/objc_helper.hpp | 16 + include/renderer_mtl/pica_to_mtl.hpp | 155 ++++ include/renderer_mtl/renderer_mtl.hpp | 189 +++++ src/core/renderer_mtl/metal_cpp_impl.cpp | 6 + src/core/renderer_mtl/mtl_etc1.cpp | 124 +++ src/core/renderer_mtl/mtl_texture.cpp | 312 +++++++ src/core/renderer_mtl/objc_helper.mm | 12 + src/core/renderer_mtl/renderer_mtl.cpp | 774 +++++++++++++++++ .../metal_copy_to_lut_texture.metal | 9 + src/host_shaders/metal_shaders.metal | 782 ++++++++++++++++++ 18 files changed, 3041 insertions(+), 12 deletions(-) create mode 100644 include/renderer_mtl/mtl_blit_pipeline_cache.hpp create mode 100644 include/renderer_mtl/mtl_depth_stencil_cache.hpp create mode 100644 include/renderer_mtl/mtl_draw_pipeline_cache.hpp 
create mode 100644 include/renderer_mtl/mtl_render_target.hpp create mode 100644 include/renderer_mtl/mtl_texture.hpp create mode 100644 include/renderer_mtl/mtl_vertex_buffer_cache.hpp create mode 100644 include/renderer_mtl/objc_helper.hpp create mode 100644 include/renderer_mtl/pica_to_mtl.hpp create mode 100644 include/renderer_mtl/renderer_mtl.hpp create mode 100644 src/core/renderer_mtl/metal_cpp_impl.cpp create mode 100644 src/core/renderer_mtl/mtl_etc1.cpp create mode 100644 src/core/renderer_mtl/mtl_texture.cpp create mode 100644 src/core/renderer_mtl/objc_helper.mm create mode 100644 src/core/renderer_mtl/renderer_mtl.cpp create mode 100644 src/host_shaders/metal_copy_to_lut_texture.metal create mode 100644 src/host_shaders/metal_shaders.metal diff --git a/.gitignore b/.gitignore index 528462ad..817786a3 100644 --- a/.gitignore +++ b/.gitignore @@ -64,5 +64,9 @@ fb.bat *.elf *.smdh +# Compiled Metal shader files +*.ir +*.metallib + config.toml CMakeSettings.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 2865a3f8..31fdd9f2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,16 +26,17 @@ endif() if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security") -endif() +endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-interference-size") -endif() +endif() option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" ON) option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF) option(ENABLE_OPENGL "Enable OpenGL rendering backend" ON) option(ENABLE_VULKAN "Enable Vulkan rendering backend" ON) +option(ENABLE_METAL "Enable Metal rendering backend (if available)" ON) option(ENABLE_LTO "Enable link-time optimization" OFF) option(ENABLE_TESTS "Compile unit-tests" OFF) option(ENABLE_USER_BUILD "Make a user-facing build. 
These builds have various assertions disabled, LTO, and more" OFF) @@ -55,11 +56,6 @@ if(BUILD_LIBRETRO_CORE) add_compile_definitions(__LIBRETRO__) endif() -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND ENABLE_USER_BUILD) - # Disable stack buffer overflow checks in user builds - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS-") -endif() - add_library(AlberCore STATIC) include_directories(${PROJECT_SOURCE_DIR}/include/) @@ -240,7 +236,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/services/mic.hpp include/services/cecd.hpp include/services/ac.hpp include/services/am.hpp include/services/boss.hpp include/services/frd.hpp include/services/nim.hpp include/fs/archive_ext_save_data.hpp include/fs/archive_ncch.hpp include/services/mcu/mcu_hwc.hpp - include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp + include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp include/services/ldr_ro.hpp include/ipc.hpp include/services/act.hpp include/services/nfc.hpp include/system_models.hpp include/services/dlp_srvr.hpp include/PICA/dynapica/pica_recs.hpp include/PICA/dynapica/x64_regs.hpp include/PICA/dynapica/vertex_loader_rec.hpp include/PICA/dynapica/shader_rec.hpp @@ -251,7 +247,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/config.hpp include/services/ir_user.hpp include/http_server.hpp include/cheats.hpp include/action_replay.hpp include/renderer_sw/renderer_sw.hpp include/compiler_builtins.hpp include/fs/romfs.hpp include/fs/ivfc.hpp include/discord_rpc.hpp include/services/http.hpp include/result/result_cfg.hpp - include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp + include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp include/services/news_u.hpp include/applets/software_keyboard.hpp 
include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp @@ -260,7 +256,6 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp - include/sdl_gyro.hpp ) cmrc_add_resource_library( @@ -418,8 +413,75 @@ if(ENABLE_VULKAN) target_link_libraries(AlberCore PRIVATE Vulkan::Vulkan resources_renderer_vk) endif() +if(ENABLE_METAL AND APPLE) + set(RENDERER_MTL_INCLUDE_FILES include/renderer_mtl/renderer_mtl.hpp + include/renderer_mtl/mtl_depth_stencil_cache.hpp + include/renderer_mtl/mtl_blit_pipeline_cache.hpp + include/renderer_mtl/mtl_draw_pipeline_cache.hpp + include/renderer_mtl/mtl_render_target.hpp + include/renderer_mtl/mtl_texture.hpp + include/renderer_mtl/mtl_vertex_buffer_cache.hpp + include/renderer_mtl/pica_to_mtl.hpp + include/renderer_mtl/objc_helper.hpp + ) + + set(RENDERER_MTL_SOURCE_FILES src/core/renderer_mtl/metal_cpp_impl.cpp + src/core/renderer_mtl/renderer_mtl.cpp + src/core/renderer_mtl/mtl_texture.cpp + src/core/renderer_mtl/mtl_etc1.cpp + src/core/renderer_mtl/objc_helper.mm + src/host_shaders/metal_shaders.metal + src/host_shaders/metal_copy_to_lut_texture.metal + ) + + set(HEADER_FILES ${HEADER_FILES} ${RENDERER_MTL_INCLUDE_FILES}) + source_group("Source Files\\Core\\Metal Renderer" FILES ${RENDERER_MTL_SOURCE_FILES}) + + set(RENDERER_MTL_HOST_SHADERS_SOURCES) + function (add_metal_shader SHADER) + set(SHADER_SOURCE 
"${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metal") + set(SHADER_IR "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.ir") + set(SHADER_METALLIB "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metallib") + # TODO: only include sources in debug builds + add_custom_command( + OUTPUT ${SHADER_IR} + COMMAND xcrun -sdk macosx metal -gline-tables-only -frecord-sources -o ${SHADER_IR} -c ${SHADER_SOURCE} + DEPENDS ${SHADER_SOURCE} + VERBATIM) + add_custom_command( + OUTPUT ${SHADER_METALLIB} + COMMAND xcrun -sdk macosx metallib -o ${SHADER_METALLIB} ${SHADER_IR} + DEPENDS ${SHADER_IR} + VERBATIM) + set(RENDERER_MTL_HOST_SHADERS_SOURCES ${RENDERER_MTL_HOST_SHADERS_SOURCES} ${SHADER_METALLIB}) + endfunction() + + add_metal_shader(metal_shaders) + add_metal_shader(metal_copy_to_lut_texture) + + add_custom_target( + compile_msl_shaders + DEPENDS ${RENDERER_MTL_HOST_SHADERS_SOURCES} + ) + + cmrc_add_resource_library( + resources_renderer_mtl + NAMESPACE RendererMTL + WHENCE "src/host_shaders/" + "src/host_shaders/metal_shaders.metallib" + "src/host_shaders/metal_copy_to_lut_texture.metallib" + ) + add_dependencies(resources_renderer_mtl compile_msl_shaders) + + target_sources(AlberCore PRIVATE ${RENDERER_MTL_SOURCE_FILES}) + target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_METAL=1") + target_include_directories(AlberCore PRIVATE third_party/metal-cpp) + # TODO: check if all of them are needed + target_link_libraries(AlberCore PRIVATE "-framework Metal" "-framework Foundation" "-framework QuartzCore" resources_renderer_mtl) +endif() + source_group("Header Files\\Core" FILES ${HEADER_FILES}) -set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} +set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} ${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES} ${APPLET_SOURCE_FILES} ${RENDERER_SW_SOURCE_FILES} ${PICA_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES} ${AUDIO_SOURCE_FILES} ${HEADER_FILES} 
${FRONTEND_HEADER_FILES}) target_sources(AlberCore PRIVATE ${ALL_SOURCES}) @@ -508,7 +570,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) ) else() set(FRONTEND_SOURCE_FILES src/panda_sdl/main.cpp src/panda_sdl/frontend_sdl.cpp src/panda_sdl/mappings.cpp) - set(FRONTEND_HEADER_FILES "include/panda_sdl/frontend_sdl.hpp") + set(FRONTEND_HEADER_FILES "") endif() target_link_libraries(Alber PRIVATE AlberCore) diff --git a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp new file mode 100644 index 00000000..26422635 --- /dev/null +++ b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp @@ -0,0 +1,75 @@ +#pragma once + +#include + +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + +struct BlitPipelineHash { + // Formats + ColorFmt colorFmt; + DepthFmt depthFmt; +}; + +// This pipeline only caches the pipeline with all of its color and depth attachment variations +class BlitPipelineCache { +public: + BlitPipelineCache() = default; + + ~BlitPipelineCache() { + reset(); + vertexFunction->release(); + fragmentFunction->release(); + } + + void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) { + device = dev; + vertexFunction = vert; + fragmentFunction = frag; + } + + MTL::RenderPipelineState* get(BlitPipelineHash hash) { + u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt; + auto& pipeline = pipelineCache[intHash]; + if (!pipeline) { + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + + desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + + NS::Error* error = nullptr; + desc->setLabel(toNSString("Blit pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { 
+ Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + desc->release(); + } + + return pipeline; + } + + void reset() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + pipelineCache.clear(); + } + +private: + std::map pipelineCache; + + MTL::Device* device; + MTL::Function* vertexFunction; + MTL::Function* fragmentFunction; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_depth_stencil_cache.hpp b/include/renderer_mtl/mtl_depth_stencil_cache.hpp new file mode 100644 index 00000000..90721b70 --- /dev/null +++ b/include/renderer_mtl/mtl_depth_stencil_cache.hpp @@ -0,0 +1,86 @@ +#pragma once + +#include + +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + +struct DepthStencilHash { + bool depthStencilWrite; + u8 depthFunc; + u32 stencilConfig; + u16 stencilOpConfig; +}; + +class DepthStencilCache { +public: + DepthStencilCache() = default; + + ~DepthStencilCache() { + reset(); + } + + void set(MTL::Device* dev) { + device = dev; + } + + MTL::DepthStencilState* get(DepthStencilHash hash) { + u64 intHash = ((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig; + auto& depthStencilState = depthStencilCache[intHash]; + if (!depthStencilState) { + MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init(); + desc->setDepthWriteEnabled(hash.depthStencilWrite); + desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc)); + + const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig); + MTL::StencilDescriptor* stencilDesc = nullptr; + if (stencilEnable) { + const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig); + const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig); + + const u32 stencilBufferMask = hash.depthStencilWrite ? 
Helpers::getBits<8, 8>(hash.stencilConfig) : 0; + + const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig); + const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig); + const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig); + + stencilDesc = MTL::StencilDescriptor::alloc()->init(); + stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp)); + stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp)); + stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp)); + stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc)); + stencilDesc->setReadMask(stencilRefMask); + stencilDesc->setWriteMask(stencilBufferMask); + + desc->setFrontFaceStencil(stencilDesc); + desc->setBackFaceStencil(stencilDesc); + } + + depthStencilState = device->newDepthStencilState(desc); + + desc->release(); + if (stencilDesc) { + stencilDesc->release(); + } + } + + return depthStencilState; + } + + void reset() { + for (auto& pair : depthStencilCache) { + pair.second->release(); + } + depthStencilCache.clear(); + } + +private: + std::map depthStencilCache; + + MTL::Device* device; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp new file mode 100644 index 00000000..8bfea636 --- /dev/null +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -0,0 +1,174 @@ +#pragma once + +#include + +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + +struct DrawFragmentFunctionHash { + bool lightingEnabled; // 1 bit + u8 lightingNumLights; // 3 bits + u32 lightingConfig1; // 32 bits (TODO: check this) + // | ref | func | on | + u16 alphaControl; // 12 bits (mask: 11111111 0111 0001) +}; + +//bool operator==(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { +// return ((l.lightingEnabled == r.lightingEnabled) && (l.lightingNumLights == r.lightingNumLights) && +// 
(l.lightingConfig1 == r.lightingConfig1) && (l.alphaControl == r.alphaControl)); +//} + +inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { + // Lexicographic order (the strict weak ordering std::map needs): a field only decides once all earlier fields are equal + if (l.lightingEnabled != r.lightingEnabled) return r.lightingEnabled; + if (l.lightingNumLights != r.lightingNumLights) return l.lightingNumLights < r.lightingNumLights; + if (l.lightingConfig1 != r.lightingConfig1) return l.lightingConfig1 < r.lightingConfig1; + + return l.alphaControl < r.alphaControl; +} + +struct DrawPipelineHash { // 56 bits + // Formats + ColorFmt colorFmt; // 3 bits + DepthFmt depthFmt; // 3 bits + + // Blending + bool blendEnabled; // 1 bit + // | functions | aeq | ceq | + u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111) + u8 colorWriteMask; // 4 bits + + DrawFragmentFunctionHash fragHash; +}; + +//bool operator==(const DrawPipelineHash& l, const DrawPipelineHash& r) { +// return (((u32)l.colorFmt == (u32)r.colorFmt) && ((u32)l.depthFmt == (u32)r.depthFmt) && +// (l.blendEnabled == r.blendEnabled) && (l.blendControl == r.blendControl) && +// (l.colorWriteMask == r.colorWriteMask) && (l.fragHash == r.fragHash)); +//} + +inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) { + // Lexicographic order over every field; fragHash comes last, so its own operator< settles full ties without needing operator== + if ((u32)l.colorFmt != (u32)r.colorFmt) return (u32)l.colorFmt < (u32)r.colorFmt; + if ((u32)l.depthFmt != (u32)r.depthFmt) return (u32)l.depthFmt < (u32)r.depthFmt; + if (l.blendEnabled != r.blendEnabled) return r.blendEnabled; + if (l.blendControl != r.blendControl) return l.blendControl < r.blendControl; + if (l.colorWriteMask != r.colorWriteMask) return l.colorWriteMask < r.colorWriteMask; + + return l.fragHash < r.fragHash; +} + +// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices +#define VERTEX_BUFFER_BINDING_INDEX 30 + +// This pipeline only caches the pipeline with all of its color and depth attachment variations +class DrawPipelineCache { +public: + DrawPipelineCache() = default; + + ~DrawPipelineCache() { + reset(); + vertexDescriptor->release(); + vertexFunction->release(); + } + + void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert,
MTL::VertexDescriptor* vertDesc) { + device = dev; + library = lib; + vertexFunction = vert; + vertexDescriptor = vertDesc; + } + + MTL::RenderPipelineState* get(DrawPipelineHash hash) { + //u32 fragmentFunctionHash = ((u32)hash.lightingEnabled << 22) | ((u32)hash.lightingNumLights << 19) | ((u32)hash.lightingConfig1 << 12) | ((((u32)hash.alphaControl & 0b1111111100000000) >> 8) << 4) | ((((u32)hash.alphaControl & 0b01110000) >> 4) << 1) | ((u32)hash.alphaControl & 0b0001); + //u64 pipelineHash = ((u64)hash.colorFmt << 53) | ((u64)hash.depthFmt << 50) | ((u64)hash.blendEnabled << 49) | ((u64)hash.colorWriteMask << 45) | ((((u64)hash.blendControl & 0b11111111111111110000000000000000) >> 16) << 29) | ((((u64)hash.blendControl & 0b0000011100000000) >> 8) << 26) | (((u64)hash.blendControl & 0b00000111) << 23) | fragmentFunctionHash; + auto& pipeline = pipelineCache[hash]; + if (!pipeline) { + auto& fragmentFunction = fragmentFunctionCache[hash.fragHash]; + if (!fragmentFunction) { + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); + constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); + constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); + constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2)); + constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3)); + + NS::Error* error = nullptr; + fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); + } + + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + 
desc->setVertexDescriptor(vertexDescriptor); + + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + MTL::ColorWriteMask writeMask = 0; + if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed; + if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen; + if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue; + if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha; + colorAttachment->setWriteMask(writeMask); + if (hash.blendEnabled) { + const u8 rgbEquation = hash.blendControl & 0x7; + const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl); + + // Get blending functions + const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl); + const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl); + const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl); + const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl); + + colorAttachment->setBlendingEnabled(true); + colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation)); + colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation)); + colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc)); + colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc)); + colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc)); + colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); + } + + desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + + NS::Error* error = nullptr; + desc->setLabel(toNSString("Draw pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + desc->release(); + } + + return pipeline; + } + + void reset() { + for (auto& pair : pipelineCache) { + 
pair.second->release(); + } + pipelineCache.clear(); + for (auto& pair : fragmentFunctionCache) { + pair.second->release(); + } + fragmentFunctionCache.clear(); + } + +private: + std::map pipelineCache; + std::map fragmentFunctionCache; + + MTL::Device* device; + MTL::Library* library; + MTL::Function* vertexFunction; + MTL::VertexDescriptor* vertexDescriptor; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_render_target.hpp b/include/renderer_mtl/mtl_render_target.hpp new file mode 100644 index 00000000..73be45f4 --- /dev/null +++ b/include/renderer_mtl/mtl_render_target.hpp @@ -0,0 +1,92 @@ +#pragma once +#include +#include +#include +#include "boost/icl/interval.hpp" +#include "helpers.hpp" +#include "math_util.hpp" +#include "opengl.hpp" +#include "pica_to_mtl.hpp" +#include "objc_helper.hpp" + +template +using Interval = boost::icl::right_open_interval; + +namespace Metal { + +template +struct RenderTarget { + MTL::Device* device; + + u32 location; + Format_t format; + OpenGL::uvec2 size; + bool valid; + + // Range of VRAM taken up by buffer + Interval range; + + MTL::Texture* texture = nullptr; + + RenderTarget() : valid(false) {} + + RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } + + Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { + const u32 startOffset = (inputAddress - location) / sizePerPixel(format); + const u32 x0 = (startOffset % (size.x() * 8)) / 8; + const u32 y0 = (startOffset / (size.x() * 8)) * 8; + return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; + } + + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the 
cached texture + bool matches(RenderTarget& other) { + return location == other.location && format == other.format && + size.x() == other.size.x() && size.y() == other.size.y(); + } + + void allocate() { + MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid; + if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format); + } else if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format); + } else { + panic("Invalid format type"); + } + + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModePrivate); + texture = device->newTexture(descriptor); + texture->setLabel(toNSString(std::string(std::is_same::value ? 
"Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" + std::to_string(size.v()))); + descriptor->release(); + } + + void free() { + valid = false; + + if (texture) { + texture->release(); + } + } + + u64 sizeInBytes() { + return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); + } +}; + +typedef RenderTarget ColorRenderTarget; +typedef RenderTarget DepthStencilRenderTarget; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp new file mode 100644 index 00000000..590132bd --- /dev/null +++ b/include/renderer_mtl/mtl_texture.hpp @@ -0,0 +1,77 @@ +#pragma once +#include +#include +#include +#include "PICA/regs.hpp" +#include "boost/icl/interval.hpp" +#include "helpers.hpp" +#include "math_util.hpp" +#include "opengl.hpp" +#include "renderer_mtl/pica_to_mtl.hpp" + +template +using Interval = boost::icl::right_open_interval; + +namespace Metal { + +struct Texture { + MTL::Device* device; + + u32 location; + u32 config; // Magnification/minification filter, wrapping configs, etc + PICA::TextureFmt format; + OpenGL::uvec2 size; + bool valid; + + // Range of VRAM taken up by buffer + Interval range; + + PICA::PixelFormatInfo formatInfo; + MTL::Texture* texture = nullptr; + MTL::SamplerState* sampler = nullptr; + + Texture() : valid(false) {} + + Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) { + + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } + + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture + bool matches(Texture& other) { + return location == other.location && format == other.format && + size.x() 
== other.size.x() && size.y() == other.size.y(); + } + + void allocate(); + void setNewConfig(u32 newConfig); + void decodeTexture(std::span data); + void free(); + u64 sizeInBytes(); + + u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + + // Get the morton interleave offset of a texel based on its U and V values + static u32 mortonInterleave(u32 u, u32 v); + // Get the byte offset of texel (u, v) in the texture + static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel); + static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); + + // Returns the format of this texture as a string + std::string_view formatToString() { + return PICA::textureFormatToString(format); + } + + // Returns the texel at coordinates (u, v) of an ETC1(A4) texture + // TODO: Make hasAlpha a template parameter + u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data); + u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData); +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp new file mode 100644 index 00000000..1760cdfa --- /dev/null +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -0,0 +1,80 @@ +#pragma once + +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + +struct BufferHandle { + MTL::Buffer* buffer; + size_t offset; +}; + +// 64MB buffer for caching vertex data (parenthesized so the macro expands safely inside larger expressions) +#define CACHE_BUFFER_SIZE (64 * 1024 * 1024) + +class VertexBufferCache { +public: + VertexBufferCache() = default; + + ~VertexBufferCache() { + endFrame(); + if (buffer) buffer->release(); // buffer stays null until set() is called + } + + void set(MTL::Device* dev) { + device = dev; + create(); + } + + void endFrame() { + ptr = 0; + for (auto buffer : additionalAllocations) { + buffer->release(); + } + additionalAllocations.clear(); + } + + BufferHandle
get(const void* data, size_t size) { + // If the vertex buffer is too large, just create a new one + if (ptr + size > CACHE_BUFFER_SIZE) { + MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared); + newBuffer->setLabel(toNSString("Additional vertex buffer")); + additionalAllocations.push_back(newBuffer); + Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer"); + + return BufferHandle{newBuffer, 0}; + } + + // Copy the data into the buffer + memcpy((char*)buffer->contents() + ptr, data, size); + + size_t oldPtr = ptr; + ptr += size; + + return BufferHandle{buffer, oldPtr}; + } + + void reset() { + endFrame(); + if (buffer) { + buffer->release(); + create(); + } + } + +private: + MTL::Buffer* buffer = nullptr; + size_t ptr = 0; + std::vector additionalAllocations; + + MTL::Device* device; + + void create() { + buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared); + buffer->setLabel(toNSString("Shared vertex buffer")); + } +}; + +} // namespace Metal diff --git a/include/renderer_mtl/objc_helper.hpp b/include/renderer_mtl/objc_helper.hpp new file mode 100644 index 00000000..91756d24 --- /dev/null +++ b/include/renderer_mtl/objc_helper.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include + +#include + +namespace Metal { + +dispatch_data_t createDispatchData(const void* data, size_t size); + +} // namespace Metal + +// Cast from std::string to NS::String* +inline NS::String* toNSString(const std::string& str) { + return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); +} diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp new file mode 100644 index 00000000..de76dc3b --- /dev/null +++ b/include/renderer_mtl/pica_to_mtl.hpp @@ -0,0 +1,155 @@ +#pragma once + +#include +#include "PICA/regs.hpp" + +namespace PICA { + +struct PixelFormatInfo { + MTL::PixelFormat pixelFormat; + size_t bytesPerTexel; +}; + +constexpr PixelFormatInfo pixelFormatInfos[14] = 
{ + {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8 + {MTL::PixelFormatRGBA8Unorm, 4}, // RGB8 + {MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551 + {MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565 + {MTL::PixelFormatABGR4Unorm, 2}, // RGBA4 + {MTL::PixelFormatRGBA8Unorm, 4}, // IA8 + {MTL::PixelFormatRG8Unorm, 2}, // RG8 + {MTL::PixelFormatRGBA8Unorm, 4}, // I8 + {MTL::PixelFormatA8Unorm, 1}, // A8 + {MTL::PixelFormatABGR4Unorm, 2}, // IA4 + {MTL::PixelFormatABGR4Unorm, 2}, // I4 + {MTL::PixelFormatA8Unorm, 1}, // A4 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4 +}; + +inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { + return pixelFormatInfos[static_cast(format)]; +} + +inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) { + switch (format) { + case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm? + case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm? 
+ case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm; + } + + // Fallback in case format holds a value outside the listed enumerators, so control never flows off the end + return MTL::PixelFormatRGBA8Unorm; +} + +inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) { + switch (format) { + case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm; + case DepthFmt::Unknown1: return MTL::PixelFormatInvalid; + case DepthFmt::Depth24: return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats + // Apple silicon doesn't support 24-bit depth buffers, so we use 32-bit instead + case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8; + } + + // Same result as Unknown1 for any value outside the listed enumerators + return MTL::PixelFormatInvalid; +} + +inline MTL::CompareFunction toMTLCompareFunc(u8 func) { + switch (func) { + case 0: return MTL::CompareFunctionNever; + case 1: return MTL::CompareFunctionAlways; + case 2: return MTL::CompareFunctionEqual; + case 3: return MTL::CompareFunctionNotEqual; + case 4: return MTL::CompareFunctionLess; + case 5: return MTL::CompareFunctionLessEqual; + case 6: return MTL::CompareFunctionGreater; + case 7: return MTL::CompareFunctionGreaterEqual; + default: panic("Unknown compare function %u", func); + } + + return MTL::CompareFunctionAlways; +} + +inline MTL::BlendOperation toMTLBlendOperation(u8 op) { + switch (op) { + case 0: return MTL::BlendOperationAdd; + case 1: return MTL::BlendOperationSubtract; + case 2: return MTL::BlendOperationReverseSubtract; + case 3: return MTL::BlendOperationMin; + case 4: return MTL::BlendOperationMax; + case 5: return MTL::BlendOperationAdd; // Unused (same as 0) + case 6: return MTL::BlendOperationAdd; // Unused (same as 0) + case 7: return MTL::BlendOperationAdd; // Unused (same as 0) + default: panic("Unknown blend operation %u", op); + } + + return MTL::BlendOperationAdd; +} + +inline MTL::BlendFactor toMTLBlendFactor(u8 factor) { + switch (factor) { + case 0: return MTL::BlendFactorZero; + case 1: return MTL::BlendFactorOne; + case 2: return MTL::BlendFactorSourceColor; + case 3: return MTL::BlendFactorOneMinusSourceColor; + case 4: return MTL::BlendFactorDestinationColor; + case 5:
return MTL::BlendFactorOneMinusDestinationColor; + case 6: return MTL::BlendFactorSourceAlpha; + case 7: return MTL::BlendFactorOneMinusSourceAlpha; + case 8: return MTL::BlendFactorDestinationAlpha; + case 9: return MTL::BlendFactorOneMinusDestinationAlpha; + case 10: return MTL::BlendFactorBlendColor; + case 11: return MTL::BlendFactorOneMinusBlendColor; + case 12: return MTL::BlendFactorBlendAlpha; + case 13: return MTL::BlendFactorOneMinusBlendAlpha; + case 14: return MTL::BlendFactorSourceAlphaSaturated; + case 15: return MTL::BlendFactorOne; // Undocumented + default: panic("Unknown blend factor %u", factor); + } + + return MTL::BlendFactorOne; +} + +inline MTL::StencilOperation toMTLStencilOperation(u8 op) { + switch (op) { + case 0: return MTL::StencilOperationKeep; + case 1: return MTL::StencilOperationZero; + case 2: return MTL::StencilOperationReplace; + case 3: return MTL::StencilOperationIncrementClamp; + case 4: return MTL::StencilOperationDecrementClamp; + case 5: return MTL::StencilOperationInvert; + case 6: return MTL::StencilOperationIncrementWrap; + case 7: return MTL::StencilOperationDecrementWrap; + default: panic("Unknown stencil operation %u", op); + } + + return MTL::StencilOperationKeep; +} + +inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) { + switch (primType) { + case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle; + case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip; + case PrimType::TriangleFan: + Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); + return MTL::PrimitiveTypeTriangle; + case PrimType::GeometryPrimitive: + //Helpers::warn("Geometry primitives are not yet, using triangles instead"); + return MTL::PrimitiveTypeTriangle; + } +} + +inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) { + switch (addrMode) { + case 0: return MTL::SamplerAddressModeClampToEdge; + case 1: return MTL::SamplerAddressModeClampToBorderColor; + case 2: 
return MTL::SamplerAddressModeRepeat; + case 3: return MTL::SamplerAddressModeMirrorRepeat; + case 4: return MTL::SamplerAddressModeClampToEdge; + case 5: return MTL::SamplerAddressModeClampToBorderColor; + case 6: return MTL::SamplerAddressModeRepeat; + case 7: return MTL::SamplerAddressModeRepeat; + default: panic("Unknown sampler address mode %u", addrMode); + } + + return MTL::SamplerAddressModeClampToEdge; +} + +} // namespace PICA diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp new file mode 100644 index 00000000..9ba0937a --- /dev/null +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -0,0 +1,189 @@ +#include +#include + +#include "renderer.hpp" +#include "mtl_texture.hpp" +#include "mtl_render_target.hpp" +#include "mtl_blit_pipeline_cache.hpp" +#include "mtl_draw_pipeline_cache.hpp" +#include "mtl_depth_stencil_cache.hpp" +#include "mtl_vertex_buffer_cache.hpp" +// HACK: use the OpenGL cache +#include "../renderer_gl/surface_cache.hpp" + +class GPU; + +struct Color4 { + float r, g, b, a; +}; + +class RendererMTL final : public Renderer { + public: + RendererMTL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); + ~RendererMTL() override; + + void reset() override; + void display() override; + void initGraphicsContext(SDL_Window* window) override; + void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; + void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; + void drawVertices(PICA::PrimType primType, std::span vertices) override; + void screenshot(const std::string& name) override; + void deinitGraphicsContext() override; + +#ifdef PANDA3DS_FRONTEND_QT + virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {} +#endif + + private: + CA::MetalLayer* metalLayer; + + 
MTL::Device* device; + MTL::CommandQueue* commandQueue; + + // Libraries + MTL::Library* library; + + // Caches + SurfaceCache colorRenderTargetCache; + SurfaceCache depthStencilRenderTargetCache; + SurfaceCache textureCache; + Metal::BlitPipelineCache blitPipelineCache; + Metal::DrawPipelineCache drawPipelineCache; + Metal::DepthStencilCache depthStencilCache; + Metal::VertexBufferCache vertexBufferCache; + + // Objects + MTL::SamplerState* nearestSampler; + MTL::SamplerState* linearSampler; + MTL::Texture* lutTexture; + MTL::DepthStencilState* defaultDepthStencilState; + + // Pipelines + MTL::RenderPipelineState* displayPipeline; + MTL::RenderPipelineState* copyToLutTexturePipeline; + + // Clears + std::map colorClearOps; + std::map depthClearOps; + std::map stencilClearOps; + + // Active state + MTL::CommandBuffer* commandBuffer = nullptr; + MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; + MTL::Texture* lastColorTexture = nullptr; + MTL::Texture* lastDepthTexture = nullptr; + + // Debug + std::string nextRenderPassName; + + void createCommandBufferIfNeeded() { + if (!commandBuffer) { + commandBuffer = commandQueue->commandBuffer(); + } + } + + void endRenderPass() { + if (renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder = nullptr; + } + } + + void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr) { + createCommandBufferIfNeeded(); + + if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { + endRenderPass(); + + renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); + renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); + + lastColorTexture = colorTexture; + lastDepthTexture = depthTexture; + } + + renderPassDescriptor->release(); + } + + void commitCommandBuffer() { + if 
(renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder->release(); + renderCommandEncoder = nullptr; + } + if (commandBuffer) { + commandBuffer->commit(); + commandBuffer->release(); + commandBuffer = nullptr; + } + } + + template + inline void clearAttachment(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment, SetClearDataT setClearData) { + bool beginRenderPass = (renderPassDescriptor == nullptr); + if (!renderPassDescriptor) { + renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + } + + AttachmentT* attachment = getAttachment(renderPassDescriptor); + attachment->setTexture(texture); + setClearData(attachment, clearData); + attachment->setLoadAction(MTL::LoadActionClear); + attachment->setStoreAction(MTL::StoreActionStore); + + if (beginRenderPass) { + if (std::is_same::value) + beginRenderPassIfNeeded(renderPassDescriptor, true, texture); + else + beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture); + } + } + + template + inline bool clearAttachment(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map& clearOps, GetAttachmentT getAttachment, SetClearDataT setClearData) { + auto it = clearOps.find(texture); + if (it != clearOps.end()) { + clearAttachment(renderPassDescriptor, texture, it->second, getAttachment, setClearData); + clearOps.erase(it); + return true; + } + + if (renderPassDescriptor) { + AttachmentT* attachment = getAttachment(renderPassDescriptor); + attachment->setTexture(texture); + attachment->setLoadAction(MTL::LoadActionLoad); + attachment->setStoreAction(MTL::StoreActionStore); + } + + return false; + } + + bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment(renderPassDescriptor, texture, colorClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); }, 
[](auto attachment, auto& color) { + attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); + }); + } + + bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment(renderPassDescriptor, texture, depthClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); }, [](auto attachment, auto& depth) { + attachment->setClearDepth(depth); + }); + } + + bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment(renderPassDescriptor, texture, stencilClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); }, [](auto attachment, auto& stencil) { + attachment->setClearStencil(stencil); + }); + } + + std::optional getColorRenderTarget(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); + Metal::DepthStencilRenderTarget& getDepthRenderTarget(); + Metal::Texture& getTexture(Metal::Texture& tex); + void setupTextureEnvState(MTL::RenderCommandEncoder* encoder); + void bindTexturesToSlots(MTL::RenderCommandEncoder* encoder); + void updateLightingLUT(MTL::RenderCommandEncoder* encoder); + void updateFogLUT(MTL::RenderCommandEncoder* encoder); + void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect); +}; diff --git a/src/core/renderer_mtl/metal_cpp_impl.cpp b/src/core/renderer_mtl/metal_cpp_impl.cpp new file mode 100644 index 00000000..7fa7137b --- /dev/null +++ b/src/core/renderer_mtl/metal_cpp_impl.cpp @@ -0,0 +1,6 @@ +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include +#include +#include diff --git a/src/core/renderer_mtl/mtl_etc1.cpp b/src/core/renderer_mtl/mtl_etc1.cpp new file mode 100644 index 00000000..a414df3c --- /dev/null +++ 
b/src/core/renderer_mtl/mtl_etc1.cpp @@ -0,0 +1,124 @@ +#include +#include "colour.hpp" +#include "renderer_mtl/renderer_mtl.hpp" +#include "renderer_mtl/mtl_texture.hpp" + +using namespace Helpers; + +namespace Metal { + +static constexpr u32 signExtend3To32(u32 val) { + return (u32)(s32(val) << 29 >> 29); +} + +u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { + // Pixel offset of the 8x8 tile based on u, v and the width of the texture + u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); + if (!hasAlpha) + offs >>= 1; + + // In-tile offsets for u/v + u &= 7; + v &= 7; + + // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles + // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes + const u32 subTileSize = hasAlpha ? 16 : 8; + const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in? + + // In-subtile offsets for u/v + u &= 3; + v &= 3; + offs += subTileSize * subTileIndex; + + u32 alpha; + const u64* ptr = reinterpret_cast(data.data() + offs); // Cast to u64* + + if (hasAlpha) { + // First 64 bits of the 4x4 subtile are alpha data + const u64 alphaData = *ptr++; + alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf); + } + else { + alpha = 0xff; // ETC1 without alpha uses ff for every pixel + } + + // Next 64 bits of the subtile are colour data + u64 colourData = *ptr; + return decodeETC(alpha, u, v, colourData); +} + +u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) { + static constexpr u32 modifiers[8][2] = { + { 2, 8 }, + { 5, 17 }, + { 9, 29 }, + { 13, 42 }, + { 18, 60 }, + { 24, 80 }, + { 33, 106 }, + { 47, 183 }, + }; + + // Parse colour data for 4x4 block + const u32 subindices = getBits<0, 16, u32>(colourData); + const u32 negationFlags = getBits<16, 16, u32>(colourData); + const bool flip = getBit<32>(colourData); + const bool diffMode = getBit<33>(colourData); + + // Note: index1 is indeed stored on the higher 
bits, with index2 in the lower bits + const u32 tableIndex1 = getBits<37, 3, u32>(colourData); + const u32 tableIndex2 = getBits<34, 3, u32>(colourData); + const u32 texelIndex = u * 4 + v; // Index of the texel in the block + + if (flip) + std::swap(u, v); + + s32 r, g, b; + if (diffMode) { + r = getBits<59, 5, s32>(colourData); + g = getBits<51, 5, s32>(colourData); + b = getBits<43, 5, s32>(colourData); + + if (u >= 2) { + r += signExtend3To32(getBits<56, 3, u32>(colourData)); + g += signExtend3To32(getBits<48, 3, u32>(colourData)); + b += signExtend3To32(getBits<40, 3, u32>(colourData)); + } + + // Expand from 5 to 8 bits per channel + r = Colour::convert5To8Bit(r); + g = Colour::convert5To8Bit(g); + b = Colour::convert5To8Bit(b); + } else { + if (u < 2) { + r = getBits<60, 4, s32>(colourData); + g = getBits<52, 4, s32>(colourData); + b = getBits<44, 4, s32>(colourData); + } else { + r = getBits<56, 4, s32>(colourData); + g = getBits<48, 4, s32>(colourData); + b = getBits<40, 4, s32>(colourData); + } + + // Expand from 4 to 8 bits per channel + r = Colour::convert4To8Bit(r); + g = Colour::convert4To8Bit(g); + b = Colour::convert4To8Bit(b); + } + + const u32 index = (u < 2) ? 
tableIndex1 : tableIndex2; + s32 modifier = modifiers[index][(subindices >> texelIndex) & 1]; + + if (((negationFlags >> texelIndex) & 1) != 0) { + modifier = -modifier; + } + + r = std::clamp(r + modifier, 0, 255); + g = std::clamp(g + modifier, 0, 255); + b = std::clamp(b + modifier, 0, 255); + + return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r); +} + +} // namespace Metal diff --git a/src/core/renderer_mtl/mtl_texture.cpp b/src/core/renderer_mtl/mtl_texture.cpp new file mode 100644 index 00000000..b61c5502 --- /dev/null +++ b/src/core/renderer_mtl/mtl_texture.cpp @@ -0,0 +1,312 @@ +#include "renderer_mtl/mtl_texture.hpp" +#include "renderer_mtl/objc_helper.hpp" +#include "colour.hpp" +#include + +using namespace Helpers; + +namespace Metal { + +void Texture::allocate() { + formatInfo = PICA::getPixelFormatInfo(format); + + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(formatInfo.pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers? + texture = device->newTexture(descriptor); + texture->setLabel(toNSString("Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v()))); + descriptor->release(); + + setNewConfig(config); +} + +// Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on +void Texture::setNewConfig(u32 cfg) { + config = cfg; + + if (sampler) { + sampler->release(); + } + + const auto magFilter = (cfg & 0x2) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto minFilter = (cfg & 0x4) != 0 ? 
MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto wrapT = PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg)); + const auto wrapS = PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg)); + + MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + samplerDescriptor->setMinFilter(minFilter); + samplerDescriptor->setMagFilter(magFilter); + samplerDescriptor->setSAddressMode(wrapS); + samplerDescriptor->setTAddressMode(wrapT); + + samplerDescriptor->setLabel(toNSString("Sampler")); + sampler = device->newSamplerState(samplerDescriptor); + samplerDescriptor->release(); +} + +void Texture::free() { + valid = false; + + if (texture) { + texture->release(); + } + if (sampler) { + sampler->release(); + } +} + +u64 Texture::sizeInBytes() { + u64 pixelCount = u64(size.x()) * u64(size.y()); + + switch (format) { + case PICA::TextureFmt::RGBA8: // 4 bytes per pixel + return pixelCount * 4; + + case PICA::TextureFmt::RGB8: // 3 bytes per pixel + return pixelCount * 3; + + case PICA::TextureFmt::RGBA5551: // 2 bytes per pixel + case PICA::TextureFmt::RGB565: + case PICA::TextureFmt::RGBA4: + case PICA::TextureFmt::RG8: + case PICA::TextureFmt::IA8: + return pixelCount * 2; + + case PICA::TextureFmt::A8: // 1 byte per pixel + case PICA::TextureFmt::I8: + case PICA::TextureFmt::IA4: + return pixelCount; + + case PICA::TextureFmt::I4: // 4 bits per pixel + case PICA::TextureFmt::A4: + return pixelCount / 2; + + case PICA::TextureFmt::ETC1: // Compressed formats + case PICA::TextureFmt::ETC1A4: { + // Number of 4x4 tiles + const u64 tileCount = pixelCount / 16; + // Tiles are 8 bytes each on ETC1 and 16 bytes each on ETC1A4 + const u64 tileSize = format == PICA::TextureFmt::ETC1 ? 
8 : 16; + return tileCount * tileSize; + } + + default: + Helpers::panic("[PICA] Attempted to get size of invalid texture type"); + } +} + +// u and v are the UVs of the relevant texel +// Texture data is stored interleaved in Morton order, i.e. in a Z-order curve as shown here +// https://en.wikipedia.org/wiki/Z-order_curve +// Textures are split into 8x8 tiles. This function returns the in-tile offset depending on the u & v of the texel +// The in-tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of v % 8 +// As documented in this picture https://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg +u32 Texture::mortonInterleave(u32 u, u32 v) { + static constexpr u32 xOffsets[] = { 0, 1, 4, 5, 16, 17, 20, 21 }; + static constexpr u32 yOffsets[] = { 0, 2, 8, 10, 32, 34, 40, 42 }; + + return xOffsets[u & 7] + yOffsets[v & 7]; +} + +// Get the byte offset of texel (u, v) in the texture +u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + + return offset * bytesPerPixel; +} + +// Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte +u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + + return offset / 2; +} + +u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::A4: { + const u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 alpha = data[offset] >> ((u % 2) ?
4 : 0); + alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha)); + + // A8 + return alpha; + } + + case PICA::TextureFmt::A8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 alpha = data[offset]; + + // A8 + return alpha; + } + + default: + Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } +} + +u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RG8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + constexpr u8 b = 0; + const u8 g = data[offset]; + const u8 r = data[offset + 1]; + + // RG8 + return (g << 8) | r; + } + + case PICA::TextureFmt::RGBA4: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + u8 alpha = getBits<0, 4, u8>(texel); + u8 b = getBits<4, 4, u8>(texel); + u8 g = getBits<8, 4, u8>(texel); + u8 r = getBits<12, 4, u8>(texel); + + // ABGR4 + return (r << 12) | (g << 8) | (b << 4) | alpha; + } + + case PICA::TextureFmt::RGBA5551: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + u8 alpha = getBit<0>(texel) ? 
0xff : 0; + u8 b = getBits<1, 5, u8>(texel); + u8 g = getBits<6, 5, u8>(texel); + u8 r = getBits<11, 5, u8>(texel); + + // BGR5A1 + return (alpha << 15) | (r << 10) | (g << 5) | b; + } + + case PICA::TextureFmt::RGB565: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + const u8 b = getBits<0, 5, u8>(texel); + const u8 g = getBits<5, 6, u8>(texel); + const u8 r = getBits<11, 5, u8>(texel); + + // B5G6R5 + return (r << 11) | (g << 5) | b; + } + + case PICA::TextureFmt::IA4: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 texel = data[offset]; + const u8 alpha = texel & 0xf; + const u8 intensity = texel >> 4; + + // ABGR4 + return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha; + } + + case PICA::TextureFmt::I4: { + u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 intensity = data[offset] >> ((u % 2) ? 
4 : 0); + intensity = getBits<0, 4>(intensity); + + // ABGR4: alpha lives in bits 0-3 only, so fully opaque is 0xf (0xff would also force the blue nibble to 0xf) + return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xf; + } + + default: + Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } +} + +u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RGB8: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 3); + const u8 b = data[offset]; + const u8 g = data[offset + 1]; + const u8 r = data[offset + 2]; + + // RGBA8 + return (0xff << 24) | (b << 16) | (g << 8) | r; + } + + case PICA::TextureFmt::RGBA8: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 4); + const u8 alpha = data[offset]; + const u8 b = data[offset + 1]; + const u8 g = data[offset + 2]; + const u8 r = data[offset + 3]; + + // RGBA8 + return (alpha << 24) | (b << 16) | (g << 8) | r; + } + + case PICA::TextureFmt::I8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 intensity = data[offset]; + + // RGBA8 + return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity; + } + + case PICA::TextureFmt::IA8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + + // Same as I8 except each pixel gets its own alpha value too + const u8 alpha = data[offset]; + const u8 intensity = data[offset + 1]; + + // RGBA8 + return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity; + } + + case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data); + case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data); + + default: + Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } +} + +void Texture::decodeTexture(std::span data) { + std::vector decoded; + decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel); + + // Decode texels line by line + for (u32 v = 0; v < size.v(); v++) { + for (u32 u = 0; u < size.u(); u++) { + if (formatInfo.bytesPerTexel == 1) {
+ u8 texel = decodeTexelU8(u, v, format, data); + decoded.push_back(texel); + } else if (formatInfo.bytesPerTexel == 2) { + u16 texel = decodeTexelU16(u, v, format, data); + decoded.push_back((texel & 0x00ff) >> 0); + decoded.push_back((texel & 0xff00) >> 8); + } else if (formatInfo.bytesPerTexel == 4) { + u32 texel = decodeTexelU32(u, v, format, data); + decoded.push_back((texel & 0x000000ff) >> 0); + decoded.push_back((texel & 0x0000ff00) >> 8); + decoded.push_back((texel & 0x00ff0000) >> 16); + decoded.push_back((texel & 0xff000000) >> 24); + } else { + Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel); + } + } + } + + texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0); +} + +} // namespace Metal diff --git a/src/core/renderer_mtl/objc_helper.mm b/src/core/renderer_mtl/objc_helper.mm new file mode 100644 index 00000000..eeea56a0 --- /dev/null +++ b/src/core/renderer_mtl/objc_helper.mm @@ -0,0 +1,12 @@ +#include "renderer_mtl/objc_helper.hpp" + +// TODO: change the include +#import + +namespace Metal { + +dispatch_data_t createDispatchData(const void* data, size_t size) { + return dispatch_data_create(data, size, dispatch_get_global_queue(0, 0), ^{}); +} + +} // namespace Metal diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp new file mode 100644 index 00000000..10bca5dd --- /dev/null +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -0,0 +1,774 @@ +#include "PICA/gpu.hpp" +#include "renderer_mtl/renderer_mtl.hpp" +#include "renderer_mtl/objc_helper.hpp" + +#include +#include + +#include "SDL_metal.h" + +using namespace PICA; + +CMRC_DECLARE(RendererMTL); + +const u16 LIGHT_LUT_TEXTURE_WIDTH = 256; + +// HACK: redefinition... 
+PICA::ColorFmt ToColorFormat(u32 format) { + switch (format) { + case 2: return PICA::ColorFmt::RGB565; + case 3: return PICA::ColorFmt::RGBA5551; + default: return static_cast(format); + } +} + +MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) { + //MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init(); + NS::Error* error = nullptr; + MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error); + //MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error); + if (error) { + Helpers::panic("Error loading shaders: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + return library; +} + +RendererMTL::RendererMTL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) + : Renderer(gpu, internalRegs, externalRegs) {} +RendererMTL::~RendererMTL() {} + +void RendererMTL::reset() { + vertexBufferCache.reset(); + depthStencilCache.reset(); + drawPipelineCache.reset(); + blitPipelineCache.reset(); + textureCache.reset(); + depthStencilRenderTargetCache.reset(); + colorRenderTargetCache.reset(); +} + +void RendererMTL::display() { + CA::MetalDrawable* drawable = metalLayer->nextDrawable(); + if (!drawable) { + return; + } + + using namespace PICA::ExternalRegs; + + // Top screen + const u32 topActiveFb = externalRegs[Framebuffer0Select] & 1; + const u32 topScreenAddr = externalRegs[topActiveFb == 0 ? Framebuffer0AFirstAddr : Framebuffer0ASecondAddr]; + auto topScreen = colorRenderTargetCache.findFromAddress(topScreenAddr); + + if (topScreen) { + clearColor(nullptr, topScreen->get().texture); + } + + // Bottom screen + const u32 bottomActiveFb = externalRegs[Framebuffer1Select] & 1; + const u32 bottomScreenAddr = externalRegs[bottomActiveFb == 0 ? 
Framebuffer1AFirstAddr : Framebuffer1ASecondAddr]; + auto bottomScreen = colorRenderTargetCache.findFromAddress(bottomScreenAddr); + + if (bottomScreen) { + clearColor(nullptr, bottomScreen->get().texture); + } + + // -------- Draw -------- + commandBuffer->pushDebugGroup(toNSString("Display")); + + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + MTL::RenderPassColorAttachmentDescriptor* colorAttachment = renderPassDescriptor->colorAttachments()->object(0); + colorAttachment->setTexture(drawable->texture()); + colorAttachment->setLoadAction(MTL::LoadActionClear); + colorAttachment->setClearColor(MTL::ClearColor{0.0f, 0.0f, 0.0f, 1.0f}); + colorAttachment->setStoreAction(MTL::StoreActionStore); + + nextRenderPassName = "Display"; + beginRenderPassIfNeeded(renderPassDescriptor, false, drawable->texture()); + renderCommandEncoder->setRenderPipelineState(displayPipeline); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0); + + // Top screen + if (topScreen) { + renderCommandEncoder->setViewport(MTL::Viewport{0, 0, 400, 240, 0.0f, 1.0f}); + renderCommandEncoder->setFragmentTexture(topScreen->get().texture, 0); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); + } + + // Bottom screen + if (bottomScreen) { + renderCommandEncoder->setViewport(MTL::Viewport{40, 240, 320, 240, 0.0f, 1.0f}); + renderCommandEncoder->setFragmentTexture(bottomScreen->get().texture, 0); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); + } + + endRenderPass(); + + commandBuffer->presentDrawable(drawable); + + commandBuffer->popDebugGroup(); + + commitCommandBuffer(); + + // Inform the vertex buffer cache that the frame ended + vertexBufferCache.endFrame(); + + // Release + drawable->release(); +} + +void RendererMTL::initGraphicsContext(SDL_Window* window) { + // TODO: what should be the type of the view? 
+ void* view = SDL_Metal_CreateView(window); + metalLayer = (CA::MetalLayer*)SDL_Metal_GetLayer(view); + device = MTL::CreateSystemDefaultDevice(); + metalLayer->setDevice(device); + commandQueue = device->newCommandQueue(); + + // -------- Objects -------- + + // Textures + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); + textureDescriptor->setTextureType(MTL::TextureType2D); + textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA32Float); + textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH); + textureDescriptor->setHeight(Lights::LUT_Count + 1); + textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); + textureDescriptor->setStorageMode(MTL::StorageModePrivate); + + lutTexture = device->newTexture(textureDescriptor); + lutTexture->setLabel(toNSString("LUT texture")); + textureDescriptor->release(); + + // Samplers + MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + samplerDescriptor->setLabel(toNSString("Sampler (nearest)")); + nearestSampler = device->newSamplerState(samplerDescriptor); + + samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear); + samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear); + samplerDescriptor->setLabel(toNSString("Sampler (linear)")); + linearSampler = device->newSamplerState(samplerDescriptor); + + samplerDescriptor->release(); + + // -------- Pipelines -------- + + // Load shaders + auto mtlResources = cmrc::RendererMTL::get_filesystem(); + library = loadLibrary(device, mtlResources.open("metal_shaders.metallib")); + MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); + + // Display + MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding)); + MTL::Function* fragmentDisplayFunction = library->newFunction(NS::String::string("fragmentDisplay", NS::ASCIIStringEncoding)); + + 
MTL::RenderPipelineDescriptor* displayPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + displayPipelineDescriptor->setVertexFunction(vertexDisplayFunction); + displayPipelineDescriptor->setFragmentFunction(fragmentDisplayFunction); + auto* displayColorAttachment = displayPipelineDescriptor->colorAttachments()->object(0); + displayColorAttachment->setPixelFormat(MTL::PixelFormat::PixelFormatBGRA8Unorm); + + NS::Error* error = nullptr; + displayPipelineDescriptor->setLabel(toNSString("Display pipeline")); + displayPipeline = device->newRenderPipelineState(displayPipelineDescriptor, &error); + if (error) { + Helpers::panic("Error creating display pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + displayPipelineDescriptor->release(); + vertexDisplayFunction->release(); + fragmentDisplayFunction->release(); + + // Blit + MTL::Function* vertexBlitFunction = library->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding)); + MTL::Function* fragmentBlitFunction = library->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding)); + + blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction); + + // Draw + MTL::Function* vertexDrawFunction = library->newFunction(NS::String::string("vertexDraw", NS::ASCIIStringEncoding)); + + // -------- Vertex descriptor -------- + MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); + + // Position + MTL::VertexAttributeDescriptor* positionAttribute = vertexDescriptor->attributes()->object(0); + positionAttribute->setFormat(MTL::VertexFormatFloat4); + positionAttribute->setOffset(offsetof(Vertex, s.positions)); + positionAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Quaternion + MTL::VertexAttributeDescriptor* quaternionAttribute = vertexDescriptor->attributes()->object(1); + quaternionAttribute->setFormat(MTL::VertexFormatFloat4); + quaternionAttribute->setOffset(offsetof(Vertex, s.quaternion)); 
+ quaternionAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Color + MTL::VertexAttributeDescriptor* colorAttribute = vertexDescriptor->attributes()->object(2); + colorAttribute->setFormat(MTL::VertexFormatFloat4); + colorAttribute->setOffset(offsetof(Vertex, s.colour)); + colorAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 0 + MTL::VertexAttributeDescriptor* texCoord0Attribute = vertexDescriptor->attributes()->object(3); + texCoord0Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord0Attribute->setOffset(offsetof(Vertex, s.texcoord0)); + texCoord0Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 1 + MTL::VertexAttributeDescriptor* texCoord1Attribute = vertexDescriptor->attributes()->object(4); + texCoord1Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord1Attribute->setOffset(offsetof(Vertex, s.texcoord1)); + texCoord1Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 0 W + MTL::VertexAttributeDescriptor* texCoord0WAttribute = vertexDescriptor->attributes()->object(5); + texCoord0WAttribute->setFormat(MTL::VertexFormatFloat); + texCoord0WAttribute->setOffset(offsetof(Vertex, s.texcoord0_w)); + texCoord0WAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // View + MTL::VertexAttributeDescriptor* viewAttribute = vertexDescriptor->attributes()->object(6); + viewAttribute->setFormat(MTL::VertexFormatFloat3); + viewAttribute->setOffset(offsetof(Vertex, s.view)); + viewAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 2 + MTL::VertexAttributeDescriptor* texCoord2Attribute = vertexDescriptor->attributes()->object(7); + texCoord2Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord2Attribute->setOffset(offsetof(Vertex, s.texcoord2)); + texCoord2Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + MTL::VertexBufferLayoutDescriptor* vertexBufferLayout = 
vertexDescriptor->layouts()->object(VERTEX_BUFFER_BINDING_INDEX); + vertexBufferLayout->setStride(sizeof(Vertex)); + vertexBufferLayout->setStepFunction(MTL::VertexStepFunctionPerVertex); + vertexBufferLayout->setStepRate(1); + + drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor); + + // Copy to LUT texture + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); + constants->setConstantValue(&LIGHT_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0)); + + error = nullptr; + MTL::Function* vertexCopyToLutTextureFunction = copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); + + MTL::RenderPipelineDescriptor* copyToLutTexturePipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + copyToLutTexturePipelineDescriptor->setVertexFunction(vertexCopyToLutTextureFunction); + // Disable rasterization + copyToLutTexturePipelineDescriptor->setRasterizationEnabled(false); + + error = nullptr; + copyToLutTexturePipelineDescriptor->setLabel(toNSString("Copy to LUT texture pipeline")); + copyToLutTexturePipeline = device->newRenderPipelineState(copyToLutTexturePipelineDescriptor, &error); + if (error) { + Helpers::panic("Error creating copy_to_lut_texture pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + copyToLutTexturePipelineDescriptor->release(); + vertexCopyToLutTextureFunction->release(); + + // Depth stencil cache + depthStencilCache.set(device); + + // Vertex buffer cache + vertexBufferCache.set(device); + + // -------- Depth stencil state -------- + MTL::DepthStencilDescriptor* depthStencilDescriptor = MTL::DepthStencilDescriptor::alloc()->init(); + depthStencilDescriptor->setLabel(toNSString("Default depth stencil 
state")); + defaultDepthStencilState = device->newDepthStencilState(depthStencilDescriptor); + depthStencilDescriptor->release(); + + // Release + copyToLutTextureLibrary->release(); +} + +void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { + const auto color = colorRenderTargetCache.findFromAddress(startAddress); + if (color) { + const float r = Helpers::getBits<24, 8>(value) / 255.0f; + const float g = Helpers::getBits<16, 8>(value) / 255.0f; + const float b = Helpers::getBits<8, 8>(value) / 255.0f; + const float a = (value & 0xff) / 255.0f; + + colorClearOps[color->get().texture] = {r, g, b, a}; + + return; + } + + const auto depth = depthStencilRenderTargetCache.findFromAddress(startAddress); + if (depth) { + float depthVal; + const auto format = depth->get().format; + if (format == DepthFmt::Depth16) { + depthVal = (value & 0xffff) / 65535.0f; + } else { + depthVal = (value & 0xffffff) / 16777215.0f; + } + + depthClearOps[depth->get().texture] = depthVal; + + if (format == DepthFmt::Depth24Stencil8) { + const u8 stencilVal = value >> 24; + stencilClearOps[depth->get().texture] = stencilVal; + } + + return; + } + + Helpers::warn("[RendererMTL::ClearBuffer] No buffer found!\n"); +} + +void RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { + const u32 inputWidth = inputSize & 0xffff; + const u32 inputHeight = inputSize >> 16; + const auto inputFormat = ToColorFormat(Helpers::getBits<8, 3>(flags)); + const auto outputFormat = ToColorFormat(Helpers::getBits<12, 3>(flags)); + const bool verticalFlip = flags & 1; + const PICA::Scaling scaling = static_cast(Helpers::getBits<24, 2>(flags)); + + u32 outputWidth = outputSize & 0xffff; + u32 outputHeight = outputSize >> 16; + + auto srcFramebuffer = getColorRenderTarget(inputAddr, inputFormat, inputWidth, outputHeight); + nextRenderPassName = "Clear before display transfer"; + clearColor(nullptr, srcFramebuffer->texture); + 
Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, outputWidth, outputHeight); + + if (verticalFlip) { + std::swap(srcRect.bottom, srcRect.top); + } + + // Apply scaling for the destination rectangle. + if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { + outputWidth >>= 1; + } + + if (scaling == PICA::Scaling::XY) { + outputHeight >>= 1; + } + + auto destFramebuffer = getColorRenderTarget(outputAddr, outputFormat, outputWidth, outputHeight); + // TODO: clear if not blitting to the whole framebuffer + Math::Rect destRect = destFramebuffer->getSubRect(outputAddr, outputWidth, outputHeight); + + if (inputWidth != outputWidth) { + // Helpers::warn("Strided display transfer is not handled correctly!\n"); + } + + textureCopyImpl(*srcFramebuffer, *destFramebuffer, srcRect, destRect); +} + +void RendererMTL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { + // Texture copy size is aligned to 16 byte units + const u32 copySize = totalBytes & ~0xf; + if (copySize == 0) { + Helpers::warn("TextureCopy total bytes less than 16!\n"); + return; + } + + // The width and gap are provided in 16-byte units. + const u32 inputWidth = (inputSize & 0xffff) << 4; + const u32 inputGap = (inputSize >> 16) << 4; + const u32 outputWidth = (outputSize & 0xffff) << 4; + const u32 outputGap = (outputSize >> 16) << 4; + + if (inputGap != 0 || outputGap != 0) { + // Helpers::warn("Strided texture copy\n"); + } + + if (inputWidth != outputWidth) { + Helpers::warn("Input width does not match output width, cannot accelerate texture copy!"); + return; + } + + // Texture copy is a raw data copy in PICA, which means no format or tiling information is provided to the engine. + // Depending if the target surface is linear or tiled, games set inputWidth to either the width of the texture or + // the width multiplied by eight (because tiles are stored linearly in memory). 
+ // To properly accelerate this we must examine each surface individually. For now we assume the most common case + // of tiled surface with RGBA8 format. If our assumption does not hold true, we abort the texture copy as inserting + // that surface is not correct. + + // We assume the source surface is tiled and RGBA8. inputWidth is in bytes so divide it + // by eight * sizePerPixel(RGBA8) to convert it to a useable width. + const u32 bpp = sizePerPixel(PICA::ColorFmt::RGBA8); + const u32 copyStride = (inputWidth + inputGap) / (8 * bpp); + const u32 copyWidth = inputWidth / (8 * bpp); + + // inputHeight/outputHeight are typically set to zero so they cannot be used to get the height of the copy region + // in contrast to display transfer. Compute height manually by dividing the copy size with the copy width. The result + // is the number of vertical tiles so multiply that by eight to get the actual copy height. + u32 copyHeight; + if (inputWidth != 0) [[likely]] { + copyHeight = (copySize / inputWidth) * 8; + } else { + copyHeight = 0; + } + + // Find the source surface. + auto srcFramebuffer = getColorRenderTarget(inputAddr, PICA::ColorFmt::RGBA8, copyStride, copyHeight, false); + if (!srcFramebuffer) { + Helpers::warn("RendererGL::TextureCopy failed to locate src framebuffer!\n"); + return; + } + nextRenderPassName = "Clear before texture copy"; + clearColor(nullptr, srcFramebuffer->texture); + + Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, copyWidth, copyHeight); + + // Assume the destination surface has the same format. Unless the surfaces have the same block width, + // texture copy does not make sense. 
+ auto destFramebuffer = getColorRenderTarget(outputAddr, srcFramebuffer->format, copyWidth, copyHeight); + // TODO: clear if not blitting to the whole framebuffer + Math::Rect destRect = destFramebuffer->getSubRect(outputAddr, copyWidth, copyHeight); + + textureCopyImpl(*srcFramebuffer, *destFramebuffer, srcRect, destRect); +} + +void RendererMTL::drawVertices(PICA::PrimType primType, std::span vertices) { + // Color + auto colorRenderTarget = getColorRenderTarget(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]); + + // Depth stencil + const u32 depthControl = regs[PICA::InternalRegs::DepthAndColorMask]; + const bool depthStencilWrite = regs[PICA::InternalRegs::DepthBufferWrite]; + const bool depthEnable = depthControl & 0x1; + const bool depthWriteEnable = Helpers::getBit<12>(depthControl); + const u8 depthFunc = Helpers::getBits<4, 3>(depthControl); + const u8 colorMask = Helpers::getBits<8, 4>(depthControl); + + Metal::DepthStencilHash depthStencilHash{false, 1}; + depthStencilHash.stencilConfig = regs[PICA::InternalRegs::StencilTest]; + depthStencilHash.stencilOpConfig = regs[PICA::InternalRegs::StencilOp]; + const bool stencilEnable = Helpers::getBit<0>(depthStencilHash.stencilConfig); + + std::optional depthStencilRenderTarget = std::nullopt; + if (depthEnable) { + depthStencilHash.depthStencilWrite = depthWriteEnable && depthStencilWrite; + depthStencilHash.depthFunc = depthFunc; + depthStencilRenderTarget = getDepthRenderTarget(); + } else { + if (depthWriteEnable) { + depthStencilHash.depthStencilWrite = true; + depthStencilRenderTarget = getDepthRenderTarget(); + } else if (stencilEnable) { + depthStencilRenderTarget = getDepthRenderTarget(); + } + } + + // Depth uniforms + struct { + float depthScale; + float depthOffset; + bool depthMapEnable; + } depthUniforms; + depthUniforms.depthScale = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + depthUniforms.depthOffset = 
Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + depthUniforms.depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; + + // -------- Pipeline -------- + Metal::DrawPipelineHash pipelineHash{colorRenderTarget->format, DepthFmt::Unknown1}; + if (depthStencilRenderTarget) { + pipelineHash.depthFmt = depthStencilRenderTarget->format; + } + pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1; + pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7; + pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u]; + pipelineHash.fragHash.alphaControl = regs[0x104]; + + // Blending and logic op + pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; + pipelineHash.colorWriteMask = colorMask; + + u8 logicOp = 3; // Copy, which doesn't do anything + if (pipelineHash.blendEnabled) { + pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc]; + } else { + logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]); + } + + MTL::RenderPipelineState* pipeline = drawPipelineCache.get(pipelineHash); + + // Depth stencil state + MTL::DepthStencilState* depthStencilState = depthStencilCache.get(depthStencilHash); + + // -------- Render -------- + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + bool doesClear = clearColor(renderPassDescriptor, colorRenderTarget->texture); + if (depthStencilRenderTarget) { + if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture)) + doesClear = true; + if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) { + if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture)) + doesClear = true; + } + } + + nextRenderPassName = "Draw vertices"; + beginRenderPassIfNeeded(renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? 
depthStencilRenderTarget->texture : nullptr)); + + // Update the LUT texture if necessary + if (gpu.lightingLUTDirty) { + updateLightingLUT(renderCommandEncoder); + } + if (gpu.fogLUTDirty) { + updateFogLUT(renderCommandEncoder); + } + + renderCommandEncoder->setRenderPipelineState(pipeline); + renderCommandEncoder->setDepthStencilState(depthStencilState); + // If size is < 4KB, use inline vertex data, otherwise use a buffer + if (vertices.size_bytes() < 4 * 1024) { + renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX); + } else { + Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes()); + renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX); + } + + // Viewport + const u32 viewportX = regs[PICA::InternalRegs::ViewportXY] & 0x3ff; + const u32 viewportY = (regs[PICA::InternalRegs::ViewportXY] >> 16) & 0x3ff; + const u32 viewportWidth = Floats::f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0f; + const u32 viewportHeight = Floats::f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 0xffffff).toFloat32() * 2.0f; + const auto rect = colorRenderTarget->getSubRect(colourBufferLoc, fbSize[0], fbSize[1]); + MTL::Viewport viewport{double(rect.left + viewportX), double(rect.bottom + viewportY), double(viewportWidth), double(viewportHeight), 0.0, 1.0}; + renderCommandEncoder->setViewport(viewport); + + // Blend color + if (pipelineHash.blendEnabled) { + u32 constantColor = regs[PICA::InternalRegs::BlendColour]; + const u8 r = constantColor & 0xff; + const u8 g = Helpers::getBits<8, 8>(constantColor); + const u8 b = Helpers::getBits<16, 8>(constantColor); + const u8 a = Helpers::getBits<24, 8>(constantColor); + + renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f); + } + + // Stencil reference + if (stencilEnable) { + const s8 reference = s8(Helpers::getBits<16, 
8>(depthStencilHash.stencilConfig)); // Signed reference value + renderCommandEncoder->setStencilReferenceValue(reference); + } + + // Bind resources + setupTextureEnvState(renderCommandEncoder); + bindTexturesToSlots(renderCommandEncoder); + renderCommandEncoder->setVertexBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); + renderCommandEncoder->setFragmentBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); + renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2); + renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2); + + renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size())); +} + +void RendererMTL::screenshot(const std::string& name) { + // TODO: implement + Helpers::warn("RendererMTL::screenshot not implemented"); +} + +void RendererMTL::deinitGraphicsContext() { + reset(); + + // Release + copyToLutTexturePipeline->release(); + displayPipeline->release(); + defaultDepthStencilState->release(); + lutTexture->release(); + linearSampler->release(); + nearestSampler->release(); + library->release(); + commandQueue->release(); + device->release(); +} + +std::optional RendererMTL::getColorRenderTarget( + u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound +) { + // Try to find an already existing buffer that contains the provided address + // This is a more relaxed check compared to getColourFBO as display transfer/texcopy may refer to + // subrect of a surface and in case of texcopy we don't know the format of the surface. + auto buffer = colorRenderTargetCache.findFromAddress(addr); + if (buffer.has_value()) { + return buffer.value().get(); + } + + if (!createIfnotFound) { + return std::nullopt; + } + + // Otherwise create and cache a new buffer. 
+ Metal::ColorRenderTarget sampleBuffer(device, addr, format, width, height); + + return colorRenderTargetCache.add(sampleBuffer); +} + +Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { + Metal::DepthStencilRenderTarget sampleBuffer(device, depthBufferLoc, depthBufferFormat, fbSize[0], fbSize[1]); + auto buffer = depthStencilRenderTargetCache.find(sampleBuffer); + + if (buffer.has_value()) { + return buffer.value().get(); + } else { + return depthStencilRenderTargetCache.add(sampleBuffer); + } +} + +Metal::Texture& RendererMTL::getTexture(Metal::Texture& tex) { + auto buffer = textureCache.find(tex); + + if (buffer.has_value()) { + return buffer.value().get(); + } else { + const auto textureData = std::span{gpu.getPointerPhys(tex.location), tex.sizeInBytes()}; // Get pointer to the texture data in 3DS memory + Metal::Texture& newTex = textureCache.add(tex); + newTex.decodeTexture(textureData); + + return newTex; + } +} + +void RendererMTL::setupTextureEnvState(MTL::RenderCommandEncoder* encoder) { + static constexpr std::array ioBases = { + PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, + PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, + }; + + struct { + u32 textureEnvSourceRegs[6]; + u32 textureEnvOperandRegs[6]; + u32 textureEnvCombinerRegs[6]; + u32 textureEnvScaleRegs[6]; + } envState; + u32 textureEnvColourRegs[6]; + + for (int i = 0; i < 6; i++) { + const u32 ioBase = ioBases[i]; + + envState.textureEnvSourceRegs[i] = regs[ioBase]; + envState.textureEnvOperandRegs[i] = regs[ioBase + 1]; + envState.textureEnvCombinerRegs[i] = regs[ioBase + 2]; + textureEnvColourRegs[i] = regs[ioBase + 3]; + envState.textureEnvScaleRegs[i] = regs[ioBase + 4]; + } + + encoder->setVertexBytes(&textureEnvColourRegs, sizeof(textureEnvColourRegs), 1); + encoder->setFragmentBytes(&envState, sizeof(envState), 1); +} + +void 
RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { + static constexpr std::array ioBases = { + PICA::InternalRegs::Tex0BorderColor, + PICA::InternalRegs::Tex1BorderColor, + PICA::InternalRegs::Tex2BorderColor, + }; + + for (int i = 0; i < 3; i++) { + if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { + continue; + } + + const size_t ioBase = ioBases[i]; + + const u32 dim = regs[ioBase + 1]; + const u32 config = regs[ioBase + 2]; + const u32 height = dim & 0x7ff; + const u32 width = Helpers::getBits<16, 11>(dim); + const u32 addr = (regs[ioBase + 4] & 0x0FFFFFFF) << 3; + u32 format = regs[ioBase + (i == 0 ? 13 : 5)] & 0xF; + + if (addr != 0) [[likely]] { + Metal::Texture targetTex(device, addr, static_cast(format), width, height, config); + auto tex = getTexture(targetTex); + encoder->setFragmentTexture(tex.texture, i); + encoder->setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); + } else { + // TODO: bind a dummy texture? + } + } + + // LUT texture + encoder->setFragmentTexture(lutTexture, 3); + encoder->setFragmentSamplerState(linearSampler, 3); +} + +void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { + gpu.lightingLUTDirty = false; + std::array lightingLut = {0.0f}; + + for (int i = 0; i < gpu.lightingLUT.size(); i += 2) { + uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF; + lightingLut[i] = (float)(value << 4) / 65535.0f; + } + + //for (int i = 0; i < Lights::LUT_Count; i++) { + // lutTexture->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0); + //} + + renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); + renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); + renderCommandEncoder->setVertexTexture(lutTexture, 0); + Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); + renderCommandEncoder->setVertexBuffer(buffer.buffer, 
buffer.offset, 0); + u32 arrayOffset = 0; + renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), GPU::LightingLutSize); +} + +void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { + gpu.fogLUTDirty = false; + std::array fogLut = {0.0f}; + + for (int i = 0; i < fogLut.size(); i += 2) { + const uint32_t value = gpu.fogLUT[i >> 1]; + int32_t diff = value & 0x1fff; + diff = (diff << 19) >> 19; // Sign extend the 13-bit value to 32 bits + const float fogDifference = float(diff) / 2048.0f; + const float fogValue = float((value >> 13) & 0x7ff) / 2048.0f; + + fogLut[i] = fogValue; + fogLut[i + 1] = fogDifference; + } + + renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); + renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); + renderCommandEncoder->setVertexTexture(lutTexture, 0); + //Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); + //renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); + renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0); + u32 arrayOffset = (u32)Lights::LUT_Count; + renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(128)); +} + +void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect) { + nextRenderPassName = "Texture copy"; + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole texture + bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture); + beginRenderPassIfNeeded(renderPassDescriptor, 
doesClear, destFramebuffer.texture); + + // Pipeline + Metal::BlitPipelineHash hash{destFramebuffer.format, DepthFmt::Unknown1}; + auto blitPipeline = blitPipelineCache.get(hash); + + renderCommandEncoder->setRenderPipelineState(blitPipeline); + + // Viewport + renderCommandEncoder->setViewport(MTL::Viewport{double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0}); + float srcRectNDC[4] = {srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(), (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v()}; + + // Bind resources + renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0); + renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, 0); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0); + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); +} diff --git a/src/host_shaders/metal_copy_to_lut_texture.metal b/src/host_shaders/metal_copy_to_lut_texture.metal new file mode 100644 index 00000000..40a7f50d --- /dev/null +++ b/src/host_shaders/metal_copy_to_lut_texture.metal @@ -0,0 +1,9 @@ +#include +using namespace metal; + +constant ushort lutTextureWidth [[function_constant(0)]]; + +// The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass +vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d out [[texture(0)]], constant float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { + out.write(float4(data[vid], 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth)); +} diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal new file mode 100644 index 00000000..95f417c7 --- /dev/null +++ b/src/host_shaders/metal_shaders.metal 
@@ -0,0 +1,782 @@
+#include <metal_stdlib>
+using namespace metal;
+
+// Output of the display and blit vertex functions
+struct BasicVertexOut {
+    float4 position [[position]];
+    float2 uv;
+};
+
+// Clip-space corners of a full-screen quad, indexed by vertex id
+constant float4 displayPositions[4] = {
+    float4(-1.0, -1.0, 0.0, 1.0),
+    float4( 1.0, -1.0, 0.0, 1.0),
+    float4(-1.0,  1.0, 0.0, 1.0),
+    float4( 1.0,  1.0, 0.0, 1.0)
+};
+
+// Texture coordinates paired with displayPositions
+constant float2 displayTexCoord[4] = {
+    float2(0.0, 1.0),
+    float2(0.0, 0.0),
+    float2(1.0, 1.0),
+    float2(1.0, 0.0)
+};
+
+// Emits one corner of the full-screen quad, selected by vertex id
+vertex BasicVertexOut vertexDisplay(uint vid [[vertex_id]]) {
+    BasicVertexOut out;
+    out.position = displayPositions[vid];
+    out.uv = displayTexCoord[vid];
+
+    return out;
+}
+
+// Samples the bound texture at the interpolated coordinate
+fragment float4 fragmentDisplay(BasicVertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) {
+    return tex.sample(samplr, in.uv);
+}
+
+// Source rectangle of a blit, as an offset and a scale applied to the quad's 0..1 texture coordinates
+struct NDCViewport {
+    float2 offset;
+    float2 scale;
+};
+
+// Emits one corner of a quad covering the whole viewport, derived from the vertex id.
+// The y position is negated and the texture coordinate is mapped into the source rectangle.
+vertex BasicVertexOut vertexBlit(uint vid [[vertex_id]], constant NDCViewport& viewport [[buffer(0)]]) {
+    BasicVertexOut out;
+    out.uv = float2((vid << 1) & 2, vid & 2);
+    out.position = float4(out.uv * 2.0 - 1.0, 0.0, 1.0);
+    out.position.y = -out.position.y;
+    out.uv = out.uv * viewport.scale + viewport.offset;
+
+    return out;
+}
+
+// Samples the source texture at the interpolated coordinate
+fragment float4 fragmentBlit(BasicVertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) {
+    return tex.sample(samplr, in.uv);
+}
+
+// Window onto the PICA register file: holds registers 0x48 up to (not including) 0x200
+struct PicaRegs {
+    uint regs[0x200 - 0x48];
+
+    // reg is the absolute register index; it is rebased onto the start of the window
+    uint read(uint reg) constant {
+        return regs[reg - 0x48];
+    }
+};
+
+// Packed constant colour of each of the six texture environment stages
+struct VertTEV {
+    uint textureEnvColor[6];
+};
+
+// Unpacks four 8-bit channels into normalised floats, lowest byte first
+float4 abgr8888ToFloat4(uint abgr) {
+    const float scale = 1.0 / 255.0;
+
+    return scale * float4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24));
+}
+
+// Per-vertex input of vertexDraw
+struct DrawVertexIn {
+    float4 position [[attribute(0)]];
+    float4 quaternion [[attribute(1)]];
+    float4 color [[attribute(2)]];
+    float2 texCoord0 [[attribute(3)]];
+    float2 texCoord1 [[attribute(4)]];
+    float texCoord0W [[attribute(5)]];
+    float3 view [[attribute(6)]];
+    float2 texCoord2 [[attribute(7)]];
+};
+
+// Metal cannot return arrays from vertex functions, this is an ugly workaround
+struct EnvColor {
+    float4 c0;
+    float4 c1;
+    float4 c2;
+    float4 c3;
+    float4 c4;
+    float4 c5;
+
+    // Array-style access to the six members; an out-of-range index yields c0
+    thread float4& operator[](int i) {
+        switch (i) {
+            case 0: return c0;
+            case 1: return c1;
+            case 2: return c2;
+            case 3: return c3;
+            case 4: return c4;
+            case 5: return c5;
+            default: return c0;
+        }
+    }
+};
+
+// Rotates v by the quaternion q, where q.xyz is the vector part and q.w the scalar part
+float3 rotateFloat3ByQuaternion(float3 v, float4 q) {
+    float3 u = q.xyz;
+    float s = q.w;
+
+    return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v);
+}
+
+// Convert an arbitrary-width floating point literal to an f32
+// E and M are the number of exponent and mantissa bits of the source format
+float decodeFP(uint hex, uint E, uint M) {
+    uint width = M + E + 1u;
+    uint bias = 128u - (1u << (E - 1u));
+    uint exponent = (hex >> M) & ((1u << E) - 1u);
+    uint mantissa = hex & ((1u << M) - 1u);
+    uint sign = (hex >> (E + M)) << 31u;
+
+    // Anything but (signed) zero is re-biased; an all-ones exponent maps to the f32 all-ones exponent
+    if ((hex & ((1u << (width - 1u)) - 1u)) != 0u) {
+        if (exponent == (1u << E) - 1u)
+            exponent = 255u;
+        else
+            exponent += bias;
+        hex = sign | (mantissa << (23u - M)) | (exponent << 23u);
+    } else {
+        hex = sign;
+    }
+
+    return as_type<float>(hex);
+}
+
+// Depth parameters read by transformZ, bound at vertex buffer 2
+struct DepthUniforms {
+    float depthScale;
+    float depthOffset;
+    bool depthMapEnable;
+};
+
+// Values passed from vertexDraw to the fragment stage
+struct DrawVertexOut {
+    float4 position [[position]];
+    float4 quaternion;
+    float4 color;
+    float3 texCoord0;
+    float2 texCoord1;
+    float2 texCoord2;
+    float3 view;
+    float3 normal;
+    float3 tangent;
+    float3 bitangent;
+    EnvColor textureEnvColor [[flat]];
+    float4 textureEnvBufferColor [[flat]];
+};
+
+// DrawVertexOut plus the two clip distances written by vertexDraw
+struct DrawVertexOutWithClip {
+    DrawVertexOut out;
+    float clipDistance [[clip_distance]] [2];
+};
+
+// Applies the depth scale and offset to z / w and returns the result multiplied by w.
+// When depthMapEnable is false, z is multiplied by w one additional time.
+// TODO: check this
+float transformZ(float z, float w, constant DepthUniforms& depthUniforms) {
+    z = z / w * depthUniforms.depthScale + depthUniforms.depthOffset;
+    if (!depthUniforms.depthMapEnable) {
+        z *= w;
+    }
+
+    return z * w;
+}
+
+// Vertex function for regular draws: forwards the vertex attributes, flips y of the position and of
+// every texture coordinate, derives the TBN vectors from the quaternion and computes the clip distances.
+vertex DrawVertexOutWithClip vertexDraw(DrawVertexIn in [[stage_in]], constant PicaRegs& picaRegs [[buffer(0)]], constant VertTEV& tev [[buffer(1)]], constant DepthUniforms& depthUniforms [[buffer(2)]]) {
+    DrawVertexOut out;
+
+    // Position
+    out.position = in.position;
+    // Flip the y position
+    out.position.y = -out.position.y;
+
+    // Apply depth uniforms
+    out.position.z = transformZ(out.position.z, out.position.w, depthUniforms);
+
+    // Color
+    out.color = min(abs(in.color), 1.0);
+
+    // Texture coordinates
+    out.texCoord0 = float3(in.texCoord0, in.texCoord0W);
+    out.texCoord0.y = 1.0 - out.texCoord0.y;
+    out.texCoord1 = in.texCoord1;
+    out.texCoord1.y = 1.0 - out.texCoord1.y;
+    out.texCoord2 = in.texCoord2;
+    out.texCoord2.y = 1.0 - out.texCoord2.y;
+
+    // View
+    out.view = in.view;
+
+    // TBN
+    out.normal = normalize(rotateFloat3ByQuaternion(float3(0.0, 0.0, 1.0), in.quaternion));
+    out.tangent = normalize(rotateFloat3ByQuaternion(float3(1.0, 0.0, 0.0), in.quaternion));
+    out.bitangent = normalize(rotateFloat3ByQuaternion(float3(0.0, 1.0, 0.0), in.quaternion));
+    out.quaternion = in.quaternion;
+
+    // Environment
+    for (int i = 0; i < 6; i++) {
+        out.textureEnvColor[i] = abgr8888ToFloat4(tev.textureEnvColor[i]);
+    }
+
+    out.textureEnvBufferColor = abgr8888ToFloat4(picaRegs.read(0xFDu));
+
+    DrawVertexOutWithClip outWithClip;
+    outWithClip.out = out;
+
+    // Parse clipping plane registers
+    float4 clipData = float4(
+        decodeFP(picaRegs.read(0x48u) & 0xffffffu, 7u, 16u), decodeFP(picaRegs.read(0x49u) & 0xffffffu, 7u, 16u),
+        decodeFP(picaRegs.read(0x4Au) & 0xffffffu, 7u, 16u), decodeFP(picaRegs.read(0x4Bu) & 0xffffffu, 7u, 16u)
+    );
+
+    // There's also another, always-on clipping plane based on vertex z
+    // TODO: transform
+    outWithClip.clipDistance[0] = -in.position.z;
+    outWithClip.clipDistance[1] = dot(clipData, in.position);
+
+    return outWithClip;
+}
+
+// Function constants, indexed 0-3
+constant bool lightingEnabled [[function_constant(0)]];
+constant uint8_t lightingNumLights [[function_constant(1)]];
+constant uint32_t lightingConfig1 [[function_constant(2)]];
+constant uint16_t alphaControl [[function_constant(3)]];
+
+// Mutable per-invocation state, passed by thread reference to the FragTEV helpers
+struct Globals {
+    bool error_unimpl;
+
+    float4 tevSources[16];
+    float4 tevNextPreviousBuffer;
+    bool tevUnimplementedSourceFlag = false;
+
+    uint GPUREG_LIGHTING_LUTINPUT_SCALE;
+    uint GPUREG_LIGHTING_LUTINPUT_ABS;
+    uint GPUREG_LIGHTING_LUTINPUT_SELECT;
+    uint GPUREG_LIGHTi_CONFIG;
+
+    // HACK
+    //bool lightingEnabled;
+    //uint8_t lightingNumLights;
+    //uint32_t lightingConfig1;
+    //uint16_t alphaControl;
+
+    float3 normal;
+};
+
+// See docs/lighting.md
+constant uint samplerEnabledBitfields[2] = {0x7170e645u, 0x7f013fefu};
+
+// Looks up bit (7 * environment_id + lut_id) of the packed table above
+bool isSamplerEnabled(uint environment_id, uint lut_id) {
+    uint index = 7 * environment_id + lut_id;
+    uint arrayIndex = (index >> 5);
+    return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u;
+}
+
+struct FragTEV {
+    uint textureEnvSource[6];
+    uint textureEnvOperand[6];
+    uint textureEnvCombiner[6];
+    uint textureEnvScale[6];
+
+    float4 fetchSource(thread Globals& globals, uint src_id) constant {
+        if (src_id >= 6u && src_id < 13u) {
+            globals.tevUnimplementedSourceFlag = true;
+        }
+
+        return globals.tevSources[src_id];
+    }
+
+    float4 getColorAndAlphaSource(thread Globals& globals, int tev_id, int src_id) constant {
+        float4 result;
+
+        float4 colorSource = fetchSource(globals, (textureEnvSource[tev_id] >> (src_id * 4)) & 15u);
+        float4 alphaSource = fetchSource(globals, (textureEnvSource[tev_id] >> (src_id * 4 + 16)) & 15u);
+
+        uint colorOperand = (textureEnvOperand[tev_id] >> (src_id * 4)) & 15u;
+        uint alphaOperand = (textureEnvOperand[tev_id] >> (12 + src_id * 4)) & 7u;
+
+        // TODO: figure out what the undocumented values do
+        switch (colorOperand) {
+            case 0u: result.rgb = colorSource.rgb; break; // Source color
+            case 1u: result.rgb = 1.0 - colorSource.rgb; break; // One minus source color
+            case 2u: result.rgb = float3(colorSource.a); break; // Source alpha
+            case 3u: result.rgb = float3(1.0 - colorSource.a); break; // One minus source alpha
+            case 4u:
result.rgb = float3(colorSource.r); break; // Source red + case 5u: result.rgb = float3(1.0 - colorSource.r); break; // One minus source red + case 8u: result.rgb = float3(colorSource.g); break; // Source green + case 9u: result.rgb = float3(1.0 - colorSource.g); break; // One minus source green + case 12u: result.rgb = float3(colorSource.b); break; // Source blue + case 13u: result.rgb = float3(1.0 - colorSource.b); break; // One minus source blue + default: break; + } + + // TODO: figure out what the undocumented values do + switch (alphaOperand) { + case 0u: result.a = alphaSource.a; break; // Source alpha + case 1u: result.a = 1.0 - alphaSource.a; break; // One minus source alpha + case 2u: result.a = alphaSource.r; break; // Source red + case 3u: result.a = 1.0 - alphaSource.r; break; // One minus source red + case 4u: result.a = alphaSource.g; break; // Source green + case 5u: result.a = 1.0 - alphaSource.g; break; // One minus source green + case 6u: result.a = alphaSource.b; break; // Source blue + case 7u: result.a = 1.0 - alphaSource.b; break; // One minus source blue + default: break; + } + + return result; + } + + float4 calculateCombiner(thread Globals& globals, int tev_id) constant { + float4 source0 = getColorAndAlphaSource(globals, tev_id, 0); + float4 source1 = getColorAndAlphaSource(globals, tev_id, 1); + float4 source2 = getColorAndAlphaSource(globals, tev_id, 2); + + uint colorCombine = textureEnvCombiner[tev_id] & 15u; + uint alphaCombine = (textureEnvCombiner[tev_id] >> 16) & 15u; + + float4 result = float4(1.0); + + // TODO: figure out what the undocumented values do + switch (colorCombine) { + case 0u: result.rgb = source0.rgb; break; // Replace + case 1u: result.rgb = source0.rgb * source1.rgb; break; // Modulate + case 2u: result.rgb = min(float3(1.0), source0.rgb + source1.rgb); break; // Add + case 3u: result.rgb = clamp(source0.rgb + source1.rgb - 0.5, 0.0, 1.0); break; // Add signed + case 4u: result.rgb = mix(source1.rgb, source0.rgb, 
source2.rgb); break; // Interpolate + case 5u: result.rgb = max(source0.rgb - source1.rgb, 0.0); break; // Subtract + case 6u: result.rgb = float3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB + case 7u: result = float4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA + case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add + case 9u: result.rgb = min((source0.rgb + source1.rgb), 1.0) * source2.rgb; break; // Add then multiply + default: break; + } + + if (colorCombine != 7u) { // The color combiner also writes the alpha channel in the "Dot3 RGBA" mode. + // TODO: figure out what the undocumented values do + // TODO: test if the alpha combiner supports all the same modes as the color combiner. + switch (alphaCombine) { + case 0u: result.a = source0.a; break; // Replace + case 1u: result.a = source0.a * source1.a; break; // Modulate + case 2u: result.a = min(1.0, source0.a + source1.a); break; // Add + case 3u: result.a = clamp(source0.a + source1.a - 0.5, 0.0, 1.0); break; // Add signed + case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate + case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract + case 8u: result.a = min(source0.a * source1.a + source2.a, 1.0); break; // Multiply then add + case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break; // Add then multiply + default: break; + } + } + + result.rgb *= float(1 << (textureEnvScale[tev_id] & 3u)); + result.a *= float(1 << ((textureEnvScale[tev_id] >> 16) & 3u)); + + return result; + } +}; + +enum class LogicOp : uint8_t { + Clear = 0, + And = 1, + AndReverse = 2, + Copy = 3, + Set = 4, + CopyInverted = 5, + NoOp = 6, + Invert = 7, + Nand = 8, + Or = 9, + Nor = 10, + Xor = 11, + Equiv = 12, + AndInverted = 13, + OrReverse = 14, + OrInverted = 15 +}; + +uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) { + switch (logicOp) { + case LogicOp::Clear: return 
as_type(float4(0.0)); + case LogicOp::And: return s & d; + case LogicOp::AndReverse: return s & ~d; + case LogicOp::Copy: return s; + case LogicOp::Set: return as_type(float4(1.0)); + case LogicOp::CopyInverted: return ~s; + case LogicOp::NoOp: return d; + case LogicOp::Invert: return ~d; + case LogicOp::Nand: return ~(s & d); + case LogicOp::Or: return s | d; + case LogicOp::Nor: return ~(s | d); + case LogicOp::Xor: return s ^ d; + case LogicOp::Equiv: return ~(s ^ d); + case LogicOp::AndInverted: return ~s & d; + case LogicOp::OrReverse: return s | ~d; + case LogicOp::OrInverted: return ~s | d; + } +} + +#define D0_LUT 0u +#define D1_LUT 1u +#define SP_LUT 2u +#define FR_LUT 3u +#define RB_LUT 4u +#define RG_LUT 5u +#define RR_LUT 6u + +#define FOG_INDEX 24 + +float lutLookup(texture2d texLut, uint lut, uint index) { + return texLut.read(uint2(index, lut)).r; +} + +float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d texLut, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) { + uint lut_index; + int bit_in_config1; + if (lut_id == SP_LUT) { + // These are the spotlight attenuation LUTs + bit_in_config1 = 8 + int(light_id & 7u); + lut_index = 8u + light_id; + } else if (lut_id <= 6) { + bit_in_config1 = 16 + int(lut_id); + lut_index = lut_id; + } else { + globals.error_unimpl = true; + } + + bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + + if (!current_sampler_enabled || (extract_bits(lightingConfig1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + + uint scale_id = extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; + uint input_id = extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); + switch (input_id) { + case 0u: { + delta = dot(globals.normal, 
normalize(half_vector)); + break; + } + case 1u: { + delta = dot(normalize(in.view), normalize(half_vector)); + break; + } + case 2u: { + delta = dot(globals.normal, normalize(in.view)); + break; + } + case 3u: { + delta = dot(light_vector, globals.normal); + break; + } + case 4u: { + int GPUREG_LIGHTi_SPOTDIR_LOW = int(picaRegs.read(0x0146u + (light_id << 4u))); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(picaRegs.read(0x0147u + (light_id << 4u))); + + // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions + // of GLSL so we do it manually + int se_x = extract_bits(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); + int se_y = extract_bits(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); + int se_z = extract_bits(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; + if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000; + + // These are fixed point 1.1.11 values, so we need to convert them to float + float x = float(se_x) / 2047.0; + float y = float(se_y) / 2047.0; + float z = float(se_z) / 2047.0; + float3 spotlight_vector = float3(x, y, z); + delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector + break; + } + case 5u: { + delta = 1.0; // TODO: cos (aka CP); + globals.error_unimpl = true; + break; + } + default: { + delta = 1.0; + globals.error_unimpl = true; + break; + } + } + + // 0 = enabled + if (extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { + // Two sided diffuse + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); + } + int index = int(clamp(floor(delta * 255.0), 0.f, 255.f)); + return lutLookup(texLut, lut_index, index) * scale; + } else { + // Range is [-1, 1] so we need to map it to [0, 1] + int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); + if (index < 0) index += 256; + return 
lutLookup(texLut, lut_index, index) * scale; + } +} + +float3 regToColor(uint reg) { + // Normalization scale to convert from [0...255] to [0.0...1.0] + const float scale = 1.0 / 255.0; + + return scale * float3(float(extract_bits(reg, 20, 8)), float(extract_bits(reg, 10, 8)), float(extract_bits(reg, 00, 8))); +} + +// Implements the following algorithm: https://mathb.in/26766 +void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d texLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { + // Quaternions describe a transformation from surface-local space to eye space. + // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), + // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). + //float3 normal = normalize(in.normal); + //float3 tangent = normalize(in.tangent); + //float3 bitangent = normalize(in.bitangent); + //float3 view = normalize(in.view); + + uint GPUREG_LIGHTING_LIGHT_PERMUTATION = picaRegs.read(0x01D9u); + + primaryColor = float4(0.0, 0.0, 0.0, 1.0); + secondaryColor = float4(0.0, 0.0, 0.0, 1.0); + + uint GPUREG_LIGHTING_CONFIG0 = picaRegs.read(0x01C3u); + globals.GPUREG_LIGHTING_LUTINPUT_SCALE = picaRegs.read(0x01D2u); + globals.GPUREG_LIGHTING_LUTINPUT_ABS = picaRegs.read(0x01D0u); + globals.GPUREG_LIGHTING_LUTINPUT_SELECT = picaRegs.read(0x01D1u); + + uint bumpMode = extract_bits(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games e.g.
Toad Treasure Tracker + switch (bumpMode) { + default: { + globals.normal = rotateFloat3ByQuaternion(float3(0.0, 0.0, 1.0), in.quaternion); + break; + } + } + + float4 diffuseSum = float4(0.0, 0.0, 0.0, 1.0); + float4 specularSum = float4(0.0, 0.0, 0.0, 1.0); + + uint environmentId = extract_bits(GPUREG_LIGHTING_CONFIG0, 4, 4); + bool clampHighlights = extract_bits(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; + + uint lightId; + float3 lightVector = float3(0.0); + float3 halfVector = float3(0.0); + + for (uint i = 0u; i < lightingNumLights + 1; i++) { + lightId = extract_bits(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); + + uint GPUREG_LIGHTi_SPECULAR0 = picaRegs.read(0x0140u + (lightId << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = picaRegs.read(0x0141u + (lightId << 4u)); + uint GPUREG_LIGHTi_DIFFUSE = picaRegs.read(0x0142u + (lightId << 4u)); + uint GPUREG_LIGHTi_AMBIENT = picaRegs.read(0x0143u + (lightId << 4u)); + uint GPUREG_LIGHTi_VECTOR_LOW = picaRegs.read(0x0144u + (lightId << 4u)); + uint GPUREG_LIGHTi_VECTOR_HIGH = picaRegs.read(0x0145u + (lightId << 4u)); + globals.GPUREG_LIGHTi_CONFIG = picaRegs.read(0x0149u + (lightId << 4u)); + + float lightDistance; + float3 lightPosition = normalize(float3( + decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), + decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) + )); + + // Positional Light + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { + // error_unimpl = true; + lightVector = lightPosition + in.view; + } + + // Directional light + else { + lightVector = lightPosition; + } + + lightDistance = length(lightVector); + lightVector = normalize(lightVector); + halfVector = lightVector + normalize(in.view); + + float NdotL = dot(globals.normal, lightVector); // N dot Li + + // Two sided diffuse + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) + NdotL = max(0.0, NdotL); + else + NdotL = abs(NdotL); + 
+ float geometricFactor; + bool useGeo0 = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; + bool useGeo1 = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (useGeo0 || useGeo1) { + geometricFactor = dot(halfVector, halfVector); + geometricFactor = geometricFactor == 0.0 ? 0.0 : min(NdotL / geometricFactor, 1.0); + } + + float distanceAttenuation = 1.0; + if (extract_bits(lightingConfig1, 24 + int(lightId), 1) == 0u) { + uint GPUREG_LIGHTi_ATTENUATION_BIAS = extract_bits(picaRegs.read(0x014Au + (lightId << 4u)), 0, 20); + uint GPUREG_LIGHTi_ATTENUATION_SCALE = extract_bits(picaRegs.read(0x014Bu + (lightId << 4u)), 0, 20); + + float distanceAttenuationBias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distanceAttenuationScale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); + + float delta = lightDistance * distanceAttenuationScale + distanceAttenuationBias; + delta = clamp(delta, 0.0, 1.0); + int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); + distanceAttenuation = lutLookup(texLut, 16u + lightId, index); + } + + float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, environmentId, SP_LUT, lightId, lightVector, halfVector); + float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D0_LUT, lightId, lightVector, halfVector); + float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D1_LUT, lightId, lightVector, halfVector); + float3 reflectedColor; + reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RR_LUT, lightId, lightVector, halfVector); + + if (isSamplerEnabled(environmentId, RG_LUT)) { + reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RG_LUT, lightId, lightVector, halfVector); + } else { + reflectedColor.g = reflectedColor.r; + } + + if (isSamplerEnabled(environmentId, RB_LUT)) { + reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, environmentId, 
RB_LUT, lightId, lightVector, halfVector); + } else { + reflectedColor.b = reflectedColor.r; + } + + float3 specular0 = regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0Distribution; + float3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1Distribution * reflectedColor; + + specular0 *= useGeo0 ? geometricFactor : 1.0; + specular1 *= useGeo1 ? geometricFactor : 1.0; + + float clampFactor = 1.0; + if (clampHighlights && NdotL == 0.0) { + clampFactor = 0.0; + } + + float lightFactor = distanceAttenuation * spotlightAttenuation; + diffuseSum.rgb += lightFactor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); + specularSum.rgb += lightFactor * clampFactor * (specular0 + specular1); + } + uint fresnelOutput1 = extract_bits(GPUREG_LIGHTING_CONFIG0, 2, 1); + uint fresnelOutput2 = extract_bits(GPUREG_LIGHTING_CONFIG0, 3, 1); + + float fresnelFactor; + + if (fresnelOutput1 == 1u || fresnelOutput2 == 1u) { + fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, environmentId, FR_LUT, lightId, lightVector, halfVector); + } + + if (fresnelOutput1 == 1u) { + diffuseSum.a = fresnelFactor; + } + + if (fresnelOutput2 == 1u) { + specularSum.a = fresnelFactor; + } + + uint GPUREG_LIGHTING_AMBIENT = picaRegs.read(0x01C0u); + float4 globalAmbient = float4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0); + primaryColor = clamp(globalAmbient + diffuseSum, 0.0, 1.0); + secondaryColor = clamp(specularSum, 0.0, 1.0); +} + +float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) { + return as_type(performLogicOpU(logicOp, as_type(s), as_type(d))); +} + +fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], + texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture2d texLut [[texture(3)]], + sampler samplr0 [[sampler(0)]], sampler samplr1 
[[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { + Globals globals; + + // HACK + //globals.lightingEnabled = picaRegs.read(0x008Fu) != 0u; + //globals.lightingNumLights = picaRegs.read(0x01C2u); + //globals.lightingConfig1 = picaRegs.read(0x01C4u); + //globals.alphaControl = picaRegs.read(0x104); + + globals.tevSources[0] = in.color; + if (lightingEnabled) { + calcLighting(globals, in, picaRegs, texLut, linearSampler, globals.tevSources[1], globals.tevSources[2]); + } else { + globals.tevSources[1] = float4(0.0); + globals.tevSources[2] = float4(0.0); + } + + uint textureConfig = picaRegs.read(0x80u); + float2 texCoord2 = (textureConfig & (1u << 13)) != 0u ? in.texCoord1 : in.texCoord2; + + if ((textureConfig & 1u) != 0u) globals.tevSources[3] = tex0.sample(samplr0, in.texCoord0.xy); + if ((textureConfig & 2u) != 0u) globals.tevSources[4] = tex1.sample(samplr1, in.texCoord1); + if ((textureConfig & 4u) != 0u) globals.tevSources[5] = tex2.sample(samplr2, texCoord2); + globals.tevSources[13] = float4(0.0); // Previous buffer + globals.tevSources[15] = in.color; // Previous combiner + + globals.tevNextPreviousBuffer = in.textureEnvBufferColor; + uint textureEnvUpdateBuffer = picaRegs.read(0xE0u); + + for (int i = 0; i < 6; i++) { + globals.tevSources[14] = in.textureEnvColor[i]; // Constant color + globals.tevSources[15] = tev.calculateCombiner(globals, i); + globals.tevSources[13] = globals.tevNextPreviousBuffer; + + if (i < 4) { + if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) { + globals.tevNextPreviousBuffer.rgb = globals.tevSources[15].rgb; + } + + if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) { + globals.tevNextPreviousBuffer.a = globals.tevSources[15].a; + } + } + } + + float4 color = globals.tevSources[15]; + + // Fog + bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u; + + if (enable_fog) { + bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; + float fog_index = flip_depth ? 
1.0 - in.position.z : in.position.z; + fog_index *= 128.0; + float clamped_index = clamp(floor(fog_index), 0.0, 127.0); + float delta = fog_index - clamped_index; + float2 value = texLut.read(uint2(clamped_index, FOG_INDEX)).rg; + float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0); + + uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u); + + // Annoyingly color is not encoded in the same way as light color + float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0; + float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0; + float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0; + float3 fog_color = float3(r, g, b); + + color.rgb = mix(fog_color, color.rgb, fog_factor); + } + + // Perform alpha test + if ((alphaControl & 1u) != 0u) { // Check if alpha test is on + uint func = (alphaControl >> 4u) & 7u; + float reference = float((alphaControl >> 8u) & 0xffu) / 255.0; + float alpha = color.a; + + switch (func) { + case 0u: discard_fragment(); // Never pass alpha test + case 1u: break; // Always pass alpha test + case 2u: // Pass if equal + if (alpha != reference) discard_fragment(); + break; + case 3u: // Pass if not equal + if (alpha == reference) discard_fragment(); + break; + case 4u: // Pass if less than + if (alpha >= reference) discard_fragment(); + break; + case 5u: // Pass if less than or equal + if (alpha > reference) discard_fragment(); + break; + case 6u: // Pass if greater than + if (alpha <= reference) discard_fragment(); + break; + case 7u: // Pass if greater than or equal + if (alpha < reference) discard_fragment(); + break; + } + } + + return performLogicOp(logicOp, color, prevColor); +} From 58e1a536996348d1ec4c8ab5a65b396fe28ccb3b Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 16 Aug 2024 11:06:23 +0200 Subject: [PATCH 03/16] metal: create renderer --- include/panda_qt/main_window.hpp | 1 + include/renderer.hpp | 3 ++- include/renderer_gl/surface_cache.hpp | 4 ++-- src/core/PICA/gpu.cpp | 11 ++++++++++- src/panda_qt/main_window.cpp | 5 ++++- 
src/panda_sdl/frontend_sdl.cpp | 14 ++++++++++++-- src/renderer.cpp | 4 +++- 7 files changed, 34 insertions(+), 8 deletions(-) diff --git a/include/panda_qt/main_window.hpp b/include/panda_qt/main_window.hpp index 3ff16a1d..fc756b9f 100644 --- a/include/panda_qt/main_window.hpp +++ b/include/panda_qt/main_window.hpp @@ -129,6 +129,7 @@ class MainWindow : public QMainWindow { // Tracks whether we are using an OpenGL-backed renderer or a Vulkan-backed renderer bool usingGL = false; bool usingVk = false; + bool usingMtl = false; // Variables to keep track of whether the user is controlling the 3DS analog stick with their keyboard // This is done so when a gamepad is connected, we won't automatically override the 3DS analog stick settings with the gamepad's state diff --git a/include/renderer.hpp b/include/renderer.hpp index 569a730b..4eacf0b1 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -17,7 +17,8 @@ enum class RendererType : s8 { Null = 0, OpenGL = 1, Vulkan = 2, - Software = 3, + Metal = 3, + Software = 4, }; struct EmulatorConfig; diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp index 5323741f..7346fd11 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -19,8 +19,8 @@ template class SurfaceCache { // Vanilla std::optional can't hold actual references using OptionalRef = std::optional>; - static_assert(std::is_same() || std::is_same() || - std::is_same(), "Invalid surface type"); + //static_assert(std::is_same() || std::is_same() || + // std::is_same(), "Invalid surface type"); size_t size; size_t evictionIndex; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index fe336edc..95001b33 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -15,6 +15,9 @@ #ifdef PANDA3DS_ENABLE_VULKAN #include "renderer_vk/renderer_vk.hpp" #endif +#ifdef PANDA3DS_ENABLE_METAL +#include "renderer_mtl/renderer_mtl.hpp" +#endif constexpr u32 topScreenWidth = 
240; constexpr u32 topScreenHeight = 400; @@ -52,6 +55,12 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) { renderer.reset(new RendererVK(*this, regs, externalRegs)); break; } +#endif +#ifdef PANDA3DS_ENABLE_METAL + case RendererType::Metal: { + renderer.reset(new RendererMTL(*this, regs, externalRegs)); + break; + } #endif default: { Helpers::panic("Rendering backend not supported: %s", Renderer::typeToString(config.rendererType)); @@ -365,7 +374,7 @@ PICA::Vertex GPU::getImmediateModeVertex() { // Run VS and return vertex data. TODO: Don't hardcode offsets for each attribute shaderUnit.vs.run(); - + // Map shader outputs to fixed function properties const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7; for (int i = 0; i < totalShaderOutputs; i++) { diff --git a/src/panda_qt/main_window.cpp b/src/panda_qt/main_window.cpp index f1949da7..4c187bc2 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp @@ -103,6 +103,7 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) const RendererType rendererType = emu->getConfig().rendererType; usingGL = (rendererType == RendererType::OpenGL || rendererType == RendererType::Software || rendererType == RendererType::Null); usingVk = (rendererType == RendererType::Vulkan); + usingMtl = (rendererType == RendererType::Metal); if (usingGL) { // Make GL context current for this thread, enable VSync @@ -113,6 +114,8 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) emu->initGraphicsContext(glContext); } else if (usingVk) { Helpers::panic("Vulkan on Qt is currently WIP, try the SDL frontend instead!"); + } else if (usingMtl) { + Helpers::panic("Metal on Qt currently doesn't work, try the SDL frontend instead!"); } else { Helpers::panic("Unsupported graphics backend for Qt frontend!"); } @@ -628,4 +631,4 @@ void MainWindow::setupControllerSensors(SDL_GameController* controller) { if (haveGyro) { 
SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); } -} \ No newline at end of file +} diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 8f9f4240..057a4858 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -67,6 +67,16 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp } #endif +#ifdef PANDA3DS_ENABLE_METAL + if (config.rendererType == RendererType::Metal) { + window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_METAL | SDL_WINDOW_RESIZABLE); + + if (window == nullptr) { + Helpers::warn("Window creation failed: %s", SDL_GetError()); + } + } +#endif + emu.initGraphicsContext(window); } @@ -286,7 +296,7 @@ void FrontendSDL::run() { } break; } - + case SDL_CONTROLLERSENSORUPDATE: { if (event.csensor.sensor == SDL_SENSOR_GYRO) { auto rotation = Gyro::SDL::convertRotation({ @@ -370,4 +380,4 @@ void FrontendSDL::setupControllerSensors(SDL_GameController* controller) { if (haveGyro) { SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); } -} \ No newline at end of file +} diff --git a/src/renderer.cpp b/src/renderer.cpp index 76c3e7a0..6a18df85 100644 --- a/src/renderer.cpp +++ b/src/renderer.cpp @@ -17,6 +17,7 @@ std::optional Renderer::typeFromString(std::string inString) { {"null", RendererType::Null}, {"nil", RendererType::Null}, {"none", RendererType::Null}, {"gl", RendererType::OpenGL}, {"ogl", RendererType::OpenGL}, {"opengl", RendererType::OpenGL}, {"vk", RendererType::Vulkan}, {"vulkan", RendererType::Vulkan}, {"vulcan", RendererType::Vulkan}, + {"mtl", RendererType::Metal}, {"metal", RendererType::Metal}, {"sw", RendererType::Software}, {"soft", RendererType::Software}, {"software", RendererType::Software}, {"softrast", RendererType::Software}, }; @@ -33,7 +34,8 @@ const char* Renderer::typeToString(RendererType rendererType) { case RendererType::Null: return "null"; 
case RendererType::OpenGL: return "opengl"; case RendererType::Vulkan: return "vulkan"; + case RendererType::Metal: return "metal"; case RendererType::Software: return "software"; default: return "Invalid"; } -} \ No newline at end of file +} From 45eda2f12048204315ce771ba84b95754bf6fdc6 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 16 Aug 2024 12:25:46 +0200 Subject: [PATCH 04/16] bring back cmake changes --- CMakeLists.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 31fdd9f2..4fd12174 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,6 +56,11 @@ if(BUILD_LIBRETRO_CORE) add_compile_definitions(__LIBRETRO__) endif() +if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND ENABLE_USER_BUILD) + # Disable stack buffer overflow checks in user builds + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS-") +endif() + add_library(AlberCore STATIC) include_directories(${PROJECT_SOURCE_DIR}/include/) @@ -256,6 +261,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp + include/sdl_gyro.hpp ) cmrc_add_resource_library( @@ -570,7 +576,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) ) else() set(FRONTEND_SOURCE_FILES src/panda_sdl/main.cpp src/panda_sdl/frontend_sdl.cpp src/panda_sdl/mappings.cpp) - set(FRONTEND_HEADER_FILES "") + set(FRONTEND_HEADER_FILES "include/panda_sdl/frontend_sdl.hpp") endif() target_link_libraries(Alber PRIVATE AlberCore) From 8830747e90a119d50fb5c7063af1f9e08bfe4cdd Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 29 Aug 2024 19:43:36 +0200 Subject: [PATCH 05/16] clear render targets after creation --- 
.../renderer_mtl/mtl_vertex_buffer_cache.hpp | 2 +- src/core/renderer_mtl/renderer_mtl.cpp | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp index 1760cdfa..8aa299da 100644 --- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -12,7 +12,7 @@ struct BufferHandle { }; // 64MB buffer for caching vertex data -#define CACHE_BUFFER_SIZE 64 * 1024 * 1024 +#define CACHE_BUFFER_SIZE 128 * 1024 * 1024 class VertexBufferCache { public: diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index 10bca5dd..bdb5390d 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -413,7 +413,7 @@ void RendererMTL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 // Find the source surface. auto srcFramebuffer = getColorRenderTarget(inputAddr, PICA::ColorFmt::RGBA8, copyStride, copyHeight, false); if (!srcFramebuffer) { - Helpers::warn("RendererGL::TextureCopy failed to locate src framebuffer!\n"); + Helpers::warn("RendererMTL::TextureCopy failed to locate src framebuffer!\n"); return; } nextRenderPassName = "Clear before texture copy"; @@ -605,7 +605,12 @@ std::optional RendererMTL::getColorRenderTarget( // Otherwise create and cache a new buffer. 
Metal::ColorRenderTarget sampleBuffer(device, addr, format, width, height); - return colorRenderTargetCache.add(sampleBuffer); + auto& colorBuffer = colorRenderTargetCache.add(sampleBuffer); + + // Clear the color buffer + colorClearOps[colorBuffer.texture] = {0, 0, 0, 0}; + + return colorBuffer; } Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { @@ -615,7 +620,15 @@ Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { if (buffer.has_value()) { return buffer.value().get(); } else { - return depthStencilRenderTargetCache.add(sampleBuffer); + auto& depthBuffer = depthStencilRenderTargetCache.add(sampleBuffer); + + // Clear the depth buffer + depthClearOps[depthBuffer.texture] = 0.0f; + if (depthBuffer.format == DepthFmt::Depth24Stencil8) { + stencilClearOps[depthBuffer.texture] = 0; + } + + return depthBuffer; } } From 90420160f2092b2c2cc4bf14b261480bc8849875 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 24 Sep 2024 14:51:27 +0200 Subject: [PATCH 06/16] correct comment --- include/renderer_mtl/mtl_vertex_buffer_cache.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp index 8aa299da..cc552477 100644 --- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -11,7 +11,7 @@ struct BufferHandle { size_t offset; }; -// 64MB buffer for caching vertex data +// 128MB buffer for caching vertex data #define CACHE_BUFFER_SIZE 128 * 1024 * 1024 class VertexBufferCache { From 158be432fcf36ca4165bca1868850b3339a7dace Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 31 Oct 2024 09:25:01 +0100 Subject: [PATCH 07/16] rework the lut system --- CMakeLists.txt | 4 +- .../renderer_mtl/mtl_draw_pipeline_cache.hpp | 5 +- include/renderer_mtl/mtl_lut_texture.hpp | 25 ++ include/renderer_mtl/mtl_texture.hpp | 1 + include/renderer_mtl/renderer_mtl.hpp | 29 +- 
src/core/renderer_mtl/mtl_lut_texture.cpp | 32 +++ src/core/renderer_mtl/renderer_mtl.cpp | 269 +++++++++++------- .../metal_copy_to_lut_texture.metal | 2 +- src/host_shaders/metal_shaders.metal | 58 ++-- 9 files changed, 273 insertions(+), 152 deletions(-) create mode 100644 include/renderer_mtl/mtl_lut_texture.hpp create mode 100644 src/core/renderer_mtl/mtl_lut_texture.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fac11cbc..854b9b9e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -482,6 +482,7 @@ if(ENABLE_METAL AND APPLE) include/renderer_mtl/mtl_render_target.hpp include/renderer_mtl/mtl_texture.hpp include/renderer_mtl/mtl_vertex_buffer_cache.hpp + include/renderer_mtl/mtl_lut_texture.hpp include/renderer_mtl/pica_to_mtl.hpp include/renderer_mtl/objc_helper.hpp ) @@ -490,6 +491,7 @@ if(ENABLE_METAL AND APPLE) src/core/renderer_mtl/renderer_mtl.cpp src/core/renderer_mtl/mtl_texture.cpp src/core/renderer_mtl/mtl_etc1.cpp + src/core/renderer_mtl/mtl_lut_texture.cpp src/core/renderer_mtl/objc_helper.mm src/host_shaders/metal_shaders.metal src/host_shaders/metal_copy_to_lut_texture.metal @@ -587,7 +589,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) if(NOT ENABLE_OPENGL) message(FATAL_ERROR "Qt frontend requires OpenGL") endif() - + option(GENERATE_QT_TRANSLATION "Generate Qt translation file" OFF) set(QT_LANGUAGES docs/translations) diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp index 8bfea636..c5105a13 100644 --- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -135,7 +135,10 @@ public: colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); } - desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt); + desc->setDepthAttachmentPixelFormat(depthFormat); + if (hash.depthFmt == DepthFmt::Depth24Stencil8) + 
desc->setStencilAttachmentPixelFormat(depthFormat); NS::Error* error = nullptr; desc->setLabel(toNSString("Draw pipeline")); diff --git a/include/renderer_mtl/mtl_lut_texture.hpp b/include/renderer_mtl/mtl_lut_texture.hpp new file mode 100644 index 00000000..162bfe25 --- /dev/null +++ b/include/renderer_mtl/mtl_lut_texture.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace Metal { + +class LutTexture { +public: + LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name); + ~LutTexture(); + + u32 getNextIndex(); + + // Getters + MTL::Texture* getTexture() { return texture; } + + u32 getCurrentIndex() { return currentIndex; } + +private: + MTL::Texture* texture; + + u32 currentIndex = 0; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp index 590132bd..9cec268d 100644 --- a/include/renderer_mtl/mtl_texture.hpp +++ b/include/renderer_mtl/mtl_texture.hpp @@ -1,4 +1,5 @@ #pragma once + #include #include #include diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp index 9ba0937a..e28b63b4 100644 --- a/include/renderer_mtl/renderer_mtl.hpp +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -1,3 +1,5 @@ +#pragma once + #include #include @@ -8,6 +10,8 @@ #include "mtl_draw_pipeline_cache.hpp" #include "mtl_depth_stencil_cache.hpp" #include "mtl_vertex_buffer_cache.hpp" +#include "mtl_lut_texture.hpp" + // HACK: use the OpenGL cache #include "../renderer_gl/surface_cache.hpp" @@ -54,12 +58,15 @@ class RendererMTL final : public Renderer { Metal::DepthStencilCache depthStencilCache; Metal::VertexBufferCache vertexBufferCache; - // Objects + // Resources MTL::SamplerState* nearestSampler; MTL::SamplerState* linearSampler; - MTL::Texture* lutTexture; + MTL::Texture* nullTexture; MTL::DepthStencilState* defaultDepthStencilState; + Metal::LutTexture* lutLightingTexture; + Metal::LutTexture* 
lutFogTexture; + // Pipelines MTL::RenderPipelineState* displayPipeline; MTL::RenderPipelineState* copyToLutTexturePipeline; @@ -91,21 +98,7 @@ class RendererMTL final : public Renderer { } } - void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr) { - createCommandBufferIfNeeded(); - - if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { - endRenderPass(); - - renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); - renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); - - lastColorTexture = colorTexture; - lastDepthTexture = depthTexture; - } - - renderPassDescriptor->release(); - } + void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr); void commitCommandBuffer() { if (renderCommandEncoder) { @@ -115,6 +108,8 @@ class RendererMTL final : public Renderer { } if (commandBuffer) { commandBuffer->commit(); + // HACK + commandBuffer->waitUntilCompleted(); commandBuffer->release(); commandBuffer = nullptr; } diff --git a/src/core/renderer_mtl/mtl_lut_texture.cpp b/src/core/renderer_mtl/mtl_lut_texture.cpp new file mode 100644 index 00000000..ac4ff6d9 --- /dev/null +++ b/src/core/renderer_mtl/mtl_lut_texture.cpp @@ -0,0 +1,32 @@ +#include "renderer_mtl/renderer_mtl.hpp" + +namespace Metal { + +constexpr u32 LAYER_COUNT = 1024; + +LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) { + MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); + desc->setTextureType(type); + desc->setPixelFormat(pixelFormat); + desc->setWidth(width); + desc->setHeight(height); + desc->setArrayLength(LAYER_COUNT); + 
desc->setUsage(MTL::TextureUsageShaderRead/* | MTL::TextureUsageShaderWrite*/); + desc->setStorageMode(MTL::StorageModeShared); + + texture = device->newTexture(desc); + texture->setLabel(toNSString(name)); + desc->release(); +} + +LutTexture::~LutTexture() { + texture->release(); +} + +u32 LutTexture::getNextIndex() { + currentIndex = (currentIndex + 1) % LAYER_COUNT; + + return currentIndex; +} + +} // namespace Metal diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index bdb5390d..bf2cdab1 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -1,17 +1,21 @@ -#include "PICA/gpu.hpp" #include "renderer_mtl/renderer_mtl.hpp" -#include "renderer_mtl/objc_helper.hpp" #include #include +#include "renderer_mtl/mtl_lut_texture.hpp" +// HACK +#undef NO + +#include "PICA/gpu.hpp" #include "SDL_metal.h" using namespace PICA; CMRC_DECLARE(RendererMTL); -const u16 LIGHT_LUT_TEXTURE_WIDTH = 256; +const u16 LIGHTING_LUT_TEXTURE_WIDTH = 256; +const u32 FOG_LUT_TEXTURE_WIDTH = 128; // HACK: redefinition... 
PICA::ColorFmt ToColorFormat(u32 format) { @@ -23,10 +27,10 @@ PICA::ColorFmt ToColorFormat(u32 format) { } MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) { - //MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init(); + // MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init(); NS::Error* error = nullptr; MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error); - //MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error); + // MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error); if (error) { Helpers::panic("Error loading shaders: %s", error->description()->cString(NS::ASCIIStringEncoding)); } @@ -39,19 +43,19 @@ RendererMTL::RendererMTL(GPU& gpu, const std::array& internalRegs, RendererMTL::~RendererMTL() {} void RendererMTL::reset() { - vertexBufferCache.reset(); - depthStencilCache.reset(); - drawPipelineCache.reset(); - blitPipelineCache.reset(); - textureCache.reset(); - depthStencilRenderTargetCache.reset(); + vertexBufferCache.reset(); + depthStencilCache.reset(); + drawPipelineCache.reset(); + blitPipelineCache.reset(); + textureCache.reset(); + depthStencilRenderTargetCache.reset(); colorRenderTargetCache.reset(); } void RendererMTL::display() { CA::MetalDrawable* drawable = metalLayer->nextDrawable(); if (!drawable) { - return; + return; } using namespace PICA::ExternalRegs; @@ -62,7 +66,7 @@ void RendererMTL::display() { auto topScreen = colorRenderTargetCache.findFromAddress(topScreenAddr); if (topScreen) { - clearColor(nullptr, topScreen->get().texture); + clearColor(nullptr, topScreen->get().texture); } // Bottom screen @@ -71,7 +75,7 @@ void RendererMTL::display() { auto bottomScreen = colorRenderTargetCache.findFromAddress(bottomScreenAddr); if (bottomScreen) { - 
clearColor(nullptr, bottomScreen->get().texture); + clearColor(nullptr, bottomScreen->get().texture); } // -------- Draw -------- @@ -131,14 +135,14 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { // Textures MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); textureDescriptor->setTextureType(MTL::TextureType2D); - textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA32Float); - textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH); - textureDescriptor->setHeight(Lights::LUT_Count + 1); - textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); + textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA8Unorm); + textureDescriptor->setWidth(1); + textureDescriptor->setHeight(1); textureDescriptor->setStorageMode(MTL::StorageModePrivate); + textureDescriptor->setUsage(MTL::TextureUsageShaderRead); - lutTexture = device->newTexture(textureDescriptor); - lutTexture->setLabel(toNSString("LUT texture")); + nullTexture = device->newTexture(textureDescriptor); + nullTexture->setLabel(toNSString("Null texture")); textureDescriptor->release(); // Samplers @@ -153,6 +157,9 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { samplerDescriptor->release(); + lutLightingTexture = new Metal::LutTexture(device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture"); + lutFogTexture = new Metal::LutTexture(device, MTL::TextureType1DArray, MTL::PixelFormatRG32Float, FOG_LUT_TEXTURE_WIDTH, 1, "Fog LUT texture"); + // -------- Pipelines -------- // Load shaders @@ -249,14 +256,15 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { // Copy to LUT texture MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); - constants->setConstantValue(&LIGHT_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0)); + constants->setConstantValue(&LIGHTING_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, 
NS::UInteger(0)); - error = nullptr; - MTL::Function* vertexCopyToLutTextureFunction = copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error); - if (error) { - Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding)); - } - constants->release(); + error = nullptr; + MTL::Function* vertexCopyToLutTextureFunction = + copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); MTL::RenderPipelineDescriptor* copyToLutTexturePipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); copyToLutTexturePipelineDescriptor->setVertexFunction(vertexCopyToLutTextureFunction); @@ -314,8 +322,8 @@ void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 c depthClearOps[depth->get().texture] = depthVal; if (format == DepthFmt::Depth24Stencil8) { - const u8 stencilVal = value >> 24; - stencilClearOps[depth->get().texture] = stencilVal; + const u8 stencilVal = value >> 24; + stencilClearOps[depth->get().texture] = stencilVal; } return; @@ -365,7 +373,7 @@ void RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, } void RendererMTL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { - // Texture copy size is aligned to 16 byte units + // Texture copy size is aligned to 16 byte units const u32 copySize = totalBytes & ~0xf; if (copySize == 0) { Helpers::warn("TextureCopy total bytes less than 16!\n"); @@ -463,33 +471,33 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spanformat, DepthFmt::Unknown1}; if (depthStencilRenderTarget) { - pipelineHash.depthFmt = 
depthStencilRenderTarget->format; - } - pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1; - pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7; - pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u]; - pipelineHash.fragHash.alphaControl = regs[0x104]; + pipelineHash.depthFmt = depthStencilRenderTarget->format; + } + pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1; + pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7; + pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u]; + pipelineHash.fragHash.alphaControl = regs[0x104]; // Blending and logic op pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; pipelineHash.colorWriteMask = colorMask; - u8 logicOp = 3; // Copy, which doesn't do anything + u8 logicOp = 3; // Copy if (pipelineHash.blendEnabled) { - pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc]; + pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc]; } else { - logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]); + logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]); } MTL::RenderPipelineState* pipeline = drawPipelineCache.get(pipelineHash); @@ -500,25 +508,25 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spaninit(); bool doesClear = clearColor(renderPassDescriptor, colorRenderTarget->texture); - if (depthStencilRenderTarget) { - if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture)) - doesClear = true; - if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) { - if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture)) - doesClear = true; - } - } + if (depthStencilRenderTarget) { + if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture)) doesClear = true; + if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) { + if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture)) doesClear = true; + } + } - nextRenderPassName = 
"Draw vertices"; - beginRenderPassIfNeeded(renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? depthStencilRenderTarget->texture : nullptr)); + nextRenderPassName = "Draw vertices"; + beginRenderPassIfNeeded( + renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? depthStencilRenderTarget->texture : nullptr) + ); // Update the LUT texture if necessary if (gpu.lightingLUTDirty) { updateLightingLUT(renderCommandEncoder); } if (gpu.fogLUTDirty) { - updateFogLUT(renderCommandEncoder); - } + updateFogLUT(renderCommandEncoder); + } renderCommandEncoder->setRenderPipelineState(pipeline); renderCommandEncoder->setDepthStencilState(depthStencilState); @@ -526,7 +534,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX); } else { - Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes()); + Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes()); renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX); } @@ -541,20 +549,20 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span(constantColor); - const u8 b = Helpers::getBits<16, 8>(constantColor); - const u8 a = Helpers::getBits<24, 8>(constantColor); + u32 constantColor = regs[PICA::InternalRegs::BlendColour]; + const u8 r = constantColor & 0xff; + const u8 g = Helpers::getBits<8, 8>(constantColor); + const u8 b = Helpers::getBits<16, 8>(constantColor); + const u8 a = Helpers::getBits<24, 8>(constantColor); - renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f); + renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f); } // Stencil reference if (stencilEnable) { - const s8 reference = s8(Helpers::getBits<16, 8>(depthStencilHash.stencilConfig)); // Signed reference value - 
renderCommandEncoder->setStencilReferenceValue(reference); - } + const s8 reference = s8(Helpers::getBits<16, 8>(depthStencilHash.stencilConfig)); // Signed reference value + renderCommandEncoder->setStencilReferenceValue(reference); + } // Bind resources setupTextureEnvState(renderCommandEncoder); @@ -563,6 +571,8 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetFragmentBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2); renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2); + u32 lutSlices[2] = {lutLightingTexture->getCurrentIndex(), lutFogTexture->getCurrentIndex()}; + renderCommandEncoder->setFragmentBytes(&lutSlices, sizeof(lutSlices), 3); renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size())); } @@ -575,11 +585,14 @@ void RendererMTL::screenshot(const std::string& name) { void RendererMTL::deinitGraphicsContext() { reset(); + delete lutLightingTexture; + delete lutFogTexture; + // Release copyToLutTexturePipeline->release(); displayPipeline->release(); defaultDepthStencilState->release(); - lutTexture->release(); + nullTexture->release(); linearSampler->release(); nearestSampler->release(); library->release(); @@ -607,10 +620,10 @@ std::optional RendererMTL::getColorRenderTarget( auto& colorBuffer = colorRenderTargetCache.add(sampleBuffer); - // Clear the color buffer - colorClearOps[colorBuffer.texture] = {0, 0, 0, 0}; + // Clear the color buffer + colorClearOps[colorBuffer.texture] = {0, 0, 0, 0}; - return colorBuffer; + return colorBuffer; } Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { @@ -622,13 +635,13 @@ Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { } else { auto& depthBuffer = depthStencilRenderTargetCache.add(sampleBuffer); - // Clear the depth buffer - depthClearOps[depthBuffer.texture] = 0.0f; - if (depthBuffer.format == 
DepthFmt::Depth24Stencil8) { - stencilClearOps[depthBuffer.texture] = 0; - } + // Clear the depth buffer + depthClearOps[depthBuffer.texture] = 0.0f; + if (depthBuffer.format == DepthFmt::Depth24Stencil8) { + stencilClearOps[depthBuffer.texture] = 0; + } - return depthBuffer; + return depthBuffer; } } @@ -683,7 +696,9 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { for (int i = 0; i < 3; i++) { if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { - continue; + encoder->setFragmentTexture(nullTexture, i); + encoder->setFragmentSamplerState(nearestSampler, i); + continue; } const size_t ioBase = ioBases[i]; @@ -701,42 +716,55 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { encoder->setFragmentTexture(tex.texture, i); encoder->setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); } else { - // TODO: bind a dummy texture? + // TODO: log } } - - // LUT texture - encoder->setFragmentTexture(lutTexture, 3); - encoder->setFragmentSamplerState(linearSampler, 3); } void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { gpu.lightingLUTDirty = false; - std::array lightingLut = {0.0f}; - for (int i = 0; i < gpu.lightingLUT.size(); i += 2) { - uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF; - lightingLut[i] = (float)(value << 4) / 65535.0f; + std::array lightingLut; + + for (int i = 0; i < gpu.lightingLUT.size(); i++) { + uint64_t value = gpu.lightingLUT[i] & 0xFFF; + lightingLut[i] = (value << 4); } - //for (int i = 0; i < Lights::LUT_Count; i++) { - // lutTexture->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0); - //} + u32 index = lutLightingTexture->getNextIndex(); + lutLightingTexture->getTexture()->replaceRegion(MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0); + /* + endRenderPass(); + + 
Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); + + auto blitCommandEncoder = commandBuffer->blitCommandEncoder(); + blitCommandEncoder->copyFromBuffer(buffer.buffer, buffer.offset, LIGHT_LUT_TEXTURE_WIDTH * 2 * 4, 0, MTL::Size(LIGHT_LUT_TEXTURE_WIDTH, + Lights::LUT_Count, 1), lutLightingTexture, 0, 0, MTL::Origin(0, 0, 0)); + + blitCommandEncoder->endEncoding(); + */ + + /* renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); - renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); - renderCommandEncoder->setVertexTexture(lutTexture, 0); + renderCommandEncoder->setVertexTexture(lutLightingTexture, 0); Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); u32 arrayOffset = 0; renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); - renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), GPU::LightingLutSize); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), GPU::LightingLutSize); + + MTL::Resource* barrierResources[] = {lutLightingTexture}; + renderCommandEncoder->memoryBarrier(barrierResources, 1, MTL::RenderStageVertex, MTL::RenderStageFragment); + */ } void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { gpu.fogLUTDirty = false; - std::array fogLut = {0.0f}; + + std::array fogLut = {0.0f}; for (int i = 0; i < fogLut.size(); i += 2) { const uint32_t value = gpu.fogLUT[i >> 1]; @@ -749,20 +777,31 @@ void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { fogLut[i + 1] = fogDifference; } + u32 index = lutFogTexture->getNextIndex(); + lutFogTexture->getTexture()->replaceRegion(MTL::Region(0, 0, FOG_LUT_TEXTURE_WIDTH, 1), 0, index, fogLut.data(), 0, 0); + + /* renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); 
renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); - renderCommandEncoder->setVertexTexture(lutTexture, 0); - //Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); - //renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); + renderCommandEncoder->setVertexTexture(lutLightingTexture, 0); + // Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); + // renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0); u32 arrayOffset = (u32)Lights::LUT_Count; renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); - renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(128)); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(128)); + + MTL::Resource* barrierResources[] = {lutLightingTexture}; + renderCommandEncoder->memoryBarrier(barrierResources, 1, MTL::RenderStageVertex, MTL::RenderStageFragment); + */ } -void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect) { - nextRenderPassName = "Texture copy"; +void RendererMTL::textureCopyImpl( + Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, + const Math::Rect& destRect +) { + nextRenderPassName = "Texture copy"; MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole texture bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture); @@ -775,8 +814,13 @@ void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Meta 
renderCommandEncoder->setRenderPipelineState(blitPipeline); // Viewport - renderCommandEncoder->setViewport(MTL::Viewport{double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0}); - float srcRectNDC[4] = {srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(), (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v()}; + renderCommandEncoder->setViewport(MTL::Viewport{ + double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0 + }); + float srcRectNDC[4] = { + srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(), + (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v() + }; // Bind resources renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0); @@ -785,3 +829,26 @@ void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Meta renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); } + +void RendererMTL::beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture) { + createCommandBufferIfNeeded(); + + if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { + endRenderPass(); + + renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); + renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); + + // Bind persistent resources + + // LUT texture + renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3); + 
renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4); + renderCommandEncoder->setFragmentSamplerState(linearSampler, 3); + + lastColorTexture = colorTexture; + lastDepthTexture = depthTexture; + } + + renderPassDescriptor->release(); +} diff --git a/src/host_shaders/metal_copy_to_lut_texture.metal b/src/host_shaders/metal_copy_to_lut_texture.metal index 40a7f50d..c21246f1 100644 --- a/src/host_shaders/metal_copy_to_lut_texture.metal +++ b/src/host_shaders/metal_copy_to_lut_texture.metal @@ -4,6 +4,6 @@ using namespace metal; constant ushort lutTextureWidth [[function_constant(0)]]; // The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass -vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d out [[texture(0)]], constant float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { +vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d out [[texture(0)]], device float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { out.write(float4(data[vid], 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth)); } diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal index 95f417c7..c2c1799f 100644 --- a/src/host_shaders/metal_shaders.metal +++ b/src/host_shaders/metal_shaders.metal @@ -406,13 +406,11 @@ uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) { #define RG_LUT 5u #define RR_LUT 6u -#define FOG_INDEX 24 - -float lutLookup(texture2d texLut, uint lut, uint index) { - return texLut.read(uint2(index, lut)).r; +float lutLookup(texture2d_array texLut, uint slice, uint lut, uint index) { + return texLut.read(uint2(index, lut), slice).r; } -float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d texLut, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) { +float 
lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array texLut, uint slice, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) { uint lut_index; int bit_in_config1; if (lut_id == SP_LUT) { @@ -498,12 +496,12 @@ float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant delta = abs(delta); } int index = int(clamp(floor(delta * 255.0), 0.f, 255.f)); - return lutLookup(texLut, lut_index, index) * scale; + return lutLookup(texLut, slice, lut_index, index) * scale; } else { // Range is [-1, 1] so we need to map it to [0, 1] int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); if (index < 0) index += 256; - return lutLookup(texLut, lut_index, index) * scale; + return lutLookup(texLut, slice, lut_index, index) * scale; } } @@ -515,7 +513,7 @@ float3 regToColor(uint reg) { } // Implements the following algorthm: https://mathb.in/26766 -void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d texLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { +void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array texLut, uint slice, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { // Quaternions describe a transformation from surface-local space to eye space. // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). 
@@ -566,10 +564,10 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi globals.GPUREG_LIGHTi_CONFIG = picaRegs.read(0x0149u + (lightId << 4u)); float lightDistance; - float3 lightPosition = normalize(float3( + float3 lightPosition = float3( decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) - )); + ); // Positional Light if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { @@ -613,23 +611,23 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi float delta = lightDistance * distanceAttenuationScale + distanceAttenuationBias; delta = clamp(delta, 0.0, 1.0); int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); - distanceAttenuation = lutLookup(texLut, 16u + lightId, index); + distanceAttenuation = lutLookup(texLut, slice, 16u + lightId, index); } - float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, environmentId, SP_LUT, lightId, lightVector, halfVector); - float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D0_LUT, lightId, lightVector, halfVector); - float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D1_LUT, lightId, lightVector, halfVector); + float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, SP_LUT, lightId, lightVector, halfVector); + float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, D0_LUT, lightId, lightVector, halfVector); + float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, D1_LUT, lightId, lightVector, halfVector); float3 reflectedColor; - reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RR_LUT, lightId, lightVector, halfVector); + reflectedColor.r = 
lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RR_LUT, lightId, lightVector, halfVector); if (isSamplerEnabled(environmentId, RG_LUT)) { - reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RG_LUT, lightId, lightVector, halfVector); + reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RG_LUT, lightId, lightVector, halfVector); } else { reflectedColor.g = reflectedColor.r; } if (isSamplerEnabled(environmentId, RB_LUT)) { - reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RB_LUT, lightId, lightVector, halfVector); + reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RB_LUT, lightId, lightVector, halfVector); } else { reflectedColor.b = reflectedColor.r; } @@ -655,7 +653,7 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi float fresnelFactor; if (fresnelOutput1 == 1u || fresnelOutput2 == 1u) { - fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, environmentId, FR_LUT, lightId, lightVector, halfVector); + fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, FR_LUT, lightId, lightVector, halfVector); } if (fresnelOutput1 == 1u) { @@ -676,9 +674,7 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) { return as_type(performLogicOpU(logicOp, as_type(s), as_type(d))); } -fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], - texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture2d texLut [[texture(3)]], - sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { +fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant 
PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture2d_array texLightingLut [[texture(3)]], texture1d_array texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { Globals globals; // HACK @@ -689,7 +685,7 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c globals.tevSources[0] = in.color; if (lightingEnabled) { - calcLighting(globals, in, picaRegs, texLut, linearSampler, globals.tevSources[1], globals.tevSources[2]); + calcLighting(globals, in, picaRegs, texLightingLut, lutSlices.x, linearSampler, globals.tevSources[1], globals.tevSources[2]); } else { globals.tevSources[1] = float4(0.0); globals.tevSources[2] = float4(0.0); @@ -729,13 +725,13 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u; if (enable_fog) { - bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; - float fog_index = flip_depth ? 1.0 - in.position.z : in.position.z; - fog_index *= 128.0; - float clamped_index = clamp(floor(fog_index), 0.0, 127.0); - float delta = fog_index - clamped_index; - float2 value = texLut.read(uint2(clamped_index, FOG_INDEX)).rg; - float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0); + bool flipDepth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; + float fogIndex = flipDepth ? 
1.0 - in.position.z : in.position.z; + fogIndex *= 128.0; + float clampedIndex = clamp(floor(fogIndex), 0.0, 127.0); + float delta = fogIndex - clampedIndex; + float2 value = texFogLut.read(clampedIndex, lutSlices.y).rg; + float fogFactor = clamp(value.r + value.g * delta, 0.0, 1.0); uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u); @@ -743,9 +739,9 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0; float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0; float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0; - float3 fog_color = float3(r, g, b); + float3 fogColor = float3(r, g, b); - color.rgb = mix(fog_color, color.rgb, fog_factor); + color.rgb = mix(fogColor, color.rgb, fogFactor); } // Perform alpha test From 272c24d8e44c0a46bd1f63bf5d5a9d30bcc5a663 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 31 Oct 2024 13:45:05 +0100 Subject: [PATCH 08/16] don't bind resources unnecessarily --- CMakeLists.txt | 11 +++- include/renderer_mtl/mtl_command_encoder.hpp | 58 ++++++++++++++++++++ include/renderer_mtl/mtl_common.hpp | 6 ++ include/renderer_mtl/objc_helper.hpp | 2 +- include/renderer_mtl/renderer_mtl.hpp | 7 ++- src/core/renderer_mtl/renderer_mtl.cpp | 37 +++++++------ src/host_shaders/metal_blit.metal | 29 ++++++++++ src/host_shaders/metal_shaders.metal | 19 ------- 8 files changed, 128 insertions(+), 41 deletions(-) create mode 100644 include/renderer_mtl/mtl_command_encoder.hpp create mode 100644 include/renderer_mtl/mtl_common.hpp create mode 100644 src/host_shaders/metal_blit.metal diff --git a/CMakeLists.txt b/CMakeLists.txt index 854b9b9e..d25973c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -483,6 +483,8 @@ if(ENABLE_METAL AND APPLE) include/renderer_mtl/mtl_texture.hpp include/renderer_mtl/mtl_vertex_buffer_cache.hpp include/renderer_mtl/mtl_lut_texture.hpp + include/renderer_mtl/mtl_command_encoder.hpp + include/renderer_mtl/mtl_common.hpp 
include/renderer_mtl/pica_to_mtl.hpp include/renderer_mtl/objc_helper.hpp ) @@ -494,7 +496,8 @@ if(ENABLE_METAL AND APPLE) src/core/renderer_mtl/mtl_lut_texture.cpp src/core/renderer_mtl/objc_helper.mm src/host_shaders/metal_shaders.metal - src/host_shaders/metal_copy_to_lut_texture.metal + src/host_shaders/metal_blit.metal + #src/host_shaders/metal_copy_to_lut_texture.metal ) set(HEADER_FILES ${HEADER_FILES} ${RENDERER_MTL_INCLUDE_FILES}) @@ -520,7 +523,8 @@ if(ENABLE_METAL AND APPLE) endfunction() add_metal_shader(metal_shaders) - add_metal_shader(metal_copy_to_lut_texture) + add_metal_shader(metal_blit) + #add_metal_shader(metal_copy_to_lut_texture) add_custom_target( compile_msl_shaders @@ -532,7 +536,8 @@ if(ENABLE_METAL AND APPLE) NAMESPACE RendererMTL WHENCE "src/host_shaders/" "src/host_shaders/metal_shaders.metallib" - "src/host_shaders/metal_copy_to_lut_texture.metallib" + "src/host_shaders/metal_blit.metallib" + #"src/host_shaders/metal_copy_to_lut_texture.metallib" ) add_dependencies(resources_renderer_mtl compile_msl_shaders) diff --git a/include/renderer_mtl/mtl_command_encoder.hpp b/include/renderer_mtl/mtl_command_encoder.hpp new file mode 100644 index 00000000..be66699d --- /dev/null +++ b/include/renderer_mtl/mtl_command_encoder.hpp @@ -0,0 +1,58 @@ +#pragma once + +#include + +namespace Metal { + +struct RenderState { + MTL::RenderPipelineState* renderPipelineState = nullptr; + MTL::DepthStencilState* depthStencilState = nullptr; + MTL::Texture* textures[3] = {nullptr}; + MTL::SamplerState* samplerStates[3] = {nullptr}; +}; + +class CommandEncoder { +public: + void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) { + renderCommandEncoder = rce; + + // Reset the render state + renderState = RenderState{}; + } + + // Resource binding + void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) { + if (renderPipelineState != renderState.renderPipelineState) { + renderCommandEncoder->setRenderPipelineState(renderPipelineState); 
+ renderState.renderPipelineState = renderPipelineState; + } + } + + void setDepthStencilState(MTL::DepthStencilState* depthStencilState) { + if (depthStencilState != renderState.depthStencilState) { + renderCommandEncoder->setDepthStencilState(depthStencilState); + renderState.depthStencilState = depthStencilState; + } + } + + void setFragmentTexture(MTL::Texture* texture, u32 index) { + if (texture != renderState.textures[index]) { + renderCommandEncoder->setFragmentTexture(texture, index); + renderState.textures[index] = texture; + } + } + + void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) { + if (samplerState != renderState.samplerStates[index]) { + renderCommandEncoder->setFragmentSamplerState(samplerState, index); + renderState.samplerStates[index] = samplerState; + } + } + +private: + MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; + + RenderState renderState; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_common.hpp b/include/renderer_mtl/mtl_common.hpp new file mode 100644 index 00000000..a148520f --- /dev/null +++ b/include/renderer_mtl/mtl_common.hpp @@ -0,0 +1,6 @@ +#pragma once + +#include + +#define GET_HELPER_TEXTURE_BINDING(binding) (30 - binding) +#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - binding) diff --git a/include/renderer_mtl/objc_helper.hpp b/include/renderer_mtl/objc_helper.hpp index 91756d24..7d0e8646 100644 --- a/include/renderer_mtl/objc_helper.hpp +++ b/include/renderer_mtl/objc_helper.hpp @@ -2,7 +2,7 @@ #include -#include +#include "mtl_common.hpp" namespace Metal { diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp index e28b63b4..6b356896 100644 --- a/include/renderer_mtl/renderer_mtl.hpp +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -11,6 +11,7 @@ #include "mtl_depth_stencil_cache.hpp" #include "mtl_vertex_buffer_cache.hpp" #include "mtl_lut_texture.hpp" +#include "mtl_command_encoder.hpp" // HACK: use the OpenGL cache 
#include "../renderer_gl/surface_cache.hpp" @@ -46,6 +47,8 @@ class RendererMTL final : public Renderer { MTL::Device* device; MTL::CommandQueue* commandQueue; + Metal::CommandEncoder commandEncoder; + // Libraries MTL::Library* library; @@ -69,7 +72,7 @@ class RendererMTL final : public Renderer { // Pipelines MTL::RenderPipelineState* displayPipeline; - MTL::RenderPipelineState* copyToLutTexturePipeline; + //MTL::RenderPipelineState* copyToLutTexturePipeline; // Clears std::map colorClearOps; @@ -177,7 +180,7 @@ class RendererMTL final : public Renderer { Metal::DepthStencilRenderTarget& getDepthRenderTarget(); Metal::Texture& getTexture(Metal::Texture& tex); void setupTextureEnvState(MTL::RenderCommandEncoder* encoder); - void bindTexturesToSlots(MTL::RenderCommandEncoder* encoder); + void bindTexturesToSlots(); void updateLightingLUT(MTL::RenderCommandEncoder* encoder); void updateFogLUT(MTL::RenderCommandEncoder* encoder); void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect); diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index bf2cdab1..8401eecb 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -165,7 +165,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { // Load shaders auto mtlResources = cmrc::RendererMTL::get_filesystem(); library = loadLibrary(device, mtlResources.open("metal_shaders.metallib")); - MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); + MTL::Library* blitLibrary = loadLibrary(device, mtlResources.open("metal_blit.metallib")); + //MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); // Display MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", 
NS::ASCIIStringEncoding)); @@ -188,8 +189,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { fragmentDisplayFunction->release(); // Blit - MTL::Function* vertexBlitFunction = library->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding)); - MTL::Function* fragmentBlitFunction = library->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding)); + MTL::Function* vertexBlitFunction = blitLibrary->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding)); + MTL::Function* fragmentBlitFunction = blitLibrary->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding)); blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction); @@ -255,6 +256,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor); // Copy to LUT texture + /* MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); constants->setConstantValue(&LIGHTING_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0)); @@ -279,6 +281,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { } copyToLutTexturePipelineDescriptor->release(); vertexCopyToLutTextureFunction->release(); + */ // Depth stencil cache depthStencilCache.set(device); @@ -293,7 +296,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { depthStencilDescriptor->release(); // Release - copyToLutTextureLibrary->release(); + blitLibrary->release(); + //copyToLutTextureLibrary->release(); } void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { @@ -528,8 +532,8 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetRenderPipelineState(pipeline); - renderCommandEncoder->setDepthStencilState(depthStencilState); + commandEncoder.setRenderPipelineState(pipeline); + commandEncoder.setDepthStencilState(depthStencilState); // If size is < 4KB, use inline vertex data, otherwise use a buffer 
if (vertices.size_bytes() < 4 * 1024) { renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX); @@ -566,7 +570,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::spansetVertexBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); renderCommandEncoder->setFragmentBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2); @@ -589,7 +593,7 @@ void RendererMTL::deinitGraphicsContext() { delete lutFogTexture; // Release - copyToLutTexturePipeline->release(); + //copyToLutTexturePipeline->release(); displayPipeline->release(); defaultDepthStencilState->release(); nullTexture->release(); @@ -687,7 +691,7 @@ void RendererMTL::setupTextureEnvState(MTL::RenderCommandEncoder* encoder) { encoder->setFragmentBytes(&envState, sizeof(envState), 1); } -void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { +void RendererMTL::bindTexturesToSlots() { static constexpr std::array ioBases = { PICA::InternalRegs::Tex0BorderColor, PICA::InternalRegs::Tex1BorderColor, @@ -696,8 +700,8 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { for (int i = 0; i < 3; i++) { if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { - encoder->setFragmentTexture(nullTexture, i); - encoder->setFragmentSamplerState(nearestSampler, i); + commandEncoder.setFragmentTexture(nullTexture, i); + commandEncoder.setFragmentSamplerState(nearestSampler, i); continue; } @@ -713,8 +717,8 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) { if (addr != 0) [[likely]] { Metal::Texture targetTex(device, addr, static_cast(format), width, height, config); auto tex = getTexture(targetTex); - encoder->setFragmentTexture(tex.texture, i); - encoder->setFragmentSamplerState(tex.sampler ? 
tex.sampler : nearestSampler, i); + commandEncoder.setFragmentTexture(tex.texture, i); + commandEncoder.setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); } else { // TODO: log } @@ -811,7 +815,7 @@ void RendererMTL::textureCopyImpl( Metal::BlitPipelineHash hash{destFramebuffer.format, DepthFmt::Unknown1}; auto blitPipeline = blitPipelineCache.get(hash); - renderCommandEncoder->setRenderPipelineState(blitPipeline); + commandEncoder.setRenderPipelineState(blitPipeline); // Viewport renderCommandEncoder->setViewport(MTL::Viewport{ @@ -824,8 +828,8 @@ void RendererMTL::textureCopyImpl( // Bind resources renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0); - renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, 0); - renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0); + renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, GET_HELPER_TEXTURE_BINDING(0)); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, GET_HELPER_SAMPLER_STATE_BINDING(0)); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); } @@ -838,6 +842,7 @@ void RendererMTL::beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassD renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); + commandEncoder.newRenderCommandEncoder(renderCommandEncoder); // Bind persistent resources diff --git a/src/host_shaders/metal_blit.metal b/src/host_shaders/metal_blit.metal new file mode 100644 index 00000000..31b94ec4 --- /dev/null +++ b/src/host_shaders/metal_blit.metal @@ -0,0 +1,29 @@ +#include +using namespace metal; + +#define GET_HELPER_TEXTURE_BINDING(binding) (30 - binding) +#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - binding) + +struct BasicVertexOut { + float4 position [[position]]; + float2 uv; +}; + +struct NDCViewport { + float2 offset; + float2 scale; +}; + +vertex 
BasicVertexOut vertexBlit(uint vid [[vertex_id]], constant NDCViewport& viewport [[buffer(0)]]) { + BasicVertexOut out; + out.uv = float2((vid << 1) & 2, vid & 2); + out.position = float4(out.uv * 2.0 - 1.0, 0.0, 1.0); + out.position.y = -out.position.y; + out.uv = out.uv * viewport.scale + viewport.offset; + + return out; +} + +fragment float4 fragmentBlit(BasicVertexOut in [[stage_in]], texture2d tex [[texture(GET_HELPER_TEXTURE_BINDING(0))]], sampler samplr [[sampler(GET_HELPER_SAMPLER_STATE_BINDING(0))]]) { + return tex.sample(samplr, in.uv); +} diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal index c2c1799f..18c310f7 100644 --- a/src/host_shaders/metal_shaders.metal +++ b/src/host_shaders/metal_shaders.metal @@ -32,25 +32,6 @@ fragment float4 fragmentDisplay(BasicVertexOut in [[stage_in]], texture2d return tex.sample(samplr, in.uv); } -struct NDCViewport { - float2 offset; - float2 scale; -}; - -vertex BasicVertexOut vertexBlit(uint vid [[vertex_id]], constant NDCViewport& viewport [[buffer(0)]]) { - BasicVertexOut out; - out.uv = float2((vid << 1) & 2, vid & 2); - out.position = float4(out.uv * 2.0 - 1.0, 0.0, 1.0); - out.position.y = -out.position.y; - out.uv = out.uv * viewport.scale + viewport.offset; - - return out; -} - -fragment float4 fragmentBlit(BasicVertexOut in [[stage_in]], texture2d tex [[texture(0)]], sampler samplr [[sampler(0)]]) { - return tex.sample(samplr, in.uv); -} - struct PicaRegs { uint regs[0x200 - 0x48]; From abe0709a81618deeda53845b59600f6b347f5a0a Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 1 Nov 2024 09:05:44 +0100 Subject: [PATCH 09/16] don't hardcode window size --- src/panda_sdl/frontend_sdl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 4bfa0087..1aa79e87 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -88,7 +88,7 @@ FrontendSDL::FrontendSDL() : 
keyboardMappings(InputMappings::defaultKeyboardMapp #ifdef PANDA3DS_ENABLE_METAL if (config.rendererType == RendererType::Metal) { - window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_METAL | SDL_WINDOW_RESIZABLE); + window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, windowWidth, windowHeight, SDL_WINDOW_METAL | SDL_WINDOW_RESIZABLE); if (window == nullptr) { Helpers::warn("Window creation failed: %s", SDL_GetError()); From 4cc62d487084d2686088058a623078027196272c Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 5 Nov 2024 20:01:20 +0100 Subject: [PATCH 10/16] use saved window position --- src/panda_sdl/frontend_sdl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 98978954..01f48acd 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -93,7 +93,7 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp #ifdef PANDA3DS_ENABLE_METAL if (config.rendererType == RendererType::Metal) { - window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, windowWidth, windowHeight, SDL_WINDOW_METAL | SDL_WINDOW_RESIZABLE); + window = SDL_CreateWindow(windowTitle, windowX, windowY, windowWidth, windowHeight, SDL_WINDOW_METAL | SDL_WINDOW_RESIZABLE); if (window == nullptr) { Helpers::warn("Window creation failed: %s", SDL_GetError()); From d481fdd6646f0647123b7aa278b8914685439b53 Mon Sep 17 00:00:00 2001 From: extherian Date: Wed, 6 Nov 2024 15:43:30 +0000 Subject: [PATCH 11/16] Add rsyn png and ico files for future use Extra icons for use with the hypothetical icon-swapping feature within the app that was mentioned on the discord. These files are based on the rsyn emote from the discord, flipped horizontally and edited in gimp to remove the blobs of colour from the corners. 
Although they are not visible on the discord emote, these are very noticeable when used as a desktop shortcut. rsyn.png could be used as an image within the app itself in place of rpog.png, which appears in the title bar, as well as for use as a desktop shortcut. windows_alt_icon could be used for a rainbow-coloured version of the existing .exe ico file, though swapping this out would be harder. Perhaps some future Panda3DS installer could swap it in place of the existing ico file during the installation process if desired. Resolution and aspect ratio are identical to the existing rpog.png and windows_icon images. --- docs/img/rsyn_icon.png | Bin 0 -> 30245 bytes docs/img/windows_alt_icon.ico | Bin 0 -> 55502 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 docs/img/rsyn_icon.png create mode 100644 docs/img/windows_alt_icon.ico diff --git a/docs/img/rsyn_icon.png b/docs/img/rsyn_icon.png new file mode 100644 index 0000000000000000000000000000000000000000..684c19b66b7279bafb2e3852dd9980e0bd09f276 GIT binary patch literal 30245 zcmeFYWl)^Wwl+*~2<{f#bq05LcTEB_!{F``Aovhmg9U=SySoHUAZQ4#!7Vtv*}?xzIt(Y-w>gxu7HV7h7Jb@hpD6}tNr}`{`tom75Vw~ z%$+Up`E9^Q*U(KHrNc7jqvy{s+Z;Jg;?HKnlns4?2kKZw0pC+KzE zT~xa3$&x^@o>8w0LF0E<>V5R?>dli$Kz{RKE8Ug8-X?jlcU_h|GZ%|ekUh)MXLtrP z2l40xyLLt5!VP(>qQ;n0>bD!DgIrH+#Xsm?VV4V!C<8?gJ4ltzTQb9b;C01o6T>U7 z@Nq)noTvky&8zaz=^sQNa2DcmzPukt`OJlB#fv8-?G5ZPgfIu?F<@=3WMYMI-7pw2 zuWQ07!NI)I_t9xkS4rve28Zz65 z>c3LudCTLky1&}qIpF}?q0>*fjhPVV^IEYc~S67O}K8qEW#w>!hQdwL!Wf`&#r#xNN_mx z`kt;)LWwitH5A~&{uv!9_YmbNc;zQq=pG9b3kgE&TlRej1?mC(cq`P8%0&Mh@k9*4 zz~+akJ)CQH{@f4aYt-EpWX{ST>GdeDkKkl9(u*S-H}m#52;N`u+!j&*yk1_E3w1R7 zcD{u(FD~06UtPsXW!C=PI@;DI;(K?cTGmDNk;SD!Y}*b44h~Dt`dMIxYO2BzM|)0? 
zxg!|L>1FTqtUNe4Q3)?65X2VhMh%8qT04l*9=CPTQd^sg(Hiin0o0sipjOt3J}yul zA9Y=bk1a&VoK`{{UDQkX8NeRu2BP+|w{vh6_7bE03s?C0^PgreTI#<{+-${Y4b?QM zWgK0g)V!R$oB$3vFKZ8OT5)u0Q5SOyVQpFYe?UB6iP2iQxj6}Qad~=ra(eP`I=WbL z0fmHwxB%Q-+}s?`796hL4sIYX4hL7dKM;Ro$UVL*(@1&;oFL(#nf3Wb(2bUMfi3`XH;Ig;p`gadkH#v`Ikbea9fAw(HeO{+r+E7#|R&4vH|$SWXV&JVEQ-~;l3ICw1tp&WuhUVaW<0U?MGA1{cH-yHmJP)ZK2ZXgE; z^bgcCIH&b9j)es`9}ke9mjf(dF2KRd1L5Wn0s@6Nz~;OlFgHJ+0DzzW-yk$xte;g0 zvitX_{y>>OL-FzOKmZ^ejXk{eh}y{D07IgyrYXf z=y^J=?Ln4ME++@ezdHU9E-a;~Bu2~43HYxTO*@dA#k0fn8nAXScl31quP$9{d#H{Z z=ntPj0Um$=FCRdVk4Hd=Pw?-k|3cJ*#1FM*9bX`j6+oy49W+lsU)^BnxtbK7#_dd4+*MVF0f#P*9i$ zD9p{n1^@^H{>|Re+}gtX|Ihl*@}U;}d(suHU7yGI{;TQlHKhY}{`>0hOFQeo785n~ zU&}%m1o=A!SC9wP{I58lvHtFYSb-cYq0jZ>AAG?3_=hK01(Q@#{mHf@^J9- z3q9+JkB6TFU;zXGAkUJ5aP#~px~rpwnKf96%t)-;_PG#>Yd;^*^5;fY$;55VGLm00D#m9K1pn&#LEv@^c76_ynL}AUB8? z__sj+Mc)65^!_M<@1N<3a{VcY|C~%wuK&+;|84MZ74(^wzuTTG^K+r+`d6j@2Vc*z z{!f1WLuUVzLr_!y?;!sze*c%Q|E24{#lU~d_UwcKU z!}ATE=krdm_h%r}^DYt@tfC+b_w?ucb4O9)a|?=_lA0XK?n^{GDpId?+2>OMYB(iX zDP6C{gLdz7=z{z5m5Hxl->YTaV?8|sgV=-ZuWGe&Hr0e$;HGuMI`h$A`tYRAh%&WN z^dADLBckZ#r(u{2>Z>6z!Ubm>%y@GH1zxN8Tx9JxxmL>ciq4c72bGnjN9ikWMXnQ+ z`j7KX>G{m^nsIPRy$A%$6$VWcuYNDu`t9F!HbD3DZbwla_10<))b0BR<{Yuk9mQE5 z_C3fUp;69+Zzd?{w|pAsuwI|nc=Oz7cKf-NzPrG^z80ZO%Bp%B@Qc$j#4}aqaVFXk ze436;@zc|w0NektuV`XPq}KrsYgiI{>^(7lTecU-eccrWrHp=NE4JO^JeceKXI9!= z(+B0JBqTVYS3jrc>1l7q4SKJ-ZtL$RHG0Aj4sMmKAEx2HfU%Dqy0#bhen0jFDpYke z3$l2s`=d$fRlxGA{3`-Jz<&mLb?kn-XUcB58Se63?65{*r8WFwMkuFAkeH7cj4qr4KW14Gm!>=sA5?KcQsV|z?6=r~@i!;bxV`E;@NX|98! 
zl%j`R6G4k650+vKdZCBIVnoAIoP_6G|J@|J)oCqT{PE=B>GCjR-(a-Yz#s0uK%%*H z<*8NY-rFnzf6!1mOk75>0E-!|jL1^isJbyJxORjA^?;12y?MoSzRTmVDEndc*l%?x zU>9ZOW`_D2o$H0;62#`-ph=rZU4gE`ydt4#|8&0>$SB&If{m*#CvkRYby}XJ# z1&)>;5`F2&pXNL}&)JeIRzk;=ke^r!Grmw;MdvBTAG5~ojhJVx-f=j?E5aXRLd9FeMMa2#nM2lhFG zv}M#vBI)-8$s?!`>>ox#Y`w@%?_Fx~kO}$q(9jy$J$WiuJTARXn_>4KO4B8dM!8-- zCIt@R#$O;wp1mAZME1IEDasHNJO2%`-;+l1{4Q&BV6z^bL8_Z*2dcc2O%& zr+qsPkJp}$UR-B}?GI!-li5wN;TR0XBQ9FLkG9Ly4nx1sFl1| zGrI1hF5*Kr^snsi$7y&(>lY>h&Ji2~w7@=hmk+jG%(PTS24z2_EGJx-MU~M|WpS=~ zUJ)E!Ib!ODpEQ`@_{w~j?d3L4m7Awp)dXn*I>E5GUY}zI1z3ch1R&c`pPbM#GnhuR zETJ8TfKlF%5ek;`m{>Y}*>zJhdEY?gKb_HeY(3+J;C6g}ldw$XmozleyJPD8bf+-= z^OVlu?fp_&VczzTLSDtsZ|1IbFNGr_4m>qRUM)wIn1l)9Y@pBJDm(-Ym|1B{tB#lN zn|VF!j)0WoA~vje&pt@}$MvTmo%K2zZR}lQo#E7VE-KV+Av+Y3Lw} z#MK4ty)f?@9xj5BSqaJn*_h=8F|z3pmWQtRKHi$%Tpf#Drbyg}`R{pVcY4;h++p(z z2vGf4T3Xh5;mDrcL}F4i5^?kHOLJvhd4Fb2auF2~eIh9@h+uG8hO$|xOKwE0Ayk>0 z%ohVUO4}s-BEI>+ki+6-Wt!sqmu2t1PSQ*7+H zF8EMg()K{YIIx8s9Ikx#8_$D>F^zy-{FT;#jqPyHrY2Lz%o2~LB1_LAa!1jY2BXB} zOrGQ>Hz}h5RrLrhDDJ*df$AZfO(2f%Vk1MoBDxqDgH&$7%noxo!S!>bYZ614;t`<) zS>DBWouTdD6Z0nCekAMr2IpHVZzF4*oKd2t{I6Ej4}ZAPI4J8}=eh77Wd zi08?Cg9v~7TCqw}Tga?j((jPAgQ^ntvVAx8o=zL`2!CX7Xtts%(4@P-LpW&OdrC(U z>5<^+PS7+0v}~GnS&8G#75&t zAtaeDg0kgTFeH;1imMm4n4dyfs%zf9GollJW3XUXknahEN$-ywhr{VpsGAu!6)cmr z<`k8(_UjWM$<(0h=_S^!wkxQrNqon!kUbQ8sLrhr4O3J`+eg%r;s0t4{+cytQI487 z?sG79olN+c{80Mz4LjhTgX(2`_P}r#Ui_q=ILU;O7sc`gV=`0MS#Xd2@7LGE5KFFr zp<{_BU+(P$R@$)gIB-suUd~yzBb@Cq8cVKz@w?zbSQAlP z?Jj+$p=>E%BM(WU^*$N!0}s(m$cL#Ye$<>k;cy$;FiB{rT`~?bFSLKwoXgzjNVM(F zM(aFw`+Pyee-pAJ?%LdSy;4yz5g~edf5>&U)Z+U4bcpIH;FV~l5Uw}+=4q|xh3Ifz z++%*#R!Zb37rc|8#ERqX_sr%#EVMCU)Yr(_Ij1pt#6}qL!5Jv?TfiCyNVcviDSA7>G2du5N>=DY=B&z2X;=Bl}GR%o>AQLp?$| zjyZq7y<)vDULR}TJ{Wd3efgtv7hxPn=#afg9tm|@*PlwqXqAMJ!3@L8)cKv)SaF5* zVtNw6Ow=Z=h6&O!g$$YUhz6-;H)*6CF8VJ4Au~DBu$WZ%2^M8AiLf09Y@9vCmEN+P z2jeHOGHB0B`a>E8zWRuttq#1j4)suo0Hq~)lr!w+-iMv|l|bb&_FUD^+mc}a-&fh! 
z6I5NN0ZkcC`E9=)pAIIoyndFMc65mR6k5x?OG!GHY4c_B-`{>b6QF84-kg!3rS7yN zYJCN#FFVx|>wlZ=`1A6t%fEt+-~T64XlMC)b=35dOiyf@SC1!4wNi0HVb*6O{aabM!=++? zlaqy`W0l6TNW<@#RtJUUnL&|J`>WY^4ZCdvXIIgD$9pMj!3LF-_Y+O;e##hh0&r$Y zHw(<1Oi3{H$XG=%>IH2ehKvK7Q3l1syy~e;qh6laTwxjM5y^2D;hO-b!vSLJqqf+N zL`eE(nr>Wp-~p>{5)C7=?XDilZNdbEbIXjByYN`ieO&)Aa%A2(q>(wk#vE=Ihbz8% zHYf--AQV|DqeGtqBa@}xjg|m&`2=f9V z_VeP->(}x{0iI3Ut6>A)0FCx0H^r((yY>ot6_JEHa%n=AWi_URW|3o_g&B zlHtZ)y{}g-*j{e4e7YA{d2qd=I>;~$_&L{=qn{URc70C1@$Je!s!3xz6Stw5Rf*1~ zTak6xfF=$guOviEOCoBfe3isKC@eie0s-b4n&q96cLxO2{|eZgZND=aoXuXcX>QiH zh=!TpNbBAd3Osz@ZCJUi=mK``1Uj~pg(8Fs_12&_(Y_WWYbIrj2~NNti@`1dhYx0T z9+NuSe?4%h5%XyGX6szXIq8TRcz>5GUQnV4hB194)NK-o}% z)l!|7%0i1JK@1!|oIH5sTCnVJx?&5$Xa1NZ1Q`lVW&%23+MyG`p{` zJq^TrE|GkYSQYH4K^O3CZbzjkgOuPTdb$ntTU zQdaYQ`pTbSRbG5Rq}LE#qr)8|n|n;0-<)gDr0Wr!M9WgC=_CC*w}mT|LxEj+q6Y3Q zHzlmZ>XjmWW$*$FjN=l6%GbD<>)_$+5CY^VmMX@bPHIDi1;){pZCGB+Zh7A89&x69 z1a#{NWNGU%Jp8`9V!$$F@_G?I@bWtvS&|ylW|B4Q=x6Bm^Myp^N^cG<{5=kj+KhYyVCE(kZc!^;UeFHq;`ov z7rb;C1P@;beJ(NQn30qyAGzaBNuo`s1+&U;ByPTw-D=b{Znv&+hys$6h{)TA!o6ir zJe-~iF?bvf-AP$1fb+UP9jb$WT2u?*x(KIo7%HLy`sLSzu2f-0eZr}I!5t~C8QJ3- zsY*dtW*4`>7uFHKnpiMW(8X3#J^y8}LWbZM^1T~|R>_xnyVFiYWy8^Fsn04ONT6e#@&FDa}0d^hB`0*M{>zw3>B z`bu{GG8$M;kePHVs^3k{>WE6(s>{CK&~y-GMi1crN^gbXWeK$Api*IR-IQKUMSR;G zcdC!06IEbDbFtO(MUn_as%$f<510jPg5Olx#x`UdTzry*UZq>e(VO+O{YcWWyYHbB&3h3g5Y$(x{Wh?%j|k}~>(%-DDg6XM z@In{g22XZp_bK>GT3wNqyZ|5pm`DieqAws#*txdJ$e}u+FZbdNcIFEjxVx9%wPeKiZu9KW2l!vWQXIf27xMub}(2JXRgW_hN^C0B0Dt=(CD1VP5PwJ$i%r! 
zlF59CZ*@5BWE_y)unCJ(1O|~so#xx^y5>3)*|O_FQE8+nboD~7QgD=SQHEJ`=CU#^ z`$dbgJr&jS+drEasftOJ<5-h`^QrU&$)$W4=b&u2>69Q5v}-URo^`n<+wGYF)L1ng^81m#l+^* zx&nm0DjvdWb`$xmk=oM5u9J~FiL(@JvGel{Uw(f>aPNnK$RtV;sbnO!!uJ`W zq%gx)lrT0jFU!7s@K;Q^n#A-%zRlvz@_wr?tQalLu^(kvm9qocKgbV|$V?#hn_&Wp zSLZ6w@@Q<)#xUL@F-ec1X$>%&kK3shVgPeVT9aTh`4t?rtNSdGqN}V7TOmBkQ8F(; z6ObqniLms67KdbgrLDBF)4O)_h9yf++mO1?C4K3X-w^c6VAp6tc@>!Qsx1m{gE-MZ z>T_=t+?6K8Xi!qhkche7Zs}yV|6zw)cGRs_Z6acS&LJU@CPenlTzwucI4N>+1w3y* zzw8uMrn2O^W0c90U)?>EA((+%X8~x)^@!UD>gyW~U`8M)DiAHPiCMWr>4JnkhJSLt ze*L{ADRg8pw_B-XI-0nbI+dcv#;TkN6OdY~qeoZaS>Gs~Umd_m=qH(fFpo33{`&3i0YLuAGwrAlT$^$7Hf}_$Z@h?QLTCwt23)4OWXv?io#B?`ifs ztdDWh(y{)?m6cAN4ySVwF2q*&tfFgB-W#-WlSy2)w0<_=7qguv9;MW=wYUa<%#o2e zEx5mXm$hIgClIUKur(4HPVuU{q$6w`Q-fbxk56z89Q8tnKL<#}0C{cZ>=uDzdOZav zIGHEPlCW}LxgypAWp@z6MU;*TDrZ=rA?_XUmaz}{GQ3ex?D4j{lqRHhG(K=de|ND5 zfvAp>#iSrXJCQ_MdA}8VR`5GPoegcGYvJ!lhD-pqZHz3>gwb%s2+`Ya3!FSfWv>v2 zU1wW8$Jwgl@;J>X!SBZX>cyEv{f-#F*k|&4e|3L-g(st6@j{kWqSt_Om}KLmQ+^{e zcmopU0mJ&SKca9rkPsyC)y(OKT*dIGeR&0HE??{;_x1- z1#1&3TP4_4MUigLkNuxs4OKpT<4pai*`$S9dO?#|mt%rq{=$(Bk>SeBSPDWrN+ubK z=iPY5eg{%hWp1cZgx`S!W@q!%hkhF!tlq_Ms6*O>Mivtg0{cpJh;S27Wk^zEOn5CD z%kxGV;ABFQ2oh036zxNz5-J$d+zmji?1Z!E4_~KmKM-K*v&*w;r>@--%}S%jCmFyD z!NUdZq&VEbOF^n#{g;LIZ$`95`;|4eLbmSBINXFXaog>`eEU3zcL5s!6q&N9b$m&i z&Uo$fNc8ek-Z!Ur^T2Gmur(Z8_QT@BO0hv{@aj>b$dc{Q0e=~2^R$63!EWEMV5+qx?XNJ1BCLM-LN zcp+BFc|^e$)7WSjT0aIOk;N#3#?~P3vymCluh8_?nk2Hq+aT_9=-HDJtkkn;zFsBA+OST-b5h{l{0KP&wOsAjmSGjvC~%7 zOHNY&Zt_w$_CgFfb_9QD&hgO_=9t90YDAba3I zw&%meq31KJvf%lbIinZBUqeHHSUX9rBdUWjjX};GM{wM!5zk8}Zk99zj63(kCY&h4 z@H2bh|aJ9Xrn<&aBF ze#Pg>7fLUj2^R!wPce-9v#e(bxTKN4!~@*CK9cfMgMp0$AL0_G8g;_NfgwWtMZK?5 z$@+8lP+{LY883K-UmOdZToUv^aKL0FVWe-GV&f$XM(VBS({8EWp`0Y-pfrPqG zPZJvLohvnaSme@YyZY(eUf7^0nGsSij5$x_CxLcPXbtl&@}&jBW#1UUYBuv>7H2*b zG=;ME?64y84;fyYwo_lHD}>Vrt`PzuU#oV_(^v$JU(kYn~ zYnoNQ)l-5IW}45qJqM2)%hw@Q?y$E#WE#Z!Fg{fV9VJ?PYLoKC1`}p#YdAgDmG>6! 
zwyGE6)}t!31CbXC;Xmbo!t#-5#&BN&Y-grSG^Y7CcjIWlek|@rNO@tc=_w`qqIs3v zZvuudZ4Iu+b~sNDw#dhP1pQpX(+TH#X=J+0xl61U`zO%m#Tv$6`Rb67etpFGIOOFE z4;d0{3NBGM`-!I8SUp4ZrE#g-k*GjMWj;QyKwx-`J3{Q?Ds?_qXK}{F4~yUorO?{Jcvk) zdWRN;+{9RAOxV^)%04<(btGO-+iq;Em9TLF#~>uL#vw>ggM;*#W1`9GgV!7mWC*8z zz9%ZjWdty0#NjB-S&>OtaK@1=iT=uYcQ-6c>#&49g!ePJrjez>zP2B$8z(GeP;ws= zd)b%C^QXH2*K(%w-kI9Fr~}pCa6{l4vzhVv1-yw&oB5y42laG~@-d&HfzcJkWi0b7 zgLbR;$g{^dYNO!c8jZ8ymCLZ9<)_owfOenVobVu6aoZu)bh~qFmV*PPTBNlNGRI4% z!8lrWgK9%&pdN1pWJyww@XUe`h5mgV5goEE;x}!LLo(hlM4)+Z+-@RHU5-75pw}r+ z_STZbb=#Q4jaD)L>VeG_<&@5(Jc?rJ{vE?ysQ34u5X*S}P%xh%2g+@-!r)$J_<%U!IajC9c{0zjo{5;id8yMpjYS2jgSKN#YHkM9}#qVWEG2AKRAM?o;V6010OM#ELMOcH7rSN-FExUn4aDfw(ZIg zahmL>;nt_8ex{hdi7N!ynzj|plxwrlVAXGg<(7H;`<*7v$xLW&EFjG#VT7AfTAN;dBDR!fGQla*vPYtyJv`rA zph-<1svH8SxB%&mc8*J5np=oxfbu-*@O1+mDRsmr{MACg1xvmNf+bd?pi>S+zMrbT zQQ)#Kn}As-=-JSCa~D9hEUUW7^A*&WURtmhYGTpAc{2797mxQtU!&L)wffApAuioC zQ5m?EWRQd6>*D%BWs?{<{2@E%lfb#922cM(_}kogew5su$ui&dn(**|o0zNn-r~x~ z#|loIM5Zzve0Ke`2NT!dSF!UyGin#~>$5%%Qo^o3w{@e+sYUQ{JU? 
zo+g_Y9sSGTayfcj_%wECr$A(!ZguBi6J3hJ99kWngQ#cVP4#LT%}JWXH4e&0)6bVN zcT~;c$4`$`n}NO@GySFKH`N4Ju^n zdUE?S0kZ6J>P%$mS}F%0tzh|aB@wc~C1I`&$TXt}*Vmsr(UzPj~hN0FtV>^pVtwjHO# z79XC#QZ4#gP8w1kcPqIis`uhvR*#}U!GL2#tyAm_f^5v^I0^VSyy`tbHOaxjF5mUTctvlyxaXNdA#M*S3EqHu(r~9N0j}z%J;aO7!XC&sq*8n7`fiZ@GLI)WzR9Y7@XYNstG1c^N`E(d) zEC+mJ-NW7zCSvk&RK7|M<)@2UkX|C#JhxxL!O!lvHtKNq+PB|#e@q~Kag{*4x+HY| zNki26USs9#BA~H(`F=ued@Rigka2$ha*kB(s@r&RgvTY0tQ$i%f4Hvc!4H1ArYChB zL6d{mg{WHZ3m#1`7%ADXk%@fCeNJqt-6dE<{Ek961B4?_uCwUtX=Hx{Se(yyP#y$o zhe`iDgp8oq8FfBh8p-3)l*;Jsxk}HvxmKl_vF7^dqo|1A1!{12`X>hORIX>9@R{C` zIdnQ(baT=8z@`}JEFvwfK_N_ZE|b6-xk1`^W@i@#EV0my#SvYO z`SPoqANDG^UNSNncd${m;b0QG+2`R#AYiiTK6GbtuSMc4s&G=LCsra$PtX7K(DCW| ze9knLBmLV&3;A5j^*Ry}P=^b?XxylfJVq*VGJv-d9W@F5Ok3Fy7(zX~IBk%&KG*;a zu11f8C~Ym1c?cA}Uz_kfX0)DkdE%a>ParX`{UY#gL%?;r^38CP78hSCqO;-3WEIEO zgk}q^ypYi(9%B!E-+R3N&%aVJU!}jA_CXMa^2Hjy6k#g77je8MTW{a_c`Q6R>!oUl z@ouy4MB;HHd;9oaKw@P12w5@sk`T-UbHx}<;QnQEhPPs>hY4for!01EWHwAlrtp2#AHy4K-HTw3EWa9P{=tc8Una%103N-6W8F=zjod*HdJ@Y`* z^F;&KxrJ$pAmlcjsbBfWMgw9D4GwnVE&&>V;$n5~0 zr3R#d5n0Zq{|;R(EfdjFfkZ+oU%G#aAL2q0mpLr-9%F#DQf!lwPi9S8yVnXCvhZ`E zuZ#`@0kj)uh$ui;8C9Ex?re`HIlX!SW9ML%o8Wg(%S+`V7B(516npxZB_Uq4bAWHU zEgH;)7o6qQu1aYq23>i#c0=qBHWz5ZY6~P6;GDAV>x*sAnictqiu#7Qx?)JgN;LF* z(v}~gR_g15}CTU#b~D@6o>K{u|%Uo~So zg~W~VZUZsee2(Oyq5}yKcF{*ADfQhVg=D&@3x>ZvX3o4b)L=Eb6<)Q|5UZijP$@|yW5d^FM9rfG zv;c#`d6|?qr;-uUDyB5VSTwd>eU3)6CVmgttQvFp{#xqWo7$cFRBgaqe^P#ETdd5B_cgH>GHGj6d@-Hj z(T(yhx#GU*A0^!2O+Sun&s2YKY7Y|c{f5W+)`0Vkn3<5Io)N#vXxcQsy5mPQutaw; zvLgowW~g)jA&*ji1{QEk-W4Ca>=lM7B)zb`zGX5mzP^~UOj1gwuI}?TV3M=S-|S=j zl`10>={jlLj z->BViFwI&g@gQm^c>0vTKF08ph44@e8@i*?X|D;aZy1=L8rs1r@dHEp&bmhnRR_0j zYGPqPb_0gjT%D-T10}r7ep4e#_HCSPduZg;*?bp2(K;v175R7sMOAC}E(*C%uutF6 zvC|J-@HZ=+ewGF^TJECiCav<)*WPtA<2dmz0%24?RQZRav z;@9=*)73WE9jK=rv&~f!1(ilBs4g+;VLB+z(}1SBzZ6=G>SeZWP{vgG2Z>O5k$P&t zd)SVd-u!l4Uo78g+2N|@X=p!cs50A;RVkG>w+C3@m)hG{<`9-Mhliu6G7@$!GH;KT z4v*>EMhV^*2Q>#R>L@52akQwol|g=AnVJFH2HbR|sD#=Z%${y(47jKO*&>XWoS+%! 
zz228mLui?`zB?p&)Y|b-S$_O#6Eg)8pLcDc*AYfP+75`bf_|5E-IiSzBz`ip&3>0(1d6c__6I(kRiMxLT0Lga{_I6F1Ff4YuAvqbr1#AOrMYKuTif*ZzH z)@;Z~i$JSlsc565iu3v zYZd{GIg5gJ*f{*Z=hxDlhC#8xXwnO7Cs7ebn?MkwB#Cb|ll&1G4xKz6wUnproE-!YY5mhKt;+>vUd(S1{@VXEFie-ifj!4qp_qAtF*Kwnk;o)zQxSd=`f~(7 zk-@P>-)zi$IL}G+i8h!$^ueRoDO0=TIUBGIU#f#NgNI0j8+k|{E@Q+>*Ugg2yCc(p zFnP_qUC-T%HZ#{+DP{X3K`p5439d>kwcIV<|tZl;^W_xkJ#3^au-p5D>UeI7}ui;58yMD6Z+A>9xT; zH<)C`KA2(%SqcJBGEU+k%jy;wJ93MOOg5;Ly;~b~q;V%3DC2Vgo7gM~ID_cA28+%EMa?zc*MBa+)bA?QlBiQ~mCE zZFlJ&p54Ef`HgaFd{P$DVEXv$^J?fn5j^|z&A*fVZd|>Obkw}$)?FMRV0kN^KRgr{ zO?vDB*u!J|Q6%JtLnF#5en%^DT}cIcq$YTX+DeNPbW$w{o4TF_yW+IQ2;L%ucn>?d zCGKg`_`UqzQx)W*M9dFwfdVW|mE9AC9(NuZlV$0*KY(+Vy7CNRnhZ@Xk>`PEUtNX%>5QkeOS}kXo-^U)M;hGoT7bW9ph(C-zUE@<_ zX1u+xvTNwkMX({fiW_N{6@33{-?x%a1eXC=U|U4CKk*=nk0H#G=PP`f*uuZPPH|<>}OgSk#(j|5V%9V$`wV!x|G{#&we+M&~ODa z#>*cr1dtPeL>SD8k$`6_cEb4Numq&8aEE3$qOvW0(;^~)RO{zdXxLh$AfOPT1q(;p zBsctMbMqJ)o>E|;s?WvaEgs-|x0R=f1=pjXOl6xX`~k4|30@yuZYHW6jec*Dl_bFw za=pv*oAh$$4lckN>m;M=#{1@CTI0MRvhUnzDQ65mq#XZRJQ-E@b6nPdVKj$|vTcuG zJfg<6x()#4lgU|NHzRVw3nS0S5A3^#$EyYwq(w1zxZw_W8af+`l}Pk-)e&nIT>dE+ zzdrVip4em2c87?|P4!&)CHN!vC&^_$KQXG9MF`B?n?M z_(8*{d0JBlCT63Ebm(!Eg;6K>j5^~9L(BNIAAtAWZS${xY{yF6i4wnEp&=#8f1^?> z@wu%4pg^2sEKEN(+ReojtUSB6sXmrHUp1Bfc70+t=I{_x9sZ!!wyT5{?(@uTwfET< zrc17;UrhexVia@;eNLG;u}GIJ3iep=Yd%YfK3(h#g#b?GdRdKb%J=MFj5_?D|4F0@ z$PKX|!h9LNRBjwElP*aTR77!;|AUmy94Ui**5PgNn)o#fgV4+`f}85ks>m<^J!+Yv z24q^?dlAIWK8ixQdNj~9+>}ktMpJ3DUMiV#l!^zQ5>*^GSM#FYhwaJmG9;Bb0vDIM zV+l)*6&l_dxD?Q#lA&^CRvU9j%Q9~n_M=W728HGfGC{*f%i?Ag7&DYt?qr$t*%XJq~*^_?hO&Tt*4Qb6(#Yt`rdEm1oo5jLSZ z^Y#|v$Id1fWrLz$kDRRJ?{U1MkuJ=#>pN6liLton`{pUZWHm)j(uJl=fC6Y(_nY&o7cB5v4-y=AvVM2?)LW zStv;?G@{Nt=N*{JY#Gv9v_4e+)^uQ#GRq0yp!MVQn;$virhSZeu+K5-pD0vmc@2)2 z8a}DQ4y8P7GZ&tk^s{gBOt1F>o)!pgyFh1Gm=5OIZ7Q_SH9gH8MLO?C6tPx>Rz#guHRY#eIaAt~`Mpi9{}C6KXGMZtX@5rqJr6fax^Qu3SATUbBW6ULySh&0+ws~KLcZGTk_25EhoCc@ zQ2CEYvvtf8PVoh~5x3CsLu4>~5+dOV`Y^uYof!i5OgBdWV8Z03XcwAb4u56W8O}9$-HBbIQNSH}!Q=5m#7!Y(O4hst%IcsUN#`LT%ue|! 
ztObweC3mvZ+N^!$;dyMp>CV$o)v|Aldg@sQe$8Vm-1VyKkvVURzxW<@ zjopD5c>cXdi@_IfPi6Cp*neVK@@}&1jv+}3HS(5m#88P3XJvo0^=$FGQJZbVkI5Dq zKU)*7un7Hdj1QL=&qk#nBa_AY>&24ifzI|>V zJDsDjMIRIISLs$_KN{bX)8!h)Qu8V@ECshJ(6b!OQ7gQ!mo$PQU@yv$9MVnW{UEPy zbC2E&R=HMQ3wKYQ)@196)LaA)lFA1%6V4JFCg;4P`nu^&m7~KP?v*R4h~NWLKo*AX z96>#19pa0Sa!iX>X6z0NUbnTN7=xxW7lxU#t(|0N7TOxs%6Mh5DJ?C(HZ<`EcQs7& zJ*^A8eZ=f?uY)p9)vweuOwKU{vc>QM4!y~J|4#stDs9z9uQw{e(Od>JB8e_?iVQ}5 z*UXU<04OcpabT?rfFgJ3^8^ef4nk5bxO?*!Yqu^?jU(#QF%KL+%GAs(v#p3q;0UwY z-W%)`C?lyTXG>T08=N}%I7?GA?5<67{Mp@t)T zt074+qk$u4gccb%KnEGz8;Y1DdKNqnWfWU{|CK|o9y=!<_@1B^5RYg znyDb~3WnQB|DKuFw<}2fmo|esoo$_@1~cv=h*-b=I?GE-tlt#m#h6+xqFM_O!a7RR zr1|3s*BdaUJNk5*u(Ni9jhh!~Z{4Eb?chaqjy_OjwiYnl9^gfSpzzUAh%#UjutS4N zBqo8PyH6}fj0c`tH5U)k%!Bl{k5m+Klyi7`t}|Y|D6WF=?fPu zz%bj5WLamvR%flI0!$#yZcc!@-eO4_U2tX_SC z-L02-^R4ITckfab1AJ2Uw^zA!?Nz?#^bb>~AT}ATxQ-|su;hmxUR9z+M(8Pu;TXSF z#S|GbDG_-FX+pNYOOOu`bF;{5ovm@o_Qp23PAJAb78((co;kvJFu)%S@U29I4L1>t zKt++$vr`JBj~6=0eNRa=f-EZtm4wP1{**!QPmGtsg0PXXb@d9;XnyDipWx=*ee7%3 zxUn|k=IRb{920pk&I{^s#HcjX!zx!-d$wPlPigZVc@oIK&|mEeGj$G4H<|J!k+tMwLwgr?cQox?&F*eazh@bbCF7x?+wD8HZke;ZILnz+ zM|kz6KjAB1{3BMcooC7$Q}j0Z^5;L#`pq5m5d2bL6DWoWJL^}tef7(H_4$9r{_c5n zu?=>BAV-u5-OXFP`N}hlIyc!~zeq6}5IT4O&-XBSiJT~l`;{0xH?hMwA>Zvl<^&=K zyZe-P*YJXnicrhxf zA`Ik(b;zTLQO!jN8kD(&xW(d;W0a-k)~!2?MhR9x7>bGdxs&P-eMMNSQdAnOj@T~k z001BWNklkwjyV+l41kKIKvnwldZ)5A1tWiI1s5$Tv#6foOt5C$P)ei14Ss7yl;Ve;Iy{&9ei5dtZF2&-Tu6a^~q zQA%TUHj!u7AT&j9mty-icF<*Sx6kVO4xL_(6&`WDPT)y&mQwQt(^a3={48SOAa@cY z2D0+egJ%x@PfvgEkDD|9`>)%lo_Yt8Kx?f)2=mn3TvcTH@1(;NlZ`1;Lq6=#iW}U# z@FKVG+@Q#N96E8DwqK{Wv&YPV2gpm0YNL*n9%ZIco+Pf+nU0sZ`O2%j_4-#4B#g%c z9(wnC=v#$1&|K#-ogQlo58@Q0qcGfS}?~KU@}Ag$Bu?6JMqXH-a3*kI%CD!I1AVTEQCG2f^%Hx)5GKgV00@kU=Y zKK8eN``kbJTJ`qr9|2#t13dNAQ;3xhdMp3%T>9QaNBzNI{3D}rhSkvTbeNxRaN&)w zqQ`9%WAffETi4%U{n{m_XBHWZGeXZ}*y~f~8FQ@$o-Jr^Tw(v#^JK#{Zronw-}-Tf=g=;ZNhcF2#zzY!4$0j3eHgu;UMTByW6!`_(r zSs#)22xXQ1jXNwJILPwS9JUN}nPh zaQD_F`kiY$aN>T({WUh%FQdy5fe;kdB9+AVL$WeO0?kT;TG*gwLzIy$FCQTEEL+<* 
z=nnTdfBAWi9+>0)6Gu6b9$}_gqr1OOD04#XaQHXQzrwNQS?Y}%X?Mi-ew&%)1H5tR zbqWRR`yH;{S>va__YBe|WJ!Y8>!BJQ*ZPeoldgct3JRH^ycDm}M8qNG_6|Z>gqlF4 zc@JsEU`sqt;6*VytZ?J{McVzG&D}0pDQVWHh($>#pji!>ZZ_FZvyoq)zOu5i;(hd^ zAJt;zJNF{`1_AJ_0T`6!OSy=X#?)M#?e0z?MJcR$3Ptu|M0-eIaS!}NjU zbhfuSapo8YkKd2-eX`*?{p~vxgS+glUS#w3W#;D&Ff~2HAO7K|@dJU7f^uT@(rm=6 zZ(buAz*NoP2j!$w=n+K$MOL`x4+c+2d@rC?ou(DFU|={jcZB;-oZ;5>H(5HoOjt`f z|H6x$U$r#fYVyd5hk4&4k8$Jbc@~-x$7Wi1GU3kdCa+$2fm4f%EH6&;+*h7w>BM1X zX6IR3+rj#f>5R4Q9qxbRG*n}Z@G!$ZLWYQ<22sUHy+6f67M=D!3$s)7I@`SX#Xn^8&Kt;lkEC;p_Vx|p&}V*jhG(ArJk}OyGPEsG!lM#5 zsMnkHdjm|7GCSR%v$=^a6XJ?xob*YH458#iXj0OOW_aMxgUm&99G*MOfz~|j?Ja~T z5q^OmLpN(PmKlS5L{zC0RU>kf@az}<6faVkKqGw33$MJ$PIsSLtInP6yL84w;zpD9 z-Y!)S4$n=|kcNuq5zQUIdNC*;8CBo5t`R~y74k4Z#WAYX^xJ!=Y={a3WoLu_ol8V! zj2(BlaQ@5OeEq9T`99Y#-QtzkuW@H%k3r%b%BG`$sxnMP9+g1SY}6P@Pu zsS*1U5l8fU1BPkhq++clkRG8QB1^@o#RoXrI!V>U_(HO|bq8G}OwZ1fje6*$B(j=T zqr%7(ynOvd);c$M-RT%18_3xz=|jr0mkzk|R)6gz^XDPiO|1|n+EZ26SkP4xCQp6*k#16I#} zh3Qtr>E&r|jN5$a*%z6cKS~hR@%<5v2&A@5SF2Q|Gl(hD0%Z*gv(v??vyTEYNqgqiUd4eY-g(+#&8USo>-K8uop)V=2 z5v}?x$CnRr@#ahHwl_darW><(NGL2P7EUo6FH-gsgf6KBK5N^nD9#1 zo;l1bm+r8-+oLd+(D$hZ5@ig5m{|6C3Y(5mw#2hu=NEtTH;Z@5gHV2>FP=MnT6}0_ zrL4!%ud1+uu|6k`Kgd*biffl&rMq(rDN>qsA0-p6Tz&;z_K0IpGNUN^tgT(4+u7#S ziBq&1E!Ngn2||muDW0_W$|Ljx8kLYGOi$ObHmBR^5ymq#s)snTcnVd7M8rfY zCLmyOW{D#U$MAEXg~}`o^%-iiLL-`@%q<2FYXxBtQfoHQ6y!2vRCEb~k|G}>D~hWd zw|U{s^W6WQ$5`w2=mB57asyeb(`+?qRs#-9)tCx=r+^Vgh#-VY1tS$jrm=}ZMRjBt zyP*a=R2boj65a1$Mk5F+gbRmB5<{6=R@d8n<_oX$-0Qdb-REB8h3h-44+^@eV3Y}@ z7a=F=X%z%jp|~gcATquBHhJeA$N|1#06h8RliIGV$bb0%{Kg-arnquod70sG$kj`i z5jr9AEny(=L%}%dW635-okj?Y9|*!oF&uUn4SHO>_!gF8B86T~#=#`Bty!!tkfxY8 za|jMDF3{QCC6J0Js&HifD6y$QDo0u;S*$OSwNsMLkg2#%HEgo9a0Fk5 zL@Fc{5uWss)>*#UvLGFfs7DoA%{oR220Gz|i{~jS0lD-@5VV0id;1(Zbc8a^SZYnv zs8%K))no>)(D+kzloAxlm~3+k-R`)aIKZ$E$q;2yq|Tw&g+Y(z%q*i(PMTTv+GDQV z*~Z2##>!)FXy^_zk{rf~!3c>bBw-NHn8>`FqQqCEf0n%ST?qhyr&d<@KUY>F;pu!d zYLlcrq7W*6NF}P`37;7|5XuQcD?h>*g(sbbiqJXr 
z*ryT1_*M~=Q1uj-FP|qFb(pTtFcr=-TRq6b$KKEV(~q$19j1}jY04@hgDkh~^%F8J znXb=Lu_5EmkXl%$R;lCT;UPG=aFp5V453${EDXbdZmsRp8l~SBZWfw5yQcVvee!?>K*)7 zxmo`0{r$St*?~$ZSXi8)Ec)c*g1ksc)0EN}q<}2X5JEVw0Bcc7IJq??SlzgZ5DF`t z;D46o&a}YuD3Tn{v(B>zpQYJDEKVO{+%MVPPf12&;@M@6&fmxM!Fi&*!UN6Iyyw_M zG|GrlHSniu6vHt(w@lZXcv!5?Xase1WQel}iQxYFF`hX6K5XeBR6vqvlu~1b$7@$E z@aV}0s1TA!&Bi!mb#I@lmGnwMzqA}^)*1HuC=FF5Axj-^UkSoEaFZ;>h@zb$XIiK{ zA?fa6(jKE>mr7h=r?bz^^>uV@nk!dt(a#NDATXt)&D9$Xvf+>*iph$EQIfFO3NZq- z@EGNqyeQVb>j5ByuxHPnRloLYzqb3ge(slksmw>e+u7d}Rw}d=l*Tnzkit281|E9C z2PjR66&j;0T9?jvL3vnd@h987r#w=f5PK1!@7CvhW0s@S%giEHdb7?Cz5n}J@Mh_3?K3r3p)?)Vc5bs!nPILuN3Xp_mJ8;hCV8AQ-W(7}j}JWZ zKEh1W=?&Q(4BT8|q#!fwY;BP(EOYYUVP0OljseEfr<3L+8(ZYc`5jJIe6m3oQ|9DW zQf*G*#UWnZkt9r(ktYeh%pr)$dtEMHzsOF%&#k>K+rxy;V92N}+3XY)8sb0^D22{a zs#8s@=g2qHb4{dY-qtmeDk9YagzA3R0|4OMxpVsIvrnrhKk?)z|I;7;*-zD{YJX$6 zJ*Ge)J%urn(&Q-5qcj>TK>CVcGBT`fPF9vqv|!Q~H@d+0J%T_XFpS1y0$ZmV)ljC& zOnsTt#~!9EBkrzegtE?J^B@nOe2ngR4>^JZ^(FkiV5v5btu)x$PYG?r!gLFvHA&$U z_=>ouxII{9{ni>s7Y}i0@c`?$H?WPEOgpRjyd2_q6o&2X4Njdp#mj5g2=Un+ri{ip zxB4Asmlv2uFgHCfR)lPUmXC@e_fkywErRVn}pd04ozF(k;A9Cbo~M!eE$y;DIaSx zB0Ng1kS3s2on@xdq}$%3CuL68SG`O_=OMuz)$=)jYi|w z(==t!9+Bl5t&Q6SqyY5WBjLI%@sy)?XU1aWL?Fo+rxvaxSy@mTL#s8zu><#U-?1ln z@W>-f2Q3=48BU*o;l_*jS`w6&*{H?Q`8h-fWZ*l3s*#AoV`+LBsWYbhIwEe6dL!Cm z$ep{ldH-XN)2dBzWA!Fi*KYCN#~vb70om0AU6yFvHk*_=JDq)w99pKkwaZSYOS4(0 z2fVhv!`;9q%nGJg*I1sNre2H4vJ9gIy*q0hIJm%Tt1nZp)+mkP%IX@=+`NO0D(vT$ zPBvgOLjrd6Aw5_yaV`}lqpLX969zH;FTi_7yIIx`RGxlB2a{Fc`5ozsT15He+kZ#zQ*5*b0VONkw{WrzN*H_K8D9B?t-d z$uhxP7gjNV9}O6d3amGo6i>z^1r+L@U9Lh}<3u=%fCX5{5oPXEb7; z$Y%J$rVg7zwW#HEhjO?Bf7Ap#!{a$NohH9mb?}d0lgq$#J zq~{})kG2AbCM&#&P{i9igQHDQXiZw=loC4QA=~?H-g@H#-F6#4QdFu?uP7>k&%$Cz zeI_Ib1x2rz2muOok%O|-t5t-R#6e8GUZYx#+3)VMv%kp$XCC0j-CJC`evNcI;{FrI zX*DYN#<6X6;d=9hDXBFYbh}-~qp_2>t5)bG2?@|Muvb{N(vocr>xJdUu;B9kkd3@# zGc$CIWZz0QQ_Y@*{Q~+HO3z0Ik~DV=bZHGy5E7|-&S~1Ykof_jAE0%Pkd{j1;|Ge` z^c;!sX^(X#Yg7Lm;5oiC8rZiS02k&?TY<1a7_8^Iq_l>#D9MU^GMBEfMo^Z<86jJV 
zQUOAGPC?XjzQ009gj87Rf?$-T*dU@a8c}aeV+52oNBR)DL2G)wNzZEp6b6$S(2~+< z!b%0@`=~(T*D8qE!vvQ7{x&o7GqeY7u58_*oep{M*eM=4eu}9epz2A;GEDA#czeSk z)nKBvLT|_6%%}M(8-CKp>P)UK*Fldy+)MsL&Yc z6NNs$5_kwAA*e{7R;@vqS^@%UDxe+&RHKM28Dn)pAw1S{xV%52UFc8z{O_DwfBNiM zwX(8ezUx)M*Ub!`J8J}Bef2r%>jBc%krw4C^4z#mZ(>Lo`hoM=D<*UB(v^ln3M9@R z-3sS91cLr>K$Q`}SvI?sxZY zd!eLKuQ*dh;T$l8P;S*Lk1W@2mQ6yMXU?e#XIoZUO<_t3Q<57^X-fKepPPHPxV(0S zWJ306?~JKerl?jNfmZrplto4!A{5BjnG+eAp=E}O1=cgX{MJj1Rfd>~x!u`fnC0Ae z{5}p(FEbM~sRk8_G$R>}klG@(MM=px&)6UK>7-){2^Vi%#rG>%XSqDl-ItU?Oswxf zN`Xc&$~B{ONKgqTeu4&NB+9sQ&st01D?%v=tflHJLSYC@Nka*yJ|9?jT;7Y=?%00y7lSP@4=Ov}S=Om^aO$*4f z%w^V6Qs@$)Bw|wT2&Irxz0I4`MpFpQUb4rP_3M1~;;W3(1U%^Ow>f$22-ZNJrp``X zSd6Dp@nj}VTC8tS6%PXFCc7Ma*AZ+@@aFne?sj&m*P5I;c0Y%v=ZS=(%yLYrsa2eN zmo*N-)L4dP&b75W3{6hyNtEwl5g6+Nw=|a0Sd10!7LpR@d%WKtIS2AUp|mAbKA!Lh z0-sWA$4s6K)%sW>r3jQWO7?t-7lq_fGE_d>rQpV>;PxP0-|3BiMx*cl$3Bug z_Sh5uwKevOSm_l-F_|d#Ci*@l)>vm80T^Sk*1lZ{{Mqb9Snz#`ClzVd=lbny?CiBU zT01~%8qC1?0aFW|?7X%U**o`|B5-9DRl)U*TYT}_OWb(v7JF*QZZbkx#k-E2;@Io~ zWSTK5az^Qx!a`<@d%!|EIXX|GtUw#hxG31^>|?CO3i%C=MCGJkg~3`wX$=Axm6{|g zs8uWUMj6(VcxoaIAPRyYL}^1LeJ~~cEF*|25QeOFhm=aZ+Bfz;dv>*b{%hl!D{^IJ z<-79mzoi{uav3J{uPzha?BuYcwkBDSMztjwr&z9C~7Wt(;TmI;k zQU9Ob)A~C1T}br4C06h^g$(kffDeke^77TPC_h*h<(xBJobUlWH8Cl62fb2qvcfb) z>9Fs6fsmFNr7oOdqm>kiW@&bThfbU%^dj1K_F21nlVm)`1N_inL{6u-#XtL(ALsX9 z{9`H$v#7AabbX$~vqyO7@F{-i-H&syHBBjo^mLz>Zoa|qzwkL;+If?{Daous3-h*> zjremnIC)!vbpdcM5UfQAhj0qzLdBKB?sYuR17oO1A(bcuT@d=t@yke$URKbzmOl1V zdouZ{-C=q4J>dVBpr7LZ|4V#_KWh(iFiu;1dN>|_NcvfSP<{_s)M`IhV9Z`54zGMQTmmgXM);#=Sm4RAo5MNUddT%@-5I z$2ODXlK^KyodeBZ)AgOui!7klSW6ayg`Z+<-q0n-?}0vQq9JRvQ;l$rTJj#_2>td4 zZ}8Ow7eF&C`uzSgzemHLyvBtaZ*cVVNlrH&rxLcX zsmGB6M_Fo4b8KmbW+k93#(e3`m-zVS{x2?eu8~TC^`v7jDF>3v0kmd<76r`%g&5*_g$dC32f{>H1~n z<_^&54N3DpDl#NuK&4*g@pnJYeT%1f^ys^J*Wpu~n44#*SwoVM*b#4RUgLNE))ID6K&o1JYoGbGc}SciiYF8bi35h6BbX8j5O_)v zPMTwbG^eao*~{~nMn?Y1Zl3%~SmKU+)O0e2+tT@9(i!|2`|^&nsX3 zRGH^XQdpGdx&FIxyMeWiGIozdB8)*}XvQsSagAO#VWzskq6yP*000ExNklZYd7r=T 
zE2XdBnVY-$)yj%#p+zkmF0Fei04P!x1gr_hk{{O&s@3W#o8t-ze8QefBnh@)3f_kr zxDW)-1`LN-MW|LP=u0ZJ+w#Xd}2Pq<{5yxeCYazTtIWFO#KY)frqDlZUGGe4z z4~<#_X?g$)1BNdG($JMO?Fk%$@u`SS#_z-hKp^}i@Y7O362Tf9au}Y2-meIat`7cz z6Zo_@%)cGexGGI0Fxd zaw)=SG=g=Z(M7BBWETbz3I|05DN4|!ASehB4f_Y7B^@DiCzV8Ia#VB~T5IeL_QFu8 z@jLnO{~rTk%tBer%P!06C2kCE%RktPe`vtRXx>2^Gq! z5Qvou*CH6eSOa6i&}IVzrx=zTjB`Q3%(>7kLV8kt9J~I>e_l?@kA{MchJ+>sRW!L0 z1H${DS!`W61|FuP<_!EIfDht4f2-5)f3Y(dY@i4ao(1C{x(E0#XJoO6;0*v^0SnRS zdHPJ7=yJ7EZ;bL@V{7+uqc_^u9u`VO2;P{&ArpihIj+SkDZy~CoWl!WsFV9bP${&f z)$4T(hX;j%Cnmf5|K5PTM^qfQFH%0Rg=-h=T@uHcAUP0%{`TJAW5GAsm|K^PS)JnW zl8>;ff9e2u5)f{DcY7u=G)s!66d@tKH3#Fh=2DLYk3+ZXFI zomw1in$d8x&%E7H^blID)f$a^XB&-2$6b=X_wL1B^XgR_V#g5ZHUYWx3>yBa0bmLW zqKQQUfbEOh;>N`r9#;V@gYf5y0A3Hb%|f8Np?ZB+ax$;7%WD5T-l}O09Jti2cK)FD z&p(|1YwzLwZm)B}Id^iVR$I|Z4H8n~MB~_2Jx;P0PM_;`w>M8!DpA%SX31u{+t}%K z&e)v$3un*#^!bPH-?35-&tJJ>o-|~+)oRi5@-m%VUS>3#SZcSQS;LRlzX*T^k)-2R Q literal 0 HcmV?d00001 diff --git a/docs/img/windows_alt_icon.ico b/docs/img/windows_alt_icon.ico new file mode 100644 index 0000000000000000000000000000000000000000..aa3593c34cf323e764a8f908a678c9056e927a60 GIT binary patch literal 55502 zcmeEv2Ut|unr)FOikx#sk_15zP$Y^OLB)WAm?MfgE9R_iLNVunIp>^n&TWn;Dk74r z)?2%fZtr|E^XAUicjnG}Lx0~P_i3?z^Dj*^29B@%OqMB>4b z$#I)QJg@xY0~1;0sC?qT?_cn;k|arLVnjwpsv{Gs0LKZaU(JRFszwep-(^GR_3UrP zPv?HIzc>4{_lY65LQaglAF_Mc<)9heR@91gZ`jVo!hMvH)O3x$rqKZ#6Q_v|7S%?$ zSox0(@`&vk;nzH+U0lB=u{BaVHLBAyIXW=4i(j?+1MMsWXWAP3jSKQgt~+PWbHjP_ zo@j|{__t;yGc!|FT&qM}cT$p?L;{t6>%IT`{J%Z};G7A}t6sga@;`<2u*nD5znk7E^!NnfB zz9r@wc4e9pU1&?Rp{}A1O%*L@b7)Gmpr>I79d$!!N%SC9HIl1HbQM}E#&T7OK7o_| zF*H@p%FPV}b8RgmZ<`p_K4NU(zrobRewwAFeX6^=ckSTdIH$=;#>r(9kgS6FTtA zcm7{J_+>JF-4(i7^;p=xOrjm*WMX;8h`1TjPK`BWFyzpXs6r~Rlc+;qO$$a^G8m}q zKu-)UJs4=}!(7V{W|{`deLX(bB>n;`4Gn>DCH`u}Q>v;Ba|1IaA2if8{|0vnKSvC~ z3&970zlwx-6L$?2@`>jZ*o)Uy1P3@IyiRffnwon2EM2Ip%SyGh^iFGP>L*Ji7WArr z{Qb&_Kc4*mafe#@ht@kv9JO^cw0axK^zO)rv4IA$*VKWGbF59=^kurxQYVHgD$1d& 
zp$R=rYB-0kx|Wg`G7TM=aQ-c{jbW-|0%Ms8bzTQr0yBXPbzjQ&s1aLT4Lum?7!z03 zzrp@bn^)y)mG-aP{#-NckAfr{|vo|%6Py4mYP;DQMG`%sx3n7!cot;K6=Mw zAidsDq}1*OPrVv^pE0VLctXZFPfL|Ht0}G2@}HOsjH}`R;WGrzRsKVT`Xfda{wnI^ z$xr-+%?o>1Q>Cv`BkrpD^aI9x&J+}CF>MCto_;-Q7*b3Y=Y4?H)nzUJUJ~opY!^zklmZ}Eu z({n}#&&KHNmWbXpx+1~0IYNvh5Mk8-JwpayMcP^%8FT`-ras2asn2m_~KG9`b;LBGz||K$Cu*iK(d z-%VFbt0(>SPL47|Ep7VvKee73@HhC2I94>|N zkK^g&%eXc65{?Z%j^C1(U_|}N$mzRBiT$1N_wjhb?|3%#EuPQJ!~Ge#xH;zywhujm zMlQ{%BZkx#!7WW?d{7nF2~H>nu~*vsPu%|&fBFuAzwrG6d!hF#^#7_xO8nKx0TneP z`UFcQ_S#zZ(2+XBK-UX;GH*z=tHH{;I%Im5mt@+;9R<$+G7tKnj6JVi8RJ-3SjhCX zw4ds8PW3onVhH@j5b>Rqah(q1IT>R)F~nm{p}7(j+LNj>UlV$uOTTKbXATGI_vE+? z@>+*IN9sgeUK zjVZ40PsSdj-#(9*Dnww4=T_kgi^9hh3yhK^o!`UV%~sg`5^#9L^= z-|FU{ulV#`!4*+}US=Iyh`rN|$sMT0pXEBx~ZoG!dT6k8br+|2b9|LpTu8qK#9MI{j^M> zq9I~GTXMmgI$=$I*z!3~nAyfcOD_z@W|5G|YBDY$52VhgrBbWJKN0?$8p5AX{}bORC@thdi^ts|WfrpVq zhFFUL92l||I6~}mfER<`;Yz#vc#`oFpQja~AX|Y?iz@JNavp{U%s`Mz9mH5fqp3{{ zQmQ3lNJvlo)^sM$j<|tGbMkR{c0LYFe1U1Hn^D^)f!G`VfxqzaRq@`xwpG>!L@roa z2U5oUDk{wVh`)&cMC>p0Uyb-n)Xb<6rcl?kroXU;zHv<$nuSACHv}@n2uOA7Lf%mMeLry* zgV-zhQrRNl`)|OP=?Z+C2Rxr$jDZ1D5M>sRXuEo-X;=es4pC_5)e_kqR^s}!T-=*o zj`LH#VP5JHbc~%0PpkUS*K}j8kJ?RpmZ&goWu7l#49I2~xBQCYIp(KI|1V+y`V28d z?kDVDgaNk480WJLyNVIE0O}{P-%v-?NvNa6MnnKSo z9C|vwMfx)LepT34_Lcs|-2SN^B>jx}{2g60olMMNLEZn=_y2qNS8+h-z0wcR=L`Jx zSi5s#t*(_vI3CS91ne&awkm*4#GS4Ht7-Sy<-lT~_%{WLsRxBqsSlIo_&yhSFyRYQ z9S0)PH~|5Abr5D%7kw8HG4K_6wGGyNXe5w<6f3BP{enV5sW`eO+s1 zEk{}B(fG*$!3E|EO3VfJzu;e`|HA$?8T)Cdo6-*ue+hknsttMItc>~8)IF6kpr(!w zB%0oks8@%kegt$)nn7lq#BD43hvqOajD?|o9T=H~pOi?vjjOcapR~WItmD@X2ng}F zcP@7~wSc9rK5MbMe`r9J-xpgkL>>26n-?)()e!NZjyCJ`Jiex}1-kl1iC8h7CuV=kSSt}5_z@B1N zJy^LF_D`;gK7qD|0r6)np=Lr2q3@^9mq_ekXv}zDw-(<=+t+Z1n$&~5aD}QS>r%$C zkQ&s7%qRg`1}&ju(gvnBsj#$5g}&f|W&AY_!$|*M4ZbSB{`I;4rhO6btIY1-Um6({ ze7%OF3*5{sU}t2a#J|etS8+jXg>ToBN|nA|U@b-!_F^kyKQT=Be6*DVrpI?f@s6j! 
zX=17^(hTY>USpp@7bj|0kyebI3Gfnjpwj{$xkP=O`UC$TE+Jlgw@ zg1>15+@!upa!sr7O3kOOi`uTK zv7^$yRf+i@9QcX9guOy#{ZG_^g#D}1=d0LL2N?5P#d4pyzm_lSf*w%Qa)m_0mAPRc zWQ+kt9H6P+K#9MRbvKBiZ{8W!ZW%Cj>JClg78S06cC>ZYj_C^o4}|Rt{TKOvRot(tjQgqkDvr=I zq20@Tsr4SL{dhy0_-je4QBz!?rQ;8E#)1;Yx9fOUhnKxQ?CAf6omXKl@VC@A zh6(#(My!(?Y3nQfyXeV@{=BI5Df@ejvjqM^4}|Y`m0B=&i^B0?EAf5HGvF9?e$oTn zXw3q^N&pKgAfHl+y2Jt-ufzRo3yzfm8^cS3wV}9^HjH6HbW>I-1wJsqw!CKWpej%X}UPfE)9 zZ|Nyk^>mUNHZ*G<5nT}G>H$Y1Q&<`pD*e9b(dn}{CWfhun!w(h=))ONR}Hnf21iq- z#9wHDa(xDMK$~%a&;}#UX@G$n#)Ngo+XWX;zT-Xdr{7NCc1hsWBzZZXQag5n0O29Ep@G6s%rs5_UQF>43zp` zRqI#!0&+p*{PqUc2)C+qbmD20Y(R5t*+x=}DPZp`zr9%0-Prq<~& zupR+J=ULEoSq96X<8Y0<0`J(f@Q>bs+70Hf`tQocpSoAs>99H zmNAqmjEKLn9%BFlW951q6J0(h<=PDPLk)GzA>+CoDf_QtZH6lSyy(Y^eW3})I;Jp} z+Mt1JJxq%nil-CKLa{lQb>AZ5|AP3h09H^7X#ZfYUpBf76^!{xGHCb2I=5XR@;g%d zQ_AqXbqSuPl;c6CQtXVsi!P3{Fuut~bP1Y(2=g`wHg1AewK6a?Wd;3zCthy=d$#tB z?TnzOX9pvFH&~kZp{8$Bt~u~jYJjOhbz;o5f~@mNR0Q_Koc64uMr_rp(GPe-&maPt zTA_^Z18LWO#Jke}3m!;l>l%9GnGSWI@qvgDRWuxx^#Ls%#tNEYP?1GK%QznT79Alq zO^3Gic$imT3%igLaE!PK??x|BJLv)HHeFwml)BnRaNxf>C#do}Eg~CUi1i7Azm*g0 z4K0*wF+@*am%UvBnIXAgN-ogmiK_v7;ri^~S82b%T=d`s2c+t{N?kB!-^|&hI+AO( z$KJkc@pa){l&^lnygv^WbMN3#!v!dr{EnjliW#NAOvZim0mc2u1?v9CHidZG>ML?n zOL0H(D{dux!S&W(a4z*dMhC7%veQ^hYQ6zo!p1QcXo;Gp4Uyh#7V3DUFgA5%-c5}r z{zeAQppoM=*T_YXH=K!5uYmF6x2?xuUL92kLs!&^B!bZR2F9 z888>Hn*ckbb|Z$qjB7MFbhM0U^M=YH)^sTSKJnLLj-|tOS%MdWBaUY7 zsOOh}jD`cSrPm@nnRXI+b8ahb|Luqa*cLtxKgK@8{j|$C)?f#H0c${G7zgxMprQvg zpo0SU8a>0KR_}2;DHrD(zr=-RuW+=*W2|d=5wV)x(I;dYCL}FE$67-XAZviM@KIL;JR{^kQshi$MP-sOCalmeqi*Ni;m26ZtxE(+Z>=6Iac6*6`9`<~$O1 zzEffDJ`NVHW1(e{0sFw|Fm@jc`+%9S^`8ofaYv3;(6LT}Dm6kPbV1XFToAn>#)RaO z)F4uczm9Ptw2j(9-)aDTz+@P^EQLehURVYkgL&vx)M)VpQ7PvU+cxKEX68Ljf&70( z9Kdv2*`w^*wZC`k`Yq&9z9I0p_kx#oHOBt-jPooj?VUC+?A}D*5@rTg?0=gx&ts2- zYj2djR$^bZHc#-yfd0Wu-v-VW-UzUdKx5BDboOh5@!|cjzvF7Woq8FETF=7T+C%Yu z{3AT*au!=7m*V5_Cs2$jX6(m&u$uw}9m{a5*;5>ky^d>bUgA*PE$oiEhMkRWU_*<` z=;AUNwX_m3u;Da}Y_Nz#{$8E_oAz(P>(F7$9v9V#eJW>i)C1LB 
z+rY&knfTX&xpNoTd5(oc_4x>h+>MZiC*T&k9d2RU;1awZ!Ho~VyWUR3b-aPbUGKp^ zZZ8~aE<&>|N8nz264Y4R(X(t&$pPU9m?KCue5fO#vjm-92$8B)0Y0fPFpIAk)7Cmcl>V<94*y7VxdT6wwJ6(Y{Y1V%xleSL7vB zuX~RBPtc^}OEgG*gtTE_(Q9HEQitZFNtY)WH3R57CKvX>i(p@K3M^c@^4!e-HG&xr zR1V=EG!5dQZITE*^EAjTy2H$ID74Inz|1=vW&t~38-5Ai@edK*<2hnFpG3=UtELIq zBP0K@KA@`R)U{2I#N-AkifG>m#!WTZJE;yw6E|2JI&F^L}%dIpxqeh+!dKM`e0p?`FPRy9tsEK;brm@+-ZIvrxR{tP1t^{sIwD$TAahu zsQs8%dj}Ro?ZfQ&9cWg)8|+mB&^>%2Mm5gH_~wfcX`8^c2KKxbuG!LHenR}CLek*I ze!U9u7kOV;?UAss9Rv%ogR&N2;j;i{%wz2%h(9&1ZufVH=yC~-QkRYu_%jClhs6LteTzzZdb(M+ zRvq)Y#;2fFR04urgWzH9N5AdKx}S|Q#y8frgg)0mhNm?r67~OVNuh`tnW<1sn!Q^snvPx4Bw0Gv4^lOW-n$0WFtFv3l_)jz})EVm=wAS z6Y4ET```>%ORB@4^F5;ZJk0L62~9n_!cf(Yai|GvQU<8z6pZ+K-Fcr7{W8~x-*p=Ox|6{C2l=ry4Iw}p{a8vOyc zc7v38fQ8RS*w?)bujX$M+$9gSyWB$kl%;e2(fEt=lVPfvnJcx^+jhF$rG5*l ze?{G1zoSW~)mxP4`~&U3^86z9lSnd>IzAlIJQdArM8n6}mAFy&iKCT?1FTIeeSo=v zEqf_;FwnQ+xc3m&P08;b}*B$&nvNpi%}qkCND&|a|*2Vyy&}JxJKLq zy;He{#J(Q2pY=LPHMn?mMp(=eXqrugPt+v@G=7fgltQ#0KpfeBn#`W_{8iNW#d6Fc z?pcdVuzEuU_8w&E@;J*^hq#1+W9L3#%Py9%xsGZ6W?&pOAvXOXJX#!rN$^bQdkut% zdvB<-1}KrT7Nna5sd*0=I*f&d@c+KsVI6i6?g?+;m70f!LqDKdxAhzU(fH@&RMwfh zw;$YKaO}cerV~!kj+Y&(;UNR(^=Ik3v?9d05$K8P+$+!Kx-ZF*#yAdV0)2fBzX67n6li zF$>YT<|x{FTbN4tdAvWw*RD3wn@@$mNfTIdEuOW$KYIZk(Y56~`hLzobwVNuL2#Y% zu&Fj0dUjJ`@{F8})2DN`0wSMw%WtdH^&t9g$c4EAn*dIJviU0X?z!~;1 z4<4w%uDx>XAQ#r`2hzC~tWJA`Y)6BVrOQaQ18&9$BEtvozN;I z4na2Fs8>A;jmgMOn&L<l}yvO~#{+RU$RP3t=9u(7W{#1UmIbP4Di+J`^tQ>G1T=WNoJ}>vF@{ zkI8~d*eTRaE<|(IoO+H>DEes8?^Uz~)_Y>>&pZ(f*GgufT;<<+wy1 z96iA$tZZXVTv>vCi%QXW%zM-x{2Wate?+}uj}Vx00_L?AL(6#tG}!~xb{Pd7*C{Ze z{hRo1gh|Z% zMW6L?3r8dbgrbpuC}V;E)N}JeeD#`W9~O;R2RHaL#&6)}fm&uZ2(xv8uelSdTU0}8 z!xXqPcCgU0g}Ka*^>=gL2VwwS_EiP`LC#*NMhys{9~ct zwrK8 zI*eM0A5ph;0lJS8KA;Tai2tl?IhJiyVEqm`cGA9gAC%(|@jiDRxO1KT=*z&q!{x-k z0$Dp5Gp;H}!kCw6Gv_-}7ndPy-~*U7+K-^LD{zcn3vIVdsM!yNs?Bhi`DDS=Z#DhI zZkR`%L2!=`Xg->8Xz!)Zx^>I`xnA_I$6s(u*uU7OC3LxwMBT6JRto`+%>8YA;H+m0 zclK;rg+yRzi`E#@yahVc2tX%KPps*fiXA=EFeN4o6Bsq+?LDH9;1-D}uSnP#IxFwv 
z(U-EuOnorXw1hkTcWQ8F46HpGlVay1tJx+@uDb*i!?Q4_*(%Iyy9Vv6_dsK(M8sJ) z!oZM$%KL?;w_JwCR=qI1@p??|d;$a7AA*bdK=?RMM!O`|++AkD*<%j817@@C$DCbv zDEqOS;TU=y5y|=JISv>yU5?2Mx+qy@N{fD_^^|%}ti22Q1z@-Zc966Lv z`!B<3KDTaPIi{@siaFaEKQcCKGWsS0`_oXb_*u8mqX2QBqZiNAu;U<9p{OV zdS<~eco%FMGKcK>7LmQKGX9&lM$}}hFcLWZrGG!~)o$3Zn@jW9Rz)oY{vM%~_*;6T zu5VqWHEx6Wz);>>VS;D}7Yu9A5HsSUai)J)e4IZHUl&irlbNG&bZ|ec?9mH@8?{1x zuW&T3*#N1uZ8OFOVr_uv3knX1d%R;ks-vBM5ZbY~G(NUDMno|O^ol}F8$Z@+thi1@ zS6OQo^(G@NbLIV>F>XzeS|c6pUDGi#bUHS)--YR6zhQlw&B$uL6ph$pZ)g&aHV!G6 z5G9dn1ikC2`56kVI_M!n#bNQpm(e%)Tc!C?u)qxZnvaTIGmX^hJT z!N_4Hd}1G>^&lW^EHGe(95a{7srw3SqqZM7Lcf0!IC~Mecu9_n7fNyIdd{MbV;6I$?!n)FFjC?#dIus;FkqcQ zm%Z2;*0q_-w?|8l9%xmqH!^CB#DUc9*xh^`RJj{ca%lIoLunV*><7crc>`kDYYa;+ zLVQLE`cCIsGupn0_Ya?f!JQYBFFV>CAfd99B-d+P0|bcfJe5Z z!(nQ{PV!^}V}qH*zxNiP#lr7!Xt|rVJ{J<3K~S|whq3D@=(|mVk{~W2P{oo(84xkGEgeHB4G>UAENWTa~c?2P@S^)YsZH*9XC-|A!!`sRcf%er9 z#>a_vcG%UqInH)(gy&<@@N`&Pyd2REB@2dgt^Qb?8JU5B@vRZ#9|3D(*C4Pq(i$be znD=B_aNU3n@AGcp=#0(1(s8m^e_YNOg(VH!VQ`43-Fh>3W9^XsU;NgGvbU(j-v};- z)e+&GfCet@(4<-#;@Rgtn7k49dLG5mw%hSw)Lp!r^A#T$cja#-9`wO)wsEas4seC_ zxea6Uu#36et}N#80}C;Z`1kIa4>N1pq3Lj9-5%-TTxi*>O%>xCV-jUp#8^tmN()OEdbeP|_4 zf35$ox_4qDfm9S2iIu#!uM(2P&lhE3KxyW>qV2XxJwr_@eX3F=gqZ4cIerlF}lY@ z!;0(w>ToS-Cx1U2?B5^x+1dC$FAGnGjl;^O?a<0INSS8{?8Q19DS4p7wHSKT1WPSP z_?Xp3ghMl~{ZB%3%QoyAZN!%`5Ab^QBYc}*fX@p{@M&c!zA*H=*1&cQwIEh5rN&|@0&ejc-* zx!yI}^p$J0d&YYAi0zZd#QdoOAD?qg$}!(*u1^9I7(uYKFN!~8*i zsn7rAWFoc@xn<|B8H2hccSU?y1p8$H7}FvV!|O+2ec#U5GrAw9w5iW~abs{jvje^_ z8Gyp9ekfYd8}g+CP_~%+b98X3ri^?UA_KpOd;EAEomOP{#G+1;egD}|bDjXHH{*a>8{ygFDC(!5Kuo9AtV3sw>CYTZ83+7@ zzMv|uXp)p_lh!gto@j)uJCIAiFMNN&TXGC@$b1*WhP@4+F&B8I*+N(W=HExKMnxW_u8jc+U2Vq_JRBY^=gyTJ1 z$AHL9^rWW{?ggdW(l*3{NN z0?U#IV{h_AQ6`FtAK(l0Ey$x*tp9Oa}_;f@M?IarFXXSjZlbN_=r`5WV~_h%^w zNBNPPOR#sX0z3J!eR>xnAnY=1y!LQC$2O?hZGz1E1iV`1Bf8HIj9yZPt>n&09)IB~ zzdAW}ljpoczb<_Ib8_JQOAcb4%k9UPT$lQ+1aG+inwoHxJjpp%frX42#~uf|>;=MS zzekPH?-4V(2>w0qLKdcxZdhg<;4hI3%1ygP~6m*J-PwefqLz 
z{7pLDO0fT$SU|*<;(TdIT?VH%ZHy+t)zKx=8{7If#*gjeP_%0T@`!uk!D%Qvy8xx< z7NX+PQphi_P!3#KL!%&$7r5QO5Jl^z@m}seSen9|u3jX11_fY9crdngZH0GPqfoG9 zB)+d1i;_L_dE9my6^p5-w?Mw1$8TFn9GBqJ)X6y4tuwYJw`E;027&A$+wh+JIu6w^ zBPk7wSXVgIawzgA>_+Ky0<-EfMY;$-7U!dI#dltJ89wbTKq>uo@onBeAT)sE%QXdF zGk$wOJWuhXmyy05(h3k3`wGELU!iuJF9>K+0H5Xs@ND%R&4x2hWZa)a7qk0}0tYUZ z;Rw%h{AvZx-6+R(p8GM+`}AQsUOX|1Cz*b7O`m9+azNL(^u9y}jbwH83ida$0~QpkdH;MDRa!ZThVDf2d( zW#qieoVQ>9AA$eSZi9#QPi~3!k-iw*&_+>XNTRP`ptFI_j7`pGn@M;E)-QJQ zOYh+G6ta6XxZ9khE9Ah@#Vn4@ME=t8I5&O(*7WXxxoK^%rbh}6^=g3^(|a)Zp29-; zEXdDML+%s%TRhiQp64`Ufc<=K%VH=N&%n(t$++0PGj{dMV9p;*9neK;L^OuSHf4X< z58D%a;&H|Tu)(;alCuak0bu)@0DTi)e_`fQDDL83XEi3u-oo3r19DkIbEUSU&6P~ z4VX9F3g;$U;l}!uY4B`lvL>kRG7)O-^I=?P53HMAL)hT==#c#x4TkKX2l^}5gjZ@v z|NgV3ecSbTJ}4;>V_Jt}*U&_KTuQ9=astl(MvRwp{>XvX^a}RCfQUfMNsPp!Nz4;g4`bnk=etHu-RCind7isG$N6VL&^K7Pe@#Kh{;wPJkWEMDI&$9DE_v$)1^^!PIL zA1y~mUS3V+{nqgx;o9Ll5=hmNtH~45(>bNU!HcZ%UFX*%_UGvXu9I8Oc<$H4{WZ`3 zmNtK%k5Atz#-ZD#*mYBmwU<~6K3$Hfr{x&J`y7)t6u^tWTV#^56I%6_z@*`F7=+EF z_Ol;h*@Jz7A&^)Nfd=unh&lkL_74#{`V&$&e@Ei1BabR=|F8MCD)viHALY^~rBhjY z;}{H!^TLjPG00!ln^=q^K1+!+u@%_A+rkN?1N(fC&W_tRw7r+)f#AS3PSnXPl&l|v z4_Sk7aCk>7OlykC@pZAVd2Jl(9D&!fJ2EdHg|Z6^s2$XeS3C!~qqw{p700t^|K!Hu z#rV8v6jn41M^`6n1WGkw%X{{%v^8NRfc6MesD`YY6({hhi|j*NrP6hoG-?>1=03|aQoYz>jO@!S2U?+?j5 zQMslpW8j#SVZD2!cau7p5buke9$|RDtPAZZllU*9=F?%lrLex^$feDGW)LmLM~)A~ z@HwBqOO9MxhSDA5@nA+*ENd6RTrL1vN#Qs>q8;*=^rL?tiIRg8@a@!0l-yrM{Av5o zI3AKe_xKrCR}hdHI6I^@k}RdDW26I5U0qo5zG72N6k2_4N^=Z-gW( z7i1@N#iw~k$)Aq|i80?v+Wm=g8YpMyIS28V3l6*{2N*62{9j!l{(CF1b6pYE^U||P zUs306Fr*OSg9?!_=?8`~M%;Xkl68|*ddM7rc7NnX8BX4l<1!yVB%ef{_nGJZO5M+U zUxB>bN*#Flw$cyWrY;=kdskmA$E@>Z$mHktKg+m-mz%mDh?(^}oKknevhga~`!eSK z%U~198gSDyuxoP(k@Jes?jX>e*O7U>5L5S_$JEuU`TGHX{+%F>{Fcfd&Y#cz-}kCw zzw~}1`wr^aLouXHoFcnJZT^Nv8--7 z4ba!*R+}|MO;k5A zL4>sxny}}%w)GHxHYfdjA%QJJ{_)Q!xn2r6>oW2;%mJR2;X7lr&)3WFftvj4BHa(M zxWc$>Kfl6)mB8S+Wk{J(fsT}-ar{avPEpbr=ih$H+@Ba;zRMmjZTp7E>!~L%c;37Z 
z^ec2d#T-Ara9Z*xk=%dxsS^JiJoX&@&Ti_?8fwAxYs@)Llp*<;9F4aAKrQ~3oks>W zpw&(o#H?lSZzFVTt%G6g0r(DhhWcwu(BTY^VH`2-UO6^B`i4~}FTVP1-O8Y?2R8T~ zI=4pr{JZk+@8v+%nt}l%rlbw-HxL6`)mI!E)&yTxbVAwA{+4}>1P-o}YwOz?PPCH^PpqHyat++nP@uWu9V&4|Of5smR_ZVDrmFZ4}wFG{BqWz~fS0H#LCx2>eSJ|K;B&{x?`3yg|2c znb;pL!@;e^*v#5s)&}pL>QnvF}j@HjzUMs3D_y-jw4CgssR$ z_`+Ox4!jQCMjMFvI+$@Sn{nJ0ShPBZpb0P0dY=Ma&M7eQE^m?NIkvxI4D$Lrwp_ZV z*n0XX*6&;YY~A*yQw|*27%50VmH4}KAoHhgj2=09TK1?ucsRc+W9WVqx{;jA@hH7M zlM}EUiZ|svU8_b1|>yAujxSoCB)N*=CZoJHHey^8inEVoa>qq*rgI4~Z9PiIFp zMqXA26s_&SJdv7me=*7)FDIvRl)O?rpeEcR&#q))Y@93V=t@vco7ifpC~e+aro|pI z^`AEHY9xaLb-FjJSnT~`BjObt+vIbzcLj(?M3lE9+Q`=KwcidHqZ6(W97cU z`YX@-?W53sIlk~cFW;8q#`7|qd0d3kk_d#`?eDM{p7+%e&%lv6&Ouh?sWs`e3dc4 z84%~!@aeG?`k@n{9XJKX^;f`W#1%Bz@CE7gQIn_{o5|aY)RiaV^Z9+=f93Rjt-$v} zIX-{>thjgYlKk$2^Vqh3^AN#-9Q?=c9q*Q18B4Cn?Az^rR(E368D;xM5UV--z*SJZ zTnqWzjVOD=8r{!r@hj@~>y4a5YQoFSm->Hg33IP$$lo-Qzr&S=-2)?WcVP;?uJ27_ zm`t89$A3w!5cdCNcjXZ06aIy~xV^9g2G`U^iiZ}OIB?kOpoy~~!mXtUv(Q9SS0l7? zlp>?LAr?fsVnd7<=7-qf&6;sM=T!~@_l1-Hm0%QblD=1<_!D*DgMvPc^G5gZnp(Q= zNExzn@-cr~A(kE??xfD<8!RJmzW3fy;LL+^JmUNFiElCIyMXrpCQpu=@5_iSeaAcY ze`x!!Io{B>yyAF7+^@Y+;M|)EoMfzUoWAr-4YDpU^x15MD|*NDIGCqt5>CK_8v zkZP}rq24-J6l#n;jh%3yi8Bs&2;yfhCl60DuOyhc6z{JD`tuu9d@V%zmvYF7e-Uly z8L{8A^BX3w|BPw;_^F3jJ|y<5IL~W&*hap-gA_VNjkv=(&n5m}XmjtV1Girl`()m+;{;+O)h>gh0vn+E&NyvK0vHKd+o4tkrt1AdhQJlCC%a^w|Ki%RH| z$|}eA68=8s_hLMF{X6!Z*q6U)?E$! 
z4}tvZEcVann_sS`UK1a|fxPXUTVlqc_{6Qq1#_8*r=YArXZ;qh+*yl@I9wqu}|=-0ta6xu#36k#wQ9a;`zqjE=JOp zyKw5e9Fo9pkoa_mwAMiA1`S~UCxvSP+pr%n1V$+<5U}7T+MQ)xl6{65kGY13*M5zk zmq#rsqs`09sQsl16qfOK5zESP^!)MMtflipgzabk9kl?l?pqu)Y4E@<^9QxY@*aUW zJg^1|*R)~Lt3SW&G*sMMK)k5gpN0JsGwQX1e!Jo$2XjFs2ShDE3{f9^&G$WC&12`$ zkfxyGz$n(_hT+G-;V3+oiK6qtjU&L*yh_ zVq1a@_9wYuU%U-2wQ<1h)>in^&l?4!{ZTMKlDs0fUh{l{Pw)4j?8SBzJ`(&o#(eb} z%GsDJ{!)nZx4xs>+~3i5Ng+DX=XYiP*ZU0X@~4Z@{_t01Tvi~H8Zejhy^gwl@&omP z^DV;3E7XtE#Q7*Wag4RT3-s+bxqpk#-6gl~bKD{)#QV;?Q{eE+a_nW!xQB7zM%w>O z#(f#*@({7;G#m%5L-hfhQM3PcIHoLvMblYuO_&Qm;_lru2SF3hAZFt$v^i6XzBd(^ z@=%WTFXZH&9B+6nMTM2xFR;%m=jXn9RlYiBeXzjs#Lt?Z*srP)iv54x*Pb#C6dd@*bA09YD=|}k27Sbf zY`k370_QVaaJ#n?p7yDRm%W_vDZ>q42D+epN)43FuT5v2%;L-lFUW&!nKVk#(yL~UpagZ_pQDN`zN^zMn!FA?>*8jP=U3?|Jb{7++*JN@ka#;>H9y^{vQ;TVdL3zcLl!S|4Z*z zocHhFuO&F}>vR8dUs*%uZ@sLZ)cfGJ%mnNkS_hZM*FnLWM3TKHu^UhE`;Gm-`LwYm z#FzLB{m&P6yBp=7wsY)6Mcyu@7Kpr18QY0|!OM*_Ain=8wc|dI6?y($@`(H8rnohe&ZA0|8gPmHFBjw7&1J|su#xvwje|?tV%A(|QG@As zEyEzG#(Q&vlVOoI9TCf}k^@DU{ICQYspsdY`)A*M*y6#`j(1=cV2}_XP5d$A0E9g}j!+EsXc*d-ADO1?11iQTYCT6!skHhNSdRRFAI@ zldw3(UHq*^2XFrNk{{Pz^0zx(e7O#z2~1m$LGspn7=5n*OQ_?!UQq{LmtxD?3aomo zzzkwK?cP@`XP-akMKN~0E@Es(%-wLtChoQ$QIOIeH$s{V3(W47<9h`6CM^b2Pnbj zmt{D_-1B@M^AF~<*Yiqo?0o^Y-gr>HbmRKYCrx98yD3WmG&wBrB6!;ZgWTFrsK*J2Gd6EhC}o<(*3c2Bh^#7*6dVK+Zu z;^QyW-6Cvz&D{Or7mT~|62q^)rZ#*du0>c)+uugp-@$t9uD9Q@`$LKHy_=~E>z)-U zIWqTA5hmXIj$wDdW5C@a^kfewGi0AwN)83naw^5#J{{~tpZJ`Top#{p?4QMHp9!}S! 
zY=xGxl^x1j)`oov5FlYm$O<7q2ul(Ofj~kMvM)|-C${5wUu9deC0Vv?$+9g=vUbV3 zzu!BKsgr+u`kix~b6q{(zvgcgMRskLI1Q zhw*IqXAa>V4TtMz_f6I$3OAG6e0ts0r$x@7=C}4~7Ea#tt%G{j7Szm^LA~mr7Sl1L z4?3gzs4J%BJqa!CjOkru=I!>V=C?&P7de^H7S}V#!KBt0Bd$kmF->Sn=+@e(uG=-J z->vJ=IV;=rOM=hyw|I5M?x3!%4C&UoAq)SscZmbgZ{oJ$0Qo$I@d+*6kKhYN875xvb4(ldxDnm2 zo~CHQgQ}Q!uHJv(dzx|M(VBMiv6_C%(R%&fZ>wq9dD!!boO2%W#T)pSd|wE@89>Je z;e#mlC4#RQ=X=teKbKD+yop91=E)C~iy;?rWF?KPC8sJ8r6)OXpFZ4hqfY)6b>tI1 zuRQXKhkoY>-SF5wx@y8*I{XJG^IqO#!T$@qv+H1;aOHKH(O}o>1pSMYm-a2M zD71MpIh%(RT5*dUbI;b`!apdz{2CnD`@rm33;$Hh9Hp@VSr5929grVf0$>sXj|lhQ zU~Tl>$`7)P7(bDHCa!CLgZ%DHMZmt}!|U|%gD2_n-|VMbf0CygPv2h?#(z;$F8i|P z-F%Sx3jf5p;EBL%8V({iLtr1{d(dwsz&g8~u?_JpV6e=rV>{T#-&QvMnsSMG%B5aX zCW-#1c^;wvVZFKPGM#en=XAuWyw~qgeh1{M2kJ-E4Guq!cN-jcJn!Y;D`aa!O)u$WL@r$AP4tiJ05I`>WP*iO+^OgIY)S>0M0QnfqyWjPOn?}JJ!Br^!h*I z0JER5|L%<&53S#_s5(*$j&=CqHPcC4J*MQ!Ta{P__M2|U$9fFTo{FD^-_{NwEb9y!ZWJYymZ; zJbH}szVdk;dDbs9YiE(x`28vm22@6IXLU~xucquq=eqSwYnPt3_vkq=e!7By zouBW~MLW86VG;G-3LpI6t*fd7x~6_Wmy?9La<^YM)(+|}{D^xD7qo;mqkTvVu{-PW zJF5aQb%zof3Ww!rsTeLR+jOk)0}RGyx$mF$Pn_X|gO-`M>merobXWm2=rkIj&C z@JVg&pdYp2LEXLdPr7l@UAlI`z0_RK*Oz{GrVhJ!yiR}U7R@MKj}PCf>R`8;!+k7! zwXVNg@A$j)s@JROZ5?`{ty8l}_?j`NBcPYD4Q9;i=+}!b^uC?<5#uww&@o`~!E4x! zw~&)1$jnmbpcY_5rrSgK^DDE? z+I4BUM_1G$7gasFwzgMy5`R1nm&|pAwSt(VtT(RC!6bEogmTGX&evg^U@#xWc`W|C z^uXx;em2|k11q+@-xR2S4^GF9d(hoB?r|M@Uh<@}#7n(PFOcV*@rrG|4!?a8z8N^0 zxZKEq8Tf;_&Nq}LS7Y&k4-Rm?u2eIaVJkAW8ORNz4L`i~1?3u^R`2$^H2B3VE)aCznh^`;^H9d6c@ml=&cwP5zdHUqVe?xxwzFv4|@I}TvrGxV{97euU*pdu= zD2DG-??!kY`H2q=DEpHTF!T4m(k6+)rs{=_7ij=@tC? 
zm5j}Lxn{jyuHB+H+l#d!&?H;Lt&SltdA}aj40P#3XM<+d?bfu$YR#hAccu#&aJaDL zZNvd?&2;uy<7JA0b2|Fqe!r%I{qxRV&20B;KHC<#`}Bc30RDY?t<|q-&AcC~s$18W zcf;}Bx@32UE-GyX>qcauM(1xU!5(bY_|@BW_PQedZhI9n(5ef|9Qs3z+L_&Xq!w9@d=hs5&u7|g%ACZ-)qRzaq#a==X_P$7T>PNuRBFo{d_-UAx|I8JXu5a z6W|y)U;udx8|=vy85xLh?gaXtT&&d48%oFMC5i9heQ?huW+@e!h9B@EzTa$WQ|p8DJI z@9lb}-3In{xZk144m;fMw)kLDlS5P5y7Ww2w-qlu*x0298a;Z--et+f8*V?m?tGVu=AQ^oF&qFD_d!u0S}UG{vE0xOcd(t8u)+nz7e=Bbi$^ zVb{iwPqP(&kPEepV4v%M7k`U&FMgdJ+$!kP-Fk;I8?MF8x`Bp>yGAhdyr>j8nUwow zdOz^Bx{NHKmnJqexSJfIS-P;l_UTGDK27dtBE10*!39s^M?5RX`a5;iFTY5CS01(f zJY8~Ho_89t8hPnCFZ_U>xPU8JXCta6bHF+5K10 z|JlmKrz#%d*|gXkg~PMd(espC{;BZe8|oVRP!9ajR&o>Zk^I8^68-j*H9G&JlU zMttYe2dUZIpvZqP=tAMxv5#NV|~DY1jvZNbgd0qz^Y68%>% z_uJ%Z`@kODv)yk|b9+Nc_xwG404I2{#T_r<3rtZ6&L1qlPl-lsfCsg5;7Q+PEpa+?zM=WDR)EM)3I>9^!wyt|%zfzrXc{PMADVCp>ny zPM!D{efQ>T^^M=2tuOxjPj%S1U+U}U^RBI%uG1+K@6*p_PuEZ8%+-%y;hi>HN_9sA zeu%@TMVpA!7LqH;QIzQU3XXBf=K{uvu!taN54 zeKR|iO_mcwG~g?>vfjqMa^NSlQtP*qOXbam-XkB2->rF&A;UAE=}o&Nkp9ewj3_00><(c$BMp~J!dYd`t1j`-!j^DMvx zI`XpL>)2ba*QrlU()c$Q>hk;@x~q=u= zO4r}BWXq~=jLivwzvUMcZ+P=UPvZxQcYOpVA9HV4Fg`#>$)mM{bJNqxR)GupKV1EQ zGL4VHrAP;Wzw#&!>`1ix> z*5F)0?0i3%`yb$34^hKo9evgS_S`d3sm_V4KW%N#_D)tVNNkuu2Gj2{`2IN8kzpNs zFc5r21JSp!@r%^aznFf><%%T>mCf$vm~zF#CBz5y#E%Y+Z$~Da7XH2=r#kTe8wZ@K z40ouCIG~ajdsIeylt0j+)qaOQ?sbwAbZB-*6Ij>k?&ccZ-dwAD+Tj65iymuj(KKhf z=JP<{qV7(;-|Zzgh%ex9kUwhB!#p^Adqst=E84E}mcOryS1;3b8&>E-UPf{1gqw8m z89&tLzIVJnckEF*@TeomQ}E9E!w=KvPCJRd(D6Fq_FJeAPSN*gzo_3VU#&Z^}NQp36vd2o{}a1%SZ)EjrJ8{faKzg;_nF71f$`vOCqD(G`+eXm37`&?Q# z;MP0d7EN{4>JfXT9%`+D|7(fI>h%;n@Vu*y+)OLEfL6WXb!#bdV8+`W9=+J!rpImd zx}&m0f86|u#xHtJ$3A+ue(>l79e?Z9I{IR2lRy2jK6~Wh`s~;Ejk+&Uk2r+iAO04< zt#ax~`qppH(ot7ls_#B@uTFb)o_@D}6ZJ%=CXr`)5&RbtgRb`vs*>JUPb7}5OVA?} zSJ&Xc@cY$uug)*8|J6QrU_Uw@52)Jt-ZyQ#UULr9h++Zg@fS+6t_?waJ07J9S$bhNa^dS$%ZWy`g zBS(ZBi*3rTn({*s2xkG`fkHc4)R+)u6_SsR$CxF7BrdYOJ~_8Z<$t>n11 z#C_V84}=oyvE}%C^p^F6H>x*OsBofEnM{MS85=%8gHo9`^20ncm~bhC z|JOC-Qak)#6>@7U+`p!`MfunQGdA|MYa_g{3Ay-y9@AN!O?tv!rDuo@r+Qs_wzEyM 
zx?EZq@aiqEQ*UEp0+`UbCVa_=tq*NGn5&^d4)uDg$Q;<%QH_;JWZCmh@ZUOr;|9x!60z4MhawS4@7 z95%%GhE{A~>NgBiv$N`U$UxRlPB4IyZB~#D6=8^=cIZ`*^Zh zv8LmVak9kxz(5?df{ORN@QWKvyPfv z1F>Ku`5uSn^`PU7j{}|B9P%l@uS=^t9eT&vNUgY$T0n!IhASpI>UD2(m98k>pkFOp zpp&LQu2Y`82cQ2Y9sS4i^^I}=ZrS|f&Ka*`&pk`uzvMz4&3hCM`vt#CarUqEwe!!? zS1gPW0GuY>hUOUmrcKF!wIKDr_{s_xQ(Et0DwmpdX#4j`c zoEM%jc_DN=V|p01 zIYgE@ay(h;Im*EADL#{4#lBTwzZ#xrkO$5X&uO@bQ3972YdBp->{E`PSF3oYp7#G5 z;-OM_pah@3S^;p6WbpN3Hg(4u)fH=!5B+bCw9?brsH!l2AHM&tsMo@LD_pQOWXzu$p)Rb;w_yqhq!OzojUQ>tMt9=FW2#xU!?C`!|zbsdc97+ z{U)7!<25?|vJ0*4CtkHMUr}r|8Rom zpuvAY^da^S@!VMh>EFHow8sx@t6Q~5=!vDT22bOswLhzj{Rw4T;QZDHlxx06!;N<; zU3Uld{QH${drWCJb|8SR_LB?756IDTmFvd-yJjoyoH-&FgYOVupu0hIIl3AdSOeF? zHA(u0j2uM4IF9Whr#l=s`p!0LI!YVd%@XWQtv9)qWdXU~BIVLM8Q_jTr))KxUal1O zJ)5ex;`*do6h#Ii%tNV0b%$%^PqxaRuq%*pswL8ZZ(pyfU<1CMU8O<0iir7(2i+>g z=5NOa7|b{I*|o8^)#8Dbffmhc+pVYT3pL$?-gma>8F#Z51iG{~*r&qYZdC?*R7?G! zevtfe$ghHac%a9r_q`6iLH_8u)>>qsMi1JmbWdZMZmHPLbD+F$^0|rn=AAd_s6St= zAKrhPPP+G2edDS>=!naIudiQx9(Ml^`tD6v>bP63(&0NBu3xBY;;y_pHwjYJN3o^qz+nqDHJmR}7 zahY6CF&KY;&*8+F~jnncD+gdV2&G~fd>&L5kEZATCE40D|BQ2 z8jYJjM`taVt>3@RN#A?uF8V`n)(LmssPEo;n~uNlHl6#{%erv+2m1Xd zt94g(lOAcJSEb2At&v<)XCHl{eR2jP!!T zupD4+d_B{{5nhVUf2`~f{Tl}J%r-_L*l!2>0`O#H^Vzl;f1GDasqYP^(0hZukp)xp zGsg(*;di0e6d0L1mCLoTY*RXGLk60aWOVlLRDZHsk!&-|X7$9Iu!C*tO0?25V?#Ha z>7#2RzuTgs{uXTm*PRi!g}o&M_<2SSj0_n3*Wn9(kt`$9Q@-^+`2k;jSAGFoe-&Ln4 zn=AEb!*1PQS4MnTtm}8?>*~Ty#EDzz8D6FzO?yTsPI^ecczup8UHg&#T(*-OGP&qh zmu6CHSOB&kdHc21A5`7oko@?4v1E*xH(}xL%!s;2|EKtK!}c?ywPej9-Oep_!-M!v z-PmK}%XLj7mczC?C&2U9E7Ncx>h>E#r(cr~I9u7KbI9jhq+#dv$jklMar^{)bK}Ql z`oTMluV>`I!k#(2zyk$fO<(vBaV9dL6j*1`=VURLQHf5MfiIkn52s;dBfAm%4;LiB z9+?nz|MU#IKU1jZSsWc`gJF-g zfJ>7qLMFxX~szlr^J;^P(L?-vjY7Jz>tK44*@TWeTf(rweap&qU7 z@6-nPy)xFXo;3Q#h-C)h8{R>h=$AX{lRey{2J%Pc99uvfzRB02<-`ncw$;%W(x7?x zkh5Fshz*siMwnIKgm$8`M#zlmC2og%|d4W^0fV-7h6 z8&x}4rJ6{+s^Ne-@;>#%e8%r9#Re1@9_Ysp0Q2&&#~LM}5e_ghpy7axkxp%m`S9_( zwH<7$!QPpSC_EfR?uL}eg-2w7WjGVkATNgKbeVGaap()?(AMO--8KY||o-Tk8UT6$gXJ 
z@_=lih&rM%>R1V$3rum3ApG2j3Qm?+I3Mh>6NU%K z!P2mgkGB}#o@L1J|7*(hzz@B%k&iblpWpQH8J*TJ`i~3@gQXc+{PkpLJ9=Np7pWcKT*}zZKd(&bR2jOzTagI)ycE6w`JCY#_%mW6mk-wuklzdP@}JIFD>;3` z`?V?`tg&qWZaAP8yKiKmEMiy5P@9TE4E%sf^t}WQFkDcC&Ts2?YBTd?o;s}yc4%k3 zSGyyAwZ?+#%|sQ?#g)$SJ9XgRp9p}rPlMTzh84HuBE$p4y~#m1A%OkqRWqXj|FAwZ z(q~dRK+n-YFC)mc1+4v5__VV>pyp^;9dLYaI>F=a zBRrqZfPE&xy@?!9Z?KL(8l8SJ>to{1FGk{rkT*Wy%*@Dp&2w1n~RG`xt+1 zxchlz;8Ak$w<=kGwuSxh?&CCEdW;4(eO2A-j*x5h;k@qU%j({Iq+(SkDAjTXx_%Ql zKZ*^&ABbW@Oe~ki<|nWPmJGbk670>o@fD4YFn)h}4gK0Cf3pL9-v$0gmq+-(=y)~- z-bUXI?k0bm##W@NkPCc%;=42>8Y?HyRmZ=z1?-(xj2Fon+#Tv6*6X6i=ZZF3K7Rwc zUp`Pne^4tAvl0c93#!^2;w}iH)1Oy@WxPz z-!x|F=LI5W^z%@+kB7TGkpZB>;*>sV?{ifQ+@n)$#y`S4Qzc**dU(EAy_+x(X>X`$VPa#ix zFSQza|7(7(Y~^>AD?{H4k5=dEZ>Vz7m$Z6no;J>;PwTxy)V}s0Wm`@r*1weZ0o(w- zPon#CEFQ=N(fc5H$KGQ*eLCa@vgVl_;~UV^nF=kYW=9`S68~Qn417A-xOV*c4A`1F z4ZeOVSz-;E)4;z5`DjG{>nz)!PS+FT){)C=vi!anKHgx;f$p^nZId@{lbbl-iT~F+ z)F^w>p_;)u>ZR4H2sdE^+O=!2NjnE^+JR5FwZ8#-Pu{Pu+42W=Md53>-f;GAa4f*L z+Z;wN(C^wOpH!(k=i4_!|}>6 z(0k;|jAT62nv4#duf*UnY_RJ6EI;i9WZkO$G}e8xBo%;b{U3ur`Md5H$mc#z>_?1m zqwlZwOic0#N)#U_?jmFzusEbSav;YbbXlTH?a$u zOd0pe*!vnw?=_6iN8Fc&_fu&b`|XOS9r$1_4JMrmrd+)K!!2KmnhxCGKGe)}5{+sY zvZ*1~Y{mV%;D9n}zhyBK`!}g1++x{)9ryx81FghuE!r`N9RTYRxV|JDPz5qjhApp% z_Nfltbl~%Mfqy4`SMJy#&*cPE+DU#Wf5XK_*Y^GE>e_X1hu_|7>_i4R7#T6pi?BWf zj&Vi;4dxwYY{~HdH0#EWpj5;qa$9oVtMaG$$cwoc&;*)rz&1@qNVpmFMU?aCgtgk2lBMw!8|RS zyuYfK9j<823CPN^iZ`94wEG<5eDVYIU?@6^OG2i29z!YgJByEzDa0P+Gv7?Uk7tOD z-sgztjZdG0&$0dSA)et+z)6Pt_u!v}?~R`~;_ubK1=Vmv9l34te8l?cbSt{hPVWx+ z+-xU$=phDk5%aaHo7}#e{BA4QHb+`4{40aC)ON7}$V0xrTE)mfNz^eS1Jr;^@CizZ z`-`dn>dv2XF#&37)Px4Mvb zFp#}Cfcw3_x#tWEmnF;J99LOaIH97pU|LK}1<8g6%cZFlze+h6VQHWt@5 zRynHp@mI8N+Gn)r zp*+2Id!82EL$A)Gc`AMV0QCO|MH`M*#(5_C|3`HHVJ;0nkUqQ&IsIG=eK#-TG;urn zs~ma`md5AH#fzh;Gb`J0KO=VwGs2% ztz1xP)T3Q-bROL=4E5mud9{OjdU<#Ne_>EnvCyZZB!ExWRG(VBJ^!V78<&55?7iN1 z-Q4?qxBa8}yYD`JSatRC6Wxx2GrL?nAMvyoE{=v9{}gZ+9*- zwJbSGP49kLp7lp4T=i}21#zMG0{p*Q!T$;HC*F(Ar5}sjpQZcYEAUejqYXK*qx<;l 
z^kA5Jta3FL?v}obZI=JXcC&97d4cN%&L+l-MQf}Yp2_tX-Z!Hc-KMvoAQyzS% zPJAf0y1?G5>wte7vEC^DmE-~{u>-p!O)4Cyr@mLO!f>;SqHQV&x6$wA(01azo!I^Y z;;D4`OwCnr12<#O zu+|N3W|(b;FO2S*<)?Og^!#I&>>vHi0aO0LvF3A$9Y3W%LCpXp3QKBuaoswaK+zCgSElk$b`Xq*kQ?u za(u=N{5lFw@BulWoCE*IZsBfnJqboE-J%G29uw>Lr#cj1_|qL0_dDVL_GG(karhtG zUror@=J5%V>X=daUdI3hn{>b!QXr`BV7K6XTWR?cQ89A6R|;Q0Vgc6n1n z^WvR5A9=sD^q8?&jO0}QzV3}Z_CK@EP1_?2Ms6(mU}?$B-_`TKcmBlgV}9c;{LxW8 zwfD_8ewbCOqen`;Gq3><6Zc;OB)?R&>Nt5eeo?hc57xF9jsE9p#glow;^#nA`ADVf zj|58a?>P_u5qmK}ziycRtnh5GUkrxCcBxNB{5zAgvG8wa3Fg*tu>@1F9KpJooX&`j zTk}!e!CPj4aT;51^83T=izc!AIeb23CCV^){!r?#xn7g&_oVH_$i#Q#d>V-V%7f&7 zqBeZD7H#RR*4BYKc)dZ}$OCM_|J&5xY~^~l)AO?}=teGx`9i%a@O5gHt-f(pebvLO z8>>zjdvEvH_y4)yXB+!v{|)QuUEjQ;zvu^-%KIz{^G`-{_0;cjzGu<>yMg9>4Uy{! z>^Mv8F}!2p4Lyt-)G&q@-xcTd{gP>qdEQz`aRA8`(Rw3&9S%FL1`ckB7+Rs(jCAVTlZ&>!uga=(oVk(szpoj13^ai@mq}emKJ9`c0gl zg#VMoiJ>@ko>a9}>kXyr==Cd=XQ-51UomyKo#b$f@Z+~Bzh^toOc!fQ&n_^p<9Y2$ zt@2iCoxh%E{fPTa-G9KNVxITeHQ>_*w=KHLUe~g^uHlXKwT(X?d+WLA{R+{;{~Nv? z`?-IH^<>$RuLp{LI8VN*G>d;k-ew^K-{1cX??=2%YClVfI`X^K$EvU3YwBEcnA%o+ zMQxuPChwN7YjEdbJjeG<#cXhZ`&9VjQhff~>C1i;+~NI{!JgVpW|M}8(R*XR!9JHN zjWpLScy(v~wA&i1-hACw_x8e0&nH_#!IGrmfE?IovEwA3u?5Xmd~fnQQY+7Kl<{nn zi3e&&dUuT92li2N0RyoTtHvKBM(D#&bOyH(e{WS2%iRHf7pym5n|rsz+xg`3D(I0c z*Y-di{(LJopWYs7dTZRZTGdt)US3!3__(a*oy|4P=M=hYt>oK- z|IfONlpNpCSNcnmBXiMNaxZxdbChj7o$}g=*#9FG+3{sXi@vO2 z;i2+vK8V-7=c#|k7nEo^hMw=AB0E=)=)URIOf5nGiS<)kxm~-JGuRs&uMU1A=flhX z);%dKTXt+?^O}!3yNd!tQR+3Z8Y`wVcHMA8mKts(yo~xHTP{*ZSN-4_7|vnpoDj_6w||9svUi$_-s|$!TX@LILq~MLs4{9 zLvi&7ySGkzuWbA2C4H@j?0bC{=7q9ixPABiZ=L%~e}SKdt_t1fdHQTW`T77h8~v9T z-`{lwHsCk#z)u(_(f5U&Z#aUq%n`~|AHh3&zNL8WalA*J{k<1~J-MR(C$RmTCvLFc zjvrT!J~#3mZuH+LM^pZmDN}Cx+=$Pz`~c%0n_=do_pz#K-RE06ihj0!*Ror-)vd3q zauj&mdUg*vJVkQ2whf0OEx9PY8^-o0@cV=DdaIwSBUntFSH`n1wdCCE)YMl@-L+UP z(Q3=~H{#z__SI{B%hue&ogX&8UAXeOx3{nUw<(KX+DpGjuY-#=K9(8(Y2Rw}9RJMA zVC((-)0TtP2RuuK|8HvaftMIBFrGjT?m!MM1v=vYmhbb-#&P6#k0&Sa9ezva7$urd 
zqQ`d}e1Xr`KY{%26FhIn=Z8PRb{oE@A0usaz85>*pX=$eU2E!TbMB%t!ydW*PFp^;&J^ zV>4l%qops$yl26BCc`%cKM?uw-9#Dnck}?B4UE4<9?qg3G!FZ4F8eOQX5jBdpR#g% zx#Sz*%=zfSvvfY|=KOOq-&+>C3qNOfHJw^q zUHp^v6(vvQH`RPx)L6cH{jS0lTPkYif4DjS;&r>WA31O1JS*2RdLHu~BfK*bUypA8 z|I@P1iLMI!oP?*N^v#AoCZ+f;y1W26m`yJ>{eImO!2M2YfyhD6jl_4ivhQ9Xg&$0h zzNzmdjh_!<*?e%Pb_*}(%!~F7_Bp>N8SOsD+*dYy%JKi1`)sf>V~1CXN4p9 z;!{|;5&Q~S@whqezx%uYx1TR;-sNm+tR@FT%$18*;Ir=_U%LgL09@&>HhCMF-aGpB zlwe7pu2LIW=HuIOe9mA_+-`Dt!v^;dBdAcQZp-NXVZ;9j_M_+j*Ydx12Zk*EuLd6Q zH!s7_u_XU$^;OVUWA);hdYtLYN-#`sm8rw!>e=UDbTE3D^I7>BVa9;G9p#yA+uk~y ziP6n}VgFx)=YRZD{+dgGX9>gP(81AQI1GPh$jv6n$5CT3IaVtVmo*$=@HYJ3Ju-4; zX|VS(I{59QU2aeF`iURCKX26D|BG|~$5H&Re?f2g4_pIHm!s%AQWl08@U%GG#B1H; zSG)H3aYncM*k<^hrIC|QvG2rQH1@k}E2fUU5)S$o_G53zKjC`3;ae~GD$lr}zis+s z_yK!lz(+kUKrcx^!^G`bur_luav7uNMh5yBM(-Q4;lQS4(#U&7*kJhnGvpNh#rgk) zc<*~j*@mxPp{k$o^J#f`8Q;7|M8$i=AG8Cbsn3~V*3B?JUMmB-NUaIevsWDLo_Wpa zIoa_q%tvp=|B|JezWQgCeC-gh+douw$_axF7d)-llNbTwd6O3u{=V@0^*)1~K=$cu z+;e$tpzgHiLiIm7+q^TH9VYhr7uWwU!F=pxs`1M!)%4{XWc$)tYWmtW(YoVb^jDl# z>8bo}wyW_vc^a>mujK}H+pd(i;avIZPW2BqeS2G`{j0Ope#nh#`_}1dyI`-p+Sv2| zYyF@54xqPLbA<;Pw|?Yr$hzVK#*NOata)DT==_tB`I&p>x$z_OgZIo+Q%2?ojvLvq zYh;?YpL=A!dCxrW)RFo4(fMK4JU3-@{zYp(JUY!gV6xee{B)jIXy)m^&M!9e&|l{! 
z_|&IQxa8%1&dmLP-G9(z=0$&U4V>p!3E%*Xumt+@r>=tus>@nde!|M&9;?)}fpgT?nc9%dhN z`>fCJHQ#qXUSG%PwV34r`-+}`KohVOO0yc_?)eB4<0_=3IQWPZ>-`+v62{J?$Y z`%T{a`|}F-`uFGM-MH8NX1?$KeLjE8KJ&u8=lk8U_k4ffSnGk`?n0elm zyu32T=-=eY$uEr?KRV0XZ}F6aLSxkb(8)@E>C@*g^YR{m9Pp+_{| Date: Wed, 6 Nov 2024 19:23:21 +0200 Subject: [PATCH 12/16] Qt: Add rsyn icon to resources --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d615bf4..6398b9f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -591,7 +591,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) qt_add_resources(AlberCore "app_images" PREFIX "/" FILES - docs/img/rsob_icon.png docs/img/rstarstruck_icon.png docs/img/rpog_icon.png + docs/img/rsob_icon.png docs/img/rstarstruck_icon.png docs/img/rpog_icon.png docs/img/rsyn_icon.png ) else() set(FRONTEND_SOURCE_FILES src/panda_sdl/main.cpp src/panda_sdl/frontend_sdl.cpp src/panda_sdl/mappings.cpp) From 49b65242b900c9463eefca1077e2f627b43969af Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 9 Nov 2024 13:09:12 +0200 Subject: [PATCH 13/16] First Metal cleanup & formatting pass --- include/renderer_gl/surface_cache.hpp | 2 - .../renderer_mtl/mtl_blit_pipeline_cache.hpp | 106 ++-- include/renderer_mtl/mtl_command_encoder.hpp | 88 ++-- .../renderer_mtl/mtl_depth_stencil_cache.hpp | 120 ++--- .../renderer_mtl/mtl_draw_pipeline_cache.hpp | 268 +++++----- include/renderer_mtl/mtl_lut_texture.hpp | 5 - include/renderer_mtl/mtl_render_target.hpp | 131 +++-- include/renderer_mtl/mtl_texture.hpp | 100 ++-- .../renderer_mtl/mtl_vertex_buffer_cache.hpp | 116 ++-- include/renderer_mtl/objc_helper.hpp | 10 +- include/renderer_mtl/pica_to_mtl.hpp | 267 +++++----- include/renderer_mtl/renderer_mtl.hpp | 164 +++--- src/core/renderer_gl/etc1.cpp | 3 +- src/core/renderer_mtl/mtl_etc1.cpp | 196 ++++--- src/core/renderer_mtl/mtl_lut_texture.cpp | 45 +- 
src/core/renderer_mtl/mtl_texture.cpp | 498 +++++++++--------- src/core/renderer_mtl/renderer_mtl.cpp | 80 +-- 17 files changed, 1084 insertions(+), 1115 deletions(-) diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp index 7346fd11..fb7c71a5 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -19,8 +19,6 @@ template class SurfaceCache { // Vanilla std::optional can't hold actual references using OptionalRef = std::optional>; - //static_assert(std::is_same() || std::is_same() || - // std::is_same(), "Invalid surface type"); size_t size; size_t evictionIndex; diff --git a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp index 26422635..02e075b2 100644 --- a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp @@ -7,69 +7,67 @@ using namespace PICA; namespace Metal { + struct BlitPipelineHash { + // Formats + ColorFmt colorFmt; + DepthFmt depthFmt; + }; -struct BlitPipelineHash { - // Formats - ColorFmt colorFmt; - DepthFmt depthFmt; -}; + // This pipeline only caches the pipeline with all of its color and depth attachment variations + class BlitPipelineCache { + public: + BlitPipelineCache() = default; -// This pipeline only caches the pipeline with all of its color and depth attachment variations -class BlitPipelineCache { -public: - BlitPipelineCache() = default; + ~BlitPipelineCache() { + reset(); + vertexFunction->release(); + fragmentFunction->release(); + } - ~BlitPipelineCache() { - reset(); - vertexFunction->release(); - fragmentFunction->release(); - } + void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) { + device = dev; + vertexFunction = vert; + fragmentFunction = frag; + } - void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) { - device = dev; - vertexFunction = vert; - fragmentFunction = frag; - } + MTL::RenderPipelineState* 
get(BlitPipelineHash hash) { + u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt; + auto& pipeline = pipelineCache[intHash]; + if (!pipeline) { + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); - MTL::RenderPipelineState* get(BlitPipelineHash hash) { - u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt; - auto& pipeline = pipelineCache[intHash]; - if (!pipeline) { - MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); - desc->setVertexFunction(vertexFunction); - desc->setFragmentFunction(fragmentFunction); + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); - auto colorAttachment = desc->colorAttachments()->object(0); - colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); - desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + NS::Error* error = nullptr; + desc->setLabel(toNSString("Blit pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } - NS::Error* error = nullptr; - desc->setLabel(toNSString("Blit pipeline")); - pipeline = device->newRenderPipelineState(desc, &error); - if (error) { - Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); - } + desc->release(); + } - desc->release(); - } + return pipeline; + } - return pipeline; - } + void reset() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + pipelineCache.clear(); + } - void reset() { - for (auto& pair : pipelineCache) { - pair.second->release(); - } - pipelineCache.clear(); - } + private: + std::map 
pipelineCache; -private: - std::map pipelineCache; - - MTL::Device* device; - MTL::Function* vertexFunction; - MTL::Function* fragmentFunction; -}; - -} // namespace Metal + MTL::Device* device; + MTL::Function* vertexFunction; + MTL::Function* fragmentFunction; + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_command_encoder.hpp b/include/renderer_mtl/mtl_command_encoder.hpp index be66699d..562e6b79 100644 --- a/include/renderer_mtl/mtl_command_encoder.hpp +++ b/include/renderer_mtl/mtl_command_encoder.hpp @@ -3,56 +3,54 @@ #include namespace Metal { + struct RenderState { + MTL::RenderPipelineState* renderPipelineState = nullptr; + MTL::DepthStencilState* depthStencilState = nullptr; + MTL::Texture* textures[3] = {nullptr}; + MTL::SamplerState* samplerStates[3] = {nullptr}; + }; -struct RenderState { - MTL::RenderPipelineState* renderPipelineState = nullptr; - MTL::DepthStencilState* depthStencilState = nullptr; - MTL::Texture* textures[3] = {nullptr}; - MTL::SamplerState* samplerStates[3] = {nullptr}; -}; + class CommandEncoder { + public: + void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) { + renderCommandEncoder = rce; -class CommandEncoder { -public: - void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) { - renderCommandEncoder = rce; + // Reset the render state + renderState = RenderState{}; + } - // Reset the render state - renderState = RenderState{}; - } + // Resource binding + void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) { + if (renderPipelineState != renderState.renderPipelineState) { + renderCommandEncoder->setRenderPipelineState(renderPipelineState); + renderState.renderPipelineState = renderPipelineState; + } + } - // Resource binding - void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) { - if (renderPipelineState != renderState.renderPipelineState) { - renderCommandEncoder->setRenderPipelineState(renderPipelineState); - renderState.renderPipelineState = 
renderPipelineState; - } - } + void setDepthStencilState(MTL::DepthStencilState* depthStencilState) { + if (depthStencilState != renderState.depthStencilState) { + renderCommandEncoder->setDepthStencilState(depthStencilState); + renderState.depthStencilState = depthStencilState; + } + } - void setDepthStencilState(MTL::DepthStencilState* depthStencilState) { - if (depthStencilState != renderState.depthStencilState) { - renderCommandEncoder->setDepthStencilState(depthStencilState); - renderState.depthStencilState = depthStencilState; - } - } + void setFragmentTexture(MTL::Texture* texture, u32 index) { + if (texture != renderState.textures[index]) { + renderCommandEncoder->setFragmentTexture(texture, index); + renderState.textures[index] = texture; + } + } - void setFragmentTexture(MTL::Texture* texture, u32 index) { - if (texture != renderState.textures[index]) { - renderCommandEncoder->setFragmentTexture(texture, index); - renderState.textures[index] = texture; - } - } + void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) { + if (samplerState != renderState.samplerStates[index]) { + renderCommandEncoder->setFragmentSamplerState(samplerState, index); + renderState.samplerStates[index] = samplerState; + } + } - void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) { - if (samplerState != renderState.samplerStates[index]) { - renderCommandEncoder->setFragmentSamplerState(samplerState, index); - renderState.samplerStates[index] = samplerState; - } - } + private: + MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; -private: - MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; - - RenderState renderState; -}; - -} // namespace Metal + RenderState renderState; + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_depth_stencil_cache.hpp b/include/renderer_mtl/mtl_depth_stencil_cache.hpp index 90721b70..8f7256a9 100644 --- a/include/renderer_mtl/mtl_depth_stencil_cache.hpp +++ 
b/include/renderer_mtl/mtl_depth_stencil_cache.hpp @@ -7,80 +7,74 @@ using namespace PICA; namespace Metal { + struct DepthStencilHash { + u32 stencilConfig; + u16 stencilOpConfig; + bool depthStencilWrite; + u8 depthFunc; + }; -struct DepthStencilHash { - bool depthStencilWrite; - u8 depthFunc; - u32 stencilConfig; - u16 stencilOpConfig; -}; + class DepthStencilCache { + public: + DepthStencilCache() = default; -class DepthStencilCache { -public: - DepthStencilCache() = default; + ~DepthStencilCache() { reset(); } - ~DepthStencilCache() { - reset(); - } + void set(MTL::Device* dev) { device = dev; } - void set(MTL::Device* dev) { - device = dev; - } + MTL::DepthStencilState* get(DepthStencilHash hash) { + u64 intHash = + ((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig; + auto& depthStencilState = depthStencilCache[intHash]; + if (!depthStencilState) { + MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init(); + desc->setDepthWriteEnabled(hash.depthStencilWrite); + desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc)); - MTL::DepthStencilState* get(DepthStencilHash hash) { - u64 intHash = ((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig; - auto& depthStencilState = depthStencilCache[intHash]; - if (!depthStencilState) { - MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init(); - desc->setDepthWriteEnabled(hash.depthStencilWrite); - desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc)); + const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig); + MTL::StencilDescriptor* stencilDesc = nullptr; + if (stencilEnable) { + const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig); + const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig); - const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig); - 
MTL::StencilDescriptor* stencilDesc = nullptr; - if (stencilEnable) { - const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig); - const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig); + const u32 stencilBufferMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0; - const u32 stencilBufferMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0; + const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig); + const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig); + const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig); - const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig); - const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig); - const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig); + stencilDesc = MTL::StencilDescriptor::alloc()->init(); + stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp)); + stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp)); + stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp)); + stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc)); + stencilDesc->setReadMask(stencilRefMask); + stencilDesc->setWriteMask(stencilBufferMask); - stencilDesc = MTL::StencilDescriptor::alloc()->init(); - stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp)); - stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp)); - stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp)); - stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc)); - stencilDesc->setReadMask(stencilRefMask); - stencilDesc->setWriteMask(stencilBufferMask); + desc->setFrontFaceStencil(stencilDesc); + desc->setBackFaceStencil(stencilDesc); + } - desc->setFrontFaceStencil(stencilDesc); - desc->setBackFaceStencil(stencilDesc); - } + depthStencilState = device->newDepthStencilState(desc); - 
depthStencilState = device->newDepthStencilState(desc); + desc->release(); + if (stencilDesc) { + stencilDesc->release(); + } + } - desc->release(); - if (stencilDesc) { - stencilDesc->release(); - } - } + return depthStencilState; + } - return depthStencilState; - } + void reset() { + for (auto& pair : depthStencilCache) { + pair.second->release(); + } + depthStencilCache.clear(); + } - void reset() { - for (auto& pair : depthStencilCache) { - pair.second->release(); - } - depthStencilCache.clear(); - } - -private: - std::map depthStencilCache; - - MTL::Device* device; -}; - -} // namespace Metal + private: + std::map depthStencilCache; + MTL::Device* device; + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp index c5105a13..ace324fe 100644 --- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -7,171 +7,155 @@ using namespace PICA; namespace Metal { + struct DrawFragmentFunctionHash { + bool lightingEnabled; // 1 bit + u8 lightingNumLights; // 3 bits + u32 lightingConfig1; // 32 bits (TODO: check this) + // | ref | func | on | + u16 alphaControl; // 12 bits (mask: 11111111 0111 0001) + }; -struct DrawFragmentFunctionHash { - bool lightingEnabled; // 1 bit - u8 lightingNumLights; // 3 bits - u32 lightingConfig1; // 32 bits (TODO: check this) - // | ref | func | on | - u16 alphaControl; // 12 bits (mask: 11111111 0111 0001) -}; + inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { + if (!l.lightingEnabled && r.lightingEnabled) return true; + if (l.lightingNumLights < r.lightingNumLights) return true; + if (l.lightingConfig1 < r.lightingConfig1) return true; + if (l.alphaControl < r.alphaControl) return true; -//bool operator==(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { -// return ((l.lightingEnabled == r.lightingEnabled) && (l.lightingNumLights == 
r.lightingNumLights) && -// (l.lightingConfig1 == r.lightingConfig1) && (l.alphaControl == r.alphaControl)); -//} + return false; + } -inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { - if (!l.lightingEnabled && r.lightingEnabled) return true; - if (l.lightingNumLights < r.lightingNumLights) return true; - if (l.lightingConfig1 < r.lightingConfig1) return true; - if (l.alphaControl < r.alphaControl) return true; + struct DrawPipelineHash { // 56 bits + // Formats + ColorFmt colorFmt; // 3 bits + DepthFmt depthFmt; // 3 bits - return false; -} + // Blending + bool blendEnabled; // 1 bit + // | functions | aeq | ceq | + u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111) + u8 colorWriteMask; // 4 bits -struct DrawPipelineHash { // 56 bits - // Formats - ColorFmt colorFmt; // 3 bits - DepthFmt depthFmt; // 3 bits + DrawFragmentFunctionHash fragHash; + }; - // Blending - bool blendEnabled; // 1 bit - // | functions | aeq | ceq | - u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111) - u8 colorWriteMask; // 4 bits + inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) { + if ((u32)l.colorFmt < (u32)r.colorFmt) return true; + if ((u32)l.depthFmt < (u32)r.depthFmt) return true; + if (!l.blendEnabled && r.blendEnabled) return true; + if (l.blendControl < r.blendControl) return true; + if (l.colorWriteMask < r.colorWriteMask) return true; + if (l.fragHash < r.fragHash) return true; - DrawFragmentFunctionHash fragHash; -}; + return false; + } -//bool operator==(const DrawPipelineHash& l, const DrawPipelineHash& r) { -// return (((u32)l.colorFmt == (u32)r.colorFmt) && ((u32)l.depthFmt == (u32)r.depthFmt) && -// (l.blendEnabled == r.blendEnabled) && (l.blendControl == r.blendControl) && -// (l.colorWriteMask == r.colorWriteMask) && (l.fragHash == r.fragHash)); -//} + // This pipeline only caches the pipeline with all of its color and depth attachment variations + class 
DrawPipelineCache { + public: + DrawPipelineCache() = default; -inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) { - if ((u32)l.colorFmt < (u32)r.colorFmt) return true; - if ((u32)l.depthFmt < (u32)r.depthFmt) return true; - if (!l.blendEnabled && r.blendEnabled) return true; - if (l.blendControl < r.blendControl) return true; - if (l.colorWriteMask < r.colorWriteMask) return true; - if (l.fragHash < r.fragHash) return true; + ~DrawPipelineCache() { + reset(); + vertexDescriptor->release(); + vertexFunction->release(); + } - return false; -} + void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) { + device = dev; + library = lib; + vertexFunction = vert; + vertexDescriptor = vertDesc; + } -// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices -#define VERTEX_BUFFER_BINDING_INDEX 30 + MTL::RenderPipelineState* get(DrawPipelineHash hash) { + auto& pipeline = pipelineCache[hash]; -// This pipeline only caches the pipeline with all of its color and depth attachment variations -class DrawPipelineCache { -public: - DrawPipelineCache() = default; + if (!pipeline) { + auto& fragmentFunction = fragmentFunctionCache[hash.fragHash]; + if (!fragmentFunction) { + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); + constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); + constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); + constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2)); + constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3)); - ~DrawPipelineCache() { - reset(); - vertexDescriptor->release(); - vertexFunction->release(); - } + NS::Error* error = nullptr; + fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), 
constants, &error); + if (error) { + Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); + } - void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) { - device = dev; - library = lib; - vertexFunction = vert; - vertexDescriptor = vertDesc; - } + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + desc->setVertexDescriptor(vertexDescriptor); - MTL::RenderPipelineState* get(DrawPipelineHash hash) { - //u32 fragmentFunctionHash = ((u32)hash.lightingEnabled << 22) | ((u32)hash.lightingNumLights << 19) | ((u32)hash.lightingConfig1 << 12) | ((((u32)hash.alphaControl & 0b1111111100000000) >> 8) << 4) | ((((u32)hash.alphaControl & 0b01110000) >> 4) << 1) | ((u32)hash.alphaControl & 0b0001); - //u64 pipelineHash = ((u64)hash.colorFmt << 53) | ((u64)hash.depthFmt << 50) | ((u64)hash.blendEnabled << 49) | ((u64)hash.colorWriteMask << 45) | ((((u64)hash.blendControl & 0b11111111111111110000000000000000) >> 16) << 29) | ((((u64)hash.blendControl & 0b0000011100000000) >> 8) << 26) | (((u64)hash.blendControl & 0b00000111) << 23) | fragmentFunctionHash; - auto& pipeline = pipelineCache[hash]; - if (!pipeline) { - auto& fragmentFunction = fragmentFunctionCache[hash.fragHash]; - if (!fragmentFunction) { - MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); - constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); - constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); - constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2)); - constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3)); + auto colorAttachment = 
desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + MTL::ColorWriteMask writeMask = 0; + if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed; + if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen; + if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue; + if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha; + colorAttachment->setWriteMask(writeMask); + if (hash.blendEnabled) { + const u8 rgbEquation = hash.blendControl & 0x7; + const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl); - NS::Error* error = nullptr; - fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); - if (error) { - Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding)); - } - constants->release(); - } + // Get blending functions + const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl); + const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl); + const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl); + const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl); - MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); - desc->setVertexFunction(vertexFunction); - desc->setFragmentFunction(fragmentFunction); - desc->setVertexDescriptor(vertexDescriptor); + colorAttachment->setBlendingEnabled(true); + colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation)); + colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation)); + colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc)); + colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc)); + colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc)); + colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); 
+ } - auto colorAttachment = desc->colorAttachments()->object(0); - colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); - MTL::ColorWriteMask writeMask = 0; - if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed; - if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen; - if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue; - if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha; - colorAttachment->setWriteMask(writeMask); - if (hash.blendEnabled) { - const u8 rgbEquation = hash.blendControl & 0x7; - const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl); + MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt); + desc->setDepthAttachmentPixelFormat(depthFormat); + if (hash.depthFmt == DepthFmt::Depth24Stencil8) desc->setStencilAttachmentPixelFormat(depthFormat); - // Get blending functions - const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl); - const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl); - const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl); - const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl); + NS::Error* error = nullptr; + desc->setLabel(toNSString("Draw pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } - colorAttachment->setBlendingEnabled(true); - colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation)); - colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation)); - colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc)); - colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc)); - colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc)); - colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); - } + 
desc->release(); + } - MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt); - desc->setDepthAttachmentPixelFormat(depthFormat); - if (hash.depthFmt == DepthFmt::Depth24Stencil8) - desc->setStencilAttachmentPixelFormat(depthFormat); + return pipeline; + } - NS::Error* error = nullptr; - desc->setLabel(toNSString("Draw pipeline")); - pipeline = device->newRenderPipelineState(desc, &error); - if (error) { - Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); - } + void reset() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + pipelineCache.clear(); - desc->release(); - } + for (auto& pair : fragmentFunctionCache) { + pair.second->release(); + } + fragmentFunctionCache.clear(); + } - return pipeline; - } + private: + std::map pipelineCache; + std::map fragmentFunctionCache; - void reset() { - for (auto& pair : pipelineCache) { - pair.second->release(); - } - pipelineCache.clear(); - for (auto& pair : fragmentFunctionCache) { - pair.second->release(); - } - fragmentFunctionCache.clear(); - } + MTL::Device* device; + MTL::Library* library; + MTL::Function* vertexFunction; + MTL::VertexDescriptor* vertexDescriptor; + }; -private: - std::map pipelineCache; - std::map fragmentFunctionCache; - - MTL::Device* device; - MTL::Library* library; - MTL::Function* vertexFunction; - MTL::VertexDescriptor* vertexDescriptor; -}; - -} // namespace Metal +} // namespace Metal diff --git a/include/renderer_mtl/mtl_lut_texture.hpp b/include/renderer_mtl/mtl_lut_texture.hpp index 162bfe25..531dc73c 100644 --- a/include/renderer_mtl/mtl_lut_texture.hpp +++ b/include/renderer_mtl/mtl_lut_texture.hpp @@ -8,17 +8,12 @@ class LutTexture { public: LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name); ~LutTexture(); - u32 getNextIndex(); - // Getters MTL::Texture* getTexture() { return texture; } - u32 getCurrentIndex() { 
return currentIndex; } - private: MTL::Texture* texture; - u32 currentIndex = 0; }; diff --git a/include/renderer_mtl/mtl_render_target.hpp b/include/renderer_mtl/mtl_render_target.hpp index 73be45f4..8f80ea64 100644 --- a/include/renderer_mtl/mtl_render_target.hpp +++ b/include/renderer_mtl/mtl_render_target.hpp @@ -1,92 +1,91 @@ #pragma once +#include #include #include -#include + #include "boost/icl/interval.hpp" #include "helpers.hpp" #include "math_util.hpp" +#include "objc_helper.hpp" #include "opengl.hpp" #include "pica_to_mtl.hpp" -#include "objc_helper.hpp" template using Interval = boost::icl::right_open_interval; namespace Metal { + template + struct RenderTarget { + MTL::Device* device; -template -struct RenderTarget { - MTL::Device* device; + u32 location; + Format_t format; + OpenGL::uvec2 size; + bool valid; - u32 location; - Format_t format; - OpenGL::uvec2 size; - bool valid; + // Range of VRAM taken up by buffer + Interval range; - // Range of VRAM taken up by buffer - Interval range; + MTL::Texture* texture = nullptr; - MTL::Texture* texture = nullptr; + RenderTarget() : valid(false) {} - RenderTarget() : valid(false) {} + RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } - RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true) - : device(dev), location(loc), format(format), size({x, y}), valid(valid) { - u64 endLoc = (u64)loc + sizeInBytes(); - // Check if start and end are valid here - range = Interval(loc, (u32)endLoc); - } + Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { + const u32 startOffset = (inputAddress - location) / sizePerPixel(format); + const u32 x0 = (startOffset % (size.x() * 8)) / 8; + const u32 y0 = (startOffset / (size.x() * 8)) * 8; + 
return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; + } - Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { - const u32 startOffset = (inputAddress - location) / sizePerPixel(format); - const u32 x0 = (startOffset % (size.x() * 8)) / 8; - const u32 y0 = (startOffset / (size.x() * 8)) * 8; - return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; - } + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture + bool matches(RenderTarget& other) { + return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y(); + } - // For 2 textures to "match" we only care about their locations, formats, and dimensions to match - // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture - bool matches(RenderTarget& other) { - return location == other.location && format == other.format && - size.x() == other.size.x() && size.y() == other.size.y(); - } + void allocate() { + MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid; + if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format); + } else if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format); + } else { + panic("Invalid format type"); + } - void allocate() { - MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid; - if (std::is_same::value) { - pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format); - } else if (std::is_same::value) { - pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format); - } else { - panic("Invalid format type"); - } + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(pixelFormat); + 
descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModePrivate); + texture = device->newTexture(descriptor); + texture->setLabel(toNSString( + std::string(std::is_same::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" + + std::to_string(size.v()) + )); + descriptor->release(); + } - MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); - descriptor->setTextureType(MTL::TextureType2D); - descriptor->setPixelFormat(pixelFormat); - descriptor->setWidth(size.u()); - descriptor->setHeight(size.v()); - descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); - descriptor->setStorageMode(MTL::StorageModePrivate); - texture = device->newTexture(descriptor); - texture->setLabel(toNSString(std::string(std::is_same::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" + std::to_string(size.v()))); - descriptor->release(); - } + void free() { + valid = false; - void free() { - valid = false; + if (texture) { + texture->release(); + } + } - if (texture) { - texture->release(); - } - } + u64 sizeInBytes() { return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); } + }; - u64 sizeInBytes() { - return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); - } -}; - -typedef RenderTarget ColorRenderTarget; -typedef RenderTarget DepthStencilRenderTarget; - -} // namespace Metal + using ColorRenderTarget = RenderTarget; + using DepthStencilRenderTarget = RenderTarget; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp index 9cec268d..51cb4c4b 100644 --- a/include/renderer_mtl/mtl_texture.hpp +++ b/include/renderer_mtl/mtl_texture.hpp @@ -1,8 +1,9 @@ #pragma once +#include #include #include -#include + #include "PICA/regs.hpp" #include 
"boost/icl/interval.hpp" #include "helpers.hpp" @@ -10,69 +11,64 @@ #include "opengl.hpp" #include "renderer_mtl/pica_to_mtl.hpp" + template using Interval = boost::icl::right_open_interval; namespace Metal { + struct Texture { + MTL::Device* device; -struct Texture { - MTL::Device* device; + u32 location; + u32 config; // Magnification/minification filter, wrapping configs, etc + PICA::TextureFmt format; + OpenGL::uvec2 size; + bool valid; - u32 location; - u32 config; // Magnification/minification filter, wrapping configs, etc - PICA::TextureFmt format; - OpenGL::uvec2 size; - bool valid; + // Range of VRAM taken up by buffer + Interval range; - // Range of VRAM taken up by buffer - Interval range; + PICA::PixelFormatInfo formatInfo; + MTL::Texture* texture = nullptr; + MTL::SamplerState* sampler = nullptr; - PICA::PixelFormatInfo formatInfo; - MTL::Texture* texture = nullptr; - MTL::SamplerState* sampler = nullptr; + Texture() : valid(false) {} - Texture() : valid(false) {} + Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } - Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true) - : device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) { + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture + bool matches(Texture& other) { + return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y(); + } - u64 endLoc = (u64)loc + sizeInBytes(); - // Check if start and end are valid here - range = Interval(loc, 
(u32)endLoc); - } + void allocate(); + void setNewConfig(u32 newConfig); + void decodeTexture(std::span data); + void free(); + u64 sizeInBytes(); - // For 2 textures to "match" we only care about their locations, formats, and dimensions to match - // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture - bool matches(Texture& other) { - return location == other.location && format == other.format && - size.x() == other.size.x() && size.y() == other.size.y(); - } + u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); - void allocate(); - void setNewConfig(u32 newConfig); - void decodeTexture(std::span data); - void free(); - u64 sizeInBytes(); + // Get the morton interleave offset of a texel based on its U and V values + static u32 mortonInterleave(u32 u, u32 v); + // Get the byte offset of texel (u, v) in the texture + static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel); + static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); - u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); - u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); - u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + // Returns the format of this texture as a string + std::string_view formatToString() { return PICA::textureFormatToString(format); } - // Get the morton interleave offset of a texel based on its U and V values - static u32 mortonInterleave(u32 u, u32 v); - // Get the byte offset of texel (u, v) in the texture - static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel); - static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); - - // Returns the format of this texture as a string - std::string_view formatToString() { - return PICA::textureFormatToString(format); - } - - 
// Returns the texel at coordinates (u, v) of an ETC1(A4) texture - // TODO: Make hasAlpha a template parameter - u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data); - u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData); -}; - -} // namespace Metal + // Returns the texel at coordinates (u, v) of an ETC1(A4) texture + // TODO: Make hasAlpha a template parameter + u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data); + u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData); + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp index cc552477..d53af283 100644 --- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -5,76 +5,74 @@ using namespace PICA; namespace Metal { + struct BufferHandle { + MTL::Buffer* buffer; + size_t offset; + }; -struct BufferHandle { - MTL::Buffer* buffer; - size_t offset; -}; + class VertexBufferCache { + // 128MB buffer for caching vertex data + static constexpr usize CACHE_BUFFER_SIZE = 128 * 1024 * 1024; -// 128MB buffer for caching vertex data -#define CACHE_BUFFER_SIZE 128 * 1024 * 1024 + public: + VertexBufferCache() = default; -class VertexBufferCache { -public: - VertexBufferCache() = default; + ~VertexBufferCache() { + endFrame(); + buffer->release(); + } - ~VertexBufferCache() { - endFrame(); - buffer->release(); - } + void set(MTL::Device* dev) { + device = dev; + create(); + } - void set(MTL::Device* dev) { - device = dev; - create(); - } + void endFrame() { + ptr = 0; + for (auto buffer : additionalAllocations) { + buffer->release(); + } + additionalAllocations.clear(); + } - void endFrame() { - ptr = 0; - for (auto buffer : additionalAllocations) { - buffer->release(); - } - additionalAllocations.clear(); - } + BufferHandle get(const void* data, size_t size) { + // If the vertex buffer is too large, just create a new one + if (ptr + size > 
CACHE_BUFFER_SIZE) { + MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared); + newBuffer->setLabel(toNSString("Additional vertex buffer")); + additionalAllocations.push_back(newBuffer); + Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer"); - BufferHandle get(const void* data, size_t size) { - // If the vertex buffer is too large, just create a new one - if (ptr + size > CACHE_BUFFER_SIZE) { - MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared); - newBuffer->setLabel(toNSString("Additional vertex buffer")); - additionalAllocations.push_back(newBuffer); - Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer"); + return BufferHandle{newBuffer, 0}; + } - return BufferHandle{newBuffer, 0}; - } + // Copy the data into the buffer + memcpy((char*)buffer->contents() + ptr, data, size); - // Copy the data into the buffer - memcpy((char*)buffer->contents() + ptr, data, size); + size_t oldPtr = ptr; + ptr += size; - size_t oldPtr = ptr; - ptr += size; + return BufferHandle{buffer, oldPtr}; + } - return BufferHandle{buffer, oldPtr}; - } + void reset() { + endFrame(); + if (buffer) { + buffer->release(); + create(); + } + } - void reset() { - endFrame(); - if (buffer) { - buffer->release(); - create(); - } - } + private: + MTL::Buffer* buffer = nullptr; + size_t ptr = 0; + std::vector additionalAllocations; -private: - MTL::Buffer* buffer = nullptr; - size_t ptr = 0; - std::vector additionalAllocations; + MTL::Device* device; - MTL::Device* device; - - void create() { - buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared); - buffer->setLabel(toNSString("Shared vertex buffer")); - } -}; - -} // namespace Metal + void create() { + buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared); + buffer->setLabel(toNSString("Shared vertex buffer")); + } + }; +} // namespace Metal diff --git 
a/include/renderer_mtl/objc_helper.hpp b/include/renderer_mtl/objc_helper.hpp index 7d0e8646..86992f1d 100644 --- a/include/renderer_mtl/objc_helper.hpp +++ b/include/renderer_mtl/objc_helper.hpp @@ -5,12 +5,8 @@ #include "mtl_common.hpp" namespace Metal { - -dispatch_data_t createDispatchData(const void* data, size_t size); - -} // namespace Metal + dispatch_data_t createDispatchData(const void* data, size_t size); +} // namespace Metal // Cast from std::string to NS::String* -inline NS::String* toNSString(const std::string& str) { - return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); -} +inline NS::String* toNSString(const std::string& str) { return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); } \ No newline at end of file diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp index de76dc3b..9234c748 100644 --- a/include/renderer_mtl/pica_to_mtl.hpp +++ b/include/renderer_mtl/pica_to_mtl.hpp @@ -1,155 +1,154 @@ #pragma once #include + #include "PICA/regs.hpp" + namespace PICA { + struct PixelFormatInfo { + MTL::PixelFormat pixelFormat; + size_t bytesPerTexel; + }; -struct PixelFormatInfo { - MTL::PixelFormat pixelFormat; - size_t bytesPerTexel; -}; + constexpr PixelFormatInfo pixelFormatInfos[14] = { + {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8 + {MTL::PixelFormatRGBA8Unorm, 4}, // RGB8 + {MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551 + {MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565 + {MTL::PixelFormatABGR4Unorm, 2}, // RGBA4 + {MTL::PixelFormatRGBA8Unorm, 4}, // IA8 + {MTL::PixelFormatRG8Unorm, 2}, // RG8 + {MTL::PixelFormatRGBA8Unorm, 4}, // I8 + {MTL::PixelFormatA8Unorm, 1}, // A8 + {MTL::PixelFormatABGR4Unorm, 2}, // IA4 + {MTL::PixelFormatABGR4Unorm, 2}, // I4 + {MTL::PixelFormatA8Unorm, 1}, // A4 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4 + }; -constexpr PixelFormatInfo pixelFormatInfos[14] = { - {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8 - 
{MTL::PixelFormatRGBA8Unorm, 4}, // RGB8 - {MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551 - {MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565 - {MTL::PixelFormatABGR4Unorm, 2}, // RGBA4 - {MTL::PixelFormatRGBA8Unorm, 4}, // IA8 - {MTL::PixelFormatRG8Unorm, 2}, // RG8 - {MTL::PixelFormatRGBA8Unorm, 4}, // I8 - {MTL::PixelFormatA8Unorm, 1}, // A8 - {MTL::PixelFormatABGR4Unorm, 2}, // IA4 - {MTL::PixelFormatABGR4Unorm, 2}, // I4 - {MTL::PixelFormatA8Unorm, 1}, // A4 - {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1 - {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4 -}; + inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast(format)]; } -inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { - return pixelFormatInfos[static_cast(format)]; -} + inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) { + switch (format) { + case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm? + case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm? + case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm; + } + } -inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) { - switch (format) { - case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm; - case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm; - case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm? - case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm? 
- case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm; - } -} + inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) { + switch (format) { + case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm; + case DepthFmt::Unknown1: return MTL::PixelFormatInvalid; + case DepthFmt::Depth24: + return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats + // Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead + case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8; + } + } -inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) { - switch (format) { - case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm; - case DepthFmt::Unknown1: return MTL::PixelFormatInvalid; - case DepthFmt::Depth24: return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats - // Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead - case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8; - } -} + inline MTL::CompareFunction toMTLCompareFunc(u8 func) { + switch (func) { + case 0: return MTL::CompareFunctionNever; + case 1: return MTL::CompareFunctionAlways; + case 2: return MTL::CompareFunctionEqual; + case 3: return MTL::CompareFunctionNotEqual; + case 4: return MTL::CompareFunctionLess; + case 5: return MTL::CompareFunctionLessEqual; + case 6: return MTL::CompareFunctionGreater; + case 7: return MTL::CompareFunctionGreaterEqual; + default: panic("Unknown compare function %u", func); + } -inline MTL::CompareFunction toMTLCompareFunc(u8 func) { - switch (func) { - case 0: return MTL::CompareFunctionNever; - case 1: return MTL::CompareFunctionAlways; - case 2: return MTL::CompareFunctionEqual; - case 3: return MTL::CompareFunctionNotEqual; - case 4: return MTL::CompareFunctionLess; - case 5: return MTL::CompareFunctionLessEqual; - case 6: return MTL::CompareFunctionGreater; - case 7: return MTL::CompareFunctionGreaterEqual; - 
default: panic("Unknown compare function %u", func); - } + return MTL::CompareFunctionAlways; + } - return MTL::CompareFunctionAlways; -} + inline MTL::BlendOperation toMTLBlendOperation(u8 op) { + switch (op) { + case 0: return MTL::BlendOperationAdd; + case 1: return MTL::BlendOperationSubtract; + case 2: return MTL::BlendOperationReverseSubtract; + case 3: return MTL::BlendOperationMin; + case 4: return MTL::BlendOperationMax; + case 5: return MTL::BlendOperationAdd; // Unused (same as 0) + case 6: return MTL::BlendOperationAdd; // Unused (same as 0) + case 7: return MTL::BlendOperationAdd; // Unused (same as 0) + default: panic("Unknown blend operation %u", op); + } -inline MTL::BlendOperation toMTLBlendOperation(u8 op) { - switch (op) { - case 0: return MTL::BlendOperationAdd; - case 1: return MTL::BlendOperationSubtract; - case 2: return MTL::BlendOperationReverseSubtract; - case 3: return MTL::BlendOperationMin; - case 4: return MTL::BlendOperationMax; - case 5: return MTL::BlendOperationAdd; // Unused (same as 0) - case 6: return MTL::BlendOperationAdd; // Unused (same as 0) - case 7: return MTL::BlendOperationAdd; // Unused (same as 0) - default: panic("Unknown blend operation %u", op); - } + return MTL::BlendOperationAdd; + } - return MTL::BlendOperationAdd; -} + inline MTL::BlendFactor toMTLBlendFactor(u8 factor) { + switch (factor) { + case 0: return MTL::BlendFactorZero; + case 1: return MTL::BlendFactorOne; + case 2: return MTL::BlendFactorSourceColor; + case 3: return MTL::BlendFactorOneMinusSourceColor; + case 4: return MTL::BlendFactorDestinationColor; + case 5: return MTL::BlendFactorOneMinusDestinationColor; + case 6: return MTL::BlendFactorSourceAlpha; + case 7: return MTL::BlendFactorOneMinusSourceAlpha; + case 8: return MTL::BlendFactorDestinationAlpha; + case 9: return MTL::BlendFactorOneMinusDestinationAlpha; + case 10: return MTL::BlendFactorBlendColor; + case 11: return MTL::BlendFactorOneMinusBlendColor; + case 12: return 
MTL::BlendFactorBlendAlpha; + case 13: return MTL::BlendFactorOneMinusBlendAlpha; + case 14: return MTL::BlendFactorSourceAlphaSaturated; + case 15: return MTL::BlendFactorOne; // Undocumented + default: panic("Unknown blend factor %u", factor); + } -inline MTL::BlendFactor toMTLBlendFactor(u8 factor) { - switch (factor) { - case 0: return MTL::BlendFactorZero; - case 1: return MTL::BlendFactorOne; - case 2: return MTL::BlendFactorSourceColor; - case 3: return MTL::BlendFactorOneMinusSourceColor; - case 4: return MTL::BlendFactorDestinationColor; - case 5: return MTL::BlendFactorOneMinusDestinationColor; - case 6: return MTL::BlendFactorSourceAlpha; - case 7: return MTL::BlendFactorOneMinusSourceAlpha; - case 8: return MTL::BlendFactorDestinationAlpha; - case 9: return MTL::BlendFactorOneMinusDestinationAlpha; - case 10: return MTL::BlendFactorBlendColor; - case 11: return MTL::BlendFactorOneMinusBlendColor; - case 12: return MTL::BlendFactorBlendAlpha; - case 13: return MTL::BlendFactorOneMinusBlendAlpha; - case 14: return MTL::BlendFactorSourceAlphaSaturated; - case 15: return MTL::BlendFactorOne; // Undocumented - default: panic("Unknown blend factor %u", factor); - } + return MTL::BlendFactorOne; + } - return MTL::BlendFactorOne; -} + inline MTL::StencilOperation toMTLStencilOperation(u8 op) { + switch (op) { + case 0: return MTL::StencilOperationKeep; + case 1: return MTL::StencilOperationZero; + case 2: return MTL::StencilOperationReplace; + case 3: return MTL::StencilOperationIncrementClamp; + case 4: return MTL::StencilOperationDecrementClamp; + case 5: return MTL::StencilOperationInvert; + case 6: return MTL::StencilOperationIncrementWrap; + case 7: return MTL::StencilOperationDecrementWrap; + default: panic("Unknown stencil operation %u", op); + } -inline MTL::StencilOperation toMTLStencilOperation(u8 op) { - switch (op) { - case 0: return MTL::StencilOperationKeep; - case 1: return MTL::StencilOperationZero; - case 2: return MTL::StencilOperationReplace; 
- case 3: return MTL::StencilOperationIncrementClamp; - case 4: return MTL::StencilOperationDecrementClamp; - case 5: return MTL::StencilOperationInvert; - case 6: return MTL::StencilOperationIncrementWrap; - case 7: return MTL::StencilOperationDecrementWrap; - default: panic("Unknown stencil operation %u", op); - } + return MTL::StencilOperationKeep; + } - return MTL::StencilOperationKeep; -} + inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) { + switch (primType) { + case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle; + case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip; + case PrimType::TriangleFan: + Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); + return MTL::PrimitiveTypeTriangle; + case PrimType::GeometryPrimitive: + // Helpers::warn("Geometry primitives are not yet, using triangles instead"); + return MTL::PrimitiveTypeTriangle; + } + } -inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) { - switch (primType) { - case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle; - case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip; - case PrimType::TriangleFan: - Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); - return MTL::PrimitiveTypeTriangle; - case PrimType::GeometryPrimitive: - //Helpers::warn("Geometry primitives are not yet, using triangles instead"); - return MTL::PrimitiveTypeTriangle; - } -} + inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) { + switch (addrMode) { + case 0: return MTL::SamplerAddressModeClampToEdge; + case 1: return MTL::SamplerAddressModeClampToBorderColor; + case 2: return MTL::SamplerAddressModeRepeat; + case 3: return MTL::SamplerAddressModeMirrorRepeat; + case 4: return MTL::SamplerAddressModeClampToEdge; + case 5: return MTL::SamplerAddressModeClampToBorderColor; + case 6: return MTL::SamplerAddressModeRepeat; + case 7: return MTL::SamplerAddressModeRepeat; + 
default: panic("Unknown sampler address mode %u", addrMode); + } -inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) { - switch (addrMode) { - case 0: return MTL::SamplerAddressModeClampToEdge; - case 1: return MTL::SamplerAddressModeClampToBorderColor; - case 2: return MTL::SamplerAddressModeRepeat; - case 3: return MTL::SamplerAddressModeMirrorRepeat; - case 4: return MTL::SamplerAddressModeClampToEdge; - case 5: return MTL::SamplerAddressModeClampToBorderColor; - case 6: return MTL::SamplerAddressModeRepeat; - case 7: return MTL::SamplerAddressModeRepeat; - default: panic("Unknown sampler address mode %u", addrMode); - } - - return MTL::SamplerAddressModeClampToEdge; -} - -} // namespace PICA + return MTL::SamplerAddressModeClampToEdge; + } +} // namespace PICA diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp index 6b356896..bd5c3bf1 100644 --- a/include/renderer_mtl/renderer_mtl.hpp +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -3,15 +3,16 @@ #include #include -#include "renderer.hpp" -#include "mtl_texture.hpp" -#include "mtl_render_target.hpp" #include "mtl_blit_pipeline_cache.hpp" -#include "mtl_draw_pipeline_cache.hpp" -#include "mtl_depth_stencil_cache.hpp" -#include "mtl_vertex_buffer_cache.hpp" -#include "mtl_lut_texture.hpp" #include "mtl_command_encoder.hpp" +#include "mtl_depth_stencil_cache.hpp" +#include "mtl_draw_pipeline_cache.hpp" +#include "mtl_lut_texture.hpp" +#include "mtl_render_target.hpp" +#include "mtl_texture.hpp" +#include "mtl_vertex_buffer_cache.hpp" +#include "renderer.hpp" + // HACK: use the OpenGL cache #include "../renderer_gl/surface_cache.hpp" @@ -19,7 +20,7 @@ class GPU; struct Color4 { - float r, g, b, a; + float r, g, b, a; }; class RendererMTL final : public Renderer { @@ -72,7 +73,7 @@ class RendererMTL final : public Renderer { // Pipelines MTL::RenderPipelineState* displayPipeline; - //MTL::RenderPipelineState* copyToLutTexturePipeline; + // 
MTL::RenderPipelineState* copyToLutTexturePipeline; // Clears std::map colorClearOps; @@ -95,93 +96,112 @@ class RendererMTL final : public Renderer { } void endRenderPass() { - if (renderCommandEncoder) { - renderCommandEncoder->endEncoding(); - renderCommandEncoder = nullptr; - } + if (renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder = nullptr; + } } - void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr); + void beginRenderPassIfNeeded( + MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr + ); void commitCommandBuffer() { - if (renderCommandEncoder) { - renderCommandEncoder->endEncoding(); - renderCommandEncoder->release(); - renderCommandEncoder = nullptr; - } - if (commandBuffer) { - commandBuffer->commit(); - // HACK - commandBuffer->waitUntilCompleted(); - commandBuffer->release(); - commandBuffer = nullptr; - } - } + if (renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder->release(); + renderCommandEncoder = nullptr; + } + if (commandBuffer) { + commandBuffer->commit(); + // HACK + commandBuffer->waitUntilCompleted(); + commandBuffer->release(); + commandBuffer = nullptr; + } + } - template - inline void clearAttachment(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment, SetClearDataT setClearData) { - bool beginRenderPass = (renderPassDescriptor == nullptr); - if (!renderPassDescriptor) { - renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); - } + template + inline void clearAttachment( + MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment, + SetClearDataT setClearData + ) { + bool beginRenderPass = (renderPassDescriptor == nullptr); + if 
(!renderPassDescriptor) { + renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + } - AttachmentT* attachment = getAttachment(renderPassDescriptor); + AttachmentT* attachment = getAttachment(renderPassDescriptor); attachment->setTexture(texture); setClearData(attachment, clearData); attachment->setLoadAction(MTL::LoadActionClear); attachment->setStoreAction(MTL::StoreActionStore); if (beginRenderPass) { - if (std::is_same::value) - beginRenderPassIfNeeded(renderPassDescriptor, true, texture); - else - beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture); + if (std::is_same::value) + beginRenderPassIfNeeded(renderPassDescriptor, true, texture); + else + beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture); } - } + } - template - inline bool clearAttachment(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map& clearOps, GetAttachmentT getAttachment, SetClearDataT setClearData) { - auto it = clearOps.find(texture); - if (it != clearOps.end()) { - clearAttachment(renderPassDescriptor, texture, it->second, getAttachment, setClearData); - clearOps.erase(it); - return true; - } + template + inline bool clearAttachment( + MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map& clearOps, + GetAttachmentT getAttachment, SetClearDataT setClearData + ) { + auto it = clearOps.find(texture); + if (it != clearOps.end()) { + clearAttachment(renderPassDescriptor, texture, it->second, getAttachment, setClearData); + clearOps.erase(it); + return true; + } - if (renderPassDescriptor) { - AttachmentT* attachment = getAttachment(renderPassDescriptor); - attachment->setTexture(texture); - attachment->setLoadAction(MTL::LoadActionLoad); - attachment->setStoreAction(MTL::StoreActionStore); - } + if (renderPassDescriptor) { + AttachmentT* attachment = getAttachment(renderPassDescriptor); + attachment->setTexture(texture); + attachment->setLoadAction(MTL::LoadActionLoad); + 
attachment->setStoreAction(MTL::StoreActionStore); + } - return false; - } + return false; + } - bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { - return clearAttachment(renderPassDescriptor, texture, colorClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); }, [](auto attachment, auto& color) { - attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); - }); - } + bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, colorClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); }, + [](auto attachment, auto& color) { attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); } + ); + } - bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { - return clearAttachment(renderPassDescriptor, texture, depthClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); }, [](auto attachment, auto& depth) { - attachment->setClearDepth(depth); - }); - } + bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, depthClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); }, + [](auto attachment, auto& depth) { attachment->setClearDepth(depth); } + ); + } - bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { - return clearAttachment(renderPassDescriptor, texture, stencilClearOps, [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); }, [](auto attachment, auto& stencil) { - attachment->setClearStencil(stencil); - }); - } + bool 
clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, stencilClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); }, + [](auto attachment, auto& stencil) { attachment->setClearStencil(stencil); } + ); + } - std::optional getColorRenderTarget(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); + std::optional getColorRenderTarget( + u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true + ); Metal::DepthStencilRenderTarget& getDepthRenderTarget(); Metal::Texture& getTexture(Metal::Texture& tex); void setupTextureEnvState(MTL::RenderCommandEncoder* encoder); void bindTexturesToSlots(); void updateLightingLUT(MTL::RenderCommandEncoder* encoder); void updateFogLUT(MTL::RenderCommandEncoder* encoder); - void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, const Math::Rect& destRect); + void textureCopyImpl( + Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, + const Math::Rect& destRect + ); }; diff --git a/src/core/renderer_gl/etc1.cpp b/src/core/renderer_gl/etc1.cpp index 8aefd622..0b4ed1a5 100644 --- a/src/core/renderer_gl/etc1.cpp +++ b/src/core/renderer_gl/etc1.cpp @@ -12,8 +12,9 @@ static constexpr u32 signExtend3To32(u32 val) { u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { // Pixel offset of the 8x8 tile based on u, v and the width of the texture u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); - if (!hasAlpha) + if (!hasAlpha) { offs >>= 1; + } // In-tile offsets for u/v u &= 7; diff --git a/src/core/renderer_mtl/mtl_etc1.cpp b/src/core/renderer_mtl/mtl_etc1.cpp index a414df3c..420a60ca 100644 --- a/src/core/renderer_mtl/mtl_etc1.cpp +++ 
b/src/core/renderer_mtl/mtl_etc1.cpp @@ -1,124 +1,116 @@ #include + #include "colour.hpp" -#include "renderer_mtl/renderer_mtl.hpp" #include "renderer_mtl/mtl_texture.hpp" +#include "renderer_mtl/renderer_mtl.hpp" + using namespace Helpers; namespace Metal { - -static constexpr u32 signExtend3To32(u32 val) { - return (u32)(s32(val) << 29 >> 29); -} - -u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { - // Pixel offset of the 8x8 tile based on u, v and the width of the texture - u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); - if (!hasAlpha) - offs >>= 1; - - // In-tile offsets for u/v - u &= 7; - v &= 7; - - // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles - // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes - const u32 subTileSize = hasAlpha ? 16 : 8; - const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in? - - // In-subtile offsets for u/v - u &= 3; - v &= 3; - offs += subTileSize * subTileIndex; - - u32 alpha; - const u64* ptr = reinterpret_cast(data.data() + offs); // Cast to u64* - - if (hasAlpha) { - // First 64 bits of the 4x4 subtile are alpha data - const u64 alphaData = *ptr++; - alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf); - } - else { - alpha = 0xff; // ETC1 without alpha uses ff for every pixel + static constexpr u32 signExtend3To32(u32 val) { + return (u32)(s32(val) << 29 >> 29); } - // Next 64 bits of the subtile are colour data - u64 colourData = *ptr; - return decodeETC(alpha, u, v, colourData); -} + u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { + // Pixel offset of the 8x8 tile based on u, v and the width of the texture + u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); + if (!hasAlpha) { + offs >>= 1; + } -u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) { - static constexpr u32 modifiers[8][2] = { - { 2, 8 }, - { 5, 17 }, - { 9, 29 }, - { 13, 
42 }, - { 18, 60 }, - { 24, 80 }, - { 33, 106 }, - { 47, 183 }, - }; + // In-tile offsets for u/v + u &= 7; + v &= 7; - // Parse colour data for 4x4 block - const u32 subindices = getBits<0, 16, u32>(colourData); - const u32 negationFlags = getBits<16, 16, u32>(colourData); - const bool flip = getBit<32>(colourData); - const bool diffMode = getBit<33>(colourData); + // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles + // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes + const u32 subTileSize = hasAlpha ? 16 : 8; + const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in? - // Note: index1 is indeed stored on the higher bits, with index2 in the lower bits - const u32 tableIndex1 = getBits<37, 3, u32>(colourData); - const u32 tableIndex2 = getBits<34, 3, u32>(colourData); - const u32 texelIndex = u * 4 + v; // Index of the texel in the block + // In-subtile offsets for u/v + u &= 3; + v &= 3; + offs += subTileSize * subTileIndex; - if (flip) - std::swap(u, v); + u32 alpha; + const u64* ptr = reinterpret_cast(data.data() + offs); // Cast to u64* - s32 r, g, b; - if (diffMode) { - r = getBits<59, 5, s32>(colourData); - g = getBits<51, 5, s32>(colourData); - b = getBits<43, 5, s32>(colourData); + if (hasAlpha) { + // First 64 bits of the 4x4 subtile are alpha data + const u64 alphaData = *ptr++; + alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf); + } else { + alpha = 0xff; // ETC1 without alpha uses ff for every pixel + } - if (u >= 2) { - r += signExtend3To32(getBits<56, 3, u32>(colourData)); - g += signExtend3To32(getBits<48, 3, u32>(colourData)); - b += signExtend3To32(getBits<40, 3, u32>(colourData)); - } + // Next 64 bits of the subtile are colour data + u64 colourData = *ptr; + return decodeETC(alpha, u, v, colourData); + } - // Expand from 5 to 8 bits per channel - r = Colour::convert5To8Bit(r); - g = Colour::convert5To8Bit(g); - b = 
Colour::convert5To8Bit(b); - } else { - if (u < 2) { - r = getBits<60, 4, s32>(colourData); - g = getBits<52, 4, s32>(colourData); - b = getBits<44, 4, s32>(colourData); - } else { - r = getBits<56, 4, s32>(colourData); - g = getBits<48, 4, s32>(colourData); - b = getBits<40, 4, s32>(colourData); - } + u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) { + static constexpr u32 modifiers[8][2] = { + {2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}, + }; - // Expand from 4 to 8 bits per channel - r = Colour::convert4To8Bit(r); - g = Colour::convert4To8Bit(g); - b = Colour::convert4To8Bit(b); - } + // Parse colour data for 4x4 block + const u32 subindices = getBits<0, 16, u32>(colourData); + const u32 negationFlags = getBits<16, 16, u32>(colourData); + const bool flip = getBit<32>(colourData); + const bool diffMode = getBit<33>(colourData); - const u32 index = (u < 2) ? tableIndex1 : tableIndex2; - s32 modifier = modifiers[index][(subindices >> texelIndex) & 1]; + // Note: index1 is indeed stored on the higher bits, with index2 in the lower bits + const u32 tableIndex1 = getBits<37, 3, u32>(colourData); + const u32 tableIndex2 = getBits<34, 3, u32>(colourData); + const u32 texelIndex = u * 4 + v; // Index of the texel in the block - if (((negationFlags >> texelIndex) & 1) != 0) { - modifier = -modifier; - } + if (flip) std::swap(u, v); - r = std::clamp(r + modifier, 0, 255); - g = std::clamp(g + modifier, 0, 255); - b = std::clamp(b + modifier, 0, 255); + s32 r, g, b; + if (diffMode) { + r = getBits<59, 5, s32>(colourData); + g = getBits<51, 5, s32>(colourData); + b = getBits<43, 5, s32>(colourData); - return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r); -} + if (u >= 2) { + r += signExtend3To32(getBits<56, 3, u32>(colourData)); + g += signExtend3To32(getBits<48, 3, u32>(colourData)); + b += signExtend3To32(getBits<40, 3, u32>(colourData)); + } -} // namespace Metal + // Expand from 5 to 8 bits per channel + r = 
Colour::convert5To8Bit(r); + g = Colour::convert5To8Bit(g); + b = Colour::convert5To8Bit(b); + } else { + if (u < 2) { + r = getBits<60, 4, s32>(colourData); + g = getBits<52, 4, s32>(colourData); + b = getBits<44, 4, s32>(colourData); + } else { + r = getBits<56, 4, s32>(colourData); + g = getBits<48, 4, s32>(colourData); + b = getBits<40, 4, s32>(colourData); + } + + // Expand from 4 to 8 bits per channel + r = Colour::convert4To8Bit(r); + g = Colour::convert4To8Bit(g); + b = Colour::convert4To8Bit(b); + } + + const u32 index = (u < 2) ? tableIndex1 : tableIndex2; + s32 modifier = modifiers[index][(subindices >> texelIndex) & 1]; + + if (((negationFlags >> texelIndex) & 1) != 0) { + modifier = -modifier; + } + + r = std::clamp(r + modifier, 0, 255); + g = std::clamp(g + modifier, 0, 255); + b = std::clamp(b + modifier, 0, 255); + + return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r); + } +} // namespace Metal diff --git a/src/core/renderer_mtl/mtl_lut_texture.cpp b/src/core/renderer_mtl/mtl_lut_texture.cpp index ac4ff6d9..8486a50c 100644 --- a/src/core/renderer_mtl/mtl_lut_texture.cpp +++ b/src/core/renderer_mtl/mtl_lut_texture.cpp @@ -1,32 +1,27 @@ #include "renderer_mtl/renderer_mtl.hpp" namespace Metal { + static constexpr u32 LAYER_COUNT = 1024; -constexpr u32 LAYER_COUNT = 1024; + LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) { + MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); + desc->setTextureType(type); + desc->setPixelFormat(pixelFormat); + desc->setWidth(width); + desc->setHeight(height); + desc->setArrayLength(LAYER_COUNT); + desc->setUsage(MTL::TextureUsageShaderRead /* | MTL::TextureUsageShaderWrite*/); + desc->setStorageMode(MTL::StorageModeShared); -LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) { - MTL::TextureDescriptor* desc = 
MTL::TextureDescriptor::alloc()->init(); - desc->setTextureType(type); - desc->setPixelFormat(pixelFormat); - desc->setWidth(width); - desc->setHeight(height); - desc->setArrayLength(LAYER_COUNT); - desc->setUsage(MTL::TextureUsageShaderRead/* | MTL::TextureUsageShaderWrite*/); - desc->setStorageMode(MTL::StorageModeShared); + texture = device->newTexture(desc); + texture->setLabel(toNSString(name)); + desc->release(); + } - texture = device->newTexture(desc); - texture->setLabel(toNSString(name)); - desc->release(); -} + LutTexture::~LutTexture() { texture->release(); } -LutTexture::~LutTexture() { - texture->release(); -} - -u32 LutTexture::getNextIndex() { - currentIndex = (currentIndex + 1) % LAYER_COUNT; - - return currentIndex; -} - -} // namespace Metal + u32 LutTexture::getNextIndex() { + currentIndex = (currentIndex + 1) % LAYER_COUNT; + return currentIndex; + } +} // namespace Metal diff --git a/src/core/renderer_mtl/mtl_texture.cpp b/src/core/renderer_mtl/mtl_texture.cpp index b61c5502..149fea26 100644 --- a/src/core/renderer_mtl/mtl_texture.cpp +++ b/src/core/renderer_mtl/mtl_texture.cpp @@ -1,312 +1,308 @@ #include "renderer_mtl/mtl_texture.hpp" -#include "renderer_mtl/objc_helper.hpp" -#include "colour.hpp" + #include +#include "colour.hpp" +#include "renderer_mtl/objc_helper.hpp" + + using namespace Helpers; namespace Metal { + void Texture::allocate() { + formatInfo = PICA::getPixelFormatInfo(format); -void Texture::allocate() { - formatInfo = PICA::getPixelFormatInfo(format); + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(formatInfo.pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers? 
+ texture = device->newTexture(descriptor); + texture->setLabel(toNSString( + "Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v()) + )); + descriptor->release(); - MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); - descriptor->setTextureType(MTL::TextureType2D); - descriptor->setPixelFormat(formatInfo.pixelFormat); - descriptor->setWidth(size.u()); - descriptor->setHeight(size.v()); - descriptor->setUsage(MTL::TextureUsageShaderRead); - descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers? - texture = device->newTexture(descriptor); - texture->setLabel(toNSString("Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v()))); - descriptor->release(); - - setNewConfig(config); -} - -// Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on -void Texture::setNewConfig(u32 cfg) { - config = cfg; - - if (sampler) { - sampler->release(); - } - - const auto magFilter = (cfg & 0x2) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; - const auto minFilter = (cfg & 0x4) != 0 ? 
MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; - const auto wrapT = PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg)); - const auto wrapS = PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg)); - - MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); - samplerDescriptor->setMinFilter(minFilter); - samplerDescriptor->setMagFilter(magFilter); - samplerDescriptor->setSAddressMode(wrapS); - samplerDescriptor->setTAddressMode(wrapT); - - samplerDescriptor->setLabel(toNSString("Sampler")); - sampler = device->newSamplerState(samplerDescriptor); - samplerDescriptor->release(); -} - -void Texture::free() { - valid = false; - - if (texture) { - texture->release(); + setNewConfig(config); } - if (sampler) { - sampler->release(); - } -} -u64 Texture::sizeInBytes() { - u64 pixelCount = u64(size.x()) * u64(size.y()); + // Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on + void Texture::setNewConfig(u32 cfg) { + config = cfg; - switch (format) { - case PICA::TextureFmt::RGBA8: // 4 bytes per pixel - return pixelCount * 4; + if (sampler) { + sampler->release(); + } - case PICA::TextureFmt::RGB8: // 3 bytes per pixel - return pixelCount * 3; + const auto magFilter = (cfg & 0x2) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto minFilter = (cfg & 0x4) != 0 ? 
MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto wrapT = PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg)); + const auto wrapS = PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg)); - case PICA::TextureFmt::RGBA5551: // 2 bytes per pixel - case PICA::TextureFmt::RGB565: - case PICA::TextureFmt::RGBA4: - case PICA::TextureFmt::RG8: - case PICA::TextureFmt::IA8: - return pixelCount * 2; + MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + samplerDescriptor->setMinFilter(minFilter); + samplerDescriptor->setMagFilter(magFilter); + samplerDescriptor->setSAddressMode(wrapS); + samplerDescriptor->setTAddressMode(wrapT); - case PICA::TextureFmt::A8: // 1 byte per pixel - case PICA::TextureFmt::I8: - case PICA::TextureFmt::IA4: - return pixelCount; + samplerDescriptor->setLabel(toNSString("Sampler")); + sampler = device->newSamplerState(samplerDescriptor); + samplerDescriptor->release(); + } - case PICA::TextureFmt::I4: // 4 bits per pixel - case PICA::TextureFmt::A4: - return pixelCount / 2; + void Texture::free() { + valid = false; - case PICA::TextureFmt::ETC1: // Compressed formats - case PICA::TextureFmt::ETC1A4: { - // Number of 4x4 tiles - const u64 tileCount = pixelCount / 16; - // Tiles are 8 bytes each on ETC1 and 16 bytes each on ETC1A4 - const u64 tileSize = format == PICA::TextureFmt::ETC1 ? 
8 : 16; - return tileCount * tileSize; - } + if (texture) { + texture->release(); + } + if (sampler) { + sampler->release(); + } + } - default: - Helpers::panic("[PICA] Attempted to get size of invalid texture type"); - } -} + u64 Texture::sizeInBytes() { + u64 pixelCount = u64(size.x()) * u64(size.y()); -// u and v are the UVs of the relevant texel -// Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here -// https://en.wikipedia.org/wiki/Z-order_curve -// Textures are split into 8x8 tiles.This function returns the in - tile offset depending on the u & v of the texel -// The in - tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of y % 8 -// As documented in this picture https ://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg -u32 Texture::mortonInterleave(u32 u, u32 v) { - static constexpr u32 xOffsets[] = { 0, 1, 4, 5, 16, 17, 20, 21 }; - static constexpr u32 yOffsets[] = { 0, 2, 8, 10, 32, 34, 40, 42 }; + switch (format) { + case PICA::TextureFmt::RGBA8: // 4 bytes per pixel + return pixelCount * 4; - return xOffsets[u & 7] + yOffsets[v & 7]; -} + case PICA::TextureFmt::RGB8: // 3 bytes per pixel + return pixelCount * 3; -// Get the byte offset of texel (u, v) in the texture -u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) { - u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to - offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + case PICA::TextureFmt::RGBA5551: // 2 bytes per pixel + case PICA::TextureFmt::RGB565: + case PICA::TextureFmt::RGBA4: + case PICA::TextureFmt::RG8: + case PICA::TextureFmt::IA8: return pixelCount * 2; - return offset * bytesPerPixel; -} + case PICA::TextureFmt::A8: // 1 byte per pixel + case PICA::TextureFmt::I8: + case PICA::TextureFmt::IA4: return pixelCount; -// Same as the above code except we need to divide by 2 because 4 bits is 
smaller than a byte -u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) { - u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to - offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + case PICA::TextureFmt::I4: // 4 bits per pixel + case PICA::TextureFmt::A4: return pixelCount / 2; - return offset / 2; -} + case PICA::TextureFmt::ETC1: // Compressed formats + case PICA::TextureFmt::ETC1A4: { + // Number of 4x4 tiles + const u64 tileCount = pixelCount / 16; + // Tiles are 8 bytes each on ETC1 and 16 bytes each on ETC1A4 + const u64 tileSize = format == PICA::TextureFmt::ETC1 ? 8 : 16; + return tileCount * tileSize; + } -u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { - switch (fmt) { - case PICA::TextureFmt::A4: { - const u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + default: Helpers::panic("[PICA] Attempted to get size of invalid texture type"); + } + } - // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates - u8 alpha = data[offset] >> ((u % 2) ? 
4 : 0); - alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha)); + // u and v are the UVs of the relevant texel + // Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here + // https://en.wikipedia.org/wiki/Z-order_curve + // Textures are split into 8x8 tiles.This function returns the in - tile offset depending on the u & v of the texel + // The in - tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of y % 8 + // As documented in this picture https ://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg + u32 Texture::mortonInterleave(u32 u, u32 v) { + static constexpr u32 xOffsets[] = {0, 1, 4, 5, 16, 17, 20, 21}; + static constexpr u32 yOffsets[] = {0, 2, 8, 10, 32, 34, 40, 42}; - // A8 - return alpha; - } + return xOffsets[u & 7] + yOffsets[v & 7]; + } - case PICA::TextureFmt::A8: { - u32 offset = getSwizzledOffset(u, v, size.u(), 1); - const u8 alpha = data[offset]; + // Get the byte offset of texel (u, v) in the texture + u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel - // A8 - return alpha; - } + return offset * bytesPerPixel; + } - default: - Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); - } -} + // Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte + u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel -u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { - switch (fmt) { - case PICA::TextureFmt::RG8: { - u32 offset = getSwizzledOffset(u, v, size.u(), 2); - constexpr u8 b = 0; 
- const u8 g = data[offset]; - const u8 r = data[offset + 1]; + return offset / 2; + } - // RG8 - return (g << 8) | r; - } + u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::A4: { + const u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); - case PICA::TextureFmt::RGBA4: { - u32 offset = getSwizzledOffset(u, v, size.u(), 2); - u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 alpha = data[offset] >> ((u % 2) ? 4 : 0); + alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha)); - u8 alpha = getBits<0, 4, u8>(texel); - u8 b = getBits<4, 4, u8>(texel); - u8 g = getBits<8, 4, u8>(texel); - u8 r = getBits<12, 4, u8>(texel); + // A8 + return alpha; + } - // ABGR4 - return (r << 12) | (g << 8) | (b << 4) | alpha; - } + case PICA::TextureFmt::A8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 alpha = data[offset]; - case PICA::TextureFmt::RGBA5551: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 2); - const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + // A8 + return alpha; + } - u8 alpha = getBit<0>(texel) ? 
0xff : 0; - u8 b = getBits<1, 5, u8>(texel); - u8 g = getBits<6, 5, u8>(texel); - u8 r = getBits<11, 5, u8>(texel); + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } - // BGR5A1 - return (alpha << 15) | (r << 10) | (g << 5) | b; - } + u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RG8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + constexpr u8 b = 0; + const u8 g = data[offset]; + const u8 r = data[offset + 1]; - case PICA::TextureFmt::RGB565: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 2); - const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + // RG8 + return (g << 8) | r; + } - const u8 b = getBits<0, 5, u8>(texel); - const u8 g = getBits<5, 6, u8>(texel); - const u8 r = getBits<11, 5, u8>(texel); + case PICA::TextureFmt::RGBA4: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); - // B5G6R5 - return (r << 11) | (g << 5) | b; - } + u8 alpha = getBits<0, 4, u8>(texel); + u8 b = getBits<4, 4, u8>(texel); + u8 g = getBits<8, 4, u8>(texel); + u8 r = getBits<12, 4, u8>(texel); - case PICA::TextureFmt::IA4: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 1); - const u8 texel = data[offset]; - const u8 alpha = texel & 0xf; - const u8 intensity = texel >> 4; + // ABGR4 + return (r << 12) | (g << 8) | (b << 4) | alpha; + } - // ABGR4 - return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha; - } + case PICA::TextureFmt::RGBA5551: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); - case PICA::TextureFmt::I4: { - u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + u8 alpha = getBit<0>(texel) ? 
0xff : 0; + u8 b = getBits<1, 5, u8>(texel); + u8 g = getBits<6, 5, u8>(texel); + u8 r = getBits<11, 5, u8>(texel); - // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates - u8 intensity = data[offset] >> ((u % 2) ? 4 : 0); - intensity = getBits<0, 4>(intensity); + // BGR5A1 + return (alpha << 15) | (r << 10) | (g << 5) | b; + } - // ABGR4 - return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xff; - } + case PICA::TextureFmt::RGB565: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); - default: - Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); - } -} + const u8 b = getBits<0, 5, u8>(texel); + const u8 g = getBits<5, 6, u8>(texel); + const u8 r = getBits<11, 5, u8>(texel); -u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { - switch (fmt) { - case PICA::TextureFmt::RGB8: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 3); - const u8 b = data[offset]; - const u8 g = data[offset + 1]; - const u8 r = data[offset + 2]; + // B5G6R5 + return (r << 11) | (g << 5) | b; + } - // RGBA8 - return (0xff << 24) | (b << 16) | (g << 8) | r; - } + case PICA::TextureFmt::IA4: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 texel = data[offset]; + const u8 alpha = texel & 0xf; + const u8 intensity = texel >> 4; - case PICA::TextureFmt::RGBA8: { - const u32 offset = getSwizzledOffset(u, v, size.u(), 4); - const u8 alpha = data[offset]; - const u8 b = data[offset + 1]; - const u8 g = data[offset + 2]; - const u8 r = data[offset + 3]; + // ABGR4 + return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha; + } - // RGBA8 - return (alpha << 24) | (b << 16) | (g << 8) | r; - } + case PICA::TextureFmt::I4: { + u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); - case PICA::TextureFmt::I8: { - u32 offset = getSwizzledOffset(u, v, size.u(), 1); - const 
u8 intensity = data[offset]; + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 intensity = data[offset] >> ((u % 2) ? 4 : 0); + intensity = getBits<0, 4>(intensity); - // RGBA8 - return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity; - } + // ABGR4 + return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xff; + } - case PICA::TextureFmt::IA8: { - u32 offset = getSwizzledOffset(u, v, size.u(), 2); + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } - // Same as I8 except each pixel gets its own alpha value too - const u8 alpha = data[offset]; - const u8 intensity = data[offset + 1]; + u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RGB8: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 3); + const u8 b = data[offset]; + const u8 g = data[offset + 1]; + const u8 r = data[offset + 2]; - // RGBA8 - return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity; - } + // RGBA8 + return (0xff << 24) | (b << 16) | (g << 8) | r; + } - case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data); - case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data); + case PICA::TextureFmt::RGBA8: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 4); + const u8 alpha = data[offset]; + const u8 b = data[offset + 1]; + const u8 g = data[offset + 2]; + const u8 r = data[offset + 3]; - default: - Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); - } -} + // RGBA8 + return (alpha << 24) | (b << 16) | (g << 8) | r; + } -void Texture::decodeTexture(std::span data) { - std::vector decoded; - decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel); + case PICA::TextureFmt::I8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 intensity = data[offset]; - // Decode 
texels line by line - for (u32 v = 0; v < size.v(); v++) { - for (u32 u = 0; u < size.u(); u++) { - if (formatInfo.bytesPerTexel == 1) { - u8 texel = decodeTexelU8(u, v, format, data); - decoded.push_back(texel); - } else if (formatInfo.bytesPerTexel == 2) { - u16 texel = decodeTexelU16(u, v, format, data); - decoded.push_back((texel & 0x00ff) >> 0); - decoded.push_back((texel & 0xff00) >> 8); - } else if (formatInfo.bytesPerTexel == 4) { - u32 texel = decodeTexelU32(u, v, format, data); - decoded.push_back((texel & 0x000000ff) >> 0); - decoded.push_back((texel & 0x0000ff00) >> 8); - decoded.push_back((texel & 0x00ff0000) >> 16); - decoded.push_back((texel & 0xff000000) >> 24); - } else { - Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel); - } - } - } + // RGBA8 + return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity; + } - texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0); -} + case PICA::TextureFmt::IA8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); -} // namespace Metal + // Same as I8 except each pixel gets its own alpha value too + const u8 alpha = data[offset]; + const u8 intensity = data[offset + 1]; + + // RGBA8 + return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity; + } + + case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data); + case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data); + + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } + + void Texture::decodeTexture(std::span data) { + std::vector decoded; + decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel); + + // Decode texels line by line + for (u32 v = 0; v < size.v(); v++) { + for (u32 u = 0; u < size.u(); u++) { + if (formatInfo.bytesPerTexel == 1) { + u8 texel = decodeTexelU8(u, v, format, data); + 
decoded.push_back(texel); + } else if (formatInfo.bytesPerTexel == 2) { + u16 texel = decodeTexelU16(u, v, format, data); + decoded.push_back((texel & 0x00ff) >> 0); + decoded.push_back((texel & 0xff00) >> 8); + } else if (formatInfo.bytesPerTexel == 4) { + u32 texel = decodeTexelU32(u, v, format, data); + decoded.push_back((texel & 0x000000ff) >> 0); + decoded.push_back((texel & 0x0000ff00) >> 8); + decoded.push_back((texel & 0x00ff0000) >> 16); + decoded.push_back((texel & 0xff000000) >> 24); + } else { + Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel); + } + } + } + + texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0); + } +} // namespace Metal diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index 8401eecb..a0c1888a 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -2,9 +2,10 @@ #include #include + #include "renderer_mtl/mtl_lut_texture.hpp" -// HACK +// Hack: Apple annoyingly defines a global "NO" macro which ends up conflicting with our own code... #undef NO #include "PICA/gpu.hpp" @@ -14,8 +15,10 @@ using namespace PICA; CMRC_DECLARE(RendererMTL); -const u16 LIGHTING_LUT_TEXTURE_WIDTH = 256; -const u32 FOG_LUT_TEXTURE_WIDTH = 128; +static constexpr u16 LIGHTING_LUT_TEXTURE_WIDTH = 256; +static constexpr u32 FOG_LUT_TEXTURE_WIDTH = 128; +// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices +static constexpr uint VERTEX_BUFFER_BINDING_INDEX = 30; // HACK: redefinition... 
PICA::ColorFmt ToColorFormat(u32 format) { @@ -40,6 +43,7 @@ MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) { RendererMTL::RendererMTL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) : Renderer(gpu, internalRegs, externalRegs) {} + RendererMTL::~RendererMTL() {} void RendererMTL::reset() { @@ -78,7 +82,7 @@ void RendererMTL::display() { clearColor(nullptr, bottomScreen->get().texture); } - // -------- Draw -------- + // Draw commandBuffer->pushDebugGroup(toNSString("Display")); MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); @@ -130,8 +134,6 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { metalLayer->setDevice(device); commandQueue = device->newCommandQueue(); - // -------- Objects -------- - // Textures MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); textureDescriptor->setTextureType(MTL::TextureType2D); @@ -157,7 +159,9 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { samplerDescriptor->release(); - lutLightingTexture = new Metal::LutTexture(device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture"); + lutLightingTexture = new Metal::LutTexture( + device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture" + ); lutFogTexture = new Metal::LutTexture(device, MTL::TextureType1DArray, MTL::PixelFormatRG32Float, FOG_LUT_TEXTURE_WIDTH, 1, "Fog LUT texture"); // -------- Pipelines -------- @@ -166,7 +170,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { auto mtlResources = cmrc::RendererMTL::get_filesystem(); library = loadLibrary(device, mtlResources.open("metal_shaders.metallib")); MTL::Library* blitLibrary = loadLibrary(device, mtlResources.open("metal_blit.metallib")); - //MTL::Library* copyToLutTextureLibrary = loadLibrary(device, 
mtlResources.open("metal_copy_to_lut_texture.metallib")); + // MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); // Display MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding)); @@ -295,9 +299,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { defaultDepthStencilState = device->newDepthStencilState(depthStencilDescriptor); depthStencilDescriptor->release(); - // Release blitLibrary->release(); - //copyToLutTextureLibrary->release(); + // copyToLutTextureLibrary->release(); } void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { @@ -592,8 +595,7 @@ void RendererMTL::deinitGraphicsContext() { delete lutLightingTexture; delete lutFogTexture; - // Release - //copyToLutTexturePipeline->release(); + // copyToLutTexturePipeline->release(); displayPipeline->release(); defaultDepthStencilState->release(); nullTexture->release(); @@ -700,9 +702,9 @@ void RendererMTL::bindTexturesToSlots() { for (int i = 0; i < 3; i++) { if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { - commandEncoder.setFragmentTexture(nullTexture, i); - commandEncoder.setFragmentSamplerState(nearestSampler, i); - continue; + commandEncoder.setFragmentTexture(nullTexture, i); + commandEncoder.setFragmentSamplerState(nearestSampler, i); + continue; } const size_t ioBase = ioBases[i]; @@ -736,7 +738,9 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { } u32 index = lutLightingTexture->getNextIndex(); - lutLightingTexture->getTexture()->replaceRegion(MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0); + lutLightingTexture->getTexture()->replaceRegion( + MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0 + ); /* endRenderPass(); @@ 
-768,7 +772,7 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { gpu.fogLUTDirty = false; - std::array fogLut = {0.0f}; + std::array fogLut = {0.0f}; for (int i = 0; i < fogLut.size(); i += 2) { const uint32_t value = gpu.fogLUT[i >> 1]; @@ -807,7 +811,8 @@ void RendererMTL::textureCopyImpl( ) { nextRenderPassName = "Texture copy"; MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); - // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole texture + // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole + // texture bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture); beginRenderPassIfNeeded(renderPassDescriptor, doesClear, destFramebuffer.texture); @@ -819,11 +824,13 @@ void RendererMTL::textureCopyImpl( // Viewport renderCommandEncoder->setViewport(MTL::Viewport{ - double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0 - }); + double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0}); + float srcRectNDC[4] = { - srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(), - (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v() + srcRect.left / (float)srcFramebuffer.size.u(), + srcRect.bottom / (float)srcFramebuffer.size.v(), + (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), + (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v(), }; // Bind resources @@ -834,25 +841,28 @@ void RendererMTL::textureCopyImpl( 
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); } -void RendererMTL::beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture) { +void RendererMTL::beginRenderPassIfNeeded( + MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture +) { createCommandBufferIfNeeded(); - if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { - endRenderPass(); + if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || + (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { + endRenderPass(); - renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); - renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); - commandEncoder.newRenderCommandEncoder(renderCommandEncoder); + renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); + renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); + commandEncoder.newRenderCommandEncoder(renderCommandEncoder); - // Bind persistent resources + // Bind persistent resources - // LUT texture - renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3); - renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4); - renderCommandEncoder->setFragmentSamplerState(linearSampler, 3); + // LUT texture + renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3); + renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4); + renderCommandEncoder->setFragmentSamplerState(linearSampler, 3); - lastColorTexture = colorTexture; - lastDepthTexture = depthTexture; + lastColorTexture = colorTexture; + lastDepthTexture = depthTexture; } renderPassDescriptor->release(); From 
10451a676b9d37b99700bb0d2fbf617fba80211b Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 9 Nov 2024 13:25:24 +0200 Subject: [PATCH 14/16] Metal: Remove padding in DrawFragmentFunctionHash --- include/renderer_mtl/mtl_draw_pipeline_cache.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp index ace324fe..47ea5314 100644 --- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -8,9 +8,9 @@ using namespace PICA; namespace Metal { struct DrawFragmentFunctionHash { + u32 lightingConfig1; // 32 bits (TODO: check this) bool lightingEnabled; // 1 bit u8 lightingNumLights; // 3 bits - u32 lightingConfig1; // 32 bits (TODO: check this) // | ref | func | on | u16 alphaControl; // 12 bits (mask: 11111111 0111 0001) }; From d7e4cf18b56ab4fa2e1cda034268c765fd50442d Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 9 Nov 2024 13:44:41 +0200 Subject: [PATCH 15/16] Metal: IWYU fixes --- include/renderer_mtl/mtl_blit_pipeline_cache.hpp | 1 + include/renderer_mtl/mtl_draw_pipeline_cache.hpp | 1 + include/renderer_mtl/mtl_texture.hpp | 1 - include/renderer_mtl/mtl_vertex_buffer_cache.hpp | 15 ++++++++++----- include/renderer_mtl/pica_to_mtl.hpp | 12 +++++------- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp index 02e075b2..1fa47f42 100644 --- a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp @@ -2,6 +2,7 @@ #include +#include "objc_helper.hpp" #include "pica_to_mtl.hpp" using namespace PICA; diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp index 47ea5314..7178785e 
100644 --- a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -2,6 +2,7 @@ #include +#include "objc_helper.hpp" #include "pica_to_mtl.hpp" using namespace PICA; diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp index 51cb4c4b..93103091 100644 --- a/include/renderer_mtl/mtl_texture.hpp +++ b/include/renderer_mtl/mtl_texture.hpp @@ -11,7 +11,6 @@ #include "opengl.hpp" #include "renderer_mtl/pica_to_mtl.hpp" - template using Interval = boost::icl::right_open_interval; diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp index d53af283..b392389c 100644 --- a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -1,13 +1,17 @@ #pragma once +#include + +#include "helpers.hpp" #include "pica_to_mtl.hpp" + using namespace PICA; namespace Metal { struct BufferHandle { MTL::Buffer* buffer; - size_t offset; + usize offset; }; class VertexBufferCache { @@ -35,7 +39,7 @@ namespace Metal { additionalAllocations.clear(); } - BufferHandle get(const void* data, size_t size) { + BufferHandle get(const void* data, usize size) { // If the vertex buffer is too large, just create a new one if (ptr + size > CACHE_BUFFER_SIZE) { MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared); @@ -47,9 +51,9 @@ namespace Metal { } // Copy the data into the buffer - memcpy((char*)buffer->contents() + ptr, data, size); + std::memcpy((char*)buffer->contents() + ptr, data, size); - size_t oldPtr = ptr; + auto oldPtr = ptr; ptr += size; return BufferHandle{buffer, oldPtr}; @@ -57,6 +61,7 @@ namespace Metal { void reset() { endFrame(); + if (buffer) { buffer->release(); create(); @@ -65,7 +70,7 @@ namespace Metal { private: MTL::Buffer* buffer = nullptr; - size_t ptr = 0; + usize ptr = 0; std::vector additionalAllocations; MTL::Device* device; diff --git 
a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp index 9234c748..715088b4 100644 --- a/include/renderer_mtl/pica_to_mtl.hpp +++ b/include/renderer_mtl/pica_to_mtl.hpp @@ -4,7 +4,6 @@ #include "PICA/regs.hpp" - namespace PICA { struct PixelFormatInfo { MTL::PixelFormat pixelFormat; @@ -61,7 +60,7 @@ namespace PICA { case 5: return MTL::CompareFunctionLessEqual; case 6: return MTL::CompareFunctionGreater; case 7: return MTL::CompareFunctionGreaterEqual; - default: panic("Unknown compare function %u", func); + default: Helpers::panic("Unknown compare function %u", func); } return MTL::CompareFunctionAlways; @@ -77,7 +76,7 @@ namespace PICA { case 5: return MTL::BlendOperationAdd; // Unused (same as 0) case 6: return MTL::BlendOperationAdd; // Unused (same as 0) case 7: return MTL::BlendOperationAdd; // Unused (same as 0) - default: panic("Unknown blend operation %u", op); + default: Helpers::panic("Unknown blend operation %u", op); } return MTL::BlendOperationAdd; @@ -101,7 +100,7 @@ namespace PICA { case 13: return MTL::BlendFactorOneMinusBlendAlpha; case 14: return MTL::BlendFactorSourceAlphaSaturated; case 15: return MTL::BlendFactorOne; // Undocumented - default: panic("Unknown blend factor %u", factor); + default: Helpers::panic("Unknown blend factor %u", factor); } return MTL::BlendFactorOne; @@ -117,7 +116,7 @@ namespace PICA { case 5: return MTL::StencilOperationInvert; case 6: return MTL::StencilOperationIncrementWrap; case 7: return MTL::StencilOperationDecrementWrap; - default: panic("Unknown stencil operation %u", op); + default: Helpers::panic("Unknown stencil operation %u", op); } return MTL::StencilOperationKeep; @@ -131,7 +130,6 @@ namespace PICA { Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); return MTL::PrimitiveTypeTriangle; case PrimType::GeometryPrimitive: - // Helpers::warn("Geometry primitives are not yet, using triangles instead"); return MTL::PrimitiveTypeTriangle; } } @@ 
-146,7 +144,7 @@ namespace PICA { case 5: return MTL::SamplerAddressModeClampToBorderColor; case 6: return MTL::SamplerAddressModeRepeat; case 7: return MTL::SamplerAddressModeRepeat; - default: panic("Unknown sampler address mode %u", addrMode); + default: Helpers::panic("Unknown sampler address mode %u", addrMode); } return MTL::SamplerAddressModeClampToEdge; From e47923704e4c069ac2e3f882c69f915e3abb8a7b Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Sat, 9 Nov 2024 14:47:58 +0200 Subject: [PATCH 16/16] Relieve @SamoZ256 of dark memories --- src/core/renderer_mtl/renderer_mtl.cpp | 49 ++------------------------ 1 file changed, 2 insertions(+), 47 deletions(-) diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index a0c1888a..df1f8b47 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -114,9 +114,7 @@ void RendererMTL::display() { endRenderPass(); commandBuffer->presentDrawable(drawable); - commandBuffer->popDebugGroup(); - commitCommandBuffer(); // Inform the vertex buffer cache that the frame ended @@ -623,12 +621,10 @@ std::optional RendererMTL::getColorRenderTarget( // Otherwise create and cache a new buffer. Metal::ColorRenderTarget sampleBuffer(device, addr, format, width, height); - auto& colorBuffer = colorRenderTargetCache.add(sampleBuffer); // Clear the color buffer colorClearOps[colorBuffer.texture] = {0, 0, 0, 0}; - return colorBuffer; } @@ -722,7 +718,8 @@ void RendererMTL::bindTexturesToSlots() { commandEncoder.setFragmentTexture(tex.texture, i); commandEncoder.setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); } else { - // TODO: log + // TODO: Bind a blank texture here. 
Some games, like Pokemon X, will render with a texture bound to nullptr, triggering GPU open bus + // Binding a blank texture makes all of those games look normal } } } @@ -741,32 +738,6 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { lutLightingTexture->getTexture()->replaceRegion( MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0 ); - - /* - endRenderPass(); - - Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); - - auto blitCommandEncoder = commandBuffer->blitCommandEncoder(); - blitCommandEncoder->copyFromBuffer(buffer.buffer, buffer.offset, LIGHT_LUT_TEXTURE_WIDTH * 2 * 4, 0, MTL::Size(LIGHT_LUT_TEXTURE_WIDTH, - Lights::LUT_Count, 1), lutLightingTexture, 0, 0, MTL::Origin(0, 0, 0)); - - blitCommandEncoder->endEncoding(); - */ - - /* - renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); - renderCommandEncoder->setVertexTexture(lutLightingTexture, 0); - Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); - renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); - u32 arrayOffset = 0; - renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); - - renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), GPU::LightingLutSize); - - MTL::Resource* barrierResources[] = {lutLightingTexture}; - renderCommandEncoder->memoryBarrier(barrierResources, 1, MTL::RenderStageVertex, MTL::RenderStageFragment); - */ } void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { @@ -787,22 +758,6 @@ void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { u32 index = lutFogTexture->getNextIndex(); lutFogTexture->getTexture()->replaceRegion(MTL::Region(0, 0, FOG_LUT_TEXTURE_WIDTH, 1), 0, index, fogLut.data(), 0, 0); - - /* - renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); - 
renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); - renderCommandEncoder->setVertexTexture(lutLightingTexture, 0); - // Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); - // renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); - renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0); - u32 arrayOffset = (u32)Lights::LUT_Count; - renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); - - renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(128)); - - MTL::Resource* barrierResources[] = {lutLightingTexture}; - renderCommandEncoder->memoryBarrier(barrierResources, 1, MTL::RenderStageVertex, MTL::RenderStageFragment); - */ } void RendererMTL::textureCopyImpl(