Merge branch 'master' into more-dsp

This commit is contained in:
wheremyfoodat 2024-11-09 16:33:33 +02:00
commit 9be353a9b4
33 changed files with 3226 additions and 16 deletions

4
.gitignore vendored
View file

@ -64,5 +64,9 @@ fb.bat
*.elf
*.smdh
# Compiled Metal shader files
*.ir
*.metallib
config.toml
CMakeSettings.json

View file

@ -33,11 +33,11 @@ endif()
if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security -Wno-invalid-offsetof")
endif()
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-interference-size")
endif()
endif()
if(ANDROID)
set(DEFAULT_OPENGL_PROFILE OpenGLES)
@ -49,6 +49,7 @@ option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" ON
option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF)
option(ENABLE_OPENGL "Enable OpenGL rendering backend" ON)
option(ENABLE_VULKAN "Enable Vulkan rendering backend" ON)
option(ENABLE_METAL "Enable Metal rendering backend (if available)" ON)
option(ENABLE_LTO "Enable link-time optimization" OFF)
option(ENABLE_TESTS "Compile unit-tests" OFF)
option(ENABLE_USER_BUILD "Make a user-facing build. These builds have various assertions disabled, LTO, and more" OFF)
@ -82,7 +83,7 @@ endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND ENABLE_USER_BUILD)
# Disable stack buffer overflow checks in user builds
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS-")
endif()
endif()
# Generate versioning files
find_package(Git)
@ -311,7 +312,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
include/services/mic.hpp include/services/cecd.hpp include/services/ac.hpp
include/services/am.hpp include/services/boss.hpp include/services/frd.hpp include/services/nim.hpp
include/fs/archive_ext_save_data.hpp include/fs/archive_ncch.hpp include/services/mcu/mcu_hwc.hpp
include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp
include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp
include/services/ldr_ro.hpp include/ipc.hpp include/services/act.hpp include/services/nfc.hpp
include/system_models.hpp include/services/dlp_srvr.hpp include/PICA/dynapica/pica_recs.hpp
include/PICA/dynapica/x64_regs.hpp include/PICA/dynapica/vertex_loader_rec.hpp include/PICA/dynapica/shader_rec.hpp
@ -322,7 +323,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
include/config.hpp include/services/ir_user.hpp include/http_server.hpp include/cheats.hpp
include/action_replay.hpp include/renderer_sw/renderer_sw.hpp include/compiler_builtins.hpp
include/fs/romfs.hpp include/fs/ivfc.hpp include/discord_rpc.hpp include/services/http.hpp include/result/result_cfg.hpp
include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp
include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp
include/services/news_u.hpp include/applets/software_keyboard.hpp include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp
include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp
include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp
@ -491,8 +492,82 @@ if(ENABLE_VULKAN)
target_link_libraries(AlberCore PRIVATE Vulkan::Vulkan resources_renderer_vk)
endif()
# Metal rendering backend. Only configured when ENABLE_METAL is set and we are building for an Apple platform.
if(ENABLE_METAL AND APPLE)
    set(RENDERER_MTL_INCLUDE_FILES include/renderer_mtl/renderer_mtl.hpp
        include/renderer_mtl/mtl_depth_stencil_cache.hpp
        include/renderer_mtl/mtl_blit_pipeline_cache.hpp
        include/renderer_mtl/mtl_draw_pipeline_cache.hpp
        include/renderer_mtl/mtl_render_target.hpp
        include/renderer_mtl/mtl_texture.hpp
        include/renderer_mtl/mtl_vertex_buffer_cache.hpp
        include/renderer_mtl/mtl_lut_texture.hpp
        include/renderer_mtl/mtl_command_encoder.hpp
        include/renderer_mtl/mtl_common.hpp
        include/renderer_mtl/pica_to_mtl.hpp
        include/renderer_mtl/objc_helper.hpp
    )

    set(RENDERER_MTL_SOURCE_FILES src/core/renderer_mtl/metal_cpp_impl.cpp
        src/core/renderer_mtl/renderer_mtl.cpp
        src/core/renderer_mtl/mtl_texture.cpp
        src/core/renderer_mtl/mtl_etc1.cpp
        src/core/renderer_mtl/mtl_lut_texture.cpp
        src/core/renderer_mtl/objc_helper.mm
        src/host_shaders/metal_shaders.metal
        src/host_shaders/metal_blit.metal
        #src/host_shaders/metal_copy_to_lut_texture.metal
    )

    list(APPEND HEADER_FILES ${RENDERER_MTL_INCLUDE_FILES})
    source_group("Source Files\\Core\\Metal Renderer" FILES ${RENDERER_MTL_SOURCE_FILES})

    # Accumulates the .metallib outputs registered by add_metal_shader()
    set(RENDERER_MTL_HOST_SHADERS_SOURCES)

    # add_metal_shader(<name>)
    #
    # Registers build rules that compile src/host_shaders/<name>.metal into <name>.ir and then link that into
    # <name>.metallib, and appends the .metallib path to RENDERER_MTL_HOST_SHADERS_SOURCES in the caller's scope.
    # Example: add_metal_shader(metal_blit)
    function(add_metal_shader SHADER)
        set(SHADER_SOURCE "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metal")
        set(SHADER_IR "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.ir")
        set(SHADER_METALLIB "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metallib")

        # TODO: only include sources in debug builds
        add_custom_command(
            OUTPUT "${SHADER_IR}"
            COMMAND xcrun -sdk macosx metal -gline-tables-only -frecord-sources -o "${SHADER_IR}" -c "${SHADER_SOURCE}"
            DEPENDS "${SHADER_SOURCE}"
            VERBATIM)
        add_custom_command(
            OUTPUT "${SHADER_METALLIB}"
            COMMAND xcrun -sdk macosx metallib -o "${SHADER_METALLIB}" "${SHADER_IR}"
            DEPENDS "${SHADER_IR}"
            VERBATIM)

        # A function has its own variable scope: without PARENT_SCOPE the appended list would be discarded on
        # return and compile_msl_shaders below would end up with an empty DEPENDS list.
        list(APPEND RENDERER_MTL_HOST_SHADERS_SOURCES "${SHADER_METALLIB}")
        set(RENDERER_MTL_HOST_SHADERS_SOURCES "${RENDERER_MTL_HOST_SHADERS_SOURCES}" PARENT_SCOPE)
    endfunction()

    add_metal_shader(metal_shaders)
    add_metal_shader(metal_blit)
    #add_metal_shader(metal_copy_to_lut_texture)

    # Target that drives the shader compilation rules registered above
    add_custom_target(
        compile_msl_shaders
        DEPENDS ${RENDERER_MTL_HOST_SHADERS_SOURCES}
    )

    # Embed the compiled shader libraries into the binary
    cmrc_add_resource_library(
        resources_renderer_mtl
        NAMESPACE RendererMTL
        WHENCE "src/host_shaders/"
        "src/host_shaders/metal_shaders.metallib"
        "src/host_shaders/metal_blit.metallib"
        #"src/host_shaders/metal_copy_to_lut_texture.metallib"
    )
    # The resource library must not be built before the .metallib files exist
    add_dependencies(resources_renderer_mtl compile_msl_shaders)

    target_sources(AlberCore PRIVATE ${RENDERER_MTL_SOURCE_FILES})
    target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_METAL=1")
    target_include_directories(AlberCore PRIVATE third_party/metal-cpp)
    # TODO: check if all of them are needed
    target_link_libraries(AlberCore PRIVATE "-framework Metal" "-framework Foundation" "-framework QuartzCore" resources_renderer_mtl)
endif()
source_group("Header Files\\Core" FILES ${HEADER_FILES})
set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES}
set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES}
${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES} ${APPLET_SOURCE_FILES} ${RENDERER_SW_SOURCE_FILES} ${PICA_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES}
${AUDIO_SOURCE_FILES} ${HEADER_FILES} ${FRONTEND_HEADER_FILES})
target_sources(AlberCore PRIVATE ${ALL_SOURCES})
@ -537,7 +612,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE)
if(NOT ENABLE_OPENGL)
message(FATAL_ERROR "Qt frontend requires OpenGL")
endif()
option(GENERATE_QT_TRANSLATION "Generate Qt translation file" OFF)
set(QT_LANGUAGES docs/translations)
@ -591,7 +666,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE)
qt_add_resources(AlberCore "app_images"
PREFIX "/"
FILES
docs/img/rsob_icon.png docs/img/rstarstruck_icon.png docs/img/rpog_icon.png
docs/img/rsob_icon.png docs/img/rstarstruck_icon.png docs/img/rpog_icon.png docs/img/rsyn_icon.png
)
else()
set(FRONTEND_SOURCE_FILES src/panda_sdl/main.cpp src/panda_sdl/frontend_sdl.cpp src/panda_sdl/mappings.cpp)

BIN
docs/img/rsyn_icon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

View file

@ -129,6 +129,7 @@ class MainWindow : public QMainWindow {
// Tracks whether we are using an OpenGL-backed renderer or a Vulkan-backed renderer
bool usingGL = false;
bool usingVk = false;
bool usingMtl = false;
// Variables to keep track of whether the user is controlling the 3DS analog stick with their keyboard
// This is done so when a gamepad is connected, we won't automatically override the 3DS analog stick settings with the gamepad's state

View file

@ -18,7 +18,8 @@ enum class RendererType : s8 {
Null = 0,
OpenGL = 1,
Vulkan = 2,
Software = 3,
Metal = 3,
Software = 4,
};
struct EmulatorConfig;

View file

@ -19,8 +19,6 @@ template <typename SurfaceType, size_t capacity, bool evictOnOverflow = false>
class SurfaceCache {
// Vanilla std::optional can't hold actual references
using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>;
static_assert(std::is_same<SurfaceType, ColourBuffer>() || std::is_same<SurfaceType, DepthBuffer>() ||
std::is_same<SurfaceType, Texture>(), "Invalid surface type");
size_t size;
size_t evictionIndex;

View file

@ -0,0 +1,74 @@
#pragma once
#include <map>
#include "objc_helper.hpp"
#include "pica_to_mtl.hpp"
using namespace PICA;
namespace Metal {
// Key used to look up a blit pipeline: the colour and depth formats of the attachments it targets
struct BlitPipelineHash {
// Formats
ColorFmt colorFmt;
DepthFmt depthFmt;
};
// Caches blit render pipeline states, one per colour/depth attachment format combination.
// The vertex and fragment functions handed to set() are released when the cache is destroyed.
class BlitPipelineCache {
  public:
    BlitPipelineCache() = default;

    ~BlitPipelineCache() {
        reset();
        // set() may never have been called, in which case there is nothing to release
        if (vertexFunction) {
            vertexFunction->release();
        }
        if (fragmentFunction) {
            fragmentFunction->release();
        }
    }

    // Provides the device used to build pipelines and the shader functions every blit pipeline shares
    void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) {
        device = dev;
        vertexFunction = vert;
        fragmentFunction = frag;
    }

    // Returns the pipeline state for the given attachment formats, creating and caching it on first use
    MTL::RenderPipelineState* get(BlitPipelineHash hash) {
        // Pack both formats into a single byte to use as the map key
        u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt;
        auto& pipeline = pipelineCache[intHash];
        if (!pipeline) {
            MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
            desc->setVertexFunction(vertexFunction);
            desc->setFragmentFunction(fragmentFunction);

            auto colorAttachment = desc->colorAttachments()->object(0);
            colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));
            desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt));

            NS::Error* error = nullptr;
            desc->setLabel(toNSString("Blit pipeline"));
            pipeline = device->newRenderPipelineState(desc, &error);
            if (error) {
                Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
            }

            desc->release();
        }

        return pipeline;
    }

    // Releases every cached pipeline state and empties the cache
    void reset() {
        for (auto& pair : pipelineCache) {
            if (pair.second) {
                pair.second->release();
            }
        }
        pipelineCache.clear();
    }

  private:
    std::map<u8, MTL::RenderPipelineState*> pipelineCache;

    // Null until set() is called, so the destructor never touches an indeterminate pointer
    MTL::Device* device = nullptr;
    MTL::Function* vertexFunction = nullptr;
    MTL::Function* fragmentFunction = nullptr;
};
} // namespace Metal

View file

@ -0,0 +1,56 @@
#pragma once
#include <Metal/Metal.hpp>
namespace Metal {
// Snapshot of the pipeline state, depth-stencil state, fragment textures and fragment samplers (3 slots each) last recorded for an encoder
// Everything defaults to null, i.e. "nothing bound yet"
struct RenderState {
MTL::RenderPipelineState* renderPipelineState = nullptr;
MTL::DepthStencilState* depthStencilState = nullptr;
MTL::Texture* textures[3] = {nullptr};
MTL::SamplerState* samplerStates[3] = {nullptr};
};
// Thin wrapper around an MTL::RenderCommandEncoder that remembers what was bound last and skips redundant
// state changes.
class CommandEncoder {
  public:
    // Starts tracking a new encoder; nothing is considered bound on it yet
    void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) {
        renderCommandEncoder = rce;

        // Reset the render state
        renderState = RenderState{};
    }

    // Resource binding
    void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) {
        if (renderPipelineState != renderState.renderPipelineState) {
            renderCommandEncoder->setRenderPipelineState(renderPipelineState);
            renderState.renderPipelineState = renderPipelineState;
        }
    }

    void setDepthStencilState(MTL::DepthStencilState* depthStencilState) {
        if (depthStencilState != renderState.depthStencilState) {
            renderCommandEncoder->setDepthStencilState(depthStencilState);
            renderState.depthStencilState = depthStencilState;
        }
    }

    void setFragmentTexture(MTL::Texture* texture, u32 index) {
        // Only the first few slots are tracked. Anything beyond them is forwarded unconditionally instead of
        // reading/writing past the end of the tracking array.
        if (index >= sizeof(renderState.textures) / sizeof(renderState.textures[0])) {
            renderCommandEncoder->setFragmentTexture(texture, index);
            return;
        }

        if (texture != renderState.textures[index]) {
            renderCommandEncoder->setFragmentTexture(texture, index);
            renderState.textures[index] = texture;
        }
    }

    void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) {
        // Same out-of-range handling as setFragmentTexture
        if (index >= sizeof(renderState.samplerStates) / sizeof(renderState.samplerStates[0])) {
            renderCommandEncoder->setFragmentSamplerState(samplerState, index);
            return;
        }

        if (samplerState != renderState.samplerStates[index]) {
            renderCommandEncoder->setFragmentSamplerState(samplerState, index);
            renderState.samplerStates[index] = samplerState;
        }
    }

  private:
    MTL::RenderCommandEncoder* renderCommandEncoder = nullptr;
    RenderState renderState;
};
} // namespace Metal

View file

@ -0,0 +1,6 @@
#pragma once
#include <Metal/Metal.hpp>
// Map a helper resource index to a binding slot counted downwards from slot 30 (textures) or 15 (samplers).
// The argument is parenthesised so that expressions such as `a + b` are subtracted as a whole.
#define GET_HELPER_TEXTURE_BINDING(binding) (30 - (binding))
#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - (binding))

View file

@ -0,0 +1,80 @@
#pragma once
#include <map>
#include "pica_to_mtl.hpp"
using namespace PICA;
namespace Metal {
// Key used to look up a depth-stencil state: the stencil configuration words plus the depth write flag and depth compare function
struct DepthStencilHash {
u32 stencilConfig;
u16 stencilOpConfig;
bool depthStencilWrite;
u8 depthFunc;
};
// Caches MTL::DepthStencilState objects so that each distinct depth/stencil configuration is only built once
class DepthStencilCache {
  public:
    DepthStencilCache() = default;

    ~DepthStencilCache() { reset(); }

    void set(MTL::Device* dev) { device = dev; }

    // Returns the state object matching `hash`, building and caching it the first time it is requested
    MTL::DepthStencilState* get(DepthStencilHash hash) {
        // Pack all four fields into one 64-bit map key; the shifts keep the fields from overlapping
        u64 key = (u64)hash.stencilOpConfig;
        key |= (u64)hash.stencilConfig << 16;
        key |= (u64)hash.depthFunc << 48;
        key |= (u64)hash.depthStencilWrite << 56;

        auto& cachedState = depthStencilCache[key];
        if (cachedState) {
            return cachedState;
        }

        MTL::DepthStencilDescriptor* descriptor = MTL::DepthStencilDescriptor::alloc()->init();
        descriptor->setDepthWriteEnabled(hash.depthStencilWrite);
        descriptor->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc));

        // Bit 0 of the stencil config enables the stencil test; the same descriptor is used for both faces
        MTL::StencilDescriptor* stencil = nullptr;
        if (Helpers::getBit<0>(hash.stencilConfig)) {
            const u8 compareFunc = Helpers::getBits<4, 3>(hash.stencilConfig);
            const u8 readMask = Helpers::getBits<24, 8>(hash.stencilConfig);
            // Stencil writes are masked out entirely when depth/stencil writes are disabled
            const u32 writeMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0;

            const u8 onStencilFail = Helpers::getBits<0, 3>(hash.stencilOpConfig);
            const u8 onDepthFail = Helpers::getBits<4, 3>(hash.stencilOpConfig);
            const u8 onPass = Helpers::getBits<8, 3>(hash.stencilOpConfig);

            stencil = MTL::StencilDescriptor::alloc()->init();
            stencil->setStencilFailureOperation(toMTLStencilOperation(onStencilFail));
            stencil->setDepthFailureOperation(toMTLStencilOperation(onDepthFail));
            stencil->setDepthStencilPassOperation(toMTLStencilOperation(onPass));
            stencil->setStencilCompareFunction(toMTLCompareFunc(compareFunc));
            stencil->setReadMask(readMask);
            stencil->setWriteMask(writeMask);

            descriptor->setFrontFaceStencil(stencil);
            descriptor->setBackFaceStencil(stencil);
        }

        cachedState = device->newDepthStencilState(descriptor);

        // The descriptors are only needed to build the state object
        descriptor->release();
        if (stencil != nullptr) {
            stencil->release();
        }

        return cachedState;
    }

    // Releases every cached state object and empties the cache
    void reset() {
        for (auto& [key, state] : depthStencilCache) {
            state->release();
        }
        depthStencilCache.clear();
    }

  private:
    std::map<u64, MTL::DepthStencilState*> depthStencilCache;
    MTL::Device* device;
};
} // namespace Metal

View file

@ -0,0 +1,162 @@
#pragma once
#include <map>
#include "objc_helper.hpp"
#include "pica_to_mtl.hpp"
using namespace PICA;
namespace Metal {
// Key identifying one specialisation of the draw fragment function
// Each field is passed to the shader as a function constant by DrawPipelineCache::get
struct DrawFragmentFunctionHash {
u32 lightingConfig1; // 32 bits (TODO: check this)
bool lightingEnabled; // 1 bit
u8 lightingNumLights; // 3 bits
// | ref | func | on |
u16 alphaControl; // 12 bits (mask: 11111111 0111 0001)
};
// Lexicographic ordering over (lightingEnabled, lightingNumLights, lightingConfig1, alphaControl).
// std::map requires a strict weak ordering: a later field may only decide the result when every earlier field
// compares equal, otherwise both a < b and b < a can hold and map lookups become unreliable.
inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) {
    if (l.lightingEnabled != r.lightingEnabled) return !l.lightingEnabled;
    if (l.lightingNumLights != r.lightingNumLights) return l.lightingNumLights < r.lightingNumLights;
    if (l.lightingConfig1 != r.lightingConfig1) return l.lightingConfig1 < r.lightingConfig1;

    return l.alphaControl < r.alphaControl;
}
// Key identifying one draw pipeline: attachment formats, blending setup, colour write mask and the fragment function specialisation
struct DrawPipelineHash { // 56 bits
// Formats
ColorFmt colorFmt; // 3 bits
DepthFmt depthFmt; // 3 bits
// Blending
bool blendEnabled; // 1 bit
// | functions | aeq | ceq |
u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111)
u8 colorWriteMask; // 4 bits
DrawFragmentFunctionHash fragHash;
};
// Lexicographic ordering over (colorFmt, depthFmt, blendEnabled, blendControl, colorWriteMask, fragHash).
// std::map requires a strict weak ordering: a later field may only decide the result when every earlier field
// compares equal, otherwise two different keys can each compare "less" than the other.
inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) {
    if (l.colorFmt != r.colorFmt) return (u32)l.colorFmt < (u32)r.colorFmt;
    if (l.depthFmt != r.depthFmt) return (u32)l.depthFmt < (u32)r.depthFmt;
    if (l.blendEnabled != r.blendEnabled) return !l.blendEnabled;
    if (l.blendControl != r.blendControl) return l.blendControl < r.blendControl;
    if (l.colorWriteMask != r.colorWriteMask) return l.colorWriteMask < r.colorWriteMask;

    return l.fragHash < r.fragHash;
}
// Caches draw render pipeline states (keyed by DrawPipelineHash) together with the specialised fragment
// functions they are built from (keyed by DrawFragmentFunctionHash).
// The vertex function and vertex descriptor handed to set() are released when the cache is destroyed.
class DrawPipelineCache {
  public:
    DrawPipelineCache() = default;

    ~DrawPipelineCache() {
        reset();
        // set() may never have been called, in which case there is nothing to release
        if (vertexDescriptor) {
            vertexDescriptor->release();
        }
        if (vertexFunction) {
            vertexFunction->release();
        }
    }

    // Provides the device, the shader library fragment functions are specialised from, and the vertex stage
    // objects shared by every draw pipeline
    void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) {
        device = dev;
        library = lib;
        vertexFunction = vert;
        vertexDescriptor = vertDesc;
    }

    // Returns the pipeline state matching `hash`, building and caching it (and its fragment function) on first use
    MTL::RenderPipelineState* get(DrawPipelineHash hash) {
        auto& pipeline = pipelineCache[hash];

        if (!pipeline) {
            // Fragment functions are cached separately since many pipelines share the same specialisation
            auto& fragmentFunction = fragmentFunctionCache[hash.fragHash];
            if (!fragmentFunction) {
                MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init();
                constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0));
                constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1));
                constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2));
                constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3));

                NS::Error* error = nullptr;
                fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error);
                if (error) {
                    Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding));
                }
                constants->release();
            }

            MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
            desc->setVertexFunction(vertexFunction);
            desc->setFragmentFunction(fragmentFunction);
            desc->setVertexDescriptor(vertexDescriptor);

            auto colorAttachment = desc->colorAttachments()->object(0);
            colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));

            // Translate the 4-bit RGBA write mask into Metal's colour write mask flags
            MTL::ColorWriteMask writeMask = 0;
            if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed;
            if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen;
            if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue;
            if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha;
            colorAttachment->setWriteMask(writeMask);

            if (hash.blendEnabled) {
                const u8 rgbEquation = hash.blendControl & 0x7;
                const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl);

                // Get blending functions
                const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl);
                const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl);
                const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl);
                const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl);

                colorAttachment->setBlendingEnabled(true);
                colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation));
                colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation));
                colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc));
                colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc));
                colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc));
                colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc));
            }

            MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt);
            desc->setDepthAttachmentPixelFormat(depthFormat);
            // Only the combined depth+stencil format carries a stencil plane
            if (hash.depthFmt == DepthFmt::Depth24Stencil8) desc->setStencilAttachmentPixelFormat(depthFormat);

            NS::Error* error = nullptr;
            desc->setLabel(toNSString("Draw pipeline"));
            pipeline = device->newRenderPipelineState(desc, &error);
            if (error) {
                Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
            }

            desc->release();
        }

        return pipeline;
    }

    // Releases every cached pipeline state and fragment function and empties both caches
    void reset() {
        for (auto& pair : pipelineCache) {
            if (pair.second) {
                pair.second->release();
            }
        }
        pipelineCache.clear();

        for (auto& pair : fragmentFunctionCache) {
            if (pair.second) {
                pair.second->release();
            }
        }
        fragmentFunctionCache.clear();
    }

  private:
    std::map<DrawPipelineHash, MTL::RenderPipelineState*> pipelineCache;
    std::map<DrawFragmentFunctionHash, MTL::Function*> fragmentFunctionCache;

    // Null until set() is called, so the destructor never touches an indeterminate pointer
    MTL::Device* device = nullptr;
    MTL::Library* library = nullptr;
    MTL::Function* vertexFunction = nullptr;
    MTL::VertexDescriptor* vertexDescriptor = nullptr;
};
} // namespace Metal

View file

@ -0,0 +1,20 @@
#pragma once
#include <Metal/Metal.hpp>
namespace Metal {
// Holds one MTL::Texture together with a running index into it
// Only the declaration is visible here; the constructor, destructor and getNextIndex are defined elsewhere
class LutTexture {
public:
// NOTE(review): presumably creates `texture` on `device` with the given type, format and dimensions, using `name` as its label — confirm against the implementation
LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name);
~LutTexture();
// NOTE(review): presumably advances currentIndex and returns the new slot — confirm against the implementation
u32 getNextIndex();
// Accessors for the underlying texture and the current index
MTL::Texture* getTexture() { return texture; }
u32 getCurrentIndex() { return currentIndex; }
private:
MTL::Texture* texture;
u32 currentIndex = 0;
};
} // namespace Metal

View file

@ -0,0 +1,91 @@
#pragma once
#include <Metal/Metal.hpp>
#include <array>
#include <string>
#include "boost/icl/interval.hpp"
#include "helpers.hpp"
#include "math_util.hpp"
#include "objc_helper.hpp"
#include "opengl.hpp"
#include "pica_to_mtl.hpp"
template <typename T>
using Interval = boost::icl::right_open_interval<T>;
namespace Metal {
template <typename Format_t>
struct RenderTarget {
MTL::Device* device;
u32 location;
Format_t format;
OpenGL::uvec2 size;
bool valid;
// Range of VRAM taken up by buffer
Interval<u32> range;
MTL::Texture* texture = nullptr;
RenderTarget() : valid(false) {}
RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true)
: device(dev), location(loc), format(format), size({x, y}), valid(valid) {
u64 endLoc = (u64)loc + sizeInBytes();
// Check if start and end are valid here
range = Interval<u32>(loc, (u32)endLoc);
}
Math::Rect<u32> getSubRect(u32 inputAddress, u32 width, u32 height) {
const u32 startOffset = (inputAddress - location) / sizePerPixel(format);
const u32 x0 = (startOffset % (size.x() * 8)) / 8;
const u32 y0 = (startOffset / (size.x() * 8)) * 8;
return Math::Rect<u32>{x0, size.y() - y0, x0 + width, size.y() - height - y0};
}
// For 2 textures to "match" we only care about their locations, formats, and dimensions to match
// For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture
bool matches(RenderTarget& other) {
return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y();
}
void allocate() {
MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid;
if (std::is_same<Format_t, PICA::ColorFmt>::value) {
pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format);
} else if (std::is_same<Format_t, PICA::DepthFmt>::value) {
pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format);
} else {
panic("Invalid format type");
}
MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();
descriptor->setTextureType(MTL::TextureType2D);
descriptor->setPixelFormat(pixelFormat);
descriptor->setWidth(size.u());
descriptor->setHeight(size.v());
descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead);
descriptor->setStorageMode(MTL::StorageModePrivate);
texture = device->newTexture(descriptor);
texture->setLabel(toNSString(
std::string(std::is_same<Format_t, PICA::ColorFmt>::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" +
std::to_string(size.v())
));
descriptor->release();
}
void free() {
valid = false;
if (texture) {
texture->release();
}
}
u64 sizeInBytes() { return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); }
};
using ColorRenderTarget = RenderTarget<PICA::ColorFmt>;
using DepthStencilRenderTarget = RenderTarget<PICA::DepthFmt>;
} // namespace Metal

View file

@ -0,0 +1,73 @@
#pragma once
#include <Metal/Metal.hpp>
#include <array>
#include <string>
#include "PICA/regs.hpp"
#include "boost/icl/interval.hpp"
#include "helpers.hpp"
#include "math_util.hpp"
#include "opengl.hpp"
#include "renderer_mtl/pica_to_mtl.hpp"
template <typename T>
using Interval = boost::icl::right_open_interval<T>;
namespace Metal {
// A cached PICA texture at a given VRAM location, together with the Metal texture and sampler backing it.
// Most member functions are only declared here and defined elsewhere.
struct Texture {
    MTL::Device* device;
    u32 location;
    u32 config;  // Magnification/minification filter, wrapping configs, etc
    PICA::TextureFmt format;
    OpenGL::uvec2 size;
    bool valid;

    // Range of VRAM taken up by buffer
    Interval<u32> range;

    PICA::PixelFormatInfo formatInfo;
    MTL::Texture* texture = nullptr;
    MTL::SamplerState* sampler = nullptr;

    Texture() : valid(false) {}

    // The initialisers are listed in declaration order, which is the order C++ actually runs them in
    Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true)
        : device(dev), location(loc), config(config), format(format), size({x, y}), valid(valid) {
        u64 endLoc = (u64)loc + sizeInBytes();
        // Check if start and end are valid here
        range = Interval<u32>(loc, (u32)endLoc);
    }

    // For 2 textures to "match" we only care about their locations, formats, and dimensions to match
    // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture
    bool matches(Texture& other) {
        return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y();
    }

    void allocate();
    void setNewConfig(u32 newConfig);
    void decodeTexture(std::span<const u8> data);
    void free();
    u64 sizeInBytes();

    u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
    u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
    u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);

    // Get the morton interleave offset of a texel based on its U and V values
    static u32 mortonInterleave(u32 u, u32 v);

    // Get the byte offset of texel (u, v) in the texture
    static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel);
    static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width);

    // Returns the format of this texture as a string
    std::string_view formatToString() { return PICA::textureFormatToString(format); }

    // Returns the texel at coordinates (u, v) of an ETC1(A4) texture
    // TODO: Make hasAlpha a template parameter
    u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data);
    u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
};
} // namespace Metal

View file

@ -0,0 +1,83 @@
#pragma once
#include <cstring>
#include "helpers.hpp"
#include "pica_to_mtl.hpp"
using namespace PICA;
namespace Metal {
// A location inside a Metal buffer: the buffer itself plus a byte offset into it
struct BufferHandle {
MTL::Buffer* buffer;
usize offset;
};
// Bump allocator for vertex data on top of one large shared Metal buffer. Data that no longer fits is placed
// in a one-off buffer that lives until the next endFrame().
class VertexBufferCache {
    // 128MB buffer for caching vertex data
    static constexpr usize CACHE_BUFFER_SIZE = 128 * 1024 * 1024;

  public:
    VertexBufferCache() = default;

    ~VertexBufferCache() {
        endFrame();
        // set() may never have been called, in which case the shared buffer was never created
        if (buffer) {
            buffer->release();
        }
    }

    // Stores the device and creates the shared buffer on it
    void set(MTL::Device* dev) {
        device = dev;
        create();
    }

    // Rewinds the allocator to the start of the shared buffer and releases all overflow buffers
    void endFrame() {
        ptr = 0;
        for (auto overflowBuffer : additionalAllocations) {
            overflowBuffer->release();
        }
        additionalAllocations.clear();
    }

    // Copies `size` bytes from `data` into GPU-visible memory and returns where they ended up
    BufferHandle get(const void* data, usize size) {
        // If the vertex buffer is too large, just create a new one
        if (ptr + size > CACHE_BUFFER_SIZE) {
            MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared);
            newBuffer->setLabel(toNSString("Additional vertex buffer"));
            additionalAllocations.push_back(newBuffer);
            Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer");

            return BufferHandle{newBuffer, 0};
        }

        // Copy the data into the buffer
        std::memcpy((char*)buffer->contents() + ptr, data, size);

        auto oldPtr = ptr;
        ptr += size;

        return BufferHandle{buffer, oldPtr};
    }

    // Releases all overflow buffers and, if the shared buffer exists, recreates it
    void reset() {
        endFrame();
        if (buffer) {
            buffer->release();
            create();
        }
    }

  private:
    MTL::Buffer* buffer = nullptr;
    usize ptr = 0;
    std::vector<MTL::Buffer*> additionalAllocations;

    MTL::Device* device = nullptr;

    void create() {
        buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared);
        buffer->setLabel(toNSString("Shared vertex buffer"));
    }
};
} // namespace Metal

View file

@ -0,0 +1,12 @@
#pragma once
#include <string>
#include "mtl_common.hpp"
namespace Metal {
// Declared here, defined elsewhere
// NOTE(review): presumably wraps `size` bytes starting at `data` in a dispatch_data_t — confirm against the implementation, including whether the bytes are copied
dispatch_data_t createDispatchData(const void* data, size_t size);
} // namespace Metal
// Builds an NS::String from the contents of a std::string, interpreting the bytes as ASCII
inline NS::String* toNSString(const std::string& str) {
    const char* cString = str.c_str();
    return NS::String::string(cString, NS::ASCIIStringEncoding);
}

View file

@ -0,0 +1,152 @@
#pragma once
#include <Metal/Metal.hpp>
#include "PICA/regs.hpp"
namespace PICA {
// Pairs a Metal pixel format with the number of bytes one texel occupies in it
struct PixelFormatInfo {
MTL::PixelFormat pixelFormat;
size_t bytesPerTexel;
};
// Metal pixel format and texel size used for each PICA texture format, indexed by the numeric TextureFmt value
constexpr PixelFormatInfo pixelFormatInfos[14] = {
    {MTL::PixelFormatRGBA8Unorm, 4},   // RGBA8
    {MTL::PixelFormatRGBA8Unorm, 4},   // RGB8
    {MTL::PixelFormatBGR5A1Unorm, 2},  // RGBA5551
    {MTL::PixelFormatB5G6R5Unorm, 2},  // RGB565
    {MTL::PixelFormatABGR4Unorm, 2},   // RGBA4
    {MTL::PixelFormatRGBA8Unorm, 4},   // IA8
    {MTL::PixelFormatRG8Unorm, 2},     // RG8
    {MTL::PixelFormatRGBA8Unorm, 4},   // I8
    {MTL::PixelFormatA8Unorm, 1},      // A8
    {MTL::PixelFormatABGR4Unorm, 2},   // IA4
    {MTL::PixelFormatABGR4Unorm, 2},   // I4
    {MTL::PixelFormatA8Unorm, 1},      // A4
    {MTL::PixelFormatRGBA8Unorm, 4},   // ETC1
    {MTL::PixelFormatRGBA8Unorm, 4},   // ETC1A4
};

// Looks up the table entry for `format`. Values outside the table are reported through Helpers::panic instead
// of reading past the end of the array.
inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) {
    constexpr size_t formatCount = sizeof(pixelFormatInfos) / sizeof(pixelFormatInfos[0]);
    const size_t index = static_cast<size_t>(format);

    if (index >= formatCount) {
        Helpers::panic("Unknown texture format %u", static_cast<unsigned>(format));
        // Only reached if panic returns
        return pixelFormatInfos[0];
    }

    return pixelFormatInfos[index];
}
// Maps a PICA colour buffer format to the Metal pixel format used for the corresponding render target
inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) {
    switch (format) {
        case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm;
        case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm;
        case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm;  // TODO: use MTL::PixelFormatBGR5A1Unorm?
        case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm;    // TODO: use MTL::PixelFormatB5G6R5Unorm?
        case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm;
    }

    // A value that matches no enumerator would otherwise fall off the end of the function (undefined behaviour)
    Helpers::panic("Unknown colour format %u", static_cast<unsigned>(format));
    return MTL::PixelFormatRGBA8Unorm;
}
// Maps a PICA depth buffer format to the Metal pixel format used for the corresponding render target
inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) {
    switch (format) {
        case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm;
        case DepthFmt::Unknown1: return MTL::PixelFormatInvalid;
        // Metal does not support 24-bit depth formats, so use 32-bit float depth instead
        case DepthFmt::Depth24: return MTL::PixelFormatDepth32Float;
        // Apple silicon doesn't support 24-bit depth buffers, so we use 32-bit instead
        case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8;
    }

    // A value that matches no enumerator would otherwise fall off the end of the function (undefined behaviour)
    return MTL::PixelFormatInvalid;
}
// Translates a compare function index (0-7) into the matching Metal compare function
inline MTL::CompareFunction toMTLCompareFunc(u8 func) {
    static constexpr MTL::CompareFunction compareFunctions[8] = {
        MTL::CompareFunctionNever,         // 0
        MTL::CompareFunctionAlways,        // 1
        MTL::CompareFunctionEqual,         // 2
        MTL::CompareFunctionNotEqual,      // 3
        MTL::CompareFunctionLess,          // 4
        MTL::CompareFunctionLessEqual,     // 5
        MTL::CompareFunctionGreater,       // 6
        MTL::CompareFunctionGreaterEqual,  // 7
    };

    if (func < 8) {
        return compareFunctions[func];
    }

    Helpers::panic("Unknown compare function %u", func);
    return MTL::CompareFunctionAlways;
}
// Translates a blend equation index (0-7) into the matching Metal blend operation
inline MTL::BlendOperation toMTLBlendOperation(u8 op) {
    static constexpr MTL::BlendOperation blendOperations[8] = {
        MTL::BlendOperationAdd,              // 0
        MTL::BlendOperationSubtract,         // 1
        MTL::BlendOperationReverseSubtract,  // 2
        MTL::BlendOperationMin,              // 3
        MTL::BlendOperationMax,              // 4
        MTL::BlendOperationAdd,              // 5: Unused (same as 0)
        MTL::BlendOperationAdd,              // 6: Unused (same as 0)
        MTL::BlendOperationAdd,              // 7: Unused (same as 0)
    };

    if (op < 8) {
        return blendOperations[op];
    }

    Helpers::panic("Unknown blend operation %u", op);
    return MTL::BlendOperationAdd;
}
// Translates a blend function index (0-15) into the matching Metal blend factor
inline MTL::BlendFactor toMTLBlendFactor(u8 factor) {
    static constexpr MTL::BlendFactor blendFactors[16] = {
        MTL::BlendFactorZero,                      // 0
        MTL::BlendFactorOne,                       // 1
        MTL::BlendFactorSourceColor,               // 2
        MTL::BlendFactorOneMinusSourceColor,       // 3
        MTL::BlendFactorDestinationColor,          // 4
        MTL::BlendFactorOneMinusDestinationColor,  // 5
        MTL::BlendFactorSourceAlpha,               // 6
        MTL::BlendFactorOneMinusSourceAlpha,       // 7
        MTL::BlendFactorDestinationAlpha,          // 8
        MTL::BlendFactorOneMinusDestinationAlpha,  // 9
        MTL::BlendFactorBlendColor,                // 10
        MTL::BlendFactorOneMinusBlendColor,        // 11
        MTL::BlendFactorBlendAlpha,                // 12
        MTL::BlendFactorOneMinusBlendAlpha,        // 13
        MTL::BlendFactorSourceAlphaSaturated,      // 14
        MTL::BlendFactorOne,                       // 15: Undocumented
    };

    if (factor < 16) {
        return blendFactors[factor];
    }

    Helpers::panic("Unknown blend factor %u", factor);
    return MTL::BlendFactorOne;
}
// Translates a stencil operation index (0-7) into the matching Metal stencil operation
inline MTL::StencilOperation toMTLStencilOperation(u8 op) {
    static constexpr MTL::StencilOperation stencilOperations[8] = {
        MTL::StencilOperationKeep,            // 0
        MTL::StencilOperationZero,            // 1
        MTL::StencilOperationReplace,         // 2
        MTL::StencilOperationIncrementClamp,  // 3
        MTL::StencilOperationDecrementClamp,  // 4
        MTL::StencilOperationInvert,          // 5
        MTL::StencilOperationIncrementWrap,   // 6
        MTL::StencilOperationDecrementWrap,   // 7
    };

    if (op < 8) {
        return stencilOperations[op];
    }

    Helpers::panic("Unknown stencil operation %u", op);
    return MTL::StencilOperationKeep;
}
// Maps a PICA primitive type to the Metal primitive type used to draw it
inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) {
    switch (primType) {
        case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle;
        case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip;
        case PrimType::TriangleFan:
            // Metal has no triangle fan primitive; fall back to plain triangles and tell the user
            Helpers::warn("Triangle fans are not supported on Metal, using triangles instead");
            return MTL::PrimitiveTypeTriangle;
        case PrimType::GeometryPrimitive:
            return MTL::PrimitiveTypeTriangle;
    }

    // A value that matches no enumerator would otherwise fall off the end of the function (undefined behaviour)
    return MTL::PrimitiveTypeTriangle;
}
// Translates a texture wrap mode index (0-7) into the matching Metal sampler address mode
inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) {
    static constexpr MTL::SamplerAddressMode addressModes[8] = {
        MTL::SamplerAddressModeClampToEdge,         // 0
        MTL::SamplerAddressModeClampToBorderColor,  // 1
        MTL::SamplerAddressModeRepeat,              // 2
        MTL::SamplerAddressModeMirrorRepeat,        // 3
        MTL::SamplerAddressModeClampToEdge,         // 4
        MTL::SamplerAddressModeClampToBorderColor,  // 5
        MTL::SamplerAddressModeRepeat,              // 6
        MTL::SamplerAddressModeRepeat,              // 7
    };

    if (addrMode < 8) {
        return addressModes[addrMode];
    }

    Helpers::panic("Unknown sampler address mode %u", addrMode);
    return MTL::SamplerAddressModeClampToEdge;
}
} // namespace PICA

View file

@ -0,0 +1,207 @@
#pragma once
#include <Metal/Metal.hpp>
#include <QuartzCore/QuartzCore.hpp>
#include "mtl_blit_pipeline_cache.hpp"
#include "mtl_command_encoder.hpp"
#include "mtl_depth_stencil_cache.hpp"
#include "mtl_draw_pipeline_cache.hpp"
#include "mtl_lut_texture.hpp"
#include "mtl_render_target.hpp"
#include "mtl_texture.hpp"
#include "mtl_vertex_buffer_cache.hpp"
#include "renderer.hpp"
// HACK: use the OpenGL cache
#include "../renderer_gl/surface_cache.hpp"
class GPU;
// Four-channel floating point colour (red, green, blue, alpha).
// Used as the value type for pending colour clears.
struct Color4 {
    float r;
    float g;
    float b;
    float a;
};
// Metal implementation of the Renderer interface.
// Owns the Metal device objects, the render-target/texture/pipeline caches and the command buffer that is
// currently being recorded. Clears are not executed immediately: they are stored per texture in the
// *ClearOps maps and folded into the load action of the next render pass that touches that texture.
class RendererMTL final : public Renderer {
  public:
    RendererMTL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
    ~RendererMTL() override;

    void reset() override;
    void display() override;
    void initGraphicsContext(SDL_Window* window) override;
    void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override;
    void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override;
    void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
    void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override;
    void screenshot(const std::string& name) override;
    void deinitGraphicsContext() override;

#ifdef PANDA3DS_FRONTEND_QT
    // The Qt frontend hands renderers a GL context; the Metal renderer has no use for it.
    virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {}
#endif

  private:
    // Device-level objects. All raw pointers start out null so they are never read in an indeterminate
    // state before the graphics context has been initialised.
    CA::MetalLayer* metalLayer = nullptr;
    MTL::Device* device = nullptr;
    MTL::CommandQueue* commandQueue = nullptr;

    Metal::CommandEncoder commandEncoder;

    // Libraries
    MTL::Library* library = nullptr;

    // Caches
    SurfaceCache<Metal::ColorRenderTarget, 16, true> colorRenderTargetCache;
    SurfaceCache<Metal::DepthStencilRenderTarget, 16, true> depthStencilRenderTargetCache;
    SurfaceCache<Metal::Texture, 256, true> textureCache;
    Metal::BlitPipelineCache blitPipelineCache;
    Metal::DrawPipelineCache drawPipelineCache;
    Metal::DepthStencilCache depthStencilCache;
    Metal::VertexBufferCache vertexBufferCache;

    // Resources
    MTL::SamplerState* nearestSampler = nullptr;
    MTL::SamplerState* linearSampler = nullptr;
    MTL::Texture* nullTexture = nullptr;
    MTL::DepthStencilState* defaultDepthStencilState = nullptr;

    Metal::LutTexture* lutLightingTexture = nullptr;
    Metal::LutTexture* lutFogTexture = nullptr;

    // Pipelines
    MTL::RenderPipelineState* displayPipeline = nullptr;
    // MTL::RenderPipelineState* copyToLutTexturePipeline;

    // Clears that have been requested but not yet executed, keyed by the texture they apply to
    std::map<MTL::Texture*, Color4> colorClearOps;
    std::map<MTL::Texture*, float> depthClearOps;
    std::map<MTL::Texture*, u8> stencilClearOps;

    // Active state
    MTL::CommandBuffer* commandBuffer = nullptr;
    MTL::RenderCommandEncoder* renderCommandEncoder = nullptr;
    MTL::Texture* lastColorTexture = nullptr;
    MTL::Texture* lastDepthTexture = nullptr;

    // Debug
    std::string nextRenderPassName;

    // Lazily obtains a command buffer from the queue if none is currently being recorded.
    void createCommandBufferIfNeeded() {
        if (!commandBuffer) {
            commandBuffer = commandQueue->commandBuffer();
        }
    }

    // Ends encoding on the active render pass, if there is one, and forgets the encoder.
    void endRenderPass() {
        if (renderCommandEncoder) {
            renderCommandEncoder->endEncoding();
            renderCommandEncoder = nullptr;
        }
    }

    void beginRenderPassIfNeeded(
        MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr
    );

    // Finishes any open render pass, then commits the current command buffer and blocks until the GPU has
    // finished executing it.
    // NOTE(review): unlike endRenderPass(), this path also release()s the encoder and the command buffer.
    // Confirm that matches how they are obtained (retained vs. autoreleased).
    void commitCommandBuffer() {
        if (renderCommandEncoder) {
            renderCommandEncoder->endEncoding();
            renderCommandEncoder->release();
            renderCommandEncoder = nullptr;
        }

        if (commandBuffer) {
            commandBuffer->commit();
            // HACK
            commandBuffer->waitUntilCompleted();
            commandBuffer->release();
            commandBuffer = nullptr;
        }
    }

    // Configures one attachment of a render pass so that it is cleared on load and stored afterwards.
    //   renderPassDescriptor: descriptor to configure, or nullptr to allocate a new one and begin a render
    //                         pass with it right away.
    //   texture:              texture bound to the attachment.
    //   clearData:            clear value, applied through setClearData.
    //   getAttachment:        callable returning the AttachmentT* to configure from a descriptor.
    //   setClearData:         callable storing clearData into the attachment.
    // NOTE(review): the descriptor allocated here is not released in this function. Presumably
    // beginRenderPassIfNeeded() takes care of it; confirm against its definition.
    template <typename AttachmentT, typename ClearDataT, typename GetAttachmentT, typename SetClearDataT>
    inline void clearAttachment(
        MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment,
        SetClearDataT setClearData
    ) {
        const bool beginRenderPass = (renderPassDescriptor == nullptr);
        if (beginRenderPass) {
            renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
        }

        AttachmentT* attachment = getAttachment(renderPassDescriptor);
        attachment->setTexture(texture);
        setClearData(attachment, clearData);
        attachment->setLoadAction(MTL::LoadActionClear);
        attachment->setStoreAction(MTL::StoreActionStore);

        if (beginRenderPass) {
            // Colour attachments are passed as the colour texture of the pass; every other attachment
            // type is passed as the depth texture.
            if constexpr (std::is_same_v<AttachmentT, MTL::RenderPassColorAttachmentDescriptor>) {
                beginRenderPassIfNeeded(renderPassDescriptor, true, texture);
            } else {
                beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture);
            }
        }
    }

    // Executes the pending clear for `texture`, if one is recorded in clearOps, and removes it from the map.
    // When nothing is pending and a descriptor was supplied, the attachment is set up to load and store the
    // existing contents instead. Returns true if a clear was performed.
    template <typename AttachmentT, typename ClearDataT, typename GetAttachmentT, typename SetClearDataT>
    inline bool clearAttachment(
        MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map<MTL::Texture*, ClearDataT>& clearOps,
        GetAttachmentT getAttachment, SetClearDataT setClearData
    ) {
        auto it = clearOps.find(texture);
        if (it != clearOps.end()) {
            clearAttachment<AttachmentT>(renderPassDescriptor, texture, it->second, getAttachment, setClearData);
            clearOps.erase(it);
            return true;
        }

        if (renderPassDescriptor) {
            AttachmentT* attachment = getAttachment(renderPassDescriptor);
            attachment->setTexture(texture);
            attachment->setLoadAction(MTL::LoadActionLoad);
            attachment->setStoreAction(MTL::StoreActionStore);
        }

        return false;
    }

    // Applies the pending colour clear for `texture` (colour attachment 0). Returns true if one was pending.
    bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
        return clearAttachment<MTL::RenderPassColorAttachmentDescriptor, Color4>(
            renderPassDescriptor, texture, colorClearOps,
            [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); },
            [](auto attachment, auto& color) { attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); }
        );
    }

    // Applies the pending depth clear for `texture`. Returns true if one was pending.
    bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
        return clearAttachment<MTL::RenderPassDepthAttachmentDescriptor, float>(
            renderPassDescriptor, texture, depthClearOps,
            [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); },
            [](auto attachment, auto& depth) { attachment->setClearDepth(depth); }
        );
    }

    // Applies the pending stencil clear for `texture`. Returns true if one was pending.
    bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
        return clearAttachment<MTL::RenderPassStencilAttachmentDescriptor, u8>(
            renderPassDescriptor, texture, stencilClearOps,
            [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); },
            [](auto attachment, auto& stencil) { attachment->setClearStencil(stencil); }
        );
    }

    std::optional<Metal::ColorRenderTarget> getColorRenderTarget(
        u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true
    );
    Metal::DepthStencilRenderTarget& getDepthRenderTarget();
    Metal::Texture& getTexture(Metal::Texture& tex);
    void setupTextureEnvState(MTL::RenderCommandEncoder* encoder);
    void bindTexturesToSlots();
    void updateLightingLUT(MTL::RenderCommandEncoder* encoder);
    void updateFogLUT(MTL::RenderCommandEncoder* encoder);
    void textureCopyImpl(
        Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect,
        const Math::Rect<u32>& destRect
    );
};

View file

@ -15,6 +15,9 @@
#ifdef PANDA3DS_ENABLE_VULKAN
#include "renderer_vk/renderer_vk.hpp"
#endif
#ifdef PANDA3DS_ENABLE_METAL
#include "renderer_mtl/renderer_mtl.hpp"
#endif
constexpr u32 topScreenWidth = 240;
constexpr u32 topScreenHeight = 400;
@ -52,6 +55,12 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) {
renderer.reset(new RendererVK(*this, regs, externalRegs));
break;
}
#endif
#ifdef PANDA3DS_ENABLE_METAL
case RendererType::Metal: {
renderer.reset(new RendererMTL(*this, regs, externalRegs));
break;
}
#endif
default: {
Helpers::panic("Rendering backend not supported: %s", Renderer::typeToString(config.rendererType));
@ -391,7 +400,7 @@ PICA::Vertex GPU::getImmediateModeVertex() {
// Run VS and return vertex data. TODO: Don't hardcode offsets for each attribute
shaderUnit.vs.run();
// Map shader outputs to fixed function properties
const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
for (int i = 0; i < totalShaderOutputs; i++) {

View file

@ -12,8 +12,9 @@ static constexpr u32 signExtend3To32(u32 val) {
u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data) {
// Pixel offset of the 8x8 tile based on u, v and the width of the texture
u32 offs = ((u & ~7) * 8) + ((v & ~7) * width);
if (!hasAlpha)
if (!hasAlpha) {
offs >>= 1;
}
// In-tile offsets for u/v
u &= 7;

View file

@ -0,0 +1,6 @@
#define NS_PRIVATE_IMPLEMENTATION
#define CA_PRIVATE_IMPLEMENTATION
#define MTL_PRIVATE_IMPLEMENTATION
#include <Foundation/Foundation.hpp>
#include <Metal/Metal.hpp>
#include <QuartzCore/QuartzCore.hpp>

View file

@ -0,0 +1,116 @@
#include <algorithm>
#include "colour.hpp"
#include "renderer_mtl/mtl_texture.hpp"
#include "renderer_mtl/renderer_mtl.hpp"
using namespace Helpers;
namespace Metal {
// Sign-extends the low 3 bits of val to 32 bits: bit 2 is replicated into bits 3-31.
// The value is moved to the top of the word and brought back with an arithmetic right shift.
static constexpr u32 signExtend3To32(u32 val) {
    constexpr u32 shift = 32 - 3;
    const s32 atTop = static_cast<s32>(val << shift);
    return static_cast<u32>(atTop >> shift);
}
// Fetches and decodes a single texel from ETC1 / ETC1A4 compressed texture data.
//   hasAlpha: true for ETC1A4 (each 4x4 subtile is prefixed by 64 bits of 4-bit alpha), false for ETC1.
//   u, v:     texel coordinates.
//   width:    texture width in texels.
//   data:     raw compressed texture data.
// Returns the decoded texel as produced by decodeETC.
u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data) {
    // Reads 8 bytes starting at byteOffset as a little-endian 64-bit word.
    // Assembling the word byte by byte avoids casting the u8 buffer to u64*, which would violate strict
    // aliasing and could perform a misaligned load. On little-endian hosts the result is identical to
    // the previous direct load.
    const auto readU64 = [&data](u32 byteOffset) -> u64 {
        u64 value = 0;
        for (u32 i = 0; i < 8; i++) {
            value |= u64(data[byteOffset + i]) << (8 * i);
        }
        return value;
    };

    // Pixel offset of the 8x8 tile based on u, v and the width of the texture
    u32 offs = ((u & ~7) * 8) + ((v & ~7) * width);
    if (!hasAlpha) {
        offs >>= 1;
    }

    // In-tile offsets for u/v
    u &= 7;
    v &= 7;

    // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles
    // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes
    const u32 subTileSize = hasAlpha ? 16 : 8;
    const u32 subTileIndex = (u / 4) + 2 * (v / 4);  // Which of the 4 subtiles is this texel in?

    // In-subtile offsets for u/v
    u &= 3;
    v &= 3;
    offs += subTileSize * subTileIndex;

    u32 alpha;
    if (hasAlpha) {
        // First 64 bits of the 4x4 subtile are alpha data
        const u64 alphaData = readU64(offs);
        alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf);
        offs += 8;
    } else {
        alpha = 0xff;  // ETC1 without alpha uses ff for every pixel
    }

    // Next 64 bits of the subtile are colour data
    const u64 colourData = readU64(offs);
    return decodeETC(alpha, u, v, colourData);
}
// Decodes one texel of a 4x4 ETC1 block.
//   alpha:      8-bit alpha to place in the top byte of the result.
//   u, v:       texel coordinates inside the 4x4 block (0-3).
//   colourData: the 64-bit ETC1 colour word of the block.
// Returns the texel packed as (alpha << 24) | (b << 16) | (g << 8) | r.
u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) {
    // Intensity modifier magnitudes: [table index][selector bit]
    static constexpr u32 modifiers[8][2] = {
        {2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183},
    };

    // The per-texel bit index uses the coordinates as passed in, before any flip is applied
    const u32 texelIndex = u * 4 + v;

    // Per-texel selector and sign bits
    const u32 subindices = getBits<0, 16, u32>(colourData);
    const u32 negationFlags = getBits<16, 16, u32>(colourData);

    // Block-wide mode bits
    const bool flip = getBit<32>(colourData);
    const bool diffMode = getBit<33>(colourData);

    // Note: index1 is indeed stored on the higher bits, with index2 in the lower bits
    const u32 tableIndex1 = getBits<37, 3, u32>(colourData);
    const u32 tableIndex2 = getBits<34, 3, u32>(colourData);

    if (flip) {
        std::swap(u, v);
    }

    // After the optional flip, u selects which of the two sub-blocks the texel belongs to
    const bool firstSubBlock = u < 2;

    s32 r, g, b;
    if (!diffMode) {
        // Individual mode: each sub-block carries its own 4-bit-per-channel base colour
        if (firstSubBlock) {
            r = getBits<60, 4, s32>(colourData);
            g = getBits<52, 4, s32>(colourData);
            b = getBits<44, 4, s32>(colourData);
        } else {
            r = getBits<56, 4, s32>(colourData);
            g = getBits<48, 4, s32>(colourData);
            b = getBits<40, 4, s32>(colourData);
        }

        // Expand from 4 to 8 bits per channel
        r = Colour::convert4To8Bit(r);
        g = Colour::convert4To8Bit(g);
        b = Colour::convert4To8Bit(b);
    } else {
        // Differential mode: a 5-bit base colour, plus a signed 3-bit delta for the second sub-block
        r = getBits<59, 5, s32>(colourData);
        g = getBits<51, 5, s32>(colourData);
        b = getBits<43, 5, s32>(colourData);

        if (!firstSubBlock) {
            r += signExtend3To32(getBits<56, 3, u32>(colourData));
            g += signExtend3To32(getBits<48, 3, u32>(colourData));
            b += signExtend3To32(getBits<40, 3, u32>(colourData));
        }

        // Expand from 5 to 8 bits per channel
        r = Colour::convert5To8Bit(r);
        g = Colour::convert5To8Bit(g);
        b = Colour::convert5To8Bit(b);
    }

    const u32 index = firstSubBlock ? tableIndex1 : tableIndex2;
    const bool negate = ((negationFlags >> texelIndex) & 1) != 0;

    s32 modifier = modifiers[index][(subindices >> texelIndex) & 1];
    if (negate) {
        modifier = -modifier;
    }

    r = std::clamp(r + modifier, 0, 255);
    g = std::clamp(g + modifier, 0, 255);
    b = std::clamp(b + modifier, 0, 255);

    return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r);
}
} // namespace Metal

View file

@ -0,0 +1,27 @@
#include "renderer_mtl/renderer_mtl.hpp"
namespace Metal {
// Number of array layers allocated for every LUT texture
static constexpr u32 LAYER_COUNT = 1024;

// Creates a shader-readable array texture with LAYER_COUNT layers in shared storage.
//   device:        Metal device that allocates the texture.
//   type:          texture type of the array.
//   pixelFormat:   format of every layer.
//   width, height: dimensions of one layer.
//   name:          debug label attached to the texture.
LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) {
    MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();

    // Shape of the texture
    descriptor->setTextureType(type);
    descriptor->setWidth(width);
    descriptor->setHeight(height);
    descriptor->setArrayLength(LAYER_COUNT);

    // Contents and access
    descriptor->setPixelFormat(pixelFormat);
    descriptor->setStorageMode(MTL::StorageModeShared);
    descriptor->setUsage(MTL::TextureUsageShaderRead /* | MTL::TextureUsageShaderWrite*/);

    texture = device->newTexture(descriptor);
    texture->setLabel(toNSString(name));

    // The descriptor is only needed while the texture is being created
    descriptor->release();
}
// Releases the Metal texture created by the constructor.
LutTexture::~LutTexture() {
    texture->release();
}
// Advances to the next layer index, wrapping back to 0 after LAYER_COUNT - 1, and returns the new index.
u32 LutTexture::getNextIndex() {
    const u32 next = (currentIndex + 1) % LAYER_COUNT;
    currentIndex = next;
    return next;
}
} // namespace Metal

View file

@ -0,0 +1,308 @@
#include "renderer_mtl/mtl_texture.hpp"
#include <array>
#include "colour.hpp"
#include "renderer_mtl/objc_helper.hpp"
using namespace Helpers;
namespace Metal {
void Texture::allocate() {
formatInfo = PICA::getPixelFormatInfo(format);
MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();
descriptor->setTextureType(MTL::TextureType2D);
descriptor->setPixelFormat(formatInfo.pixelFormat);
descriptor->setWidth(size.u());
descriptor->setHeight(size.v());
descriptor->setUsage(MTL::TextureUsageShaderRead);
descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers?
texture = device->newTexture(descriptor);
texture->setLabel(toNSString(
"Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v())
));
descriptor->release();
setNewConfig(config);
}
// Stores a new texture configuration word and rebuilds the sampler from it.
// Bit 1 selects linear magnification, bit 2 selects linear minification (nearest otherwise).
// Bits 8-10 hold the T wrap mode and bits 12-14 hold the S wrap mode.
// Any previously created sampler is released first.
void Texture::setNewConfig(u32 cfg) {
    config = cfg;

    if (sampler) {
        sampler->release();
    }

    const bool linearMag = (cfg & 0x2) != 0;
    const bool linearMin = (cfg & 0x4) != 0;

    MTL::SamplerDescriptor* descriptor = MTL::SamplerDescriptor::alloc()->init();
    descriptor->setMinFilter(linearMin ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest);
    descriptor->setMagFilter(linearMag ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest);
    descriptor->setSAddressMode(PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg)));
    descriptor->setTAddressMode(PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg)));
    descriptor->setLabel(toNSString("Sampler"));

    sampler = device->newSamplerState(descriptor);
    descriptor->release();
}
// Marks the texture as invalid and releases its Metal texture and sampler, if present.
// Both pointers are reset to null after being released. This keeps the object from holding dangling
// pointers, so a later free() or setNewConfig() cannot release the same object a second time.
void Texture::free() {
    valid = false;

    if (texture) {
        texture->release();
        texture = nullptr;
    }

    if (sampler) {
        sampler->release();
        sampler = nullptr;
    }
}
// Returns the number of bytes the texture occupies in its PICA format, derived from its dimensions.
// Panics on a format that is not handled below.
u64 Texture::sizeInBytes() {
    const u64 texels = u64(size.x()) * u64(size.y());

    switch (format) {
        // 4 bytes per pixel
        case PICA::TextureFmt::RGBA8: return texels * 4;

        // 3 bytes per pixel
        case PICA::TextureFmt::RGB8: return texels * 3;

        // 2 bytes per pixel
        case PICA::TextureFmt::RGBA5551:
        case PICA::TextureFmt::RGB565:
        case PICA::TextureFmt::RGBA4:
        case PICA::TextureFmt::RG8:
        case PICA::TextureFmt::IA8: return texels * 2;

        // 1 byte per pixel
        case PICA::TextureFmt::A8:
        case PICA::TextureFmt::I8:
        case PICA::TextureFmt::IA4: return texels;

        // 4 bits per pixel
        case PICA::TextureFmt::I4:
        case PICA::TextureFmt::A4: return texels / 2;

        // Compressed formats: one 4x4 tile takes 8 bytes on ETC1 and 16 bytes on ETC1A4
        case PICA::TextureFmt::ETC1: return (texels / 16) * 8;
        case PICA::TextureFmt::ETC1A4: return (texels / 16) * 16;

        default: Helpers::panic("[PICA] Attempted to get size of invalid texture type");
    }
}
// u and v are the coordinates of the relevant texel.
// Texture data is stored interleaved in Morton order, i.e. along a Z-order curve:
// https://en.wikipedia.org/wiki/Z-order_curve
// Textures are split into 8x8 tiles. This function returns the in-tile offset of the texel, which is the
// sum of two offsets: one depending on u % 8 and the other on v % 8, as illustrated in
// https://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg
u32 Texture::mortonInterleave(u32 u, u32 v) {
    // Spreads the low 3 bits of a coordinate onto bit positions 0, 2 and 4.
    // For inputs 0..7 this yields 0, 1, 4, 5, 16, 17, 20, 21.
    const auto spreadBits = [](u32 coord) -> u32 {
        const u32 low = coord & 7;
        return (low & 1) | ((low & 2) << 1) | ((low & 4) << 2);
    };

    // u fills the even bit positions and v the odd ones, so the two parts never overlap
    const u32 xOffset = spreadBits(u);
    const u32 yOffset = spreadBits(v) << 1;
    return xOffset + yOffset;
}
// Returns the byte offset of texel (u, v) inside swizzled texture data.
//   width:         texture width in texels.
//   bytesPerPixel: size of one texel in bytes.
u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) {
    // Texel index of the first texel of the 8x8 tile containing (u, v)
    const u32 tileStart = ((u & ~7) * 8) + ((v & ~7) * width);
    // Texel index of (u, v) within that tile
    const u32 inTile = mortonInterleave(u, v);

    return (tileStart + inTile) * bytesPerPixel;
}
// Returns the byte offset of texel (u, v) for 4-bit-per-texel formats.
// Two texels share one byte, so the swizzled texel index is halved.
u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) {
    // Texel index of the first texel of the 8x8 tile containing (u, v)
    const u32 tileStart = ((u & ~7) * 8) + ((v & ~7) * width);
    // Texel index of (u, v) within that tile
    const u32 inTile = mortonInterleave(u, v);

    return (tileStart + inTile) / 2;
}
// Decodes texel (u, v) of a format whose decoded texel is a single byte (A4 and A8).
// Both formats produce an 8-bit alpha value. Panics on any other format.
u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
    switch (fmt) {
        case PICA::TextureFmt::A4: {
            const u32 byteOffset = getSwizzledOffset_4bpp(u, v, size.u());

            // Odd U coordinates live in the top 4 bits of the byte, even ones in the low 4 bits
            const u32 nibbleShift = (u % 2) ? 4 : 0;
            const u8 shifted = data[byteOffset] >> nibbleShift;

            // Expand the 4-bit alpha to 8 bits (A8)
            const u8 alpha = Colour::convert4To8Bit(getBits<0, 4>(shifted));
            return alpha;
        }

        case PICA::TextureFmt::A8: {
            // Already stored as A8
            const u32 byteOffset = getSwizzledOffset(u, v, size.u(), 1);
            return data[byteOffset];
        }

        default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
    }
}
// Decodes texel (u, v) of a format whose decoded texel is 16 bits wide.
// The comment above each return names the texel layout being produced. Panics on any other format.
u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
    switch (fmt) {
        case PICA::TextureFmt::RG8: {
            const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
            const u8 g = data[offset];
            const u8 r = data[offset + 1];

            // RG8
            return (g << 8) | r;
        }

        case PICA::TextureFmt::RGBA4: {
            const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
            const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);

            const u8 alpha = getBits<0, 4, u8>(texel);
            const u8 b = getBits<4, 4, u8>(texel);
            const u8 g = getBits<8, 4, u8>(texel);
            const u8 r = getBits<12, 4, u8>(texel);

            // ABGR4
            return (r << 12) | (g << 8) | (b << 4) | alpha;
        }

        case PICA::TextureFmt::RGBA5551: {
            const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
            const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);

            // Alpha is a single bit that ends up in bit 15 of the output
            const u8 alpha = getBit<0>(texel) ? 1 : 0;
            const u8 b = getBits<1, 5, u8>(texel);
            const u8 g = getBits<6, 5, u8>(texel);
            const u8 r = getBits<11, 5, u8>(texel);

            // BGR5A1
            return (alpha << 15) | (r << 10) | (g << 5) | b;
        }

        case PICA::TextureFmt::RGB565: {
            const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
            const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);

            const u8 b = getBits<0, 5, u8>(texel);
            const u8 g = getBits<5, 6, u8>(texel);
            const u8 r = getBits<11, 5, u8>(texel);

            // B5G6R5
            return (r << 11) | (g << 5) | b;
        }

        case PICA::TextureFmt::IA4: {
            const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
            const u8 texel = data[offset];
            const u8 alpha = texel & 0xf;
            const u8 intensity = texel >> 4;

            // ABGR4
            return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha;
        }

        case PICA::TextureFmt::I4: {
            const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());

            // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
            u8 intensity = data[offset] >> ((u % 2) ? 4 : 0);
            intensity = getBits<0, 4>(intensity);

            // ABGR4 with a fully opaque 4-bit alpha. The alpha mask must be 0xf: the previous 0xff
            // also overwrote the neighbouring 4-bit channel in bits 4-7 with all ones.
            return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xf;
        }

        default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
    }
}
// Decodes texel (u, v) of a format whose decoded texel is 32 bits wide.
// Uncompressed formats are packed as (alpha << 24) | (b << 16) | (g << 8) | r.
// ETC1 / ETC1A4 are delegated to getTexelETC. Panics on any other format.
u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
    // Packs four 8-bit channels into one 32-bit RGBA8 texel
    const auto packRGBA8 = [](u8 alpha, u8 b, u8 g, u8 r) -> u32 {
        return (u32(alpha) << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r);
    };

    switch (fmt) {
        case PICA::TextureFmt::RGB8: {
            // Three bytes per texel; alpha is fully opaque
            const u32 base = getSwizzledOffset(u, v, size.u(), 3);
            return packRGBA8(0xff, data[base], data[base + 1], data[base + 2]);
        }

        case PICA::TextureFmt::RGBA8: {
            // Four bytes per texel, alpha first
            const u32 base = getSwizzledOffset(u, v, size.u(), 4);
            return packRGBA8(data[base], data[base + 1], data[base + 2], data[base + 3]);
        }

        case PICA::TextureFmt::I8: {
            // One intensity byte replicated to all colour channels; alpha is fully opaque
            const u32 base = getSwizzledOffset(u, v, size.u(), 1);
            const u8 intensity = data[base];
            return packRGBA8(0xff, intensity, intensity, intensity);
        }

        case PICA::TextureFmt::IA8: {
            // Same as I8 except each pixel gets its own alpha value too
            const u32 base = getSwizzledOffset(u, v, size.u(), 2);
            const u8 alpha = data[base];
            const u8 intensity = data[base + 1];
            return packRGBA8(alpha, intensity, intensity, intensity);
        }

        case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data);
        case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data);

        default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
    }
}
// Decodes the whole texture from its PICA layout into a linear buffer and uploads it to the Metal texture.
// Texels are decoded row by row and written least significant byte first. formatInfo.bytesPerTexel picks
// the 1-, 2- or 4-byte decoder; any other value panics.
void Texture::decodeTexture(std::span<const u8> data) {
    const u32 width = size.u();
    const u32 height = size.v();

    std::vector<u8> decoded;
    decoded.reserve(u64(width) * u64(height) * formatInfo.bytesPerTexel);

    // Appends the low `byteCount` bytes of `value`, least significant byte first
    const auto appendBytes = [&decoded](u32 value, u32 byteCount) {
        for (u32 i = 0; i < byteCount; i++) {
            decoded.push_back((value >> (8 * i)) & 0xff);
        }
    };

    for (u32 v = 0; v < height; v++) {
        for (u32 u = 0; u < width; u++) {
            switch (formatInfo.bytesPerTexel) {
                case 1: appendBytes(decodeTexelU8(u, v, format, data), 1); break;
                case 2: appendBytes(decodeTexelU16(u, v, format, data), 2); break;
                case 4: appendBytes(decodeTexelU32(u, v, format, data), 4); break;
                default: Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel);
            }
        }
    }

    // Upload the decoded rows into mip level 0, slice 0
    texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0);
}
} // namespace Metal

View file

@ -0,0 +1,12 @@
#include "renderer_mtl/objc_helper.hpp"
// TODO: change the include
#import <Metal/Metal.h>
namespace Metal {
// Wraps an existing memory range in a dispatch_data_t.
// A destructor block that does nothing is supplied, together with the global queue it is submitted to.
// NOTE(review): with a custom destructor libdispatch is documented to reference the buffer rather than
// copy it, so `data` presumably has to outlive the returned object. Confirm against the callers.
dispatch_data_t createDispatchData(const void* data, size_t size) {
    dispatch_queue_t destructorQueue = dispatch_get_global_queue(0, 0);
    dispatch_block_t noopDestructor = ^{};

    return dispatch_data_create(data, size, destructorQueue, noopDestructor);
}
} // namespace Metal

View file

@ -0,0 +1,824 @@
#include "renderer_mtl/renderer_mtl.hpp"
#include <cmrc/cmrc.hpp>
#include <cstddef>
#include "renderer_mtl/mtl_lut_texture.hpp"
// Hack: Apple annoyingly defines a global "NO" macro which ends up conflicting with our own code...
#undef NO
#include "PICA/gpu.hpp"
#include "SDL_metal.h"
using namespace PICA;
CMRC_DECLARE(RendererMTL);
// Width, in texels, passed when creating the lighting LUT texture
static constexpr u16 LIGHTING_LUT_TEXTURE_WIDTH = 256;
// Width, in texels, passed when creating the fog LUT texture
static constexpr u32 FOG_LUT_TEXTURE_WIDTH = 128;
// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices
// NOTE(review): `uint` is not a standard C++ type name; it presumably comes from a platform or Metal header - confirm.
static constexpr uint VERTEX_BUFFER_BINDING_INDEX = 30;
// HACK: redefinition...
// Converts a raw colour format value into a PICA::ColorFmt.
// Values 2 and 3 are mapped explicitly to RGB565 and RGBA5551; every other value is cast directly.
PICA::ColorFmt ToColorFormat(u32 format) {
    if (format == 2) {
        return PICA::ColorFmt::RGB565;
    }

    if (format == 3) {
        return PICA::ColorFmt::RGBA5551;
    }

    return static_cast<PICA::ColorFmt>(format);
}
// Creates a Metal library from a precompiled shader library embedded as a cmrc resource.
//   device:       Metal device used to create the library.
//   shaderSource: embedded resource holding the library data.
// Panics with Metal's error description if Metal reports an error; otherwise returns the new library.
MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) {
    NS::Error* loadError = nullptr;

    // Hand the embedded bytes to Metal wrapped in a dispatch data object
    auto libraryData = Metal::createDispatchData(shaderSource.begin(), shaderSource.size());
    MTL::Library* loadedLibrary = device->newLibrary(libraryData, &loadError);

    if (loadError != nullptr) {
        Helpers::panic("Error loading shaders: %s", loadError->description()->cString(NS::ASCIIStringEncoding));
    }

    return loadedLibrary;
}
// Forwards the GPU and register arrays to the Renderer base class.
// No Metal objects are created here; that is done in initGraphicsContext.
RendererMTL::RendererMTL(
    GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs
)
    : Renderer(gpu, internalRegs, externalRegs) {
}
// Intentionally empty: nothing is released in the destructor itself.
RendererMTL::~RendererMTL() {
}
// Resets every renderer-side cache: vertex buffers, depth-stencil states, draw and blit pipelines,
// textures, and the depth-stencil and colour render targets.
// NOTE(review): the call order is left untouched; whether the caches depend on this order is not visible here.
void RendererMTL::reset() {
vertexBufferCache.reset();
depthStencilCache.reset();
drawPipelineCache.reset();
blitPipelineCache.reset();
textureCache.reset();
depthStencilRenderTargetCache.reset();
colorRenderTargetCache.reset();
}
void RendererMTL::display() {
CA::MetalDrawable* drawable = metalLayer->nextDrawable();
if (!drawable) {
return;
}
using namespace PICA::ExternalRegs;
// Top screen
const u32 topActiveFb = externalRegs[Framebuffer0Select] & 1;
const u32 topScreenAddr = externalRegs[topActiveFb == 0 ? Framebuffer0AFirstAddr : Framebuffer0ASecondAddr];
auto topScreen = colorRenderTargetCache.findFromAddress(topScreenAddr);
if (topScreen) {
clearColor(nullptr, topScreen->get().texture);
}
// Bottom screen
const u32 bottomActiveFb = externalRegs[Framebuffer1Select] & 1;
const u32 bottomScreenAddr = externalRegs[bottomActiveFb == 0 ? Framebuffer1AFirstAddr : Framebuffer1ASecondAddr];
auto bottomScreen = colorRenderTargetCache.findFromAddress(bottomScreenAddr);
if (bottomScreen) {
clearColor(nullptr, bottomScreen->get().texture);
}
// Draw
commandBuffer->pushDebugGroup(toNSString("Display"));
MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
MTL::RenderPassColorAttachmentDescriptor* colorAttachment = renderPassDescriptor->colorAttachments()->object(0);
colorAttachment->setTexture(drawable->texture());
colorAttachment->setLoadAction(MTL::LoadActionClear);
colorAttachment->setClearColor(MTL::ClearColor{0.0f, 0.0f, 0.0f, 1.0f});
colorAttachment->setStoreAction(MTL::StoreActionStore);
nextRenderPassName = "Display";
beginRenderPassIfNeeded(renderPassDescriptor, false, drawable->texture());
renderCommandEncoder->setRenderPipelineState(displayPipeline);
renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0);
// Top screen
if (topScreen) {
renderCommandEncoder->setViewport(MTL::Viewport{0, 0, 400, 240, 0.0f, 1.0f});
renderCommandEncoder->setFragmentTexture(topScreen->get().texture, 0);
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
}
// Bottom screen
if (bottomScreen) {
renderCommandEncoder->setViewport(MTL::Viewport{40, 240, 320, 240, 0.0f, 1.0f});
renderCommandEncoder->setFragmentTexture(bottomScreen->get().texture, 0);
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
}
endRenderPass();
commandBuffer->presentDrawable(drawable);
commandBuffer->popDebugGroup();
commitCommandBuffer();
// Inform the vertex buffer cache that the frame ended
vertexBufferCache.endFrame();
// Release
drawable->release();
}
void RendererMTL::initGraphicsContext(SDL_Window* window) {
// TODO: what should be the type of the view?
void* view = SDL_Metal_CreateView(window);
metalLayer = (CA::MetalLayer*)SDL_Metal_GetLayer(view);
device = MTL::CreateSystemDefaultDevice();
metalLayer->setDevice(device);
commandQueue = device->newCommandQueue();
// Textures
MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
textureDescriptor->setTextureType(MTL::TextureType2D);
textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA8Unorm);
textureDescriptor->setWidth(1);
textureDescriptor->setHeight(1);
textureDescriptor->setStorageMode(MTL::StorageModePrivate);
textureDescriptor->setUsage(MTL::TextureUsageShaderRead);
nullTexture = device->newTexture(textureDescriptor);
nullTexture->setLabel(toNSString("Null texture"));
textureDescriptor->release();
// Samplers
MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init();
samplerDescriptor->setLabel(toNSString("Sampler (nearest)"));
nearestSampler = device->newSamplerState(samplerDescriptor);
samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear);
samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear);
samplerDescriptor->setLabel(toNSString("Sampler (linear)"));
linearSampler = device->newSamplerState(samplerDescriptor);
samplerDescriptor->release();
lutLightingTexture = new Metal::LutTexture(
device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture"
);
lutFogTexture = new Metal::LutTexture(device, MTL::TextureType1DArray, MTL::PixelFormatRG32Float, FOG_LUT_TEXTURE_WIDTH, 1, "Fog LUT texture");
// -------- Pipelines --------
// Load shaders
auto mtlResources = cmrc::RendererMTL::get_filesystem();
library = loadLibrary(device, mtlResources.open("metal_shaders.metallib"));
MTL::Library* blitLibrary = loadLibrary(device, mtlResources.open("metal_blit.metallib"));
// MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib"));
// Display
MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding));
MTL::Function* fragmentDisplayFunction = library->newFunction(NS::String::string("fragmentDisplay", NS::ASCIIStringEncoding));
MTL::RenderPipelineDescriptor* displayPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
displayPipelineDescriptor->setVertexFunction(vertexDisplayFunction);
displayPipelineDescriptor->setFragmentFunction(fragmentDisplayFunction);
auto* displayColorAttachment = displayPipelineDescriptor->colorAttachments()->object(0);
displayColorAttachment->setPixelFormat(MTL::PixelFormat::PixelFormatBGRA8Unorm);
NS::Error* error = nullptr;
displayPipelineDescriptor->setLabel(toNSString("Display pipeline"));
displayPipeline = device->newRenderPipelineState(displayPipelineDescriptor, &error);
if (error) {
Helpers::panic("Error creating display pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
displayPipelineDescriptor->release();
vertexDisplayFunction->release();
fragmentDisplayFunction->release();
// Blit
MTL::Function* vertexBlitFunction = blitLibrary->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding));
MTL::Function* fragmentBlitFunction = blitLibrary->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding));
blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction);
// Draw
MTL::Function* vertexDrawFunction = library->newFunction(NS::String::string("vertexDraw", NS::ASCIIStringEncoding));
// -------- Vertex descriptor --------
MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init();
// Position
MTL::VertexAttributeDescriptor* positionAttribute = vertexDescriptor->attributes()->object(0);
positionAttribute->setFormat(MTL::VertexFormatFloat4);
positionAttribute->setOffset(offsetof(Vertex, s.positions));
positionAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// Quaternion
MTL::VertexAttributeDescriptor* quaternionAttribute = vertexDescriptor->attributes()->object(1);
quaternionAttribute->setFormat(MTL::VertexFormatFloat4);
quaternionAttribute->setOffset(offsetof(Vertex, s.quaternion));
quaternionAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// Color
MTL::VertexAttributeDescriptor* colorAttribute = vertexDescriptor->attributes()->object(2);
colorAttribute->setFormat(MTL::VertexFormatFloat4);
colorAttribute->setOffset(offsetof(Vertex, s.colour));
colorAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// Texture coordinate 0
MTL::VertexAttributeDescriptor* texCoord0Attribute = vertexDescriptor->attributes()->object(3);
texCoord0Attribute->setFormat(MTL::VertexFormatFloat2);
texCoord0Attribute->setOffset(offsetof(Vertex, s.texcoord0));
texCoord0Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// Texture coordinate 1
MTL::VertexAttributeDescriptor* texCoord1Attribute = vertexDescriptor->attributes()->object(4);
texCoord1Attribute->setFormat(MTL::VertexFormatFloat2);
texCoord1Attribute->setOffset(offsetof(Vertex, s.texcoord1));
texCoord1Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// Texture coordinate 0 W
MTL::VertexAttributeDescriptor* texCoord0WAttribute = vertexDescriptor->attributes()->object(5);
texCoord0WAttribute->setFormat(MTL::VertexFormatFloat);
texCoord0WAttribute->setOffset(offsetof(Vertex, s.texcoord0_w));
texCoord0WAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// View
MTL::VertexAttributeDescriptor* viewAttribute = vertexDescriptor->attributes()->object(6);
viewAttribute->setFormat(MTL::VertexFormatFloat3);
viewAttribute->setOffset(offsetof(Vertex, s.view));
viewAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// Texture coordinate 2
MTL::VertexAttributeDescriptor* texCoord2Attribute = vertexDescriptor->attributes()->object(7);
texCoord2Attribute->setFormat(MTL::VertexFormatFloat2);
texCoord2Attribute->setOffset(offsetof(Vertex, s.texcoord2));
texCoord2Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
MTL::VertexBufferLayoutDescriptor* vertexBufferLayout = vertexDescriptor->layouts()->object(VERTEX_BUFFER_BINDING_INDEX);
vertexBufferLayout->setStride(sizeof(Vertex));
vertexBufferLayout->setStepFunction(MTL::VertexStepFunctionPerVertex);
vertexBufferLayout->setStepRate(1);
drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor);
// Copy to LUT texture
/*
MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init();
constants->setConstantValue(&LIGHTING_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0));
error = nullptr;
MTL::Function* vertexCopyToLutTextureFunction =
copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error);
if (error) {
Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
constants->release();
MTL::RenderPipelineDescriptor* copyToLutTexturePipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
copyToLutTexturePipelineDescriptor->setVertexFunction(vertexCopyToLutTextureFunction);
// Disable rasterization
copyToLutTexturePipelineDescriptor->setRasterizationEnabled(false);
error = nullptr;
copyToLutTexturePipelineDescriptor->setLabel(toNSString("Copy to LUT texture pipeline"));
copyToLutTexturePipeline = device->newRenderPipelineState(copyToLutTexturePipelineDescriptor, &error);
if (error) {
Helpers::panic("Error creating copy_to_lut_texture pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
copyToLutTexturePipelineDescriptor->release();
vertexCopyToLutTextureFunction->release();
*/
// Depth stencil cache
depthStencilCache.set(device);
// Vertex buffer cache
vertexBufferCache.set(device);
// -------- Depth stencil state --------
MTL::DepthStencilDescriptor* depthStencilDescriptor = MTL::DepthStencilDescriptor::alloc()->init();
depthStencilDescriptor->setLabel(toNSString("Default depth stencil state"));
defaultDepthStencilState = device->newDepthStencilState(depthStencilDescriptor);
depthStencilDescriptor->release();
blitLibrary->release();
// copyToLutTextureLibrary->release();
}
// Records a clear for the cached render target that contains startAddress.
// Nothing is drawn here: the decoded fill value is stored in the per-texture clear-op maps
// (colorClearOps / depthClearOps / stencilClearOps). Colour targets are searched before
// depth/stencil targets. endAddress and control are not read by this implementation.
void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {
    if (const auto colorTarget = colorRenderTargetCache.findFromAddress(startAddress); colorTarget) {
        // One byte per channel: red in the top byte, alpha in the bottom byte
        const float red = Helpers::getBits<24, 8>(value) / 255.0f;
        const float green = Helpers::getBits<16, 8>(value) / 255.0f;
        const float blue = Helpers::getBits<8, 8>(value) / 255.0f;
        const float alpha = (value & 0xff) / 255.0f;

        colorClearOps[colorTarget->get().texture] = {red, green, blue, alpha};
        return;
    }

    const auto depthTarget = depthStencilRenderTargetCache.findFromAddress(startAddress);
    if (!depthTarget) {
        Helpers::warn("[RendererMTL::ClearBuffer] No buffer found!\n");
        return;
    }

    const auto depthFormat = depthTarget->get().format;
    // 16-bit depth uses the low 16 bits of the fill value, every other format uses the low 24 bits
    const float depthVal = (depthFormat == DepthFmt::Depth16) ? (value & 0xffff) / 65535.0f : (value & 0xffffff) / 16777215.0f;
    depthClearOps[depthTarget->get().texture] = depthVal;

    // For combined depth/stencil targets the stencil clear value sits in the top byte
    if (depthFormat == DepthFmt::Depth24Stencil8) {
        const u8 stencilVal = value >> 24;
        stencilClearOps[depthTarget->get().texture] = stencilVal;
    }
}
// Performs a display transfer by drawing a region of the source colour render target into the
// destination colour render target via textureCopyImpl.
//   inputAddr / outputAddr: addresses used to look up (or create) the source and destination targets
//   inputSize / outputSize: width in the low 16 bits, height in the high 16 bits
//   flags: bit 0 = vertical flip, bits 8-10 = input format, bits 12-14 = output format,
//          bits 24-25 = downscale mode
void RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {
const u32 inputWidth = inputSize & 0xffff;
const u32 inputHeight = inputSize >> 16;
const auto inputFormat = ToColorFormat(Helpers::getBits<8, 3>(flags));
const auto outputFormat = ToColorFormat(Helpers::getBits<12, 3>(flags));
const bool verticalFlip = flags & 1;
const PICA::Scaling scaling = static_cast<PICA::Scaling>(Helpers::getBits<24, 2>(flags));
u32 outputWidth = outputSize & 0xffff;
u32 outputHeight = outputSize >> 16;
// NOTE(review): the source target is requested with the input width but the *output* height;
// inputHeight is never read. Presumably intentional - confirm against the other renderer backends.
// The result is dereferenced without a check, so this relies on the lookup creating a target when none exists.
auto srcFramebuffer = getColorRenderTarget(inputAddr, inputFormat, inputWidth, outputHeight);
// Presumably flushes any clear still pending on the source texture before it is sampled - TODO confirm in clearColor
nextRenderPassName = "Clear before display transfer";
clearColor(nullptr, srcFramebuffer->texture);
// The source rectangle uses the unscaled output dimensions
Math::Rect<u32> srcRect = srcFramebuffer->getSubRect(inputAddr, outputWidth, outputHeight);
if (verticalFlip) {
// Swapping top and bottom flips the sampled region vertically
std::swap(srcRect.bottom, srcRect.top);
}
// Apply scaling for the destination rectangle.
if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) {
outputWidth >>= 1;
}
if (scaling == PICA::Scaling::XY) {
outputHeight >>= 1;
}
auto destFramebuffer = getColorRenderTarget(outputAddr, outputFormat, outputWidth, outputHeight);
// TODO: clear if not blitting to the whole framebuffer
Math::Rect<u32> destRect = destFramebuffer->getSubRect(outputAddr, outputWidth, outputHeight);
// Strided transfers are currently not special-cased; the warning below is intentionally disabled
if (inputWidth != outputWidth) {
// Helpers::warn("Strided display transfer is not handled correctly!\n");
}
textureCopyImpl(*srcFramebuffer, *destFramebuffer, srcRect, destRect);
}
// Accelerates a PICA texture copy (a raw memory copy) by blitting between cached colour render targets.
//   totalBytes: size of the copy; rounded down to a multiple of 16
//   inputSize / outputSize: low 16 bits = line width, high 16 bits = gap, both in 16-byte units
//   flags: not read by this implementation
// The copy is abandoned with a warning when it is smaller than 16 bytes, when the input and output
// widths differ, or when no existing render target contains inputAddr.
void RendererMTL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {
// Texture copy size is aligned to 16 byte units
const u32 copySize = totalBytes & ~0xf;
if (copySize == 0) {
Helpers::warn("TextureCopy total bytes less than 16!\n");
return;
}
// The width and gap are provided in 16-byte units.
const u32 inputWidth = (inputSize & 0xffff) << 4;
const u32 inputGap = (inputSize >> 16) << 4;
const u32 outputWidth = (outputSize & 0xffff) << 4;
const u32 outputGap = (outputSize >> 16) << 4;
// Strided copies are not special-cased; the warning below is intentionally disabled
if (inputGap != 0 || outputGap != 0) {
// Helpers::warn("Strided texture copy\n");
}
if (inputWidth != outputWidth) {
Helpers::warn("Input width does not match output width, cannot accelerate texture copy!");
return;
}
// Texture copy is a raw data copy in PICA, which means no format or tiling information is provided to the engine.
// Depending if the target surface is linear or tiled, games set inputWidth to either the width of the texture or
// the width multiplied by eight (because tiles are stored linearly in memory).
// To properly accelerate this we must examine each surface individually. For now we assume the most common case
// of tiled surface with RGBA8 format. If our assumption does not hold true, we abort the texture copy as inserting
// that surface is not correct.
// We assume the source surface is tiled and RGBA8. inputWidth is in bytes so divide it
// by eight * sizePerPixel(RGBA8) to convert it to a usable width.
const u32 bpp = sizePerPixel(PICA::ColorFmt::RGBA8);
const u32 copyStride = (inputWidth + inputGap) / (8 * bpp);
const u32 copyWidth = inputWidth / (8 * bpp);
// inputHeight/outputHeight are typically set to zero so they cannot be used to get the height of the copy region
// in contrast to display transfer. Compute height manually by dividing the copy size with the copy width. The result
// is the number of vertical tiles so multiply that by eight to get the actual copy height.
u32 copyHeight;
if (inputWidth != 0) [[likely]] {
copyHeight = (copySize / inputWidth) * 8;
} else {
// Avoid dividing by zero for a zero-width copy
copyHeight = 0;
}
// Find the source surface.
// The trailing false is createIfnotFound: an unknown source is never created, the copy is skipped instead.
auto srcFramebuffer = getColorRenderTarget(inputAddr, PICA::ColorFmt::RGBA8, copyStride, copyHeight, false);
if (!srcFramebuffer) {
Helpers::warn("RendererMTL::TextureCopy failed to locate src framebuffer!\n");
return;
}
// Presumably flushes any clear still pending on the source texture before it is sampled - TODO confirm in clearColor
nextRenderPassName = "Clear before texture copy";
clearColor(nullptr, srcFramebuffer->texture);
Math::Rect<u32> srcRect = srcFramebuffer->getSubRect(inputAddr, copyWidth, copyHeight);
// Assume the destination surface has the same format. Unless the surfaces have the same block width,
// texture copy does not make sense.
auto destFramebuffer = getColorRenderTarget(outputAddr, srcFramebuffer->format, copyWidth, copyHeight);
// TODO: clear if not blitting to the whole framebuffer
Math::Rect<u32> destRect = destFramebuffer->getSubRect(outputAddr, copyWidth, copyHeight);
textureCopyImpl(*srcFramebuffer, *destFramebuffer, srcRect, destRect);
}
// Draws a batch of already-transformed PICA vertices into the current colour (and optionally depth/stencil) target.
// Steps: resolve the render targets, derive depth-stencil and pipeline state from the PICA registers,
// begin or reuse a render pass, upload any dirty LUTs, bind vertex data, fixed-function state and
// shader resources, then issue a single non-indexed draw.
//   primType: PICA primitive type, translated with toMTLPrimitiveType
//   vertices: vertex data to draw; all of it is submitted in one call
void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) {
// Color
auto colorRenderTarget = getColorRenderTarget(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]);
// Depth stencil
const u32 depthControl = regs[PICA::InternalRegs::DepthAndColorMask];
const bool depthStencilWrite = regs[PICA::InternalRegs::DepthBufferWrite];
// DepthAndColorMask layout as decoded here: bit 0 = depth test enable, bits 4-6 = compare function,
// bits 8-11 = colour write mask, bit 12 = depth write enable
const bool depthEnable = depthControl & 0x1;
const bool depthWriteEnable = Helpers::getBit<12>(depthControl);
const u8 depthFunc = Helpers::getBits<4, 3>(depthControl);
const u8 colorMask = Helpers::getBits<8, 4>(depthControl);
// NOTE(review): the two initialisers presumably map to depthStencilWrite = false and depthFunc = 1 - confirm against DepthStencilHash
Metal::DepthStencilHash depthStencilHash{false, 1};
depthStencilHash.stencilConfig = regs[PICA::InternalRegs::StencilTest];
depthStencilHash.stencilOpConfig = regs[PICA::InternalRegs::StencilOp];
const bool stencilEnable = Helpers::getBit<0>(depthStencilHash.stencilConfig);
// A depth/stencil target is only fetched when depth testing, depth writing or stencil testing needs it
std::optional<Metal::DepthStencilRenderTarget> depthStencilRenderTarget = std::nullopt;
if (depthEnable) {
depthStencilHash.depthStencilWrite = depthWriteEnable && depthStencilWrite;
depthStencilHash.depthFunc = depthFunc;
depthStencilRenderTarget = getDepthRenderTarget();
} else {
if (depthWriteEnable) {
// Depth test disabled but writes enabled: keep the default compare function and force writes on
depthStencilHash.depthStencilWrite = true;
depthStencilRenderTarget = getDepthRenderTarget();
} else if (stencilEnable) {
depthStencilRenderTarget = getDepthRenderTarget();
}
}
// Depth uniforms
// Field order must match the DepthUniforms struct read by the vertexDraw shader
struct {
float depthScale;
float depthOffset;
bool depthMapEnable;
} depthUniforms;
depthUniforms.depthScale = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
depthUniforms.depthOffset = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
depthUniforms.depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
// -------- Pipeline --------
// The depth format stays Unknown1 when no depth/stencil target is attached
Metal::DrawPipelineHash pipelineHash{colorRenderTarget->format, DepthFmt::Unknown1};
if (depthStencilRenderTarget) {
pipelineHash.depthFmt = depthStencilRenderTarget->format;
}
// Fragment shader specialisation values, read straight from the lighting and alpha-test registers
pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1;
pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7;
pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u];
pipelineHash.fragHash.alphaControl = regs[0x104];
// Blending and logic op
pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0;
pipelineHash.colorWriteMask = colorMask;
// The logic op register is only consulted when blending is disabled
u8 logicOp = 3; // Copy
if (pipelineHash.blendEnabled) {
pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc];
} else {
logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]);
}
MTL::RenderPipelineState* pipeline = drawPipelineCache.get(pipelineHash);
// Depth stencil state
MTL::DepthStencilState* depthStencilState = depthStencilCache.get(depthStencilHash);
// -------- Render --------
// The descriptor is released by beginRenderPassIfNeeded
MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
// Collect pending clears for the attachments; any of them forces a new render pass
bool doesClear = clearColor(renderPassDescriptor, colorRenderTarget->texture);
if (depthStencilRenderTarget) {
if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture)) doesClear = true;
if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) {
if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture)) doesClear = true;
}
}
nextRenderPassName = "Draw vertices";
beginRenderPassIfNeeded(
renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? depthStencilRenderTarget->texture : nullptr)
);
// Update the LUT texture if necessary
if (gpu.lightingLUTDirty) {
updateLightingLUT(renderCommandEncoder);
}
if (gpu.fogLUTDirty) {
updateFogLUT(renderCommandEncoder);
}
// NOTE(review): commandEncoder appears to be a wrapper around renderCommandEncoder - confirm whether it filters redundant state changes
commandEncoder.setRenderPipelineState(pipeline);
commandEncoder.setDepthStencilState(depthStencilState);
// If size is < 4KB, use inline vertex data, otherwise use a buffer
if (vertices.size_bytes() < 4 * 1024) {
renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX);
} else {
Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes());
renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX);
}
// Viewport
// X/Y are 10-bit integers; width and height are 24-bit floats that are doubled here
const u32 viewportX = regs[PICA::InternalRegs::ViewportXY] & 0x3ff;
const u32 viewportY = (regs[PICA::InternalRegs::ViewportXY] >> 16) & 0x3ff;
const u32 viewportWidth = Floats::f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0f;
const u32 viewportHeight = Floats::f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 0xffffff).toFloat32() * 2.0f;
// Offset the viewport by the framebuffer's position inside the (possibly larger) render target
const auto rect = colorRenderTarget->getSubRect(colourBufferLoc, fbSize[0], fbSize[1]);
MTL::Viewport viewport{double(rect.left + viewportX), double(rect.bottom + viewportY), double(viewportWidth), double(viewportHeight), 0.0, 1.0};
renderCommandEncoder->setViewport(viewport);
// Blend color
if (pipelineHash.blendEnabled) {
// Constant blend colour is packed one byte per channel, red in the lowest byte
u32 constantColor = regs[PICA::InternalRegs::BlendColour];
const u8 r = constantColor & 0xff;
const u8 g = Helpers::getBits<8, 8>(constantColor);
const u8 b = Helpers::getBits<16, 8>(constantColor);
const u8 a = Helpers::getBits<24, 8>(constantColor);
renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f);
}
// Stencil reference
if (stencilEnable) {
const s8 reference = s8(Helpers::getBits<16, 8>(depthStencilHash.stencilConfig)); // Signed reference value
renderCommandEncoder->setStencilReferenceValue(reference);
}
// Bind resources
setupTextureEnvState(renderCommandEncoder);
bindTexturesToSlots();
// Buffer index 0 (both stages): raw PICA registers 0x48 up to (not including) 0x200
renderCommandEncoder->setVertexBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0);
renderCommandEncoder->setFragmentBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0);
// Buffer index 2: depth uniforms for the vertex stage, logic op for the fragment stage
renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2);
renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2);
// Fragment buffer index 3: the current slice index of the lighting and fog LUT textures
u32 lutSlices[2] = {lutLightingTexture->getCurrentIndex(), lutFogTexture->getCurrentIndex()};
renderCommandEncoder->setFragmentBytes(&lutSlices, sizeof(lutSlices), 3);
renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size()));
}
// Screenshot capture is not supported by the Metal backend yet: only logs a warning.
// name is ignored.
void RendererMTL::screenshot(const std::string& name) {
// TODO: implement
Helpers::warn("RendererMTL::screenshot not implemented");
}
// Tears down the graphics context: resets the renderer, deletes the LUT texture helpers,
// then releases the long-lived Metal objects held by the renderer.
void RendererMTL::deinitGraphicsContext() {
reset();
delete lutLightingTexture;
delete lutFogTexture;
// copyToLutTexturePipeline->release();
displayPipeline->release();
defaultDepthStencilState->release();
nullTexture->release();
linearSampler->release();
nearestSampler->release();
library->release();
// The command queue and the device are released last, after the objects above
commandQueue->release();
device->release();
}
// Returns the colour render target that contains addr.
// The lookup is address-based only (format and size are ignored for the search), because display
// transfers and texture copies may refer to a sub-rectangle of a surface, and texture copies do not
// know the surface format.
// When nothing matches: returns std::nullopt if createIfnotFound is false, otherwise creates a target
// with the given format and size, caches it and schedules a clear to transparent black for it.
std::optional<Metal::ColorRenderTarget> RendererMTL::getColorRenderTarget(
    u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound
) {
    if (auto cached = colorRenderTargetCache.findFromAddress(addr)) {
        return cached->get();
    }

    if (createIfnotFound) {
        Metal::ColorRenderTarget descriptor(device, addr, format, width, height);
        auto& created = colorRenderTargetCache.add(descriptor);

        // New targets start out cleared
        colorClearOps[created.texture] = {0, 0, 0, 0};
        return created;
    }

    return std::nullopt;
}
// Returns the cached depth/stencil render target matching the current depth buffer address,
// format and framebuffer size, creating and caching one if none exists.
// A newly created target gets a depth clear to 0.0 scheduled, plus a stencil clear to 0
// when its format is Depth24Stencil8.
Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() {
    Metal::DepthStencilRenderTarget descriptor(device, depthBufferLoc, depthBufferFormat, fbSize[0], fbSize[1]);

    if (auto cached = depthStencilRenderTargetCache.find(descriptor)) {
        return cached->get();
    }

    auto& created = depthStencilRenderTargetCache.add(descriptor);
    depthClearOps[created.texture] = 0.0f;
    if (created.format == DepthFmt::Depth24Stencil8) {
        stencilClearOps[created.texture] = 0;
    }

    return created;
}
// Returns the cached texture matching tex. On a cache miss, tex is added to the cache and
// decoded from the bytes at tex.location in emulated physical memory.
Metal::Texture& RendererMTL::getTexture(Metal::Texture& tex) {
    if (auto cached = textureCache.find(tex)) {
        return cached->get();
    }

    // View of the raw texture data in 3DS memory
    const auto rawTexels = std::span{gpu.getPointerPhys<u8>(tex.location), tex.sizeInBytes()};

    Metal::Texture& inserted = textureCache.add(tex);
    inserted.decodeTexture(rawTexels);
    return inserted;
}
// Gathers the registers of the six texture environment (TEV) stages and uploads them at buffer index 1:
// the constant colours go to the vertex stage, everything else goes to the fragment stage.
void RendererMTL::setupTextureEnvState(MTL::RenderCommandEncoder* encoder) {
    // First register of each TEV stage; the stage's other registers follow at fixed offsets
    static constexpr std::array<u32, 6> stageBases = {
        PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source,
        PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source,
    };

    // Fragment-stage payload. The field order defines the byte layout seen by the shader, so keep it as is.
    struct {
        u32 textureEnvSourceRegs[6];
        u32 textureEnvOperandRegs[6];
        u32 textureEnvCombinerRegs[6];
        u32 textureEnvScaleRegs[6];
    } fragmentState;

    // Vertex-stage payload: one packed constant colour per stage
    u32 constantColours[6];

    size_t stage = 0;
    for (const u32 base : stageBases) {
        fragmentState.textureEnvSourceRegs[stage] = regs[base];
        fragmentState.textureEnvOperandRegs[stage] = regs[base + 1];
        fragmentState.textureEnvCombinerRegs[stage] = regs[base + 2];
        constantColours[stage] = regs[base + 3];
        fragmentState.textureEnvScaleRegs[stage] = regs[base + 4];
        stage++;
    }

    encoder->setVertexBytes(&constantColours, sizeof(constantColours), 1);
    encoder->setFragmentBytes(&fragmentState, sizeof(fragmentState), 1);
}
void RendererMTL::bindTexturesToSlots() {
static constexpr std::array<u32, 3> ioBases = {
PICA::InternalRegs::Tex0BorderColor,
PICA::InternalRegs::Tex1BorderColor,
PICA::InternalRegs::Tex2BorderColor,
};
for (int i = 0; i < 3; i++) {
if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) {
commandEncoder.setFragmentTexture(nullTexture, i);
commandEncoder.setFragmentSamplerState(nearestSampler, i);
continue;
}
const size_t ioBase = ioBases[i];
const u32 dim = regs[ioBase + 1];
const u32 config = regs[ioBase + 2];
const u32 height = dim & 0x7ff;
const u32 width = Helpers::getBits<16, 11>(dim);
const u32 addr = (regs[ioBase + 4] & 0x0FFFFFFF) << 3;
u32 format = regs[ioBase + (i == 0 ? 13 : 5)] & 0xF;
if (addr != 0) [[likely]] {
Metal::Texture targetTex(device, addr, static_cast<PICA::TextureFmt>(format), width, height, config);
auto tex = getTexture(targetTex);
commandEncoder.setFragmentTexture(tex.texture, i);
commandEncoder.setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i);
} else {
// TODO: Bind a blank texture here. Some games, like Pokemon X, will render with a texture bound to nullptr, triggering GPU open bus
// Binding a blank texture makes all of those games look normal
}
}
}
// Uploads the lighting LUT into the next slice of the lighting LUT texture and clears the dirty flag.
// Each entry keeps its low 12 bits, shifted left by 4 so that it fills a 16-bit texel.
// encoder is not used.
void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
    gpu.lightingLUTDirty = false;

    std::array<u16, GPU::LightingLutSize> texels;
    for (size_t entry = 0; entry < gpu.lightingLUT.size(); entry++) {
        const uint64_t lutValue = gpu.lightingLUT[entry] & 0xFFF;
        texels[entry] = u16(lutValue << 4);
    }

    // Write into a fresh slice; rows are LIGHTING_LUT_TEXTURE_WIDTH texels of 2 bytes each
    const u32 slice = lutLightingTexture->getNextIndex();
    const MTL::Region region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count);
    lutLightingTexture->getTexture()->replaceRegion(region, 0, slice, texels.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0);
}
// Uploads the fog LUT into the next slice of the fog LUT texture and clears the dirty flag.
// Each LUT entry becomes two floats: the fog value (bits 13-23, unsigned) followed by the
// difference to the next entry (bits 0-12, signed), both divided by 2048.
// encoder is not used.
void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) {
    gpu.fogLUTDirty = false;

    std::array<float, FOG_LUT_TEXTURE_WIDTH * 2> texels = {0.0f};
    for (size_t entry = 0; entry < texels.size() / 2; entry++) {
        const uint32_t raw = gpu.fogLUT[entry];

        // Sign extend the 13-bit difference to 32 bits
        const int32_t signedDifference = (int32_t(raw & 0x1fff) << 19) >> 19;

        texels[2 * entry] = float((raw >> 13) & 0x7ff) / 2048.0f;
        texels[2 * entry + 1] = float(signedDifference) / 2048.0f;
    }

    const u32 slice = lutFogTexture->getNextIndex();
    lutFogTexture->getTexture()->replaceRegion(MTL::Region(0, 0, FOG_LUT_TEXTURE_WIDTH, 1), 0, slice, texels.data(), 0, 0);
}
// Copies srcRect of srcFramebuffer into destRect of destFramebuffer by drawing a textured
// triangle strip with the blit pipeline. The source is sampled with the nearest sampler.
void RendererMTL::textureCopyImpl(
    Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect,
    const Math::Rect<u32>& destRect
) {
    nextRenderPassName = "Texture copy";

    // The descriptor is released by beginRenderPassIfNeeded
    MTL::RenderPassDescriptor* passDescriptor = MTL::RenderPassDescriptor::alloc()->init();
    // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole
    // texture
    const bool hasPendingClear = clearColor(passDescriptor, destFramebuffer.texture);
    beginRenderPassIfNeeded(passDescriptor, hasPendingClear, destFramebuffer.texture);

    // Pipeline: keyed by the destination format, no depth attachment
    Metal::BlitPipelineHash blitHash{destFramebuffer.format, DepthFmt::Unknown1};
    auto blitPipeline = blitPipelineCache.get(blitHash);
    commandEncoder.setRenderPipelineState(blitPipeline);

    // Restrict rendering to the destination rectangle
    const MTL::Viewport destViewport{
        double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0};
    renderCommandEncoder->setViewport(destViewport);

    // Source rectangle normalised by the source texture size: {offset x, offset y, width, height}
    const float srcTextureWidth = (float)srcFramebuffer.size.u();
    const float srcTextureHeight = (float)srcFramebuffer.size.v();
    float srcRectNDC[4] = {
        srcRect.left / srcTextureWidth,
        srcRect.bottom / srcTextureHeight,
        (srcRect.right - srcRect.left) / srcTextureWidth,
        (srcRect.top - srcRect.bottom) / srcTextureHeight,
    };

    // Bind resources and draw
    renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0);
    renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, GET_HELPER_TEXTURE_BINDING(0));
    renderCommandEncoder->setFragmentSamplerState(nearestSampler, GET_HELPER_SAMPLER_STATE_BINDING(0));
    renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
}
// Makes sure a render command encoder targeting the given attachments is active.
// A new render pass is started when any of the following holds:
//   - doesClears is true
//   - there is no active encoder
//   - the colour texture differs from the one used by the current pass
//   - the depth texture differs from the one used by the current pass, unless the current pass
//     has a depth texture and this call needs none (the existing pass is reused in that case)
// Otherwise the current encoder is kept. renderPassDescriptor is always released before returning,
// so the caller must not use it afterwards.
void RendererMTL::beginRenderPassIfNeeded(
MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture
) {
createCommandBufferIfNeeded();
if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture ||
(depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) {
endRenderPass();
renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor);
renderCommandEncoder->setLabel(toNSString(nextRenderPassName));
// Hand the new encoder to the commandEncoder wrapper
commandEncoder.newRenderCommandEncoder(renderCommandEncoder);
// Bind persistent resources
// LUT texture
// Fragment texture slots 3 and 4 hold the lighting and fog LUTs; only sampler slot 3 is set here
renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3);
renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4);
renderCommandEncoder->setFragmentSamplerState(linearSampler, 3);
// Remember the attachments so that later calls can detect when the pass can be reused
lastColorTexture = colorTexture;
lastDepthTexture = depthTexture;
}
renderPassDescriptor->release();
}

View file

@ -0,0 +1,29 @@
#include <metal_stdlib>
using namespace metal;
// Helper resources are bound counting down from the top of the binding range (textures from 30,
// samplers from 15). The argument is parenthesised so that expressions such as `a + b` expand correctly.
#define GET_HELPER_TEXTURE_BINDING(binding) (30 - (binding))
#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - (binding))
// Vertex-to-fragment payload of the blit shader: clip-space position plus texture coordinate.
struct BasicVertexOut {
float4 position [[position]];
float2 uv;
};
// Source rectangle of the blit: vertexBlit computes the final UV as uv * scale + offset.
struct NDCViewport {
float2 offset;
float2 scale;
};
// Vertex shader of the blit pipeline. Generates positions purely from the vertex id:
// ids 0..3 produce UVs (0,0), (2,0), (0,2), (2,2), i.e. clip-space positions from -1 to 3,
// so the first triangle alone already covers the whole viewport. Y is negated after the
// position is derived, and the UV is then remapped into the source rectangle.
vertex BasicVertexOut vertexBlit(uint vid [[vertex_id]], constant NDCViewport& viewport [[buffer(0)]]) {
BasicVertexOut out;
out.uv = float2((vid << 1) & 2, vid & 2);
out.position = float4(out.uv * 2.0 - 1.0, 0.0, 1.0);
out.position.y = -out.position.y;
out.uv = out.uv * viewport.scale + viewport.offset;
return out;
}
// Fragment shader of the blit pipeline: samples the source texture (helper texture/sampler slot 0) at the interpolated UV.
fragment float4 fragmentBlit(BasicVertexOut in [[stage_in]], texture2d<float> tex [[texture(GET_HELPER_TEXTURE_BINDING(0))]], sampler samplr [[sampler(GET_HELPER_SAMPLER_STATE_BINDING(0))]]) {
return tex.sample(samplr, in.uv);
}

View file

@ -0,0 +1,9 @@
#include <metal_stdlib>
using namespace metal;
// Width of the LUT texture in texels, supplied as function constant 0 when the function is specialised.
constant ushort lutTextureWidth [[function_constant(0)]];
// The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass
// Each vertex invocation writes one float2 from data into the texel at column (vid % width) and
// row (arrayOffset + vid / width); the remaining two channels are written as 0.
vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d<float, access::write> out [[texture(0)]], device float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) {
out.write(float4(data[vid], 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth));
}

View file

@ -0,0 +1,759 @@
#include <metal_stdlib>
using namespace metal;
// Vertex-to-fragment payload of the display shader: clip-space position plus texture coordinate.
struct BasicVertexOut {
float4 position [[position]];
float2 uv;
};
// Clip-space corners of the full-screen display quad, indexed by vertex id.
constant float4 displayPositions[4] = {
float4(-1.0, -1.0, 0.0, 1.0),
float4( 1.0, -1.0, 0.0, 1.0),
float4(-1.0, 1.0, 0.0, 1.0),
float4( 1.0, 1.0, 0.0, 1.0)
};
// Texture coordinate for each corner above. Note that U grows along the quad's Y axis and V along
// its X axis, so the sampled image is rotated relative to the quad.
// NOTE(review): presumably this compensates for the orientation of the emulated framebuffer - confirm.
constant float2 displayTexCoord[4] = {
float2(0.0, 1.0),
float2(0.0, 0.0),
float2(1.0, 1.0),
float2(1.0, 0.0)
};
// Vertex shader of the display pipeline: looks up the position and UV for this vertex id in the constant tables.
// vid must be in the range 0..3.
vertex BasicVertexOut vertexDisplay(uint vid [[vertex_id]]) {
BasicVertexOut out;
out.position = displayPositions[vid];
out.uv = displayTexCoord[vid];
return out;
}
// Fragment shader of the display pipeline: samples texture slot 0 with sampler slot 0 at the interpolated UV.
fragment float4 fragmentDisplay(BasicVertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) {
return tex.sample(samplr, in.uv);
}
// Window onto the PICA register file covering registers 0x48 up to (not including) 0x200.
struct PicaRegs {
uint regs[0x200 - 0x48];
// Reads a register by its absolute index; reg must be in the range [0x48, 0x200).
uint read(uint reg) constant {
return regs[reg - 0x48];
}
};
// Vertex-stage texture environment data: one packed 8-bit-per-channel constant colour per TEV stage.
struct VertTEV {
uint textureEnvColor[6];
};
// Unpacks a 32-bit colour with one byte per channel into a float4 with components in [0, 1].
// The lowest byte becomes x (red) and the highest byte becomes w (alpha).
float4 abgr8888ToFloat4(uint abgr) {
    const float scale = 1.0 / 255.0;

    const float red = float(abgr & 0xffu);
    const float green = float((abgr >> 8) & 0xffu);
    const float blue = float((abgr >> 16) & 0xffu);
    const float alpha = float(abgr >> 24);

    return scale * float4(red, green, blue, alpha);
}
// Per-vertex input of vertexDraw. The attribute indices must match the vertex descriptor set up on the CPU side.
struct DrawVertexIn {
float4 position [[attribute(0)]];
float4 quaternion [[attribute(1)]];
float4 color [[attribute(2)]];
float2 texCoord0 [[attribute(3)]];
float2 texCoord1 [[attribute(4)]];
float texCoord0W [[attribute(5)]];
float3 view [[attribute(6)]];
float2 texCoord2 [[attribute(7)]];
};
// Metal cannot return arrays from vertex functions, so the six TEV constant colours are stored
// as separate members with an indexing operator on top. This is an ugly workaround.
struct EnvColor {
float4 c0;
float4 c1;
float4 c2;
float4 c3;
float4 c4;
float4 c5;
// Returns a reference to colour i for i in 0..5; any other index falls back to c0.
thread float4& operator[](int i) {
switch (i) {
case 0: return c0;
case 1: return c1;
case 2: return c2;
case 3: return c3;
case 4: return c4;
case 5: return c5;
default: return c0;
}
}
};
// Rotates v by the quaternion q (xyz = vector part, w = scalar part) using the expanded form
// 2(u.v)u + (s^2 - u.u)v + 2s(u x v). The result is only a pure rotation when q has unit length.
float3 rotateFloat3ByQuaternion(float3 v, float4 q) {
float3 u = q.xyz;
float s = q.w;
return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v);
}
// Convert an arbitrary-width floating point literal to an f32
//   hex: raw bits laid out as sign | exponent | mantissa
//   E:   number of exponent bits
//   M:   number of mantissa bits
// An input whose exponent and mantissa bits are all zero yields a signed zero. An all-ones exponent
// maps to the f32 all-ones exponent (infinity/NaN). Any other exponent is re-biased to the f32 bias;
// inputs with a zero exponent and a non-zero mantissa get no special denormal handling.
float decodeFP(uint hex, uint E, uint M) {
uint width = M + E + 1u;
// Difference between the f32 bias (127) and the source format's bias (2^(E-1) - 1)
uint bias = 128u - (1u << (E - 1u));
uint exponent = (hex >> M) & ((1u << E) - 1u);
uint mantissa = hex & ((1u << M) - 1u);
uint sign = (hex >> (E + M)) << 31u;
// True when any bit other than the sign bit is set
if ((hex & ((1u << (width - 1u)) - 1u)) != 0u) {
if (exponent == (1u << E) - 1u)
exponent = 255u;
else
exponent += bias;
// Left-align the mantissa within the 23-bit f32 mantissa field
hex = sign | (mantissa << (23u - M)) | (exponent << 23u);
} else {
hex = sign;
}
return as_type<float>(hex);
}
// Depth mapping parameters consumed by transformZ. The layout must match the struct uploaded by the renderer.
struct DepthUniforms {
float depthScale;
float depthOffset;
bool depthMapEnable;
};
// Vertex-to-fragment payload of the draw pipeline. The two texture environment colours are
// marked [[flat]], so they are passed through without interpolation.
struct DrawVertexOut {
float4 position [[position]];
float4 quaternion;
float4 color;
float3 texCoord0;
float2 texCoord1;
float2 texCoord2;
float3 view;
float3 normal;
float3 tangent;
float3 bitangent;
EnvColor textureEnvColor [[flat]];
float4 textureEnvBufferColor [[flat]];
};
// Actual return type of vertexDraw: the regular outputs plus two user clip distances.
struct DrawVertexOutWithClip {
DrawVertexOut out;
float clipDistance [[clip_distance]] [2];
};
// TODO: check this
// Applies the depth scale and offset to z/w. When depth mapping is disabled the result is
// additionally multiplied by w. The value is multiplied by w once more before being returned,
// presumably to cancel the perspective divide applied to the position later - TODO confirm.
float transformZ(float z, float w, constant DepthUniforms& depthUniforms) {
z = z / w * depthUniforms.depthScale + depthUniforms.depthOffset;
if (!depthUniforms.depthMapEnable) {
z *= w;
}
return z * w;
}
// Vertex shader of the draw pipeline.
//   in:            vertex attributes (see DrawVertexIn)
//   picaRegs:      PICA registers 0x48..0x1FF, buffer 0
//   tev:           packed TEV constant colours, buffer 1
//   depthUniforms: depth scale/offset/mapping mode, buffer 2
// Flips Y, remaps depth, flips the V coordinate of all texture coordinates, builds the
// normal/tangent/bitangent vectors from the quaternion and outputs two clip distances.
vertex DrawVertexOutWithClip vertexDraw(DrawVertexIn in [[stage_in]], constant PicaRegs& picaRegs [[buffer(0)]], constant VertTEV& tev [[buffer(1)]], constant DepthUniforms& depthUniforms [[buffer(2)]]) {
DrawVertexOut out;
// Position
out.position = in.position;
// Flip the y position
out.position.y = -out.position.y;
// Apply depth uniforms
out.position.z = transformZ(out.position.z, out.position.w, depthUniforms);
// Color
// Take the absolute value and clamp to at most 1.0
out.color = min(abs(in.color), 1.0);
// Texture coordinates
// The V coordinate of every set is flipped (v -> 1 - v)
out.texCoord0 = float3(in.texCoord0, in.texCoord0W);
out.texCoord0.y = 1.0 - out.texCoord0.y;
out.texCoord1 = in.texCoord1;
out.texCoord1.y = 1.0 - out.texCoord1.y;
out.texCoord2 = in.texCoord2;
out.texCoord2.y = 1.0 - out.texCoord2.y;
// View
out.view = in.view;
// TBN
// Rotate the Z, X and Y unit axes by the quaternion to get the normal, tangent and bitangent
out.normal = normalize(rotateFloat3ByQuaternion(float3(0.0, 0.0, 1.0), in.quaternion));
out.tangent = normalize(rotateFloat3ByQuaternion(float3(1.0, 0.0, 0.0), in.quaternion));
out.bitangent = normalize(rotateFloat3ByQuaternion(float3(0.0, 1.0, 0.0), in.quaternion));
out.quaternion = in.quaternion;
// Environment
for (int i = 0; i < 6; i++) {
out.textureEnvColor[i] = abgr8888ToFloat4(tev.textureEnvColor[i]);
}
// Register 0xFD holds the packed TEV buffer colour
out.textureEnvBufferColor = abgr8888ToFloat4(picaRegs.read(0xFDu));
DrawVertexOutWithClip outWithClip;
outWithClip.out = out;
// Parse clipping plane registers
// Registers 0x48-0x4B each hold one plane coefficient as a 24-bit float (7 exponent bits, 16 mantissa bits)
float4 clipData = float4(
decodeFP(picaRegs.read(0x48u) & 0xffffffu, 7u, 16u), decodeFP(picaRegs.read(0x49u) & 0xffffffu, 7u, 16u),
decodeFP(picaRegs.read(0x4Au) & 0xffffffu, 7u, 16u), decodeFP(picaRegs.read(0x4Bu) & 0xffffffu, 7u, 16u)
);
// There's also another, always-on clipping plane based on vertex z
// TODO: transform
// Both distances use the untransformed input position
outWithClip.clipDistance[0] = -in.position.z;
outWithClip.clipDistance[1] = dot(clipData, in.position);
return outWithClip;
}
// Function constants used to specialise the fragment shader. The indices (0-3) must match the
// ones the renderer uses when it sets the constant values.
constant bool lightingEnabled [[function_constant(0)]];
constant uint8_t lightingNumLights [[function_constant(1)]];
constant uint32_t lightingConfig1 [[function_constant(2)]];
constant uint16_t alphaControl [[function_constant(3)]];
// Mutable per-invocation state that is passed by reference between the fragment shader helpers.
struct Globals {
bool error_unimpl;
// TEV source values, indexed by the 4-bit source id taken from the TEV source registers
float4 tevSources[16];
float4 tevNextPreviousBuffer;
// Set by FragTEV::fetchSource when a source id in the range 6..12 is requested
bool tevUnimplementedSourceFlag = false;
uint GPUREG_LIGHTING_LUTINPUT_SCALE;
uint GPUREG_LIGHTING_LUTINPUT_ABS;
uint GPUREG_LIGHTING_LUTINPUT_SELECT;
uint GPUREG_LIGHTi_CONFIG;
// HACK
// These values are provided through the function constants of the same names instead
//bool lightingEnabled;
//uint8_t lightingNumLights;
//uint32_t lightingConfig1;
//uint16_t alphaControl;
float3 normal;
};
// See docs/lighting.md
// Packed table with one enable bit per (environment, LUT) pair. Each environment owns 7 consecutive bits.
constant uint samplerEnabledBitfields[2] = {0x7170e645u, 0x7f013fefu};

// Returns whether the lighting LUT lut_id is sampled in the lighting environment environment_id.
bool isSamplerEnabled(uint environment_id, uint lut_id) {
    const uint bitIndex = 7 * environment_id + lut_id;
    // Pick the 32-bit word that holds the bit, then test the bit inside that word
    const uint word = samplerEnabledBitfields[bitIndex >> 5];
    const uint mask = 1u << (bitIndex & 31u);
    return (word & mask) != 0u;
}
// Texture-environment (TEV) combiner configuration. Each array holds one packed
// word per TEV stage (6 stages): source selectors, operand selectors, combiner
// modes and output scales.
struct FragTEV {
    uint textureEnvSource[6];
    uint textureEnvOperand[6];
    uint textureEnvCombiner[6];
    uint textureEnvScale[6];

    // Reads TEV source `src_id` from globals.tevSources.
    // Sources 6..12 are flagged as unimplemented, but the slot is still returned.
    float4 fetchSource(thread Globals& globals, uint src_id) constant {
        if (src_id >= 6u && src_id < 13u) {
            globals.tevUnimplementedSourceFlag = true;
        }
        return globals.tevSources[src_id];
    }

    // Fetches input number `src_id` (0..2) of TEV stage `tev_id` and applies the
    // configured colour/alpha operands to it.
    //  - colour source: 4 bits at (src_id * 4) of textureEnvSource
    //  - alpha source:  4 bits at (src_id * 4 + 16) of textureEnvSource
    //  - colour operand: 4 bits at (src_id * 4) of textureEnvOperand
    //  - alpha operand:  3 bits at (12 + src_id * 4) of textureEnvOperand
    float4 getColorAndAlphaSource(thread Globals& globals, int tev_id, int src_id) constant {
        // Start from a defined value: the colour switch below does not cover every
        // 4-bit operand encoding, and returning an uninitialized vector is undefined.
        // NOTE(review): zero is an arbitrary choice; hardware behaviour for the
        // undocumented encodings is unknown.
        float4 result = float4(0.0);

        float4 colorSource = fetchSource(globals, (textureEnvSource[tev_id] >> (src_id * 4)) & 15u);
        float4 alphaSource = fetchSource(globals, (textureEnvSource[tev_id] >> (src_id * 4 + 16)) & 15u);

        uint colorOperand = (textureEnvOperand[tev_id] >> (src_id * 4)) & 15u;
        uint alphaOperand = (textureEnvOperand[tev_id] >> (12 + src_id * 4)) & 7u;

        // TODO: figure out what the undocumented values do
        switch (colorOperand) {
            case 0u: result.rgb = colorSource.rgb; break;              // Source color
            case 1u: result.rgb = 1.0 - colorSource.rgb; break;        // One minus source color
            case 2u: result.rgb = float3(colorSource.a); break;        // Source alpha
            case 3u: result.rgb = float3(1.0 - colorSource.a); break;  // One minus source alpha
            case 4u: result.rgb = float3(colorSource.r); break;        // Source red
            case 5u: result.rgb = float3(1.0 - colorSource.r); break;  // One minus source red
            case 8u: result.rgb = float3(colorSource.g); break;        // Source green
            case 9u: result.rgb = float3(1.0 - colorSource.g); break;  // One minus source green
            case 12u: result.rgb = float3(colorSource.b); break;       // Source blue
            case 13u: result.rgb = float3(1.0 - colorSource.b); break; // One minus source blue
            default: break;
        }

        // TODO: figure out what the undocumented values do
        switch (alphaOperand) {
            case 0u: result.a = alphaSource.a; break;        // Source alpha
            case 1u: result.a = 1.0 - alphaSource.a; break;  // One minus source alpha
            case 2u: result.a = alphaSource.r; break;        // Source red
            case 3u: result.a = 1.0 - alphaSource.r; break;  // One minus source red
            case 4u: result.a = alphaSource.g; break;        // Source green
            case 5u: result.a = 1.0 - alphaSource.g; break;  // One minus source green
            case 6u: result.a = alphaSource.b; break;        // Source blue
            case 7u: result.a = 1.0 - alphaSource.b; break;  // One minus source blue
            default: break;
        }

        return result;
    }

    // Evaluates TEV stage `tev_id`: combines its three inputs with the colour
    // combiner (low 4 bits of textureEnvCombiner) and the alpha combiner (4 bits
    // at bit 16), then applies the per-channel power-of-two output scale.
    float4 calculateCombiner(thread Globals& globals, int tev_id) constant {
        float4 source0 = getColorAndAlphaSource(globals, tev_id, 0);
        float4 source1 = getColorAndAlphaSource(globals, tev_id, 1);
        float4 source2 = getColorAndAlphaSource(globals, tev_id, 2);

        uint colorCombine = textureEnvCombiner[tev_id] & 15u;
        uint alphaCombine = (textureEnvCombiner[tev_id] >> 16) & 15u;

        float4 result = float4(1.0);

        // TODO: figure out what the undocumented values do
        switch (colorCombine) {
            case 0u: result.rgb = source0.rgb; break;                                            // Replace
            case 1u: result.rgb = source0.rgb * source1.rgb; break;                              // Modulate
            case 2u: result.rgb = min(float3(1.0), source0.rgb + source1.rgb); break;            // Add
            case 3u: result.rgb = clamp(source0.rgb + source1.rgb - 0.5, 0.0, 1.0); break;       // Add signed
            case 4u: result.rgb = mix(source1.rgb, source0.rgb, source2.rgb); break;             // Interpolate
            case 5u: result.rgb = max(source0.rgb - source1.rgb, 0.0); break;                    // Subtract
            case 6u: result.rgb = float3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB
            case 7u: result = float4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break;    // Dot3 RGBA
            case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break;      // Multiply then add
            case 9u: result.rgb = min((source0.rgb + source1.rgb), 1.0) * source2.rgb; break;    // Add then multiply
            default: break;
        }

        if (colorCombine != 7u) {  // The color combiner also writes the alpha channel in the "Dot3 RGBA" mode.
            // TODO: figure out what the undocumented values do
            // TODO: test if the alpha combiner supports all the same modes as the color combiner.
            switch (alphaCombine) {
                case 0u: result.a = source0.a; break;                                      // Replace
                case 1u: result.a = source0.a * source1.a; break;                          // Modulate
                case 2u: result.a = min(1.0, source0.a + source1.a); break;                // Add
                case 3u: result.a = clamp(source0.a + source1.a - 0.5, 0.0, 1.0); break;   // Add signed
                case 4u: result.a = mix(source1.a, source0.a, source2.a); break;           // Interpolate
                case 5u: result.a = max(0.0, source0.a - source1.a); break;                // Subtract
                case 8u: result.a = min(source0.a * source1.a + source2.a, 1.0); break;    // Multiply then add
                case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break;    // Add then multiply
                default: break;
            }
        }

        // Output scale: 1x, 2x, 4x or 8x, selected by 2 bits per channel group
        result.rgb *= float(1 << (textureEnvScale[tev_id] & 3u));
        result.a *= float(1 << ((textureEnvScale[tev_id] >> 16) & 3u));

        return result;
    }
};
// Framebuffer logic operation selector. The numeric values are the ones
// performLogicOpU switches on; in the per-entry notes below `s` is the source
// (new fragment) bits and `d` is the destination (previous colour) bits.
enum class LogicOp : uint8_t {
Clear = 0, // bit pattern of float 0.0 in every channel
And = 1, // s & d
AndReverse = 2, // s & ~d
Copy = 3, // s
Set = 4, // bit pattern of float 1.0 in every channel
CopyInverted = 5, // ~s
NoOp = 6, // d
Invert = 7, // ~d
Nand = 8, // ~(s & d)
Or = 9, // s | d
Nor = 10, // ~(s | d)
Xor = 11, // s ^ d
Equiv = 12, // ~(s ^ d)
AndInverted = 13, // ~s & d
OrReverse = 14, // s | ~d
OrInverted = 15 // ~s | d
};
// Applies the bitwise framebuffer logic operation `logicOp` to the raw bit
// patterns of the source (`s`) and destination (`d`) colours.
// Clear and Set return the bit patterns of float 0.0 / 1.0 because the caller
// reinterprets the result as a float4.
uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) {
    switch (logicOp) {
        case LogicOp::Clear: return as_type<uint4>(float4(0.0));
        case LogicOp::And: return s & d;
        case LogicOp::AndReverse: return s & ~d;
        case LogicOp::Copy: return s;
        case LogicOp::Set: return as_type<uint4>(float4(1.0));
        case LogicOp::CopyInverted: return ~s;
        case LogicOp::NoOp: return d;
        case LogicOp::Invert: return ~d;
        case LogicOp::Nand: return ~(s & d);
        case LogicOp::Or: return s | d;
        case LogicOp::Nor: return ~(s | d);
        case LogicOp::Xor: return s ^ d;
        case LogicOp::Equiv: return ~(s ^ d);
        case LogicOp::AndInverted: return ~s & d;
        case LogicOp::OrReverse: return s | ~d;
        case LogicOp::OrInverted: return ~s | d;
    }

    // Only reached if the value is not one of the 16 enumerators (the op is a
    // raw byte). Fall back to Copy so the function never runs off the end
    // without returning a value.
    return s;
}
// Identifiers of the lighting lookup tables, passed as `lut_id` to
// isSamplerEnabled and lightLutLookup (7 LUTs per lighting environment).
#define D0_LUT 0u
#define D1_LUT 1u
#define SP_LUT 2u
#define FR_LUT 3u
#define RB_LUT 4u
#define RG_LUT 5u
#define RR_LUT 6u
// Reads one lighting LUT entry: texel (index, lut) of array layer `slice`,
// returning its red channel.
float lutLookup(texture2d_array<float> texLut, uint slice, uint lut, uint index) {
    const uint2 texel = uint2(index, lut);
    const float4 entry = texLut.read(texel, slice);
    return entry.r;
}
// Samples lighting LUT `lut_id` for light `light_id` in lighting environment
// `environment_id` and returns the scaled LUT value.
// Returns 1.0 (neutral factor) when the sampler is not enabled for the
// environment, when the LUT is disabled through lightingConfig1, or when
// `lut_id` is not a known LUT (in which case globals.error_unimpl is also set).
// The LUT input is selected by GPUREG_LIGHTING_LUTINPUT_SELECT and scaled by
// GPUREG_LIGHTING_LUTINPUT_SCALE, both read from `globals`.
float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array<float> texLut, uint slice, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) {
    uint lut_index;
    int bit_in_config1;

    if (lut_id == SP_LUT) {
        // These are the spotlight attenuation LUTs
        bit_in_config1 = 8 + int(light_id & 7u);
        lut_index = 8u + light_id;
    } else if (lut_id <= 6) {
        bit_in_config1 = 16 + int(lut_id);
        lut_index = lut_id;
    } else {
        // Unknown LUT: bail out instead of continuing with uninitialized
        // lut_index / bit_in_config1.
        globals.error_unimpl = true;
        return 1.0;
    }

    bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id);  // 7 luts per environment

    // A set bit in lightingConfig1 disables the corresponding LUT
    if (!current_sampler_enabled || (extract_bits(lightingConfig1, bit_in_config1, 1) != 0u)) {
        return 1.0;
    }

    // Scale ids 0..5 give 1x..32x; ids 6 and 7 give 0.25x and 0.5x
    uint scale_id = extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
    float scale = float(1u << scale_id);
    if (scale_id >= 6u) scale /= 256.0;

    float delta = 1.0;
    uint input_id = extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);

    switch (input_id) {
        case 0u: {
            delta = dot(globals.normal, normalize(half_vector));
            break;
        }
        case 1u: {
            delta = dot(normalize(in.view), normalize(half_vector));
            break;
        }
        case 2u: {
            delta = dot(globals.normal, normalize(in.view));
            break;
        }
        case 3u: {
            delta = dot(light_vector, globals.normal);
            break;
        }
        case 4u: {
            int GPUREG_LIGHTi_SPOTDIR_LOW = int(picaRegs.read(0x0146u + (light_id << 4u)));
            int GPUREG_LIGHTi_SPOTDIR_HIGH = int(picaRegs.read(0x0147u + (light_id << 4u)));

            // Sign extend the 13-bit fields manually. This step was written for
            // the GLSL version of the shader, where bitfieldExtract is not always
            // available. NOTE(review): Metal's extract_bits on a signed int may
            // already sign extend (see the MSL spec); the manual step is a no-op
            // in that case.
            int se_x = extract_bits(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
            int se_y = extract_bits(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
            int se_z = extract_bits(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);

            if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000;
            if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000;
            if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000;

            // These are fixed point 1.1.11 values, so we need to convert them to float
            float x = float(se_x) / 2047.0;
            float y = float(se_y) / 2047.0;
            float z = float(se_z) / 2047.0;
            float3 spotlight_vector = float3(x, y, z);
            delta = dot(light_vector, spotlight_vector);  // spotlight direction is negated so we don't negate light_vector
            break;
        }
        case 5u: {
            delta = 1.0;  // TODO: cos <greek symbol> (aka CP);
            globals.error_unimpl = true;
            break;
        }
        default: {
            delta = 1.0;
            globals.error_unimpl = true;
            break;
        }
    }

    // 0 = enabled
    if (extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
        // Two sided diffuse
        if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
            delta = max(delta, 0.0);
        } else {
            delta = abs(delta);
        }
        int index = int(clamp(floor(delta * 255.0), 0.f, 255.f));
        return lutLookup(texLut, slice, lut_index, index) * scale;
    } else {
        // Range is [-1, 1]: non-negative inputs use entries 0..127, negative
        // inputs wrap around to entries 128..255
        int index = int(clamp(floor(delta * 128.0), -128.f, 127.f));
        if (index < 0) index += 256;
        return lutLookup(texLut, slice, lut_index, index) * scale;
    }
}
// Unpacks a lighting colour register into a float3.
// The channels are 8-bit fields at bits 20 (red), 10 (green) and 0 (blue),
// each mapped from [0...255] to [0.0...1.0].
float3 regToColor(uint reg) {
    const float scale = 1.0 / 255.0;

    const float red = float(extract_bits(reg, 20, 8));
    const float green = float(extract_bits(reg, 10, 8));
    const float blue = float(extract_bits(reg, 0, 8));

    return scale * float3(red, green, blue);
}
// Computes the fragment lighting colours from the PICA lighting registers:
// `primaryColor` receives global ambient + per-light ambient/diffuse, and
// `secondaryColor` receives the per-light specular terms. Both are clamped to [0, 1].
// Implements the following algorithm: https://mathb.in/26766
void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array<float> texLut, uint slice, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) {
    const uint lightPermutation = picaRegs.read(0x01D9u);  // GPUREG_LIGHTING_LIGHT_PERMUTATION

    primaryColor = float4(0.0, 0.0, 0.0, 1.0);
    secondaryColor = float4(0.0, 0.0, 0.0, 1.0);

    const uint config0 = picaRegs.read(0x01C3u);  // GPUREG_LIGHTING_CONFIG0
    globals.GPUREG_LIGHTING_LUTINPUT_SCALE = picaRegs.read(0x01D2u);
    globals.GPUREG_LIGHTING_LUTINPUT_ABS = picaRegs.read(0x01D0u);
    globals.GPUREG_LIGHTING_LUTINPUT_SELECT = picaRegs.read(0x01D1u);

    // Quaternions describe a transformation from surface-local space to eye space.
    // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1),
    // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0).
    // Bump mode (bits 28-29 of CONFIG0) is ignored for now because it breaks some games
    // ie. Toad Treasure Tracker, so every mode uses the unperturbed normal.
    globals.normal = rotateFloat3ByQuaternion(float3(0.0, 0.0, 1.0), in.quaternion);

    float4 diffuseAccum = float4(0.0, 0.0, 0.0, 1.0);
    float4 specularAccum = float4(0.0, 0.0, 0.0, 1.0);

    const uint envId = extract_bits(config0, 4, 4);
    const bool clampSpecular = extract_bits(config0, 27, 1) == 1u;

    // Declared outside the loop: the fresnel lookup after the loop uses the
    // values left by the last processed light.
    uint lightId;
    float3 lightVector = float3(0.0);
    float3 halfVector = float3(0.0);

    for (uint i = 0u; i < lightingNumLights + 1; i++) {
        // Each permutation slot is 4 bits wide and holds a 3-bit light id
        lightId = extract_bits(lightPermutation, int(i) << 2, 3);

        // Every light owns a block of 16 registers starting at 0x0140
        const uint regBase = lightId << 4u;
        const uint specular0Reg = picaRegs.read(0x0140u + regBase);
        const uint specular1Reg = picaRegs.read(0x0141u + regBase);
        const uint diffuseReg = picaRegs.read(0x0142u + regBase);
        const uint ambientReg = picaRegs.read(0x0143u + regBase);
        const uint vectorLowReg = picaRegs.read(0x0144u + regBase);
        const uint vectorHighReg = picaRegs.read(0x0145u + regBase);
        globals.GPUREG_LIGHTi_CONFIG = picaRegs.read(0x0149u + regBase);

        const float3 lightPosition = float3(
            decodeFP(extract_bits(vectorLowReg, 0, 16), 5u, 10u),
            decodeFP(extract_bits(vectorLowReg, 16, 16), 5u, 10u),
            decodeFP(extract_bits(vectorHighReg, 0, 16), 5u, 10u)
        );

        // Config bit 0: 0 = positional light (offset by the view vector), 1 = directional light
        const bool directional = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) != 0u;
        lightVector = directional ? lightPosition : lightPosition + in.view;

        const float lightDistance = length(lightVector);
        lightVector = normalize(lightVector);
        halfVector = lightVector + normalize(in.view);

        // N dot Li, clamped or mirrored depending on the two sided diffuse bit
        float NdotL = dot(globals.normal, lightVector);
        const bool twoSided = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 1, 1) != 0u;
        NdotL = twoSided ? abs(NdotL) : max(0.0, NdotL);

        // Only computed (and only read) when one of the geometric factor bits is set
        float geometricFactor;
        const bool useGeo0 = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
        const bool useGeo1 = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
        if (useGeo0 || useGeo1) {
            const float halfLengthSquared = dot(halfVector, halfVector);
            geometricFactor = halfLengthSquared == 0.0 ? 0.0 : min(NdotL / halfLengthSquared, 1.0);
        }

        // Distance attenuation is active when the light's bit in lightingConfig1 is clear
        float distanceAttenuation = 1.0;
        if (extract_bits(lightingConfig1, 24 + int(lightId), 1) == 0u) {
            const uint biasBits = extract_bits(picaRegs.read(0x014Au + regBase), 0, 20);
            const uint scaleBits = extract_bits(picaRegs.read(0x014Bu + regBase), 0, 20);

            const float attenuationBias = decodeFP(biasBits, 7u, 12u);
            const float attenuationScale = decodeFP(scaleBits, 7u, 12u);

            float delta = lightDistance * attenuationScale + attenuationBias;
            delta = clamp(delta, 0.0, 1.0);
            int index = int(clamp(floor(delta * 255.0), 0.0, 255.0));
            distanceAttenuation = lutLookup(texLut, slice, 16u + lightId, index);
        }

        const float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, slice, envId, SP_LUT, lightId, lightVector, halfVector);
        const float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, envId, D0_LUT, lightId, lightVector, halfVector);
        const float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, envId, D1_LUT, lightId, lightVector, halfVector);

        // Green and blue reflection fall back to the red LUT value when their own sampler is off
        float3 reflectedColor;
        reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, slice, envId, RR_LUT, lightId, lightVector, halfVector);
        reflectedColor.g = isSamplerEnabled(envId, RG_LUT)
            ? lightLutLookup(globals, in, picaRegs, texLut, slice, envId, RG_LUT, lightId, lightVector, halfVector)
            : reflectedColor.r;
        reflectedColor.b = isSamplerEnabled(envId, RB_LUT)
            ? lightLutLookup(globals, in, picaRegs, texLut, slice, envId, RB_LUT, lightId, lightVector, halfVector)
            : reflectedColor.r;

        float3 specular0 = regToColor(specular0Reg) * specular0Distribution;
        float3 specular1 = regToColor(specular1Reg) * specular1Distribution * reflectedColor;
        if (useGeo0) specular0 *= geometricFactor;
        if (useGeo1) specular1 *= geometricFactor;

        // With highlight clamping on, unlit fragments (N dot L == 0) get no specular
        const float clampFactor = (clampSpecular && NdotL == 0.0) ? 0.0 : 1.0;

        const float lightFactor = distanceAttenuation * spotlightAttenuation;
        diffuseAccum.rgb += lightFactor * (regToColor(ambientReg) + regToColor(diffuseReg) * NdotL);
        specularAccum.rgb += lightFactor * clampFactor * (specular0 + specular1);
    }

    // Fresnel output: CONFIG0 bit 2 routes it to primary alpha, bit 3 to secondary alpha
    const bool fresnelToPrimary = extract_bits(config0, 2, 1) == 1u;
    const bool fresnelToSecondary = extract_bits(config0, 3, 1) == 1u;
    if (fresnelToPrimary || fresnelToSecondary) {
        const float fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, slice, envId, FR_LUT, lightId, lightVector, halfVector);
        if (fresnelToPrimary) {
            diffuseAccum.a = fresnelFactor;
        }
        if (fresnelToSecondary) {
            specularAccum.a = fresnelFactor;
        }
    }

    const uint globalAmbientReg = picaRegs.read(0x01C0u);  // GPUREG_LIGHTING_AMBIENT
    const float4 globalAmbient = float4(regToColor(globalAmbientReg), 1.0);
    primaryColor = clamp(globalAmbient + diffuseAccum, 0.0, 1.0);
    secondaryColor = clamp(specularAccum, 0.0, 1.0);
}
// Float front-end for performLogicOpU: reinterprets both colours as raw bits,
// applies the logic op, and reinterprets the resulting bits as a float4.
float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) {
    const uint4 sourceBits = as_type<uint4>(s);
    const uint4 destBits = as_type<uint4>(d);
    const uint4 resultBits = performLogicOpU(logicOp, sourceBits, destBits);
    return as_type<float4>(resultBits);
}
// Fragment entry point for draws. Pipeline, in order:
//   1. fill the TEV source table (vertex colour, lighting, textures),
//   2. run the 6 TEV combiner stages,
//   3. apply fog when enabled,
//   4. run the alpha test (may discard the fragment),
//   5. combine with the previous framebuffer colour through the logic op.
fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture2d_array<float> texLightingLut [[texture(3)]], texture1d_array<float> texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
    Globals globals;

    // HACK: lightingEnabled, lightingNumLights, lightingConfig1 and alphaControl are
    // taken from names declared outside this function instead of being read here.
    // The register reads they stand in for are:
    //globals.lightingEnabled = picaRegs.read(0x008Fu) != 0u;
    //globals.lightingNumLights = picaRegs.read(0x01C2u);
    //globals.lightingConfig1 = picaRegs.read(0x01C4u);
    //globals.alphaControl = picaRegs.read(0x104);

    // Give every TEV source slot a defined value first: slots 3..5 are only
    // written when the matching texture unit is enabled and slots 6..12 are never
    // written, yet a TEV stage may still select any of them.
    // NOTE(review): zero is an arbitrary default; hardware behaviour for these
    // sources is not established here.
    for (int src = 0; src < 16; src++) {
        globals.tevSources[src] = float4(0.0);
    }

    globals.tevSources[0] = in.color;
    if (lightingEnabled) {
        calcLighting(globals, in, picaRegs, texLightingLut, lutSlices.x, linearSampler, globals.tevSources[1], globals.tevSources[2]);
    } else {
        globals.tevSources[1] = float4(0.0);
        globals.tevSources[2] = float4(0.0);
    }

    uint textureConfig = picaRegs.read(0x80u);
    // Bit 13 makes texture unit 2 use texture unit 1's coordinates
    float2 texCoord2 = (textureConfig & (1u << 13)) != 0u ? in.texCoord1 : in.texCoord2;

    if ((textureConfig & 1u) != 0u) globals.tevSources[3] = tex0.sample(samplr0, in.texCoord0.xy);
    if ((textureConfig & 2u) != 0u) globals.tevSources[4] = tex1.sample(samplr1, in.texCoord1);
    if ((textureConfig & 4u) != 0u) globals.tevSources[5] = tex2.sample(samplr2, texCoord2);

    globals.tevSources[13] = float4(0.0);  // Previous buffer
    globals.tevSources[15] = in.color;     // Previous combiner

    globals.tevNextPreviousBuffer = in.textureEnvBufferColor;
    uint textureEnvUpdateBuffer = picaRegs.read(0xE0u);

    for (int i = 0; i < 6; i++) {
        globals.tevSources[14] = in.textureEnvColor[i];  // Constant color
        globals.tevSources[15] = tev.calculateCombiner(globals, i);
        globals.tevSources[13] = globals.tevNextPreviousBuffer;

        // Only the first 4 stages can update the combiner buffer: bits 8..11
        // gate the RGB update and bits 12..15 gate the alpha update
        if (i < 4) {
            if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) {
                globals.tevNextPreviousBuffer.rgb = globals.tevSources[15].rgb;
            }

            if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) {
                globals.tevNextPreviousBuffer.a = globals.tevSources[15].a;
            }
        }
    }

    float4 color = globals.tevSources[15];

    // Fog
    bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u;
    if (enable_fog) {
        bool flipDepth = (textureEnvUpdateBuffer & (1u << 16)) != 0u;
        float fogIndex = flipDepth ? 1.0 - in.position.z : in.position.z;
        fogIndex *= 128.0;
        float clampedIndex = clamp(floor(fogIndex), 0.0, 127.0);
        float delta = fogIndex - clampedIndex;
        // The fog LUT stores (value, slope) pairs; interpolate inside the entry
        float2 value = texFogLut.read(uint(clampedIndex), lutSlices.y).rg;
        float fogFactor = clamp(value.r + value.g * delta, 0.0, 1.0);

        uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u);

        // Annoyingly color is not encoded in the same way as light color
        float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0;
        float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0;
        float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0;
        float3 fogColor = float3(r, g, b);

        color.rgb = mix(fogColor, color.rgb, fogFactor);
    }

    // Perform alpha test
    if ((alphaControl & 1u) != 0u) {  // Check if alpha test is on
        uint func = (alphaControl >> 4u) & 7u;
        float reference = float((alphaControl >> 8u) & 0xffu) / 255.0;
        float alpha = color.a;

        switch (func) {
            case 0u:  // Never pass alpha test
                discard_fragment();
                break;
            case 1u: break;  // Always pass alpha test
            case 2u:         // Pass if equal
                if (alpha != reference) discard_fragment();
                break;
            case 3u:  // Pass if not equal
                if (alpha == reference) discard_fragment();
                break;
            case 4u:  // Pass if less than
                if (alpha >= reference) discard_fragment();
                break;
            case 5u:  // Pass if less than or equal
                if (alpha > reference) discard_fragment();
                break;
            case 6u:  // Pass if greater than
                if (alpha <= reference) discard_fragment();
                break;
            case 7u:  // Pass if greater than or equal
                if (alpha < reference) discard_fragment();
                break;
        }
    }

    return performLogicOp(logicOp, color, prevColor);
}

View file

@ -124,6 +124,7 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent)
const RendererType rendererType = emu->getConfig().rendererType;
usingGL = (rendererType == RendererType::OpenGL || rendererType == RendererType::Software || rendererType == RendererType::Null);
usingVk = (rendererType == RendererType::Vulkan);
usingMtl = (rendererType == RendererType::Metal);
if (usingGL) {
// Make GL context current for this thread, enable VSync
@ -134,6 +135,8 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent)
emu->initGraphicsContext(glContext);
} else if (usingVk) {
Helpers::panic("Vulkan on Qt is currently WIP, try the SDL frontend instead!");
} else if (usingMtl) {
Helpers::panic("Metal on Qt currently doesn't work, try the SDL frontend instead!");
} else {
Helpers::panic("Unsupported graphics backend for Qt frontend!");
}
@ -666,4 +669,4 @@ void MainWindow::setupControllerSensors(SDL_GameController* controller) {
if (haveAccelerometer) {
SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_ACCEL, SDL_TRUE);
}
}
}

View file

@ -91,6 +91,16 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp
}
#endif
#ifdef PANDA3DS_ENABLE_METAL
if (config.rendererType == RendererType::Metal) {
window = SDL_CreateWindow(windowTitle, windowX, windowY, windowWidth, windowHeight, SDL_WINDOW_METAL | SDL_WINDOW_RESIZABLE);
if (window == nullptr) {
Helpers::warn("Window creation failed: %s", SDL_GetError());
}
}
#endif
emu.initGraphicsContext(window);
}
@ -324,7 +334,7 @@ void FrontendSDL::run() {
}
break;
}
case SDL_CONTROLLERSENSORUPDATE: {
if (event.csensor.sensor == SDL_SENSOR_GYRO) {
auto rotation = Sensors::SDL::convertRotation({

View file

@ -17,6 +17,7 @@ std::optional<RendererType> Renderer::typeFromString(std::string inString) {
{"null", RendererType::Null}, {"nil", RendererType::Null}, {"none", RendererType::Null},
{"gl", RendererType::OpenGL}, {"ogl", RendererType::OpenGL}, {"opengl", RendererType::OpenGL},
{"vk", RendererType::Vulkan}, {"vulkan", RendererType::Vulkan}, {"vulcan", RendererType::Vulkan},
{"mtl", RendererType::Metal}, {"metal", RendererType::Metal},
{"sw", RendererType::Software}, {"soft", RendererType::Software}, {"software", RendererType::Software},
{"softrast", RendererType::Software},
};
@ -33,7 +34,8 @@ const char* Renderer::typeToString(RendererType rendererType) {
case RendererType::Null: return "null";
case RendererType::OpenGL: return "opengl";
case RendererType::Vulkan: return "vulkan";
case RendererType::Metal: return "metal";
case RendererType::Software: return "software";
default: return "Invalid";
}
}
}