mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-05 22:55:41 +13:00
Merge pull request #745 from wheremyfoodat/ios
iOS driver & Metal renderer improvements
This commit is contained in:
commit
761f9264ba
16 changed files with 587 additions and 384 deletions
|
@ -65,6 +65,7 @@ option(BUILD_LIBRETRO_CORE "Build a Libretro core" OFF)
|
|||
option(ENABLE_RENDERDOC_API "Build with support for Renderdoc's capture API for graphics debugging" ON)
|
||||
option(DISABLE_SSE4 "Build with SSE4 instructions disabled, may reduce performance" OFF)
|
||||
option(USE_LIBRETRO_AUDIO "Enable to use the LR audio device with the LR core. Otherwise our own device is used" OFF)
|
||||
option(IOS_SIMULATOR_BUILD "Compiling for IOS simulator (Set to off if compiling for a real iPhone)" ON)
|
||||
|
||||
# Discord RPC & LuaJIT are currently not supported on iOS
|
||||
if(IOS)
|
||||
|
@ -419,6 +420,10 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
|
|||
if(IOS)
|
||||
set(SOURCE_FILES ${SOURCE_FILES} src/miniaudio/miniaudio.m)
|
||||
target_compile_definitions(AlberCore PUBLIC "PANDA3DS_IOS=1")
|
||||
|
||||
if (IOS_SIMULATOR_BUILD)
|
||||
target_compile_definitions(AlberCore PUBLIC "PANDA3DS_IOS_SIMULATOR=1")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
cmrc_add_resource_library(
|
||||
|
@ -599,14 +604,16 @@ if(ENABLE_METAL AND APPLE)
|
|||
include/renderer_mtl/mtl_common.hpp
|
||||
include/renderer_mtl/pica_to_mtl.hpp
|
||||
include/renderer_mtl/objc_helper.hpp
|
||||
include/renderer_mtl/texture_decoder.hpp
|
||||
)
|
||||
|
||||
set(RENDERER_MTL_SOURCE_FILES src/core/renderer_mtl/metal_cpp_impl.cpp
|
||||
src/core/renderer_mtl/renderer_mtl.cpp
|
||||
src/core/renderer_mtl/mtl_texture.cpp
|
||||
src/core/renderer_mtl/mtl_etc1.cpp
|
||||
src/core/renderer_mtl/mtl_lut_texture.cpp
|
||||
src/core/renderer_mtl/pica_to_mtl.cpp
|
||||
src/core/renderer_mtl/objc_helper.mm
|
||||
src/core/renderer_mtl/texture_decoder.cpp
|
||||
src/host_shaders/metal_shaders.metal
|
||||
src/host_shaders/metal_blit.metal
|
||||
#src/host_shaders/metal_copy_to_lut_texture.metal
|
||||
|
@ -620,15 +627,26 @@ if(ENABLE_METAL AND APPLE)
|
|||
set(SHADER_SOURCE "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metal")
|
||||
set(SHADER_IR "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.ir")
|
||||
set(SHADER_METALLIB "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metallib")
|
||||
|
||||
# MacOS, iOS and the iOS simulator all use different compilation options for shaders
|
||||
set(MetalSDK "macosx")
|
||||
if(IOS)
|
||||
if (IOS_SIMULATOR_BUILD)
|
||||
set(MetalSDK "iphonesimulator")
|
||||
else()
|
||||
set(MetalSDK "iphoneos")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# TODO: only include sources in debug builds
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_IR}
|
||||
COMMAND xcrun -sdk macosx metal -gline-tables-only -frecord-sources -o ${SHADER_IR} -c ${SHADER_SOURCE}
|
||||
COMMAND xcrun -sdk ${MetalSDK} metal -gline-tables-only -frecord-sources -o ${SHADER_IR} -c ${SHADER_SOURCE}
|
||||
DEPENDS ${SHADER_SOURCE}
|
||||
VERBATIM)
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_METALLIB}
|
||||
COMMAND xcrun -sdk macosx metallib -o ${SHADER_METALLIB} ${SHADER_IR}
|
||||
COMMAND xcrun -sdk ${MetalSDK} metallib -o ${SHADER_METALLIB} ${SHADER_IR}
|
||||
DEPENDS ${SHADER_IR}
|
||||
VERBATIM)
|
||||
set(RENDERER_MTL_HOST_SHADERS_SOURCES ${RENDERER_MTL_HOST_SHADERS_SOURCES} ${SHADER_METALLIB})
|
||||
|
@ -657,7 +675,7 @@ if(ENABLE_METAL AND APPLE)
|
|||
target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_METAL=1")
|
||||
target_include_directories(AlberCore PRIVATE third_party/metal-cpp)
|
||||
# TODO: check if all of them are needed
|
||||
target_link_libraries(AlberCore PRIVATE "-framework Metal" "-framework Foundation" "-framework QuartzCore" resources_renderer_mtl)
|
||||
target_link_libraries(AlberCore PUBLIC "-framework Metal" "-framework Foundation" "-framework QuartzCore" resources_renderer_mtl)
|
||||
endif()
|
||||
|
||||
source_group("Header Files\\Core" FILES ${HEADER_FILES})
|
||||
|
@ -795,7 +813,13 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE)
|
|||
elseif(BUILD_HYDRA_CORE)
|
||||
target_compile_definitions(AlberCore PRIVATE PANDA3DS_HYDRA_CORE=1)
|
||||
include_directories(third_party/hydra_core/include)
|
||||
add_library(Alber SHARED src/hydra_core.cpp)
|
||||
|
||||
set(SHARED_SOURCE_FILES src/hydra_core.cpp)
|
||||
if(IOS)
|
||||
set(SHARED_SOURCE_FILES ${SHARED_SOURCE_FILES} src/ios_driver.mm)
|
||||
endif()
|
||||
|
||||
add_library(Alber SHARED ${SHARED_SOURCE_FILES})
|
||||
target_link_libraries(Alber PUBLIC AlberCore)
|
||||
elseif(BUILD_LIBRETRO_CORE)
|
||||
include_directories(third_party/libretro/include)
|
||||
|
|
|
@ -55,6 +55,13 @@ struct EmulatorConfig {
|
|||
static constexpr bool audioEnabledDefault = false;
|
||||
#endif
|
||||
|
||||
// We default to OpenGL on all platforms other than iOS
|
||||
#if defined(PANDA3DS_IOS)
|
||||
static constexpr RendererType rendererDefault = RendererType::Metal;
|
||||
#else
|
||||
static constexpr RendererType rendererDefault = RendererType::OpenGL;
|
||||
#endif
|
||||
|
||||
bool shaderJitEnabled = shaderJitDefault;
|
||||
bool useUbershaders = ubershaderDefault;
|
||||
bool accelerateShaders = accelerateShadersDefault;
|
||||
|
@ -65,7 +72,7 @@ struct EmulatorConfig {
|
|||
bool forceShadergenForLights = true;
|
||||
int lightShadergenThreshold = 1;
|
||||
|
||||
RendererType rendererType = RendererType::OpenGL;
|
||||
RendererType rendererType = rendererDefault;
|
||||
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::HLE;
|
||||
|
||||
bool sdCardInserted = true;
|
||||
|
|
6
include/ios_driver.h
Normal file
6
include/ios_driver.h
Normal file
|
@ -0,0 +1,6 @@
|
|||
#pragma once
|
||||
#include <Foundation/Foundation.h>
|
||||
#include <QuartzCore/QuartzCore.h>
|
||||
|
||||
void iosCreateEmulator();
|
||||
void iosRunFrame(CAMetalLayer* layer);
|
|
@ -86,6 +86,10 @@ class Renderer {
|
|||
// Called to notify the core to use OpenGL ES and not desktop GL
|
||||
virtual void setupGLES() {}
|
||||
|
||||
// Only relevant for Metal renderer on iOS
|
||||
// Passes a SwiftUI MTKView's layer (CAMetalLayer) to the renderer
|
||||
virtual void setMTKLayer(void* layer) {};
|
||||
|
||||
// This function is called on every draw call before parsing vertex data.
|
||||
// It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between
|
||||
// ubershaders and shadergen, and so on.
|
||||
|
|
|
@ -8,8 +8,9 @@
|
|||
#include "boost/icl/interval.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "math_util.hpp"
|
||||
#include "opengl.hpp"
|
||||
#include "renderer_mtl/pica_to_mtl.hpp"
|
||||
// TODO: remove dependency on OpenGL
|
||||
#include "opengl.hpp"
|
||||
|
||||
template <typename T>
|
||||
using Interval = boost::icl::right_open_interval<T>;
|
||||
|
@ -27,7 +28,8 @@ namespace Metal {
|
|||
// Range of VRAM taken up by buffer
|
||||
Interval<u32> range;
|
||||
|
||||
PICA::PixelFormatInfo formatInfo;
|
||||
PICA::MTLPixelFormatInfo formatInfo;
|
||||
MTL::Texture* base = nullptr;
|
||||
MTL::Texture* texture = nullptr;
|
||||
MTL::SamplerState* sampler = nullptr;
|
||||
|
||||
|
@ -52,22 +54,7 @@ namespace Metal {
|
|||
void free();
|
||||
u64 sizeInBytes();
|
||||
|
||||
u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
|
||||
u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
|
||||
u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
|
||||
|
||||
// Get the morton interleave offset of a texel based on its U and V values
|
||||
static u32 mortonInterleave(u32 u, u32 v);
|
||||
// Get the byte offset of texel (u, v) in the texture
|
||||
static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel);
|
||||
static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width);
|
||||
|
||||
// Returns the format of this texture as a string
|
||||
std::string_view formatToString() { return PICA::textureFormatToString(format); }
|
||||
|
||||
// Returns the texel at coordinates (u, v) of an ETC1(A4) texture
|
||||
// TODO: Make hasAlpha a template parameter
|
||||
u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data);
|
||||
u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
|
||||
};
|
||||
} // namespace Metal
|
||||
|
|
|
@ -3,31 +3,28 @@
|
|||
#include <Metal/Metal.hpp>
|
||||
|
||||
#include "PICA/regs.hpp"
|
||||
// TODO: remove dependency on OpenGL
|
||||
#include "opengl.hpp"
|
||||
|
||||
namespace PICA {
|
||||
struct PixelFormatInfo {
|
||||
struct MTLPixelFormatInfo {
|
||||
MTL::PixelFormat pixelFormat;
|
||||
size_t bytesPerTexel;
|
||||
void (*decoder)(OpenGL::uvec2, u32, u32, std::span<const u8>, u8*);
|
||||
|
||||
bool needsSwizzle = false;
|
||||
MTL::TextureSwizzleChannels swizzle{
|
||||
.red = MTL::TextureSwizzleRed,
|
||||
.green = MTL::TextureSwizzleGreen,
|
||||
.blue = MTL::TextureSwizzleBlue,
|
||||
.alpha = MTL::TextureSwizzleAlpha,
|
||||
};
|
||||
};
|
||||
|
||||
constexpr PixelFormatInfo pixelFormatInfos[14] = {
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // RGB8
|
||||
{MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551
|
||||
{MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565
|
||||
{MTL::PixelFormatABGR4Unorm, 2}, // RGBA4
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // IA8
|
||||
{MTL::PixelFormatRG8Unorm, 2}, // RG8
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // I8
|
||||
{MTL::PixelFormatA8Unorm, 1}, // A8
|
||||
{MTL::PixelFormatABGR4Unorm, 2}, // IA4
|
||||
{MTL::PixelFormatABGR4Unorm, 2}, // I4
|
||||
{MTL::PixelFormatA8Unorm, 1}, // A4
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // ETC1
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4
|
||||
};
|
||||
extern MTLPixelFormatInfo mtlPixelFormatInfos[14];
|
||||
|
||||
inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast<int>(format)]; }
|
||||
void checkForMTLPixelFormatSupport(MTL::Device* device);
|
||||
inline MTLPixelFormatInfo getMTLPixelFormatInfo(TextureFmt format) { return mtlPixelFormatInfos[static_cast<int>(format)]; }
|
||||
|
||||
inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) {
|
||||
switch (format) {
|
||||
|
@ -35,7 +32,11 @@ namespace PICA {
|
|||
case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm;
|
||||
case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm?
|
||||
case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm?
|
||||
#ifdef PANDA3DS_IOS
|
||||
case ColorFmt::RGBA4: return MTL::PixelFormatRGBA8Unorm; // iOS + Metal doesn't support ABGR4 properly, at least on the simulator
|
||||
#else
|
||||
case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -42,11 +42,13 @@ class RendererMTL final : public Renderer {
|
|||
virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {}
|
||||
#endif
|
||||
|
||||
private:
|
||||
CA::MetalLayer* metalLayer;
|
||||
virtual void setMTKLayer(void* layer) override;
|
||||
|
||||
MTL::Device* device;
|
||||
MTL::CommandQueue* commandQueue;
|
||||
private:
|
||||
CA::MetalLayer* metalLayer = nullptr;
|
||||
|
||||
MTL::Device* device = nullptr;
|
||||
MTL::CommandQueue* commandQueue = nullptr;
|
||||
|
||||
Metal::CommandEncoder commandEncoder;
|
||||
|
||||
|
@ -98,6 +100,7 @@ class RendererMTL final : public Renderer {
|
|||
void endRenderPass() {
|
||||
if (renderCommandEncoder) {
|
||||
renderCommandEncoder->endEncoding();
|
||||
renderCommandEncoder->release();
|
||||
renderCommandEncoder = nullptr;
|
||||
}
|
||||
}
|
||||
|
|
24
include/renderer_mtl/texture_decoder.hpp
Normal file
24
include/renderer_mtl/texture_decoder.hpp
Normal file
|
@ -0,0 +1,24 @@
|
|||
#pragma once
|
||||
|
||||
#include "helpers.hpp"
|
||||
// TODO: remove dependency on OpenGL
|
||||
#include "opengl.hpp"
|
||||
|
||||
void decodeTexelABGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelBGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelA1BGR5ToBGR5A1(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelA1BGR5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelB5G6R5ToB5G6R5(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelB5G6R5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelABGR4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelABGR4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelAI8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelGR8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelI8ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelA8ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelAI4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelAI4ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelI4ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelA4ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelETC1ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
||||
void decodeTexelETC1A4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData);
|
|
@ -72,14 +72,14 @@ void EmulatorConfig::load() {
|
|||
auto gpu = gpuResult.unwrap();
|
||||
|
||||
// Get renderer
|
||||
auto rendererName = toml::find_or<std::string>(gpu, "Renderer", "OpenGL");
|
||||
auto rendererName = toml::find_or<std::string>(gpu, "Renderer", Renderer::typeToString(rendererDefault));
|
||||
auto configRendererType = Renderer::typeFromString(rendererName);
|
||||
|
||||
if (configRendererType.has_value()) {
|
||||
rendererType = configRendererType.value();
|
||||
} else {
|
||||
Helpers::warn("Invalid renderer specified: %s\n", rendererName.c_str());
|
||||
rendererType = RendererType::OpenGL;
|
||||
rendererType = rendererDefault;
|
||||
}
|
||||
|
||||
shaderJitEnabled = toml::find_or<toml::boolean>(gpu, "EnableShaderJIT", shaderJitDefault);
|
||||
|
|
|
@ -1,116 +0,0 @@
|
|||
#include <algorithm>
|
||||
|
||||
#include "colour.hpp"
|
||||
#include "renderer_mtl/mtl_texture.hpp"
|
||||
#include "renderer_mtl/renderer_mtl.hpp"
|
||||
|
||||
|
||||
using namespace Helpers;
|
||||
|
||||
namespace Metal {
|
||||
// Sign-extends the low 3 bits of val to a full 32-bit value.
// The value is shifted left so that bit 2 lands in the sign bit of a signed 32-bit integer, and the
// signed right shift then brings it back down, carrying that bit through the upper bits.
static constexpr u32 signExtend3To32(u32 val) {
	const s32 shiftedUp = s32(val) << 29;
	return static_cast<u32>(shiftedUp >> 29);
}
|
||||
|
||||
u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data) {
|
||||
// Pixel offset of the 8x8 tile based on u, v and the width of the texture
|
||||
u32 offs = ((u & ~7) * 8) + ((v & ~7) * width);
|
||||
if (!hasAlpha) {
|
||||
offs >>= 1;
|
||||
}
|
||||
|
||||
// In-tile offsets for u/v
|
||||
u &= 7;
|
||||
v &= 7;
|
||||
|
||||
// ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles
|
||||
// Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes
|
||||
const u32 subTileSize = hasAlpha ? 16 : 8;
|
||||
const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in?
|
||||
|
||||
// In-subtile offsets for u/v
|
||||
u &= 3;
|
||||
v &= 3;
|
||||
offs += subTileSize * subTileIndex;
|
||||
|
||||
u32 alpha;
|
||||
const u64* ptr = reinterpret_cast<const u64*>(data.data() + offs); // Cast to u64*
|
||||
|
||||
if (hasAlpha) {
|
||||
// First 64 bits of the 4x4 subtile are alpha data
|
||||
const u64 alphaData = *ptr++;
|
||||
alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf);
|
||||
} else {
|
||||
alpha = 0xff; // ETC1 without alpha uses ff for every pixel
|
||||
}
|
||||
|
||||
// Next 64 bits of the subtile are colour data
|
||||
u64 colourData = *ptr;
|
||||
return decodeETC(alpha, u, v, colourData);
|
||||
}
|
||||
|
||||
u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) {
|
||||
static constexpr u32 modifiers[8][2] = {
|
||||
{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183},
|
||||
};
|
||||
|
||||
// Parse colour data for 4x4 block
|
||||
const u32 subindices = getBits<0, 16, u32>(colourData);
|
||||
const u32 negationFlags = getBits<16, 16, u32>(colourData);
|
||||
const bool flip = getBit<32>(colourData);
|
||||
const bool diffMode = getBit<33>(colourData);
|
||||
|
||||
// Note: index1 is indeed stored on the higher bits, with index2 in the lower bits
|
||||
const u32 tableIndex1 = getBits<37, 3, u32>(colourData);
|
||||
const u32 tableIndex2 = getBits<34, 3, u32>(colourData);
|
||||
const u32 texelIndex = u * 4 + v; // Index of the texel in the block
|
||||
|
||||
if (flip) std::swap(u, v);
|
||||
|
||||
s32 r, g, b;
|
||||
if (diffMode) {
|
||||
r = getBits<59, 5, s32>(colourData);
|
||||
g = getBits<51, 5, s32>(colourData);
|
||||
b = getBits<43, 5, s32>(colourData);
|
||||
|
||||
if (u >= 2) {
|
||||
r += signExtend3To32(getBits<56, 3, u32>(colourData));
|
||||
g += signExtend3To32(getBits<48, 3, u32>(colourData));
|
||||
b += signExtend3To32(getBits<40, 3, u32>(colourData));
|
||||
}
|
||||
|
||||
// Expand from 5 to 8 bits per channel
|
||||
r = Colour::convert5To8Bit(r);
|
||||
g = Colour::convert5To8Bit(g);
|
||||
b = Colour::convert5To8Bit(b);
|
||||
} else {
|
||||
if (u < 2) {
|
||||
r = getBits<60, 4, s32>(colourData);
|
||||
g = getBits<52, 4, s32>(colourData);
|
||||
b = getBits<44, 4, s32>(colourData);
|
||||
} else {
|
||||
r = getBits<56, 4, s32>(colourData);
|
||||
g = getBits<48, 4, s32>(colourData);
|
||||
b = getBits<40, 4, s32>(colourData);
|
||||
}
|
||||
|
||||
// Expand from 4 to 8 bits per channel
|
||||
r = Colour::convert4To8Bit(r);
|
||||
g = Colour::convert4To8Bit(g);
|
||||
b = Colour::convert4To8Bit(b);
|
||||
}
|
||||
|
||||
const u32 index = (u < 2) ? tableIndex1 : tableIndex2;
|
||||
s32 modifier = modifiers[index][(subindices >> texelIndex) & 1];
|
||||
|
||||
if (((negationFlags >> texelIndex) & 1) != 0) {
|
||||
modifier = -modifier;
|
||||
}
|
||||
|
||||
r = std::clamp(r + modifier, 0, 255);
|
||||
g = std::clamp(g + modifier, 0, 255);
|
||||
b = std::clamp(b + modifier, 0, 255);
|
||||
|
||||
return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r);
|
||||
}
|
||||
} // namespace Metal
|
|
@ -1,16 +1,18 @@
|
|||
#include "renderer_mtl/mtl_texture.hpp"
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
|
||||
#include "colour.hpp"
|
||||
#include "renderer_mtl/objc_helper.hpp"
|
||||
|
||||
|
||||
using namespace Helpers;
|
||||
|
||||
namespace Metal {
|
||||
void Texture::allocate() {
|
||||
formatInfo = PICA::getPixelFormatInfo(format);
|
||||
formatInfo = PICA::getMTLPixelFormatInfo(format);
|
||||
|
||||
MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();
|
||||
descriptor->setTextureType(MTL::TextureType2D);
|
||||
|
@ -20,11 +22,14 @@ namespace Metal {
|
|||
descriptor->setUsage(MTL::TextureUsageShaderRead);
|
||||
descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers?
|
||||
texture = device->newTexture(descriptor);
|
||||
texture->setLabel(toNSString(
|
||||
"Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v())
|
||||
));
|
||||
texture->setLabel(toNSString(fmt::format("Base texture {} {}x{}", std::string(PICA::textureFormatToString(format)), size.u(), size.v())));
|
||||
descriptor->release();
|
||||
|
||||
if (formatInfo.needsSwizzle) {
|
||||
base = texture;
|
||||
texture = base->newTextureView(formatInfo.pixelFormat, MTL::TextureType2D, NS::Range(0, 1), NS::Range(0, 1), formatInfo.swizzle);
|
||||
}
|
||||
|
||||
setNewConfig(config);
|
||||
}
|
||||
|
||||
|
@ -58,6 +63,11 @@ namespace Metal {
|
|||
if (texture) {
|
||||
texture->release();
|
||||
}
|
||||
|
||||
if (base) {
|
||||
base->release();
|
||||
}
|
||||
|
||||
if (sampler) {
|
||||
sampler->release();
|
||||
}
|
||||
|
@ -99,210 +109,19 @@ namespace Metal {
|
|||
}
|
||||
}
|
||||
|
||||
// u and v are the UVs of the relevant texel
|
||||
// Texture data is stored interleaved in Morton order, i.e. in a Z-order curve, as shown here:
|
||||
// https://en.wikipedia.org/wiki/Z-order_curve
|
||||
// Textures are split into 8x8 tiles. This function returns the in-tile offset depending on the u & v of the texel
|
||||
// The in-tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of v % 8
|
||||
// As documented in this picture: https://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg
|
||||
u32 Texture::mortonInterleave(u32 u, u32 v) {
	// Per-axis contributions to the in-tile index: the 3 low bits of u are spread onto bits 0, 2 and 4,
	// while the 3 low bits of v are spread onto bits 1, 3 and 5
	static constexpr u32 uContribution[8] = {0, 1, 4, 5, 16, 17, 20, 21};
	static constexpr u32 vContribution[8] = {0, 2, 8, 10, 32, 34, 40, 42};

	// Only the position inside the 8x8 tile matters here
	const u32 tileU = u % 8;
	const u32 tileV = v % 8;
	return uContribution[tileU] + vContribution[tileV];
}
|
||||
|
||||
// Get the byte offset of texel (u, v) in the texture
|
||||
u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) {
	// Texel index at which the enclosing 8x8 tile starts: every tile to the left contributes 64 texels,
	// every row of tiles above contributes 8 * width texels
	const u32 tileStart = (u & ~7u) * 8 + (v & ~7u) * width;
	// Index of the texel itself, once its Morton-ordered position inside the tile is added
	const u32 texelIndex = tileStart + mortonInterleave(u, v);

	return texelIndex * bytesPerPixel;
}
|
||||
|
||||
// Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte
|
||||
u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) {
	// Start of the enclosing 8x8 tile, measured in texels
	const u32 tileStart = (u & ~7u) * 8 + (v & ~7u) * width;
	// Index of the texel after adding its Morton-ordered position inside the tile
	const u32 texelIndex = tileStart + mortonInterleave(u, v);

	// Two 4-bit texels share one byte, so the byte offset is half of the texel index
	return texelIndex / 2;
}
|
||||
|
||||
u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
|
||||
switch (fmt) {
|
||||
case PICA::TextureFmt::A4: {
|
||||
const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
|
||||
|
||||
// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
|
||||
u8 alpha = data[offset] >> ((u % 2) ? 4 : 0);
|
||||
alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha));
|
||||
|
||||
// A8
|
||||
return alpha;
|
||||
}
|
||||
|
||||
case PICA::TextureFmt::A8: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 1);
|
||||
const u8 alpha = data[offset];
|
||||
|
||||
// A8
|
||||
return alpha;
|
||||
}
|
||||
|
||||
default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
|
||||
}
|
||||
}
|
||||
|
||||
u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
|
||||
switch (fmt) {
|
||||
case PICA::TextureFmt::RG8: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||
constexpr u8 b = 0;
|
||||
const u8 g = data[offset];
|
||||
const u8 r = data[offset + 1];
|
||||
|
||||
// RG8
|
||||
return (g << 8) | r;
|
||||
}
|
||||
|
||||
case PICA::TextureFmt::RGBA4: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||
u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
|
||||
|
||||
u8 alpha = getBits<0, 4, u8>(texel);
|
||||
u8 b = getBits<4, 4, u8>(texel);
|
||||
u8 g = getBits<8, 4, u8>(texel);
|
||||
u8 r = getBits<12, 4, u8>(texel);
|
||||
|
||||
// ABGR4
|
||||
return (r << 12) | (g << 8) | (b << 4) | alpha;
|
||||
}
|
||||
|
||||
case PICA::TextureFmt::RGBA5551: {
|
||||
const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||
const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
|
||||
|
||||
u8 alpha = getBit<0>(texel) ? 0xff : 0;
|
||||
u8 b = getBits<1, 5, u8>(texel);
|
||||
u8 g = getBits<6, 5, u8>(texel);
|
||||
u8 r = getBits<11, 5, u8>(texel);
|
||||
|
||||
// BGR5A1
|
||||
return (alpha << 15) | (r << 10) | (g << 5) | b;
|
||||
}
|
||||
|
||||
case PICA::TextureFmt::RGB565: {
|
||||
const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||
const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
|
||||
|
||||
const u8 b = getBits<0, 5, u8>(texel);
|
||||
const u8 g = getBits<5, 6, u8>(texel);
|
||||
const u8 r = getBits<11, 5, u8>(texel);
|
||||
|
||||
// B5G6R5
|
||||
return (r << 11) | (g << 5) | b;
|
||||
}
|
||||
|
||||
case PICA::TextureFmt::IA4: {
|
||||
const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
|
||||
const u8 texel = data[offset];
|
||||
const u8 alpha = texel & 0xf;
|
||||
const u8 intensity = texel >> 4;
|
||||
|
||||
// ABGR4
|
||||
return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha;
|
||||
}
|
||||
|
||||
case PICA::TextureFmt::I4: {
|
||||
u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
|
||||
|
||||
// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
|
||||
u8 intensity = data[offset] >> ((u % 2) ? 4 : 0);
|
||||
intensity = getBits<0, 4>(intensity);
|
||||
|
||||
// ABGR4
|
||||
return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xff;
|
||||
}
|
||||
|
||||
default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
|
||||
}
|
||||
}
|
||||
|
||||
u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
|
||||
switch (fmt) {
|
||||
case PICA::TextureFmt::RGB8: {
|
||||
const u32 offset = getSwizzledOffset(u, v, size.u(), 3);
|
||||
const u8 b = data[offset];
|
||||
const u8 g = data[offset + 1];
|
||||
const u8 r = data[offset + 2];
|
||||
|
||||
// RGBA8
|
||||
return (0xff << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
|
||||
case PICA::TextureFmt::RGBA8: {
|
||||
const u32 offset = getSwizzledOffset(u, v, size.u(), 4);
|
||||
const u8 alpha = data[offset];
|
||||
const u8 b = data[offset + 1];
|
||||
const u8 g = data[offset + 2];
|
||||
const u8 r = data[offset + 3];
|
||||
|
||||
// RGBA8
|
||||
return (alpha << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
|
||||
case PICA::TextureFmt::I8: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 1);
|
||||
const u8 intensity = data[offset];
|
||||
|
||||
// RGBA8
|
||||
return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity;
|
||||
}
|
||||
|
||||
case PICA::TextureFmt::IA8: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||
|
||||
// Same as I8 except each pixel gets its own alpha value too
|
||||
const u8 alpha = data[offset];
|
||||
const u8 intensity = data[offset + 1];
|
||||
|
||||
// RGBA8
|
||||
return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
|
||||
}
|
||||
|
||||
case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data);
|
||||
case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data);
|
||||
|
||||
default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
|
||||
}
|
||||
}
|
||||
|
||||
void Texture::decodeTexture(std::span<const u8> data) {
|
||||
std::vector<u8> decoded;
|
||||
decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel);
|
||||
std::unique_ptr<u8[]> decodedData(new u8[u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel]);
|
||||
// This pointer will be incremented by our texture decoders
|
||||
u8* decodePtr = decodedData.get();
|
||||
|
||||
// Decode texels line by line
|
||||
for (u32 v = 0; v < size.v(); v++) {
|
||||
for (u32 u = 0; u < size.u(); u++) {
|
||||
if (formatInfo.bytesPerTexel == 1) {
|
||||
u8 texel = decodeTexelU8(u, v, format, data);
|
||||
decoded.push_back(texel);
|
||||
} else if (formatInfo.bytesPerTexel == 2) {
|
||||
u16 texel = decodeTexelU16(u, v, format, data);
|
||||
decoded.push_back((texel & 0x00ff) >> 0);
|
||||
decoded.push_back((texel & 0xff00) >> 8);
|
||||
} else if (formatInfo.bytesPerTexel == 4) {
|
||||
u32 texel = decodeTexelU32(u, v, format, data);
|
||||
decoded.push_back((texel & 0x000000ff) >> 0);
|
||||
decoded.push_back((texel & 0x0000ff00) >> 8);
|
||||
decoded.push_back((texel & 0x00ff0000) >> 16);
|
||||
decoded.push_back((texel & 0xff000000) >> 24);
|
||||
} else {
|
||||
Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel);
|
||||
}
|
||||
formatInfo.decoder(size, u, v, data, decodePtr);
|
||||
decodePtr += formatInfo.bytesPerTexel;
|
||||
}
|
||||
}
|
||||
|
||||
texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0);
|
||||
texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decodedData.get(), formatInfo.bytesPerTexel * size.u(), 0);
|
||||
}
|
||||
} // namespace Metal
|
||||
|
|
62
src/core/renderer_mtl/pica_to_mtl.cpp
Normal file
62
src/core/renderer_mtl/pica_to_mtl.cpp
Normal file
|
@ -0,0 +1,62 @@
|
|||
#include "renderer_mtl/pica_to_mtl.hpp"
|
||||
|
||||
#include "renderer_mtl/texture_decoder.hpp"
|
||||
|
||||
using namespace Helpers;
|
||||
|
||||
namespace PICA {
|
||||
MTLPixelFormatInfo mtlPixelFormatInfos[14] = {
|
||||
{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelABGR8ToRGBA8}, // RGBA8
|
||||
{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelBGR8ToRGBA8}, // RGB8
|
||||
{MTL::PixelFormatBGR5A1Unorm, 2, decodeTexelA1BGR5ToBGR5A1}, // RGBA5551
|
||||
{MTL::PixelFormatB5G6R5Unorm, 2, decodeTexelB5G6R5ToB5G6R5}, // RGB565
|
||||
{MTL::PixelFormatABGR4Unorm, 2, decodeTexelABGR4ToABGR4}, // RGBA4
|
||||
{MTL::PixelFormatRG8Unorm,
|
||||
2,
|
||||
decodeTexelAI8ToRG8,
|
||||
true,
|
||||
{
|
||||
.red = MTL::TextureSwizzleRed,
|
||||
.green = MTL::TextureSwizzleRed,
|
||||
.blue = MTL::TextureSwizzleRed,
|
||||
.alpha = MTL::TextureSwizzleGreen,
|
||||
}}, // IA8
|
||||
{MTL::PixelFormatRG8Unorm, 2, decodeTexelGR8ToRG8}, // RG8
|
||||
{MTL::PixelFormatR8Unorm,
|
||||
1,
|
||||
decodeTexelI8ToR8,
|
||||
true,
|
||||
{.red = MTL::TextureSwizzleRed, .green = MTL::TextureSwizzleRed, .blue = MTL::TextureSwizzleRed, .alpha = MTL::TextureSwizzleOne}}, // I8
|
||||
{MTL::PixelFormatA8Unorm, 1, decodeTexelA8ToA8}, // A8
|
||||
{MTL::PixelFormatABGR4Unorm, 2, decodeTexelAI4ToABGR4}, // IA4
|
||||
{MTL::PixelFormatR8Unorm,
|
||||
1,
|
||||
decodeTexelI4ToR8,
|
||||
true,
|
||||
{.red = MTL::TextureSwizzleRed, .green = MTL::TextureSwizzleRed, .blue = MTL::TextureSwizzleRed, .alpha = MTL::TextureSwizzleOne}}, // I4
|
||||
{MTL::PixelFormatA8Unorm, 1, decodeTexelA4ToA8}, // A4
|
||||
{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1ToRGBA8}, // ETC1
|
||||
{MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1A4ToRGBA8}, // ETC1A4
|
||||
};
|
||||
|
||||
// Patches the global mtlPixelFormatInfos table at runtime for devices that do not report MTL::GPUFamilyApple1.
// On such devices the entries at indices 2, 3 and 4 (RGBA5551, RGB565 and RGBA4 in the table's order) are replaced
// with 4-byte RGBA8Unorm entries whose decoders output RGBA8, and the entry at index 9 (IA4) is replaced with a
// 2-byte RG8Unorm entry that is swizzled on sampling. Devices that do report the family keep the table untouched.
// NOTE(review): presumably the packed 16-bit Metal formats are only usable on Apple-family GPUs — confirm against
// the Metal feature set tables.
// NOTE(review): the numeric indices must stay in sync with the PICA::TextureFmt values used by getMTLPixelFormatInfo.
void checkForMTLPixelFormatSupport(MTL::Device* device) {
|
||||
if (!device->supportsFamily(MTL::GPUFamilyApple1)) {
|
||||
// 16-bit colour formats: store as RGBA8 and let the decoder do the conversion
mtlPixelFormatInfos[2] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelA1BGR5ToRGBA8};
|
||||
mtlPixelFormatInfos[3] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelB5G6R5ToRGBA8};
|
||||
mtlPixelFormatInfos[4] = {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelABGR4ToRGBA8};
|
||||
|
||||
// IA4: store as RG8 and use a swizzled texture view (needsSwizzle = true)
mtlPixelFormatInfos[9] = {
|
||||
MTL::PixelFormatRG8Unorm,
|
||||
2,
|
||||
decodeTexelAI4ToRG8,
|
||||
true,
|
||||
{
|
||||
// Red is replicated into R, G and B, and green is routed to alpha
// (presumably the decoder writes intensity to red and alpha to green — confirm in decodeTexelAI4ToRG8)
.red = MTL::TextureSwizzleRed,
|
||||
.green = MTL::TextureSwizzleRed,
|
||||
.blue = MTL::TextureSwizzleRed,
|
||||
.alpha = MTL::TextureSwizzleGreen,
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
} // namespace PICA
|
|
@ -30,7 +30,6 @@ PICA::ColorFmt ToColorFormat(u32 format) {
|
|||
}
|
||||
|
||||
MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) {
|
||||
// MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init();
|
||||
NS::Error* error = nullptr;
|
||||
MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error);
|
||||
// MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error);
|
||||
|
@ -56,12 +55,18 @@ void RendererMTL::reset() {
|
|||
colorRenderTargetCache.reset();
|
||||
}
|
||||
|
||||
void RendererMTL::setMTKLayer(void* layer) {
|
||||
metalLayer = (CA::MetalLayer*)layer;
|
||||
}
|
||||
|
||||
void RendererMTL::display() {
|
||||
CA::MetalDrawable* drawable = metalLayer->nextDrawable();
|
||||
if (!drawable) {
|
||||
return;
|
||||
}
|
||||
|
||||
MTL::Texture* texture = drawable->texture();
|
||||
|
||||
using namespace PICA::ExternalRegs;
|
||||
|
||||
// Top screen
|
||||
|
@ -87,13 +92,13 @@ void RendererMTL::display() {
|
|||
|
||||
MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
|
||||
MTL::RenderPassColorAttachmentDescriptor* colorAttachment = renderPassDescriptor->colorAttachments()->object(0);
|
||||
colorAttachment->setTexture(drawable->texture());
|
||||
colorAttachment->setTexture(texture);
|
||||
colorAttachment->setLoadAction(MTL::LoadActionClear);
|
||||
colorAttachment->setClearColor(MTL::ClearColor{0.0f, 0.0f, 0.0f, 1.0f});
|
||||
colorAttachment->setStoreAction(MTL::StoreActionStore);
|
||||
|
||||
nextRenderPassName = "Display";
|
||||
beginRenderPassIfNeeded(renderPassDescriptor, false, drawable->texture());
|
||||
beginRenderPassIfNeeded(renderPassDescriptor, false, texture);
|
||||
renderCommandEncoder->setRenderPipelineState(displayPipeline);
|
||||
renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0);
|
||||
|
||||
|
@ -119,17 +124,22 @@ void RendererMTL::display() {
|
|||
|
||||
// Inform the vertex buffer cache that the frame ended
|
||||
vertexBufferCache.endFrame();
|
||||
|
||||
// Release
|
||||
drawable->release();
|
||||
}
|
||||
|
||||
void RendererMTL::initGraphicsContext(SDL_Window* window) {
|
||||
// On iOS, the SwiftUI side handles the MetalLayer
|
||||
#ifdef PANDA3DS_IOS
|
||||
device = MTL::CreateSystemDefaultDevice();
|
||||
#else
|
||||
// TODO: what should be the type of the view?
|
||||
void* view = SDL_Metal_CreateView(window);
|
||||
metalLayer = (CA::MetalLayer*)SDL_Metal_GetLayer(view);
|
||||
device = MTL::CreateSystemDefaultDevice();
|
||||
metalLayer->setDevice(device);
|
||||
#endif
|
||||
checkForMTLPixelFormatSupport(device);
|
||||
|
||||
commandQueue = device->newCommandQueue();
|
||||
|
||||
// Textures
|
||||
|
|
334
src/core/renderer_mtl/texture_decoder.cpp
Normal file
334
src/core/renderer_mtl/texture_decoder.cpp
Normal file
|
@ -0,0 +1,334 @@
|
|||
#include "renderer_mtl/texture_decoder.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
|
||||
#include "colour.hpp"
|
||||
#include "math_util.hpp"
|
||||
|
||||
using namespace Helpers;
|
||||
|
||||
// u and v are the UVs of the relevant texel.
// Texture data is stored interleaved in Morton order, i.e. along a Z-order curve as shown here:
// https://en.wikipedia.org/wiki/Z-order_curve
// Textures are split into 8x8 tiles. This function returns the in-tile offset of the texel.
// The in-tile offset is the sum of 2 offsets, one depending on u % 8 and the other on v % 8,
// as documented in this picture: https://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg
u32 mortonInterleave(u32 u, u32 v) {
	// Spreads the low 3 bits of a coordinate so that bit n ends up at bit 2n (0..7 -> 0, 1, 4, 5, 16, 17, 20, 21)
	const auto spreadBits = [](u32 coord) -> u32 {
		const u32 low = coord & 7;
		return (low & 1) | ((low & 2) << 1) | ((low & 4) << 2);
	};

	// u occupies the even bit positions, v the odd ones
	return spreadBits(u) + (spreadBits(v) << 1);
}
|
||||
|
||||
// Get the byte offset of texel (u, v) in the texture
|
||||
u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) {
|
||||
u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to
|
||||
offset += mortonInterleave(u, v); // Add the in-tile offset of the texel
|
||||
|
||||
return offset * bytesPerPixel;
|
||||
}
|
||||
|
||||
// Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte
|
||||
u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) {
|
||||
u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to
|
||||
offset += mortonInterleave(u, v); // Add the in-tile offset of the texel
|
||||
|
||||
return offset / 2;
|
||||
}
|
||||
|
||||
// Decodes one 32-bit texel whose bytes are stored as A, B, G, R into 4 output bytes ordered R, G, B, A
void decodeTexelABGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 base = getSwizzledOffset(u, v, size.u(), 4);

	const u8 a = inData[base];
	const u8 blue = inData[base + 1];
	const u8 green = inData[base + 2];
	const u8 red = inData[base + 3];

	outData[0] = red;
	outData[1] = green;
	outData[2] = blue;
	outData[3] = a;
}
|
||||
|
||||
// Decodes one 24-bit texel whose bytes are stored as B, G, R into R, G, B plus a fully opaque alpha byte
void decodeTexelBGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 base = getSwizzledOffset(u, v, size.u(), 3);

	const u8 blue = inData[base];
	const u8 green = inData[base + 1];
	const u8 red = inData[base + 2];

	outData[0] = red;
	outData[1] = green;
	outData[2] = blue;
	outData[3] = 0xff;
}
|
||||
|
||||
// Repacks one 16-bit texel from (alpha: bit 0, B: bits 1-5, G: bits 6-10, R: bits 11-15)
// to (B: bits 0-4, G: bits 5-9, R: bits 10-14, alpha: bit 15) and writes it out little-endian
void decodeTexelA1BGR5ToBGR5A1(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 base = getSwizzledOffset(u, v, size.u(), 2);
	const u16 packed = u16(inData[base]) | (u16(inData[base + 1]) << 8);

	// Moving the alpha bit from the bottom to the top while shifting every colour field down by one
	// is exactly a 16-bit rotate right by 1
	const u16 repacked = u16((packed >> 1) | ((packed & 1) << 15));

	outData[0] = repacked & 0xff;
	outData[1] = (repacked >> 8) & 0xff;
}
|
||||
|
||||
// Expands one 16-bit texel (alpha: bit 0, B: bits 1-5, G: bits 6-10, R: bits 11-15) to 4 bytes ordered R, G, B, A
void decodeTexelA1BGR5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 base = getSwizzledOffset(u, v, size.u(), 2);
	const u16 packed = u16(inData[base]) | (u16(inData[base + 1]) << 8);

	const u8 red = Colour::convert5To8Bit(getBits<11, 5, u8>(packed));
	const u8 green = Colour::convert5To8Bit(getBits<6, 5, u8>(packed));
	const u8 blue = Colour::convert5To8Bit(getBits<1, 5, u8>(packed));
	// The single alpha bit becomes either fully opaque or fully transparent
	const u8 opacity = getBit<0>(packed) ? 0xff : 0;

	outData[0] = red;
	outData[1] = green;
	outData[2] = blue;
	outData[3] = opacity;
}
|
||||
|
||||
// Copies one 16-bit texel unchanged: the two source bytes are written out in the same order
void decodeTexelB5G6R5ToB5G6R5(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 base = getSwizzledOffset(u, v, size.u(), 2);

	const u8 lowByte = inData[base];
	const u8 highByte = inData[base + 1];

	outData[0] = lowByte;
	outData[1] = highByte;
}
|
||||
|
||||
// Expands one 16-bit texel (B: bits 0-4, G: bits 5-10, R: bits 11-15) to R, G, B plus an opaque alpha byte
void decodeTexelB5G6R5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 base = getSwizzledOffset(u, v, size.u(), 2);
	const u16 packed = u16(inData[base]) | (u16(inData[base + 1]) << 8);

	const u8 red = Colour::convert5To8Bit(getBits<11, 5, u8>(packed));
	const u8 green = Colour::convert6To8Bit(getBits<5, 6, u8>(packed));
	const u8 blue = Colour::convert5To8Bit(getBits<0, 5, u8>(packed));

	outData[0] = red;
	outData[1] = green;
	outData[2] = blue;
	outData[3] = 0xff;
}
|
||||
|
||||
// Copies one 16-bit texel (alpha: bits 0-3, B: bits 4-7, G: bits 8-11, R: bits 12-15) unchanged.
// Output byte 0 is (B << 4) | A and byte 1 is (R << 4) | G, which are exactly the two source bytes.
void decodeTexelABGR4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 base = getSwizzledOffset(u, v, size.u(), 2);

	const u8 blueAlpha = inData[base];      // High nibble: blue, low nibble: alpha
	const u8 redGreen = inData[base + 1];   // High nibble: red, low nibble: green

	outData[0] = blueAlpha;
	outData[1] = redGreen;
}
|
||||
|
||||
// Expands one 16-bit texel (alpha: bits 0-3, B: bits 4-7, G: bits 8-11, R: bits 12-15) to 4 bytes ordered R, G, B, A
void decodeTexelABGR4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 base = getSwizzledOffset(u, v, size.u(), 2);
	const u16 packed = u16(inData[base]) | (u16(inData[base + 1]) << 8);

	const u8 red = Colour::convert4To8Bit(getBits<12, 4, u8>(packed));
	const u8 green = Colour::convert4To8Bit(getBits<8, 4, u8>(packed));
	const u8 blue = Colour::convert4To8Bit(getBits<4, 4, u8>(packed));
	const u8 opacity = Colour::convert4To8Bit(getBits<0, 4, u8>(packed));

	outData[0] = red;
	outData[1] = green;
	outData[2] = blue;
	outData[3] = opacity;
}
|
||||
|
||||
// Decodes one 2-byte texel stored as (alpha, intensity) into 2 output bytes ordered (intensity, alpha)
void decodeTexelAI8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 base = getSwizzledOffset(u, v, size.u(), 2);

	// Like I8, but every texel carries its own alpha byte in front of the intensity byte
	const u8 opacity = inData[base];
	const u8 luminance = inData[base + 1];

	outData[0] = luminance;
	outData[1] = opacity;
}
|
||||
|
||||
// Decodes one 2-byte texel stored as (G, R) into 2 output bytes ordered (R, G).
// There is no blue component in either the source or the destination format.
void decodeTexelGR8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
	const u8 g = inData[offset];
	const u8 r = inData[offset + 1];

	*outData++ = r;
	*outData++ = g;
}
|
||||
|
||||
// Copies one 8-bit intensity texel to the output unchanged
void decodeTexelI8ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 base = getSwizzledOffset(u, v, size.u(), 1);
	outData[0] = inData[base];
}
|
||||
|
||||
// Copies one 8-bit alpha texel to the output unchanged
void decodeTexelA8ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 base = getSwizzledOffset(u, v, size.u(), 1);
	outData[0] = inData[base];
}
|
||||
|
||||
// Decodes one 8-bit texel (alpha: low nibble, intensity: high nibble) into a 16-bit ABGR4 texel
// with R = G = B = intensity and A = alpha.
//
// The output uses the same nibble layout that decodeTexelABGR4ToABGR4 produces for the same Metal
// format: byte 0 is (B << 4) | A and byte 1 is (R << 4) | G. The previous implementation wrote
// intensity into the alpha nibble and alpha into the red nibble, which swapped the two channels.
void decodeTexelAI4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
	const u8 texel = inData[offset];
	const u8 alpha = texel & 0xf;
	const u8 intensity = texel >> 4;

	*outData++ = (intensity << 4) | alpha;      // B | A
	*outData++ = (intensity << 4) | intensity;  // R | G
}
|
||||
|
||||
// Expands one 8-bit texel (alpha: low nibble, intensity: high nibble) into 2 bytes ordered (intensity, alpha)
void decodeTexelAI4ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 base = getSwizzledOffset(u, v, size.u(), 1);
	const u8 packed = inData[base];

	const u8 luminance = Colour::convert4To8Bit(packed >> 4);
	const u8 opacity = Colour::convert4To8Bit(packed & 0xf);

	outData[0] = luminance;
	outData[1] = opacity;
}
|
||||
|
||||
// Expands one 4-bit intensity texel to a single 8-bit output byte
void decodeTexelI4ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 base = getSwizzledOffset_4bpp(u, v, size.u());

	// Two texels share a byte: odd U coordinates live in the top nibble, even ones in the bottom nibble
	const u32 shift = (u % 2) ? 4 : 0;
	const u8 nibble = (inData[base] >> shift) & 0xf;

	outData[0] = Colour::convert4To8Bit(nibble);
}
|
||||
|
||||
// Expands one 4-bit alpha texel to a single 8-bit output byte
void decodeTexelA4ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	const u32 base = getSwizzledOffset_4bpp(u, v, size.u());

	// Two texels share a byte: odd U coordinates live in the top nibble, even ones in the bottom nibble
	const u32 shift = (u % 2) ? 4 : 0;
	const u8 nibble = (inData[base] >> shift) & 0xf;

	outData[0] = Colour::convert4To8Bit(nibble);
}
|
||||
|
||||
// Sign-extends the low 3 bits of val to 32 bits (e.g. 0b111 -> 0xFFFFFFFF, 0b011 -> 3)
static constexpr u32 signExtend3To32(u32 val) {
	// Move the 3-bit field to the top of the word, then let an arithmetic right shift replicate its sign bit
	const s32 topAligned = static_cast<s32>(val << 29);
	return static_cast<u32>(topAligned >> 29);
}
|
||||
|
||||
// Decodes a single texel of a 4x4 ETC1 block and writes it as 4 bytes ordered R, G, B, A.
//   u, v:       coordinates of the texel inside the block
//   colourData: the 64-bit colour word of the block
//   alpha:      value stored in the fourth output byte
//   outData:    destination for the 4 decoded bytes
void decodeETC(u32 u, u32 v, u64 colourData, u32 alpha, u8* outData) {
	// Modifier magnitudes, indexed by [table index][per-texel sub-index bit]
	static constexpr u32 modifiers[8][2] = {
		{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183},
	};

	// The per-texel bit position is derived from the coordinates before any flip is applied
	const u32 texelIndex = u * 4 + v;

	// Block-wide fields of the colour word
	const u32 subindices = getBits<0, 16, u32>(colourData);
	const u32 negationFlags = getBits<16, 16, u32>(colourData);
	const bool flip = getBit<32>(colourData);
	const bool diffMode = getBit<33>(colourData);

	// Note: index1 is indeed stored in the higher bits, with index2 in the lower bits
	const u32 tableIndex1 = getBits<37, 3, u32>(colourData);
	const u32 tableIndex2 = getBits<34, 3, u32>(colourData);

	if (flip) {
		std::swap(u, v);
	}

	// After the optional flip, u alone decides which half of the block the texel belongs to
	const bool firstHalf = u < 2;

	s32 r, g, b;
	if (!diffMode) {
		// Individual mode: each half has its own 4-bit-per-channel base colour
		if (firstHalf) {
			r = getBits<60, 4, s32>(colourData);
			g = getBits<52, 4, s32>(colourData);
			b = getBits<44, 4, s32>(colourData);
		} else {
			r = getBits<56, 4, s32>(colourData);
			g = getBits<48, 4, s32>(colourData);
			b = getBits<40, 4, s32>(colourData);
		}

		// Expand from 4 to 8 bits per channel
		r = Colour::convert4To8Bit(r);
		g = Colour::convert4To8Bit(g);
		b = Colour::convert4To8Bit(b);
	} else {
		// Differential mode: a 5-bit-per-channel base colour, plus a signed 3-bit delta for the second half
		r = getBits<59, 5, s32>(colourData);
		g = getBits<51, 5, s32>(colourData);
		b = getBits<43, 5, s32>(colourData);

		if (!firstHalf) {
			r += signExtend3To32(getBits<56, 3, u32>(colourData));
			g += signExtend3To32(getBits<48, 3, u32>(colourData));
			b += signExtend3To32(getBits<40, 3, u32>(colourData));
		}

		// Expand from 5 to 8 bits per channel
		r = Colour::convert5To8Bit(r);
		g = Colour::convert5To8Bit(g);
		b = Colour::convert5To8Bit(b);
	}

	// Pick the modifier for this texel and apply its sign
	const u32 table = firstHalf ? tableIndex1 : tableIndex2;
	const bool negate = ((negationFlags >> texelIndex) & 1) != 0;
	const s32 magnitude = modifiers[table][(subindices >> texelIndex) & 1];
	const s32 modifier = negate ? -magnitude : magnitude;

	r = std::clamp(r + modifier, 0, 255);
	g = std::clamp(g + modifier, 0, 255);
	b = std::clamp(b + modifier, 0, 255);

	outData[0] = r;
	outData[1] = g;
	outData[2] = b;
	outData[3] = alpha;
}
|
||||
|
||||
template <bool hasAlpha>
|
||||
void getTexelETC(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
|
||||
// Pixel offset of the 8x8 tile based on u, v and the width of the texture
|
||||
u32 offs = ((u & ~7) * 8) + ((v & ~7) * size.u());
|
||||
if (!hasAlpha) {
|
||||
offs >>= 1;
|
||||
}
|
||||
|
||||
// In-tile offsets for u/v
|
||||
u &= 7;
|
||||
v &= 7;
|
||||
|
||||
// ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles
|
||||
// Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes
|
||||
const u32 subTileSize = hasAlpha ? 16 : 8;
|
||||
const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in?
|
||||
|
||||
// In-subtile offsets for u/v
|
||||
u &= 3;
|
||||
v &= 3;
|
||||
offs += subTileSize * subTileIndex;
|
||||
|
||||
u32 alpha;
|
||||
const u64* ptr = reinterpret_cast<const u64*>(inData.data() + offs); // Cast to u64*
|
||||
|
||||
if (hasAlpha) {
|
||||
// First 64 bits of the 4x4 subtile are alpha data
|
||||
const u64 alphaData = *ptr++;
|
||||
alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf);
|
||||
} else {
|
||||
alpha = 0xff; // ETC1 without alpha uses ff for every pixel
|
||||
}
|
||||
|
||||
// Next 64 bits of the subtile are colour data
|
||||
u64 colourData = *ptr;
|
||||
|
||||
decodeETC(u, v, colourData, alpha, outData);
|
||||
}
|
||||
|
||||
// Decodes one texel of an ETC1 texture (no alpha data) to RGBA8
void decodeTexelETC1ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	constexpr bool withAlpha = false;
	getTexelETC<withAlpha>(size, u, v, inData, outData);
}
|
||||
|
||||
// Decodes one texel of an ETC1A4 texture (ETC1 colour plus 4-bit alpha) to RGBA8
void decodeTexelETC1A4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span<const u8> inData, u8* outData) {
	constexpr bool withAlpha = true;
	getTexelETC<withAlpha>(size, u, v, inData, outData);
}
|
|
@ -1,4 +1,6 @@
|
|||
#include <metal_stdlib>
|
||||
#include <TargetConditionals.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct BasicVertexOut {
|
||||
|
@ -219,12 +221,6 @@ struct Globals {
|
|||
uint GPUREG_LIGHTING_LUTINPUT_SELECT;
|
||||
uint GPUREG_LIGHTi_CONFIG;
|
||||
|
||||
// HACK
|
||||
//bool lightingEnabled;
|
||||
//uint8_t lightingNumLights;
|
||||
//uint32_t lightingConfig1;
|
||||
//uint16_t alphaControl;
|
||||
|
||||
float3 normal;
|
||||
};
|
||||
|
||||
|
@ -655,14 +651,15 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) {
|
|||
return as_type<float4>(performLogicOpU(logicOp, as_type<uint4>(s), as_type<uint4>(d)));
|
||||
}
|
||||
|
||||
fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture2d_array<float> texLightingLut [[texture(3)]], texture1d_array<float> texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
|
||||
Globals globals;
|
||||
// The iOS simulator doesn't support framebuffer fetch, so the [[color(0)]] input is only declared
// when not building for the simulator.
// TargetConditionals.h defines TARGET_OS_SIMULATOR as either 0 or 1 on every target, so it has to be
// tested by value: an #ifndef check is never true once that header has been included.
// NOTE: code that reads prevColor must be guarded by the same "#if !TARGET_OS_SIMULATOR" condition.
#if !TARGET_OS_SIMULATOR
#define PREVIOUS_COLOR_DECL float4 prevColor [[color(0)]],
#else
#define PREVIOUS_COLOR_DECL
#endif
|
||||
|
||||
// HACK
|
||||
//globals.lightingEnabled = picaRegs.read(0x008Fu) != 0u;
|
||||
//globals.lightingNumLights = picaRegs.read(0x01C2u);
|
||||
//globals.lightingConfig1 = picaRegs.read(0x01C4u);
|
||||
//globals.alphaControl = picaRegs.read(0x104);
|
||||
fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], PREVIOUS_COLOR_DECL constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture2d_array<float> texLightingLut [[texture(3)]], texture1d_array<float> texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
|
||||
Globals globals;
|
||||
|
||||
globals.tevSources[0] = in.color;
|
||||
if (lightingEnabled) {
|
||||
|
@ -755,5 +752,9 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
|
|||
}
|
||||
}
|
||||
|
||||
#ifndef TARGET_OS_SIMULATOR
|
||||
return performLogicOp(logicOp, color, prevColor);
|
||||
#else
|
||||
return performLogicOp(logicOp, color, float4(0.0));
|
||||
#endif
|
||||
}
|
37
src/ios_driver.mm
Normal file
37
src/ios_driver.mm
Normal file
|
@ -0,0 +1,37 @@
|
|||
#import <Foundation/Foundation.h>
|
||||
|
||||
extern "C" {
|
||||
#include "ios_driver.h"
|
||||
}
|
||||
|
||||
// Apple's Foundation headers define some macros globally that create issues with our own code, so remove the definitions
|
||||
#undef ABS
|
||||
#undef NO
|
||||
|
||||
#include <memory>
|
||||
#include "emulator.hpp"
|
||||
|
||||
// The Objective-C++ bridge functions must be exported without name mangling in order for the SwiftUI frontend to be able to call them
|
||||
#define IOS_EXPORT extern "C" __attribute__((visibility("default")))
|
||||
|
||||
// Owning handle to the emulator instance; populated by iosCreateEmulator()
std::unique_ptr<Emulator> emulator = nullptr;
|
||||
// Non-owning pointer to the emulator's HID service; set by iosCreateEmulator() right after the emulator is created
HIDService* hidService = nullptr;
|
||||
|
||||
// Creates the global emulator instance, caches its HID service, initializes the graphics context
// without a window and loads a hard-coded ELF from the app data directory
IOS_EXPORT void iosCreateEmulator() {
	printf("Creating emulator\n");

	emulator = std::make_unique<Emulator>();
	Emulator& core = *emulator;

	hidService = &core.getServiceManager().getHID();
	// No window is passed here: the Metal layer is supplied later, per frame, through iosRunFrame
	core.initGraphicsContext(nullptr);

	// TODO: Add game selection on iOS frontend
	auto romPath = core.getAppDataRoot() / "toon_shading.elf";
	core.loadROM(romPath);
}
|
||||
|
||||
// Points the renderer at the given CAMetalLayer and then runs one emulator frame.
// The layer is passed as an opaque pointer via a plain __bridge cast, so ownership is not transferred.
IOS_EXPORT void iosRunFrame(CAMetalLayer* layer) {
	auto&& renderer = emulator->getRenderer();
	renderer->setMTKLayer((__bridge void*)layer);

	emulator->runFrame();
}
|
Loading…
Add table
Reference in a new issue