metal: add all the files

This commit is contained in:
Samuliak 2024-08-16 10:06:56 +02:00
parent 0602467c61
commit 98b5d56021
18 changed files with 3041 additions and 12 deletions

View file

@ -0,0 +1,6 @@
#define NS_PRIVATE_IMPLEMENTATION
#define CA_PRIVATE_IMPLEMENTATION
#define MTL_PRIVATE_IMPLEMENTATION
#include <Foundation/Foundation.hpp>
#include <Metal/Metal.hpp>
#include <QuartzCore/QuartzCore.hpp>

View file

@ -0,0 +1,124 @@
#include <algorithm>
#include "colour.hpp"
#include "renderer_mtl/renderer_mtl.hpp"
#include "renderer_mtl/mtl_texture.hpp"
using namespace Helpers;
namespace Metal {
// Sign-extends the low 3 bits of val to 32 bits and returns the result as its unsigned bit pattern.
// Any bits above bit 2 in val are discarded.
static constexpr u32 signExtend3To32(u32 val) {
	// Perform the left shift on the unsigned value (always well-defined), and only then reinterpret
	// as signed so that the right shift replicates bit 2 across the upper 29 bits.
	// Left-shifting the signed value directly would shift a set bit into the sign bit.
	return u32(s32(val << 29) >> 29);
}
// Fetches and decodes the texel at (u, v) from an ETC1 / ETC1A4 compressed texture.
//   hasAlpha: true for ETC1A4 (each 4x4 subtile carries 64 bits of 4-bit alpha before its colour word)
//   u, v:     texel coordinates
//   width:    texture width in texels
//   data:     the raw texture data
// Returns the value produced by decodeETC for this texel, with alpha forced to 0xff when hasAlpha is false.
u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data) {
	// Assembles the 8 bytes starting at byteOffset into a 64-bit word, least significant byte first.
	// This avoids casting the byte pointer to u64*, which would be an unaligned, aliasing-violating load.
	const auto readU64 = [&data](u32 byteOffset) {
		u64 word = 0;
		for (u32 i = 0; i < 8; i++) {
			word |= u64(data[byteOffset + i]) << (8 * i);
		}
		return word;
	};

	// Pixel offset of the 8x8 tile based on u, v and the width of the texture
	u32 offs = ((u & ~7) * 8) + ((v & ~7) * width);
	if (!hasAlpha) {
		offs >>= 1;
	}

	// In-tile offsets for u/v
	u &= 7;
	v &= 7;

	// ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles
	// Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes
	const u32 subTileSize = hasAlpha ? 16 : 8;
	const u32 subTileIndex = (u / 4) + 2 * (v / 4);  // Which of the 4 subtiles is this texel in?

	// In-subtile offsets for u/v
	u &= 3;
	v &= 3;
	offs += subTileSize * subTileIndex;

	u32 alpha = 0xff;  // ETC1 without alpha uses ff for every pixel
	if (hasAlpha) {
		// First 64 bits of the 4x4 subtile are alpha data, 4 bits per texel
		const u64 alphaData = readU64(offs);
		alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf);
		offs += 8;
	}

	// Next 64 bits of the subtile are colour data
	const u64 colourData = readU64(offs);
	return decodeETC(alpha, u, v, colourData);
}
// Decodes one texel of a 4x4 ETC block.
//   alpha:      already-expanded 8-bit alpha, placed in bits 24-31 of the result
//   u, v:       texel coordinates inside the 4x4 block (0-3 each)
//   colourData: the block's 64-bit colour word
// Returns the texel packed as (alpha << 24) | (b << 16) | (g << 8) | r.
u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) {
	// Modifier magnitudes, indexed by [table index][per-texel magnitude select bit]
	static constexpr u32 modifiers[8][2] = {
		{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183},
	};

	// Per-texel bitfields: bit N of each field belongs to texel N of the block
	const u32 magnitudeSelectBits = getBits<0, 16, u32>(colourData);
	const u32 negateBits = getBits<16, 16, u32>(colourData);

	const bool flipped = getBit<32>(colourData);
	const bool differential = getBit<33>(colourData);

	// The table index for the first half lives in the higher bits, the one for the second half in the lower bits
	const u32 firstHalfTable = getBits<37, 3, u32>(colourData);
	const u32 secondHalfTable = getBits<34, 3, u32>(colourData);

	// The texel's bit position is taken from the coordinates as passed in, before any flip is applied
	const u32 texelBit = u * 4 + v;
	if (flipped) {
		std::swap(u, v);
	}

	// After the optional swap, u alone decides which half of the block the texel belongs to
	const bool inSecondHalf = u >= 2;

	s32 r, g, b;
	if (differential) {
		// 5-bit base colour, plus a signed 3-bit delta for texels in the second half
		r = getBits<59, 5, s32>(colourData);
		g = getBits<51, 5, s32>(colourData);
		b = getBits<43, 5, s32>(colourData);

		if (inSecondHalf) {
			r += signExtend3To32(getBits<56, 3, u32>(colourData));
			g += signExtend3To32(getBits<48, 3, u32>(colourData));
			b += signExtend3To32(getBits<40, 3, u32>(colourData));
		}

		// Expand from 5 to 8 bits per channel
		r = Colour::convert5To8Bit(r);
		g = Colour::convert5To8Bit(g);
		b = Colour::convert5To8Bit(b);
	} else {
		// Two independent 4-bit colours, one per half
		r = inSecondHalf ? getBits<56, 4, s32>(colourData) : getBits<60, 4, s32>(colourData);
		g = inSecondHalf ? getBits<48, 4, s32>(colourData) : getBits<52, 4, s32>(colourData);
		b = inSecondHalf ? getBits<40, 4, s32>(colourData) : getBits<44, 4, s32>(colourData);

		// Expand from 4 to 8 bits per channel
		r = Colour::convert4To8Bit(r);
		g = Colour::convert4To8Bit(g);
		b = Colour::convert4To8Bit(b);
	}

	// Pick the modifier for this texel and apply its sign
	const u32 table = inSecondHalf ? secondHalfTable : firstHalfTable;
	const s32 magnitude = modifiers[table][(magnitudeSelectBits >> texelBit) & 1];
	const bool negate = ((negateBits >> texelBit) & 1) != 0;
	const s32 modifier = negate ? -magnitude : magnitude;

	r = std::clamp(r + modifier, 0, 255);
	g = std::clamp(g + modifier, 0, 255);
	b = std::clamp(b + modifier, 0, 255);

	return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r);
}
} // namespace Metal

View file

@ -0,0 +1,312 @@
#include "renderer_mtl/mtl_texture.hpp"
#include "renderer_mtl/objc_helper.hpp"
#include "colour.hpp"
#include <array>
using namespace Helpers;
namespace Metal {
void Texture::allocate() {
formatInfo = PICA::getPixelFormatInfo(format);
MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();
descriptor->setTextureType(MTL::TextureType2D);
descriptor->setPixelFormat(formatInfo.pixelFormat);
descriptor->setWidth(size.u());
descriptor->setHeight(size.v());
descriptor->setUsage(MTL::TextureUsageShaderRead);
descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers?
texture = device->newTexture(descriptor);
texture->setLabel(toNSString("Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v())));
descriptor->release();
setNewConfig(config);
}
// Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on
// Stores a new texture configuration and rebuilds the sampler state from it.
// Bit 1 of cfg selects linear magnification, bit 2 linear minification (nearest otherwise);
// bits 8-10 hold the T wrap mode and bits 12-14 the S wrap mode.
void Texture::setNewConfig(u32 cfg) {
	config = cfg;

	// Drop the sampler built for the previous config, if there is one
	if (sampler) {
		sampler->release();
	}

	const bool linearMag = (cfg & 0x2) != 0;
	const bool linearMin = (cfg & 0x4) != 0;
	const auto addressModeT = PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg));
	const auto addressModeS = PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg));

	MTL::SamplerDescriptor* desc = MTL::SamplerDescriptor::alloc()->init();
	desc->setMinFilter(linearMin ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest);
	desc->setMagFilter(linearMag ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest);
	desc->setSAddressMode(addressModeS);
	desc->setTAddressMode(addressModeT);
	desc->setLabel(toNSString("Sampler"));

	sampler = device->newSamplerState(desc);
	desc->release();
}
// Marks the texture as invalid and releases its Metal texture and sampler objects.
// The pointers are cleared after release so that calling free() again, or calling setNewConfig()
// afterwards (which releases any non-null sampler), cannot release the same object twice.
void Texture::free() {
	valid = false;

	if (texture) {
		texture->release();
		texture = nullptr;
	}

	if (sampler) {
		sampler->release();
		sampler = nullptr;
	}
}
// Returns the number of bytes the texture data occupies, derived from its dimensions and format.
// Panics on a format that is not listed below.
u64 Texture::sizeInBytes() {
	const u64 texelCount = u64(size.x()) * u64(size.y());

	switch (format) {
		// 4 bytes per texel
		case PICA::TextureFmt::RGBA8: return texelCount * 4;

		// 3 bytes per texel
		case PICA::TextureFmt::RGB8: return texelCount * 3;

		// 2 bytes per texel
		case PICA::TextureFmt::RGBA5551:
		case PICA::TextureFmt::RGB565:
		case PICA::TextureFmt::RGBA4:
		case PICA::TextureFmt::RG8:
		case PICA::TextureFmt::IA8: return texelCount * 2;

		// 1 byte per texel
		case PICA::TextureFmt::A8:
		case PICA::TextureFmt::I8:
		case PICA::TextureFmt::IA4: return texelCount;

		// 4 bits per texel
		case PICA::TextureFmt::I4:
		case PICA::TextureFmt::A4: return texelCount / 2;

		// Compressed formats: count the 4x4 blocks, each 8 bytes on ETC1...
		case PICA::TextureFmt::ETC1: return (texelCount / 16) * 8;

		// ...and 16 bytes on ETC1A4
		case PICA::TextureFmt::ETC1A4: return (texelCount / 16) * 16;

		default: Helpers::panic("[PICA] Attempted to get size of invalid texture type");
	}
}
// u and v are the UVs of the relevant texel
// Texture data is stored interleaved in Morton order, i.e. in a Z-order curve as shown here
// https://en.wikipedia.org/wiki/Z-order_curve
// Textures are split into 8x8 tiles. This function returns the in-tile offset depending on the u & v of the texel
// The in-tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of v % 8
// As documented in this picture: https://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg
u32 Texture::mortonInterleave(u32 u, u32 v) {
	// Contribution of each in-tile coordinate (0-7) to the interleaved texel index
	static constexpr std::array<u32, 8> uContribution = {0, 1, 4, 5, 16, 17, 20, 21};
	static constexpr std::array<u32, 8> vContribution = {0, 2, 8, 10, 32, 34, 40, 42};

	// Only the position inside the 8x8 tile matters here
	const u32 tileU = u % 8;
	const u32 tileV = v % 8;
	return uContribution[tileU] + vContribution[tileV];
}
// Get the byte offset of texel (u, v) in the texture
// Returns the byte offset of texel (u, v) in a tiled texture that is `width` texels wide
// and stores `bytesPerPixel` bytes per texel.
u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) {
	// Texel index at which the 8x8 tile containing (u, v) starts
	const u32 tileStart = ((u & ~7) * 8) + ((v & ~7) * width);
	// Texel index of (u, v) inside that tile
	const u32 inTile = mortonInterleave(u, v);
	return (tileStart + inTile) * bytesPerPixel;
}
// Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte
// Byte offset of texel (u, v) for 4-bit-per-texel formats: two texels share each byte, so the texel index is halved.
u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) {
	const u32 tileStart = ((u & ~7) * 8) + ((v & ~7) * width);  // First texel of the enclosing 8x8 tile
	const u32 texelIndex = tileStart + mortonInterleave(u, v);   // Plus the position inside the tile
	return texelIndex / 2;
}
// Decodes the texel at (u, v) for formats whose decoded form is a single byte (A4 and A8).
// Returns the 8-bit alpha value; panics for any other format.
u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
	switch (fmt) {
		case PICA::TextureFmt::A4: {
			// Two texels per byte: odd U coordinates live in the top nibble, even ones in the low nibble
			const u8 packed = data[getSwizzledOffset_4bpp(u, v, size.u())];
			const u8 nibble = (u % 2) ? u8(packed >> 4) : u8(packed & 0xf);

			// Expand to A8
			return Colour::convert4To8Bit(nibble);
		}

		case PICA::TextureFmt::A8: {
			// Already A8, return the stored byte as-is
			return data[getSwizzledOffset(u, v, size.u(), 1)];
		}

		default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
	}
}
// Decodes the texel at (u, v) for formats whose decoded form is a 16-bit value.
// The packing of the returned value depends on the format and is noted on each case.
// Panics for formats that are not handled here.
u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
	switch (fmt) {
		case PICA::TextureFmt::RG8: {
			const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
			const u8 g = data[offset];
			const u8 r = data[offset + 1];

			// RG8: red in the low byte, green in the high byte
			return u16((g << 8) | r);
		}

		case PICA::TextureFmt::RGBA4: {
			const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
			const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);

			const u8 alpha = getBits<0, 4, u8>(texel);
			const u8 b = getBits<4, 4, u8>(texel);
			const u8 g = getBits<8, 4, u8>(texel);
			const u8 r = getBits<12, 4, u8>(texel);

			// ABGR4
			return u16((r << 12) | (g << 8) | (b << 4) | alpha);
		}

		case PICA::TextureFmt::RGBA5551: {
			const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
			const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);

			// Alpha is a single bit in the output, so keep it as 0/1 rather than shifting a full byte
			const u16 alpha = getBit<0>(texel) ? 1 : 0;
			const u8 b = getBits<1, 5, u8>(texel);
			const u8 g = getBits<6, 5, u8>(texel);
			const u8 r = getBits<11, 5, u8>(texel);

			// BGR5A1
			return u16((alpha << 15) | (r << 10) | (g << 5) | b);
		}

		case PICA::TextureFmt::RGB565: {
			const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
			const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);

			const u8 b = getBits<0, 5, u8>(texel);
			const u8 g = getBits<5, 6, u8>(texel);
			const u8 r = getBits<11, 5, u8>(texel);

			// B5G6R5
			return u16((r << 11) | (g << 5) | b);
		}

		case PICA::TextureFmt::IA4: {
			const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
			const u8 texel = data[offset];
			const u8 alpha = texel & 0xf;
			const u8 intensity = texel >> 4;

			// ABGR4, with the intensity replicated into all three colour channels
			return u16((intensity << 12) | (intensity << 8) | (intensity << 4) | alpha);
		}

		case PICA::TextureFmt::I4: {
			const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());

			// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
			u8 intensity = data[offset] >> ((u % 2) ? 4 : 0);
			intensity = getBits<0, 4>(intensity);

			// ABGR4 with fully opaque alpha. Alpha is only the low 4 bits, so it must be 0xf:
			// using 0xff here would also overwrite the blue channel in bits 4-7.
			return u16((intensity << 12) | (intensity << 8) | (intensity << 4) | 0xf);
		}

		default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
	}
}
// Decodes the texel at (u, v) for formats whose decoded form is a 32-bit value.
// For the uncompressed formats the result is packed as (alpha << 24) | (b << 16) | (g << 8) | r;
// ETC1 / ETC1A4 are delegated to getTexelETC. Panics for formats that are not handled here.
// All channel shifts are done on u32 values so that nothing is shifted into the sign bit of an int.
u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
	switch (fmt) {
		case PICA::TextureFmt::RGB8: {
			const u32 offset = getSwizzledOffset(u, v, size.u(), 3);
			const u32 b = data[offset];
			const u32 g = data[offset + 1];
			const u32 r = data[offset + 2];

			// RGBA8, fully opaque
			return (u32(0xff) << 24) | (b << 16) | (g << 8) | r;
		}

		case PICA::TextureFmt::RGBA8: {
			const u32 offset = getSwizzledOffset(u, v, size.u(), 4);
			const u32 alpha = data[offset];
			const u32 b = data[offset + 1];
			const u32 g = data[offset + 2];
			const u32 r = data[offset + 3];

			// RGBA8
			return (alpha << 24) | (b << 16) | (g << 8) | r;
		}

		case PICA::TextureFmt::I8: {
			const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
			const u32 intensity = data[offset];

			// RGBA8, fully opaque, intensity replicated into all three colour channels
			return (u32(0xff) << 24) | (intensity << 16) | (intensity << 8) | intensity;
		}

		case PICA::TextureFmt::IA8: {
			const u32 offset = getSwizzledOffset(u, v, size.u(), 2);

			// Same as I8 except each pixel gets its own alpha value too
			const u32 alpha = data[offset];
			const u32 intensity = data[offset + 1];

			// RGBA8
			return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
		}

		case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data);
		case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data);

		default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
	}
}
// Decodes the whole texture from `data` into a linear, row-by-row byte buffer and uploads it
// to mip level 0 of the Metal texture. Panics if formatInfo.bytesPerTexel is not 1, 2 or 4.
void Texture::decodeTexture(std::span<const u8> data) {
	const auto bytesPerTexel = formatInfo.bytesPerTexel;

	std::vector<u8> decoded;
	decoded.reserve(u64(size.u()) * u64(size.v()) * bytesPerTexel);

	// Appends the lowest `byteCount` bytes of `value`, least significant byte first
	const auto appendLittleEndian = [&decoded](u32 value, u32 byteCount) {
		for (u32 i = 0; i < byteCount; i++) {
			decoded.push_back(u8(value >> (8 * i)));
		}
	};

	// Decode texels line by line
	for (u32 v = 0; v < size.v(); v++) {
		for (u32 u = 0; u < size.u(); u++) {
			if (bytesPerTexel == 1) {
				appendLittleEndian(decodeTexelU8(u, v, format, data), 1);
			} else if (bytesPerTexel == 2) {
				appendLittleEndian(decodeTexelU16(u, v, format, data), 2);
			} else if (bytesPerTexel == 4) {
				appendLittleEndian(decodeTexelU32(u, v, format, data), 4);
			} else {
				Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel);
			}
		}
	}

	// Upload the full image; each row of the source buffer is bytesPerTexel * width bytes long
	texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0);
}
} // namespace Metal

View file

@ -0,0 +1,12 @@
#include "renderer_mtl/objc_helper.hpp"
// TODO: change the include
#import <Metal/Metal.h>
namespace Metal {
// Wraps `size` bytes starting at `data` in a dispatch_data_t object.
// An empty block is supplied as the destructor, targeting a global queue.
// NOTE(review): with a custom destructor libdispatch presumably references the buffer rather than
// copying it, so `data` would have to outlive the returned object — confirm against the dispatch docs.
dispatch_data_t createDispatchData(const void* data, size_t size) {
	dispatch_queue_t destructorQueue = dispatch_get_global_queue(0, 0);
	return dispatch_data_create(data, size, destructorQueue, ^{});
}
} // namespace Metal

View file

@ -0,0 +1,774 @@
#include "PICA/gpu.hpp"
#include "renderer_mtl/renderer_mtl.hpp"
#include "renderer_mtl/objc_helper.hpp"
#include <cmrc/cmrc.hpp>
#include <cstddef>
#include "SDL_metal.h"
using namespace PICA;
CMRC_DECLARE(RendererMTL);
// Width of the lighting LUT texture. Used both when creating the texture and as the value of
// function constant 0 (an unsigned short) for the copy-to-LUT-texture vertex function.
const u16 LIGHT_LUT_TEXTURE_WIDTH = 256;
// HACK: redefinition...
// Converts a raw format field into a PICA::ColorFmt.
// Raw values 2 and 3 are mapped explicitly to RGB565 and RGBA5551; every other value is cast directly.
PICA::ColorFmt ToColorFormat(u32 format) {
	if (format == 2) {
		return PICA::ColorFmt::RGB565;
	}
	if (format == 3) {
		return PICA::ColorFmt::RGBA5551;
	}
	return static_cast<PICA::ColorFmt>(format);
}
// Creates a Metal library on `device` from the bytes of an embedded resource file.
// Panics with the error's description if Metal reports an error.
// (An alternative path that compiles from shader source with MTL::CompileOptions is not used here.)
MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) {
	NS::Error* loadError = nullptr;

	// Hand the resource bytes to Metal as dispatch data
	const auto libraryData = Metal::createDispatchData(shaderSource.begin(), shaderSource.size());
	MTL::Library* loadedLibrary = device->newLibrary(libraryData, &loadError);

	if (loadError != nullptr) {
		Helpers::panic("Error loading shaders: %s", loadError->description()->cString(NS::ASCIIStringEncoding));
	}

	return loadedLibrary;
}
// Forwards the GPU reference and both register arrays to the Renderer base class.
// No Metal objects are created here; that happens in initGraphicsContext.
RendererMTL::RendererMTL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs)
: Renderer(gpu, internalRegs, externalRegs) {}
// Nothing is released explicitly on destruction.
RendererMTL::~RendererMTL() = default;
// Resets every cache owned by the renderer: the vertex buffer, depth-stencil state and pipeline caches,
// followed by the texture cache and the depth-stencil and colour render target caches.
void RendererMTL::reset() {
vertexBufferCache.reset();
depthStencilCache.reset();
drawPipelineCache.reset();
blitPipelineCache.reset();
textureCache.reset();
depthStencilRenderTargetCache.reset();
colorRenderTargetCache.reset();
}
// Presents a frame: draws the render targets currently selected as the top and bottom screen
// into the layer's next drawable, presents it and commits the command buffer.
// Does nothing at all (including the end-of-frame bookkeeping below) when no drawable is available.
void RendererMTL::display() {
CA::MetalDrawable* drawable = metalLayer->nextDrawable();
if (!drawable) {
return;
}
using namespace PICA::ExternalRegs;
// Top screen
// Bit 0 of the select register picks which of the two framebuffer addresses is active
const u32 topActiveFb = externalRegs[Framebuffer0Select] & 1;
const u32 topScreenAddr = externalRegs[topActiveFb == 0 ? Framebuffer0AFirstAddr : Framebuffer0ASecondAddr];
auto topScreen = colorRenderTargetCache.findFromAddress(topScreenAddr);
if (topScreen) {
// NOTE(review): presumably applies a clear previously queued for this texture by clearBuffer — confirm in clearColor
clearColor(nullptr, topScreen->get().texture);
}
// Bottom screen
const u32 bottomActiveFb = externalRegs[Framebuffer1Select] & 1;
const u32 bottomScreenAddr = externalRegs[bottomActiveFb == 0 ? Framebuffer1AFirstAddr : Framebuffer1ASecondAddr];
auto bottomScreen = colorRenderTargetCache.findFromAddress(bottomScreenAddr);
if (bottomScreen) {
clearColor(nullptr, bottomScreen->get().texture);
}
// -------- Draw --------
commandBuffer->pushDebugGroup(toNSString("Display"));
// Render pass targeting the drawable: cleared to opaque black on load, contents stored at the end
MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
MTL::RenderPassColorAttachmentDescriptor* colorAttachment = renderPassDescriptor->colorAttachments()->object(0);
colorAttachment->setTexture(drawable->texture());
colorAttachment->setLoadAction(MTL::LoadActionClear);
colorAttachment->setClearColor(MTL::ClearColor{0.0f, 0.0f, 0.0f, 1.0f});
colorAttachment->setStoreAction(MTL::StoreActionStore);
nextRenderPassName = "Display";
// NOTE(review): renderPassDescriptor is not released in this function; presumably
// beginRenderPassIfNeeded takes care of it — confirm
beginRenderPassIfNeeded(renderPassDescriptor, false, drawable->texture());
renderCommandEncoder->setRenderPipelineState(displayPipeline);
renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0);
// Top screen
if (topScreen) {
// 400x240 viewport at the top-left corner of the drawable, drawn as a 4-vertex triangle strip
renderCommandEncoder->setViewport(MTL::Viewport{0, 0, 400, 240, 0.0f, 1.0f});
renderCommandEncoder->setFragmentTexture(topScreen->get().texture, 0);
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
}
// Bottom screen
if (bottomScreen) {
// 320x240 viewport placed below the top screen and offset 40 pixels from the left edge
renderCommandEncoder->setViewport(MTL::Viewport{40, 240, 320, 240, 0.0f, 1.0f});
renderCommandEncoder->setFragmentTexture(bottomScreen->get().texture, 0);
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
}
endRenderPass();
commandBuffer->presentDrawable(drawable);
commandBuffer->popDebugGroup();
commitCommandBuffer();
// Inform the vertex buffer cache that the frame ended
vertexBufferCache.endFrame();
// Release
// NOTE(review): the drawable came from nextDrawable() rather than an alloc/new/copy call; confirm this release is balanced
drawable->release();
}
// Creates all long-lived Metal objects for the renderer, given the SDL window to present into:
// the Metal layer, device and command queue, the LUT texture, the nearest/linear samplers,
// the shader libraries, the display / blit / draw / copy-to-LUT pipelines (or their caches),
// and the default depth-stencil state.
// Panics if creating the display pipeline, the copy-to-LUT vertex function or the copy-to-LUT pipeline reports an error.
void RendererMTL::initGraphicsContext(SDL_Window* window) {
// TODO: what should be the type of the view?
void* view = SDL_Metal_CreateView(window);
metalLayer = (CA::MetalLayer*)SDL_Metal_GetLayer(view);
device = MTL::CreateSystemDefaultDevice();
metalLayer->setDevice(device);
commandQueue = device->newCommandQueue();
// -------- Objects --------
// Textures
// LUT texture: RGBA32Float, LIGHT_LUT_TEXTURE_WIDTH texels wide with (Lights::LUT_Count + 1) rows,
// readable and writable from shaders and stored in private (GPU-only) memory
MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
textureDescriptor->setTextureType(MTL::TextureType2D);
textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA32Float);
textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH);
textureDescriptor->setHeight(Lights::LUT_Count + 1);
textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite);
textureDescriptor->setStorageMode(MTL::StorageModePrivate);
lutTexture = device->newTexture(textureDescriptor);
lutTexture->setLabel(toNSString("LUT texture"));
textureDescriptor->release();
// Samplers
// The nearest sampler is created from an otherwise untouched descriptor; the same descriptor is then
// switched to linear min/mag filtering and reused for the linear sampler
MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init();
samplerDescriptor->setLabel(toNSString("Sampler (nearest)"));
nearestSampler = device->newSamplerState(samplerDescriptor);
samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear);
samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear);
samplerDescriptor->setLabel(toNSString("Sampler (linear)"));
linearSampler = device->newSamplerState(samplerDescriptor);
samplerDescriptor->release();
// -------- Pipelines --------
// Load shaders
// Both libraries are read from resources embedded in the binary through cmrc
auto mtlResources = cmrc::RendererMTL::get_filesystem();
library = loadLibrary(device, mtlResources.open("metal_shaders.metallib"));
MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib"));
// Display
MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding));
MTL::Function* fragmentDisplayFunction = library->newFunction(NS::String::string("fragmentDisplay", NS::ASCIIStringEncoding));
MTL::RenderPipelineDescriptor* displayPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
displayPipelineDescriptor->setVertexFunction(vertexDisplayFunction);
displayPipelineDescriptor->setFragmentFunction(fragmentDisplayFunction);
auto* displayColorAttachment = displayPipelineDescriptor->colorAttachments()->object(0);
displayColorAttachment->setPixelFormat(MTL::PixelFormat::PixelFormatBGRA8Unorm);
NS::Error* error = nullptr;
displayPipelineDescriptor->setLabel(toNSString("Display pipeline"));
displayPipeline = device->newRenderPipelineState(displayPipelineDescriptor, &error);
if (error) {
Helpers::panic("Error creating display pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
displayPipelineDescriptor->release();
vertexDisplayFunction->release();
fragmentDisplayFunction->release();
// Blit
// NOTE(review): the blit functions are not released here; presumably blitPipelineCache keeps them — confirm
MTL::Function* vertexBlitFunction = library->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding));
MTL::Function* fragmentBlitFunction = library->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding));
blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction);
// Draw
MTL::Function* vertexDrawFunction = library->newFunction(NS::String::string("vertexDraw", NS::ASCIIStringEncoding));
// -------- Vertex descriptor --------
// Attributes 0-7 all read from the buffer bound at VERTEX_BUFFER_BINDING_INDEX, at the offsets of the
// corresponding members of the Vertex struct
MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init();
// Position
MTL::VertexAttributeDescriptor* positionAttribute = vertexDescriptor->attributes()->object(0);
positionAttribute->setFormat(MTL::VertexFormatFloat4);
positionAttribute->setOffset(offsetof(Vertex, s.positions));
positionAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// Quaternion
MTL::VertexAttributeDescriptor* quaternionAttribute = vertexDescriptor->attributes()->object(1);
quaternionAttribute->setFormat(MTL::VertexFormatFloat4);
quaternionAttribute->setOffset(offsetof(Vertex, s.quaternion));
quaternionAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// Color
MTL::VertexAttributeDescriptor* colorAttribute = vertexDescriptor->attributes()->object(2);
colorAttribute->setFormat(MTL::VertexFormatFloat4);
colorAttribute->setOffset(offsetof(Vertex, s.colour));
colorAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// Texture coordinate 0
MTL::VertexAttributeDescriptor* texCoord0Attribute = vertexDescriptor->attributes()->object(3);
texCoord0Attribute->setFormat(MTL::VertexFormatFloat2);
texCoord0Attribute->setOffset(offsetof(Vertex, s.texcoord0));
texCoord0Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// Texture coordinate 1
MTL::VertexAttributeDescriptor* texCoord1Attribute = vertexDescriptor->attributes()->object(4);
texCoord1Attribute->setFormat(MTL::VertexFormatFloat2);
texCoord1Attribute->setOffset(offsetof(Vertex, s.texcoord1));
texCoord1Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// Texture coordinate 0 W
MTL::VertexAttributeDescriptor* texCoord0WAttribute = vertexDescriptor->attributes()->object(5);
texCoord0WAttribute->setFormat(MTL::VertexFormatFloat);
texCoord0WAttribute->setOffset(offsetof(Vertex, s.texcoord0_w));
texCoord0WAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// View
MTL::VertexAttributeDescriptor* viewAttribute = vertexDescriptor->attributes()->object(6);
viewAttribute->setFormat(MTL::VertexFormatFloat3);
viewAttribute->setOffset(offsetof(Vertex, s.view));
viewAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// Texture coordinate 2
MTL::VertexAttributeDescriptor* texCoord2Attribute = vertexDescriptor->attributes()->object(7);
texCoord2Attribute->setFormat(MTL::VertexFormatFloat2);
texCoord2Attribute->setOffset(offsetof(Vertex, s.texcoord2));
texCoord2Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX);
// One Vertex per vertex, tightly strided by sizeof(Vertex)
MTL::VertexBufferLayoutDescriptor* vertexBufferLayout = vertexDescriptor->layouts()->object(VERTEX_BUFFER_BINDING_INDEX);
vertexBufferLayout->setStride(sizeof(Vertex));
vertexBufferLayout->setStepFunction(MTL::VertexStepFunctionPerVertex);
vertexBufferLayout->setStepRate(1);
// NOTE(review): vertexDrawFunction and vertexDescriptor are not released here; presumably drawPipelineCache keeps them — confirm
drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor);
// Copy to LUT texture
// The LUT texture width is passed to the vertex function as function constant 0
MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init();
constants->setConstantValue(&LIGHT_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0));
error = nullptr;
MTL::Function* vertexCopyToLutTextureFunction = copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error);
if (error) {
Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
constants->release();
// This pipeline has a vertex function only
MTL::RenderPipelineDescriptor* copyToLutTexturePipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
copyToLutTexturePipelineDescriptor->setVertexFunction(vertexCopyToLutTextureFunction);
// Disable rasterization
copyToLutTexturePipelineDescriptor->setRasterizationEnabled(false);
error = nullptr;
copyToLutTexturePipelineDescriptor->setLabel(toNSString("Copy to LUT texture pipeline"));
copyToLutTexturePipeline = device->newRenderPipelineState(copyToLutTexturePipelineDescriptor, &error);
if (error) {
Helpers::panic("Error creating copy_to_lut_texture pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
copyToLutTexturePipelineDescriptor->release();
vertexCopyToLutTextureFunction->release();
// Depth stencil cache
depthStencilCache.set(device);
// Vertex buffer cache
vertexBufferCache.set(device);
// -------- Depth stencil state --------
// Created from a descriptor with nothing but a label set
MTL::DepthStencilDescriptor* depthStencilDescriptor = MTL::DepthStencilDescriptor::alloc()->init();
depthStencilDescriptor->setLabel(toNSString("Default depth stencil state"));
defaultDepthStencilState = device->newDepthStencilState(depthStencilDescriptor);
depthStencilDescriptor->release();
// Release
// The copy-to-LUT library is only needed to create its vertex function above
copyToLutTextureLibrary->release();
}
// Records a pending clear for the render target located at startAddress.
// A colour target is looked up first, then a depth-stencil target; a warning is emitted if neither is found.
// The clear is only stored in the colorClearOps / depthClearOps / stencilClearOps maps, keyed by texture.
// endAddress and control are not used.
void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {
	if (const auto colorTarget = colorRenderTargetCache.findFromAddress(startAddress); colorTarget) {
		// Red sits in the top byte of value, alpha in the lowest byte
		const float red = Helpers::getBits<24, 8>(value) / 255.0f;
		const float green = Helpers::getBits<16, 8>(value) / 255.0f;
		const float blue = Helpers::getBits<8, 8>(value) / 255.0f;
		const float alpha = (value & 0xff) / 255.0f;

		colorClearOps[colorTarget->get().texture] = {red, green, blue, alpha};
		return;
	}

	if (const auto depthTarget = depthStencilRenderTargetCache.findFromAddress(startAddress); depthTarget) {
		const auto format = depthTarget->get().format;

		// Depth16 uses the low 16 bits of value, every other format the low 24 bits
		const float depthVal = (format == DepthFmt::Depth16) ? (value & 0xffff) / 65535.0f : (value & 0xffffff) / 16777215.0f;
		depthClearOps[depthTarget->get().texture] = depthVal;

		// With a stencil channel present, the top byte of value is the stencil clear value
		if (format == DepthFmt::Depth24Stencil8) {
			const u8 stencilVal = value >> 24;
			stencilClearOps[depthTarget->get().texture] = stencilVal;
		}
		return;
	}

	Helpers::warn("[RendererMTL::ClearBuffer] No buffer found!\n");
}
// Copies a region between two colour render targets.
//   inputSize / outputSize: width in the low 16 bits, height in the high 16 bits
//   flags: bit 0 = vertical flip, bits 8-10 = input format, bits 12-14 = output format, bits 24-25 = scaling mode
// The source target is looked up with the input width and the output height; the input height is not used.
void RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {
	const u32 inputWidth = inputSize & 0xffff;
	u32 outputWidth = outputSize & 0xffff;
	u32 outputHeight = outputSize >> 16;

	const auto inputFormat = ToColorFormat(Helpers::getBits<8, 3>(flags));
	const auto outputFormat = ToColorFormat(Helpers::getBits<12, 3>(flags));
	const bool verticalFlip = flags & 1;
	const PICA::Scaling scaling = static_cast<PICA::Scaling>(Helpers::getBits<24, 2>(flags));

	// Source surface
	auto srcFramebuffer = getColorRenderTarget(inputAddr, inputFormat, inputWidth, outputHeight);
	nextRenderPassName = "Clear before display transfer";
	clearColor(nullptr, srcFramebuffer->texture);

	// The source rectangle uses the unscaled output dimensions
	Math::Rect<u32> srcRect = srcFramebuffer->getSubRect(inputAddr, outputWidth, outputHeight);
	if (verticalFlip) {
		std::swap(srcRect.bottom, srcRect.top);
	}

	// Apply scaling for the destination rectangle: X halves the width, XY halves both dimensions
	const bool halveWidth = scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY;
	const bool halveHeight = scaling == PICA::Scaling::XY;
	if (halveWidth) {
		outputWidth >>= 1;
	}
	if (halveHeight) {
		outputHeight >>= 1;
	}

	// Destination surface
	auto destFramebuffer = getColorRenderTarget(outputAddr, outputFormat, outputWidth, outputHeight);
	// TODO: clear if not blitting to the whole framebuffer
	Math::Rect<u32> destRect = destFramebuffer->getSubRect(outputAddr, outputWidth, outputHeight);

	// Note: strided transfers (inputWidth != outputWidth) get no special handling here

	textureCopyImpl(*srcFramebuffer, *destFramebuffer, srcRect, destRect);
}
// Accelerates a raw texture copy by treating both surfaces as tiled RGBA8 colour render targets.
//   totalBytes: size of the copy, rounded down to a multiple of 16 bytes
//   inputSize / outputSize: line width in the low 16 bits and gap in the high 16 bits, both in 16-byte units
//   flags: not used
// Returns without copying (after a warning) if the copy is smaller than 16 bytes, if the input and
// output widths differ, or if no source render target exists at inputAddr.
void RendererMTL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {
	// Texture copy size is aligned to 16 byte units
	const u32 copySize = totalBytes & ~0xf;
	if (copySize == 0) {
		Helpers::warn("TextureCopy total bytes less than 16!\n");
		return;
	}

	// The width and gap are provided in 16-byte units.
	// Only the input gap is used (for the source stride); the output gap is currently ignored.
	const u32 inputWidth = (inputSize & 0xffff) << 4;
	const u32 inputGap = (inputSize >> 16) << 4;
	const u32 outputWidth = (outputSize & 0xffff) << 4;

	if (inputWidth != outputWidth) {
		Helpers::warn("Input width does not match output width, cannot accelerate texture copy!");
		return;
	}

	// Texture copy is a raw data copy in PICA, which means no format or tiling information is provided to the engine.
	// Depending if the target surface is linear or tiled, games set inputWidth to either the width of the texture or
	// the width multiplied by eight (because tiles are stored linearly in memory).
	// To properly accelerate this we must examine each surface individually. For now we assume the most common case
	// of tiled surface with RGBA8 format. If our assumption does not hold true, we abort the texture copy as inserting
	// that surface is not correct.

	// We assume the source surface is tiled and RGBA8. inputWidth is in bytes so divide it
	// by eight * sizePerPixel(RGBA8) to convert it to a useable width.
	const u32 bpp = sizePerPixel(PICA::ColorFmt::RGBA8);
	const u32 copyStride = (inputWidth + inputGap) / (8 * bpp);
	const u32 copyWidth = inputWidth / (8 * bpp);

	// inputHeight/outputHeight are typically set to zero so they cannot be used to get the height of the copy region
	// in contrast to display transfer. Compute height manually by dividing the copy size with the copy width. The result
	// is the number of vertical tiles so multiply that by eight to get the actual copy height.
	u32 copyHeight;
	if (inputWidth != 0) [[likely]] {
		copyHeight = (copySize / inputWidth) * 8;
	} else {
		copyHeight = 0;
	}

	// Find the source surface.
	auto srcFramebuffer = getColorRenderTarget(inputAddr, PICA::ColorFmt::RGBA8, copyStride, copyHeight, false);
	if (!srcFramebuffer) {
		// This message previously named RendererGL, a leftover from the OpenGL backend
		Helpers::warn("RendererMTL::TextureCopy failed to locate src framebuffer!\n");
		return;
	}

	nextRenderPassName = "Clear before texture copy";
	clearColor(nullptr, srcFramebuffer->texture);
	Math::Rect<u32> srcRect = srcFramebuffer->getSubRect(inputAddr, copyWidth, copyHeight);

	// Assume the destination surface has the same format. Unless the surfaces have the same block width,
	// texture copy does not make sense.
	auto destFramebuffer = getColorRenderTarget(outputAddr, srcFramebuffer->format, copyWidth, copyHeight);
	// TODO: clear if not blitting to the whole framebuffer
	Math::Rect<u32> destRect = destFramebuffer->getSubRect(outputAddr, copyWidth, copyHeight);

	textureCopyImpl(*srcFramebuffer, *destFramebuffer, srcRect, destRect);
}
// Encodes one draw call for `vertices`, assembled as `primType` primitives, on the current render command encoder.
//
// The function reads the PICA register file (`regs`) to build the depth/stencil hash and the draw pipeline hash,
// fetches the matching state objects from their caches, begins a render pass if needed, refreshes the lighting/fog
// LUTs when they are dirty, and then sets the viewport, blend colour, stencil reference and shader resources before
// issuing the draw.
//
// primType: primitive topology, translated with toMTLPrimitiveType.
// vertices: vertex data to draw; uploaded inline when smaller than 4KB, otherwise through vertexBufferCache.
void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) {
// Color
// No createIfnotFound argument is passed, so the default from the declaration applies (not visible here; presumably
// true, since the optional is dereferenced below without a has_value() check -- TODO confirm).
auto colorRenderTarget = getColorRenderTarget(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]);
// Depth stencil
// DepthAndColorMask packs: bit 0 = depth test enable, bits 4-6 = depth function, bits 8-11 = colour write mask,
// bit 12 = depth write enable.
const u32 depthControl = regs[PICA::InternalRegs::DepthAndColorMask];
const bool depthStencilWrite = regs[PICA::InternalRegs::DepthBufferWrite];
const bool depthEnable = depthControl & 0x1;
const bool depthWriteEnable = Helpers::getBit<12>(depthControl);
const u8 depthFunc = Helpers::getBits<4, 3>(depthControl);
const u8 colorMask = Helpers::getBits<8, 4>(depthControl);
// Initial hash values; presumably {depthStencilWrite, depthFunc} in declaration order -- TODO confirm against the struct.
Metal::DepthStencilHash depthStencilHash{false, 1};
depthStencilHash.stencilConfig = regs[PICA::InternalRegs::StencilTest];
depthStencilHash.stencilOpConfig = regs[PICA::InternalRegs::StencilOp];
const bool stencilEnable = Helpers::getBit<0>(depthStencilHash.stencilConfig);
// A depth/stencil target is only fetched when the depth test, depth writes or the stencil test need one.
std::optional<Metal::DepthStencilRenderTarget> depthStencilRenderTarget = std::nullopt;
if (depthEnable) {
// Writes only happen when both the per-draw write bit and the DepthBufferWrite register allow them.
depthStencilHash.depthStencilWrite = depthWriteEnable && depthStencilWrite;
depthStencilHash.depthFunc = depthFunc;
depthStencilRenderTarget = getDepthRenderTarget();
} else {
if (depthWriteEnable) {
// Depth test disabled but writes requested: keep the initial depth function and enable writes.
depthStencilHash.depthStencilWrite = true;
depthStencilRenderTarget = getDepthRenderTarget();
} else if (stencilEnable) {
// Stencil-only draw: a target is still required, depth state stays at its initial values.
depthStencilRenderTarget = getDepthRenderTarget();
}
}
// Depth uniforms
// Uploaded as raw bytes to vertex buffer index 2 at the end of this function.
struct {
float depthScale;
float depthOffset;
bool depthMapEnable;
} depthUniforms;
// Scale and offset are stored in the low 24 bits of their registers as f24 values.
depthUniforms.depthScale = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
depthUniforms.depthOffset = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
depthUniforms.depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
// -------- Pipeline --------
// The depth format starts as DepthFmt::Unknown1 and is only replaced when a depth/stencil target is in use.
Metal::DrawPipelineHash pipelineHash{colorRenderTarget->format, DepthFmt::Unknown1};
if (depthStencilRenderTarget) {
pipelineHash.depthFmt = depthStencilRenderTarget->format;
}
// Fragment-related state read from raw register indices; the field names on the left describe what each one feeds.
pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1;
pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7;
pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u];
pipelineHash.fragHash.alphaControl = regs[0x104];
// Blending and logic op
// Bit 8 of ColourOperation selects blending; when it is clear, the logic op register is used instead.
pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0;
pipelineHash.colorWriteMask = colorMask;
u8 logicOp = 3; // Copy, which doesn't do anything
if (pipelineHash.blendEnabled) {
pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc];
} else {
logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]);
}
MTL::RenderPipelineState* pipeline = drawPipelineCache.get(pipelineHash);
// Depth stencil state
MTL::DepthStencilState* depthStencilState = depthStencilCache.get(depthStencilHash);
// -------- Render --------
// NOTE(review): the descriptor is created with alloc()->init() and never released in this function; whether
// beginRenderPassIfNeeded takes ownership is not visible here -- confirm it is released somewhere.
MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
// doesClear becomes true if any of the colour, depth or stencil clear helpers reports a clear for this pass.
bool doesClear = clearColor(renderPassDescriptor, colorRenderTarget->texture);
if (depthStencilRenderTarget) {
if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture))
doesClear = true;
// The stencil clear is only attempted for the combined depth+stencil format.
if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) {
if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture))
doesClear = true;
}
}
// The pass name is assigned before beginRenderPassIfNeeded is called.
nextRenderPassName = "Draw vertices";
beginRenderPassIfNeeded(renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? depthStencilRenderTarget->texture : nullptr));
// Update the LUT texture if necessary
// The LUT updates set their own pipeline and depth/stencil state, so they are encoded before the draw state below.
if (gpu.lightingLUTDirty) {
updateLightingLUT(renderCommandEncoder);
}
if (gpu.fogLUTDirty) {
updateFogLUT(renderCommandEncoder);
}
renderCommandEncoder->setRenderPipelineState(pipeline);
renderCommandEncoder->setDepthStencilState(depthStencilState);
// If size is < 4KB, use inline vertex data, otherwise use a buffer
if (vertices.size_bytes() < 4 * 1024) {
renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX);
} else {
Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes());
renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX);
}
// Viewport
// X is in the low 10 bits of ViewportXY and Y in bits 16-25. Width and height are f24 values that are doubled and
// then truncated to integers.
const u32 viewportX = regs[PICA::InternalRegs::ViewportXY] & 0x3ff;
const u32 viewportY = (regs[PICA::InternalRegs::ViewportXY] >> 16) & 0x3ff;
const u32 viewportWidth = Floats::f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0f;
const u32 viewportHeight = Floats::f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 0xffffff).toFloat32() * 2.0f;
// The viewport origin is offset by the left/bottom of the render target's sub-rect for the colour buffer address.
const auto rect = colorRenderTarget->getSubRect(colourBufferLoc, fbSize[0], fbSize[1]);
MTL::Viewport viewport{double(rect.left + viewportX), double(rect.bottom + viewportY), double(viewportWidth), double(viewportHeight), 0.0, 1.0};
renderCommandEncoder->setViewport(viewport);
// Blend color
// BlendColour holds one byte per channel (R in the lowest byte, A in the highest), normalised to 0..1 here.
if (pipelineHash.blendEnabled) {
u32 constantColor = regs[PICA::InternalRegs::BlendColour];
const u8 r = constantColor & 0xff;
const u8 g = Helpers::getBits<8, 8>(constantColor);
const u8 b = Helpers::getBits<16, 8>(constantColor);
const u8 a = Helpers::getBits<24, 8>(constantColor);
renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f);
}
// Stencil reference
if (stencilEnable) {
const s8 reference = s8(Helpers::getBits<16, 8>(depthStencilHash.stencilConfig)); // Signed reference value
renderCommandEncoder->setStencilReferenceValue(reference);
}
// Bind resources
setupTextureEnvState(renderCommandEncoder);
bindTexturesToSlots(renderCommandEncoder);
// Registers 0x48..0x1FF are uploaded verbatim to buffer index 0 of both the vertex and the fragment stage.
renderCommandEncoder->setVertexBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0);
renderCommandEncoder->setFragmentBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0);
// Index 2 carries the depth uniforms in the vertex stage and the logic op in the fragment stage.
renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2);
renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2);
renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size()));
}
// Screenshot capture is not implemented for the Metal renderer yet: the request is only reported
// through a warning and `name` is ignored.
void RendererMTL::screenshot([[maybe_unused]] const std::string& name) {
	// TODO: implement
	Helpers::warn("RendererMTL::screenshot not implemented");
}
// Tears down the graphics context: resets the renderer, then releases the Metal objects it holds.
void RendererMTL::deinitGraphicsContext() {
	reset();

	// Drops one reference on a metal-cpp object
	const auto drop = [](auto* object) { object->release(); };

	// Pipelines, states, textures and samplers go first; the library, command queue and device go last
	drop(copyToLutTexturePipeline);
	drop(displayPipeline);
	drop(defaultDepthStencilState);
	drop(lutTexture);
	drop(linearSampler);
	drop(nearestSampler);
	drop(library);
	drop(commandQueue);
	drop(device);
}
// Returns the colour render target cached for `addr`, optionally creating one when nothing is cached.
//
// The lookup goes by address only. That is deliberately relaxed: display transfers and texture copies may
// address a sub-rectangle of a surface, and a texture copy does not know the format of the surface.
// `format`, `width` and `height` are only used when a new target has to be created.
//
// Returns std::nullopt when no target is cached for `addr` and `createIfnotFound` is false.
std::optional<Metal::ColorRenderTarget> RendererMTL::getColorRenderTarget(
	u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound
) {
	// Cache hit: hand back the existing target
	if (auto cached = colorRenderTargetCache.findFromAddress(addr); cached.has_value()) {
		return cached.value().get();
	}

	// Cache miss: create and cache a new target if the caller allows it
	if (createIfnotFound) {
		Metal::ColorRenderTarget newTarget(device, addr, format, width, height);
		return colorRenderTargetCache.add(newTarget);
	}

	return std::nullopt;
}
// Returns the cached depth/stencil render target matching the current depth buffer location, format and
// framebuffer size, adding a new entry to the cache when none matches.
Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() {
	// Describes the target we are looking for; it is also what gets cached on a miss
	Metal::DepthStencilRenderTarget wanted(device, depthBufferLoc, depthBufferFormat, fbSize[0], fbSize[1]);

	auto cached = depthStencilRenderTargetCache.find(wanted);
	if (!cached.has_value()) {
		return depthStencilRenderTargetCache.add(wanted);
	}

	return cached.value().get();
}
// Returns the cached texture matching `tex`. On a cache miss, `tex` is added to the cache and the new
// entry is decoded from the texture data at `tex.location` before it is returned.
Metal::Texture& RendererMTL::getTexture(Metal::Texture& tex) {
	if (auto cached = textureCache.find(tex); cached.has_value()) {
		return cached.value().get();
	}

	// Get pointer to the texture data in 3DS memory
	const auto guestData = std::span{gpu.getPointerPhys<u8>(tex.location), tex.sizeInBytes()};

	Metal::Texture& cachedTex = textureCache.add(tex);
	cachedTex.decodeTexture(guestData);
	return cachedTex;
}
// Gathers the registers of the six texture environment stages and uploads them to the shaders:
// the per-stage colour registers go to vertex buffer index 1, everything else (source, operand,
// combiner and scale registers) goes to fragment buffer index 1.
void RendererMTL::setupTextureEnvState(MTL::RenderCommandEncoder* encoder) {
	constexpr size_t stageCount = 6;

	// First register (the source register) of each stage
	static constexpr std::array<u32, stageCount> stageBases = {
		PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source,
		PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source,
	};

	// Uploaded as raw bytes, so the member order defines the layout the fragment stage receives
	struct FragmentEnvState {
		u32 textureEnvSourceRegs[stageCount];
		u32 textureEnvOperandRegs[stageCount];
		u32 textureEnvCombinerRegs[stageCount];
		u32 textureEnvScaleRegs[stageCount];
	};

	FragmentEnvState fragmentState;
	u32 stageColours[stageCount];

	for (size_t stage = 0; stage < stageCount; stage++) {
		// Each stage occupies five consecutive registers: source, operand, combiner, colour, scale
		const u32 base = stageBases[stage];

		fragmentState.textureEnvSourceRegs[stage] = regs[base];
		fragmentState.textureEnvOperandRegs[stage] = regs[base + 1];
		fragmentState.textureEnvCombinerRegs[stage] = regs[base + 2];
		stageColours[stage] = regs[base + 3];
		fragmentState.textureEnvScaleRegs[stage] = regs[base + 4];
	}

	encoder->setVertexBytes(stageColours, sizeof(stageColours), 1);
	encoder->setFragmentBytes(&fragmentState, sizeof(fragmentState), 1);
}
// Binds the textures of the enabled texture units to fragment texture/sampler slots 0-2, and the LUT
// texture (with the linear sampler) to slot 3.
//
// A unit is skipped when its enable bit in TexUnitCfg is clear. A unit whose texture address is 0 is
// left unbound.
void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) {
	// First register (the border colour register) of each texture unit
	static constexpr std::array<u32, 3> ioBases = {
		PICA::InternalRegs::Tex0BorderColor,
		PICA::InternalRegs::Tex1BorderColor,
		PICA::InternalRegs::Tex2BorderColor,
	};

	for (int i = 0; i < 3; i++) {
		if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) {
			continue;
		}

		const size_t ioBase = ioBases[i];

		const u32 dim = regs[ioBase + 1];
		const u32 config = regs[ioBase + 2];
		// Height is in the low 11 bits of the dimension register, width in bits 16-26
		const u32 height = dim & 0x7ff;
		const u32 width = Helpers::getBits<16, 11>(dim);
		// The register stores the address shifted right by 3
		const u32 addr = (regs[ioBase + 4] & 0x0FFFFFFF) << 3;
		// The format register sits at a different offset for unit 0 than for units 1 and 2
		const u32 format = regs[ioBase + (i == 0 ? 13 : 5)] & 0xF;

		if (addr != 0) [[likely]] {
			Metal::Texture targetTex(device, addr, static_cast<PICA::TextureFmt>(format), width, height, config);

			// getTexture returns a reference to the cache entry. Bind to it by reference: a plain `auto`
			// would copy the whole Metal::Texture on every bind for no benefit.
			Metal::Texture& tex = getTexture(targetTex);
			encoder->setFragmentTexture(tex.texture, i);
			// Fall back to the nearest sampler when the texture has no sampler of its own
			encoder->setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i);
		} else {
			// TODO: bind a dummy texture?
		}
	}

	// LUT texture
	encoder->setFragmentTexture(lutTexture, 3);
	encoder->setFragmentSamplerState(linearSampler, 3);
}
// Converts the lighting LUT to floats and encodes a draw on `encoder` that uploads it through the
// copyToLutTexturePipeline, with `lutTexture` bound as vertex texture 0 and an array offset of 0.
// Clears gpu.lightingLUTDirty.
//
// The staging array holds two floats per LUT entry. Only the first float of each pair is filled (the
// 12-bit entry, shifted left by 4 and divided by 65535); the second stays 0.
//
// encoder: render command encoder to record into; every command of this function is encoded on it.
void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
	gpu.lightingLUTDirty = false;

	std::array<float, GPU::LightingLutSize * 2> lightingLut = {0.0f};

	// Convert every LUT entry. The draw below issues one vertex per entry (GPU::LightingLutSize of them),
	// so all pairs in the staging array must be populated, not just the first half of them.
	// The count is clamped to both containers so neither can be indexed out of bounds.
	const size_t pairCount = lightingLut.size() / 2;
	const size_t entryCount = gpu.lightingLUT.size() < pairCount ? gpu.lightingLUT.size() : pairCount;
	for (size_t entry = 0; entry < entryCount; entry++) {
		const uint64_t value = gpu.lightingLUT[entry] & 0xFFF;
		lightingLut[entry * 2] = static_cast<float>(value << 4) / 65535.0f;
	}

	encoder->setRenderPipelineState(copyToLutTexturePipeline);
	encoder->setDepthStencilState(defaultDepthStencilState);
	encoder->setVertexTexture(lutTexture, 0);

	// The data goes through the vertex buffer cache rather than being passed inline
	Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut));
	encoder->setVertexBuffer(buffer.buffer, buffer.offset, 0);

	// The lighting LUTs start at layer 0
	u32 arrayOffset = 0;
	encoder->setVertexBytes(&arrayOffset, sizeof(u32), 1);

	encoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(GPU::LightingLutSize));
}
// Converts the 128-entry fog LUT to floats and encodes a draw on `encoder` that uploads it through the
// copyToLutTexturePipeline, with `lutTexture` bound as vertex texture 0 and an array offset of
// Lights::LUT_Count. Clears gpu.fogLUTDirty.
//
// Each entry becomes a (value, difference) pair of floats:
//   - value:      bits 13-23 of the entry, unsigned, divided by 2048
//   - difference: bits 0-12 of the entry, sign-extended, divided by 2048
//
// encoder: render command encoder to record into; every command of this function is encoded on it.
void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) {
	gpu.fogLUTDirty = false;

	std::array<float, 128 * 2> fogLut = {0.0f};

	for (size_t i = 0; i < fogLut.size(); i += 2) {
		const uint32_t value = gpu.fogLUT[i >> 1];
		int32_t diff = value & 0x1fff;
		diff = (diff << 19) >> 19;  // Sign extend the 13-bit value to 32 bits

		const float fogDifference = float(diff) / 2048.0f;
		const float fogValue = float((value >> 13) & 0x7ff) / 2048.0f;

		fogLut[i] = fogValue;
		fogLut[i + 1] = fogDifference;
	}

	encoder->setRenderPipelineState(copyToLutTexturePipeline);
	encoder->setDepthStencilState(defaultDepthStencilState);
	encoder->setVertexTexture(lutTexture, 0);

	// 128 * 2 floats is 1KB, small enough to pass inline instead of going through the vertex buffer cache
	encoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0);

	// The fog LUT is placed after the lighting LUTs
	u32 arrayOffset = (u32)Lights::LUT_Count;
	encoder->setVertexBytes(&arrayOffset, sizeof(u32), 1);

	encoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(128));
}
// Copies `srcRect` of `srcFramebuffer` into `destRect` of `destFramebuffer` by drawing a 4-vertex
// triangle strip with the blit pipeline, sampling the source with the nearest sampler.
//
// The destination rectangle is applied through the viewport. The source rectangle is passed to the
// vertex stage as {x, y, width, height}, each divided by the source framebuffer size.
void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect, const Math::Rect<u32>& destRect) {
	nextRenderPassName = "Texture copy";

	MTL::RenderPassDescriptor* passDescriptor = MTL::RenderPassDescriptor::alloc()->init();
	// TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole texture
	bool clearsDest = clearColor(passDescriptor, destFramebuffer.texture);
	beginRenderPassIfNeeded(passDescriptor, clearsDest, destFramebuffer.texture);

	// Pipeline
	Metal::BlitPipelineHash hash{destFramebuffer.format, DepthFmt::Unknown1};
	renderCommandEncoder->setRenderPipelineState(blitPipelineCache.get(hash));

	// Viewport: restricts the draw to the destination rectangle
	MTL::Viewport destViewport{
		double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0,
	};
	renderCommandEncoder->setViewport(destViewport);

	// Source rectangle relative to the source framebuffer size
	const float srcWidth = (float)srcFramebuffer.size.u();
	const float srcHeight = (float)srcFramebuffer.size.v();
	float srcRectNDC[4] = {
		srcRect.left / srcWidth,
		srcRect.bottom / srcHeight,
		(srcRect.right - srcRect.left) / srcWidth,
		(srcRect.top - srcRect.bottom) / srcHeight,
	};

	// Bind resources
	renderCommandEncoder->setVertexBytes(srcRectNDC, sizeof(srcRectNDC), 0);
	renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, 0);
	renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0);

	renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
}