rework the lut system

This commit is contained in:
Samuliak 2024-10-31 09:25:01 +01:00
parent 90420160f2
commit 158be432fc
No known key found for this signature in database
9 changed files with 273 additions and 152 deletions

View file

@ -482,6 +482,7 @@ if(ENABLE_METAL AND APPLE)
include/renderer_mtl/mtl_render_target.hpp include/renderer_mtl/mtl_render_target.hpp
include/renderer_mtl/mtl_texture.hpp include/renderer_mtl/mtl_texture.hpp
include/renderer_mtl/mtl_vertex_buffer_cache.hpp include/renderer_mtl/mtl_vertex_buffer_cache.hpp
include/renderer_mtl/mtl_lut_texture.hpp
include/renderer_mtl/pica_to_mtl.hpp include/renderer_mtl/pica_to_mtl.hpp
include/renderer_mtl/objc_helper.hpp include/renderer_mtl/objc_helper.hpp
) )
@ -490,6 +491,7 @@ if(ENABLE_METAL AND APPLE)
src/core/renderer_mtl/renderer_mtl.cpp src/core/renderer_mtl/renderer_mtl.cpp
src/core/renderer_mtl/mtl_texture.cpp src/core/renderer_mtl/mtl_texture.cpp
src/core/renderer_mtl/mtl_etc1.cpp src/core/renderer_mtl/mtl_etc1.cpp
src/core/renderer_mtl/mtl_lut_texture.cpp
src/core/renderer_mtl/objc_helper.mm src/core/renderer_mtl/objc_helper.mm
src/host_shaders/metal_shaders.metal src/host_shaders/metal_shaders.metal
src/host_shaders/metal_copy_to_lut_texture.metal src/host_shaders/metal_copy_to_lut_texture.metal
@ -587,7 +589,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE)
if(NOT ENABLE_OPENGL) if(NOT ENABLE_OPENGL)
message(FATAL_ERROR "Qt frontend requires OpenGL") message(FATAL_ERROR "Qt frontend requires OpenGL")
endif() endif()
option(GENERATE_QT_TRANSLATION "Generate Qt translation file" OFF) option(GENERATE_QT_TRANSLATION "Generate Qt translation file" OFF)
set(QT_LANGUAGES docs/translations) set(QT_LANGUAGES docs/translations)

View file

@ -135,7 +135,10 @@ public:
colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc));
} }
desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt);
desc->setDepthAttachmentPixelFormat(depthFormat);
if (hash.depthFmt == DepthFmt::Depth24Stencil8)
desc->setStencilAttachmentPixelFormat(depthFormat);
NS::Error* error = nullptr; NS::Error* error = nullptr;
desc->setLabel(toNSString("Draw pipeline")); desc->setLabel(toNSString("Draw pipeline"));

View file

@ -0,0 +1,25 @@
#pragma once
#include <Metal/Metal.hpp>
namespace Metal {
/// Owns a Metal array texture that stores lookup tables (LUTs), together with a cursor
/// identifying the array layer most recently handed out by getNextIndex().
///
/// The texture is created in the constructor and released in the destructor. Because the
/// class holds that reference through a raw pointer, copying or moving an instance would make
/// two objects release the same MTL::Texture, so those operations are deleted.
class LutTexture {
  public:
    /// Creates the backing texture.
    /// @param device      Device used to allocate the texture.
    /// @param type        Metal texture type (the renderer passes array texture types).
    /// @param pixelFormat Pixel format of every layer.
    /// @param width       Width of one layer, in texels.
    /// @param height      Height of one layer, in texels.
    /// @param name        Debug label assigned to the texture.
    LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name);
    ~LutTexture();

    // Non-copyable / non-movable: the destructor releases `texture` exactly once.
    LutTexture(const LutTexture&) = delete;
    LutTexture& operator=(const LutTexture&) = delete;
    LutTexture(LutTexture&&) = delete;
    LutTexture& operator=(LutTexture&&) = delete;

    /// Advances the cursor to the next layer (wrapping around) and returns the new index.
    u32 getNextIndex();

    // Getters
    MTL::Texture* getTexture() const { return texture; }
    u32 getCurrentIndex() const { return currentIndex; }

  private:
    MTL::Texture* texture = nullptr;  // Owned; released in the destructor
    u32 currentIndex = 0;             // Layer returned by the last getNextIndex() call (0 before the first call)
};
} // namespace Metal

View file

@ -1,4 +1,5 @@
#pragma once #pragma once
#include <array> #include <array>
#include <string> #include <string>
#include <Metal/Metal.hpp> #include <Metal/Metal.hpp>

View file

@ -1,3 +1,5 @@
#pragma once
#include <Metal/Metal.hpp> #include <Metal/Metal.hpp>
#include <QuartzCore/QuartzCore.hpp> #include <QuartzCore/QuartzCore.hpp>
@ -8,6 +10,8 @@
#include "mtl_draw_pipeline_cache.hpp" #include "mtl_draw_pipeline_cache.hpp"
#include "mtl_depth_stencil_cache.hpp" #include "mtl_depth_stencil_cache.hpp"
#include "mtl_vertex_buffer_cache.hpp" #include "mtl_vertex_buffer_cache.hpp"
#include "mtl_lut_texture.hpp"
// HACK: use the OpenGL cache // HACK: use the OpenGL cache
#include "../renderer_gl/surface_cache.hpp" #include "../renderer_gl/surface_cache.hpp"
@ -54,12 +58,15 @@ class RendererMTL final : public Renderer {
Metal::DepthStencilCache depthStencilCache; Metal::DepthStencilCache depthStencilCache;
Metal::VertexBufferCache vertexBufferCache; Metal::VertexBufferCache vertexBufferCache;
// Objects // Resources
MTL::SamplerState* nearestSampler; MTL::SamplerState* nearestSampler;
MTL::SamplerState* linearSampler; MTL::SamplerState* linearSampler;
MTL::Texture* lutTexture; MTL::Texture* nullTexture;
MTL::DepthStencilState* defaultDepthStencilState; MTL::DepthStencilState* defaultDepthStencilState;
Metal::LutTexture* lutLightingTexture;
Metal::LutTexture* lutFogTexture;
// Pipelines // Pipelines
MTL::RenderPipelineState* displayPipeline; MTL::RenderPipelineState* displayPipeline;
MTL::RenderPipelineState* copyToLutTexturePipeline; MTL::RenderPipelineState* copyToLutTexturePipeline;
@ -91,21 +98,7 @@ class RendererMTL final : public Renderer {
} }
} }
void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr) { void beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr);
createCommandBufferIfNeeded();
if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) {
endRenderPass();
renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor);
renderCommandEncoder->setLabel(toNSString(nextRenderPassName));
lastColorTexture = colorTexture;
lastDepthTexture = depthTexture;
}
renderPassDescriptor->release();
}
void commitCommandBuffer() { void commitCommandBuffer() {
if (renderCommandEncoder) { if (renderCommandEncoder) {
@ -115,6 +108,8 @@ class RendererMTL final : public Renderer {
} }
if (commandBuffer) { if (commandBuffer) {
commandBuffer->commit(); commandBuffer->commit();
// HACK
commandBuffer->waitUntilCompleted();
commandBuffer->release(); commandBuffer->release();
commandBuffer = nullptr; commandBuffer = nullptr;
} }

View file

@ -0,0 +1,32 @@
#include "renderer_mtl/renderer_mtl.hpp"
namespace Metal {
// Number of array layers allocated for every LUT texture; getNextIndex() wraps around at this value
constexpr u32 LAYER_COUNT = 1024;
// Allocates the backing texture: LAYER_COUNT layers of width x height texels in the requested
// type and pixel format, labelled with `name` for debugging.
LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) {
    MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();

    // Per-layer shape, exactly as requested by the caller
    descriptor->setWidth(width);
    descriptor->setHeight(height);
    descriptor->setPixelFormat(pixelFormat);

    // One array layer per LUT snapshot
    descriptor->setTextureType(type);
    descriptor->setArrayLength(LAYER_COUNT);

    // Read-only from shaders (shader writes are not enabled); shared storage mode
    descriptor->setStorageMode(MTL::StorageModeShared);
    descriptor->setUsage(MTL::TextureUsageShaderRead);

    texture = device->newTexture(descriptor);
    descriptor->release();  // The descriptor is only needed for creation

    texture->setLabel(toNSString(name));
}
// Drops the reference to the texture created in the constructor.
LutTexture::~LutTexture() {
    texture->release();
}
// Moves the cursor to the following layer, wrapping back to 0 after the last one, and returns
// the new position. The cursor is advanced before it is returned, so the first call yields 1.
u32 LutTexture::getNextIndex() {
    const u32 next = (currentIndex + 1) % LAYER_COUNT;
    currentIndex = next;
    return next;
}
} // namespace Metal

View file

@ -1,17 +1,21 @@
#include "PICA/gpu.hpp"
#include "renderer_mtl/renderer_mtl.hpp" #include "renderer_mtl/renderer_mtl.hpp"
#include "renderer_mtl/objc_helper.hpp"
#include <cmrc/cmrc.hpp> #include <cmrc/cmrc.hpp>
#include <cstddef> #include <cstddef>
#include "renderer_mtl/mtl_lut_texture.hpp"
// HACK
#undef NO
#include "PICA/gpu.hpp"
#include "SDL_metal.h" #include "SDL_metal.h"
using namespace PICA; using namespace PICA;
CMRC_DECLARE(RendererMTL); CMRC_DECLARE(RendererMTL);
const u16 LIGHT_LUT_TEXTURE_WIDTH = 256; const u16 LIGHTING_LUT_TEXTURE_WIDTH = 256;
const u32 FOG_LUT_TEXTURE_WIDTH = 128;
// HACK: redefinition... // HACK: redefinition...
PICA::ColorFmt ToColorFormat(u32 format) { PICA::ColorFmt ToColorFormat(u32 format) {
@ -23,10 +27,10 @@ PICA::ColorFmt ToColorFormat(u32 format) {
} }
MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) { MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) {
//MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init(); // MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init();
NS::Error* error = nullptr; NS::Error* error = nullptr;
MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error); MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error);
//MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error); // MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error);
if (error) { if (error) {
Helpers::panic("Error loading shaders: %s", error->description()->cString(NS::ASCIIStringEncoding)); Helpers::panic("Error loading shaders: %s", error->description()->cString(NS::ASCIIStringEncoding));
} }
@ -39,19 +43,19 @@ RendererMTL::RendererMTL(GPU& gpu, const std::array<u32, regNum>& internalRegs,
RendererMTL::~RendererMTL() {} RendererMTL::~RendererMTL() {}
void RendererMTL::reset() { void RendererMTL::reset() {
vertexBufferCache.reset(); vertexBufferCache.reset();
depthStencilCache.reset(); depthStencilCache.reset();
drawPipelineCache.reset(); drawPipelineCache.reset();
blitPipelineCache.reset(); blitPipelineCache.reset();
textureCache.reset(); textureCache.reset();
depthStencilRenderTargetCache.reset(); depthStencilRenderTargetCache.reset();
colorRenderTargetCache.reset(); colorRenderTargetCache.reset();
} }
void RendererMTL::display() { void RendererMTL::display() {
CA::MetalDrawable* drawable = metalLayer->nextDrawable(); CA::MetalDrawable* drawable = metalLayer->nextDrawable();
if (!drawable) { if (!drawable) {
return; return;
} }
using namespace PICA::ExternalRegs; using namespace PICA::ExternalRegs;
@ -62,7 +66,7 @@ void RendererMTL::display() {
auto topScreen = colorRenderTargetCache.findFromAddress(topScreenAddr); auto topScreen = colorRenderTargetCache.findFromAddress(topScreenAddr);
if (topScreen) { if (topScreen) {
clearColor(nullptr, topScreen->get().texture); clearColor(nullptr, topScreen->get().texture);
} }
// Bottom screen // Bottom screen
@ -71,7 +75,7 @@ void RendererMTL::display() {
auto bottomScreen = colorRenderTargetCache.findFromAddress(bottomScreenAddr); auto bottomScreen = colorRenderTargetCache.findFromAddress(bottomScreenAddr);
if (bottomScreen) { if (bottomScreen) {
clearColor(nullptr, bottomScreen->get().texture); clearColor(nullptr, bottomScreen->get().texture);
} }
// -------- Draw -------- // -------- Draw --------
@ -131,14 +135,14 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
// Textures // Textures
MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
textureDescriptor->setTextureType(MTL::TextureType2D); textureDescriptor->setTextureType(MTL::TextureType2D);
textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA32Float); textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA8Unorm);
textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH); textureDescriptor->setWidth(1);
textureDescriptor->setHeight(Lights::LUT_Count + 1); textureDescriptor->setHeight(1);
textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite);
textureDescriptor->setStorageMode(MTL::StorageModePrivate); textureDescriptor->setStorageMode(MTL::StorageModePrivate);
textureDescriptor->setUsage(MTL::TextureUsageShaderRead);
lutTexture = device->newTexture(textureDescriptor); nullTexture = device->newTexture(textureDescriptor);
lutTexture->setLabel(toNSString("LUT texture")); nullTexture->setLabel(toNSString("Null texture"));
textureDescriptor->release(); textureDescriptor->release();
// Samplers // Samplers
@ -153,6 +157,9 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
samplerDescriptor->release(); samplerDescriptor->release();
lutLightingTexture = new Metal::LutTexture(device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture");
lutFogTexture = new Metal::LutTexture(device, MTL::TextureType1DArray, MTL::PixelFormatRG32Float, FOG_LUT_TEXTURE_WIDTH, 1, "Fog LUT texture");
// -------- Pipelines -------- // -------- Pipelines --------
// Load shaders // Load shaders
@ -249,14 +256,15 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
// Copy to LUT texture // Copy to LUT texture
MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init();
constants->setConstantValue(&LIGHT_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0)); constants->setConstantValue(&LIGHTING_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0));
error = nullptr; error = nullptr;
MTL::Function* vertexCopyToLutTextureFunction = copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error); MTL::Function* vertexCopyToLutTextureFunction =
if (error) { copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error);
Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding)); if (error) {
} Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding));
constants->release(); }
constants->release();
MTL::RenderPipelineDescriptor* copyToLutTexturePipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); MTL::RenderPipelineDescriptor* copyToLutTexturePipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
copyToLutTexturePipelineDescriptor->setVertexFunction(vertexCopyToLutTextureFunction); copyToLutTexturePipelineDescriptor->setVertexFunction(vertexCopyToLutTextureFunction);
@ -314,8 +322,8 @@ void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 c
depthClearOps[depth->get().texture] = depthVal; depthClearOps[depth->get().texture] = depthVal;
if (format == DepthFmt::Depth24Stencil8) { if (format == DepthFmt::Depth24Stencil8) {
const u8 stencilVal = value >> 24; const u8 stencilVal = value >> 24;
stencilClearOps[depth->get().texture] = stencilVal; stencilClearOps[depth->get().texture] = stencilVal;
} }
return; return;
@ -365,7 +373,7 @@ void RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize,
} }
void RendererMTL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { void RendererMTL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {
// Texture copy size is aligned to 16 byte units // Texture copy size is aligned to 16 byte units
const u32 copySize = totalBytes & ~0xf; const u32 copySize = totalBytes & ~0xf;
if (copySize == 0) { if (copySize == 0) {
Helpers::warn("TextureCopy total bytes less than 16!\n"); Helpers::warn("TextureCopy total bytes less than 16!\n");
@ -463,33 +471,33 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
// Depth uniforms // Depth uniforms
struct { struct {
float depthScale; float depthScale;
float depthOffset; float depthOffset;
bool depthMapEnable; bool depthMapEnable;
} depthUniforms; } depthUniforms;
depthUniforms.depthScale = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); depthUniforms.depthScale = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
depthUniforms.depthOffset = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); depthUniforms.depthOffset = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
depthUniforms.depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; depthUniforms.depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
// -------- Pipeline -------- // -------- Pipeline --------
Metal::DrawPipelineHash pipelineHash{colorRenderTarget->format, DepthFmt::Unknown1}; Metal::DrawPipelineHash pipelineHash{colorRenderTarget->format, DepthFmt::Unknown1};
if (depthStencilRenderTarget) { if (depthStencilRenderTarget) {
pipelineHash.depthFmt = depthStencilRenderTarget->format; pipelineHash.depthFmt = depthStencilRenderTarget->format;
} }
pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1; pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1;
pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7; pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7;
pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u]; pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u];
pipelineHash.fragHash.alphaControl = regs[0x104]; pipelineHash.fragHash.alphaControl = regs[0x104];
// Blending and logic op // Blending and logic op
pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0;
pipelineHash.colorWriteMask = colorMask; pipelineHash.colorWriteMask = colorMask;
u8 logicOp = 3; // Copy, which doesn't do anything u8 logicOp = 3; // Copy
if (pipelineHash.blendEnabled) { if (pipelineHash.blendEnabled) {
pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc]; pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc];
} else { } else {
logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]); logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]);
} }
MTL::RenderPipelineState* pipeline = drawPipelineCache.get(pipelineHash); MTL::RenderPipelineState* pipeline = drawPipelineCache.get(pipelineHash);
@ -500,25 +508,25 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
// -------- Render -------- // -------- Render --------
MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
bool doesClear = clearColor(renderPassDescriptor, colorRenderTarget->texture); bool doesClear = clearColor(renderPassDescriptor, colorRenderTarget->texture);
if (depthStencilRenderTarget) { if (depthStencilRenderTarget) {
if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture)) if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture)) doesClear = true;
doesClear = true; if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) {
if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) { if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture)) doesClear = true;
if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture)) }
doesClear = true; }
}
}
nextRenderPassName = "Draw vertices"; nextRenderPassName = "Draw vertices";
beginRenderPassIfNeeded(renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? depthStencilRenderTarget->texture : nullptr)); beginRenderPassIfNeeded(
renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? depthStencilRenderTarget->texture : nullptr)
);
// Update the LUT texture if necessary // Update the LUT texture if necessary
if (gpu.lightingLUTDirty) { if (gpu.lightingLUTDirty) {
updateLightingLUT(renderCommandEncoder); updateLightingLUT(renderCommandEncoder);
} }
if (gpu.fogLUTDirty) { if (gpu.fogLUTDirty) {
updateFogLUT(renderCommandEncoder); updateFogLUT(renderCommandEncoder);
} }
renderCommandEncoder->setRenderPipelineState(pipeline); renderCommandEncoder->setRenderPipelineState(pipeline);
renderCommandEncoder->setDepthStencilState(depthStencilState); renderCommandEncoder->setDepthStencilState(depthStencilState);
@ -526,7 +534,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
if (vertices.size_bytes() < 4 * 1024) { if (vertices.size_bytes() < 4 * 1024) {
renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX); renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX);
} else { } else {
Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes()); Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes());
renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX); renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX);
} }
@ -541,20 +549,20 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
// Blend color // Blend color
if (pipelineHash.blendEnabled) { if (pipelineHash.blendEnabled) {
u32 constantColor = regs[PICA::InternalRegs::BlendColour]; u32 constantColor = regs[PICA::InternalRegs::BlendColour];
const u8 r = constantColor & 0xff; const u8 r = constantColor & 0xff;
const u8 g = Helpers::getBits<8, 8>(constantColor); const u8 g = Helpers::getBits<8, 8>(constantColor);
const u8 b = Helpers::getBits<16, 8>(constantColor); const u8 b = Helpers::getBits<16, 8>(constantColor);
const u8 a = Helpers::getBits<24, 8>(constantColor); const u8 a = Helpers::getBits<24, 8>(constantColor);
renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f); renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f);
} }
// Stencil reference // Stencil reference
if (stencilEnable) { if (stencilEnable) {
const s8 reference = s8(Helpers::getBits<16, 8>(depthStencilHash.stencilConfig)); // Signed reference value const s8 reference = s8(Helpers::getBits<16, 8>(depthStencilHash.stencilConfig)); // Signed reference value
renderCommandEncoder->setStencilReferenceValue(reference); renderCommandEncoder->setStencilReferenceValue(reference);
} }
// Bind resources // Bind resources
setupTextureEnvState(renderCommandEncoder); setupTextureEnvState(renderCommandEncoder);
@ -563,6 +571,8 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
renderCommandEncoder->setFragmentBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); renderCommandEncoder->setFragmentBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0);
renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2); renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2);
renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2); renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2);
u32 lutSlices[2] = {lutLightingTexture->getCurrentIndex(), lutFogTexture->getCurrentIndex()};
renderCommandEncoder->setFragmentBytes(&lutSlices, sizeof(lutSlices), 3);
renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size())); renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size()));
} }
@ -575,11 +585,14 @@ void RendererMTL::screenshot(const std::string& name) {
void RendererMTL::deinitGraphicsContext() { void RendererMTL::deinitGraphicsContext() {
reset(); reset();
delete lutLightingTexture;
delete lutFogTexture;
// Release // Release
copyToLutTexturePipeline->release(); copyToLutTexturePipeline->release();
displayPipeline->release(); displayPipeline->release();
defaultDepthStencilState->release(); defaultDepthStencilState->release();
lutTexture->release(); nullTexture->release();
linearSampler->release(); linearSampler->release();
nearestSampler->release(); nearestSampler->release();
library->release(); library->release();
@ -607,10 +620,10 @@ std::optional<Metal::ColorRenderTarget> RendererMTL::getColorRenderTarget(
auto& colorBuffer = colorRenderTargetCache.add(sampleBuffer); auto& colorBuffer = colorRenderTargetCache.add(sampleBuffer);
// Clear the color buffer // Clear the color buffer
colorClearOps[colorBuffer.texture] = {0, 0, 0, 0}; colorClearOps[colorBuffer.texture] = {0, 0, 0, 0};
return colorBuffer; return colorBuffer;
} }
Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() {
@ -622,13 +635,13 @@ Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() {
} else { } else {
auto& depthBuffer = depthStencilRenderTargetCache.add(sampleBuffer); auto& depthBuffer = depthStencilRenderTargetCache.add(sampleBuffer);
// Clear the depth buffer // Clear the depth buffer
depthClearOps[depthBuffer.texture] = 0.0f; depthClearOps[depthBuffer.texture] = 0.0f;
if (depthBuffer.format == DepthFmt::Depth24Stencil8) { if (depthBuffer.format == DepthFmt::Depth24Stencil8) {
stencilClearOps[depthBuffer.texture] = 0; stencilClearOps[depthBuffer.texture] = 0;
} }
return depthBuffer; return depthBuffer;
} }
} }
@ -683,7 +696,9 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) {
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) {
continue; encoder->setFragmentTexture(nullTexture, i);
encoder->setFragmentSamplerState(nearestSampler, i);
continue;
} }
const size_t ioBase = ioBases[i]; const size_t ioBase = ioBases[i];
@ -701,42 +716,55 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) {
encoder->setFragmentTexture(tex.texture, i); encoder->setFragmentTexture(tex.texture, i);
encoder->setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); encoder->setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i);
} else { } else {
// TODO: bind a dummy texture? // TODO: log
} }
} }
// LUT texture
encoder->setFragmentTexture(lutTexture, 3);
encoder->setFragmentSamplerState(linearSampler, 3);
} }
void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
gpu.lightingLUTDirty = false; gpu.lightingLUTDirty = false;
std::array<float, GPU::LightingLutSize * 2> lightingLut = {0.0f};
for (int i = 0; i < gpu.lightingLUT.size(); i += 2) { std::array<u16, GPU::LightingLutSize> lightingLut;
uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF;
lightingLut[i] = (float)(value << 4) / 65535.0f; for (int i = 0; i < gpu.lightingLUT.size(); i++) {
uint64_t value = gpu.lightingLUT[i] & 0xFFF;
lightingLut[i] = (value << 4);
} }
//for (int i = 0; i < Lights::LUT_Count; i++) { u32 index = lutLightingTexture->getNextIndex();
// lutTexture->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0); lutLightingTexture->getTexture()->replaceRegion(MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0);
//}
/*
endRenderPass();
Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut));
auto blitCommandEncoder = commandBuffer->blitCommandEncoder();
blitCommandEncoder->copyFromBuffer(buffer.buffer, buffer.offset, LIGHT_LUT_TEXTURE_WIDTH * 2 * 4, 0, MTL::Size(LIGHT_LUT_TEXTURE_WIDTH,
Lights::LUT_Count, 1), lutLightingTexture, 0, 0, MTL::Origin(0, 0, 0));
blitCommandEncoder->endEncoding();
*/
/*
renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline);
renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); renderCommandEncoder->setVertexTexture(lutLightingTexture, 0);
renderCommandEncoder->setVertexTexture(lutTexture, 0);
Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut)); Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut));
renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0);
u32 arrayOffset = 0; u32 arrayOffset = 0;
renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1);
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), GPU::LightingLutSize); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), GPU::LightingLutSize);
MTL::Resource* barrierResources[] = {lutLightingTexture};
renderCommandEncoder->memoryBarrier(barrierResources, 1, MTL::RenderStageVertex, MTL::RenderStageFragment);
*/
} }
void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) {
gpu.fogLUTDirty = false; gpu.fogLUTDirty = false;
std::array<float, 128 * 2> fogLut = {0.0f};
std::array<float, FOG_LUT_TEXTURE_WIDTH * 2> fogLut = {0.0f};
for (int i = 0; i < fogLut.size(); i += 2) { for (int i = 0; i < fogLut.size(); i += 2) {
const uint32_t value = gpu.fogLUT[i >> 1]; const uint32_t value = gpu.fogLUT[i >> 1];
@ -749,20 +777,31 @@ void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) {
fogLut[i + 1] = fogDifference; fogLut[i + 1] = fogDifference;
} }
u32 index = lutFogTexture->getNextIndex();
lutFogTexture->getTexture()->replaceRegion(MTL::Region(0, 0, FOG_LUT_TEXTURE_WIDTH, 1), 0, index, fogLut.data(), 0, 0);
/*
renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline); renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline);
renderCommandEncoder->setDepthStencilState(defaultDepthStencilState); renderCommandEncoder->setDepthStencilState(defaultDepthStencilState);
renderCommandEncoder->setVertexTexture(lutTexture, 0); renderCommandEncoder->setVertexTexture(lutLightingTexture, 0);
//Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut)); // Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut));
//renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0); // renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0);
renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0); renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0);
u32 arrayOffset = (u32)Lights::LUT_Count; u32 arrayOffset = (u32)Lights::LUT_Count;
renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1); renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1);
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(128)); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(128));
MTL::Resource* barrierResources[] = {lutLightingTexture};
renderCommandEncoder->memoryBarrier(barrierResources, 1, MTL::RenderStageVertex, MTL::RenderStageFragment);
*/
} }
void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect, const Math::Rect<u32>& destRect) { void RendererMTL::textureCopyImpl(
nextRenderPassName = "Texture copy"; Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect,
const Math::Rect<u32>& destRect
) {
nextRenderPassName = "Texture copy";
MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
// TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole texture // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole texture
bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture); bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture);
@ -775,8 +814,13 @@ void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Meta
renderCommandEncoder->setRenderPipelineState(blitPipeline); renderCommandEncoder->setRenderPipelineState(blitPipeline);
// Viewport // Viewport
renderCommandEncoder->setViewport(MTL::Viewport{double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0}); renderCommandEncoder->setViewport(MTL::Viewport{
float srcRectNDC[4] = {srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(), (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v()}; double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0
});
float srcRectNDC[4] = {
srcRect.left / (float)srcFramebuffer.size.u(), srcRect.bottom / (float)srcFramebuffer.size.v(),
(srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v()
};
// Bind resources // Bind resources
renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0); renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0);
@ -785,3 +829,26 @@ void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Meta
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4));
} }
// Makes sure a render command encoder is active for the given attachments, starting a new render pass
// when the current one cannot be reused. The passed descriptor is released before returning on every path.
void RendererMTL::beginRenderPassIfNeeded(MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture) {
	createCommandBufferIfNeeded();

	// A differing depth texture forces a new pass, except when a depth texture was bound previously
	// and the caller now passes none
	const bool depthAttachmentChanged = depthTexture != lastDepthTexture && (depthTexture || !lastDepthTexture);
	const bool needsNewPass = doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || depthAttachmentChanged;

	if (!needsNewPass) {
		// The current encoder is reused, the descriptor is not needed
		renderPassDescriptor->release();
		return;
	}

	// Finish whatever pass is currently open before creating the new encoder
	endRenderPass();

	renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor);
	renderCommandEncoder->setLabel(toNSString(nextRenderPassName));

	// Persistent bindings: lighting LUT at texture slot 3, fog LUT at texture slot 4, linear sampler at sampler slot 3
	renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3);
	renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4);
	renderCommandEncoder->setFragmentSamplerState(linearSampler, 3);

	// Remember the attachments so the next call can decide whether this pass is reusable
	lastColorTexture = colorTexture;
	lastDepthTexture = depthTexture;

	renderPassDescriptor->release();
}

View file

@ -4,6 +4,6 @@ using namespace metal;
constant ushort lutTextureWidth [[function_constant(0)]]; constant ushort lutTextureWidth [[function_constant(0)]];
// The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass // The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass
vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d<float, access::write> out [[texture(0)]], constant float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d<float, access::write> out [[texture(0)]], device float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) {
out.write(float4(data[vid], 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth)); out.write(float4(data[vid], 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth));
} }

View file

@ -406,13 +406,11 @@ uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) {
#define RG_LUT 5u #define RG_LUT 5u
#define RR_LUT 6u #define RR_LUT 6u
#define FOG_INDEX 24 float lutLookup(texture2d_array<float> texLut, uint slice, uint lut, uint index) {
return texLut.read(uint2(index, lut), slice).r;
float lutLookup(texture2d<float> texLut, uint lut, uint index) {
return texLut.read(uint2(index, lut)).r;
} }
float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d<float> texLut, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) { float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array<float> texLut, uint slice, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) {
uint lut_index; uint lut_index;
int bit_in_config1; int bit_in_config1;
if (lut_id == SP_LUT) { if (lut_id == SP_LUT) {
@ -498,12 +496,12 @@ float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant
delta = abs(delta); delta = abs(delta);
} }
int index = int(clamp(floor(delta * 255.0), 0.f, 255.f)); int index = int(clamp(floor(delta * 255.0), 0.f, 255.f));
return lutLookup(texLut, lut_index, index) * scale; return lutLookup(texLut, slice, lut_index, index) * scale;
} else { } else {
// Range is [-1, 1] so we need to map it to [0, 1] // Range is [-1, 1] so we need to map it to [0, 1]
int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); int index = int(clamp(floor(delta * 128.0), -128.f, 127.f));
if (index < 0) index += 256; if (index < 0) index += 256;
return lutLookup(texLut, lut_index, index) * scale; return lutLookup(texLut, slice, lut_index, index) * scale;
} }
} }
@ -515,7 +513,7 @@ float3 regToColor(uint reg) {
} }
// Implements the following algorithm: https://mathb.in/26766 // Implements the following algorithm: https://mathb.in/26766
void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d<float> texLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array<float> texLut, uint slice, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) {
// Quaternions describe a transformation from surface-local space to eye space. // Quaternions describe a transformation from surface-local space to eye space.
// In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1),
// the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0).
@ -566,10 +564,10 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi
globals.GPUREG_LIGHTi_CONFIG = picaRegs.read(0x0149u + (lightId << 4u)); globals.GPUREG_LIGHTi_CONFIG = picaRegs.read(0x0149u + (lightId << 4u));
float lightDistance; float lightDistance;
float3 lightPosition = normalize(float3( float3 lightPosition = float3(
decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
)); );
// Positional Light // Positional Light
if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
@ -613,23 +611,23 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi
float delta = lightDistance * distanceAttenuationScale + distanceAttenuationBias; float delta = lightDistance * distanceAttenuationScale + distanceAttenuationBias;
delta = clamp(delta, 0.0, 1.0); delta = clamp(delta, 0.0, 1.0);
int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); int index = int(clamp(floor(delta * 255.0), 0.0, 255.0));
distanceAttenuation = lutLookup(texLut, 16u + lightId, index); distanceAttenuation = lutLookup(texLut, slice, 16u + lightId, index);
} }
float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, environmentId, SP_LUT, lightId, lightVector, halfVector); float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, SP_LUT, lightId, lightVector, halfVector);
float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D0_LUT, lightId, lightVector, halfVector); float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, D0_LUT, lightId, lightVector, halfVector);
float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D1_LUT, lightId, lightVector, halfVector); float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, D1_LUT, lightId, lightVector, halfVector);
float3 reflectedColor; float3 reflectedColor;
reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RR_LUT, lightId, lightVector, halfVector); reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RR_LUT, lightId, lightVector, halfVector);
if (isSamplerEnabled(environmentId, RG_LUT)) { if (isSamplerEnabled(environmentId, RG_LUT)) {
reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RG_LUT, lightId, lightVector, halfVector); reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RG_LUT, lightId, lightVector, halfVector);
} else { } else {
reflectedColor.g = reflectedColor.r; reflectedColor.g = reflectedColor.r;
} }
if (isSamplerEnabled(environmentId, RB_LUT)) { if (isSamplerEnabled(environmentId, RB_LUT)) {
reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RB_LUT, lightId, lightVector, halfVector); reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RB_LUT, lightId, lightVector, halfVector);
} else { } else {
reflectedColor.b = reflectedColor.r; reflectedColor.b = reflectedColor.r;
} }
@ -655,7 +653,7 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi
float fresnelFactor; float fresnelFactor;
if (fresnelOutput1 == 1u || fresnelOutput2 == 1u) { if (fresnelOutput1 == 1u || fresnelOutput2 == 1u) {
fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, environmentId, FR_LUT, lightId, lightVector, halfVector); fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, FR_LUT, lightId, lightVector, halfVector);
} }
if (fresnelOutput1 == 1u) { if (fresnelOutput1 == 1u) {
@ -676,9 +674,7 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) {
return as_type<float4>(performLogicOpU(logicOp, as_type<uint4>(s), as_type<uint4>(d))); return as_type<float4>(performLogicOpU(logicOp, as_type<uint4>(s), as_type<uint4>(d)));
} }
fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture2d_array<float> texLightingLut [[texture(3)]], texture1d_array<float> texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture2d<float> texLut [[texture(3)]],
sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
Globals globals; Globals globals;
// HACK // HACK
@ -689,7 +685,7 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
globals.tevSources[0] = in.color; globals.tevSources[0] = in.color;
if (lightingEnabled) { if (lightingEnabled) {
calcLighting(globals, in, picaRegs, texLut, linearSampler, globals.tevSources[1], globals.tevSources[2]); calcLighting(globals, in, picaRegs, texLightingLut, lutSlices.x, linearSampler, globals.tevSources[1], globals.tevSources[2]);
} else { } else {
globals.tevSources[1] = float4(0.0); globals.tevSources[1] = float4(0.0);
globals.tevSources[2] = float4(0.0); globals.tevSources[2] = float4(0.0);
@ -729,13 +725,13 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u; bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u;
if (enable_fog) { if (enable_fog) {
bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; bool flipDepth = (textureEnvUpdateBuffer & (1u << 16)) != 0u;
float fog_index = flip_depth ? 1.0 - in.position.z : in.position.z; float fogIndex = flipDepth ? 1.0 - in.position.z : in.position.z;
fog_index *= 128.0; fogIndex *= 128.0;
float clamped_index = clamp(floor(fog_index), 0.0, 127.0); float clampedIndex = clamp(floor(fogIndex), 0.0, 127.0);
float delta = fog_index - clamped_index; float delta = fogIndex - clampedIndex;
float2 value = texLut.read(uint2(clamped_index, FOG_INDEX)).rg; float2 value = texFogLut.read(clampedIndex, lutSlices.y).rg;
float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0); float fogFactor = clamp(value.r + value.g * delta, 0.0, 1.0);
uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u); uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u);
@ -743,9 +739,9 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0; float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0;
float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0; float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0;
float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0; float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0;
float3 fog_color = float3(r, g, b); float3 fogColor = float3(r, g, b);
color.rgb = mix(fog_color, color.rgb, fog_factor); color.rgb = mix(fogColor, color.rgb, fogFactor);
} }
// Perform alpha test // Perform alpha test