Merge pull request #2 from SamoZ256/metal-lut-tex-fix

Metal LUT texture fix
This commit is contained in:
SamoZ256 2024-07-06 10:02:50 +02:00 committed by GitHub
commit 2a9da80ff5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 97 additions and 35 deletions

View file

@ -419,26 +419,36 @@ if(ENABLE_METAL AND APPLE)
src/core/renderer_mtl/mtl_etc1.cpp
src/core/renderer_mtl/objc_helper.mm
src/host_shaders/metal_shaders.metal
src/host_shaders/metal_copy_to_lut_texture.metal
)
set(HEADER_FILES ${HEADER_FILES} ${RENDERER_MTL_INCLUDE_FILES})
source_group("Source Files\\Core\\Metal Renderer" FILES ${RENDERER_MTL_SOURCE_FILES})
add_custom_command(
OUTPUT ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.ir
COMMAND xcrun -sdk macosx metal -o ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.ir -c ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.metal
DEPENDS ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.metal
VERBATIM)
set(RENDERER_MTL_HOST_SHADERS_SOURCES)
# Registers the build steps for one Metal shader living in src/host_shaders/.
#
#   SHADER - base file name without extension, e.g. "metal_shaders".
#
# Two custom commands are created: <SHADER>.metal -> <SHADER>.ir (via `metal -c`)
# and <SHADER>.ir -> <SHADER>.metallib (via `metallib`). The resulting .metallib
# path is appended to RENDERER_MTL_HOST_SHADERS_SOURCES in the caller's scope.
function (add_metal_shader SHADER)
    set(SHADER_SOURCE "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metal")
    set(SHADER_IR "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.ir")
    set(SHADER_METALLIB "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metallib")

    # Compile the shader source to Metal IR
    add_custom_command(
        OUTPUT ${SHADER_IR}
        COMMAND xcrun -sdk macosx metal -o ${SHADER_IR} -c ${SHADER_SOURCE}
        DEPENDS ${SHADER_SOURCE}
        VERBATIM)

    # Package the IR into a .metallib
    add_custom_command(
        OUTPUT ${SHADER_METALLIB}
        COMMAND xcrun -sdk macosx metallib -o ${SHADER_METALLIB} ${SHADER_IR}
        DEPENDS ${SHADER_IR}
        VERBATIM)

    # function() opens a new variable scope: without PARENT_SCOPE the appended list
    # would be discarded when the function returns and the caller would never see it.
    set(RENDERER_MTL_HOST_SHADERS_SOURCES ${RENDERER_MTL_HOST_SHADERS_SOURCES} ${SHADER_METALLIB} PARENT_SCOPE)
endfunction()
add_custom_command(
OUTPUT ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.metallib
COMMAND xcrun -sdk macosx metallib -o ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.metallib ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.ir
DEPENDS ${CMAKE_SOURCE_DIR}/src/host_shaders/metal_shaders.ir
VERBATIM)
add_metal_shader(metal_shaders)
add_metal_shader(metal_copy_to_lut_texture)
add_custom_target(
compile_msl_shader
DEPENDS src/host_shaders/metal_shaders.metallib
compile_msl_shaders
DEPENDS ${RENDERER_MTL_HOST_SHADERS_SOURCES}
)
cmrc_add_resource_library(
@ -446,8 +456,9 @@ if(ENABLE_METAL AND APPLE)
NAMESPACE RendererMTL
WHENCE "src/host_shaders/"
"src/host_shaders/metal_shaders.metallib"
"src/host_shaders/metal_copy_to_lut_texture.metallib"
)
add_dependencies(resources_renderer_mtl compile_msl_shader)
add_dependencies(resources_renderer_mtl compile_msl_shaders)
target_sources(AlberCore PRIVATE ${RENDERER_MTL_SOURCE_FILES})
target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_METAL=1")

View file

@ -51,9 +51,11 @@ class RendererMTL final : public Renderer {
MTL::SamplerState* nearestSampler;
MTL::SamplerState* linearSampler;
MTL::Texture* lightLUTTextureArray;
MTL::DepthStencilState* defaultDepthStencilState;
// Pipelines
MTL::RenderPipelineState* displayPipeline;
MTL::RenderPipelineState* copyToLutTexturePipeline;
// Active state
MTL::CommandBuffer* commandBuffer = nullptr;
@ -103,5 +105,5 @@ class RendererMTL final : public Renderer {
Metal::Texture& getTexture(Metal::Texture& tex);
void setupTextureEnvState(MTL::RenderCommandEncoder* encoder);
void bindTexturesToSlots(MTL::RenderCommandEncoder* encoder);
void updateLightingLUT();
void updateLightingLUT(MTL::RenderCommandEncoder* encoder);
};

View file

@ -11,7 +11,7 @@ using namespace PICA;
CMRC_DECLARE(RendererMTL);
#define LIGHT_LUT_TEXTURE_WIDTH 256
const u16 LIGHT_LUT_TEXTURE_WIDTH = 256;
// HACK: redefinition...
PICA::ColorFmt ToColorFormat(u32 format) {
@ -22,6 +22,18 @@ PICA::ColorFmt ToColorFormat(u32 format) {
}
}
// Creates a Metal library from the shader blob held in `shaderSource`.
//
//   device       - Metal device used to create the library.
//   shaderSource - embedded resource file; its bytes [begin(), begin() + size()) are handed to Metal.
//
// Returns the newly created library. Calls Helpers::panic if Metal reports an error
// or hands back a null library.
MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) {
	NS::Error* error = nullptr;
	MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error);

	// Check the returned object and not only the error out-parameter: a null library must never
	// reach the callers, which immediately call newFunction() on it.
	if (library == nullptr || error != nullptr) {
		const char* reason = (error != nullptr) ? error->description()->cString(NS::ASCIIStringEncoding)
												: "no library was returned and no error was reported";
		Helpers::panic("Error loading shaders: %s", reason);
	}

	return library;
}
RendererMTL::RendererMTL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs)
: Renderer(gpu, internalRegs, externalRegs) {}
RendererMTL::~RendererMTL() {}
@ -105,8 +117,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
textureDescriptor->setPixelFormat(MTL::PixelFormatR16Uint);
textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH);
textureDescriptor->setArrayLength(Lights::LUT_Count);
textureDescriptor->setUsage(MTL::TextureUsageShaderRead);
textureDescriptor->setStorageMode(MTL::StorageModeShared);
textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite);
textureDescriptor->setStorageMode(MTL::StorageModePrivate);
lightLUTTextureArray = device->newTexture(textureDescriptor);
textureDescriptor->release();
@ -125,14 +137,8 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
// Load shaders
auto mtlResources = cmrc::RendererMTL::get_filesystem();
auto shaderSource = mtlResources.open("metal_shaders.metallib");
//MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init();
NS::Error* error = nullptr;
MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error);
//MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error);
if (error) {
Helpers::panic("Error loading shaders: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
MTL::Library* library = loadLibrary(device, mtlResources.open("metal_shaders.metallib"));
MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib"));
// Display
MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding));
@ -144,7 +150,7 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
auto* displayColorAttachment = displayPipelineDescriptor->colorAttachments()->object(0);
displayColorAttachment->setPixelFormat(MTL::PixelFormat::PixelFormatBGRA8Unorm);
error = nullptr;
NS::Error* error = nullptr;
displayPipeline = device->newRenderPipelineState(displayPipelineDescriptor, &error);
if (error) {
Helpers::panic("Error creating display pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
@ -217,11 +223,37 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor);
// Copy to LUT texture
MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init();
constants->setConstantValue(&LIGHT_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0));
error = nullptr;
MTL::Function* vertexCopyToLutTextureFunction = copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error);
if (error) {
Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
constants->release();
MTL::RenderPipelineDescriptor* copyToLutTexturePipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
copyToLutTexturePipelineDescriptor->setVertexFunction(vertexCopyToLutTextureFunction);
// Disable rasterization
copyToLutTexturePipelineDescriptor->setRasterizationEnabled(false);
error = nullptr;
copyToLutTexturePipeline = device->newRenderPipelineState(copyToLutTexturePipelineDescriptor, &error);
if (error) {
Helpers::panic("Error creating copy_to_lut_texture pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
// Depth stencil cache
depthStencilCache.set(device);
// Vertex buffer cache
vertexBufferCache.set(device);
// -------- Depth stencil state --------
MTL::DepthStencilDescriptor* depthStencilDescriptor = MTL::DepthStencilDescriptor::alloc()->init();
defaultDepthStencilState = device->newDepthStencilState(depthStencilDescriptor);
}
void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {
@ -430,6 +462,12 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
}
beginRenderPassIfNeeded(renderPassDescriptor, colorRenderTarget->texture, (depthStencilRenderTarget ? depthStencilRenderTarget->texture : nullptr));
// Update the LUT texture if necessary
if (gpu.lightingLUTDirty) {
updateLightingLUT(renderCommandEncoder);
}
renderCommandEncoder->setRenderPipelineState(pipeline);
renderCommandEncoder->setDepthStencilState(depthStencilState);
// If size is < 4KB, use inline vertex data, otherwise use a buffer
@ -440,11 +478,6 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX);
}
// Update the LUT texture if necessary
if (gpu.lightingLUTDirty) {
updateLightingLUT();
}
// Bind resources
setupTextureEnvState(renderCommandEncoder);
bindTexturesToSlots(renderCommandEncoder);
@ -580,7 +613,7 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) {
encoder->setFragmentSamplerState(linearSampler, 3);
}
void RendererMTL::updateLightingLUT() {
void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
gpu.lightingLUTDirty = false;
std::array<u16, GPU::LightingLutSize> u16_lightinglut;
@ -589,7 +622,14 @@ void RendererMTL::updateLightingLUT() {
u16_lightinglut[i] = value * 65535 / 4095;
}
for (int i = 0; i < Lights::LUT_Count; i++) {
lightLUTTextureArray->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0);
}
//for (int i = 0; i < Lights::LUT_Count; i++) {
// lightLUTTextureArray->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0);
//}
renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline);
renderCommandEncoder->setDepthStencilState(defaultDepthStencilState);
renderCommandEncoder->setVertexTexture(lightLUTTextureArray, 0);
renderCommandEncoder->setVertexBytes(u16_lightinglut.data(), sizeof(u16_lightinglut), 0);
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), GPU::LightingLutSize);
}

View file

@ -0,0 +1,9 @@
#include <metal_stdlib>
using namespace metal;
constant ushort lutTextureWidth [[function_constant(0)]];
// Copies one LUT entry per vertex invocation from `data` into the 1D texture array `out`.
// A vertex shader is used for the copy rather than a compute kernel, because dispatching
// compute would require ending the render pass.
vertex void vertexCopyToLutTexture(
    uint vid [[vertex_id]],
    texture1d_array<ushort, access::write> out [[texture(0)]],
    constant ushort* data [[buffer(0)]]
) {
    // The flat vertex index is split into a position within a row and a row (array slice) index
    const uint texel = vid % lutTextureWidth;
    const uint layer = vid / lutTextureWidth;

    out.write(data[vid], texel, layer);
}