fix: lighting

This commit is contained in:
Samuliak 2024-07-23 13:16:33 +02:00
parent 804a0b083f
commit 8a1d9d5a22
5 changed files with 88 additions and 25 deletions

View file

@ -36,10 +36,10 @@ public:
additionalAllocations.clear();
}
BufferHandle get(const std::span<const PICA::Vertex>& vertices) {
BufferHandle get(const void* data, size_t size) {
// If the vertex buffer is too large, just create a new one
if (ptr + vertices.size_bytes() > CACHE_BUFFER_SIZE) {
MTL::Buffer* newBuffer = device->newBuffer(vertices.data(), vertices.size_bytes(), MTL::ResourceStorageModeShared);
if (ptr + size > CACHE_BUFFER_SIZE) {
MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared);
newBuffer->setLabel(toNSString("Additional vertex buffer"));
additionalAllocations.push_back(newBuffer);
Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer");
@ -48,10 +48,10 @@ public:
}
// Copy the data into the buffer
memcpy((char*)buffer->contents() + ptr, vertices.data(), vertices.size_bytes());
memcpy((char*)buffer->contents() + ptr, data, size);
size_t oldPtr = ptr;
ptr += vertices.size_bytes();
ptr += size;
return BufferHandle{buffer, oldPtr};
}

View file

@ -184,5 +184,6 @@ class RendererMTL final : public Renderer {
void setupTextureEnvState(MTL::RenderCommandEncoder* encoder);
void bindTexturesToSlots(MTL::RenderCommandEncoder* encoder);
void updateLightingLUT(MTL::RenderCommandEncoder* encoder);
void updateFogLUT(MTL::RenderCommandEncoder* encoder);
void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect, const Math::Rect<u32>& destRect);
};

View file

@ -131,9 +131,9 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
// Textures
MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
textureDescriptor->setTextureType(MTL::TextureType1DArray);
textureDescriptor->setPixelFormat(MTL::PixelFormatR16Uint);
textureDescriptor->setPixelFormat(MTL::PixelFormatRG32Float);
textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH);
textureDescriptor->setArrayLength(Lights::LUT_Count);
textureDescriptor->setArrayLength(Lights::LUT_Count + 1);
textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite);
textureDescriptor->setStorageMode(MTL::StorageModePrivate);
@ -516,6 +516,9 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
if (gpu.lightingLUTDirty) {
updateLightingLUT(renderCommandEncoder);
}
if (gpu.fogLUTDirty) {
updateFogLUT(renderCommandEncoder);
}
renderCommandEncoder->setRenderPipelineState(pipeline);
renderCommandEncoder->setDepthStencilState(depthStencilState);
@ -523,7 +526,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
if (vertices.size_bytes() < 4 * 1024) {
renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX);
} else {
Metal::BufferHandle buffer = vertexBufferCache.get(vertices);
Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes());
renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX);
}
@ -560,6 +563,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
renderCommandEncoder->setFragmentBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0);
renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2);
renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2);
renderCommandEncoder->setFragmentBytes(&depthUniforms, sizeof(depthUniforms), 3);
renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size()));
}
@ -696,11 +700,11 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) {
void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
gpu.lightingLUTDirty = false;
std::array<u16, GPU::LightingLutSize> u16_lightinglut;
std::array<float, GPU::LightingLutSize * 2> lightingLut;
for (int i = 0; i < gpu.lightingLUT.size(); i++) {
uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
u16_lightinglut[i] = value * 65535 / 4095;
for (int i = 0; i < gpu.lightingLUT.size(); i += 2) {
uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF;
lightingLut[i] = (float)(value << 4) / 65535.0f;
}
//for (int i = 0; i < Lights::LUT_Count; i++) {
@ -710,11 +714,39 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline);
renderCommandEncoder->setDepthStencilState(defaultDepthStencilState);
renderCommandEncoder->setVertexTexture(lightLUTTextureArray, 0);
renderCommandEncoder->setVertexBytes(u16_lightinglut.data(), sizeof(u16_lightinglut), 0);
Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut));
renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0);
u32 arrayOffset = 0;
renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1);
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), GPU::LightingLutSize);
}
// Rebuilds the fog lookup table from gpu.fogLUT and issues a draw that copies it into lightLUTTextureArray.
//
// Each of the 128 raw 32-bit entries is unpacked into two floats, stored interleaved as (value, difference):
//   - bits 0-12:  signed 13-bit difference, divided by 2048
//   - bits 13-23: unsigned 11-bit fog value, divided by 2048
// The upload is done by drawing 128 vertices with copyToLutTexturePipeline.
//
// NOTE(review): the `encoder` parameter is not referenced in the body; every call below goes through
// `renderCommandEncoder` instead. Confirm both always refer to the same encoder.
void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) {
// Clear the dirty flag up front so the table is not rebuilt again until it is marked dirty elsewhere.
gpu.fogLUTDirty = false;
// Two floats per fog entry: [i] = fog value, [i + 1] = fog difference.
std::array<float, 128 * 2> fogLut;
// `i` indexes the output floats and advances by 2; `i >> 1` is the index of the source entry (0..127).
for (int i = 0; i < fogLut.size(); i += 2) {
const uint32_t value = gpu.fogLUT[i >> 1];
// Low 13 bits hold the difference term.
int32_t diff = value & 0x1fff;
diff = (diff << 19) >> 19; // Sign extend the 13-bit value to 32 bits
const float fogDifference = float(diff) / 2048.0f;
// The next 11 bits (13-23) hold the unsigned fog value.
const float fogValue = float((value >> 13) & 0x7ff) / 2048.0f;
fogLut[i] = fogValue;
fogLut[i + 1] = fogDifference;
}
// Switch to the LUT copy pipeline; this replaces the pipeline and depth/stencil state currently set on the encoder.
renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline);
renderCommandEncoder->setDepthStencilState(defaultDepthStencilState);
// Destination texture at vertex texture slot 0.
renderCommandEncoder->setVertexTexture(lightLUTTextureArray, 0);
// Source data at vertex buffer slot 0, passed inline (sizeof(fogLut) is 256 floats).
renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0);
// Offset passed to the copy shader at vertex buffer slot 1.
// Presumably this selects the array slice after the lighting LUTs — verify against the copy shader and texture setup.
u32 arrayOffset = (u32)Lights::LUT_Count;
renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1);
// One vertex per fog entry (128 in total).
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(128));
}
void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect, const Math::Rect<u32>& destRect) {
nextRenderPassName = "Texture copy";
MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();

View file

@ -4,6 +4,6 @@ using namespace metal;
constant ushort lutTextureWidth [[function_constant(0)]];
// The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass
vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture1d_array<ushort, access::write> out [[texture(0)]], constant ushort* data [[buffer(0)]]) {
out.write(data[vid], vid % lutTextureWidth, vid / lutTextureWidth);
vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture1d_array<float, access::write> out [[texture(0)]], constant float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) {
out.write(float4(data[vid], 0.0, 0.0), vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth);
}

View file

@ -104,6 +104,7 @@ struct EnvColor {
struct DrawVertexOut {
float4 position [[position]];
float depth;
float4 quaternion;
float4 color;
float3 texCoord0;
@ -176,6 +177,7 @@ vertex DrawVertexOutWithClip vertexDraw(DrawVertexIn in [[stage_in]], constant P
// Apply depth uniforms
out.position.z = transformZ(out.position.z, out.position.w, depthUniforms);
out.depth = out.position.z;
// Color
out.color = min(abs(in.color), 1.0);
@ -406,6 +408,8 @@ uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) {
#define RG_LUT 5u
#define RR_LUT 6u
#define FOG_INDEX 24
float lutLookup(texture1d_array<float> texLightingLut, uint lut, uint index) {
return texLightingLut.read(index, lut).r;
}
@ -569,17 +573,15 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi
decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
));
float3 halfVector;
// Positional Light
if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
// error_unimpl = true;
halfVector = lightPosition + in.view;
lightVector = lightPosition + in.view;
}
// Directional light
else {
halfVector = lightPosition;
lightVector = lightPosition;
}
lightDistance = length(lightVector);
@ -676,7 +678,7 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) {
return as_type<float4>(performLogicOpU(logicOp, as_type<uint4>(s), as_type<uint4>(d)));
}
fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]],
fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant DepthUniforms& depthUniforms [[buffer(3)]],
texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture1d_array<float> texLightingLut [[texture(3)]],
sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
Globals globals;
@ -691,8 +693,8 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
if (lightingEnabled) {
calcLighting(globals, in, picaRegs, texLightingLut, linearSampler, globals.tevSources[1], globals.tevSources[2]);
} else {
globals.tevSources[1] = float4(1.0);
globals.tevSources[2] = float4(1.0);
globals.tevSources[1] = float4(0.0);
globals.tevSources[2] = float4(0.0);
}
uint textureConfig = picaRegs.read(0x80u);
@ -723,9 +725,37 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
}
}
float4 color = performLogicOp(logicOp, globals.tevSources[15], prevColor);
float4 color = globals.tevSources[15];
// TODO: fog
// Depth
float z_over_w = in.position.z;
float depth = z_over_w * depthUniforms.depthScale + depthUniforms.depthOffset;
if (!depthUniforms.depthMapEnable) // Divide z by w if depthmap enable == 0 (ie using W-buffering)
depth /= in.position.w;
// Fog
bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u;
if (enable_fog) {
bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u;
float fog_index = flip_depth ? 1.0 - depth : depth;
fog_index *= 128.0;
float clamped_index = clamp(floor(fog_index), 0.0, 127.0);
float delta = fog_index - clamped_index;
float2 value = texLightingLut.read(uint(clamped_index), FOG_INDEX).rg;
float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);
uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u);
// Annoyingly color is not encoded in the same way as light color
float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0;
float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0;
float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0;
float3 fog_color = float3(r, g, b);
color.rgb = mix(fog_color, color.rgb, fog_factor);
}
// Perform alpha test
if ((alphaControl & 1u) != 0u) { // Check if alpha test is on
@ -757,5 +787,5 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
}
}
return color;
return performLogicOp(logicOp, color, prevColor);
}