mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-11 08:39:48 +12:00
fix: lighting
This commit is contained in:
parent
804a0b083f
commit
8a1d9d5a22
5 changed files with 88 additions and 25 deletions
|
@ -36,10 +36,10 @@ public:
|
|||
additionalAllocations.clear();
|
||||
}
|
||||
|
||||
BufferHandle get(const std::span<const PICA::Vertex>& vertices) {
|
||||
BufferHandle get(const void* data, size_t size) {
|
||||
// If the vertex buffer is too large, just create a new one
|
||||
if (ptr + vertices.size_bytes() > CACHE_BUFFER_SIZE) {
|
||||
MTL::Buffer* newBuffer = device->newBuffer(vertices.data(), vertices.size_bytes(), MTL::ResourceStorageModeShared);
|
||||
if (ptr + size > CACHE_BUFFER_SIZE) {
|
||||
MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared);
|
||||
newBuffer->setLabel(toNSString("Additional vertex buffer"));
|
||||
additionalAllocations.push_back(newBuffer);
|
||||
Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer");
|
||||
|
@ -48,10 +48,10 @@ public:
|
|||
}
|
||||
|
||||
// Copy the data into the buffer
|
||||
memcpy((char*)buffer->contents() + ptr, vertices.data(), vertices.size_bytes());
|
||||
memcpy((char*)buffer->contents() + ptr, data, size);
|
||||
|
||||
size_t oldPtr = ptr;
|
||||
ptr += vertices.size_bytes();
|
||||
ptr += size;
|
||||
|
||||
return BufferHandle{buffer, oldPtr};
|
||||
}
|
||||
|
|
|
@ -184,5 +184,6 @@ class RendererMTL final : public Renderer {
|
|||
void setupTextureEnvState(MTL::RenderCommandEncoder* encoder);
|
||||
void bindTexturesToSlots(MTL::RenderCommandEncoder* encoder);
|
||||
void updateLightingLUT(MTL::RenderCommandEncoder* encoder);
|
||||
void updateFogLUT(MTL::RenderCommandEncoder* encoder);
|
||||
void textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect, const Math::Rect<u32>& destRect);
|
||||
};
|
||||
|
|
|
@ -131,9 +131,9 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
|
|||
// Textures
|
||||
MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
|
||||
textureDescriptor->setTextureType(MTL::TextureType1DArray);
|
||||
textureDescriptor->setPixelFormat(MTL::PixelFormatR16Uint);
|
||||
textureDescriptor->setPixelFormat(MTL::PixelFormatRG32Float);
|
||||
textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH);
|
||||
textureDescriptor->setArrayLength(Lights::LUT_Count);
|
||||
textureDescriptor->setArrayLength(Lights::LUT_Count + 1);
|
||||
textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite);
|
||||
textureDescriptor->setStorageMode(MTL::StorageModePrivate);
|
||||
|
||||
|
@ -516,6 +516,9 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
|
|||
if (gpu.lightingLUTDirty) {
|
||||
updateLightingLUT(renderCommandEncoder);
|
||||
}
|
||||
if (gpu.fogLUTDirty) {
|
||||
updateFogLUT(renderCommandEncoder);
|
||||
}
|
||||
|
||||
renderCommandEncoder->setRenderPipelineState(pipeline);
|
||||
renderCommandEncoder->setDepthStencilState(depthStencilState);
|
||||
|
@ -523,7 +526,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
|
|||
if (vertices.size_bytes() < 4 * 1024) {
|
||||
renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX);
|
||||
} else {
|
||||
Metal::BufferHandle buffer = vertexBufferCache.get(vertices);
|
||||
Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes());
|
||||
renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX);
|
||||
}
|
||||
|
||||
|
@ -560,6 +563,7 @@ void RendererMTL::drawVertices(PICA::PrimType primType, std::span<const PICA::Ve
|
|||
renderCommandEncoder->setFragmentBytes(&regs[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0);
|
||||
renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2);
|
||||
renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2);
|
||||
renderCommandEncoder->setFragmentBytes(&depthUniforms, sizeof(depthUniforms), 3);
|
||||
|
||||
renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size()));
|
||||
}
|
||||
|
@ -696,11 +700,11 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) {
|
|||
|
||||
void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
|
||||
gpu.lightingLUTDirty = false;
|
||||
std::array<u16, GPU::LightingLutSize> u16_lightinglut;
|
||||
std::array<float, GPU::LightingLutSize * 2> lightingLut;
|
||||
|
||||
for (int i = 0; i < gpu.lightingLUT.size(); i++) {
|
||||
uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
|
||||
u16_lightinglut[i] = value * 65535 / 4095;
|
||||
for (int i = 0; i < gpu.lightingLUT.size(); i += 2) {
|
||||
uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF;
|
||||
lightingLut[i] = (float)(value << 4) / 65535.0f;
|
||||
}
|
||||
|
||||
//for (int i = 0; i < Lights::LUT_Count; i++) {
|
||||
|
@ -710,11 +714,39 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
|
|||
renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline);
|
||||
renderCommandEncoder->setDepthStencilState(defaultDepthStencilState);
|
||||
renderCommandEncoder->setVertexTexture(lightLUTTextureArray, 0);
|
||||
renderCommandEncoder->setVertexBytes(u16_lightinglut.data(), sizeof(u16_lightinglut), 0);
|
||||
Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut));
|
||||
renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0);
|
||||
u32 arrayOffset = 0;
|
||||
renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1);
|
||||
|
||||
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), GPU::LightingLutSize);
|
||||
}
|
||||
|
||||
// Decodes the PICA fog LUT and uploads it through the copy-to-LUT pipeline.
//
// Each gpu.fogLUT word is split into two fields, both scaled by 1/2048:
//   - bits 0-12:  signed 13-bit difference
//   - bits 13-23: unsigned 11-bit value
// The decoded (value, difference) float pairs are bound at vertex buffer index 0, and the
// array offset Lights::LUT_Count is bound at vertex buffer index 1. One vertex is then
// issued per LUT entry. Clears gpu.fogLUTDirty.
//
// encoder: render command encoder that receives the state changes and the draw.
void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) {
	constexpr size_t fogLutEntries = 128;

	gpu.fogLUTDirty = false;
	std::array<float, fogLutEntries * 2> fogLut;

	for (size_t entry = 0; entry < fogLutEntries; entry++) {
		const uint32_t value = gpu.fogLUT[entry];

		// Sign extend the 13-bit difference to 32 bits. Done arithmetically so that no signed
		// value is ever shifted into the sign bit.
		int32_t diff = int32_t(value & 0x1fff);
		if ((diff & 0x1000) != 0) {
			diff -= 0x2000;
		}

		fogLut[entry * 2] = float((value >> 13) & 0x7ff) / 2048.0f;  // Fog value
		fogLut[entry * 2 + 1] = float(diff) / 2048.0f;               // Fog difference
	}

	// Record on the encoder we were handed instead of silently ignoring the parameter
	encoder->setRenderPipelineState(copyToLutTexturePipeline);
	encoder->setDepthStencilState(defaultDepthStencilState);
	encoder->setVertexTexture(lightLUTTextureArray, 0);
	encoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0);

	u32 arrayOffset = (u32)Lights::LUT_Count;
	encoder->setVertexBytes(&arrayOffset, sizeof(u32), 1);

	// NOTE(review): the entry count is not a multiple of 3 - confirm that the trailing vertices are
	// still shaded when drawing with a triangle primitive type.
	encoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(fogLutEntries));
}
|
||||
|
||||
void RendererMTL::textureCopyImpl(Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect, const Math::Rect<u32>& destRect) {
|
||||
nextRenderPassName = "Texture copy";
|
||||
MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
|
||||
|
|
|
@ -4,6 +4,6 @@ using namespace metal;
|
|||
constant ushort lutTextureWidth [[function_constant(0)]];
|
||||
|
||||
// The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass
|
||||
vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture1d_array<ushort, access::write> out [[texture(0)]], constant ushort* data [[buffer(0)]]) {
|
||||
out.write(data[vid], vid % lutTextureWidth, vid / lutTextureWidth);
|
||||
// Writes entry `vid` of `data` into the LUT texture array, at texel `vid % lutTextureWidth` of
// layer `arrayOffset + vid / lutTextureWidth`. The two floats of the entry go into the first two
// components of the written value; the remaining two components are written as zero.
vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture1d_array<float, access::write> out [[texture(0)]], constant float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) {
|
||||
out.write(float4(data[vid], 0.0, 0.0), vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth);
|
||||
}
|
||||
|
|
|
@ -104,6 +104,7 @@ struct EnvColor {
|
|||
|
||||
struct DrawVertexOut {
|
||||
float4 position [[position]];
|
||||
float depth;
|
||||
float4 quaternion;
|
||||
float4 color;
|
||||
float3 texCoord0;
|
||||
|
@ -176,6 +177,7 @@ vertex DrawVertexOutWithClip vertexDraw(DrawVertexIn in [[stage_in]], constant P
|
|||
|
||||
// Apply depth uniforms
|
||||
out.position.z = transformZ(out.position.z, out.position.w, depthUniforms);
|
||||
out.depth = out.position.z;
|
||||
|
||||
// Color
|
||||
out.color = min(abs(in.color), 1.0);
|
||||
|
@ -406,6 +408,8 @@ uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) {
|
|||
#define RG_LUT 5u
|
||||
#define RR_LUT 6u
|
||||
|
||||
#define FOG_INDEX 24
|
||||
|
||||
// Reads texel `index` from layer `lut` of the lighting LUT texture array and returns its red channel.
float lutLookup(texture1d_array<float> texLightingLut, uint lut, uint index) {
	const float4 texel = texLightingLut.read(index, lut);
	return texel.r;
}
|
||||
|
@ -569,17 +573,15 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi
|
|||
decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
|
||||
));
|
||||
|
||||
float3 halfVector;
|
||||
|
||||
// Positional Light
|
||||
if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
|
||||
// error_unimpl = true;
|
||||
halfVector = lightPosition + in.view;
|
||||
lightVector = lightPosition + in.view;
|
||||
}
|
||||
|
||||
// Directional light
|
||||
else {
|
||||
halfVector = lightPosition;
|
||||
lightVector = lightPosition;
|
||||
}
|
||||
|
||||
lightDistance = length(lightVector);
|
||||
|
@ -676,7 +678,7 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) {
|
|||
return as_type<float4>(performLogicOpU(logicOp, as_type<uint4>(s), as_type<uint4>(d)));
|
||||
}
|
||||
|
||||
fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]],
|
||||
fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant DepthUniforms& depthUniforms [[buffer(3)]],
|
||||
texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture1d_array<float> texLightingLut [[texture(3)]],
|
||||
sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
|
||||
Globals globals;
|
||||
|
@ -691,8 +693,8 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
|
|||
if (lightingEnabled) {
|
||||
calcLighting(globals, in, picaRegs, texLightingLut, linearSampler, globals.tevSources[1], globals.tevSources[2]);
|
||||
} else {
|
||||
globals.tevSources[1] = float4(1.0);
|
||||
globals.tevSources[2] = float4(1.0);
|
||||
globals.tevSources[1] = float4(0.0);
|
||||
globals.tevSources[2] = float4(0.0);
|
||||
}
|
||||
|
||||
uint textureConfig = picaRegs.read(0x80u);
|
||||
|
@ -723,9 +725,37 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
|
|||
}
|
||||
}
|
||||
|
||||
float4 color = performLogicOp(logicOp, globals.tevSources[15], prevColor);
|
||||
float4 color = globals.tevSources[15];
|
||||
|
||||
// TODO: fog
|
||||
// Depth
|
||||
float z_over_w = in.position.z;
|
||||
float depth = z_over_w * depthUniforms.depthScale + depthUniforms.depthOffset;
|
||||
|
||||
if (!depthUniforms.depthMapEnable) // Divide z by w if depthmap enable == 0 (ie using W-buffering)
|
||||
depth /= in.position.w;
|
||||
|
||||
// Fog
|
||||
bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u;
|
||||
|
||||
if (enable_fog) {
|
||||
bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u;
|
||||
float fog_index = flip_depth ? 1.0 - depth : depth;
|
||||
fog_index *= 128.0;
|
||||
float clamped_index = clamp(floor(fog_index), 0.0, 127.0);
|
||||
float delta = fog_index - clamped_index;
|
||||
float2 value = texLightingLut.read(uint(clamped_index), FOG_INDEX).rg;
|
||||
float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);
|
||||
|
||||
uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u);
|
||||
|
||||
// Annoyingly color is not encoded in the same way as light color
|
||||
float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0;
|
||||
float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0;
|
||||
float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0;
|
||||
float3 fog_color = float3(r, g, b);
|
||||
|
||||
color.rgb = mix(fog_color, color.rgb, fog_factor);
|
||||
}
|
||||
|
||||
// Perform alpha test
|
||||
if ((alphaControl & 1u) != 0u) { // Check if alpha test is on
|
||||
|
@ -757,5 +787,5 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
|
|||
}
|
||||
}
|
||||
|
||||
return color;
|
||||
return performLogicOp(logicOp, color, prevColor);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue