mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-11 08:39:48 +12:00
make lut texture 2D
This commit is contained in:
parent
8a1d9d5a22
commit
4a9fb9bdc3
6 changed files with 46 additions and 38 deletions
|
@ -30,7 +30,7 @@ endif()
|
|||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-interference-size")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" ON)
|
||||
option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF)
|
||||
|
@ -442,9 +442,10 @@ if(ENABLE_METAL AND APPLE)
|
|||
set(SHADER_SOURCE "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metal")
|
||||
set(SHADER_IR "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.ir")
|
||||
set(SHADER_METALLIB "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metallib")
|
||||
# TODO: only include sources in debug builds
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_IR}
|
||||
COMMAND xcrun -sdk macosx metal -o ${SHADER_IR} -c ${SHADER_SOURCE}
|
||||
COMMAND xcrun -sdk macosx metal -gline-tables-only -frecord-sources -o ${SHADER_IR} -c ${SHADER_SOURCE}
|
||||
DEPENDS ${SHADER_SOURCE}
|
||||
VERBATIM)
|
||||
add_custom_command(
|
||||
|
|
|
@ -19,13 +19,13 @@ public:
|
|||
VertexBufferCache() = default;
|
||||
|
||||
~VertexBufferCache() {
|
||||
reset();
|
||||
endFrame();
|
||||
buffer->release();
|
||||
}
|
||||
|
||||
void set(MTL::Device* dev) {
|
||||
device = dev;
|
||||
buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared);
|
||||
buffer->setLabel(toNSString("Shared vertex buffer"));
|
||||
create();
|
||||
}
|
||||
|
||||
void endFrame() {
|
||||
|
@ -59,6 +59,7 @@ public:
|
|||
void reset() {
|
||||
endFrame();
|
||||
buffer->release();
|
||||
create();
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -67,6 +68,11 @@ private:
|
|||
std::vector<MTL::Buffer*> additionalAllocations;
|
||||
|
||||
MTL::Device* device;
|
||||
|
||||
void create() {
|
||||
buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared);
|
||||
buffer->setLabel(toNSString("Shared vertex buffer"));
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Metal
|
||||
|
|
|
@ -57,7 +57,7 @@ class RendererMTL final : public Renderer {
|
|||
// Objects
|
||||
MTL::SamplerState* nearestSampler;
|
||||
MTL::SamplerState* linearSampler;
|
||||
MTL::Texture* lightLUTTextureArray;
|
||||
MTL::Texture* lutTexture;
|
||||
MTL::DepthStencilState* defaultDepthStencilState;
|
||||
|
||||
// Pipelines
|
||||
|
|
|
@ -130,15 +130,15 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) {
|
|||
|
||||
// Textures
|
||||
MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
|
||||
textureDescriptor->setTextureType(MTL::TextureType1DArray);
|
||||
textureDescriptor->setPixelFormat(MTL::PixelFormatRG32Float);
|
||||
textureDescriptor->setTextureType(MTL::TextureType2D);
|
||||
textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA32Float);
|
||||
textureDescriptor->setWidth(LIGHT_LUT_TEXTURE_WIDTH);
|
||||
textureDescriptor->setArrayLength(Lights::LUT_Count + 1);
|
||||
textureDescriptor->setHeight(Lights::LUT_Count + 1);
|
||||
textureDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite);
|
||||
textureDescriptor->setStorageMode(MTL::StorageModePrivate);
|
||||
|
||||
lightLUTTextureArray = device->newTexture(textureDescriptor);
|
||||
lightLUTTextureArray->setLabel(toNSString("LUT texture"));
|
||||
lutTexture = device->newTexture(textureDescriptor);
|
||||
lutTexture->setLabel(toNSString("LUT texture"));
|
||||
textureDescriptor->release();
|
||||
|
||||
// Samplers
|
||||
|
@ -580,7 +580,7 @@ void RendererMTL::deinitGraphicsContext() {
|
|||
copyToLutTexturePipeline->release();
|
||||
displayPipeline->release();
|
||||
defaultDepthStencilState->release();
|
||||
lightLUTTextureArray->release();
|
||||
lutTexture->release();
|
||||
linearSampler->release();
|
||||
nearestSampler->release();
|
||||
library->release();
|
||||
|
@ -694,13 +694,13 @@ void RendererMTL::bindTexturesToSlots(MTL::RenderCommandEncoder* encoder) {
|
|||
}
|
||||
|
||||
// LUT texture
|
||||
encoder->setFragmentTexture(lightLUTTextureArray, 3);
|
||||
encoder->setFragmentTexture(lutTexture, 3);
|
||||
encoder->setFragmentSamplerState(linearSampler, 3);
|
||||
}
|
||||
|
||||
void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
|
||||
gpu.lightingLUTDirty = false;
|
||||
std::array<float, GPU::LightingLutSize * 2> lightingLut;
|
||||
std::array<float, GPU::LightingLutSize * 2> lightingLut = {0.0f};
|
||||
|
||||
for (int i = 0; i < gpu.lightingLUT.size(); i += 2) {
|
||||
uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF;
|
||||
|
@ -708,12 +708,12 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
|
|||
}
|
||||
|
||||
//for (int i = 0; i < Lights::LUT_Count; i++) {
|
||||
// lightLUTTextureArray->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0);
|
||||
// lutTexture->replaceRegion(MTL::Region(0, 0, LIGHT_LUT_TEXTURE_WIDTH, 1), 0, i, u16_lightinglut.data() + LIGHT_LUT_TEXTURE_WIDTH * i, 0, 0);
|
||||
//}
|
||||
|
||||
renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline);
|
||||
renderCommandEncoder->setDepthStencilState(defaultDepthStencilState);
|
||||
renderCommandEncoder->setVertexTexture(lightLUTTextureArray, 0);
|
||||
renderCommandEncoder->setVertexTexture(lutTexture, 0);
|
||||
Metal::BufferHandle buffer = vertexBufferCache.get(lightingLut.data(), sizeof(lightingLut));
|
||||
renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0);
|
||||
u32 arrayOffset = 0;
|
||||
|
@ -724,7 +724,7 @@ void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) {
|
|||
|
||||
void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) {
|
||||
gpu.fogLUTDirty = false;
|
||||
std::array<float, 128 * 2> fogLut;
|
||||
std::array<float, 128 * 2> fogLut = {0.0f};
|
||||
|
||||
for (int i = 0; i < fogLut.size(); i += 2) {
|
||||
const uint32_t value = gpu.fogLUT[i >> 1];
|
||||
|
@ -739,8 +739,9 @@ void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) {
|
|||
|
||||
renderCommandEncoder->setRenderPipelineState(copyToLutTexturePipeline);
|
||||
renderCommandEncoder->setDepthStencilState(defaultDepthStencilState);
|
||||
renderCommandEncoder->setVertexTexture(lightLUTTextureArray, 0);
|
||||
renderCommandEncoder->setVertexBytes(fogLut.data(), sizeof(fogLut), 0);
|
||||
renderCommandEncoder->setVertexTexture(lutTexture, 0);
|
||||
Metal::BufferHandle buffer = vertexBufferCache.get(fogLut.data(), sizeof(fogLut));
|
||||
renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, 0);
|
||||
u32 arrayOffset = (u32)Lights::LUT_Count;
|
||||
renderCommandEncoder->setVertexBytes(&arrayOffset, sizeof(u32), 1);
|
||||
|
||||
|
|
|
@ -4,6 +4,6 @@ using namespace metal;
|
|||
constant ushort lutTextureWidth [[function_constant(0)]];
|
||||
|
||||
// The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass
|
||||
vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture1d_array<float, access::write> out [[texture(0)]], constant float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) {
|
||||
out.write(float4(data[vid], 0.0, 0.0), vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth);
|
||||
vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d<float, access::write> out [[texture(0)]], device float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) {
|
||||
out.write(float4(data[vid].x, 0.0, 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth));
|
||||
}
|
||||
|
|
|
@ -410,11 +410,11 @@ uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) {
|
|||
|
||||
#define FOG_INDEX 24
|
||||
|
||||
float lutLookup(texture1d_array<float> texLightingLut, uint lut, uint index) {
|
||||
return texLightingLut.read(index, lut).r;
|
||||
float lutLookup(texture2d<float> texLut, uint lut, uint index) {
|
||||
return texLut.read(uint2(index, lut)).r;
|
||||
}
|
||||
|
||||
float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture1d_array<float> texLightingLut, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) {
|
||||
float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d<float> texLut, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) {
|
||||
uint lut_index;
|
||||
int bit_in_config1;
|
||||
if (lut_id == SP_LUT) {
|
||||
|
@ -500,12 +500,12 @@ float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant
|
|||
delta = abs(delta);
|
||||
}
|
||||
int index = int(clamp(floor(delta * 255.0), 0.f, 255.f));
|
||||
return lutLookup(texLightingLut, lut_index, index) * scale;
|
||||
return lutLookup(texLut, lut_index, index) * scale;
|
||||
} else {
|
||||
// Range is [-1, 1] so we need to map it to [0, 1]
|
||||
int index = int(clamp(floor(delta * 128.0), -128.f, 127.f));
|
||||
if (index < 0) index += 256;
|
||||
return lutLookup(texLightingLut, lut_index, index) * scale;
|
||||
return lutLookup(texLut, lut_index, index) * scale;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -517,7 +517,7 @@ float3 regToColor(uint reg) {
|
|||
}
|
||||
|
||||
// Implements the following algorthm: https://mathb.in/26766
|
||||
void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture1d_array<float> texLightingLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) {
|
||||
void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d<float> texLut, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) {
|
||||
// Quaternions describe a transformation from surface-local space to eye space.
|
||||
// In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1),
|
||||
// the tangent vector is (1,0,0), and the bitangent vector is (0,1,0).
|
||||
|
@ -615,23 +615,23 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi
|
|||
float delta = lightDistance * distanceAttenuationScale + distanceAttenuationBias;
|
||||
delta = clamp(delta, 0.0, 1.0);
|
||||
int index = int(clamp(floor(delta * 255.0), 0.0, 255.0));
|
||||
distanceAttenuation = lutLookup(texLightingLut, 16u + lightId, index);
|
||||
distanceAttenuation = lutLookup(texLut, 16u + lightId, index);
|
||||
}
|
||||
|
||||
float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLightingLut, environmentId, SP_LUT, lightId, lightVector, halfVector);
|
||||
float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLightingLut, environmentId, D0_LUT, lightId, lightVector, halfVector);
|
||||
float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLightingLut, environmentId, D1_LUT, lightId, lightVector, halfVector);
|
||||
float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, environmentId, SP_LUT, lightId, lightVector, halfVector);
|
||||
float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D0_LUT, lightId, lightVector, halfVector);
|
||||
float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, environmentId, D1_LUT, lightId, lightVector, halfVector);
|
||||
float3 reflectedColor;
|
||||
reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLightingLut, environmentId, RR_LUT, lightId, lightVector, halfVector);
|
||||
reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RR_LUT, lightId, lightVector, halfVector);
|
||||
|
||||
if (isSamplerEnabled(environmentId, RG_LUT)) {
|
||||
reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLightingLut, environmentId, RG_LUT, lightId, lightVector, halfVector);
|
||||
reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RG_LUT, lightId, lightVector, halfVector);
|
||||
} else {
|
||||
reflectedColor.g = reflectedColor.r;
|
||||
}
|
||||
|
||||
if (isSamplerEnabled(environmentId, RB_LUT)) {
|
||||
reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLightingLut, environmentId, RB_LUT, lightId, lightVector, halfVector);
|
||||
reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, environmentId, RB_LUT, lightId, lightVector, halfVector);
|
||||
} else {
|
||||
reflectedColor.b = reflectedColor.r;
|
||||
}
|
||||
|
@ -657,7 +657,7 @@ void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant Pi
|
|||
float fresnelFactor;
|
||||
|
||||
if (fresnelOutput1 == 1u || fresnelOutput2 == 1u) {
|
||||
fresnelFactor = lightLutLookup(globals, in, picaRegs, texLightingLut, environmentId, FR_LUT, lightId, lightVector, halfVector);
|
||||
fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, environmentId, FR_LUT, lightId, lightVector, halfVector);
|
||||
}
|
||||
|
||||
if (fresnelOutput1 == 1u) {
|
||||
|
@ -679,7 +679,7 @@ float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) {
|
|||
}
|
||||
|
||||
fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant DepthUniforms& depthUniforms [[buffer(3)]],
|
||||
texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture1d_array<float> texLightingLut [[texture(3)]],
|
||||
texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture2d<float> texLut [[texture(3)]],
|
||||
sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
|
||||
Globals globals;
|
||||
|
||||
|
@ -691,7 +691,7 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
|
|||
|
||||
globals.tevSources[0] = in.color;
|
||||
if (lightingEnabled) {
|
||||
calcLighting(globals, in, picaRegs, texLightingLut, linearSampler, globals.tevSources[1], globals.tevSources[2]);
|
||||
calcLighting(globals, in, picaRegs, texLut, linearSampler, globals.tevSources[1], globals.tevSources[2]);
|
||||
} else {
|
||||
globals.tevSources[1] = float4(0.0);
|
||||
globals.tevSources[2] = float4(0.0);
|
||||
|
@ -743,7 +743,7 @@ fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[c
|
|||
fog_index *= 128.0;
|
||||
float clamped_index = clamp(floor(fog_index), 0.0, 127.0);
|
||||
float delta = fog_index - clamped_index;
|
||||
float2 value = texLightingLut.read(uint(clamped_index), FOG_INDEX).rg;
|
||||
float2 value = texLut.read(uint2(clamped_index, FOG_INDEX)).rg;
|
||||
float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);
|
||||
|
||||
uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u);
|
||||
|
|
Loading…
Add table
Reference in a new issue