// Mirror of https://github.com/wheremyfoodat/Panda3DS.git
// Synced 2025-04-06 06:05:40 +12:00
// 759 lines, 27 KiB, Metal
#include <metal_stdlib>
|
|
using namespace metal;
|
|
|
|
// Output of vertexDisplay, consumed by fragmentDisplay.
struct BasicVertexOut {
    float4 position [[position]];  // Position taken from displayPositions
    float2 uv;                     // Texture coordinate taken from displayTexCoord
};
|
|
|
|
// Four corner positions, indexed by vertex id in vertexDisplay.
constant float4 displayPositions[4] = {
    float4(-1.0, -1.0, 0.0, 1.0),
    float4( 1.0, -1.0, 0.0, 1.0),
    float4(-1.0,  1.0, 0.0, 1.0),
    float4( 1.0,  1.0, 0.0, 1.0)
};
|
|
|
|
// Texture coordinates paired index-for-index with displayPositions.
// The v coordinate is 1 where the position's y is -1, and 0 where it is +1.
constant float2 displayTexCoord[4] = {
    float2(0.0, 1.0),
    float2(0.0, 0.0),
    float2(1.0, 1.0),
    float2(1.0, 0.0)
};
|
|
|
|
// Vertex stage of the display pass: fetches position and UV for vertex `vid`
// from the displayPositions / displayTexCoord tables (no bounds check on `vid`).
vertex BasicVertexOut vertexDisplay(uint vid [[vertex_id]]) {
    BasicVertexOut result;
    result.uv = displayTexCoord[vid];
    result.position = displayPositions[vid];
    return result;
}
|
|
|
|
// Fragment stage of the display pass: samples `tex` at the interpolated UV and returns the texel unchanged.
fragment float4 fragmentDisplay(BasicVertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) {
    const float4 texel = tex.sample(samplr, in.uv);
    return texel;
}
|
|
|
|
// Register file made available to the shaders. Storage begins at register 0x48,
// so lookups are rebased by that amount.
struct PicaRegs {
    uint regs[0x200 - 0x48];

    // Returns the value of register `reg`.
    // No range check is done: `reg` must lie in [0x48, 0x200).
    uint read(uint reg) constant {
        const uint slot = reg - 0x48;
        return regs[slot];
    }
};
|
|
|
|
// Packed constant colours, one 32-bit value per texture environment stage.
// vertexDraw unpacks each entry with abgr8888ToFloat4.
struct VertTEV {
    uint textureEnvColor[6];
};
|
|
|
|
// Unpacks a 32-bit colour into a float4 with each channel scaled by 1/255.
// Bits 0-7 become x, 8-15 y, 16-23 z and 24-31 w.
float4 abgr8888ToFloat4(uint abgr) {
    const float scale = 1.0 / 255.0;

    const uint r = abgr & 0xffu;
    const uint g = (abgr >> 8) & 0xffu;
    const uint b = (abgr >> 16) & 0xffu;
    const uint a = abgr >> 24;

    return scale * float4(float(r), float(g), float(b), float(a));
}
|
|
|
|
// Per-vertex attributes consumed by vertexDraw.
struct DrawVertexIn {
    float4 position   [[attribute(0)]];
    float4 quaternion [[attribute(1)]];  // Rotates the surface-local basis vectors in vertexDraw
    float4 color      [[attribute(2)]];
    float2 texCoord0  [[attribute(3)]];
    float2 texCoord1  [[attribute(4)]];
    float  texCoord0W [[attribute(5)]];  // Becomes the third component of texCoord0 in vertexDraw
    float3 view       [[attribute(6)]];
    float2 texCoord2  [[attribute(7)]];
};
|
|
|
|
// Metal cannot return arrays from vertex functions, so the six colours are stored as
// individual members and exposed through an array-like accessor.
struct EnvColor {
    float4 c0;
    float4 c1;
    float4 c2;
    float4 c3;
    float4 c4;
    float4 c5;

    // Returns a reference to colour `i`. Any index outside 1..5 (including 0) yields c0.
    thread float4& operator[](int i) {
        if (i == 1) return c1;
        if (i == 2) return c2;
        if (i == 3) return c3;
        if (i == 4) return c4;
        if (i == 5) return c5;
        return c0;
    }
};
|
|
|
|
// Rotates `v` by the quaternion `q`, where q.xyz is the vector part and q.w the scalar part.
// The quaternion is not normalized here.
float3 rotateFloat3ByQuaternion(float3 v, float4 q) {
    const float3 axis = q.xyz;
    const float scalar = q.w;

    return 2.0 * dot(axis, v) * axis + (scalar * scalar - dot(axis, axis)) * v + 2.0 * scalar * cross(axis, v);
}
|
|
|
|
// Convert an arbitrary-width floating point literal to an f32.
// `hex` holds the raw bits, laid out as 1 sign bit, `E` exponent bits and `M` mantissa bits.
float decodeFP(uint hex, uint E, uint M) {
    const uint width = M + E + 1u;
    const uint bias = 128u - (1u << (E - 1u));
    const uint exponentMask = (1u << E) - 1u;

    const uint sign = (hex >> (E + M)) << 31u;
    const uint mantissa = hex & ((1u << M) - 1u);
    uint exponent = (hex >> M) & exponentMask;

    // Every bit below the sign bit is zero: the result is just the (signed) zero
    const uint magnitudeBits = hex & ((1u << (width - 1u)) - 1u);
    if (magnitudeBits == 0u) {
        return as_type<float>(sign);
    }

    // An all-ones source exponent maps to the all-ones f32 exponent; anything else is re-biased
    exponent = (exponent == exponentMask) ? 255u : exponent + bias;

    return as_type<float>(sign | (mantissa << (23u - M)) | (exponent << 23u));
}
|
|
|
|
// Depth parameters read by transformZ.
struct DepthUniforms {
    float depthScale;      // Multiplied with z / w
    float depthOffset;     // Added after scaling
    bool depthMapEnable;   // When false, transformZ multiplies the result by w one extra time
};
|
|
|
|
// Data produced by vertexDraw and read by fragmentDraw.
struct DrawVertexOut {
    float4 position [[position]];
    float4 quaternion;
    float4 color;
    float3 texCoord0;  // xy = texCoord0, z = texCoord0W
    float2 texCoord1;
    float2 texCoord2;
    float3 view;
    float3 normal;
    float3 tangent;
    float3 bitangent;

    // Not interpolated
    EnvColor textureEnvColor [[flat]];
    float4 textureEnvBufferColor [[flat]];
};
|
|
|
|
// Return type of vertexDraw: the regular vertex output plus two clip distances.
struct DrawVertexOutWithClip {
    DrawVertexOut out;
    float clipDistance [[clip_distance]] [2];
};
|
|
|
|
// TODO: check this
// Applies the depth scale and offset to z / w, then multiplies by w again
// (twice when depth mapping is disabled).
float transformZ(float z, float w, constant DepthUniforms& depthUniforms) {
    float depth = z / w * depthUniforms.depthScale + depthUniforms.depthOffset;

    if (!depthUniforms.depthMapEnable) {
        depth *= w;
    }

    return depth * w;
}
|
|
|
|
// Vertex stage of the draw pass. Forwards the vertex attributes (with y flips applied),
// derives the normal/tangent/bitangent from the quaternion, unpacks the texture environment
// colours and computes the two clip distances.
vertex DrawVertexOutWithClip vertexDraw(DrawVertexIn in [[stage_in]], constant PicaRegs& picaRegs [[buffer(0)]], constant VertTEV& tev [[buffer(1)]], constant DepthUniforms& depthUniforms [[buffer(2)]]) {
    DrawVertexOut out;

    // Position: flip y, then apply the depth uniforms to z
    out.position = float4(in.position.x, -in.position.y, in.position.z, in.position.w);
    out.position.z = transformZ(out.position.z, out.position.w, depthUniforms);

    // Color, folded into [0, 1]
    out.color = min(abs(in.color), 1.0);

    // Texture coordinates, each with its v component flipped
    out.texCoord0 = float3(in.texCoord0.x, 1.0 - in.texCoord0.y, in.texCoord0W);
    out.texCoord1 = float2(in.texCoord1.x, 1.0 - in.texCoord1.y);
    out.texCoord2 = float2(in.texCoord2.x, 1.0 - in.texCoord2.y);

    // View
    out.view = in.view;

    // TBN: rotate the surface-local basis vectors by the quaternion
    out.quaternion = in.quaternion;
    out.normal = normalize(rotateFloat3ByQuaternion(float3(0.0, 0.0, 1.0), in.quaternion));
    out.tangent = normalize(rotateFloat3ByQuaternion(float3(1.0, 0.0, 0.0), in.quaternion));
    out.bitangent = normalize(rotateFloat3ByQuaternion(float3(0.0, 1.0, 0.0), in.quaternion));

    // Environment
    for (int stage = 0; stage < 6; stage++) {
        out.textureEnvColor[stage] = abgr8888ToFloat4(tev.textureEnvColor[stage]);
    }
    out.textureEnvBufferColor = abgr8888ToFloat4(picaRegs.read(0xFDu));

    DrawVertexOutWithClip outWithClip;
    outWithClip.out = out;

    // Parse clipping plane registers (24-bit floats: 7 exponent bits, 16 mantissa bits)
    const float clipX = decodeFP(picaRegs.read(0x48u) & 0xffffffu, 7u, 16u);
    const float clipY = decodeFP(picaRegs.read(0x49u) & 0xffffffu, 7u, 16u);
    const float clipZ = decodeFP(picaRegs.read(0x4Au) & 0xffffffu, 7u, 16u);
    const float clipW = decodeFP(picaRegs.read(0x4Bu) & 0xffffffu, 7u, 16u);
    const float4 clipData = float4(clipX, clipY, clipZ, clipW);

    // There's also another, always-on clipping plane based on vertex z
    // TODO: transform
    outWithClip.clipDistance[0] = -in.position.z;
    outWithClip.clipDistance[1] = dot(clipData, in.position);

    return outWithClip;
}
|
|
|
|
// Function constants: supplied by the host when the pipeline is specialized.
constant bool     lightingEnabled   [[function_constant(0)]];
constant uint8_t  lightingNumLights [[function_constant(1)]];  // The lighting loop runs lightingNumLights + 1 times
constant uint32_t lightingConfig1   [[function_constant(2)]];
constant uint16_t alphaControl      [[function_constant(3)]];
|
|
|
|
// Per-fragment scratch state shared between fragmentDraw and its helpers.
struct Globals {
    // Set when an unimplemented lighting path is hit. It is only ever set to true,
    // so it has to start out false to carry any meaning.
    bool error_unimpl = false;

    // Inputs of the texture environment combiners, indexed by 4-bit source id
    float4 tevSources[16];
    float4 tevNextPreviousBuffer;
    // Set by FragTEV::fetchSource when a source id in [6, 13) is requested
    bool tevUnimplementedSourceFlag = false;

    // Lighting registers cached by calcLighting for use in lightLutLookup
    uint GPUREG_LIGHTING_LUTINPUT_SCALE;
    uint GPUREG_LIGHTING_LUTINPUT_ABS;
    uint GPUREG_LIGHTING_LUTINPUT_SELECT;
    uint GPUREG_LIGHTi_CONFIG;

    // HACK: these values are provided as function constants instead of living here
    //bool lightingEnabled;
    //uint8_t lightingNumLights;
    //uint32_t lightingConfig1;
    //uint16_t alphaControl;

    // Surface normal computed by calcLighting
    float3 normal;
};
|
|
|
|
// See docs/lighting.md
// 64-bit table, one bit per (environment, LUT) pair: bit index = 7 * environment_id + lut_id.
constant uint samplerEnabledBitfields[2] = {
    0x7170e645u,
    0x7f013fefu
};
|
|
|
|
// Returns whether LUT `lut_id` is enabled in lighting environment `environment_id`,
// according to samplerEnabledBitfields (7 bits per environment).
bool isSamplerEnabled(uint environment_id, uint lut_id) {
    const uint index = 7u * environment_id + lut_id;
    const uint arrayIndex = index >> 5;

    // The table only holds 64 bits, but callers pass a 4-bit environment id, which can push
    // the index past the end. Report such pairs as disabled instead of reading out of bounds.
    if (arrayIndex >= 2u) {
        return false;
    }

    return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u;
}
|
|
|
|
// Texture environment (TEV) configuration for the six combiner stages, plus the
// code that evaluates a stage.
struct FragTEV {
    uint textureEnvSource[6];
    uint textureEnvOperand[6];
    uint textureEnvCombiner[6];
    uint textureEnvScale[6];

    // Returns the colour currently stored for source `src_id` (expected to be a 4-bit id).
    // Source ids in [6, 13) are flagged as unimplemented.
    float4 fetchSource(thread Globals& globals, uint src_id) constant {
        if (src_id >= 6u && src_id < 13u) {
            globals.tevUnimplementedSourceFlag = true;
        }

        return globals.tevSources[src_id];
    }

    // Fetches source number `src_id` (0..2) of stage `tev_id` and applies the configured
    // colour and alpha operands to it.
    float4 getColorAndAlphaSource(thread Globals& globals, int tev_id, int src_id) constant {
        // Start from a defined value: the undocumented colour operands fall through the switch
        // below without writing result.rgb, which previously returned indeterminate data.
        float4 result = float4(0.0);

        float4 colorSource = fetchSource(globals, (textureEnvSource[tev_id] >> (src_id * 4)) & 15u);
        float4 alphaSource = fetchSource(globals, (textureEnvSource[tev_id] >> (src_id * 4 + 16)) & 15u);

        uint colorOperand = (textureEnvOperand[tev_id] >> (src_id * 4)) & 15u;
        uint alphaOperand = (textureEnvOperand[tev_id] >> (12 + src_id * 4)) & 7u;

        // TODO: figure out what the undocumented values do
        switch (colorOperand) {
            case 0u: result.rgb = colorSource.rgb; break;              // Source color
            case 1u: result.rgb = 1.0 - colorSource.rgb; break;        // One minus source color
            case 2u: result.rgb = float3(colorSource.a); break;        // Source alpha
            case 3u: result.rgb = float3(1.0 - colorSource.a); break;  // One minus source alpha
            case 4u: result.rgb = float3(colorSource.r); break;        // Source red
            case 5u: result.rgb = float3(1.0 - colorSource.r); break;  // One minus source red
            case 8u: result.rgb = float3(colorSource.g); break;        // Source green
            case 9u: result.rgb = float3(1.0 - colorSource.g); break;  // One minus source green
            case 12u: result.rgb = float3(colorSource.b); break;       // Source blue
            case 13u: result.rgb = float3(1.0 - colorSource.b); break; // One minus source blue
            default: break;
        }

        // TODO: figure out what the undocumented values do
        switch (alphaOperand) {
            case 0u: result.a = alphaSource.a; break;        // Source alpha
            case 1u: result.a = 1.0 - alphaSource.a; break;  // One minus source alpha
            case 2u: result.a = alphaSource.r; break;        // Source red
            case 3u: result.a = 1.0 - alphaSource.r; break;  // One minus source red
            case 4u: result.a = alphaSource.g; break;        // Source green
            case 5u: result.a = 1.0 - alphaSource.g; break;  // One minus source green
            case 6u: result.a = alphaSource.b; break;        // Source blue
            case 7u: result.a = 1.0 - alphaSource.b; break;  // One minus source blue
            default: break;
        }

        return result;
    }

    // Evaluates combiner stage `tev_id`: combines its three sources with the configured
    // colour/alpha modes and applies the per-stage scale.
    float4 calculateCombiner(thread Globals& globals, int tev_id) constant {
        float4 source0 = getColorAndAlphaSource(globals, tev_id, 0);
        float4 source1 = getColorAndAlphaSource(globals, tev_id, 1);
        float4 source2 = getColorAndAlphaSource(globals, tev_id, 2);

        uint colorCombine = textureEnvCombiner[tev_id] & 15u;
        uint alphaCombine = (textureEnvCombiner[tev_id] >> 16) & 15u;

        float4 result = float4(1.0);

        // TODO: figure out what the undocumented values do
        switch (colorCombine) {
            case 0u: result.rgb = source0.rgb; break;                                                   // Replace
            case 1u: result.rgb = source0.rgb * source1.rgb; break;                                     // Modulate
            case 2u: result.rgb = min(float3(1.0), source0.rgb + source1.rgb); break;                   // Add
            case 3u: result.rgb = clamp(source0.rgb + source1.rgb - 0.5, 0.0, 1.0); break;              // Add signed
            case 4u: result.rgb = mix(source1.rgb, source0.rgb, source2.rgb); break;                    // Interpolate
            case 5u: result.rgb = max(source0.rgb - source1.rgb, 0.0); break;                           // Subtract
            case 6u: result.rgb = float3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break;       // Dot3 RGB
            case 7u: result = float4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break;           // Dot3 RGBA
            case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break;             // Multiply then add
            case 9u: result.rgb = min((source0.rgb + source1.rgb), 1.0) * source2.rgb; break;           // Add then multiply
            default: break;
        }

        if (colorCombine != 7u) {  // The color combiner also writes the alpha channel in the "Dot3 RGBA" mode.
            // TODO: figure out what the undocumented values do
            // TODO: test if the alpha combiner supports all the same modes as the color combiner.
            switch (alphaCombine) {
                case 0u: result.a = source0.a; break;                                        // Replace
                case 1u: result.a = source0.a * source1.a; break;                            // Modulate
                case 2u: result.a = min(1.0, source0.a + source1.a); break;                  // Add
                case 3u: result.a = clamp(source0.a + source1.a - 0.5, 0.0, 1.0); break;     // Add signed
                case 4u: result.a = mix(source1.a, source0.a, source2.a); break;             // Interpolate
                case 5u: result.a = max(0.0, source0.a - source1.a); break;                  // Subtract
                case 8u: result.a = min(source0.a * source1.a + source2.a, 1.0); break;      // Multiply then add
                case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break;      // Add then multiply
                default: break;
            }
        }

        // Scale factors are powers of two selected by a 2-bit field each
        result.rgb *= float(1 << (textureEnvScale[tev_id] & 3u));
        result.a *= float(1 << ((textureEnvScale[tev_id] >> 16) & 3u));

        return result;
    }
};
|
|
|
|
// Logic operation selector used by performLogicOpU (s = source, d = destination).
// The numeric values are part of the host interface and must not change.
enum class LogicOp : uint8_t {
    Clear        = 0,   // Bit pattern of 0.0
    And          = 1,   // s & d
    AndReverse   = 2,   // s & ~d
    Copy         = 3,   // s
    Set          = 4,   // Bit pattern of 1.0
    CopyInverted = 5,   // ~s
    NoOp         = 6,   // d
    Invert       = 7,   // ~d
    Nand         = 8,   // ~(s & d)
    Or           = 9,   // s | d
    Nor          = 10,  // ~(s | d)
    Xor          = 11,  // s ^ d
    Equiv        = 12,  // ~(s ^ d)
    AndInverted  = 13,  // ~s & d
    OrReverse    = 14,  // s | ~d
    OrInverted   = 15   // ~s | d
};
|
|
|
|
// Applies `logicOp` to the raw bit patterns `s` (source) and `d` (destination).
// Clear and Set return the bit patterns of 0.0 and 1.0 respectively.
uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) {
    switch (logicOp) {
        case LogicOp::Clear: return as_type<uint4>(float4(0.0));
        case LogicOp::And: return s & d;
        case LogicOp::AndReverse: return s & ~d;
        case LogicOp::Copy: return s;
        case LogicOp::Set: return as_type<uint4>(float4(1.0));
        case LogicOp::CopyInverted: return ~s;
        case LogicOp::NoOp: return d;
        case LogicOp::Invert: return ~d;
        case LogicOp::Nand: return ~(s & d);
        case LogicOp::Or: return s | d;
        case LogicOp::Nor: return ~(s | d);
        case LogicOp::Xor: return s ^ d;
        case LogicOp::Equiv: return ~(s ^ d);
        case LogicOp::AndInverted: return ~s & d;
        case LogicOp::OrReverse: return s | ~d;
        case LogicOp::OrInverted: return ~s | d;
    }

    // The operation is read from a buffer as a raw byte, so values outside the enum are possible.
    // Behave like Copy rather than falling off the end of a non-void function.
    return s;
}
|
|
|
|
// Lighting LUT ids. They double as the per-environment bit offset in isSamplerEnabled.
#define D0_LUT 0u  // Used for the specular 0 distribution
#define D1_LUT 1u  // Used for the specular 1 distribution
#define SP_LUT 2u  // Spotlight attenuation
#define FR_LUT 3u  // Fresnel
#define RB_LUT 4u  // Reflected colour, blue
#define RG_LUT 5u  // Reflected colour, green
#define RR_LUT 6u  // Reflected colour, red
|
|
|
|
// Reads entry `index` of row `lut` in array slice `slice` of the lighting LUT texture
// and returns its red channel.
float lutLookup(texture2d_array<float> texLut, uint slice, uint lut, uint index) {
    const uint2 texel = uint2(index, lut);
    return texLut.read(texel, slice).r;
}
|
|
|
|
// Evaluates one lighting LUT for the given light.
//
// Returns 1.0 when the LUT is not enabled for `environment_id`, when it is disabled through
// lightingConfig1, or when `lut_id` is not a known LUT. Otherwise selects the LUT input
// (a dot product chosen by GPUREG_LIGHTING_LUTINPUT_SELECT), converts it to a table index
// and returns the table entry multiplied by the configured scale.
float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array<float> texLut, uint slice, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) {
    uint lut_index;
    int bit_in_config1;
    if (lut_id == SP_LUT) {
        // These are the spotlight attenuation LUTs
        bit_in_config1 = 8 + int(light_id & 7u);
        lut_index = 8u + light_id;
    } else if (lut_id <= 6u) {
        bit_in_config1 = 16 + int(lut_id);
        lut_index = lut_id;
    } else {
        // Unknown LUT: bail out here. Continuing would use lut_index and bit_in_config1
        // without either of them ever having been assigned.
        globals.error_unimpl = true;
        return 1.0;
    }

    bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id);  // 7 luts per environment

    if (!current_sampler_enabled || (extract_bits(lightingConfig1, bit_in_config1, 1) != 0u)) {
        return 1.0;
    }

    // Scale ids 0..5 select 2^id; ids 6 and 7 end up as 2^id / 256
    uint scale_id = extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
    float scale = float(1u << scale_id);
    if (scale_id >= 6u) scale /= 256.0;

    float delta = 1.0;
    uint input_id = extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
    switch (input_id) {
        case 0u: {
            delta = dot(globals.normal, normalize(half_vector));
            break;
        }
        case 1u: {
            delta = dot(normalize(in.view), normalize(half_vector));
            break;
        }
        case 2u: {
            delta = dot(globals.normal, normalize(in.view));
            break;
        }
        case 3u: {
            delta = dot(light_vector, globals.normal);
            break;
        }
        case 4u: {
            int GPUREG_LIGHTi_SPOTDIR_LOW = int(picaRegs.read(0x0146u + (light_id << 4u)));
            int GPUREG_LIGHTi_SPOTDIR_HIGH = int(picaRegs.read(0x0147u + (light_id << 4u)));

            // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions
            // of GLSL so we do it manually
            int se_x = extract_bits(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
            int se_y = extract_bits(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
            int se_z = extract_bits(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);

            if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000;
            if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000;
            if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000;

            // These are fixed point 1.1.11 values, so we need to convert them to float
            float x = float(se_x) / 2047.0;
            float y = float(se_y) / 2047.0;
            float z = float(se_z) / 2047.0;
            float3 spotlight_vector = float3(x, y, z);
            delta = dot(light_vector, spotlight_vector);  // spotlight direction is negated so we don't negate light_vector
            break;
        }
        case 5u: {
            delta = 1.0;  // TODO: cos <greek symbol> (aka CP);
            globals.error_unimpl = true;
            break;
        }
        default: {
            delta = 1.0;
            globals.error_unimpl = true;
            break;
        }
    }

    // 0 = enabled
    if (extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
        // Two sided diffuse
        if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
            delta = max(delta, 0.0);
        } else {
            delta = abs(delta);
        }
        int index = int(clamp(floor(delta * 255.0), 0.f, 255.f));
        return lutLookup(texLut, slice, lut_index, index) * scale;
    } else {
        // Range is [-1, 1] so we need to map it to [0, 1]
        int index = int(clamp(floor(delta * 128.0), -128.f, 127.f));
        // Negative indices wrap around into the upper half of the table
        if (index < 0) index += 256;
        return lutLookup(texLut, slice, lut_index, index) * scale;
    }
}
|
|
|
|
// Unpacks a light colour register into an RGB triple in [0, 1].
// The 8-bit channels sit at bit 20 (red), bit 10 (green) and bit 0 (blue).
float3 regToColor(uint reg) {
    // Normalization scale to convert from [0...255] to [0.0...1.0]
    const float scale = 1.0 / 255.0;

    const float red = float(extract_bits(reg, 20, 8));
    const float green = float(extract_bits(reg, 10, 8));
    const float blue = float(extract_bits(reg, 0, 8));

    return scale * float3(red, green, blue);
}
|
|
|
|
// Implements the following algorithm: https://mathb.in/26766
//
// Accumulates the diffuse and specular contribution of every active light and writes the
// clamped results to `primaryColor` (global ambient + diffuse) and `secondaryColor` (specular).
// Also fills in globals.normal and the cached lighting registers in `globals`.
void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array<float> texLut, uint slice, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) {
    primaryColor = float4(0.0, 0.0, 0.0, 1.0);
    secondaryColor = float4(0.0, 0.0, 0.0, 1.0);

    const uint GPUREG_LIGHTING_LIGHT_PERMUTATION = picaRegs.read(0x01D9u);
    const uint GPUREG_LIGHTING_CONFIG0 = picaRegs.read(0x01C3u);
    globals.GPUREG_LIGHTING_LUTINPUT_ABS = picaRegs.read(0x01D0u);
    globals.GPUREG_LIGHTING_LUTINPUT_SELECT = picaRegs.read(0x01D1u);
    globals.GPUREG_LIGHTING_LUTINPUT_SCALE = picaRegs.read(0x01D2u);

    // Quaternions describe a transformation from surface-local space to eye space.
    // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1),
    // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0).
    // Bump mode (bits 28-29 of CONFIG0) is ignored for now because it breaks some games
    // ie. Toad Treasure Tracker, so the normal is always the rotated (0,0,1).
    globals.normal = rotateFloat3ByQuaternion(float3(0.0, 0.0, 1.0), in.quaternion);

    const uint environmentId = extract_bits(GPUREG_LIGHTING_CONFIG0, 4, 4);
    const bool clampHighlights = extract_bits(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;

    float4 diffuseAccum = float4(0.0, 0.0, 0.0, 1.0);
    float4 specularAccum = float4(0.0, 0.0, 0.0, 1.0);

    // These outlive the loop: the fresnel lookup below uses the values left by the last light
    uint lightId;
    float3 lightVector = float3(0.0);
    float3 halfVector = float3(0.0);

    for (uint i = 0u; i < lightingNumLights + 1; i++) {
        lightId = extract_bits(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);

        // Each light owns a block of 16 registers starting at 0x0140
        const uint lightBase = lightId << 4u;
        const uint GPUREG_LIGHTi_SPECULAR0 = picaRegs.read(0x0140u + lightBase);
        const uint GPUREG_LIGHTi_SPECULAR1 = picaRegs.read(0x0141u + lightBase);
        const uint GPUREG_LIGHTi_DIFFUSE = picaRegs.read(0x0142u + lightBase);
        const uint GPUREG_LIGHTi_AMBIENT = picaRegs.read(0x0143u + lightBase);
        const uint GPUREG_LIGHTi_VECTOR_LOW = picaRegs.read(0x0144u + lightBase);
        const uint GPUREG_LIGHTi_VECTOR_HIGH = picaRegs.read(0x0145u + lightBase);
        globals.GPUREG_LIGHTi_CONFIG = picaRegs.read(0x0149u + lightBase);

        // The light vector is stored as three 16-bit floats (5 exponent bits, 10 mantissa bits)
        const float lightX = decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u);
        const float lightY = decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u);
        const float lightZ = decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u);
        const float3 lightPosition = float3(lightX, lightY, lightZ);

        // Config bit 0 clear: positional light. Set: directional light.
        const bool isPositional = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u;
        if (isPositional) {
            lightVector = lightPosition + in.view;
        } else {
            lightVector = lightPosition;
        }

        const float lightDistance = length(lightVector);
        lightVector = normalize(lightVector);
        halfVector = lightVector + normalize(in.view);

        // N dot Li, clamped at zero unless two sided diffuse (config bit 1) is on
        float NdotL = dot(globals.normal, lightVector);
        const bool twoSidedDiffuse = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 1, 1) != 0u;
        if (twoSidedDiffuse) {
            NdotL = abs(NdotL);
        } else {
            NdotL = max(0.0, NdotL);
        }

        // Geometric factor, only computed when at least one specular term asks for it
        float geometricFactor;
        const bool useGeo0 = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
        const bool useGeo1 = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
        if (useGeo0 || useGeo1) {
            geometricFactor = dot(halfVector, halfVector);
            geometricFactor = geometricFactor == 0.0 ? 0.0 : min(NdotL / geometricFactor, 1.0);
        }

        // Distance attenuation, skipped when the light's bit in lightingConfig1 is set
        float distanceAttenuation = 1.0;
        if (extract_bits(lightingConfig1, 24 + int(lightId), 1) == 0u) {
            const uint GPUREG_LIGHTi_ATTENUATION_BIAS = extract_bits(picaRegs.read(0x014Au + lightBase), 0, 20);
            const uint GPUREG_LIGHTi_ATTENUATION_SCALE = extract_bits(picaRegs.read(0x014Bu + lightBase), 0, 20);

            const float attenuationBias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u);
            const float attenuationScale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u);

            float delta = lightDistance * attenuationScale + attenuationBias;
            delta = clamp(delta, 0.0, 1.0);
            const int index = int(clamp(floor(delta * 255.0), 0.0, 255.0));
            distanceAttenuation = lutLookup(texLut, slice, 16u + lightId, index);
        }

        const float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, SP_LUT, lightId, lightVector, halfVector);
        const float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, D0_LUT, lightId, lightVector, halfVector);
        const float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, D1_LUT, lightId, lightVector, halfVector);

        // Reflected colour: green and blue fall back to red when their LUT is not enabled
        float3 reflectedColor;
        reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RR_LUT, lightId, lightVector, halfVector);

        if (isSamplerEnabled(environmentId, RG_LUT)) {
            reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RG_LUT, lightId, lightVector, halfVector);
        } else {
            reflectedColor.g = reflectedColor.r;
        }

        if (isSamplerEnabled(environmentId, RB_LUT)) {
            reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RB_LUT, lightId, lightVector, halfVector);
        } else {
            reflectedColor.b = reflectedColor.r;
        }

        float3 specular0 = regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0Distribution;
        float3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1Distribution * reflectedColor;

        specular0 *= useGeo0 ? geometricFactor : 1.0;
        specular1 *= useGeo1 ? geometricFactor : 1.0;

        // With highlight clamping on, specular is dropped where N dot L is zero
        float clampFactor = 1.0;
        if (clampHighlights && NdotL == 0.0) {
            clampFactor = 0.0;
        }

        const float lightFactor = distanceAttenuation * spotlightAttenuation;
        diffuseAccum.rgb += lightFactor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL);
        specularAccum.rgb += lightFactor * clampFactor * (specular0 + specular1);
    }

    // Fresnel: CONFIG0 bits 2 and 3 route the fresnel factor into the primary / secondary alpha
    const uint fresnelOutput1 = extract_bits(GPUREG_LIGHTING_CONFIG0, 2, 1);
    const uint fresnelOutput2 = extract_bits(GPUREG_LIGHTING_CONFIG0, 3, 1);

    float fresnelFactor;
    if (fresnelOutput1 == 1u || fresnelOutput2 == 1u) {
        fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, FR_LUT, lightId, lightVector, halfVector);
    }

    if (fresnelOutput1 == 1u) {
        diffuseAccum.a = fresnelFactor;
    }

    if (fresnelOutput2 == 1u) {
        specularAccum.a = fresnelFactor;
    }

    const uint GPUREG_LIGHTING_AMBIENT = picaRegs.read(0x01C0u);
    const float4 globalAmbient = float4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0);

    primaryColor = clamp(globalAmbient + diffuseAccum, 0.0, 1.0);
    secondaryColor = clamp(specularAccum, 0.0, 1.0);
}
|
|
|
|
// Float front end of performLogicOpU: reinterprets both colours as raw bits, applies the
// logic operation and reinterprets the result as floats again.
float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) {
    const uint4 sourceBits = as_type<uint4>(s);
    const uint4 destBits = as_type<uint4>(d);
    const uint4 resultBits = performLogicOpU(logicOp, sourceBits, destBits);

    return as_type<float4>(resultBits);
}
|
|
|
|
// Fragment stage of the draw pass.
//
// Steps: optional lighting -> texture sampling -> six texture environment stages ->
// optional fog -> alpha test -> logic op. Returns the final colour.
fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d<float> tex0 [[texture(0)]], texture2d<float> tex1 [[texture(1)]], texture2d<float> tex2 [[texture(2)]], texture2d_array<float> texLightingLut [[texture(3)]], texture1d_array<float> texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) {
    Globals globals;

    // Give every piece of state a defined starting value. The combiners may select any of the
    // 16 sources, including ones that are never written below (disabled texture units,
    // unimplemented sources), and the error flag is only ever set to true.
    globals.error_unimpl = false;
    for (int i = 0; i < 16; i++) {
        globals.tevSources[i] = float4(0.0);
    }

    // HACK: lightingEnabled, lightingNumLights, lightingConfig1 and alphaControl come from
    // function constants instead of being read from the registers here
    //globals.lightingEnabled = picaRegs.read(0x008Fu) != 0u;
    //globals.lightingNumLights = picaRegs.read(0x01C2u);
    //globals.lightingConfig1 = picaRegs.read(0x01C4u);
    //globals.alphaControl = picaRegs.read(0x104);

    // Sources 0-2: vertex colour, primary and secondary lighting colour
    globals.tevSources[0] = in.color;
    if (lightingEnabled) {
        calcLighting(globals, in, picaRegs, texLightingLut, lutSlices.x, linearSampler, globals.tevSources[1], globals.tevSources[2]);
    } else {
        globals.tevSources[1] = float4(0.0);
        globals.tevSources[2] = float4(0.0);
    }

    // Sources 3-5: textures, sampled only when the matching enable bit is set.
    // Bit 13 makes texture unit 2 use the coordinates of unit 1.
    uint textureConfig = picaRegs.read(0x80u);
    float2 texCoord2 = (textureConfig & (1u << 13)) != 0u ? in.texCoord1 : in.texCoord2;

    if ((textureConfig & 1u) != 0u) globals.tevSources[3] = tex0.sample(samplr0, in.texCoord0.xy);
    if ((textureConfig & 2u) != 0u) globals.tevSources[4] = tex1.sample(samplr1, in.texCoord1);
    if ((textureConfig & 4u) != 0u) globals.tevSources[5] = tex2.sample(samplr2, texCoord2);
    globals.tevSources[13] = float4(0.0);  // Previous buffer
    globals.tevSources[15] = in.color;     // Previous combiner

    globals.tevNextPreviousBuffer = in.textureEnvBufferColor;
    uint textureEnvUpdateBuffer = picaRegs.read(0xE0u);

    for (int i = 0; i < 6; i++) {
        globals.tevSources[14] = in.textureEnvColor[i];  // Constant color
        globals.tevSources[15] = tev.calculateCombiner(globals, i);
        globals.tevSources[13] = globals.tevNextPreviousBuffer;

        // Only the first four stages can feed the buffer; bits 8+ select rgb, bits 12+ select alpha
        if (i < 4) {
            if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) {
                globals.tevNextPreviousBuffer.rgb = globals.tevSources[15].rgb;
            }

            if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) {
                globals.tevNextPreviousBuffer.a = globals.tevSources[15].a;
            }
        }
    }

    float4 color = globals.tevSources[15];

    // Fog
    bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u;

    if (enable_fog) {
        bool flipDepth = (textureEnvUpdateBuffer & (1u << 16)) != 0u;
        float fogIndex = flipDepth ? 1.0 - in.position.z : in.position.z;
        fogIndex *= 128.0;
        float clampedIndex = clamp(floor(fogIndex), 0.0, 127.0);
        float delta = fogIndex - clampedIndex;
        // clampedIndex is a whole number in [0, 127], so the conversion to a texel index is exact
        float2 value = texFogLut.read(uint(clampedIndex), lutSlices.y).rg;
        float fogFactor = clamp(value.r + value.g * delta, 0.0, 1.0);

        uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u);

        // Annoyingly color is not encoded in the same way as light color
        float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0;
        float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0;
        float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0;
        float3 fogColor = float3(r, g, b);

        color.rgb = mix(fogColor, color.rgb, fogFactor);
    }

    // Perform alpha test
    if ((alphaControl & 1u) != 0u) {  // Check if alpha test is on
        uint func = (alphaControl >> 4u) & 7u;
        float reference = float((alphaControl >> 8u) & 0xffu) / 255.0;
        float alpha = color.a;

        switch (func) {
            case 0u:  // Never pass alpha test
                discard_fragment();
                break;
            case 1u:  // Always pass alpha test
                break;
            case 2u:  // Pass if equal
                if (alpha != reference) discard_fragment();
                break;
            case 3u:  // Pass if not equal
                if (alpha == reference) discard_fragment();
                break;
            case 4u:  // Pass if less than
                if (alpha >= reference) discard_fragment();
                break;
            case 5u:  // Pass if less than or equal
                if (alpha > reference) discard_fragment();
                break;
            case 6u:  // Pass if greater than
                if (alpha <= reference) discard_fragment();
                break;
            case 7u:  // Pass if greater than or equal
                if (alpha < reference) discard_fragment();
                break;
        }
    }

    // The destination colour is a fixed placeholder here, not the framebuffer contents
    return performLogicOp(logicOp, color, float4(1.0, 0.0, 0.0, 1.0));
}
|