diff --git a/.github/gles.patch b/.github/gles.patch index 99258011..a27b3d00 100644 --- a/.github/gles.patch +++ b/.github/gles.patch @@ -21,7 +21,7 @@ index 990e2f80..2e7842ac 100644 void main() { diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag -index 23c5c4cb..a9851a8b 100644 +index b4ad7ecc..98b1bd80 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,4 +1,5 @@ @@ -31,7 +31,7 @@ index 23c5c4cb..a9851a8b 100644 in vec4 v_quaternion; in vec4 v_colour; -@@ -189,11 +190,17 @@ float lutLookup(uint lut, int index) { +@@ -164,11 +165,17 @@ float lutLookup(uint lut, int index) { return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; } @@ -50,8 +50,8 @@ index 23c5c4cb..a9851a8b 100644 } // Convert an arbitrary-width floating point literal to an f32 -@@ -257,16 +264,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light - // If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue. 
+@@ -208,16 +215,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment - if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { @@ -70,26 +70,23 @@ index 23c5c4cb..a9851a8b 100644 switch (input_id) { case 0u: { delta = dot(normal, normalize(half_vector)); -@@ -285,14 +292,14 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light - break; - } - case 4u: { -- // These are ints so that bitfieldExtract sign extends for us -+ // These are ints so that bitfieldExtractCompat sign extends for us +@@ -239,11 +246,11 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u))); int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u))); - // These are fixed point 1.1.11 values, so we need to convert them to float -- float x = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; -- float y = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0; -- float z = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0; -+ float x = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; -+ float y = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0; -+ float z = float(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0; - vec3 spotlight_vector = vec3(x, y, z); - delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector - break; -@@ -310,9 +317,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light +- // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions ++ // Sign extend them. 
Normally bitfieldExtractCompat would do that but it's missing on some versions + // of GLSL so we do it manually +- int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); +- int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); +- int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); ++ int se_x = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); ++ int se_y = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); ++ int se_z = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; +@@ -270,9 +277,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light } // 0 = enabled @@ -101,16 +98,16 @@ index 23c5c4cb..a9851a8b 100644 delta = max(delta, 0.0); } else { delta = abs(delta); -@@ -339,7 +346,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); - +@@ -296,7 +303,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + // Implements the following algorthm: https://mathb.in/26766 + void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); - if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { primary_color = secondary_color = vec4(0.0); return; } -@@ -356,7 +363,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -313,7 +320,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); @@ -118,8 +115,8 @@ index 23c5c4cb..a9851a8b 100644 + uint bump_mode = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 28, 2); // Bump mode is ignored for now because it breaks some games ie. 
Toad Treasure Tracker - // Could be because the texture is not sampled correctly, may need the clamp/border color configurations -@@ -370,15 +377,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + switch (bump_mode) { +@@ -326,15 +333,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); @@ -138,7 +135,7 @@ index 23c5c4cb..a9851a8b 100644 uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u)); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u)); -@@ -390,12 +397,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -346,12 +353,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { float light_distance; vec3 light_position = vec3( @@ -154,7 +151,7 @@ index 23c5c4cb..a9851a8b 100644 light_vector = light_position + v_view; } -@@ -411,14 +418,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -367,23 +374,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { float NdotL = dot(normal, light_vector); // N dot Li // Two sided diffuse @@ -172,9 +169,8 @@ index 23c5c4cb..a9851a8b 100644 if (use_geo_0 || use_geo_1) { geometric_factor = dot(half_vector, half_vector); geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0); -@@ -430,9 +437,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - // fragment and the distance attenuation scale and bias to calculate where in the LUT to look up. 
- // See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE + } + float distance_attenuation = 1.0; - if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { - uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); @@ -185,7 +181,7 @@ index 23c5c4cb..a9851a8b 100644 float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); -@@ -477,8 +484,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { +@@ -428,8 +435,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } @@ -229,10 +225,10 @@ index 057f9a88..dc735ced 100644 v_quaternion = a_quaternion; } diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp -index 9997e63b..5d9d7804 100644 +index 828fb784..a1861b77 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp -@@ -561,22 +561,22 @@ namespace OpenGL { +@@ -568,22 +568,22 @@ namespace OpenGL { static void disableScissor() { glDisable(GL_SCISSOR_TEST); } static void enableBlend() { glEnable(GL_BLEND); } static void disableBlend() { glDisable(GL_BLEND); } diff --git a/docs/3ds/lighting.md b/docs/3ds/lighting.md new file mode 100644 index 00000000..9f4ff2f2 --- /dev/null +++ b/docs/3ds/lighting.md @@ -0,0 +1,79 @@ +## Info on the lighting implementation + +### Missing shadow attenuation +Shadow attenuation samples a texture unit, and that likely needs render to texture for most games so that they can construct +their shadow map. As such the colors are not multiplied by the shadow attenuation value, so there's no shadows. 
+ +### Missing bump mapping +Bump mapping also samples a texture unit. It most likely doesn't need render to texture, but it may need a better texture sampling +implementation (such as GPUREG_TEXUNITi_BORDER_COLOR, GPUREG_TEXUNITi_BORDER_PARAM). Bump mapping would work for some things, +namely the 3ds-examples bump mapping demo, but would break others, such as Captain Toad: Treasure Tracker, with a naive `texture` implementation. + +The CP configuration is also missing because it needs a tangent map implementation. It is currently marked with error_unimpl. + +### samplerEnabledBitfields +Holds the enabled state of the lighting samplers for various PICA configurations, +as explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0 + +```c +const bool samplerEnabled[9 * 7] = bool[9 * 7]( + // D0 D1 SP FR RB RG RR + true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR + false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR + true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR + true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR + true, true, true, false, true, true, true, // Configuration 4: All except for FR + true, false, true, true, true, true, true, // Configuration 5: All except for D1 + true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG + false, false, false, false, false, false, false, // Configuration 7: Unused + true, true, true, true, true, true, true // Configuration 8: All +); +``` + +The above has been condensed to two uints for performance reasons. 
+You can confirm they are the same by running the following: +```c +const uint samplerEnabledBitfields[2] = { 0x7170e645u, 0x7f013fefu }; +for (int i = 0; i < 9 * 7; i++) { + unsigned arrayIndex = (i >> 5); + bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u; + if (samplerEnabled[i] == b) { + printf("%d: happy\n", i); + } else { + printf("%d: unhappy\n", i); + } +} +``` + +### lightLutLookup +lut_id is one of these values: +* 0 D0 +* 1 D1 +* 2 SP +* 3 FR +* 4 RB +* 5 RG +* 6 RR + +lut_index, on the other hand, represents the actual index of the LUT in the texture. +u_tex_lighting_lut has 24 LUTs and they are used like so: +* 0 D0 +* 1 D1 +* 2 is missing because SP uses LUTs 8-15 +* 3 FR +* 4 RB +* 5 RG +* 6 RR +* 8-15 SP0-7 +* 16-23 DA0-7, but this is not handled in this function as the lookup is a bit different + +The light environment configuration controls which LUTs are available for use. +If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1. +If RR is enabled but not RG or RB, the output of RR is used for all three components: red, green and blue. + +### Distance attenuation +Distance attenuation is computed differently from the other factors. For example, +it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use +GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the +fragment and the distance attenuation scale and bias to calculate where in the LUT to look up. 
+See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE \ No newline at end of file diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index e42d8e57..6f30ebf0 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -37,38 +37,13 @@ vec4 tevNextPreviousBuffer; bool tevUnimplementedSourceFlag = false; vec3 normal; -// Holds the enabled state of the lighting samples for various PICA configurations -// As explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0 -// const bool samplerEnabled[9 * 7] = bool[9 * 7]( -// // D0 D1 SP FR RB RG RR -// true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR -// false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR -// true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR -// true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR -// true, true, true, false, true, true, true, // Configuration 4: All except for FR -// true, false, true, true, true, true, true, // Configuration 5: All except for D1 -// true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG -// false, false, false, false, false, false, false, // Configuration 7: Unused -// true, true, true, true, true, true, true // Configuration 8: All -// ); - -// The above have been condensed to two uints to save space -// You can confirm they are the same by running the following: -// for (int i = 0; i < 9 * 7; i++) { -// unsigned arrayIndex = (i >> 5); -// bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u; -// if (samplerEnabled[i] == b) { -// printf("%d: happy\n", i); -// } else { -// printf("%d: unhappy\n", i); -// } -// } +// See docs/3ds/lighting.md const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu); bool isSamplerEnabled(uint 
environment_id, uint lut_id) { uint index = 7 * environment_id + lut_id; uint arrayIndex = (index >> 5); - return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31))) != 0u; + return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u; } // OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements): @@ -182,8 +157,8 @@ uint GPUREG_LIGHTING_CONFIG1; uint GPUREG_LIGHTING_LUTINPUT_SELECT; uint GPUREG_LIGHTING_LUTINPUT_SCALE; uint GPUREG_LIGHTING_LUTINPUT_ABS; -bool error_unimpl; -vec4 unimpl_color; +bool error_unimpl = false; +vec4 unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); float lutLookup(uint lut, int index) { return texelFetch(u_tex_lighting_lut, ivec2(index, lut), 0).r; @@ -219,27 +194,6 @@ float decodeFP(uint hex, uint E, uint M) { float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light_vector, vec3 half_vector) { uint lut_index; - // lut_id is one of these values - // 0 D0 - // 1 D1 - // 2 SP - // 3 FR - // 4 RB - // 5 RG - // 6 RR - - // lut_index on the other hand represents the actual index of the LUT in the texture - // u_tex_lighting_lut has 24 LUTs and they are used like so: - // 0 D0 - // 1 D1 - // 2 is missing because SP uses LUTs 8-15 - // 3 FR - // 4 RB - // 5 RG - // 6 RR - // 8-15 SP0-7 - // 16-23 DA0-7, but this is not handled in this function as the lookup is a bit different - int bit_in_config1; if (lut_id == SP_LUT) { // These are the spotlight attenuation LUTs @@ -252,9 +206,6 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light error_unimpl = true; } - // The light environment configuration controls which LUTs are available for use - // If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1 - // If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue. 
bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { @@ -285,14 +236,23 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light break; } case 4u: { - // These are ints so that bitfieldExtract sign extends for us int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u))); int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u))); + // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions + // of GLSL so we do it manually + int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); + int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); + int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; + if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000; + // These are fixed point 1.1.11 values, so we need to convert them to float - float x = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13)) / 2047.0; - float y = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13)) / 2047.0; - float z = float(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13)) / 2047.0; + float x = float(se_x) / 2047.0; + float y = float(se_y) / 2047.0; + float z = float(se_z) / 2047.0; vec3 spotlight_vector = vec3(x, y, z); delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector break; @@ -335,9 +295,6 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { // Implements the following algorthm: https://mathb.in/26766 void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - error_unimpl = false; - unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); - uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { primary_color = 
secondary_color = vec4(0.0); @@ -359,7 +316,6 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2); // Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker - // Could be because the texture is not sampled correctly, may need the clamp/border color configurations switch (bump_mode) { default: { normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion); @@ -424,11 +380,6 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0); } - // Distance attenuation is computed differently from the other factors, for example - // it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use - // GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the - // fragment and the distance attenuation scale and bias to calculate where in the LUT to look up. - // See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE float distance_attenuation = 1.0; if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);