diff --git a/.github/gles.patch b/.github/gles.patch index f1dc2c73..270e336e 100644 --- a/.github/gles.patch +++ b/.github/gles.patch @@ -1,52 +1,3 @@ -diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp -index a11a6ffa..77486a09 100644 ---- a/src/core/renderer_gl/renderer_gl.cpp -+++ b/src/core/renderer_gl/renderer_gl.cpp -@@ -357,27 +357,27 @@ void RendererGL::bindTexturesToSlots() { - } - - glActiveTexture(GL_TEXTURE0 + 3); -- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); -+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); - glActiveTexture(GL_TEXTURE0); - } - - void RendererGL::updateLightingLUT() { -- gpu.lightingLUTDirty = false; -- std::array u16_lightinglut; -- -- for (int i = 0; i < gpu.lightingLUT.size(); i++) { -- uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); -- u16_lightinglut[i] = value * 65535 / 4095; -- } -- -- glActiveTexture(GL_TEXTURE0 + 3); -- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); -- glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); -- glActiveTexture(GL_TEXTURE0); -+ // gpu.lightingLUTDirty = false; -+ // std::array u16_lightinglut; -+ -+ // for (int i = 0; i < gpu.lightingLUT.size(); i++) { -+ // uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); -+ // u16_lightinglut[i] = value * 65535 / 4095; -+ // } -+ -+ // glActiveTexture(GL_TEXTURE0 + 3); -+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); -+ // glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, 
GL_TEXTURE_MIN_FILTER, GL_LINEAR); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); -+ // glActiveTexture(GL_TEXTURE0); - } - - void RendererGL::drawVertices(PICA::PrimType primType, std::span vertices) { diff --git a/src/host_shaders/opengl_display.frag b/src/host_shaders/opengl_display.frag index 612671c8..1937f711 100644 --- a/src/host_shaders/opengl_display.frag @@ -70,7 +21,7 @@ index 990e2f80..2e7842ac 100644 void main() { diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag -index f6fa6c55..bb88e278 100644 +index 9f369e39..b4bb19d3 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,4 +1,5 @@ @@ -78,36 +29,18 @@ index f6fa6c55..bb88e278 100644 +#version 300 es +precision mediump float; - in vec3 v_tangent; - in vec3 v_normal; -@@ -27,7 +28,7 @@ uniform bool u_depthmapEnable; - uniform sampler2D u_tex0; - uniform sampler2D u_tex1; - uniform sampler2D u_tex2; --uniform sampler1DArray u_tex_lighting_lut; -+// uniform sampler1DArray u_tex_lighting_lut; + in vec4 v_quaternion; + in vec4 v_colour; +@@ -164,11 +165,17 @@ float lutLookup(uint lut, int index) { + return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r; + } - uniform uint u_picaRegs[0x200 - 0x48]; - -@@ -145,16 +146,23 @@ vec4 tevCalculateCombiner(int tev_id) { - #define RR_LUT 6u - - float lutLookup(uint lut, uint light, float value) { -- if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; -- if (lut == SP_LUT) lut = light + 8; -- return texture(u_tex_lighting_lut, vec2(value, lut)).r; -+ // if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; -+ // if (lut == SP_LUT) lut = light + 8; -+ // return texture(u_tex_lighting_lut, vec2(value, lut)).r; -+ return 0.0; -+} -+ -+// some gles versions have 
bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead ++// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead +uint bitfieldExtractCompat(uint val, int off, int size) { + uint mask = uint((1 << size) - 1); + return uint(val >> off) & mask; - } - ++} ++ vec3 regToColor(uint reg) { // Normalization scale to convert from [0...255] to [0.0...1.0] const float scale = 1.0 / 255.0; @@ -117,89 +50,109 @@ index f6fa6c55..bb88e278 100644 } // Convert an arbitrary-width floating point literal to an f32 -@@ -189,7 +197,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - vec3 view = normalize(v_view); +@@ -208,16 +215,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + +- if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { ++ if (!current_sampler_enabled || (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + +- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); ++ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; +- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); ++ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); + switch (input_id) { + case 0u: { + delta = dot(normal, normalize(half_vector)); +@@ -239,11 +246,11 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u))); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id <<
4u))); + +- // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions ++ // Sign extend them. Normally bitfieldExtractCompat would do that but it's missing on some versions + // of GLSL so we do it manually +- int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); +- int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); +- int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); ++ int se_x = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); ++ int se_y = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); ++ int se_z = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; +@@ -270,9 +277,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + } + + // 0 = enabled +- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { ++ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { + // Two sided diffuse +- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { ++ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); +@@ -296,7 +303,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + // Implements the following algorthm: https://mathb.in/26766 + void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); - if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { - primary_color = secondary_color = vec4(1.0); + primary_color = secondary_color = vec4(0.0); return; } -@@ -213,7 +221,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - bool error_unimpl = false; +@@ -313,7 +320,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + 
GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); + GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); + +- uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2); ++ uint bump_mode = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker + switch (bump_mode) { +@@ -326,15 +333,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); + +- uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4); +- bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; ++ uint environment_id = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 4, 4); ++ bool clamp_highlights = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; + + uint light_id; + vec3 light_vector; + vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { -- uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); -+ uint light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); +- light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); ++ light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); - uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); -@@ -224,14 +232,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); + uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u)); +@@ -346,12 +353,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - vec3 light_vector = normalize(vec3( + float light_distance; + vec3 light_position = vec3( - 
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) + decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), + decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) - )); - - vec3 half_vector; + ); // Positional Light - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { - // error_unimpl = true; - half_vector = normalize(normalize(light_vector + v_view) + view); - } -@@ -242,12 +250,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + light_vector = light_position + v_view; } - for (int c = 0; c < 7; c++) { -- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { -- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); -+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { -+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); - float scale = float(1u << scale_id); - if (scale_id >= 6u) scale /= 256.0; - -- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); -+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); - if (input_id == 0u) - d[c] = dot(normal, half_vector); - else if (input_id == 1u) -@@ -260,9 +268,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id); - vec3 spot_light_vector = normalize(vec3( -- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), -- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), -- 
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) -+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), -+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), -+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) - )); - d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); - } else if (input_id == 5u) { -@@ -273,13 +281,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - } - - d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale; -- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); -+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); - } else { - d[c] = 1.0; - } - } - -- uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4); -+ uint lookup_config = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 4, 4); - if (lookup_config == 0u) { - d[D1_LUT] = 0.0; - d[FR_LUT] = 0.0; -@@ -310,7 +318,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - float NdotL = dot(normal, light_vector); // Li dot N +@@ -367,23 +374,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + float NdotL = dot(normal, light_vector); // N dot Li // Two sided diffuse - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) @@ -207,19 +160,40 @@ index f6fa6c55..bb88e278 100644 NdotL = max(0.0, NdotL); else NdotL = abs(NdotL); -@@ -321,8 +329,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + - regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); + + float geometric_factor; +- bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; +- bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; ++ bool use_geo_0 = 
bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; ++ bool use_geo_1 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (use_geo_0 || use_geo_1) { + geometric_factor = dot(half_vector, half_vector); + geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0); + } + + float distance_attenuation = 1.0; +- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { +- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); +- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); ++ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { ++ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); ++ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); + + float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); +@@ -428,8 +435,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } + - uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); - uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); + uint fresnel_output1 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 2, 1); + uint fresnel_output2 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 3, 1); - - if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; - if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; + // Uses parameters from the last light as Fresnel is only applied to the last light + float fresnel_factor; + diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert -index a25d7a6d..7cf40398 100644 +index 057f9a88..dc735ced 
100644 --- a/src/host_shaders/opengl_vertex_shader.vert +++ b/src/host_shaders/opengl_vertex_shader.vert @@ -1,4 +1,6 @@ @@ -230,7 +204,7 @@ index a25d7a6d..7cf40398 100644 layout(location = 0) in vec4 a_coords; layout(location = 1) in vec4 a_quaternion; -@@ -20,7 +22,7 @@ out vec2 v_texcoord2; +@@ -18,7 +20,7 @@ out vec2 v_texcoord2; flat out vec4 v_textureEnvColor[6]; flat out vec4 v_textureEnvBufferColor; @@ -239,7 +213,7 @@ index a25d7a6d..7cf40398 100644 // TEV uniforms uniform uint u_textureEnvColor[6]; -@@ -93,6 +95,6 @@ void main() { +@@ -81,8 +83,8 @@ void main() { ); // There's also another, always-on clipping plane based on vertex z @@ -247,16 +221,20 @@ index a25d7a6d..7cf40398 100644 - gl_ClipDistance[1] = dot(clipData, a_coords); + // gl_ClipDistance[0] = -a_coords.z; + // gl_ClipDistance[1] = dot(clipData, a_coords); + + v_quaternion = a_quaternion; } diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp -index f368f573..5ead7f63 100644 +index 4a08650a..21af37e3 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp -@@ -520,21 +520,21 @@ namespace OpenGL { +@@ -583,22 +583,22 @@ namespace OpenGL { + static void disableScissor() { glDisable(GL_SCISSOR_TEST); } static void enableBlend() { glEnable(GL_BLEND); } static void disableBlend() { glDisable(GL_BLEND); } - static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); } +- static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); } - static void disableLogicOp() { glDisable(GL_COLOR_LOGIC_OP); } ++ static void enableLogicOp() { /* glEnable(GL_COLOR_LOGIC_OP); */ } + static void disableLogicOp() { /* glDisable(GL_COLOR_LOGIC_OP); */ } static void enableDepth() { glEnable(GL_DEPTH_TEST); } static void disableDepth() { glDisable(GL_DEPTH_TEST); } diff --git a/docs/3ds/lighting.md b/docs/3ds/lighting.md new file mode 100644 index 00000000..9f4ff2f2 --- /dev/null +++ b/docs/3ds/lighting.md @@ -0,0 +1,79 @@ +## Info on the lighting implementation + +### 
Missing shadow attenuation +Shadow attenuation samples a texture unit, and that likely needs render to texture for most games so that they can construct +their shadow map. As such the colors are not multiplied by the shadow attenuation value, so there are no shadows. + +### Missing bump mapping +Bump mapping also samples a texture unit. It most likely doesn't need render to texture, but it may need a better texture sampling +implementation (such as GPUREG_TEXUNITi_BORDER_COLOR, GPUREG_TEXUNITi_BORDER_PARAM). Bump mapping would work for some things, +namely the 3ds-examples bump mapping demo, but would break others such as Toad Treasure Tracker with a naive `texture` implementation. + +Also the CP configuration is missing, because it needs a tangent map implementation. It is currently marked with error_unimpl. + +### samplerEnabledBitfields +Holds the enabled state of the lighting samplers for the various PICA lighting configurations, +as explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0 + +```c +const bool samplerEnabled[9 * 7] = bool[9 * 7]( + // D0 D1 SP FR RB RG RR + true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR + false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR + true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR + true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR + true, true, true, false, true, true, true, // Configuration 4: All except for FR + true, false, true, true, true, true, true, // Configuration 5: All except for D1 + true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG + false, false, false, false, false, false, false, // Configuration 7: Unused + true, true, true, true, true, true, true // Configuration 8: All +); +``` + +The above has been condensed to two uints for performance reasons.
+You can confirm they are the same by running the following: +```c +const uint samplerEnabledBitfields[2] = { 0x7170e645u, 0x7f013fefu }; +for (int i = 0; i < 9 * 7; i++) { + unsigned arrayIndex = (i >> 5); + bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u; + if (samplerEnabled[i] == b) { + printf("%d: happy\n", i); + } else { + printf("%d: unhappy\n", i); + } +} +``` + +### lightLutLookup +lut_id is one of these values +0 D0 +1 D1 +2 SP +3 FR +4 RB +5 RG +6 RR + +lut_index on the other hand represents the actual index of the LUT in the texture +u_tex_lighting_lut has 24 LUTs and they are used like so: +0 D0 +1 D1 +2 is missing because SP uses LUTs 8-15 +3 FR +4 RB +5 RG +6 RR +8-15 SP0-7 +16-23 DA0-7, but this is not handled in this function as the lookup is a bit different + +The light environment configuration controls which LUTs are available for use +If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1 +If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue. + +### Distance attenuation +Distance attenuation is computed differently from the other factors, for example +it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use +GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the +fragment and the distance attenuation scale and bias to calculate where in the LUT to look up. 
+See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE \ No newline at end of file diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp index 9e13b3b5..89dd3420 100644 --- a/include/PICA/pica_frag_config.hpp +++ b/include/PICA/pica_frag_config.hpp @@ -29,20 +29,204 @@ namespace PICA { std::array tevConfigs; }; + struct Light { + union { + u16 raw; + BitField<0, 3, u16> num; + BitField<3, 1, u16> directional; + BitField<4, 1, u16> twoSidedDiffuse; + BitField<5, 1, u16> distanceAttenuationEnable; + BitField<6, 1, u16> spotAttenuationEnable; + BitField<7, 1, u16> geometricFactor0; + BitField<8, 1, u16> geometricFactor1; + BitField<9, 1, u16> shadowEnable; + }; + }; + + struct LightingLUTConfig { + union { + u32 raw; + BitField<0, 1, u32> enable; + BitField<1, 1, u32> absInput; + BitField<2, 3, u32> type; + }; + float scale; + }; + + struct LightingConfig { + union { + u32 raw{}; + BitField<0, 1, u32> enable; + BitField<1, 4, u32> lightNum; + BitField<5, 2, u32> bumpMode; + BitField<7, 2, u32> bumpSelector; + BitField<9, 1, u32> bumpRenorm; + BitField<10, 1, u32> clampHighlights; + BitField<11, 4, u32> config; + BitField<15, 1, u32> enablePrimaryAlpha; + BitField<16, 1, u32> enableSecondaryAlpha; + BitField<17, 1, u32> enableShadow; + BitField<18, 1, u32> shadowPrimary; + BitField<19, 1, u32> shadowSecondary; + BitField<20, 1, u32> shadowInvert; + BitField<21, 1, u32> shadowAlpha; + BitField<22, 2, u32> shadowSelector; + }; + + std::array luts{}; + + std::array lights{}; + + LightingConfig(const std::array& regs) { + // Ignore lighting registers if it's disabled + if ((regs[InternalRegs::LightingEnable] & 1) == 0) { + return; + } + + const u32 config0 = regs[InternalRegs::LightConfig0]; + const u32 config1 = regs[InternalRegs::LightConfig1]; + const u32 totalLightCount = Helpers::getBits<0, 3>(regs[InternalRegs::LightNumber]) + 1; + + enable = 1; + lightNum = totalLightCount; + + enableShadow = 
Helpers::getBit<0>(config0); + if (enableShadow) [[unlikely]] { + shadowPrimary = Helpers::getBit<16>(config0); + shadowSecondary = Helpers::getBit<17>(config0); + shadowInvert = Helpers::getBit<18>(config0); + shadowAlpha = Helpers::getBit<19>(config0); + shadowSelector = Helpers::getBits<24, 2>(config0); + } + + enablePrimaryAlpha = Helpers::getBit<2>(config0); + enableSecondaryAlpha = Helpers::getBit<3>(config0); + config = Helpers::getBits<4, 4>(config0); + + bumpSelector = Helpers::getBits<22, 2>(config0); + clampHighlights = Helpers::getBit<27>(config0); + bumpMode = Helpers::getBits<28, 2>(config0); + bumpRenorm = Helpers::getBit<30>(config0) ^ 1; // 0 = enable so flip it with xor + + for (int i = 0; i < totalLightCount; i++) { + auto& light = lights[i]; + light.num = (regs[InternalRegs::LightPermutation] >> (i * 4)) & 0x7; + + const u32 lightConfig = regs[InternalRegs::Light0Config + 0x10 * light.num]; + light.directional = Helpers::getBit<0>(lightConfig); + light.twoSidedDiffuse = Helpers::getBit<1>(lightConfig); + light.geometricFactor0 = Helpers::getBit<2>(lightConfig); + light.geometricFactor1 = Helpers::getBit<3>(lightConfig); + + light.shadowEnable = ((config1 >> light.num) & 1) ^ 1; // This also does 0 = enabled + light.spotAttenuationEnable = ((config1 >> (8 + light.num)) & 1) ^ 1; // Same here + light.distanceAttenuationEnable = ((config1 >> (24 + light.num)) & 1) ^ 1; // Of course same here + } + + LightingLUTConfig& d0 = luts[Lights::LUT_D0]; + LightingLUTConfig& d1 = luts[Lights::LUT_D1]; + LightingLUTConfig& sp = luts[spotlightLutIndex]; + LightingLUTConfig& fr = luts[Lights::LUT_FR]; + LightingLUTConfig& rb = luts[Lights::LUT_RB]; + LightingLUTConfig& rg = luts[Lights::LUT_RG]; + LightingLUTConfig& rr = luts[Lights::LUT_RR]; + + d0.enable = Helpers::getBit<16>(config1) == 0; + d1.enable = Helpers::getBit<17>(config1) == 0; + fr.enable = Helpers::getBit<19>(config1) == 0; + rb.enable = Helpers::getBit<20>(config1) == 0; + rg.enable = 
Helpers::getBit<21>(config1) == 0; + rr.enable = Helpers::getBit<22>(config1) == 0; + sp.enable = 1; + + const u32 lutAbs = regs[InternalRegs::LightLUTAbs]; + const u32 lutSelect = regs[InternalRegs::LightLUTSelect]; + const u32 lutScale = regs[InternalRegs::LightLUTScale]; + static constexpr float scales[] = {1.0f, 2.0f, 4.0f, 8.0f, 0.0f, 0.0f, 0.25f, 0.5f}; + + if (d0.enable) { + d0.absInput = Helpers::getBit<1>(lutAbs) == 0; + d0.type = Helpers::getBits<0, 3>(lutSelect); + d0.scale = scales[Helpers::getBits<0, 3>(lutScale)]; + } + + if (d1.enable) { + d1.absInput = Helpers::getBit<5>(lutAbs) == 0; + d1.type = Helpers::getBits<4, 3>(lutSelect); + d1.scale = scales[Helpers::getBits<4, 3>(lutScale)]; + } + + sp.absInput = Helpers::getBit<9>(lutAbs) == 0; + sp.type = Helpers::getBits<8, 3>(lutSelect); + sp.scale = scales[Helpers::getBits<8, 3>(lutScale)]; + + if (fr.enable) { + fr.absInput = Helpers::getBit<13>(lutAbs) == 0; + fr.type = Helpers::getBits<12, 3>(lutSelect); + fr.scale = scales[Helpers::getBits<12, 3>(lutScale)]; + } + + if (rb.enable) { + rb.absInput = Helpers::getBit<17>(lutAbs) == 0; + rb.type = Helpers::getBits<16, 3>(lutSelect); + rb.scale = scales[Helpers::getBits<16, 3>(lutScale)]; + } + + if (rg.enable) { + rg.absInput = Helpers::getBit<21>(lutAbs) == 0; + rg.type = Helpers::getBits<20, 3>(lutSelect); + rg.scale = scales[Helpers::getBits<20, 3>(lutScale)]; + } + + if (rr.enable) { + rr.absInput = Helpers::getBit<25>(lutAbs) == 0; + rr.type = Helpers::getBits<24, 3>(lutSelect); + rr.scale = scales[Helpers::getBits<24, 3>(lutScale)]; + } + } + }; + // Config used for identifying unique fragment pipeline configurations struct FragmentConfig { OutputConfig outConfig; TextureConfig texConfig; + LightingConfig lighting; bool operator==(const FragmentConfig& config) const { // Hash function and equality operator required by std::unordered_map return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0; } + + FragmentConfig(const std::array& regs) : 
lighting(regs) { + auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig]; + auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig); + + outConfig.alphaTestFunction = + (alphaTestConfig & 1) ? static_cast(alphaTestFunction) : PICA::CompareFunction::Always; + outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1; + + texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; + texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + + // Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like + // {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO +#define setupTevStage(stage) \ + std::memcpy(&texConfig.tevConfigs[stage * 4], &regs[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \ + texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 4]; + + setupTevStage(0); + setupTevStage(1); + setupTevStage(2); + setupTevStage(3); + setupTevStage(4); + setupTevStage(5); +#undef setupTevStage + } }; static_assert( std::has_unique_object_representations() && std::has_unique_object_representations() && - std::has_unique_object_representations() + std::has_unique_object_representations() ); } // namespace PICA diff --git a/include/PICA/pica_frag_uniforms.hpp b/include/PICA/pica_frag_uniforms.hpp index 332acd4e..09722d61 100644 --- a/include/PICA/pica_frag_uniforms.hpp +++ b/include/PICA/pica_frag_uniforms.hpp @@ -1,10 +1,26 @@ #pragma once #include +#include #include #include "helpers.hpp" namespace PICA { + struct LightUniform { + using vec3 = std::array; + + // std140 requires vec3s be aligned to 16 bytes + alignas(16) vec3 specular0; + alignas(16) vec3 specular1; + alignas(16) vec3 diffuse; + alignas(16) vec3 ambient; + alignas(16) vec3 position; + alignas(16) vec3 spotlightDirection; + + float distanceAttenuationBias; + float distanceAttenuationScale; + }; + struct FragmentUniforms { using vec3 = std::array; using
vec4 = std::array; @@ -17,5 +33,13 @@ namespace PICA { alignas(16) vec4 constantColors[tevStageCount]; alignas(16) vec4 tevBufferColor; alignas(16) vec4 clipCoords; + + // Note: We upload this as a u32 and decode on GPU + u32 globalAmbientLight; + // NOTE: THIS MUST BE LAST so that if lighting is disabled we can potentially omit uploading it + LightUniform lightUniforms[8]; }; + + // Assert that lightUniforms is the last member of the structure + static_assert(offsetof(FragmentUniforms, lightUniforms) + 8 * sizeof(LightUniform) == sizeof(FragmentUniforms)); } // namespace PICA \ No newline at end of file diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 74f8c7d5..c4d6a5fb 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -67,7 +67,29 @@ namespace PICA { ColourBufferLoc = 0x11D, FramebufferSize = 0x11E, - //LightingRegs + // Lighting registers + LightingEnable = 0x8F, + Light0Specular0 = 0x140, + Light0Specular1 = 0x141, + Light0Diffuse = 0x142, + Light0Ambient = 0x143, + Light0XY = 0x144, + Light0Z = 0x145, + Light0SpotlightXY = 0x146, + Light0SpotlightZ = 0x147, + Light0Config = 0x149, + Light0AttenuationBias = 0x14A, + Light0AttenuationScale = 0x14B, + + LightGlobalAmbient = 0x1C0, + LightNumber = 0x1C2, + LightConfig0 = 0x1C3, + LightConfig1 = 0x1C4, + LightPermutation = 0x1D9, + LightLUTAbs = 0x1D0, + LightLUTSelect = 0x1D1, + LightLUTScale = 0x1D2, + LightingLUTIndex = 0x01C5, LightingLUTData0 = 0x01C8, LightingLUTData1 = 0x01C9, @@ -231,7 +253,8 @@ namespace PICA { enum : u32 { LUT_D0 = 0, LUT_D1, - LUT_FR, + // LUT 2 is not used, the emulator internally uses it for referring to the current source's spotlight in shaders + LUT_FR = 0x3, LUT_RB, LUT_RG, LUT_RR, @@ -255,6 +278,11 @@ namespace PICA { }; } + // There's actually 8 different LUTs (SP0-SP7), one for each light with different indices (8-15) + // We use an unused LUT value for "this light source's spotlight" instead and figure out which light source to use in 
compileLutLookup + // This is particularly intuitive in several places, such as checking if a LUT is enabled + static constexpr int spotlightLutIndex = 2; + enum class TextureFmt : u32 { RGBA8 = 0x0, RGB8 = 0x1, diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp index e8e8ca20..6cf810a0 100644 --- a/include/PICA/shader_gen.hpp +++ b/include/PICA/shader_gen.hpp @@ -2,6 +2,7 @@ #include #include "PICA/gpu.hpp" +#include "PICA/pica_frag_config.hpp" #include "PICA/regs.hpp" #include "helpers.hpp" @@ -13,25 +14,25 @@ namespace PICA::ShaderGen { enum class Language { GLSL }; class FragmentGenerator { - using PICARegs = std::array; API api; Language language; - void compileTEV(std::string& shader, int stage, const PICARegs& regs); - void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index); - void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index); - void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index); + void compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config); + void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config); + void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config); + void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index, const PICA::FragmentConfig& config); void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op); void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op); - void applyAlphaTest(std::string& shader, const PICARegs& regs); - - u32 textureConfig = 0; + void applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config); + void 
compileLights(std::string& shader, const PICA::FragmentConfig& config); + void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID); + bool isSamplerEnabled(u32 environmentID, u32 lutID); public: FragmentGenerator(API api, Language language) : api(api), language(language) {} - std::string generate(const PICARegs& regs); - std::string getVertexShader(const PICARegs& regs); + std::string generate(const PICA::FragmentConfig& config); + std::string getDefaultVertexShader(); void setTarget(API api, Language language) { this->api = api; diff --git a/include/config.hpp b/include/config.hpp index 8aa695aa..25f352e8 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -13,12 +13,23 @@ struct EmulatorConfig { static constexpr bool shaderJitDefault = false; #endif + // For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are + // horrible. On other platforms we default to ubershader + shadergen fallback for lights +#if defined(__ANDROID__) || defined(__APPLE__) + static constexpr bool ubershaderDefault = false; +#else static constexpr bool ubershaderDefault = true; +#endif bool shaderJitEnabled = shaderJitDefault; bool discordRpcEnabled = false; bool useUbershaders = ubershaderDefault; bool accurateShaderMul = false; + + // Toggles whether to force shadergen when there's more than N lights active and we're using the ubershader, for better performance + bool forceShadergenForLights = true; + int lightShadergenThreshold = 1; + RendererType rendererType = RendererType::OpenGL; Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null; diff --git a/include/renderer.hpp b/include/renderer.hpp index e64d49e3..569a730b 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -20,6 +20,7 @@ enum class RendererType : s8 { Software = 3, }; +struct EmulatorConfig; class GPU; struct SDL_Window; @@ -46,6 +47,8 @@ class Renderer { u32 
outputWindowWidth = 400; u32 outputWindowHeight = 240 * 2; + EmulatorConfig* emulatorConfig = nullptr; + public: Renderer(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); virtual ~Renderer(); @@ -101,4 +104,6 @@ class Renderer { outputWindowWidth = width; outputWindowHeight = height; } + + void setConfig(EmulatorConfig* config) { emulatorConfig = config; } }; diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 6414a7cf..bfa9922b 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -30,7 +30,7 @@ class RendererGL final : public Renderer { OpenGL::VertexArray vao; OpenGL::VertexBuffer vbo; - bool usingUbershader = true; + bool enableUbershader = true; // Data struct { @@ -63,9 +63,12 @@ class RendererGL final : public Renderer { OpenGL::VertexBuffer dummyVBO; OpenGL::Texture screenTexture; - GLuint lightLUTTextureArray; + OpenGL::Texture lightLUTTexture; OpenGL::Framebuffer screenFramebuffer; OpenGL::Texture blankTexture; + // The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation + // We can compile this once and then link it with all other generated fragment shaders + OpenGL::Shader defaultShadergenVs; // Cached recompiled fragment shader struct CachedProgram { @@ -107,7 +110,7 @@ class RendererGL final : public Renderer { virtual std::string getUbershader() override; virtual void setUbershader(const std::string& shader) override; - virtual void setUbershaderSetting(bool value) override { usingUbershader = value; } + virtual void setUbershaderSetting(bool value) override { enableUbershader = value; } std::optional getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); diff --git a/src/config.cpp b/src/config.cpp index cc34d148..dae5a0ab 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -64,6 +64,9 @@ void EmulatorConfig::load() { vsyncEnabled = 
toml::find_or(gpu, "EnableVSync", true); useUbershaders = toml::find_or(gpu, "UseUbershaders", ubershaderDefault); accurateShaderMul = toml::find_or(gpu, "AccurateShaderMultiplication", false); + + forceShadergenForLights = toml::find_or(gpu, "ForceShadergenForLighting", true); + lightShadergenThreshold = toml::find_or(gpu, "ShadergenLightThreshold", 1); } } @@ -130,6 +133,8 @@ void EmulatorConfig::save() { data["GPU"]["EnableVSync"] = vsyncEnabled; data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul; data["GPU"]["UseUbershaders"] = useUbershaders; + data["GPU"]["ForceShadergenForLighting"] = forceShadergenForLights; + data["GPU"]["ShadergenLightThreshold"] = lightShadergenThreshold; data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType)); data["Audio"]["EnableAudio"] = audioEnabled; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index a54fe6eb..ace49fea 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -58,6 +58,10 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) { break; } } + + if (renderer != nullptr) { + renderer->setConfig(&config); + } } void GPU::reset() { diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp index 0877e5f2..47df58b8 100644 --- a/src/core/PICA/shader_gen_glsl.cpp +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -1,8 +1,21 @@ +#include "PICA/pica_frag_config.hpp" +#include "PICA/regs.hpp" #include "PICA/shader_gen.hpp" using namespace PICA; using namespace PICA::ShaderGen; static constexpr const char* uniformDefinition = R"( + struct LightSource { + vec3 specular0; + vec3 specular1; + vec3 diffuse; + vec3 ambient; + vec3 position; + vec3 spotlightDirection; + float distanceAttenuationBias; + float distanceAttenuationScale; + }; + layout(std140) uniform FragmentUniforms { int alphaReference; float depthScale; @@ -11,10 +24,14 @@ static constexpr const char* uniformDefinition = R"( vec4 constantColors[6]; vec4 tevBufferColor; 
vec4 clipCoords; + + // Note: We upload this as a u32 and decode on GPU + uint globalAmbientLight; + LightSource lightSources[8]; }; )"; -std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { +std::string FragmentGenerator::getDefaultVertexShader() { std::string ret = ""; switch (api) { @@ -44,9 +61,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { layout(location = 6) in vec3 a_view; layout(location = 7) in vec2 a_texcoord2; - out vec3 v_normal; - out vec3 v_tangent; - out vec3 v_bitangent; + out vec4 v_quaternion; out vec4 v_colour; out vec3 v_texcoord0; out vec2 v_texcoord1; @@ -62,12 +77,6 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); } - vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { - vec3 u = q.xyz; - float s = q.w; - return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); - } - void main() { gl_Position = a_coords; vec4 colourAbs = abs(a_vertexColour); @@ -77,10 +86,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); v_view = a_view; - - v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); - v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); - v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); + v_quaternion = a_quaternion; #ifndef USING_GLES gl_ClipDistance[0] = -a_coords.z; @@ -92,7 +98,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { return ret; } -std::string FragmentGenerator::generate(const PICARegs& regs) { +std::string FragmentGenerator::generate(const FragmentConfig& config) { std::string ret = ""; switch (api) { @@ -113,9 +119,7 @@ std::string FragmentGenerator::generate(const PICARegs& 
regs) { // Input and output attributes ret += R"( - in vec3 v_tangent; - in vec3 v_normal; - in vec3 v_bitangent; + in vec4 v_quaternion; in vec4 v_colour; in vec3 v_texcoord0; in vec2 v_texcoord1; @@ -126,23 +130,43 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; - // GLES doesn't support sampler1DArray, as such we'll have to change how we handle lighting later -#ifndef USING_GLES - uniform sampler1DArray u_tex_lighting_lut; -#endif + uniform sampler2D u_tex_lighting_lut; )"; ret += uniformDefinition; + if (config.lighting.enable) { + ret += R"( + vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + vec3 u = q.xyz; + float s = q.w; + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); + } + + float lutLookup(uint lut, int index) { + return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r; + } + + vec3 regToColor(uint reg) { + return (1.0 / 255.0) * vec3(float((reg >> 20u) & 0xFFu), float((reg >> 10u) & 0xFFu), float(reg & 0xFFu)); + } + )"; + } + // Emit main function for fragment shader // When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour ret += R"( void main() { vec4 combinerOutput = v_colour; vec4 previousBuffer = vec4(0.0); - vec4 tevNextPreviousBuffer = tevBufferColor; + vec4 tevNextPreviousBuffer = tevBufferColor; + + vec4 primaryColor = vec4(0.0); + vec4 secondaryColor = vec4(0.0); )"; + compileLights(ret, config); + ret += R"( vec3 colorOp1 = vec3(0.0); vec3 colorOp2 = vec3(0.0); @@ -160,44 +184,39 @@ std::string FragmentGenerator::generate(const PICARegs& regs) { float depth = z_over_w * depthScale + depthOffset; )"; - if ((regs[InternalRegs::DepthmapEnable] & 1) == 0) { + if (!config.outConfig.depthMapEnable) { ret += "depth /= gl_FragCoord.w;\n"; } ret += "gl_FragDepth = depth;\n"; - textureConfig = regs[InternalRegs::TexUnitCfg]; for (int i = 0; i < 6; i++) { - compileTEV(ret, i, regs); + 
compileTEV(ret, i, config); } - applyAlphaTest(ret, regs); + applyAlphaTest(ret, config); ret += "fragColor = combinerOutput;\n}"; // End of main function return ret; } -void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICARegs& regs) { - // Base address for each TEV stage's configuration - static constexpr std::array ioBases = { - InternalRegs::TexEnv0Source, InternalRegs::TexEnv1Source, InternalRegs::TexEnv2Source, - InternalRegs::TexEnv3Source, InternalRegs::TexEnv4Source, InternalRegs::TexEnv5Source, - }; +void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config) { + const u32* tevValues = config.texConfig.tevConfigs.data() + stage * 4; - const u32 ioBase = ioBases[stage]; - TexEnvConfig tev(regs[ioBase], regs[ioBase + 1], regs[ioBase + 2], regs[ioBase + 3], regs[ioBase + 4]); + // Pass a 0 to constColor here, as it doesn't matter for compilation + TexEnvConfig tev(tevValues[0], tevValues[1], tevValues[2], 0, tevValues[3]); if (!tev.isPassthroughStage()) { // Get color operands shader += "colorOp1 = "; - getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage); + getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage, config); shader += ";\ncolorOp2 = "; - getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage); + getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage, config); shader += ";\ncolorOp3 = "; - getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage); + getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage, config); shader += ";\nvec3 outputColor" + std::to_string(stage) + " = clamp("; getColorOperation(shader, tev.colorOp); @@ -209,13 +228,13 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg } else { // Get alpha operands shader += "alphaOp1 = "; - getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage); + getAlphaOperand(shader, tev.alphaSource1, 
tev.alphaOperand1, stage, config); shader += ";\nalphaOp2 = "; - getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage); + getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage, config); shader += ";\nalphaOp3 = "; - getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage); + getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage, config); shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = clamp("; getAlphaOperation(shader, tev.alphaOp); @@ -231,7 +250,7 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg shader += "previousBuffer = tevNextPreviousBuffer;\n\n"; // Update the "next previous buffer" if necessary - const u32 textureEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + const u32 textureEnvUpdateBuffer = config.texConfig.texEnvUpdateBuffer; if (stage < 4) { // Check whether to update rgb if ((textureEnvUpdateBuffer & (0x100 << stage))) { @@ -245,7 +264,7 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg } } -void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index) { +void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config) { using OperandType = TexEnvConfig::ColorOperand; // For inverting operands, add the 1.0 - x subtraction @@ -257,31 +276,31 @@ void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Sourc switch (color) { case OperandType::SourceColor: case OperandType::OneMinusSourceColor: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".rgb"; break; case OperandType::SourceRed: case OperandType::OneMinusSourceRed: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".rrr"; break; case OperandType::SourceGreen: case 
OperandType::OneMinusSourceGreen: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".ggg"; break; case OperandType::SourceBlue: case OperandType::OneMinusSourceBlue: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".bbb"; break; case OperandType::SourceAlpha: case OperandType::OneMinusSourceAlpha: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".aaa"; break; @@ -292,7 +311,7 @@ void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Sourc } } -void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index) { +void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index, const PICA::FragmentConfig& config) { using OperandType = TexEnvConfig::AlphaOperand; // For inverting operands, add the 1.0 - x subtraction @@ -304,25 +323,25 @@ void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Sourc switch (color) { case OperandType::SourceRed: case OperandType::OneMinusSourceRed: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".r"; break; case OperandType::SourceGreen: case OperandType::OneMinusSourceGreen: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".g"; break; case OperandType::SourceBlue: case OperandType::OneMinusSourceBlue: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".b"; break; case OperandType::SourceAlpha: case OperandType::OneMinusSourceAlpha: - getSource(shader, source, index); + getSource(shader, source, index, config); shader += ".a"; break; @@ -333,14 +352,14 @@ void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Sourc } } -void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source 
source, int index) { +void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config) { switch (source) { case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break; case TexEnvConfig::Source::Texture0: shader += "texture(u_tex0, v_texcoord0.xy)"; break; case TexEnvConfig::Source::Texture1: shader += "texture(u_tex1, v_texcoord1)"; break; case TexEnvConfig::Source::Texture2: { // If bit 13 in texture config is set then we use the texcoords for texture 1, otherwise for texture 2 - if (Helpers::getBit<13>(textureConfig)) { + if (Helpers::getBit<13>(config.texConfig.texUnitConfig)) { shader += "texture(u_tex2, v_texcoord1)"; } else { shader += "texture(u_tex2, v_texcoord2)"; @@ -353,8 +372,8 @@ void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source sour case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break; // Lighting - case TexEnvConfig::Source::PrimaryFragmentColor: - case TexEnvConfig::Source::SecondaryFragmentColor: shader += "vec4(1.0, 1.0, 1.0, 1.0)"; break; + case TexEnvConfig::Source::PrimaryFragmentColor: shader += "primaryColor"; break; + case TexEnvConfig::Source::SecondaryFragmentColor: shader += "secondaryColor"; break; default: Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); @@ -401,12 +420,11 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope } } -void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs) { - const u32 alphaConfig = regs[InternalRegs::AlphaTestConfig]; - const auto function = static_cast(Helpers::getBits<4, 3>(alphaConfig)); +void FragmentGenerator::applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config) { + const CompareFunction function = config.outConfig.alphaTestFunction; // Alpha test disabled - if (Helpers::getBit<0>(alphaConfig) == 0 || function == CompareFunction::Always) { + if (function == CompareFunction::Always) { 
return; } @@ -430,3 +448,203 @@ void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs shader += ") { discard; }\n"; } + +void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentConfig& config) { + if (!config.lighting.enable) { + return; + } + + // Currently ignore bump mode + shader += "vec3 normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion);\n"; + shader += R"( + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec3 light_position, light_vector, half_vector, specular0, specular1, reflected_color; + + float light_distance, NdotL, light_factor, geometric_factor, distance_attenuation, distance_att_delta; + float spotlight_attenuation, specular0_dist, specular1_dist; + float lut_lookup_result, lut_lookup_delta; + int lut_lookup_index; + )"; + + uint lightID = 0; + + for (int i = 0; i < config.lighting.lightNum; i++) { + lightID = config.lighting.lights[i].num; + + const auto& lightConfig = config.lighting.lights[i]; + shader += "light_position = lightSources[" + std::to_string(lightID) + "].position;\n"; + + if (lightConfig.directional) { // Directional lighting + shader += "light_vector = light_position;\n"; + } else { // Positional lighting + shader += "light_vector = light_position + v_view;\n"; + } + + shader += R"( + light_distance = length(light_vector); + light_vector = normalize(light_vector); + half_vector = light_vector + normalize(v_view); + + distance_attenuation = 1.0; + NdotL = dot(normal, light_vector); + )"; + + shader += lightConfig.twoSidedDiffuse ? "NdotL = abs(NdotL);\n" : "NdotL = max(NdotL, 0.0);\n"; + + if (lightConfig.geometricFactor0 || lightConfig.geometricFactor1) { + shader += R"( + geometric_factor = dot(half_vector, half_vector); + geometric_factor = (geometric_factor == 0.0) ? 
0.0 : min(NdotL / geometric_factor, 1.0); + )"; + } + + if (lightConfig.distanceAttenuationEnable) { + shader += "distance_att_delta = clamp(light_distance * lightSources[" + std::to_string(lightID) + + "].distanceAttenuationScale + lightSources[" + std::to_string(lightID) + "].distanceAttenuationBias, 0.0, 1.0);\n"; + + shader += "distance_attenuation = lutLookup(" + std::to_string(16 + lightID) + + ", int(clamp(floor(distance_att_delta * 256.0), 0.0, 255.0)));\n"; + } + + compileLUTLookup(shader, config, i, spotlightLutIndex); + shader += "spotlight_attenuation = lut_lookup_result;\n"; + + compileLUTLookup(shader, config, i, PICA::Lights::LUT_D0); + shader += "specular0_dist = lut_lookup_result;\n"; + + compileLUTLookup(shader, config, i, PICA::Lights::LUT_D1); + shader += "specular1_dist = lut_lookup_result;\n"; + + compileLUTLookup(shader, config, i, PICA::Lights::LUT_RR); + shader += "reflected_color.r = lut_lookup_result;\n"; + + if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RG)) { + compileLUTLookup(shader, config, i, PICA::Lights::LUT_RG); + shader += "reflected_color.g = lut_lookup_result;\n"; + } else { + shader += "reflected_color.g = reflected_color.r;\n"; + } + + if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RB)) { + compileLUTLookup(shader, config, i, PICA::Lights::LUT_RB); + shader += "reflected_color.b = lut_lookup_result;\n"; + } else { + shader += "reflected_color.b = reflected_color.r;\n"; + } + + shader += "specular0 = lightSources[" + std::to_string(lightID) + "].specular0 * specular0_dist;\n"; + if (lightConfig.geometricFactor0) { + shader += "specular0 *= geometric_factor;\n"; + } + + shader += "specular1 = lightSources[" + std::to_string(lightID) + "].specular1 * specular1_dist * reflected_color;\n"; + if (lightConfig.geometricFactor1) { + shader += "specular1 *= geometric_factor;\n"; + } + + shader += "light_factor = distance_attenuation * spotlight_attenuation;\n"; + + if (config.lighting.clampHighlights) 
{ + shader += "specular_sum.rgb += light_factor * (NdotL == 0.0 ? 0.0 : 1.0) * (specular0 + specular1);\n"; + } else { + shader += "specular_sum.rgb += light_factor * (specular0 + specular1);\n"; + } + + shader += "diffuse_sum.rgb += light_factor * (lightSources[" + std::to_string(lightID) + "].ambient + lightSources[" + + std::to_string(lightID) + "].diffuse * NdotL);\n"; + } + + if (config.lighting.enablePrimaryAlpha || config.lighting.enableSecondaryAlpha) { + compileLUTLookup(shader, config, config.lighting.lightNum - 1, PICA::Lights::LUT_FR); + shader += "float fresnel_factor = lut_lookup_result;\n"; + } + + if (config.lighting.enablePrimaryAlpha) { + shader += "diffuse_sum.a = fresnel_factor;\n"; + } + + if (config.lighting.enableSecondaryAlpha) { + shader += "specular_sum.a = fresnel_factor;\n"; + } + + shader += R"( + vec4 global_ambient = vec4(regToColor(globalAmbientLight), 1.0); + + primaryColor = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0)); + secondaryColor = clamp(specular_sum, vec4(0.0), vec4(1.0)); + )"; +} + +bool FragmentGenerator::isSamplerEnabled(u32 environmentID, u32 lutID) { + static constexpr bool samplerEnabled[9 * 7] = { + // D0 D1 SP FR RB RG RR + true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR + false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR + true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR + true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR + true, true, true, false, true, true, true, // Configuration 4: All except for FR + true, false, true, true, true, true, true, // Configuration 5: All except for D1 + true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG + false, false, false, false, false, false, false, // Configuration 7: Unused + true, true, true, true, true, true, true, // Configuration 8: All + }; + + return samplerEnabled[environmentID * 7 + lutID]; +} + +void 
FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID) { + const LightingLUTConfig& lut = config.lighting.luts[lutID]; + uint lightID = config.lighting.lights[lightIndex].num; + uint lutIndex = 0; + bool lutEnabled = false; + + if (lutID == spotlightLutIndex) { + // These are the spotlight attenuation LUTs + lutIndex = 8u + lightID; + lutEnabled = config.lighting.lights[lightIndex].spotAttenuationEnable; + } else if (lutID <= 6) { + lutIndex = lutID; + lutEnabled = lut.enable; + } else { + Helpers::warn("Shadergen: Unimplemented LUT value"); + } + + const bool samplerEnabled = isSamplerEnabled(config.lighting.config, lutID); + + if (!samplerEnabled || !lutEnabled) { + shader += "lut_lookup_result = 1.0;\n"; + return; + } + + float scale = lut.scale; + uint inputID = lut.type; + bool absEnabled = lut.absInput; + + switch (inputID) { + case 0: shader += "lut_lookup_delta = dot(normal, normalize(half_vector));\n"; break; + case 1: shader += "lut_lookup_delta = dot(normalize(v_view), normalize(half_vector));\n"; break; + case 2: shader += "lut_lookup_delta = dot(normal, normalize(v_view));\n"; break; + case 3: shader += "lut_lookup_delta = dot(normal, light_vector);\n"; break; + case 4: shader += "lut_lookup_delta = dot(light_vector, lightSources[" + std ::to_string(lightID) + "].spotlightDirection);\n"; break; + + default: + Helpers::warn("Shadergen: Unimplemented LUT select"); + shader += "lut_lookup_delta = 1.0;\n"; + break; + } + + if (absEnabled) { + bool twoSidedDiffuse = config.lighting.lights[lightIndex].twoSidedDiffuse; + shader += twoSidedDiffuse ? 
"lut_lookup_delta = abs(lut_lookup_delta);\n" : "lut_lookup_delta = max(lut_lookup_delta, 0.0);\n"; + shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", int(clamp(floor(lut_lookup_delta * 256.0), 0.0, 255.0)));\n"; + if (scale != 1.0) { + shader += "lut_lookup_result *= " + std::to_string(scale) + ";\n"; + } + } else { + // Range is [-1, 1] so we need to map it to [0, 1] + shader += "lut_lookup_index = int(clamp(floor(lut_lookup_delta * 128.0), -128.f, 127.f));\n"; + shader += "if (lut_lookup_index < 0) lut_lookup_index += 256;\n"; + shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", lut_lookup_index) *" + std::to_string(scale) + ";\n"; + } +} \ No newline at end of file diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 249d8484..22750f7d 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -4,6 +4,7 @@ #include +#include "config.hpp" #include "PICA/float_types.hpp" #include "PICA/pica_frag_uniforms.hpp" #include "PICA/gpu.hpp" @@ -117,7 +118,10 @@ void RendererGL::initGraphicsContextInternal() { const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall - glGenTextures(1, &lightLUTTextureArray); + lightLUTTexture.create(256, Lights::LUT_Count, GL_R32F); + lightLUTTexture.bind(); + lightLUTTexture.setMinFilter(OpenGL::Linear); + lightLUTTexture.setMagFilter(OpenGL::Linear); auto prevTexture = OpenGL::getTex2D(); @@ -159,6 +163,10 @@ void RendererGL::initGraphicsContextInternal() { OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]); reset(); + + // Initialize the default vertex shader used with shadergen + std::string defaultShadergenVSSource = fragShaderGen.getDefaultVertexShader(); + defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex); } // 
The OpenGL renderer doesn't need to do anything with the GL context (For Qt frontend) or the SDL window (For SDL frontend) @@ -348,26 +356,22 @@ void RendererGL::bindTexturesToSlots() { } glActiveTexture(GL_TEXTURE0 + 3); - glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); + lightLUTTexture.bind(); glActiveTexture(GL_TEXTURE0); } void RendererGL::updateLightingLUT() { gpu.lightingLUTDirty = false; - std::array u16_lightinglut; + std::array lightingLut; for (int i = 0; i < gpu.lightingLUT.size(); i++) { - uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); - u16_lightinglut[i] = value * 65535 / 4095; + uint64_t value = gpu.lightingLUT[i] & 0xFFF; + lightingLut[i] = (float)(value << 4) / 65535.0f; } glActiveTexture(GL_TEXTURE0 + 3); - glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); - glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + lightLUTTexture.bind(); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, Lights::LUT_Count, GL_RED, GL_FLOAT, lightingLut.data()); glActiveTexture(GL_TEXTURE0); } @@ -380,6 +384,18 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v OpenGL::Triangle, }; + bool usingUbershader = enableUbershader; + if (usingUbershader) { + const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0; + const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1; + + // Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using more than N lights via shadergen + // This way we generate fewer shaders overall than with full shadergen, but don't tank performance + if 
(emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) { + usingUbershader = false; + } + } + if (usingUbershader) { gl.useProgram(triangleProgram); } else { @@ -780,43 +796,16 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt OpenGL::Program& RendererGL::getSpecializedShader() { constexpr uint uboBlockBinding = 2; - PICA::FragmentConfig fsConfig; - auto& outConfig = fsConfig.outConfig; - auto& texConfig = fsConfig.texConfig; - - auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig]; - auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig); - - outConfig.alphaTestFunction = (alphaTestConfig & 1) ? static_cast(alphaTestFunction) : PICA::CompareFunction::Always; - outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1; - - texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; - texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; - - // Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like - // {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO -#define setupTevStage(stage) \ - std::memcpy(&texConfig.tevConfigs[stage * 4], &regs[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \ - texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 5]; - - setupTevStage(0); - setupTevStage(1); - setupTevStage(2); - setupTevStage(3); - setupTevStage(4); - setupTevStage(5); -#undef setupTevStage + PICA::FragmentConfig fsConfig(regs); CachedProgram& programEntry = shaderCache[fsConfig]; OpenGL::Program& program = programEntry.program; if (!program.exists()) { - std::string vs = fragShaderGen.getVertexShader(regs); - std::string fs = fragShaderGen.generate(regs); + std::string fs = fragShaderGen.generate(fsConfig); - OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); OpenGL::Shader fragShader({fs.c_str(), fs.size()}, 
OpenGL::Fragment); - program.create({vertShader, fragShader}); + program.create({defaultShadergenVs, fragShader}); gl.useProgram(program); // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 @@ -875,6 +864,48 @@ OpenGL::Program& RendererGL::getSpecializedShader() { vec[3] = float((color >> 24) & 0xFF) / 255.0f; } + // Append lighting uniforms + if (fsConfig.lighting.enable) { + uniforms.globalAmbientLight = regs[InternalRegs::LightGlobalAmbient]; + for (int i = 0; i < 8; i++) { + auto& light = uniforms.lightUniforms[i]; + const u32 specular0 = regs[InternalRegs::Light0Specular0 + i * 0x10]; + const u32 specular1 = regs[InternalRegs::Light0Specular1 + i * 0x10]; + const u32 diffuse = regs[InternalRegs::Light0Diffuse + i * 0x10]; + const u32 ambient = regs[InternalRegs::Light0Ambient + i * 0x10]; + const u32 lightXY = regs[InternalRegs::Light0XY + i * 0x10]; + const u32 lightZ = regs[InternalRegs::Light0Z + i * 0x10]; + + const u32 spotlightXY = regs[InternalRegs::Light0SpotlightXY + i * 0x10]; + const u32 spotlightZ = regs[InternalRegs::Light0SpotlightZ + i * 0x10]; + const u32 attenuationBias = regs[InternalRegs::Light0AttenuationBias + i * 0x10]; + const u32 attenuationScale = regs[InternalRegs::Light0AttenuationScale + i * 0x10]; + +#define lightColorToVec3(value) \ + { \ + float(Helpers::getBits<20, 8>(value)) / 255.0f, \ + float(Helpers::getBits<10, 8>(value)) / 255.0f, \ + float(Helpers::getBits<0, 8>(value)) / 255.0f, \ + } + light.specular0 = lightColorToVec3(specular0); + light.specular1 = lightColorToVec3(specular1); + light.diffuse = lightColorToVec3(diffuse); + light.ambient = lightColorToVec3(ambient); + light.position[0] = Floats::f16::fromRaw(u16(lightXY)).toFloat32(); + light.position[1] = Floats::f16::fromRaw(u16(lightXY >> 16)).toFloat32(); + light.position[2] = Floats::f16::fromRaw(u16(lightZ)).toFloat32(); + + // Fixed point 1.11.1 to float, without negation + 
light.spotlightDirection[0] = float(s32(spotlightXY & 0x1FFF) << 19 >> 19) / 2047.0; + light.spotlightDirection[1] = float(s32((spotlightXY >> 16) & 0x1FFF) << 19 >> 19) / 2047.0; + light.spotlightDirection[2] = float(s32(spotlightZ & 0x1FFF) << 19 >> 19) / 2047.0; + + light.distanceAttenuationBias = Floats::f20::fromRaw(attenuationBias & 0xFFFFF).toFloat32(); + light.distanceAttenuationScale = Floats::f20::fromRaw(attenuationScale & 0xFFFFF).toFloat32(); +#undef lightColorToVec3 + } + } + gl.bindUBO(programEntry.uboBinding); glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms); diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index f6fa6c55..9f369e39 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,8 +1,6 @@ #version 410 core -in vec3 v_tangent; -in vec3 v_normal; -in vec3 v_bitangent; +in vec4 v_quaternion; in vec4 v_colour; in vec3 v_texcoord0; in vec2 v_texcoord1; @@ -27,7 +25,7 @@ uniform bool u_depthmapEnable; uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; -uniform sampler1DArray u_tex_lighting_lut; +uniform sampler2D u_tex_lighting_lut; uniform uint u_picaRegs[0x200 - 0x48]; @@ -37,6 +35,16 @@ uint readPicaReg(uint reg_addr) { return u_picaRegs[reg_addr - 0x48u]; } vec4 tevSources[16]; vec4 tevNextPreviousBuffer; bool tevUnimplementedSourceFlag = false; +vec3 normal; + +// See docs/lighting.md +const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu); + +bool isSamplerEnabled(uint environment_id, uint lut_id) { + uint index = 7 * environment_id + lut_id; + uint arrayIndex = (index >> 5); + return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u; +} // OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements): // https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml @@ -110,7 +118,7 @@ vec4 
tevCalculateCombiner(int tev_id) { case 6u: result.rgb = vec3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB case 7u: result = vec4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add - case 9u: result.rgb = min((source0.rgb + source1.rgb) * source2.rgb, 1.0); break; // Add then multiply + case 9u: result.rgb = min(source0.rgb + source1.rgb, 1.0) * source2.rgb; break; // Add then multiply default: break; } @@ -125,7 +133,7 @@ vec4 tevCalculateCombiner(int tev_id) { case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract case 8u: result.a = min(1.0, source0.a * source1.a + source2.a); break; // Multiply then add - case 9u: result.a = min(1.0, (source0.a + source1.a) * source2.a); break; // Add then multiply + case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break; // Add then multiply default: break; } } @@ -144,10 +152,16 @@ vec4 tevCalculateCombiner(int tev_id) { #define RG_LUT 5u #define RR_LUT 6u -float lutLookup(uint lut, uint light, float value) { - if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; - if (lut == SP_LUT) lut = light + 8; - return texture(u_tex_lighting_lut, vec2(value, lut)).r; +uint GPUREG_LIGHTi_CONFIG; +uint GPUREG_LIGHTING_CONFIG1; +uint GPUREG_LIGHTING_LUTINPUT_SELECT; +uint GPUREG_LIGHTING_LUTINPUT_SCALE; +uint GPUREG_LIGHTING_LUTINPUT_ABS; +bool error_unimpl = false; +vec4 unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); + +float lutLookup(uint lut, int index) { + return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r; } vec3 regToColor(uint reg) { @@ -178,136 +192,179 @@ float decodeFP(uint hex, uint E, uint M) { return uintBitsToFloat(hex); } +float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light_vector, vec3 half_vector) { + uint lut_index; + int 
bit_in_config1; + if (lut_id == SP_LUT) { + // These are the spotlight attenuation LUTs + bit_in_config1 = 8 + int(light_id & 7u); + lut_index = 8u + light_id; + } else if (lut_id <= 6) { + bit_in_config1 = 16 + int(lut_id); + lut_index = lut_id; + } else { + error_unimpl = true; + } + + bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + + if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + + uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; + uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); + switch (input_id) { + case 0u: { + delta = dot(normal, normalize(half_vector)); + break; + } + case 1u: { + delta = dot(normalize(v_view), normalize(half_vector)); + break; + } + case 2u: { + delta = dot(normal, normalize(v_view)); + break; + } + case 3u: { + delta = dot(light_vector, normal); + break; + } + case 4u: { + int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u))); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u))); + + // Sign extend them. 
Normally bitfieldExtract would do that but it's missing on some versions + // of GLSL so we do it manually + int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); + int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); + int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; + if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000; + + // These are fixed point 1.1.11 values, so we need to convert them to float + float x = float(se_x) / 2047.0; + float y = float(se_y) / 2047.0; + float z = float(se_z) / 2047.0; + vec3 spotlight_vector = vec3(x, y, z); + delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector + break; + } + case 5u: { + delta = 1.0; // TODO: cos (aka CP); + error_unimpl = true; + break; + } + default: { + delta = 1.0; + error_unimpl = true; + break; + } + } + + // 0 = enabled + if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { + // Two sided diffuse + if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); + } + int index = int(clamp(floor(delta * 255.0), 0.f, 255.f)); + return lutLookup(lut_index, index) * scale; + } else { + // Range is [-1, 1] so we need to map it to [0, 1] + int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); + if (index < 0) index += 256; + return lutLookup(lut_index, index) * scale; + } +} + +vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + vec3 u = q.xyz; + float s = q.w; + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); +} + // Implements the following algorthm: https://mathb.in/26766 void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - // Quaternions describe a transformation from surface-local space to eye space. 
- // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), - // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). - vec3 normal = normalize(v_normal); - vec3 tangent = normalize(v_tangent); - vec3 bitangent = normalize(v_bitangent); - vec3 view = normalize(v_view); - uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { - primary_color = secondary_color = vec4(1.0); + primary_color = secondary_color = vec4(0.0); return; } - uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u); uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2u) & 0x7u) + 1u; uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9u); primary_color = vec4(vec3(0.0), 1.0); secondary_color = vec4(vec3(0.0), 1.0); - primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT); - - uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); - uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); - uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u); - uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u); uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u); - float d[7]; + uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u); + GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u); + GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); + GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); - bool error_unimpl = false; + uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games ie. 
Toad Treasure Tracker + switch (bump_mode) { + default: { + normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion); + break; + } + } + + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); + + uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4); + bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; + + uint light_id; + vec3 light_vector; + vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { - uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); + light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); - uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); - uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + 0x10u * light_id); - uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + 0x10u * light_id); - uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + 0x10u * light_id); - uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + 0x10u * light_id); - uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); + uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u)); + uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + (light_id << 4u)); + uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + (light_id << 4u)); + uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + (light_id << 4u)); + uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + (light_id << 4u)); + GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + (light_id << 4u)); - vec3 light_vector = normalize(vec3( + float light_distance; + vec3 light_position = vec3( decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 
10u) - )); - - vec3 half_vector; + ); // Positional Light if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { - // error_unimpl = true; - half_vector = normalize(normalize(light_vector + v_view) + view); + light_vector = light_position + v_view; } // Directional light else { - half_vector = normalize(normalize(light_vector) + view); + light_vector = light_position; } - for (int c = 0; c < 7; c++) { - if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { - uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); - float scale = float(1u << scale_id); - if (scale_id >= 6u) scale /= 256.0; + light_distance = length(light_vector); + light_vector = normalize(light_vector); + half_vector = light_vector + normalize(v_view); - uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); - if (input_id == 0u) - d[c] = dot(normal, half_vector); - else if (input_id == 1u) - d[c] = dot(view, half_vector); - else if (input_id == 2u) - d[c] = dot(normal, view); - else if (input_id == 3u) - d[c] = dot(light_vector, normal); - else if (input_id == 4u) { - uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id); - vec3 spot_light_vector = normalize(vec3( - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) - )); - d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); - } else if (input_id == 5u) { - d[c] = 1.0; // TODO: cos (aka CP); - error_unimpl = true; - } else { - d[c] = 1.0; - } - - d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale; - if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); - } else { - d[c] = 1.0; - } - } - - uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4); - if 
(lookup_config == 0u) { - d[D1_LUT] = 0.0; - d[FR_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 1u) { - d[D0_LUT] = 0.0; - d[D1_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 2u) { - d[FR_LUT] = 0.0; - d[SP_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 3u) { - d[SP_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT] = 1.0; - } else if (lookup_config == 4u) { - d[FR_LUT] = 0.0; - } else if (lookup_config == 5u) { - d[D1_LUT] = 0.0; - } else if (lookup_config == 6u) { - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } - - float distance_factor = 1.0; // a - float indirect_factor = 1.0; // fi - float shadow_factor = 1.0; // o - - float NdotL = dot(normal, light_vector); // Li dot N + float NdotL = dot(normal, light_vector); // N dot Li // Two sided diffuse if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) @@ -315,20 +372,86 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { else NdotL = abs(NdotL); - float light_factor = distance_factor * d[SP_LUT] * indirect_factor * shadow_factor; + float geometric_factor; + bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; + bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (use_geo_0 || use_geo_1) { + geometric_factor = dot(half_vector, half_vector); + geometric_factor = geometric_factor == 0.0 ? 
0.0 : min(NdotL / geometric_factor, 1.0); + } - primary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); - secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + - regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); + float distance_attenuation = 1.0; + if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { + uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); + uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); + + float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); + + float delta = light_distance * distance_attenuation_scale + distance_attenuation_bias; + delta = clamp(delta, 0.0, 1.0); + int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); + distance_attenuation = lutLookup(16u + light_id, index); + } + + float spotlight_attenuation = lightLutLookup(environment_id, SP_LUT, light_id, light_vector, half_vector); + float specular0_distribution = lightLutLookup(environment_id, D0_LUT, light_id, light_vector, half_vector); + float specular1_distribution = lightLutLookup(environment_id, D1_LUT, light_id, light_vector, half_vector); + vec3 reflected_color; + reflected_color.r = lightLutLookup(environment_id, RR_LUT, light_id, light_vector, half_vector); + + if (isSamplerEnabled(environment_id, RG_LUT)) { + reflected_color.g = lightLutLookup(environment_id, RG_LUT, light_id, light_vector, half_vector); + } else { + reflected_color.g = reflected_color.r; + } + + if (isSamplerEnabled(environment_id, RB_LUT)) { + reflected_color.b = lightLutLookup(environment_id, RB_LUT, light_id, light_vector, half_vector); + } else { + reflected_color.b = reflected_color.r; + } + + vec3 specular0 = 
regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0_distribution; + vec3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1_distribution * reflected_color; + + specular0 *= use_geo_0 ? geometric_factor : 1.0; + specular1 *= use_geo_1 ? geometric_factor : 1.0; + + float clamp_factor = 1.0; + if (clamp_highlights && NdotL == 0.0) { + clamp_factor = 0.0; + } + + float light_factor = distance_attenuation * spotlight_attenuation; + diffuse_sum.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); + specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } + uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); + // Uses parameters from the last light as Fresnel is only applied to the last light + float fresnel_factor; + + if (fresnel_output1 == 1u || fresnel_output2 == 1u) { + fresnel_factor = lightLutLookup(environment_id, FR_LUT, light_id, light_vector, half_vector); + } + + if (fresnel_output1 == 1u) { + diffuse_sum.a = fresnel_factor; + } - if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; - if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; + if (fresnel_output2 == 1u) { + specular_sum.a = fresnel_factor; + } + + uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u); + vec4 global_ambient = vec4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0); + primary_color = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0)); + secondary_color = clamp(specular_sum, vec4(0.0), vec4(1.0)); if (error_unimpl) { - // secondary_color = primary_color = vec4(1.0, 0., 1.0, 1.0); + // secondary_color = primary_color = unimpl_color; } } @@ -414,4 +537,4 @@ void main() { break; } } -} +} \ No newline at end of file diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert index a25d7a6d..057f9a88 100644 --- a/src/host_shaders/opengl_vertex_shader.vert +++ 
b/src/host_shaders/opengl_vertex_shader.vert @@ -9,9 +9,7 @@ layout(location = 5) in float a_texcoord0_w; layout(location = 6) in vec3 a_view; layout(location = 7) in vec2 a_texcoord2; -out vec3 v_normal; -out vec3 v_tangent; -out vec3 v_bitangent; +out vec4 v_quaternion; out vec4 v_colour; out vec3 v_texcoord0; out vec2 v_texcoord1; @@ -35,12 +33,6 @@ vec4 abgr8888ToVec4(uint abgr) { return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); } -vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { - vec3 u = q.xyz; - float s = q.w; - return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); -} - // Convert an arbitrary-width floating point literal to an f32 float decodeFP(uint hex, uint E, uint M) { uint width = M + E + 1u; @@ -73,10 +65,6 @@ void main() { v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); v_view = a_view; - v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); - v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); - v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); - for (int i = 0; i < 6; i++) { v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); } @@ -95,4 +83,6 @@ void main() { // There's also another, always-on clipping plane based on vertex z gl_ClipDistance[0] = -a_coords.z; gl_ClipDistance[1] = dot(clipData, a_coords); + + v_quaternion = a_quaternion; } diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index a6a1ff00..02bf3cd1 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -147,7 +147,8 @@ static void configInit() { static const retro_variable values[] = { {"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"}, {"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"}, - {"panda3ds_use_ubershader", "Use ubershaders (No stutter, maybe slower); enabled|disabled"}, + {"panda3ds_use_ubershader", 
EmulatorConfig::ubershaderDefault ? "Use ubershaders (No stutter, maybe slower); enabled|disabled" + : "Use ubershaders (No stutter, maybe slower); disabled|enabled"}, {"panda3ds_use_vsync", "Enable VSync; enabled|disabled"}, {"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"}, {"panda3ds_use_audio", "Enable audio; disabled|enabled"}, @@ -155,6 +156,8 @@ static void configInit() { {"panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled"}, {"panda3ds_battery_level", "Battery percentage; 5|10|20|30|50|70|90|100"}, {"panda3ds_use_charger", "Charger plugged; enabled|disabled"}, + {"panda3ds_ubershader_lighting_override", "Force shadergen when rendering lights; enabled|disabled"}, + {"panda3ds_ubershader_lighting_override_threshold", "Light threshold for forcing shadergen; 1|2|3|4|5|6|7|8"}, {nullptr, nullptr}, }; @@ -175,6 +178,8 @@ static void configUpdate() { config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false); config.accurateShaderMul = FetchVariableBool("panda3ds_accurate_shader_mul", false); config.useUbershaders = FetchVariableBool("panda3ds_use_ubershader", true); + config.forceShadergenForLights = FetchVariableBool("panda3ds_ubershader_lighting_override", true); + config.lightShadergenThreshold = std::clamp(std::stoi(FetchVariable("panda3ds_ubershader_lighting_override_threshold", "1")), 1, 8); config.discordRpcEnabled = false; config.save(); diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp index 828fb784..4a08650a 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp @@ -355,46 +355,57 @@ namespace OpenGL { } }; - enum ShaderType { - Fragment = GL_FRAGMENT_SHADER, - Vertex = GL_VERTEX_SHADER, - Geometry = GL_GEOMETRY_SHADER, - Compute = GL_COMPUTE_SHADER, - TessControl = GL_TESS_CONTROL_SHADER, - TessEvaluation = GL_TESS_EVALUATION_SHADER - }; + enum ShaderType { + Fragment = GL_FRAGMENT_SHADER, + Vertex = GL_VERTEX_SHADER, + Geometry = 
GL_GEOMETRY_SHADER, + Compute = GL_COMPUTE_SHADER, + TessControl = GL_TESS_CONTROL_SHADER, + TessEvaluation = GL_TESS_EVALUATION_SHADER + }; - struct Shader { - GLuint m_handle = 0; + struct Shader { + GLuint m_handle = 0; - Shader() {} - Shader(const std::string_view source, ShaderType type) { create(source, static_cast<GLenum>(type)); } + Shader() {} + Shader(const std::string_view source, ShaderType type) { create(source, static_cast<GLenum>(type)); } - // Returns whether compilation failed or not - bool create(const std::string_view source, GLenum type) { - m_handle = glCreateShader(type); - const GLchar* const sources[1] = { source.data() }; + // Returns true if the shader compiled successfully, false otherwise + bool create(const std::string_view source, GLenum type) { + m_handle = glCreateShader(type); + const GLchar* const sources[1] = {source.data()}; - glShaderSource(m_handle, 1, sources, nullptr); - glCompileShader(m_handle); + glShaderSource(m_handle, 1, sources, nullptr); + glCompileShader(m_handle); - GLint success; - glGetShaderiv(m_handle, GL_COMPILE_STATUS, &success); - if (success == GL_FALSE) { - char buf[4096]; - glGetShaderInfoLog(m_handle, 4096, nullptr, buf); - fprintf(stderr, "Failed to compile shader\nError: %s\n", buf); - glDeleteShader(m_handle); + GLint success; + glGetShaderiv(m_handle, GL_COMPILE_STATUS, &success); + if (success == GL_FALSE) { + char buf[4096]; + glGetShaderInfoLog(m_handle, 4096, nullptr, buf); + fprintf(stderr, "Failed to compile shader\nError: %s\n", buf); + glDeleteShader(m_handle); - m_handle = 0; - } + m_handle = 0; + } - return m_handle != 0; - } + return m_handle != 0; + } - GLuint handle() const { return m_handle; } - bool exists() const { return m_handle != 0; } - }; + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + + void free() { + if (exists()) { + glDeleteShader(m_handle); + m_handle = 0; + } + } + +#ifdef OPENGL_DESTRUCTORS + ~Shader() { free(); } +#endif + }; struct Program { GLuint m_handle = 0; 
@@ -431,6 +442,10 @@ namespace OpenGL { m_handle = 0; } } + +#ifdef OPENGL_DESTRUCTORS + ~Program() { free(); } +#endif }; static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) {