Merge pull request #540 from wheremyfoodat/specialized-shaders-2

Finishing shader generator & mostly fixing lighting (In both shadergen & ubershader)
This commit is contained in:
wheremyfoodat 2024-07-20 15:35:40 +00:00 committed by GitHub
commit bbcd21de05
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 1136 additions and 432 deletions

280
.github/gles.patch vendored
View file

@ -1,52 +1,3 @@
diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp
index a11a6ffa..77486a09 100644
--- a/src/core/renderer_gl/renderer_gl.cpp
+++ b/src/core/renderer_gl/renderer_gl.cpp
@@ -357,27 +357,27 @@ void RendererGL::bindTexturesToSlots() {
}
glActiveTexture(GL_TEXTURE0 + 3);
- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
glActiveTexture(GL_TEXTURE0);
}
void RendererGL::updateLightingLUT() {
- gpu.lightingLUTDirty = false;
- std::array<u16, GPU::LightingLutSize> u16_lightinglut;
-
- for (int i = 0; i < gpu.lightingLUT.size(); i++) {
- uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
- u16_lightinglut[i] = value * 65535 / 4095;
- }
-
- glActiveTexture(GL_TEXTURE0 + 3);
- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
- glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- glActiveTexture(GL_TEXTURE0);
+ // gpu.lightingLUTDirty = false;
+ // std::array<u16, GPU::LightingLutSize> u16_lightinglut;
+
+ // for (int i = 0; i < gpu.lightingLUT.size(); i++) {
+ // uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
+ // u16_lightinglut[i] = value * 65535 / 4095;
+ // }
+
+ // glActiveTexture(GL_TEXTURE0 + 3);
+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
+ // glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ // glActiveTexture(GL_TEXTURE0);
}
void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> vertices) {
diff --git a/src/host_shaders/opengl_display.frag b/src/host_shaders/opengl_display.frag
index 612671c8..1937f711 100644
--- a/src/host_shaders/opengl_display.frag
@ -70,7 +21,7 @@ index 990e2f80..2e7842ac 100644
void main() {
diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag
index f6fa6c55..bb88e278 100644
index 9f369e39..b4bb19d3 100644
--- a/src/host_shaders/opengl_fragment_shader.frag
+++ b/src/host_shaders/opengl_fragment_shader.frag
@@ -1,4 +1,5 @@
@ -78,36 +29,18 @@ index f6fa6c55..bb88e278 100644
+#version 300 es
+precision mediump float;
in vec3 v_tangent;
in vec3 v_normal;
@@ -27,7 +28,7 @@ uniform bool u_depthmapEnable;
uniform sampler2D u_tex0;
uniform sampler2D u_tex1;
uniform sampler2D u_tex2;
-uniform sampler1DArray u_tex_lighting_lut;
+// uniform sampler1DArray u_tex_lighting_lut;
in vec4 v_quaternion;
in vec4 v_colour;
@@ -164,11 +165,17 @@ float lutLookup(uint lut, int index) {
return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r;
}
uniform uint u_picaRegs[0x200 - 0x48];
@@ -145,16 +146,23 @@ vec4 tevCalculateCombiner(int tev_id) {
#define RR_LUT 6u
float lutLookup(uint lut, uint light, float value) {
- if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
- if (lut == SP_LUT) lut = light + 8;
- return texture(u_tex_lighting_lut, vec2(value, lut)).r;
+ // if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
+ // if (lut == SP_LUT) lut = light + 8;
+ // return texture(u_tex_lighting_lut, vec2(value, lut)).r;
+ return 0.0;
+}
+
+// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead
+// some gles versions have bitfieldExtractCompat and complain if you redefine it, some don't and compile error, using this instead
+uint bitfieldExtractCompat(uint val, int off, int size) {
+ uint mask = uint((1 << size) - 1);
+ return uint(val >> off) & mask;
}
+}
+
vec3 regToColor(uint reg) {
// Normalization scale to convert from [0...255] to [0.0...1.0]
const float scale = 1.0 / 255.0;
@ -117,89 +50,109 @@ index f6fa6c55..bb88e278 100644
}
// Convert an arbitrary-width floating point literal to an f32
@@ -189,7 +197,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
vec3 view = normalize(v_view);
@@ -208,16 +215,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment
- if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
+ if (!current_sampler_enabled || (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
return 1.0;
}
- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
float scale = float(1u << scale_id);
if (scale_id >= 6u) scale /= 256.0;
float delta = 1.0;
- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
switch (input_id) {
case 0u: {
delta = dot(normal, normalize(half_vector));
@@ -239,11 +246,11 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u)));
int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u)));
- // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions
+ // Sign extend them. Normally bitfieldExtractCompat would do that but it's missing on some versions
// of GLSL so we do it manually
- int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
- int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
- int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);
+ int se_x = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
+ int se_y = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
+ int se_z = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);
if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000;
if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000;
@@ -270,9 +277,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
}
// 0 = enabled
- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
// Two sided diffuse
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
delta = max(delta, 0.0);
} else {
delta = abs(delta);
@@ -296,7 +303,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
// Implements the following algorthm: https://mathb.in/26766
void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu);
- if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
primary_color = secondary_color = vec4(1.0);
primary_color = secondary_color = vec4(0.0);
return;
}
@@ -213,7 +221,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
bool error_unimpl = false;
@@ -313,7 +320,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);
- uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2);
+ uint bump_mode = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 28, 2);
// Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker
switch (bump_mode) {
@@ -326,15 +333,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);
- uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4);
- bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
+ uint environment_id = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 4, 4);
+ bool clamp_highlights = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
uint light_id;
vec3 light_vector;
vec3 half_vector;
for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) {
- uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3);
+ uint light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3);
- light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
+ light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id);
@@ -224,14 +232,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u));
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u));
@@ -346,12 +353,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
vec3 light_vector = normalize(vec3(
float light_distance;
vec3 light_position = vec3(
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
));
vec3 half_vector;
);
// Positional Light
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
// error_unimpl = true;
half_vector = normalize(normalize(light_vector + v_view) + view);
}
@@ -242,12 +250,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
light_vector = light_position + v_view;
}
for (int c = 0; c < 7; c++) {
- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
float scale = float(1u << scale_id);
if (scale_id >= 6u) scale /= 256.0;
- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
if (input_id == 0u)
d[c] = dot(normal, half_vector);
else if (input_id == 1u)
@@ -260,9 +268,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id);
vec3 spot_light_vector = normalize(vec3(
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
));
d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP);
} else if (input_id == 5u) {
@@ -273,13 +281,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
}
d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale;
- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
} else {
d[c] = 1.0;
}
}
- uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4);
+ uint lookup_config = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 4, 4);
if (lookup_config == 0u) {
d[D1_LUT] = 0.0;
d[FR_LUT] = 0.0;
@@ -310,7 +318,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
float NdotL = dot(normal, light_vector); // Li dot N
@@ -367,23 +374,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
float NdotL = dot(normal, light_vector); // N dot Li
// Two sided diffuse
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u)
@ -207,19 +160,40 @@ index f6fa6c55..bb88e278 100644
NdotL = max(0.0, NdotL);
else
NdotL = abs(NdotL);
@@ -321,8 +329,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] +
regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT]));
float geometric_factor;
- bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
- bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
+ bool use_geo_0 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
+ bool use_geo_1 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
if (use_geo_0 || use_geo_1) {
geometric_factor = dot(half_vector, half_vector);
geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0);
}
float distance_attenuation = 1.0;
- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
+ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
+ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u);
float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u);
@@ -428,8 +435,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1);
}
- uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1);
- uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1);
+ uint fresnel_output1 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 2, 1);
+ uint fresnel_output2 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 3, 1);
if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT];
if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT];
// Uses parameters from the last light as Fresnel is only applied to the last light
float fresnel_factor;
diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert
index a25d7a6d..7cf40398 100644
index 057f9a88..dc735ced 100644
--- a/src/host_shaders/opengl_vertex_shader.vert
+++ b/src/host_shaders/opengl_vertex_shader.vert
@@ -1,4 +1,6 @@
@ -230,7 +204,7 @@ index a25d7a6d..7cf40398 100644
layout(location = 0) in vec4 a_coords;
layout(location = 1) in vec4 a_quaternion;
@@ -20,7 +22,7 @@ out vec2 v_texcoord2;
@@ -18,7 +20,7 @@ out vec2 v_texcoord2;
flat out vec4 v_textureEnvColor[6];
flat out vec4 v_textureEnvBufferColor;
@ -239,7 +213,7 @@ index a25d7a6d..7cf40398 100644
// TEV uniforms
uniform uint u_textureEnvColor[6];
@@ -93,6 +95,6 @@ void main() {
@@ -81,8 +83,8 @@ void main() {
);
// There's also another, always-on clipping plane based on vertex z
@ -247,16 +221,20 @@ index a25d7a6d..7cf40398 100644
- gl_ClipDistance[1] = dot(clipData, a_coords);
+ // gl_ClipDistance[0] = -a_coords.z;
+ // gl_ClipDistance[1] = dot(clipData, a_coords);
v_quaternion = a_quaternion;
}
diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp
index f368f573..5ead7f63 100644
index 4a08650a..21af37e3 100644
--- a/third_party/opengl/opengl.hpp
+++ b/third_party/opengl/opengl.hpp
@@ -520,21 +520,21 @@ namespace OpenGL {
@@ -583,22 +583,22 @@ namespace OpenGL {
static void disableScissor() { glDisable(GL_SCISSOR_TEST); }
static void enableBlend() { glEnable(GL_BLEND); }
static void disableBlend() { glDisable(GL_BLEND); }
static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); }
- static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); }
- static void disableLogicOp() { glDisable(GL_COLOR_LOGIC_OP); }
+ static void enableLogicOp() { /* glEnable(GL_COLOR_LOGIC_OP); */ }
+ static void disableLogicOp() { /* glDisable(GL_COLOR_LOGIC_OP); */ }
static void enableDepth() { glEnable(GL_DEPTH_TEST); }
static void disableDepth() { glDisable(GL_DEPTH_TEST); }

79
docs/3ds/lighting.md Normal file
View file

@ -0,0 +1,79 @@
## Info on the lighting implementation
### Missing shadow attenuation
Shadow attenuation samples a texture unit, and that likely needs render to texture for most games so that they can construct
their shadow map. As such, the colors are not multiplied by the shadow attenuation value, so there are no shadows.
### Missing bump mapping
Bump mapping also samples a texture unit. It most likely doesn't need render to texture, but it may need a better texture sampling
implementation (such as GPUREG_TEXUNITi_BORDER_COLOR, GPUREG_TEXUNITi_BORDER_PARAM). Bump mapping would work for some things,
namely the 3ds-examples bump mapping demo, but would break others such as Toad Treasure Tracker with a naive `texture` implementation.
Also the CP configuration is missing, because it needs a tangent map implementation. It is currently marked with error_unimpl.
### samplerEnabledBitfields
Holds the enabled state of the lighting samplers for the various PICA lighting configurations,
as explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0
```c
const bool samplerEnabled[9 * 7] = bool[9 * 7](
// D0 D1 SP FR RB RG RR
true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR
false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR
true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR
true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR
true, true, true, false, true, true, true, // Configuration 4: All except for FR
true, false, true, true, true, true, true, // Configuration 5: All except for D1
true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG
false, false, false, false, false, false, false, // Configuration 7: Unused
true, true, true, true, true, true, true // Configuration 8: All
);
```
The above has been condensed to two uints for performance reasons.
You can confirm they are the same by running the following:
```c
const uint samplerEnabledBitfields[2] = { 0x7170e645u, 0x7f013fefu };
for (int i = 0; i < 9 * 7; i++) {
unsigned arrayIndex = (i >> 5);
bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u;
if (samplerEnabled[i] == b) {
printf("%d: happy\n", i);
} else {
printf("%d: unhappy\n", i);
}
}
```
### lightLutLookup
lut_id is one of these values
0 D0
1 D1
2 SP
3 FR
4 RB
5 RG
6 RR
lut_index on the other hand represents the actual index of the LUT in the texture
u_tex_lighting_lut has 24 LUTs and they are used like so:
0 D0
1 D1
2 is missing because SP uses LUTs 8-15
3 FR
4 RB
5 RG
6 RR
8-15 SP0-7
16-23 DA0-7, but this is not handled in this function as the lookup is a bit different
The light environment configuration controls which LUTs are available for use
If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1
If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue.
### Distance attenuation
Distance attenuation is computed differently from the other factors, for example
it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use
GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the
fragment and the distance attenuation scale and bias to calculate where in the LUT to look up.
See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE

View file

@ -29,20 +29,204 @@ namespace PICA {
std::array<u32, 4 * 6> tevConfigs;
};
// Per-light state packed into 16 bits. One entry is filled in per active light by the
// LightingConfig constructor; entries for inactive lights are left at their zero-initialized value.
struct Light {
union {
// All of the fields below viewed as a single 16-bit value
u16 raw;
// Hardware light ID (0-7) that this slot maps to, read from the light permutation register
BitField<0, 3, u16> num;
// Bit 0 of this light's config register
BitField<3, 1, u16> directional;
// Bit 1 of this light's config register
BitField<4, 1, u16> twoSidedDiffuse;
// Stored inverted relative to LightConfig1, where a set bit (24 + num) means "disabled"
BitField<5, 1, u16> distanceAttenuationEnable;
// Stored inverted relative to LightConfig1, where a set bit (8 + num) means "disabled"
BitField<6, 1, u16> spotAttenuationEnable;
// Bits 2 and 3 of this light's config register
BitField<7, 1, u16> geometricFactor0;
BitField<8, 1, u16> geometricFactor1;
// Stored inverted relative to LightConfig1, where a set bit (num) means "disabled"
BitField<9, 1, u16> shadowEnable;
};
};
// Configuration of a single lighting LUT (D0, D1, SP, FR, RB, RG or RR), as decoded by the
// LightingConfig constructor from the LightConfig1, LightLUTAbs, LightLUTSelect and LightLUTScale registers.
struct LightingLUTConfig {
union {
// All of the fields below viewed as a single 32-bit value
u32 raw;
// 1 if this LUT is enabled (the corresponding "disable" bit in LightConfig1 is clear)
BitField<0, 1, u32> enable;
// 1 if the matching bit in LightLUTAbs is clear (the register stores this flag inverted)
BitField<1, 1, u32> absInput;
// 3-bit input selector for this LUT, taken from LightLUTSelect
BitField<2, 3, u32> type;
};
// Output scale for this LUT, looked up from the 3-bit scale ID in LightLUTScale.
// Only written for enabled LUTs.
float scale;
};
struct LightingConfig {
union {
u32 raw{};
BitField<0, 1, u32> enable;
BitField<1, 4, u32> lightNum;
BitField<5, 2, u32> bumpMode;
BitField<7, 2, u32> bumpSelector;
BitField<9, 1, u32> bumpRenorm;
BitField<10, 1, u32> clampHighlights;
BitField<11, 4, u32> config;
BitField<15, 1, u32> enablePrimaryAlpha;
BitField<16, 1, u32> enableSecondaryAlpha;
BitField<17, 1, u32> enableShadow;
BitField<18, 1, u32> shadowPrimary;
BitField<19, 1, u32> shadowSecondary;
BitField<20, 1, u32> shadowInvert;
BitField<21, 1, u32> shadowAlpha;
BitField<22, 2, u32> shadowSelector;
};
std::array<LightingLUTConfig, 7> luts{};
std::array<Light, 8> lights{};
LightingConfig(const std::array<u32, 0x300>& regs) {
// Ignore lighting registers if it's disabled
if ((regs[InternalRegs::LightingEnable] & 1) == 0) {
return;
}
const u32 config0 = regs[InternalRegs::LightConfig0];
const u32 config1 = regs[InternalRegs::LightConfig1];
const u32 totalLightCount = Helpers::getBits<0, 3>(regs[InternalRegs::LightNumber]) + 1;
enable = 1;
lightNum = totalLightCount;
enableShadow = Helpers::getBit<0>(config0);
if (enableShadow) [[unlikely]] {
shadowPrimary = Helpers::getBit<16>(config0);
shadowSecondary = Helpers::getBit<17>(config0);
shadowInvert = Helpers::getBit<18>(config0);
shadowAlpha = Helpers::getBit<19>(config0);
shadowSelector = Helpers::getBits<24, 2>(config0);
}
enablePrimaryAlpha = Helpers::getBit<2>(config0);
enableSecondaryAlpha = Helpers::getBit<3>(config0);
config = Helpers::getBits<4, 4>(config0);
bumpSelector = Helpers::getBits<22, 2>(config0);
clampHighlights = Helpers::getBit<27>(config0);
bumpMode = Helpers::getBits<28, 2>(config0);
bumpRenorm = Helpers::getBit<30>(config0) ^ 1; // 0 = enable so flip it with xor
for (int i = 0; i < totalLightCount; i++) {
auto& light = lights[i];
light.num = (regs[InternalRegs::LightPermutation] >> (i * 4)) & 0x7;
const u32 lightConfig = regs[InternalRegs::Light0Config + 0x10 * light.num];
light.directional = Helpers::getBit<0>(lightConfig);
light.twoSidedDiffuse = Helpers::getBit<1>(lightConfig);
light.geometricFactor0 = Helpers::getBit<2>(lightConfig);
light.geometricFactor1 = Helpers::getBit<3>(lightConfig);
light.shadowEnable = ((config1 >> light.num) & 1) ^ 1; // This also does 0 = enabled
light.spotAttenuationEnable = ((config1 >> (8 + light.num)) & 1) ^ 1; // Same here
light.distanceAttenuationEnable = ((config1 >> (24 + light.num)) & 1) ^ 1; // Of course same here
}
LightingLUTConfig& d0 = luts[Lights::LUT_D0];
LightingLUTConfig& d1 = luts[Lights::LUT_D1];
LightingLUTConfig& sp = luts[spotlightLutIndex];
LightingLUTConfig& fr = luts[Lights::LUT_FR];
LightingLUTConfig& rb = luts[Lights::LUT_RB];
LightingLUTConfig& rg = luts[Lights::LUT_RG];
LightingLUTConfig& rr = luts[Lights::LUT_RR];
d0.enable = Helpers::getBit<16>(config1) == 0;
d1.enable = Helpers::getBit<17>(config1) == 0;
fr.enable = Helpers::getBit<19>(config1) == 0;
rb.enable = Helpers::getBit<20>(config1) == 0;
rg.enable = Helpers::getBit<21>(config1) == 0;
rr.enable = Helpers::getBit<22>(config1) == 0;
sp.enable = 1;
const u32 lutAbs = regs[InternalRegs::LightLUTAbs];
const u32 lutSelect = regs[InternalRegs::LightLUTSelect];
const u32 lutScale = regs[InternalRegs::LightLUTScale];
static constexpr float scales[] = {1.0f, 2.0f, 4.0f, 8.0f, 0.0f, 0.0f, 0.25f, 0.5f};
if (d0.enable) {
d0.absInput = Helpers::getBit<1>(lutAbs) == 0;
d0.type = Helpers::getBits<0, 3>(lutSelect);
d0.scale = scales[Helpers::getBits<0, 3>(lutScale)];
}
if (d1.enable) {
d1.absInput = Helpers::getBit<5>(lutAbs) == 0;
d1.type = Helpers::getBits<4, 3>(lutSelect);
d1.scale = scales[Helpers::getBits<4, 3>(lutScale)];
}
sp.absInput = Helpers::getBit<9>(lutAbs) == 0;
sp.type = Helpers::getBits<8, 3>(lutSelect);
sp.scale = scales[Helpers::getBits<8, 3>(lutScale)];
if (fr.enable) {
fr.absInput = Helpers::getBit<13>(lutAbs) == 0;
fr.type = Helpers::getBits<12, 3>(lutSelect);
fr.scale = scales[Helpers::getBits<12, 3>(lutScale)];
}
if (rb.enable) {
rb.absInput = Helpers::getBit<17>(lutAbs) == 0;
rb.type = Helpers::getBits<16, 3>(lutSelect);
rb.scale = scales[Helpers::getBits<16, 3>(lutScale)];
}
if (rg.enable) {
rg.absInput = Helpers::getBit<21>(lutAbs) == 0;
rg.type = Helpers::getBits<20, 3>(lutSelect);
rg.scale = scales[Helpers::getBits<20, 3>(lutScale)];
}
if (rr.enable) {
rr.absInput = Helpers::getBit<25>(lutAbs) == 0;
rr.type = Helpers::getBits<24, 3>(lutSelect);
rr.scale = scales[Helpers::getBits<24, 3>(lutScale)];
}
}
};
// Config used for identifying unique fragment pipeline configurations
struct FragmentConfig {
OutputConfig outConfig;
TextureConfig texConfig;
LightingConfig lighting;
bool operator==(const FragmentConfig& config) const {
// Hash function and equality operator required by std::unordered_map
return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
}
FragmentConfig(const std::array<u32, 0x300>& regs) : lighting(regs) {
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);
outConfig.alphaTestFunction =
(alphaTestConfig & 1) ? static_cast<PICA::CompareFunction>(alphaTestFunction) : PICA::CompareFunction::Always;
outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1;
texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
// Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like
// {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO
#define setupTevStage(stage) \
std::memcpy(&texConfig.tevConfigs[stage * 4], &regs[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \
texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 4];
setupTevStage(0);
setupTevStage(1);
setupTevStage(2);
setupTevStage(3);
setupTevStage(4);
setupTevStage(5);
#undef setupTevStage
}
};
static_assert(
std::has_unique_object_representations<OutputConfig>() && std::has_unique_object_representations<TextureConfig>() &&
std::has_unique_object_representations<FragmentConfig>()
std::has_unique_object_representations<Light>()
);
} // namespace PICA

View file

@ -1,10 +1,26 @@
#pragma once
#include <array>
#include <cstddef>
#include <type_traits>
#include "helpers.hpp"
namespace PICA {
// Per-light block of uniform data. FragmentUniforms holds an array of 8 of these as its last member.
// NOTE(review): the field layout presumably has to match the light struct declared in the shaders - confirm against the shader source
struct LightUniform {
using vec3 = std::array<float, 3>;
// std140 requires vec3s be aligned to 16 bytes
// Each vec3 below is 12 bytes, so alignas(16) leaves 4 bytes of padding after it
alignas(16) vec3 specular0;
alignas(16) vec3 specular1;
alignas(16) vec3 diffuse;
alignas(16) vec3 ambient;
alignas(16) vec3 position;
alignas(16) vec3 spotlightDirection;
// The two scalars follow the last vec3 directly; the struct itself is 16-byte aligned
float distanceAttenuationBias;
float distanceAttenuationScale;
};
struct FragmentUniforms {
using vec3 = std::array<float, 3>;
using vec4 = std::array<float, 4>;
@ -17,5 +33,13 @@ namespace PICA {
alignas(16) vec4 constantColors[tevStageCount];
alignas(16) vec4 tevBufferColor;
alignas(16) vec4 clipCoords;
// Note: We upload this as a u32 and decode on GPU
u32 globalAmbientLight;
// NOTE: THIS MUST BE LAST so that if lighting is disabled we can potentially omit uploading it
LightUniform lightUniforms[8];
};
// Assert that lightUniforms is the last member of the structure
static_assert(offsetof(FragmentUniforms, lightUniforms) + 8 * sizeof(LightUniform) == sizeof(FragmentUniforms));
} // namespace PICA

View file

@ -67,7 +67,29 @@ namespace PICA {
ColourBufferLoc = 0x11D,
FramebufferSize = 0x11E,
//LightingRegs
// Lighting registers
LightingEnable = 0x8F,
Light0Specular0 = 0x140,
Light0Specular1 = 0x141,
Light0Diffuse = 0x142,
Light0Ambient = 0x143,
Light0XY = 0x144,
Light0Z = 0x145,
Light0SpotlightXY = 0x146,
Light0SpotlightZ = 0x147,
Light0Config = 0x149,
Light0AttenuationBias = 0x14A,
Light0AttenuationScale = 0x14B,
LightGlobalAmbient = 0x1C0,
LightNumber = 0x1C2,
LightConfig0 = 0x1C3,
LightConfig1 = 0x1C4,
LightPermutation = 0x1D9,
LightLUTAbs = 0x1D0,
LightLUTSelect = 0x1D1,
LightLUTScale = 0x1D2,
LightingLUTIndex = 0x01C5,
LightingLUTData0 = 0x01C8,
LightingLUTData1 = 0x01C9,
@ -231,7 +253,8 @@ namespace PICA {
enum : u32 {
LUT_D0 = 0,
LUT_D1,
LUT_FR,
// LUT 2 is not used, the emulator internally uses it for referring to the current source's spotlight in shaders
LUT_FR = 0x3,
LUT_RB,
LUT_RG,
LUT_RR,
@ -255,6 +278,11 @@ namespace PICA {
};
}
// There's actually 8 different LUTs (SP0-SP7), one for each light with different indices (8-15)
// We use an unused LUT value for "this light source's spotlight" instead and figure out which light source to use in compileLUTLookup
// This is particularly intuitive in several places, such as checking if a LUT is enabled
static constexpr int spotlightLutIndex = 2;
enum class TextureFmt : u32 {
RGBA8 = 0x0,
RGB8 = 0x1,

View file

@ -2,6 +2,7 @@
#include <string>
#include "PICA/gpu.hpp"
#include "PICA/pica_frag_config.hpp"
#include "PICA/regs.hpp"
#include "helpers.hpp"
@ -13,25 +14,25 @@ namespace PICA::ShaderGen {
enum class Language { GLSL };
class FragmentGenerator {
using PICARegs = std::array<u32, 0x300>;
API api;
Language language;
void compileTEV(std::string& shader, int stage, const PICARegs& regs);
void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index);
void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index);
void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index);
void compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config);
void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config);
void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config);
void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index, const PICA::FragmentConfig& config);
void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
void applyAlphaTest(std::string& shader, const PICARegs& regs);
u32 textureConfig = 0;
void applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config);
void compileLights(std::string& shader, const PICA::FragmentConfig& config);
void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID);
bool isSamplerEnabled(u32 environmentID, u32 lutID);
public:
FragmentGenerator(API api, Language language) : api(api), language(language) {}
std::string generate(const PICARegs& regs);
std::string getVertexShader(const PICARegs& regs);
std::string generate(const PICA::FragmentConfig& config);
std::string getDefaultVertexShader();
void setTarget(API api, Language language) {
this->api = api;

View file

@ -13,12 +13,23 @@ struct EmulatorConfig {
static constexpr bool shaderJitDefault = false;
#endif
// For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are
// horrible. On other platforms we default to ubershader + shadergen fallback for lights
#if defined(__ANDROID__) || defined(__APPLE__)
static constexpr bool ubershaderDefault = false;
#else
static constexpr bool ubershaderDefault = true;
#endif
bool shaderJitEnabled = shaderJitDefault;
bool discordRpcEnabled = false;
bool useUbershaders = ubershaderDefault;
bool accurateShaderMul = false;
// Toggles whether to force shadergen when there are N or more lights active and we're using the ubershader, for better performance
bool forceShadergenForLights = true;
int lightShadergenThreshold = 1;
RendererType rendererType = RendererType::OpenGL;
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null;

View file

@ -20,6 +20,7 @@ enum class RendererType : s8 {
Software = 3,
};
struct EmulatorConfig;
class GPU;
struct SDL_Window;
@ -46,6 +47,8 @@ class Renderer {
u32 outputWindowWidth = 400;
u32 outputWindowHeight = 240 * 2;
EmulatorConfig* emulatorConfig = nullptr;
public:
Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
virtual ~Renderer();
@ -101,4 +104,6 @@ class Renderer {
outputWindowWidth = width;
outputWindowHeight = height;
}
void setConfig(EmulatorConfig* config) { emulatorConfig = config; }
};

View file

@ -30,7 +30,7 @@ class RendererGL final : public Renderer {
OpenGL::VertexArray vao;
OpenGL::VertexBuffer vbo;
bool usingUbershader = true;
bool enableUbershader = true;
// Data
struct {
@ -63,9 +63,12 @@ class RendererGL final : public Renderer {
OpenGL::VertexBuffer dummyVBO;
OpenGL::Texture screenTexture;
GLuint lightLUTTextureArray;
OpenGL::Texture lightLUTTexture;
OpenGL::Framebuffer screenFramebuffer;
OpenGL::Texture blankTexture;
// The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation
// We can compile this once and then link it with all other generated fragment shaders
OpenGL::Shader defaultShadergenVs;
// Cached recompiled fragment shader
struct CachedProgram {
@ -107,7 +110,7 @@ class RendererGL final : public Renderer {
virtual std::string getUbershader() override;
virtual void setUbershader(const std::string& shader) override;
virtual void setUbershaderSetting(bool value) override { usingUbershader = value; }
virtual void setUbershaderSetting(bool value) override { enableUbershader = value; }
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);

View file

@ -64,6 +64,9 @@ void EmulatorConfig::load() {
vsyncEnabled = toml::find_or<toml::boolean>(gpu, "EnableVSync", true);
useUbershaders = toml::find_or<toml::boolean>(gpu, "UseUbershaders", ubershaderDefault);
accurateShaderMul = toml::find_or<toml::boolean>(gpu, "AccurateShaderMultiplication", false);
forceShadergenForLights = toml::find_or<toml::boolean>(gpu, "ForceShadergenForLighting", true);
lightShadergenThreshold = toml::find_or<toml::integer>(gpu, "ShadergenLightThreshold", 1);
}
}
@ -130,6 +133,8 @@ void EmulatorConfig::save() {
data["GPU"]["EnableVSync"] = vsyncEnabled;
data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul;
data["GPU"]["UseUbershaders"] = useUbershaders;
data["GPU"]["ForceShadergenForLighting"] = forceShadergenForLights;
data["GPU"]["ShadergenLightThreshold"] = lightShadergenThreshold;
data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType));
data["Audio"]["EnableAudio"] = audioEnabled;

View file

@ -58,6 +58,10 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) {
break;
}
}
if (renderer != nullptr) {
renderer->setConfig(&config);
}
}
void GPU::reset() {

View file

@ -1,8 +1,21 @@
#include "PICA/pica_frag_config.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_gen.hpp"
using namespace PICA;
using namespace PICA::ShaderGen;
static constexpr const char* uniformDefinition = R"(
struct LightSource {
vec3 specular0;
vec3 specular1;
vec3 diffuse;
vec3 ambient;
vec3 position;
vec3 spotlightDirection;
float distanceAttenuationBias;
float distanceAttenuationScale;
};
layout(std140) uniform FragmentUniforms {
int alphaReference;
float depthScale;
@ -11,10 +24,14 @@ static constexpr const char* uniformDefinition = R"(
vec4 constantColors[6];
vec4 tevBufferColor;
vec4 clipCoords;
// Note: We upload this as a u32 and decode on GPU
uint globalAmbientLight;
LightSource lightSources[8];
};
)";
std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
std::string FragmentGenerator::getDefaultVertexShader() {
std::string ret = "";
switch (api) {
@ -44,9 +61,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
layout(location = 6) in vec3 a_view;
layout(location = 7) in vec2 a_texcoord2;
out vec3 v_normal;
out vec3 v_tangent;
out vec3 v_bitangent;
out vec4 v_quaternion;
out vec4 v_colour;
out vec3 v_texcoord0;
out vec2 v_texcoord1;
@ -62,12 +77,6 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24));
}
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
vec3 u = q.xyz;
float s = q.w;
return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v);
}
void main() {
gl_Position = a_coords;
vec4 colourAbs = abs(a_vertexColour);
@ -77,10 +86,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
v_view = a_view;
v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion));
v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion));
v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion));
v_quaternion = a_quaternion;
#ifndef USING_GLES
gl_ClipDistance[0] = -a_coords.z;
@ -92,7 +98,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
return ret;
}
std::string FragmentGenerator::generate(const PICARegs& regs) {
std::string FragmentGenerator::generate(const FragmentConfig& config) {
std::string ret = "";
switch (api) {
@ -113,9 +119,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) {
// Input and output attributes
ret += R"(
in vec3 v_tangent;
in vec3 v_normal;
in vec3 v_bitangent;
in vec4 v_quaternion;
in vec4 v_colour;
in vec3 v_texcoord0;
in vec2 v_texcoord1;
@ -126,23 +130,43 @@ std::string FragmentGenerator::generate(const PICARegs& regs) {
uniform sampler2D u_tex0;
uniform sampler2D u_tex1;
uniform sampler2D u_tex2;
// GLES doesn't support sampler1DArray, as such we'll have to change how we handle lighting later
#ifndef USING_GLES
uniform sampler1DArray u_tex_lighting_lut;
#endif
uniform sampler2D u_tex_lighting_lut;
)";
ret += uniformDefinition;
if (config.lighting.enable) {
ret += R"(
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
vec3 u = q.xyz;
float s = q.w;
return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v);
}
float lutLookup(uint lut, int index) {
return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r;
}
vec3 regToColor(uint reg) {
return (1.0 / 255.0) * vec3(float((reg >> 20u) & 0xFFu), float((reg >> 10u) & 0xFFu), float(reg & 0xFFu));
}
)";
}
// Emit main function for fragment shader
// When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour
ret += R"(
void main() {
vec4 combinerOutput = v_colour;
vec4 previousBuffer = vec4(0.0);
vec4 tevNextPreviousBuffer = tevBufferColor;
vec4 tevNextPreviousBuffer = tevBufferColor;
vec4 primaryColor = vec4(0.0);
vec4 secondaryColor = vec4(0.0);
)";
compileLights(ret, config);
ret += R"(
vec3 colorOp1 = vec3(0.0);
vec3 colorOp2 = vec3(0.0);
@ -160,44 +184,39 @@ std::string FragmentGenerator::generate(const PICARegs& regs) {
float depth = z_over_w * depthScale + depthOffset;
)";
if ((regs[InternalRegs::DepthmapEnable] & 1) == 0) {
if (!config.outConfig.depthMapEnable) {
ret += "depth /= gl_FragCoord.w;\n";
}
ret += "gl_FragDepth = depth;\n";
textureConfig = regs[InternalRegs::TexUnitCfg];
for (int i = 0; i < 6; i++) {
compileTEV(ret, i, regs);
compileTEV(ret, i, config);
}
applyAlphaTest(ret, regs);
applyAlphaTest(ret, config);
ret += "fragColor = combinerOutput;\n}"; // End of main function
return ret;
}
void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICARegs& regs) {
// Base address for each TEV stage's configuration
static constexpr std::array<u32, 6> ioBases = {
InternalRegs::TexEnv0Source, InternalRegs::TexEnv1Source, InternalRegs::TexEnv2Source,
InternalRegs::TexEnv3Source, InternalRegs::TexEnv4Source, InternalRegs::TexEnv5Source,
};
void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config) {
const u32* tevValues = config.texConfig.tevConfigs.data() + stage * 4;
const u32 ioBase = ioBases[stage];
TexEnvConfig tev(regs[ioBase], regs[ioBase + 1], regs[ioBase + 2], regs[ioBase + 3], regs[ioBase + 4]);
// Pass a 0 to constColor here, as it doesn't matter for compilation
TexEnvConfig tev(tevValues[0], tevValues[1], tevValues[2], 0, tevValues[3]);
if (!tev.isPassthroughStage()) {
// Get color operands
shader += "colorOp1 = ";
getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage);
getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage, config);
shader += ";\ncolorOp2 = ";
getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage);
getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage, config);
shader += ";\ncolorOp3 = ";
getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage);
getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage, config);
shader += ";\nvec3 outputColor" + std::to_string(stage) + " = clamp(";
getColorOperation(shader, tev.colorOp);
@ -209,13 +228,13 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg
} else {
// Get alpha operands
shader += "alphaOp1 = ";
getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage);
getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage, config);
shader += ";\nalphaOp2 = ";
getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage);
getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage, config);
shader += ";\nalphaOp3 = ";
getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage);
getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage, config);
shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = clamp(";
getAlphaOperation(shader, tev.alphaOp);
@ -231,7 +250,7 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg
shader += "previousBuffer = tevNextPreviousBuffer;\n\n";
// Update the "next previous buffer" if necessary
const u32 textureEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
const u32 textureEnvUpdateBuffer = config.texConfig.texEnvUpdateBuffer;
if (stage < 4) {
// Check whether to update rgb
if ((textureEnvUpdateBuffer & (0x100 << stage))) {
@ -245,7 +264,7 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg
}
}
void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index) {
void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config) {
using OperandType = TexEnvConfig::ColorOperand;
// For inverting operands, add the 1.0 - x subtraction
@ -257,31 +276,31 @@ void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Sourc
switch (color) {
case OperandType::SourceColor:
case OperandType::OneMinusSourceColor:
getSource(shader, source, index);
getSource(shader, source, index, config);
shader += ".rgb";
break;
case OperandType::SourceRed:
case OperandType::OneMinusSourceRed:
getSource(shader, source, index);
getSource(shader, source, index, config);
shader += ".rrr";
break;
case OperandType::SourceGreen:
case OperandType::OneMinusSourceGreen:
getSource(shader, source, index);
getSource(shader, source, index, config);
shader += ".ggg";
break;
case OperandType::SourceBlue:
case OperandType::OneMinusSourceBlue:
getSource(shader, source, index);
getSource(shader, source, index, config);
shader += ".bbb";
break;
case OperandType::SourceAlpha:
case OperandType::OneMinusSourceAlpha:
getSource(shader, source, index);
getSource(shader, source, index, config);
shader += ".aaa";
break;
@ -292,7 +311,7 @@ void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Sourc
}
}
void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index) {
void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index, const PICA::FragmentConfig& config) {
using OperandType = TexEnvConfig::AlphaOperand;
// For inverting operands, add the 1.0 - x subtraction
@ -304,25 +323,25 @@ void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Sourc
switch (color) {
case OperandType::SourceRed:
case OperandType::OneMinusSourceRed:
getSource(shader, source, index);
getSource(shader, source, index, config);
shader += ".r";
break;
case OperandType::SourceGreen:
case OperandType::OneMinusSourceGreen:
getSource(shader, source, index);
getSource(shader, source, index, config);
shader += ".g";
break;
case OperandType::SourceBlue:
case OperandType::OneMinusSourceBlue:
getSource(shader, source, index);
getSource(shader, source, index, config);
shader += ".b";
break;
case OperandType::SourceAlpha:
case OperandType::OneMinusSourceAlpha:
getSource(shader, source, index);
getSource(shader, source, index, config);
shader += ".a";
break;
@ -333,14 +352,14 @@ void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Sourc
}
}
void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index) {
void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config) {
switch (source) {
case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break;
case TexEnvConfig::Source::Texture0: shader += "texture(u_tex0, v_texcoord0.xy)"; break;
case TexEnvConfig::Source::Texture1: shader += "texture(u_tex1, v_texcoord1)"; break;
case TexEnvConfig::Source::Texture2: {
// If bit 13 in texture config is set then we use the texcoords for texture 1, otherwise for texture 2
if (Helpers::getBit<13>(textureConfig)) {
if (Helpers::getBit<13>(config.texConfig.texUnitConfig)) {
shader += "texture(u_tex2, v_texcoord1)";
} else {
shader += "texture(u_tex2, v_texcoord2)";
@ -353,8 +372,8 @@ void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source sour
case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break;
// Lighting
case TexEnvConfig::Source::PrimaryFragmentColor:
case TexEnvConfig::Source::SecondaryFragmentColor: shader += "vec4(1.0, 1.0, 1.0, 1.0)"; break;
case TexEnvConfig::Source::PrimaryFragmentColor: shader += "primaryColor"; break;
case TexEnvConfig::Source::SecondaryFragmentColor: shader += "secondaryColor"; break;
default:
Helpers::warn("Unimplemented TEV source: %d", static_cast<int>(source));
@ -401,12 +420,11 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope
}
}
void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs) {
const u32 alphaConfig = regs[InternalRegs::AlphaTestConfig];
const auto function = static_cast<CompareFunction>(Helpers::getBits<4, 3>(alphaConfig));
void FragmentGenerator::applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config) {
const CompareFunction function = config.outConfig.alphaTestFunction;
// Alpha test disabled
if (Helpers::getBit<0>(alphaConfig) == 0 || function == CompareFunction::Always) {
if (function == CompareFunction::Always) {
return;
}
@ -430,3 +448,203 @@ void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs
shader += ") { discard; }\n";
}
// Appends the fragment lighting code to `shader`. For every light listed in config.lighting.lights this emits the
// light/half vector setup, optional distance attenuation, the LUT lookups and the accumulation into
// diffuse_sum/specular_sum, then writes the clamped results to primaryColor and secondaryColor.
// Nothing is emitted when lighting is disabled in the config.
void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentConfig& config) {
	const auto& lighting = config.lighting;
	if (!lighting.enable) {
		return;
	}

	// Emits a LUT lookup for the given light slot, followed by the statement that stores lut_lookup_result
	const auto emitLookup = [&](u32 lightIndex, u32 lutID, const char* storeResult) {
		compileLUTLookup(shader, config, lightIndex, lutID);
		shader += storeResult;
	};

	// Currently ignore bump mode
	shader += "vec3 normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion);\n";
	// Declarations of every temporary the per-light code below assigns to
	shader += R"(
vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);
vec3 light_position, light_vector, half_vector, specular0, specular1, reflected_color;
float light_distance, NdotL, light_factor, geometric_factor, distance_attenuation, distance_att_delta;
float spotlight_attenuation, specular0_dist, specular1_dist;
float lut_lookup_result, lut_lookup_delta;
int lut_lookup_index;
)";

	for (int lightSlot = 0; lightSlot < lighting.lightNum; lightSlot++) {
		const auto& lightConfig = lighting.lights[lightSlot];
		const uint lightID = lightConfig.num;
		// GLSL expression naming this light's entry in the lightSources uniform array
		const std::string lightRef = "lightSources[" + std::to_string(lightID) + "]";

		shader += "light_position = " + lightRef + ".position;\n";
		if (lightConfig.directional) {
			// Directional lighting: the position register is used as the light vector directly
			shader += "light_vector = light_position;\n";
		} else {
			// Positional lighting
			shader += "light_vector = light_position + v_view;\n";
		}

		shader += R"(
light_distance = length(light_vector);
light_vector = normalize(light_vector);
half_vector = light_vector + normalize(v_view);
distance_attenuation = 1.0;
NdotL = dot(normal, light_vector);
)";

		if (lightConfig.twoSidedDiffuse) {
			shader += "NdotL = abs(NdotL);\n";
		} else {
			shader += "NdotL = max(NdotL, 0.0);\n";
		}

		// The geometric factor is only computed when at least one specular term uses it
		const bool needsGeometricFactor = lightConfig.geometricFactor0 || lightConfig.geometricFactor1;
		if (needsGeometricFactor) {
			shader += R"(
geometric_factor = dot(half_vector, half_vector);
geometric_factor = (geometric_factor == 0.0) ? 0.0 : min(NdotL / geometric_factor, 1.0);
)";
		}

		if (lightConfig.distanceAttenuationEnable) {
			shader += "distance_att_delta = clamp(light_distance * " + lightRef + ".distanceAttenuationScale + " + lightRef +
					  ".distanceAttenuationBias, 0.0, 1.0);\n";
			// Distance attenuation LUTs are looked up at index 16 + light ID
			shader += "distance_attenuation = lutLookup(" + std::to_string(16 + lightID) +
					  ", int(clamp(floor(distance_att_delta * 256.0), 0.0, 255.0)));\n";
		}

		emitLookup(lightSlot, spotlightLutIndex, "spotlight_attenuation = lut_lookup_result;\n");
		emitLookup(lightSlot, PICA::Lights::LUT_D0, "specular0_dist = lut_lookup_result;\n");
		emitLookup(lightSlot, PICA::Lights::LUT_D1, "specular1_dist = lut_lookup_result;\n");
		emitLookup(lightSlot, PICA::Lights::LUT_RR, "reflected_color.r = lut_lookup_result;\n");

		// Green and blue reflection fall back to the red channel when their samplers are not part of this configuration
		if (isSamplerEnabled(lighting.config, PICA::Lights::LUT_RG)) {
			emitLookup(lightSlot, PICA::Lights::LUT_RG, "reflected_color.g = lut_lookup_result;\n");
		} else {
			shader += "reflected_color.g = reflected_color.r;\n";
		}

		if (isSamplerEnabled(lighting.config, PICA::Lights::LUT_RB)) {
			emitLookup(lightSlot, PICA::Lights::LUT_RB, "reflected_color.b = lut_lookup_result;\n");
		} else {
			shader += "reflected_color.b = reflected_color.r;\n";
		}

		shader += "specular0 = " + lightRef + ".specular0 * specular0_dist;\n";
		if (lightConfig.geometricFactor0) {
			shader += "specular0 *= geometric_factor;\n";
		}

		shader += "specular1 = " + lightRef + ".specular1 * specular1_dist * reflected_color;\n";
		if (lightConfig.geometricFactor1) {
			shader += "specular1 *= geometric_factor;\n";
		}

		shader += "light_factor = distance_attenuation * spotlight_attenuation;\n";

		if (lighting.clampHighlights) {
			// With highlight clamping the specular contribution is dropped wherever NdotL is zero
			shader += "specular_sum.rgb += light_factor * (NdotL == 0.0 ? 0.0 : 1.0) * (specular0 + specular1);\n";
		} else {
			shader += "specular_sum.rgb += light_factor * (specular0 + specular1);\n";
		}

		shader += "diffuse_sum.rgb += light_factor * (" + lightRef + ".ambient + " + lightRef + ".diffuse * NdotL);\n";
	}

	// The fresnel lookup is emitted once, after the loop, using the last light slot
	const bool needsFresnel = lighting.enablePrimaryAlpha || lighting.enableSecondaryAlpha;
	if (needsFresnel) {
		emitLookup(lighting.lightNum - 1, PICA::Lights::LUT_FR, "float fresnel_factor = lut_lookup_result;\n");
	}

	if (lighting.enablePrimaryAlpha) {
		shader += "diffuse_sum.a = fresnel_factor;\n";
	}

	if (lighting.enableSecondaryAlpha) {
		shader += "specular_sum.a = fresnel_factor;\n";
	}

	shader += R"(
vec4 global_ambient = vec4(regToColor(globalAmbientLight), 1.0);
primaryColor = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0));
secondaryColor = clamp(specular_sum, vec4(0.0), vec4(1.0));
)";
}
// Reports whether the lighting environment configuration `environmentID` samples the LUT `lutID`.
// The table has one row per configuration (0-8) and one column per LUT, in the order D0, D1, SP, FR, RB, RG, RR.
// IDs that fall outside the table are reported as disabled instead of indexing past the end of the array.
bool FragmentGenerator::isSamplerEnabled(u32 environmentID, u32 lutID) {
	static constexpr u32 configCount = 9;
	static constexpr u32 lutsPerConfig = 7;

	static constexpr bool samplerEnabled[configCount * lutsPerConfig] = {
		// D0   D1     SP     FR     RB     RG     RR
		true,  false, true,  false, false, false, true,   // Configuration 0: D0, SP, RR
		false, false, true,  true,  false, false, true,   // Configuration 1: FR, SP, RR
		true,  true,  false, false, false, false, true,   // Configuration 2: D0, D1, RR
		true,  true,  false, true,  false, false, false,  // Configuration 3: D0, D1, FR
		true,  true,  true,  false, true,  true,  true,   // Configuration 4: All except for FR
		true,  false, true,  true,  true,  true,  true,   // Configuration 5: All except for D1
		true,  true,  true,  true,  false, false, true,   // Configuration 6: All except for RB and RG
		false, false, false, false, false, false, false,  // Configuration 7: Unused
		true,  true,  true,  true,  true,  true,  true,   // Configuration 8: All
	};

	// Both IDs come straight from the caller; without this check an unknown configuration or LUT would read
	// either the wrong row or memory beyond the table
	if (environmentID >= configCount || lutID >= lutsPerConfig) {
		return false;
	}

	return samplerEnabled[environmentID * lutsPerConfig + lutID];
}
// Appends GLSL that evaluates one lighting LUT for the light in slot `lightIndex` and leaves the value in
// lut_lookup_result.
//   shader:     shader source being built; code is appended to it
//   config:     fragment configuration providing the per-LUT and per-light settings
//   lightIndex: index into config.lighting.lights (not the hardware light ID)
//   lutID:      LUT selector (0-6); spotlightLutIndex stands for "this light's spotlight LUT"
// When the LUT is disabled, not sampled by the current lighting configuration, or lutID is unknown, the emitted
// code simply sets lut_lookup_result to 1.0.
void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID) {
	// Reject unknown LUTs before touching any per-LUT table, so neither config.lighting.luts nor the sampler table
	// gets indexed with an out-of-range ID. The emitted fallback is the same as for a disabled LUT.
	if (lutID > 6) {
		Helpers::warn("Shadergen: Unimplemented LUT value");
		shader += "lut_lookup_result = 1.0;\n";
		return;
	}

	const LightingLUTConfig& lut = config.lighting.luts[lutID];
	const auto& light = config.lighting.lights[lightIndex];
	const uint lightID = light.num;

	uint lutIndex = 0;
	bool lutEnabled = false;

	if (lutID == spotlightLutIndex) {
		// These are the spotlight attenuation LUTs, one per light, starting at index 8
		lutIndex = 8u + lightID;
		lutEnabled = light.spotAttenuationEnable;
	} else {
		lutIndex = lutID;
		lutEnabled = lut.enable;
	}

	const bool samplerEnabled = isSamplerEnabled(config.lighting.config, lutID);
	if (!samplerEnabled || !lutEnabled) {
		shader += "lut_lookup_result = 1.0;\n";
		return;
	}

	const float scale = lut.scale;
	const uint inputID = lut.type;
	const bool absEnabled = lut.absInput;

	// Select the dot product that is used as the LUT input
	switch (inputID) {
		case 0: shader += "lut_lookup_delta = dot(normal, normalize(half_vector));\n"; break;
		case 1: shader += "lut_lookup_delta = dot(normalize(v_view), normalize(half_vector));\n"; break;
		case 2: shader += "lut_lookup_delta = dot(normal, normalize(v_view));\n"; break;
		case 3: shader += "lut_lookup_delta = dot(normal, light_vector);\n"; break;
		case 4: shader += "lut_lookup_delta = dot(light_vector, lightSources[" + std::to_string(lightID) + "].spotlightDirection);\n"; break;

		default:
			Helpers::warn("Shadergen: Unimplemented LUT select");
			shader += "lut_lookup_delta = 1.0;\n";
			break;
	}

	// NOTE(review): std::to_string(float) formats like printf("%f") and follows the current C locale's decimal
	// separator - confirm the frontends keep LC_NUMERIC set to "C", otherwise the scale literal is not valid GLSL
	if (absEnabled) {
		// Input is in [0, 1]: either mirror or clamp away the negative half, then index the 256 entries directly
		const bool twoSidedDiffuse = light.twoSidedDiffuse;
		shader += twoSidedDiffuse ? "lut_lookup_delta = abs(lut_lookup_delta);\n" : "lut_lookup_delta = max(lut_lookup_delta, 0.0);\n";
		shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", int(clamp(floor(lut_lookup_delta * 256.0), 0.0, 255.0)));\n";
		if (scale != 1.0) {
			shader += "lut_lookup_result *= " + std::to_string(scale) + ";\n";
		}
	} else {
		// Range is [-1, 1] so we need to map it to [0, 1]
		// Negative inputs wrap around to the upper half of the table (indices 128-255)
		shader += "lut_lookup_index = int(clamp(floor(lut_lookup_delta * 128.0), -128.f, 127.f));\n";
		shader += "if (lut_lookup_index < 0) lut_lookup_index += 256;\n";
		shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", lut_lookup_index) *" + std::to_string(scale) + ";\n";
	}
}

View file

@ -4,6 +4,7 @@
#include <cmrc/cmrc.hpp>
#include "config.hpp"
#include "PICA/float_types.hpp"
#include "PICA/pica_frag_uniforms.hpp"
#include "PICA/gpu.hpp"
@ -117,7 +118,10 @@ void RendererGL::initGraphicsContextInternal() {
const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320
const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall
glGenTextures(1, &lightLUTTextureArray);
lightLUTTexture.create(256, Lights::LUT_Count, GL_R32F);
lightLUTTexture.bind();
lightLUTTexture.setMinFilter(OpenGL::Linear);
lightLUTTexture.setMagFilter(OpenGL::Linear);
auto prevTexture = OpenGL::getTex2D();
@ -159,6 +163,10 @@ void RendererGL::initGraphicsContextInternal() {
OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]);
reset();
// Initialize the default vertex shader used with shadergen
std::string defaultShadergenVSSource = fragShaderGen.getDefaultVertexShader();
defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex);
}
// The OpenGL renderer doesn't need to do anything with the GL context (For Qt frontend) or the SDL window (For SDL frontend)
@ -348,26 +356,22 @@ void RendererGL::bindTexturesToSlots() {
}
glActiveTexture(GL_TEXTURE0 + 3);
glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
lightLUTTexture.bind();
glActiveTexture(GL_TEXTURE0);
}
void RendererGL::updateLightingLUT() {
gpu.lightingLUTDirty = false;
std::array<u16, GPU::LightingLutSize> u16_lightinglut;
std::array<float, GPU::LightingLutSize> lightingLut;
for (int i = 0; i < gpu.lightingLUT.size(); i++) {
uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
u16_lightinglut[i] = value * 65535 / 4095;
uint64_t value = gpu.lightingLUT[i] & 0xFFF;
lightingLut[i] = (float)(value << 4) / 65535.0f;
}
glActiveTexture(GL_TEXTURE0 + 3);
glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
lightLUTTexture.bind();
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, Lights::LUT_Count, GL_RED, GL_FLOAT, lightingLut.data());
glActiveTexture(GL_TEXTURE0);
}
@ -380,6 +384,18 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
OpenGL::Triangle,
};
bool usingUbershader = enableUbershader;
if (usingUbershader) {
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1;
// Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using N or more lights via shadergen
// This way we generate fewer shaders overall than with full shadergen, but don't tank performance
if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) {
usingUbershader = false;
}
}
if (usingUbershader) {
gl.useProgram(triangleProgram);
} else {
@ -780,43 +796,16 @@ std::optional<ColourBuffer> RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt
OpenGL::Program& RendererGL::getSpecializedShader() {
constexpr uint uboBlockBinding = 2;
PICA::FragmentConfig fsConfig;
auto& outConfig = fsConfig.outConfig;
auto& texConfig = fsConfig.texConfig;
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);
outConfig.alphaTestFunction = (alphaTestConfig & 1) ? static_cast<PICA::CompareFunction>(alphaTestFunction) : PICA::CompareFunction::Always;
outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1;
texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
// Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like
// {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO
#define setupTevStage(stage) \
std::memcpy(&texConfig.tevConfigs[stage * 4], &regs[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \
texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 5];
setupTevStage(0);
setupTevStage(1);
setupTevStage(2);
setupTevStage(3);
setupTevStage(4);
setupTevStage(5);
#undef setupTevStage
PICA::FragmentConfig fsConfig(regs);
CachedProgram& programEntry = shaderCache[fsConfig];
OpenGL::Program& program = programEntry.program;
if (!program.exists()) {
std::string vs = fragShaderGen.getVertexShader(regs);
std::string fs = fragShaderGen.generate(regs);
std::string fs = fragShaderGen.generate(fsConfig);
OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex);
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
program.create({vertShader, fragShader});
program.create({defaultShadergenVs, fragShader});
gl.useProgram(program);
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
@ -875,6 +864,48 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
vec[3] = float((color >> 24) & 0xFF) / 255.0f;
}
// Append lighting uniforms
if (fsConfig.lighting.enable) {
uniforms.globalAmbientLight = regs[InternalRegs::LightGlobalAmbient];
for (int i = 0; i < 8; i++) {
auto& light = uniforms.lightUniforms[i];
const u32 specular0 = regs[InternalRegs::Light0Specular0 + i * 0x10];
const u32 specular1 = regs[InternalRegs::Light0Specular1 + i * 0x10];
const u32 diffuse = regs[InternalRegs::Light0Diffuse + i * 0x10];
const u32 ambient = regs[InternalRegs::Light0Ambient + i * 0x10];
const u32 lightXY = regs[InternalRegs::Light0XY + i * 0x10];
const u32 lightZ = regs[InternalRegs::Light0Z + i * 0x10];
const u32 spotlightXY = regs[InternalRegs::Light0SpotlightXY + i * 0x10];
const u32 spotlightZ = regs[InternalRegs::Light0SpotlightZ + i * 0x10];
const u32 attenuationBias = regs[InternalRegs::Light0AttenuationBias + i * 0x10];
const u32 attenuationScale = regs[InternalRegs::Light0AttenuationScale + i * 0x10];
#define lightColorToVec3(value) \
{ \
float(Helpers::getBits<20, 8>(value)) / 255.0f, \
float(Helpers::getBits<10, 8>(value)) / 255.0f, \
float(Helpers::getBits<0, 8>(value)) / 255.0f, \
}
light.specular0 = lightColorToVec3(specular0);
light.specular1 = lightColorToVec3(specular1);
light.diffuse = lightColorToVec3(diffuse);
light.ambient = lightColorToVec3(ambient);
light.position[0] = Floats::f16::fromRaw(u16(lightXY)).toFloat32();
light.position[1] = Floats::f16::fromRaw(u16(lightXY >> 16)).toFloat32();
light.position[2] = Floats::f16::fromRaw(u16(lightZ)).toFloat32();
// Fixed point 1.1.11 to float, without negation
light.spotlightDirection[0] = float(s32(spotlightXY & 0x1FFF) << 19 >> 19) / 2047.0;
light.spotlightDirection[1] = float(s32((spotlightXY >> 16) & 0x1FFF) << 19 >> 19) / 2047.0;
light.spotlightDirection[2] = float(s32(spotlightZ & 0x1FFF) << 19 >> 19) / 2047.0;
light.distanceAttenuationBias = Floats::f20::fromRaw(attenuationBias & 0xFFFFF).toFloat32();
light.distanceAttenuationScale = Floats::f20::fromRaw(attenuationScale & 0xFFFFF).toFloat32();
#undef lightColorToVec3
}
}
gl.bindUBO(programEntry.uboBinding);
glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms);

View file

@ -1,8 +1,6 @@
#version 410 core
in vec3 v_tangent;
in vec3 v_normal;
in vec3 v_bitangent;
in vec4 v_quaternion;
in vec4 v_colour;
in vec3 v_texcoord0;
in vec2 v_texcoord1;
@ -27,7 +25,7 @@ uniform bool u_depthmapEnable;
uniform sampler2D u_tex0;
uniform sampler2D u_tex1;
uniform sampler2D u_tex2;
uniform sampler1DArray u_tex_lighting_lut;
uniform sampler2D u_tex_lighting_lut;
uniform uint u_picaRegs[0x200 - 0x48];
@ -37,6 +35,16 @@ uint readPicaReg(uint reg_addr) { return u_picaRegs[reg_addr - 0x48u]; }
vec4 tevSources[16];
vec4 tevNextPreviousBuffer;
bool tevUnimplementedSourceFlag = false;
vec3 normal;
// See docs/lighting.md
const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu);
// Reports whether lighting LUT `lut_id` is sampled under lighting environment `environment_id`.
// Every environment owns 7 consecutive bits of the table stored in samplerEnabledBitfields,
// so the bit for a given pair sits at position 7 * environment_id + lut_id.
// NOTE(review): an environment id whose bits land past the second 32-bit word would index
// outside the 2-entry table - confirm callers never pass such ids.
bool isSamplerEnabled(uint environment_id, uint lut_id) {
	uint bit = 7u * environment_id + lut_id;
	// Upper bits of the position pick the 32-bit word, the low 5 bits pick the bit inside it
	uint word = samplerEnabledBitfields[bit >> 5];
	return ((word >> (bit & 31u)) & 1u) == 1u;
}
// OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements):
// https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml
@ -110,7 +118,7 @@ vec4 tevCalculateCombiner(int tev_id) {
case 6u: result.rgb = vec3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB
case 7u: result = vec4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA
case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add
case 9u: result.rgb = min((source0.rgb + source1.rgb) * source2.rgb, 1.0); break; // Add then multiply
case 9u: result.rgb = min(source0.rgb + source1.rgb, 1.0) * source2.rgb; break; // Add then multiply
default: break;
}
@ -125,7 +133,7 @@ vec4 tevCalculateCombiner(int tev_id) {
case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate
case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract
case 8u: result.a = min(1.0, source0.a * source1.a + source2.a); break; // Multiply then add
case 9u: result.a = min(1.0, (source0.a + source1.a) * source2.a); break; // Add then multiply
case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break; // Add then multiply
default: break;
}
}
@ -144,10 +152,16 @@ vec4 tevCalculateCombiner(int tev_id) {
#define RG_LUT 5u
#define RR_LUT 6u
float lutLookup(uint lut, uint light, float value) {
if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
if (lut == SP_LUT) lut = light + 8;
return texture(u_tex_lighting_lut, vec2(value, lut)).r;
uint GPUREG_LIGHTi_CONFIG;
uint GPUREG_LIGHTING_CONFIG1;
uint GPUREG_LIGHTING_LUTINPUT_SELECT;
uint GPUREG_LIGHTING_LUTINPUT_SCALE;
uint GPUREG_LIGHTING_LUTINPUT_ABS;
bool error_unimpl = false;
vec4 unimpl_color = vec4(1.0, 0.0, 1.0, 1.0);
// Reads entry `index` of lighting LUT `lut`: the red channel of texel (index, lut)
// at mip level 0 of u_tex_lighting_lut, fetched by integer coordinates.
float lutLookup(uint lut, int index) {
	ivec2 texel = ivec2(index, int(lut));
	return texelFetch(u_tex_lighting_lut, texel, 0).r;
}
vec3 regToColor(uint reg) {
@ -178,136 +192,179 @@ float decodeFP(uint hex, uint E, uint M) {
return uintBitsToFloat(hex);
}
// Evaluates one lighting LUT term for a light.
//   environment_id: lighting environment, used to check whether this LUT is sampled at all
//   lut_id:         LUT to evaluate; SP_LUT selects the per-light spotlight table (8 + light_id),
//                   any other id up to 6 selects the table with the same index
//   light_id:       index of the light being shaded
//   light_vector:   vector towards the light, used as passed in
//   half_vector:    half vector, normalized here before use
// Returns 1.0 when the LUT is not sampled in this environment, when its disable bit in
// GPUREG_LIGHTING_CONFIG1 is set, or when lut_id is not a known LUT; otherwise returns the LUT entry
// multiplied by the scale selected in GPUREG_LIGHTING_LUTINPUT_SCALE.
// Reads the GPUREG_* globals, `normal` and `v_view`; sets error_unimpl for unknown LUTs or inputs.
float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light_vector, vec3 half_vector) {
	uint lut_index;
	int bit_in_config1;
	if (lut_id == SP_LUT) {
		// These are the spotlight attenuation LUTs
		bit_in_config1 = 8 + int(light_id & 7u);
		lut_index = 8u + light_id;
	} else if (lut_id <= 6) {
		bit_in_config1 = 16 + int(lut_id);
		lut_index = lut_id;
	} else {
		// Unknown LUT: flag it and bail out with the neutral factor rather than carrying on
		// with lut_index and bit_in_config1 left uninitialized
		error_unimpl = true;
		return 1.0;
	}

	bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment
	if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
		return 1.0;
	}

	// Scale ids 0..5 mean 1x..32x; ids 6 and 7 become 64/256 and 128/256, i.e. 0.25x and 0.5x
	uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
	float scale = float(1u << scale_id);
	if (scale_id >= 6u) scale /= 256.0;

	float delta = 1.0;
	uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
	switch (input_id) {
		case 0u: {
			delta = dot(normal, normalize(half_vector));
			break;
		}
		case 1u: {
			delta = dot(normalize(v_view), normalize(half_vector));
			break;
		}
		case 2u: {
			delta = dot(normal, normalize(v_view));
			break;
		}
		case 3u: {
			delta = dot(light_vector, normal);
			break;
		}
		case 4u: {
			int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u)));
			int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u)));

			// Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions
			// of GLSL so we do it manually
			int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
			int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
			int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);

			if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000;
			if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000;
			if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000;

			// These are fixed point 1.1.11 values, so we need to convert them to float
			float x = float(se_x) / 2047.0;
			float y = float(se_y) / 2047.0;
			float z = float(se_z) / 2047.0;
			vec3 spotlight_vector = vec3(x, y, z);
			delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector
			break;
		}
		case 5u: {
			delta = 1.0; // TODO: cos(phi) (aka CP);
			error_unimpl = true;
			break;
		}
		default: {
			delta = 1.0;
			error_unimpl = true;
			break;
		}
	}

	// 0 = enabled: the input is made non-negative and indexes the table over [0, 255]
	if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
		// Two sided diffuse
		if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
			delta = max(delta, 0.0);
		} else {
			delta = abs(delta);
		}
		int index = int(clamp(floor(delta * 255.0), 0.f, 255.f));
		return lutLookup(lut_index, index) * scale;
	} else {
		// Input range is [-1, 1]: convert it to a signed index in [-128, 127], then wrap
		// negative indices into the upper half of the 256-entry table
		int index = int(clamp(floor(delta * 128.0), -128.f, 127.f));
		if (index < 0) index += 256;

		return lutLookup(lut_index, index) * scale;
	}
}
// Applies quaternion q (xyz = vector part, w = scalar part) to vector v using
// 2(u.v)u + (s^2 - u.u)v + 2s(u x v). No normalization is performed on q here.
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
	vec3 axis = q.xyz;
	float w = q.w;

	vec3 along_axis = 2.0 * dot(axis, v) * axis;
	vec3 scaled_input = (w * w - dot(axis, axis)) * v;
	vec3 perpendicular = 2.0 * w * cross(axis, v);
	return along_axis + scaled_input + perpendicular;
}
// Implements the following algorithm: https://mathb.in/26766
void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
// Quaternions describe a transformation from surface-local space to eye space.
// In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1),
// the tangent vector is (1,0,0), and the bitangent vector is (0,1,0).
vec3 normal = normalize(v_normal);
vec3 tangent = normalize(v_tangent);
vec3 bitangent = normalize(v_bitangent);
vec3 view = normalize(v_view);
uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu);
if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
primary_color = secondary_color = vec4(1.0);
primary_color = secondary_color = vec4(0.0);
return;
}
uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u);
uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2u) & 0x7u) + 1u;
uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9u);
primary_color = vec4(vec3(0.0), 1.0);
secondary_color = vec4(vec3(0.0), 1.0);
primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT);
uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);
uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u);
uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u);
uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u);
float d[7];
uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u);
GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u);
GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);
bool error_unimpl = false;
uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2);
// Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker
switch (bump_mode) {
default: {
normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion);
break;
}
}
vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);
uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4);
bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
uint light_id;
vec3 light_vector;
vec3 half_vector;
for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) {
uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3);
light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id);
uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + 0x10u * light_id);
uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + 0x10u * light_id);
uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + 0x10u * light_id);
uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + 0x10u * light_id);
uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u));
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u));
uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + (light_id << 4u));
uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + (light_id << 4u));
uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + (light_id << 4u));
uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + (light_id << 4u));
GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + (light_id << 4u));
vec3 light_vector = normalize(vec3(
float light_distance;
vec3 light_position = vec3(
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
));
vec3 half_vector;
);
// Positional Light
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
// error_unimpl = true;
half_vector = normalize(normalize(light_vector + v_view) + view);
light_vector = light_position + v_view;
}
// Directional light
else {
half_vector = normalize(normalize(light_vector) + view);
light_vector = light_position;
}
for (int c = 0; c < 7; c++) {
if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
float scale = float(1u << scale_id);
if (scale_id >= 6u) scale /= 256.0;
light_distance = length(light_vector);
light_vector = normalize(light_vector);
half_vector = light_vector + normalize(v_view);
uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
if (input_id == 0u)
d[c] = dot(normal, half_vector);
else if (input_id == 1u)
d[c] = dot(view, half_vector);
else if (input_id == 2u)
d[c] = dot(normal, view);
else if (input_id == 3u)
d[c] = dot(light_vector, normal);
else if (input_id == 4u) {
uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id);
vec3 spot_light_vector = normalize(vec3(
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
));
d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP);
} else if (input_id == 5u) {
d[c] = 1.0; // TODO: cos <greek symbol> (aka CP);
error_unimpl = true;
} else {
d[c] = 1.0;
}
d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale;
if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
} else {
d[c] = 1.0;
}
}
uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4);
if (lookup_config == 0u) {
d[D1_LUT] = 0.0;
d[FR_LUT] = 0.0;
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
} else if (lookup_config == 1u) {
d[D0_LUT] = 0.0;
d[D1_LUT] = 0.0;
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
} else if (lookup_config == 2u) {
d[FR_LUT] = 0.0;
d[SP_LUT] = 0.0;
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
} else if (lookup_config == 3u) {
d[SP_LUT] = 0.0;
d[RG_LUT] = d[RB_LUT] = d[RR_LUT] = 1.0;
} else if (lookup_config == 4u) {
d[FR_LUT] = 0.0;
} else if (lookup_config == 5u) {
d[D1_LUT] = 0.0;
} else if (lookup_config == 6u) {
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
}
float distance_factor = 1.0; // a
float indirect_factor = 1.0; // fi
float shadow_factor = 1.0; // o
float NdotL = dot(normal, light_vector); // Li dot N
float NdotL = dot(normal, light_vector); // N dot Li
// Two sided diffuse
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u)
@ -315,20 +372,86 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
else
NdotL = abs(NdotL);
float light_factor = distance_factor * d[SP_LUT] * indirect_factor * shadow_factor;
float geometric_factor;
bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
if (use_geo_0 || use_geo_1) {
geometric_factor = dot(half_vector, half_vector);
geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0);
}
primary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL);
secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] +
regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT]));
float distance_attenuation = 1.0;
if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u);
float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u);
float delta = light_distance * distance_attenuation_scale + distance_attenuation_bias;
delta = clamp(delta, 0.0, 1.0);
int index = int(clamp(floor(delta * 255.0), 0.0, 255.0));
distance_attenuation = lutLookup(16u + light_id, index);
}
float spotlight_attenuation = lightLutLookup(environment_id, SP_LUT, light_id, light_vector, half_vector);
float specular0_distribution = lightLutLookup(environment_id, D0_LUT, light_id, light_vector, half_vector);
float specular1_distribution = lightLutLookup(environment_id, D1_LUT, light_id, light_vector, half_vector);
vec3 reflected_color;
reflected_color.r = lightLutLookup(environment_id, RR_LUT, light_id, light_vector, half_vector);
if (isSamplerEnabled(environment_id, RG_LUT)) {
reflected_color.g = lightLutLookup(environment_id, RG_LUT, light_id, light_vector, half_vector);
} else {
reflected_color.g = reflected_color.r;
}
if (isSamplerEnabled(environment_id, RB_LUT)) {
reflected_color.b = lightLutLookup(environment_id, RB_LUT, light_id, light_vector, half_vector);
} else {
reflected_color.b = reflected_color.r;
}
vec3 specular0 = regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0_distribution;
vec3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1_distribution * reflected_color;
specular0 *= use_geo_0 ? geometric_factor : 1.0;
specular1 *= use_geo_1 ? geometric_factor : 1.0;
float clamp_factor = 1.0;
if (clamp_highlights && NdotL == 0.0) {
clamp_factor = 0.0;
}
float light_factor = distance_attenuation * spotlight_attenuation;
diffuse_sum.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL);
specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1);
}
uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1);
uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1);
// Uses parameters from the last light as Fresnel is only applied to the last light
float fresnel_factor;
if (fresnel_output1 == 1u || fresnel_output2 == 1u) {
fresnel_factor = lightLutLookup(environment_id, FR_LUT, light_id, light_vector, half_vector);
}
if (fresnel_output1 == 1u) {
diffuse_sum.a = fresnel_factor;
}
if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT];
if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT];
if (fresnel_output2 == 1u) {
specular_sum.a = fresnel_factor;
}
uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u);
vec4 global_ambient = vec4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0);
primary_color = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0));
secondary_color = clamp(specular_sum, vec4(0.0), vec4(1.0));
if (error_unimpl) {
// secondary_color = primary_color = vec4(1.0, 0., 1.0, 1.0);
// secondary_color = primary_color = unimpl_color;
}
}
@ -414,4 +537,4 @@ void main() {
break;
}
}
}
}

View file

@ -9,9 +9,7 @@ layout(location = 5) in float a_texcoord0_w;
layout(location = 6) in vec3 a_view;
layout(location = 7) in vec2 a_texcoord2;
out vec3 v_normal;
out vec3 v_tangent;
out vec3 v_bitangent;
out vec4 v_quaternion;
out vec4 v_colour;
out vec3 v_texcoord0;
out vec2 v_texcoord1;
@ -35,12 +33,6 @@ vec4 abgr8888ToVec4(uint abgr) {
return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24));
}
// Applies quaternion q (xyz = vector part, w = scalar part) to vector v using
// 2(u.v)u + (s^2 - u.u)v + 2s(u x v). No normalization is performed on q here.
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
	vec3 axis = q.xyz;
	float w = q.w;

	vec3 along_axis = 2.0 * dot(axis, v) * axis;
	vec3 scaled_input = (w * w - dot(axis, axis)) * v;
	vec3 perpendicular = 2.0 * w * cross(axis, v);
	return along_axis + scaled_input + perpendicular;
}
// Convert an arbitrary-width floating point literal to an f32
float decodeFP(uint hex, uint E, uint M) {
uint width = M + E + 1u;
@ -73,10 +65,6 @@ void main() {
v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
v_view = a_view;
v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion));
v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion));
v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion));
for (int i = 0; i < 6; i++) {
v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]);
}
@ -95,4 +83,6 @@ void main() {
// There's also another, always-on clipping plane based on vertex z
gl_ClipDistance[0] = -a_coords.z;
gl_ClipDistance[1] = dot(clipData, a_coords);
v_quaternion = a_quaternion;
}

View file

@ -147,7 +147,8 @@ static void configInit() {
static const retro_variable values[] = {
{"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"},
{"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"},
{"panda3ds_use_ubershader", "Use ubershaders (No stutter, maybe slower); enabled|disabled"},
{"panda3ds_use_ubershader", EmulatorConfig::ubershaderDefault ? "Use ubershaders (No stutter, maybe slower); enabled|disabled"
: "Use ubershaders (No stutter, maybe slower); disabled|enabled"},
{"panda3ds_use_vsync", "Enable VSync; enabled|disabled"},
{"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"},
{"panda3ds_use_audio", "Enable audio; disabled|enabled"},
@ -155,6 +156,8 @@ static void configInit() {
{"panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled"},
{"panda3ds_battery_level", "Battery percentage; 5|10|20|30|50|70|90|100"},
{"panda3ds_use_charger", "Charger plugged; enabled|disabled"},
{"panda3ds_ubershader_lighting_override", "Force shadergen when rendering lights; enabled|disabled"},
{"panda3ds_ubershader_lighting_override_threshold", "Light threshold for forcing shadergen; 1|2|3|4|5|6|7|8"},
{nullptr, nullptr},
};
@ -175,6 +178,8 @@ static void configUpdate() {
config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false);
config.accurateShaderMul = FetchVariableBool("panda3ds_accurate_shader_mul", false);
config.useUbershaders = FetchVariableBool("panda3ds_use_ubershader", true);
config.forceShadergenForLights = FetchVariableBool("panda3ds_ubershader_lighting_override", true);
config.lightShadergenThreshold = std::clamp(std::stoi(FetchVariable("panda3ds_ubershader_lighting_override_threshold", "1")), 1, 8);
config.discordRpcEnabled = false;
config.save();

View file

@ -355,46 +355,57 @@ namespace OpenGL {
}
};
// Shader stages. Each enumerator is set to the matching OpenGL shader-type constant,
// so a ShaderType value can be cast to GLenum and passed on to the GL API.
enum ShaderType {
Fragment = GL_FRAGMENT_SHADER,
Vertex = GL_VERTEX_SHADER,
Geometry = GL_GEOMETRY_SHADER,
Compute = GL_COMPUTE_SHADER,
TessControl = GL_TESS_CONTROL_SHADER,
TessEvaluation = GL_TESS_EVALUATION_SHADER
};
// Shader stages. Each enumerator is set to the matching OpenGL shader-type constant,
// so a ShaderType value can be cast to GLenum and passed on to the GL API.
enum ShaderType {
Fragment = GL_FRAGMENT_SHADER,
Vertex = GL_VERTEX_SHADER,
Geometry = GL_GEOMETRY_SHADER,
Compute = GL_COMPUTE_SHADER,
TessControl = GL_TESS_CONTROL_SHADER,
TessEvaluation = GL_TESS_EVALUATION_SHADER
};
struct Shader {
GLuint m_handle = 0;
struct Shader {
GLuint m_handle = 0;
Shader() {}
Shader(const std::string_view source, ShaderType type) { create(source, static_cast<GLenum>(type)); }
Shader() {}
Shader(const std::string_view source, ShaderType type) { create(source, static_cast<GLenum>(type)); }
// Returns whether compilation failed or not
bool create(const std::string_view source, GLenum type) {
m_handle = glCreateShader(type);
const GLchar* const sources[1] = { source.data() };
// Returns whether compilation failed or not
bool create(const std::string_view source, GLenum type) {
m_handle = glCreateShader(type);
const GLchar* const sources[1] = {source.data()};
glShaderSource(m_handle, 1, sources, nullptr);
glCompileShader(m_handle);
glShaderSource(m_handle, 1, sources, nullptr);
glCompileShader(m_handle);
GLint success;
glGetShaderiv(m_handle, GL_COMPILE_STATUS, &success);
if (success == GL_FALSE) {
char buf[4096];
glGetShaderInfoLog(m_handle, 4096, nullptr, buf);
fprintf(stderr, "Failed to compile shader\nError: %s\n", buf);
glDeleteShader(m_handle);
GLint success;
glGetShaderiv(m_handle, GL_COMPILE_STATUS, &success);
if (success == GL_FALSE) {
char buf[4096];
glGetShaderInfoLog(m_handle, 4096, nullptr, buf);
fprintf(stderr, "Failed to compile shader\nError: %s\n", buf);
glDeleteShader(m_handle);
m_handle = 0;
}
m_handle = 0;
}
return m_handle != 0;
}
return m_handle != 0;
}
GLuint handle() const { return m_handle; }
bool exists() const { return m_handle != 0; }
};
GLuint handle() const { return m_handle; }
bool exists() const { return m_handle != 0; }
void free() {
if (exists()) {
glDeleteShader(m_handle);
m_handle = 0;
}
}
#ifdef OPENGL_DESTRUCTORS
~Shader() { free(); }
#endif
};
struct Program {
GLuint m_handle = 0;
@ -431,6 +442,10 @@ namespace OpenGL {
m_handle = 0;
}
}
#ifdef OPENGL_DESTRUCTORS
~Program() { free(); }
#endif
};
static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) {