Merge pull request #540 from wheremyfoodat/specialized-shaders-2

Finishing shader generator & mostly fixing lighting (In both shadergen & ubershader)
This commit is contained in:
wheremyfoodat 2024-07-20 15:35:40 +00:00 committed by GitHub
commit bbcd21de05
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 1136 additions and 432 deletions

278
.github/gles.patch vendored
View file

@ -1,52 +1,3 @@
diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp
index a11a6ffa..77486a09 100644
--- a/src/core/renderer_gl/renderer_gl.cpp
+++ b/src/core/renderer_gl/renderer_gl.cpp
@@ -357,27 +357,27 @@ void RendererGL::bindTexturesToSlots() {
}
glActiveTexture(GL_TEXTURE0 + 3);
- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
glActiveTexture(GL_TEXTURE0);
}
void RendererGL::updateLightingLUT() {
- gpu.lightingLUTDirty = false;
- std::array<u16, GPU::LightingLutSize> u16_lightinglut;
-
- for (int i = 0; i < gpu.lightingLUT.size(); i++) {
- uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
- u16_lightinglut[i] = value * 65535 / 4095;
- }
-
- glActiveTexture(GL_TEXTURE0 + 3);
- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
- glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- glActiveTexture(GL_TEXTURE0);
+ // gpu.lightingLUTDirty = false;
+ // std::array<u16, GPU::LightingLutSize> u16_lightinglut;
+
+ // for (int i = 0; i < gpu.lightingLUT.size(); i++) {
+ // uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
+ // u16_lightinglut[i] = value * 65535 / 4095;
+ // }
+
+ // glActiveTexture(GL_TEXTURE0 + 3);
+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
+ // glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ // glActiveTexture(GL_TEXTURE0);
}
void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> vertices) {
diff --git a/src/host_shaders/opengl_display.frag b/src/host_shaders/opengl_display.frag diff --git a/src/host_shaders/opengl_display.frag b/src/host_shaders/opengl_display.frag
index 612671c8..1937f711 100644 index 612671c8..1937f711 100644
--- a/src/host_shaders/opengl_display.frag --- a/src/host_shaders/opengl_display.frag
@ -70,7 +21,7 @@ index 990e2f80..2e7842ac 100644
void main() { void main() {
diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag
index f6fa6c55..bb88e278 100644 index 9f369e39..b4bb19d3 100644
--- a/src/host_shaders/opengl_fragment_shader.frag --- a/src/host_shaders/opengl_fragment_shader.frag
+++ b/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag
@@ -1,4 +1,5 @@ @@ -1,4 +1,5 @@
@ -78,36 +29,18 @@ index f6fa6c55..bb88e278 100644
+#version 300 es +#version 300 es
+precision mediump float; +precision mediump float;
in vec3 v_tangent; in vec4 v_quaternion;
in vec3 v_normal; in vec4 v_colour;
@@ -27,7 +28,7 @@ uniform bool u_depthmapEnable; @@ -164,11 +165,17 @@ float lutLookup(uint lut, int index) {
uniform sampler2D u_tex0; return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r;
uniform sampler2D u_tex1; }
uniform sampler2D u_tex2;
-uniform sampler1DArray u_tex_lighting_lut;
+// uniform sampler1DArray u_tex_lighting_lut;
uniform uint u_picaRegs[0x200 - 0x48]; +// some gles versions have bitfieldExtractCompat and complain if you redefine it, some don't and compile error, using this instead
@@ -145,16 +146,23 @@ vec4 tevCalculateCombiner(int tev_id) {
#define RR_LUT 6u
float lutLookup(uint lut, uint light, float value) {
- if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
- if (lut == SP_LUT) lut = light + 8;
- return texture(u_tex_lighting_lut, vec2(value, lut)).r;
+ // if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
+ // if (lut == SP_LUT) lut = light + 8;
+ // return texture(u_tex_lighting_lut, vec2(value, lut)).r;
+ return 0.0;
+}
+
+// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead
+uint bitfieldExtractCompat(uint val, int off, int size) { +uint bitfieldExtractCompat(uint val, int off, int size) {
+ uint mask = uint((1 << size) - 1); + uint mask = uint((1 << size) - 1);
+ return uint(val >> off) & mask; + return uint(val >> off) & mask;
} +}
+
vec3 regToColor(uint reg) { vec3 regToColor(uint reg) {
// Normalization scale to convert from [0...255] to [0.0...1.0] // Normalization scale to convert from [0...255] to [0.0...1.0]
const float scale = 1.0 / 255.0; const float scale = 1.0 / 255.0;
@ -117,89 +50,109 @@ index f6fa6c55..bb88e278 100644
} }
// Convert an arbitrary-width floating point literal to an f32 // Convert an arbitrary-width floating point literal to an f32
@@ -189,7 +197,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { @@ -208,16 +215,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
vec3 view = normalize(v_view);
bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment
- if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
+ if (!current_sampler_enabled || (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
return 1.0;
}
- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
float scale = float(1u << scale_id);
if (scale_id >= 6u) scale /= 256.0;
float delta = 1.0;
- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
switch (input_id) {
case 0u: {
delta = dot(normal, normalize(half_vector));
@@ -239,11 +246,11 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u)));
int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u)));
- // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions
+ // Sign extend them. Normally bitfieldExtractCompat would do that but it's missing on some versions
// of GLSL so we do it manually
- int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
- int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
- int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);
+ int se_x = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
+ int se_y = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
+ int se_z = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);
if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000;
if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000;
@@ -270,9 +277,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
}
// 0 = enabled
- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
// Two sided diffuse
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
delta = max(delta, 0.0);
} else {
delta = abs(delta);
@@ -296,7 +303,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
// Implements the following algorthm: https://mathb.in/26766
void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu);
- if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { - if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
primary_color = secondary_color = vec4(1.0); primary_color = secondary_color = vec4(0.0);
return; return;
} }
@@ -213,7 +221,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { @@ -313,7 +320,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
bool error_unimpl = false; GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);
- uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2);
+ uint bump_mode = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 28, 2);
// Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker
switch (bump_mode) {
@@ -326,15 +333,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);
- uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4);
- bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
+ uint environment_id = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 4, 4);
+ bool clamp_highlights = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
uint light_id;
vec3 light_vector;
vec3 half_vector;
for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) {
- uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); - light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
+ uint light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); + light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u));
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u));
@@ -224,14 +232,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { @@ -346,12 +353,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id);
vec3 light_vector = normalize(vec3( float light_distance;
vec3 light_position = vec3(
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) - decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), + decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) + decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
)); );
vec3 half_vector;
// Positional Light // Positional Light
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
// error_unimpl = true; light_vector = light_position + v_view;
half_vector = normalize(normalize(light_vector + v_view) + view);
}
@@ -242,12 +250,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
} }
for (int c = 0; c < 7; c++) { @@ -367,23 +374,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { float NdotL = dot(normal, light_vector); // N dot Li
- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
float scale = float(1u << scale_id);
if (scale_id >= 6u) scale /= 256.0;
- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
if (input_id == 0u)
d[c] = dot(normal, half_vector);
else if (input_id == 1u)
@@ -260,9 +268,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id);
vec3 spot_light_vector = normalize(vec3(
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
));
d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP);
} else if (input_id == 5u) {
@@ -273,13 +281,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
}
d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale;
- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
} else {
d[c] = 1.0;
}
}
- uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4);
+ uint lookup_config = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 4, 4);
if (lookup_config == 0u) {
d[D1_LUT] = 0.0;
d[FR_LUT] = 0.0;
@@ -310,7 +318,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
float NdotL = dot(normal, light_vector); // Li dot N
// Two sided diffuse // Two sided diffuse
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u)
@ -207,19 +160,40 @@ index f6fa6c55..bb88e278 100644
NdotL = max(0.0, NdotL); NdotL = max(0.0, NdotL);
else else
NdotL = abs(NdotL); NdotL = abs(NdotL);
@@ -321,8 +329,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + float geometric_factor;
regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); - bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
- bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
+ bool use_geo_0 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
+ bool use_geo_1 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
if (use_geo_0 || use_geo_1) {
geometric_factor = dot(half_vector, half_vector);
geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0);
} }
float distance_attenuation = 1.0;
- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
+ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
+ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u);
float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u);
@@ -428,8 +435,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1);
}
- uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); - uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1);
- uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); - uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1);
+ uint fresnel_output1 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 2, 1); + uint fresnel_output1 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 2, 1);
+ uint fresnel_output2 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 3, 1); + uint fresnel_output2 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 3, 1);
// Uses parameters from the last light as Fresnel is only applied to the last light
float fresnel_factor;
if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT];
if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT];
diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert
index a25d7a6d..7cf40398 100644 index 057f9a88..dc735ced 100644
--- a/src/host_shaders/opengl_vertex_shader.vert --- a/src/host_shaders/opengl_vertex_shader.vert
+++ b/src/host_shaders/opengl_vertex_shader.vert +++ b/src/host_shaders/opengl_vertex_shader.vert
@@ -1,4 +1,6 @@ @@ -1,4 +1,6 @@
@ -230,7 +204,7 @@ index a25d7a6d..7cf40398 100644
layout(location = 0) in vec4 a_coords; layout(location = 0) in vec4 a_coords;
layout(location = 1) in vec4 a_quaternion; layout(location = 1) in vec4 a_quaternion;
@@ -20,7 +22,7 @@ out vec2 v_texcoord2; @@ -18,7 +20,7 @@ out vec2 v_texcoord2;
flat out vec4 v_textureEnvColor[6]; flat out vec4 v_textureEnvColor[6];
flat out vec4 v_textureEnvBufferColor; flat out vec4 v_textureEnvBufferColor;
@ -239,7 +213,7 @@ index a25d7a6d..7cf40398 100644
// TEV uniforms // TEV uniforms
uniform uint u_textureEnvColor[6]; uniform uint u_textureEnvColor[6];
@@ -93,6 +95,6 @@ void main() { @@ -81,8 +83,8 @@ void main() {
); );
// There's also another, always-on clipping plane based on vertex z // There's also another, always-on clipping plane based on vertex z
@ -247,16 +221,20 @@ index a25d7a6d..7cf40398 100644
- gl_ClipDistance[1] = dot(clipData, a_coords); - gl_ClipDistance[1] = dot(clipData, a_coords);
+ // gl_ClipDistance[0] = -a_coords.z; + // gl_ClipDistance[0] = -a_coords.z;
+ // gl_ClipDistance[1] = dot(clipData, a_coords); + // gl_ClipDistance[1] = dot(clipData, a_coords);
v_quaternion = a_quaternion;
} }
diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp
index f368f573..5ead7f63 100644 index 4a08650a..21af37e3 100644
--- a/third_party/opengl/opengl.hpp --- a/third_party/opengl/opengl.hpp
+++ b/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp
@@ -520,21 +520,21 @@ namespace OpenGL { @@ -583,22 +583,22 @@ namespace OpenGL {
static void disableScissor() { glDisable(GL_SCISSOR_TEST); }
static void enableBlend() { glEnable(GL_BLEND); } static void enableBlend() { glEnable(GL_BLEND); }
static void disableBlend() { glDisable(GL_BLEND); } static void disableBlend() { glDisable(GL_BLEND); }
static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); } - static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); }
- static void disableLogicOp() { glDisable(GL_COLOR_LOGIC_OP); } - static void disableLogicOp() { glDisable(GL_COLOR_LOGIC_OP); }
+ static void enableLogicOp() { /* glEnable(GL_COLOR_LOGIC_OP); */ }
+ static void disableLogicOp() { /* glDisable(GL_COLOR_LOGIC_OP); */ } + static void disableLogicOp() { /* glDisable(GL_COLOR_LOGIC_OP); */ }
static void enableDepth() { glEnable(GL_DEPTH_TEST); } static void enableDepth() { glEnable(GL_DEPTH_TEST); }
static void disableDepth() { glDisable(GL_DEPTH_TEST); } static void disableDepth() { glDisable(GL_DEPTH_TEST); }

79
docs/3ds/lighting.md Normal file
View file

@ -0,0 +1,79 @@
## Info on the lighting implementation
### Missing shadow attenuation
Shadow attenuation samples a texture unit, and that likely needs render-to-texture for most games so that they can construct
their shadow map. As such, the colors are not multiplied by the shadow attenuation value, so there are no shadows.
### Missing bump mapping
Bump mapping also samples a texture unit. It most likely doesn't need render-to-texture, but it may need a better texture sampling
implementation (such as support for GPUREG_TEXUNITi_BORDER_COLOR and GPUREG_TEXUNITi_BORDER_PARAM). Bump mapping would work for some things,
namely the 3ds-examples bump mapping demo, but would break others, such as Toad Treasure Tracker, with a naive `texture` implementation.
Also, the CP configuration is missing because it needs a tangent map implementation. It is currently marked with error_unimpl.
### samplerEnabledBitfields
Holds the enabled state of the lighting LUT samplers for each PICA lighting configuration,
as explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0
```c
const bool samplerEnabled[9 * 7] = bool[9 * 7](
// D0 D1 SP FR RB RG RR
true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR
false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR
true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR
true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR
true, true, true, false, true, true, true, // Configuration 4: All except for FR
true, false, true, true, true, true, true, // Configuration 5: All except for D1
true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG
false, false, false, false, false, false, false, // Configuration 7: Unused
true, true, true, true, true, true, true // Configuration 8: All
);
```
The above has been condensed to two uints for performance reasons.
You can confirm they are the same by running the following:
```c
const uint samplerEnabledBitfields[2] = { 0x7170e645u, 0x7f013fefu };
for (int i = 0; i < 9 * 7; i++) {
unsigned arrayIndex = (i >> 5);
bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u;
if (samplerEnabled[i] == b) {
printf("%d: happy\n", i);
} else {
printf("%d: unhappy\n", i);
}
}
```
### lightLutLookup
lut_id is one of these values
0 D0
1 D1
2 SP
3 FR
4 RB
5 RG
6 RR
lut_index on the other hand represents the actual index of the LUT in the texture
u_tex_lighting_lut has 24 LUTs and they are used like so:
0 D0
1 D1
2 is missing because SP uses LUTs 8-15
3 FR
4 RB
5 RG
6 RR
8-15 SP0-7
16-23 DA0-7, but this is not handled in this function as the lookup is a bit different
The light environment configuration controls which LUTs are available for use.
If a LUT is not available in the selected configuration, its value will always read a constant 1.0, regardless of the enable state in GPUREG_LIGHTING_CONFIG1.
If RR is enabled but not RG or RB, the output of RR is used for all three components: red, green and blue.
### Distance attenuation
Distance attenuation is computed differently from the other factors. For example,
it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use
GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the
fragment, together with the distance attenuation scale and bias, to calculate where in the LUT to look up.
See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE

View file

@ -29,20 +29,204 @@ namespace PICA {
std::array<u32, 4 * 6> tevConfigs; std::array<u32, 4 * 6> tevConfigs;
}; };
// Per-light state packed into 16 bits.
// LightingConfig's constructor fills one of these for every active light slot.
struct Light {
union {
// All of the fields below viewed as a single value
u16 raw;
// Light index for this slot, read from the light permutation register (3 bits per slot are kept)
BitField<0, 3, u16> num;
// Bit 0 of the selected light's config register
BitField<3, 1, u16> directional;
// Bit 1 of the selected light's config register
BitField<4, 1, u16> twoSidedDiffuse;
// Inverse of bit (24 + num) of LightConfig1, since the register uses 0 = enabled
BitField<5, 1, u16> distanceAttenuationEnable;
// Inverse of bit (8 + num) of LightConfig1, since the register uses 0 = enabled
BitField<6, 1, u16> spotAttenuationEnable;
// Bit 2 of the selected light's config register
BitField<7, 1, u16> geometricFactor0;
// Bit 3 of the selected light's config register
BitField<8, 1, u16> geometricFactor1;
// Inverse of bit (num) of LightConfig1, since the register uses 0 = enabled
BitField<9, 1, u16> shadowEnable;
};
};
// Settings for one lighting lookup table, as decoded by LightingConfig's constructor.
struct LightingLUTConfig {
union {
// All of the bitfields below viewed as a single value
u32 raw;
// Set when the LUT's disable bit in LightConfig1 is 0; the spotlight LUT is always marked enabled
BitField<0, 1, u32> enable;
// Set when the LUT's bit in the LightLUTAbs register is 0
BitField<1, 1, u32> absInput;
// 3-bit value taken from the LUT's field in the LightLUTSelect register
BitField<2, 3, u32> type;
};
// Multiplier chosen from the constructor's scale table via the LUT's 3-bit field in LightLUTScale.
// Stays at its zero-initialised value for LUTs that are not enabled.
float scale;
};
// Snapshot of every lighting-related register bit stored by this config.
// The constructor decodes the raw PICA registers into this compact form.
struct LightingConfig {
	// Global lighting state packed into one word; `raw` exposes all of it at once
	union {
		u32 raw{};
		BitField<0, 1, u32> enable;
		BitField<1, 4, u32> lightNum;
		BitField<5, 2, u32> bumpMode;
		BitField<7, 2, u32> bumpSelector;
		BitField<9, 1, u32> bumpRenorm;
		BitField<10, 1, u32> clampHighlights;
		BitField<11, 4, u32> config;
		BitField<15, 1, u32> enablePrimaryAlpha;
		BitField<16, 1, u32> enableSecondaryAlpha;
		BitField<17, 1, u32> enableShadow;
		BitField<18, 1, u32> shadowPrimary;
		BitField<19, 1, u32> shadowSecondary;
		BitField<20, 1, u32> shadowInvert;
		BitField<21, 1, u32> shadowAlpha;
		BitField<22, 2, u32> shadowSelector;
	};

	// One entry per lookup table (D0, D1, spotlight, FR, RB, RG, RR)
	std::array<LightingLUTConfig, 7> luts{};
	// One entry per light slot; only the first `lightNum` entries get filled in
	std::array<Light, 8> lights{};

	// Decodes the lighting registers in `regs`.
	// If lighting is disabled, nothing is read and every member keeps its zero default.
	LightingConfig(const std::array<u32, 0x300>& regs) {
		const bool lightingOn = (regs[InternalRegs::LightingEnable] & 1) != 0;
		if (!lightingOn) {
			return;
		}

		const u32 config0 = regs[InternalRegs::LightConfig0];
		const u32 config1 = regs[InternalRegs::LightConfig1];
		const u32 permutation = regs[InternalRegs::LightPermutation];
		const u32 totalLightCount = Helpers::getBits<0, 3>(regs[InternalRegs::LightNumber]) + 1;

		enable = 1;
		lightNum = totalLightCount;

		// The shadow sub-settings are only recorded when shadows are actually on
		enableShadow = Helpers::getBit<0>(config0);
		if (enableShadow) [[unlikely]] {
			shadowPrimary = Helpers::getBit<16>(config0);
			shadowSecondary = Helpers::getBit<17>(config0);
			shadowInvert = Helpers::getBit<18>(config0);
			shadowAlpha = Helpers::getBit<19>(config0);
			shadowSelector = Helpers::getBits<24, 2>(config0);
		}

		enablePrimaryAlpha = Helpers::getBit<2>(config0);
		enableSecondaryAlpha = Helpers::getBit<3>(config0);
		config = Helpers::getBits<4, 4>(config0);
		bumpSelector = Helpers::getBits<22, 2>(config0);
		clampHighlights = Helpers::getBit<27>(config0);
		bumpMode = Helpers::getBits<28, 2>(config0);
		// The register stores 0 for "renormalize", so invert the bit
		bumpRenorm = Helpers::getBit<30>(config0) ^ 1;

		// Per-light state: each slot maps to a light through a 4-bit permutation entry
		for (u32 slot = 0; slot < totalLightCount; slot++) {
			Light& light = lights[slot];
			const u32 id = (permutation >> (slot * 4)) & 0x7;
			const u32 lightConfig = regs[InternalRegs::Light0Config + 0x10 * id];

			light.num = id;
			light.directional = Helpers::getBit<0>(lightConfig);
			light.twoSidedDiffuse = Helpers::getBit<1>(lightConfig);
			light.geometricFactor0 = Helpers::getBit<2>(lightConfig);
			light.geometricFactor1 = Helpers::getBit<3>(lightConfig);

			// These three LightConfig1 bits all use 0 = enabled, hence the xor
			light.shadowEnable = ((config1 >> id) & 1) ^ 1;
			light.spotAttenuationEnable = ((config1 >> (8 + id)) & 1) ^ 1;
			light.distanceAttenuationEnable = ((config1 >> (24 + id)) & 1) ^ 1;
		}

		const u32 lutAbs = regs[InternalRegs::LightLUTAbs];
		const u32 lutSelect = regs[InternalRegs::LightLUTSelect];
		const u32 lutScale = regs[InternalRegs::LightLUTScale];
		static constexpr float scales[] = {1.0f, 2.0f, 4.0f, 8.0f, 0.0f, 0.0f, 0.25f, 0.5f};

		// A LUT is enabled when its bit in LightConfig1 (starting at bit 16) is clear
		const auto lutEnabled = [config1](u32 field) { return ((config1 >> (16 + field)) & 1) == 0; };

		// Every LUT owns one 4-bit field in each of the abs/select/scale registers:
		// bit 1 of the field in lutAbs (0 = abs input), low 3 bits in lutSelect and lutScale
		const auto decodeLut = [lutAbs, lutSelect, lutScale](LightingLUTConfig& lut, u32 field) {
			const u32 shift = field * 4;
			lut.absInput = ((lutAbs >> (shift + 1)) & 1) == 0;
			lut.type = (lutSelect >> shift) & 0x7;
			lut.scale = scales[(lutScale >> shift) & 0x7];
		};

		LightingLUTConfig& d0 = luts[Lights::LUT_D0];
		LightingLUTConfig& d1 = luts[Lights::LUT_D1];
		LightingLUTConfig& sp = luts[spotlightLutIndex];
		LightingLUTConfig& fr = luts[Lights::LUT_FR];
		LightingLUTConfig& rb = luts[Lights::LUT_RB];
		LightingLUTConfig& rg = luts[Lights::LUT_RG];
		LightingLUTConfig& rr = luts[Lights::LUT_RR];

		d0.enable = lutEnabled(0);
		d1.enable = lutEnabled(1);
		fr.enable = lutEnabled(3);
		rb.enable = lutEnabled(4);
		rg.enable = lutEnabled(5);
		rr.enable = lutEnabled(6);
		// The spotlight LUT is always treated as enabled here
		sp.enable = 1;

		// Disabled LUTs keep their zeroed abs/type/scale values
		if (d0.enable) {
			decodeLut(d0, 0);
		}
		if (d1.enable) {
			decodeLut(d1, 1);
		}
		decodeLut(sp, 2);
		if (fr.enable) {
			decodeLut(fr, 3);
		}
		if (rb.enable) {
			decodeLut(rb, 4);
		}
		if (rg.enable) {
			decodeLut(rg, 5);
		}
		if (rr.enable) {
			decodeLut(rr, 6);
		}
	}
};
// Config used for identifying unique fragment pipeline configurations // Config used for identifying unique fragment pipeline configurations
struct FragmentConfig { struct FragmentConfig {
OutputConfig outConfig; OutputConfig outConfig;
TextureConfig texConfig; TextureConfig texConfig;
LightingConfig lighting;
bool operator==(const FragmentConfig& config) const { bool operator==(const FragmentConfig& config) const {
// Hash function and equality operator required by std::unordered_map // Hash function and equality operator required by std::unordered_map
return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0; return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
} }
FragmentConfig(const std::array<u32, 0x300>& regs) : lighting(regs) {
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);
outConfig.alphaTestFunction =
(alphaTestConfig & 1) ? static_cast<PICA::CompareFunction>(alphaTestFunction) : PICA::CompareFunction::Always;
outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1;
texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
// Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like
// {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO
#define setupTevStage(stage) \
std::memcpy(&texConfig.tevConfigs[stage * 4], &regs[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \
texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 4];
setupTevStage(0);
setupTevStage(1);
setupTevStage(2);
setupTevStage(3);
setupTevStage(4);
setupTevStage(5);
#undef setupTevStage
}
}; };
static_assert( static_assert(
std::has_unique_object_representations<OutputConfig>() && std::has_unique_object_representations<TextureConfig>() && std::has_unique_object_representations<OutputConfig>() && std::has_unique_object_representations<TextureConfig>() &&
std::has_unique_object_representations<FragmentConfig>() std::has_unique_object_representations<Light>()
); );
} // namespace PICA } // namespace PICA

View file

@ -1,10 +1,26 @@
#pragma once #pragma once
#include <array> #include <array>
#include <cstddef>
#include <type_traits> #include <type_traits>
#include "helpers.hpp" #include "helpers.hpp"
namespace PICA { namespace PICA {
// Per-light data block; FragmentUniforms holds an array of 8 of these as its last member.
// NOTE(review): presumably filled from the Light0* register group of each light — confirm against the uploader.
struct LightUniform {
using vec3 = std::array<float, 3>;
// std140 requires vec3s be aligned to 16 bytes
alignas(16) vec3 specular0;
alignas(16) vec3 specular1;
alignas(16) vec3 diffuse;
alignas(16) vec3 ambient;
alignas(16) vec3 position;
alignas(16) vec3 spotlightDirection;
// Plain floats with no explicit alignment; they follow directly after the last vec3
float distanceAttenuationBias;
float distanceAttenuationScale;
};
struct FragmentUniforms { struct FragmentUniforms {
using vec3 = std::array<float, 3>; using vec3 = std::array<float, 3>;
using vec4 = std::array<float, 4>; using vec4 = std::array<float, 4>;
@ -17,5 +33,13 @@ namespace PICA {
alignas(16) vec4 constantColors[tevStageCount]; alignas(16) vec4 constantColors[tevStageCount];
alignas(16) vec4 tevBufferColor; alignas(16) vec4 tevBufferColor;
alignas(16) vec4 clipCoords; alignas(16) vec4 clipCoords;
// Note: We upload this as a u32 and decode on GPU
u32 globalAmbientLight;
// NOTE: THIS MUST BE LAST so that if lighting is disabled we can potentially omit uploading it
LightUniform lightUniforms[8];
}; };
// Assert that lightUniforms is the last member of the structure
static_assert(offsetof(FragmentUniforms, lightUniforms) + 8 * sizeof(LightUniform) == sizeof(FragmentUniforms));
} // namespace PICA } // namespace PICA

View file

@ -67,7 +67,29 @@ namespace PICA {
ColourBufferLoc = 0x11D, ColourBufferLoc = 0x11D,
FramebufferSize = 0x11E, FramebufferSize = 0x11E,
//LightingRegs // Lighting registers
LightingEnable = 0x8F,
Light0Specular0 = 0x140,
Light0Specular1 = 0x141,
Light0Diffuse = 0x142,
Light0Ambient = 0x143,
Light0XY = 0x144,
Light0Z = 0x145,
Light0SpotlightXY = 0x146,
Light0SpotlightZ = 0x147,
Light0Config = 0x149,
Light0AttenuationBias = 0x14A,
Light0AttenuationScale = 0x14B,
LightGlobalAmbient = 0x1C0,
LightNumber = 0x1C2,
LightConfig0 = 0x1C3,
LightConfig1 = 0x1C4,
LightPermutation = 0x1D9,
LightLUTAbs = 0x1D0,
LightLUTSelect = 0x1D1,
LightLUTScale = 0x1D2,
LightingLUTIndex = 0x01C5, LightingLUTIndex = 0x01C5,
LightingLUTData0 = 0x01C8, LightingLUTData0 = 0x01C8,
LightingLUTData1 = 0x01C9, LightingLUTData1 = 0x01C9,
@ -231,7 +253,8 @@ namespace PICA {
enum : u32 { enum : u32 {
LUT_D0 = 0, LUT_D0 = 0,
LUT_D1, LUT_D1,
LUT_FR, // LUT 2 is not used, the emulator internally uses it for referring to the current source's spotlight in shaders
LUT_FR = 0x3,
LUT_RB, LUT_RB,
LUT_RG, LUT_RG,
LUT_RR, LUT_RR,
@ -255,6 +278,11 @@ namespace PICA {
}; };
} }
// There are actually 8 different spotlight LUTs (SP0-SP7), one for each light, with different indices (8-15)
// We use an unused LUT value for "this light source's spotlight" instead and figure out which light source to use in compileLUTLookup
// This is particularly intuitive in several places, such as checking if a LUT is enabled
static constexpr int spotlightLutIndex = 2;
enum class TextureFmt : u32 { enum class TextureFmt : u32 {
RGBA8 = 0x0, RGBA8 = 0x0,
RGB8 = 0x1, RGB8 = 0x1,

View file

@ -2,6 +2,7 @@
#include <string> #include <string>
#include "PICA/gpu.hpp" #include "PICA/gpu.hpp"
#include "PICA/pica_frag_config.hpp"
#include "PICA/regs.hpp" #include "PICA/regs.hpp"
#include "helpers.hpp" #include "helpers.hpp"
@ -13,25 +14,25 @@ namespace PICA::ShaderGen {
enum class Language { GLSL }; enum class Language { GLSL };
class FragmentGenerator { class FragmentGenerator {
using PICARegs = std::array<u32, 0x300>;
API api; API api;
Language language; Language language;
void compileTEV(std::string& shader, int stage, const PICARegs& regs); void compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config);
void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index); void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config);
void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index); void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config);
void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index); void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index, const PICA::FragmentConfig& config);
void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op); void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op); void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
void applyAlphaTest(std::string& shader, const PICARegs& regs); void applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config);
void compileLights(std::string& shader, const PICA::FragmentConfig& config);
u32 textureConfig = 0; void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID);
bool isSamplerEnabled(u32 environmentID, u32 lutID);
public: public:
FragmentGenerator(API api, Language language) : api(api), language(language) {} FragmentGenerator(API api, Language language) : api(api), language(language) {}
std::string generate(const PICARegs& regs); std::string generate(const PICA::FragmentConfig& config);
std::string getVertexShader(const PICARegs& regs); std::string getDefaultVertexShader();
void setTarget(API api, Language language) { void setTarget(API api, Language language) {
this->api = api; this->api = api;

View file

@ -13,12 +13,23 @@ struct EmulatorConfig {
static constexpr bool shaderJitDefault = false; static constexpr bool shaderJitDefault = false;
#endif #endif
// For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are
// horrible. On other platforms we default to ubershader + shadergen fallback for lights
#if defined(__ANDROID__) || defined(__APPLE__)
static constexpr bool ubershaderDefault = false;
#else
static constexpr bool ubershaderDefault = true; static constexpr bool ubershaderDefault = true;
#endif
bool shaderJitEnabled = shaderJitDefault; bool shaderJitEnabled = shaderJitDefault;
bool discordRpcEnabled = false; bool discordRpcEnabled = false;
bool useUbershaders = ubershaderDefault; bool useUbershaders = ubershaderDefault;
bool accurateShaderMul = false; bool accurateShaderMul = false;
// Toggles whether to force shadergen when there's more than N lights active and we're using the ubershader, for better performance
bool forceShadergenForLights = true;
int lightShadergenThreshold = 1;
RendererType rendererType = RendererType::OpenGL; RendererType rendererType = RendererType::OpenGL;
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null; Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null;

View file

@ -20,6 +20,7 @@ enum class RendererType : s8 {
Software = 3, Software = 3,
}; };
struct EmulatorConfig;
class GPU; class GPU;
struct SDL_Window; struct SDL_Window;
@ -46,6 +47,8 @@ class Renderer {
u32 outputWindowWidth = 400; u32 outputWindowWidth = 400;
u32 outputWindowHeight = 240 * 2; u32 outputWindowHeight = 240 * 2;
EmulatorConfig* emulatorConfig = nullptr;
public: public:
Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs); Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
virtual ~Renderer(); virtual ~Renderer();
@ -101,4 +104,6 @@ class Renderer {
outputWindowWidth = width; outputWindowWidth = width;
outputWindowHeight = height; outputWindowHeight = height;
} }
void setConfig(EmulatorConfig* config) { emulatorConfig = config; }
}; };

View file

@ -30,7 +30,7 @@ class RendererGL final : public Renderer {
OpenGL::VertexArray vao; OpenGL::VertexArray vao;
OpenGL::VertexBuffer vbo; OpenGL::VertexBuffer vbo;
bool usingUbershader = true; bool enableUbershader = true;
// Data // Data
struct { struct {
@ -63,9 +63,12 @@ class RendererGL final : public Renderer {
OpenGL::VertexBuffer dummyVBO; OpenGL::VertexBuffer dummyVBO;
OpenGL::Texture screenTexture; OpenGL::Texture screenTexture;
GLuint lightLUTTextureArray; OpenGL::Texture lightLUTTexture;
OpenGL::Framebuffer screenFramebuffer; OpenGL::Framebuffer screenFramebuffer;
OpenGL::Texture blankTexture; OpenGL::Texture blankTexture;
// The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation
// We can compile this once and then link it with all other generated fragment shaders
OpenGL::Shader defaultShadergenVs;
// Cached recompiled fragment shader // Cached recompiled fragment shader
struct CachedProgram { struct CachedProgram {
@ -107,7 +110,7 @@ class RendererGL final : public Renderer {
virtual std::string getUbershader() override; virtual std::string getUbershader() override;
virtual void setUbershader(const std::string& shader) override; virtual void setUbershader(const std::string& shader) override;
virtual void setUbershaderSetting(bool value) override { usingUbershader = value; } virtual void setUbershaderSetting(bool value) override { enableUbershader = value; }
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);

View file

@ -64,6 +64,9 @@ void EmulatorConfig::load() {
vsyncEnabled = toml::find_or<toml::boolean>(gpu, "EnableVSync", true); vsyncEnabled = toml::find_or<toml::boolean>(gpu, "EnableVSync", true);
useUbershaders = toml::find_or<toml::boolean>(gpu, "UseUbershaders", ubershaderDefault); useUbershaders = toml::find_or<toml::boolean>(gpu, "UseUbershaders", ubershaderDefault);
accurateShaderMul = toml::find_or<toml::boolean>(gpu, "AccurateShaderMultiplication", false); accurateShaderMul = toml::find_or<toml::boolean>(gpu, "AccurateShaderMultiplication", false);
forceShadergenForLights = toml::find_or<toml::boolean>(gpu, "ForceShadergenForLighting", true);
lightShadergenThreshold = toml::find_or<toml::integer>(gpu, "ShadergenLightThreshold", 1);
} }
} }
@ -130,6 +133,8 @@ void EmulatorConfig::save() {
data["GPU"]["EnableVSync"] = vsyncEnabled; data["GPU"]["EnableVSync"] = vsyncEnabled;
data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul; data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul;
data["GPU"]["UseUbershaders"] = useUbershaders; data["GPU"]["UseUbershaders"] = useUbershaders;
data["GPU"]["ForceShadergenForLighting"] = forceShadergenForLights;
data["GPU"]["ShadergenLightThreshold"] = lightShadergenThreshold;
data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType)); data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType));
data["Audio"]["EnableAudio"] = audioEnabled; data["Audio"]["EnableAudio"] = audioEnabled;

View file

@ -58,6 +58,10 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) {
break; break;
} }
} }
if (renderer != nullptr) {
renderer->setConfig(&config);
}
} }
void GPU::reset() { void GPU::reset() {

View file

@ -1,8 +1,21 @@
#include "PICA/pica_frag_config.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_gen.hpp" #include "PICA/shader_gen.hpp"
using namespace PICA; using namespace PICA;
using namespace PICA::ShaderGen; using namespace PICA::ShaderGen;
static constexpr const char* uniformDefinition = R"( static constexpr const char* uniformDefinition = R"(
struct LightSource {
vec3 specular0;
vec3 specular1;
vec3 diffuse;
vec3 ambient;
vec3 position;
vec3 spotlightDirection;
float distanceAttenuationBias;
float distanceAttenuationScale;
};
layout(std140) uniform FragmentUniforms { layout(std140) uniform FragmentUniforms {
int alphaReference; int alphaReference;
float depthScale; float depthScale;
@ -11,10 +24,14 @@ static constexpr const char* uniformDefinition = R"(
vec4 constantColors[6]; vec4 constantColors[6];
vec4 tevBufferColor; vec4 tevBufferColor;
vec4 clipCoords; vec4 clipCoords;
// Note: We upload this as a u32 and decode on GPU
uint globalAmbientLight;
LightSource lightSources[8];
}; };
)"; )";
std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { std::string FragmentGenerator::getDefaultVertexShader() {
std::string ret = ""; std::string ret = "";
switch (api) { switch (api) {
@ -44,9 +61,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
layout(location = 6) in vec3 a_view; layout(location = 6) in vec3 a_view;
layout(location = 7) in vec2 a_texcoord2; layout(location = 7) in vec2 a_texcoord2;
out vec3 v_normal; out vec4 v_quaternion;
out vec3 v_tangent;
out vec3 v_bitangent;
out vec4 v_colour; out vec4 v_colour;
out vec3 v_texcoord0; out vec3 v_texcoord0;
out vec2 v_texcoord1; out vec2 v_texcoord1;
@ -62,12 +77,6 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24));
} }
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
vec3 u = q.xyz;
float s = q.w;
return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v);
}
void main() { void main() {
gl_Position = a_coords; gl_Position = a_coords;
vec4 colourAbs = abs(a_vertexColour); vec4 colourAbs = abs(a_vertexColour);
@ -77,10 +86,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
v_view = a_view; v_view = a_view;
v_quaternion = a_quaternion;
v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion));
v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion));
v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion));
#ifndef USING_GLES #ifndef USING_GLES
gl_ClipDistance[0] = -a_coords.z; gl_ClipDistance[0] = -a_coords.z;
@ -92,7 +98,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
return ret; return ret;
} }
std::string FragmentGenerator::generate(const PICARegs& regs) { std::string FragmentGenerator::generate(const FragmentConfig& config) {
std::string ret = ""; std::string ret = "";
switch (api) { switch (api) {
@ -113,9 +119,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) {
// Input and output attributes // Input and output attributes
ret += R"( ret += R"(
in vec3 v_tangent; in vec4 v_quaternion;
in vec3 v_normal;
in vec3 v_bitangent;
in vec4 v_colour; in vec4 v_colour;
in vec3 v_texcoord0; in vec3 v_texcoord0;
in vec2 v_texcoord1; in vec2 v_texcoord1;
@ -126,14 +130,29 @@ std::string FragmentGenerator::generate(const PICARegs& regs) {
uniform sampler2D u_tex0; uniform sampler2D u_tex0;
uniform sampler2D u_tex1; uniform sampler2D u_tex1;
uniform sampler2D u_tex2; uniform sampler2D u_tex2;
// GLES doesn't support sampler1DArray, as such we'll have to change how we handle lighting later uniform sampler2D u_tex_lighting_lut;
#ifndef USING_GLES
uniform sampler1DArray u_tex_lighting_lut;
#endif
)"; )";
ret += uniformDefinition; ret += uniformDefinition;
if (config.lighting.enable) {
ret += R"(
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
vec3 u = q.xyz;
float s = q.w;
return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v);
}
float lutLookup(uint lut, int index) {
return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r;
}
vec3 regToColor(uint reg) {
return (1.0 / 255.0) * vec3(float((reg >> 20u) & 0xFFu), float((reg >> 10u) & 0xFFu), float(reg & 0xFFu));
}
)";
}
// Emit main function for fragment shader // Emit main function for fragment shader
// When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour // When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour
ret += R"( ret += R"(
@ -141,8 +160,13 @@ std::string FragmentGenerator::generate(const PICARegs& regs) {
vec4 combinerOutput = v_colour; vec4 combinerOutput = v_colour;
vec4 previousBuffer = vec4(0.0); vec4 previousBuffer = vec4(0.0);
vec4 tevNextPreviousBuffer = tevBufferColor; vec4 tevNextPreviousBuffer = tevBufferColor;
vec4 primaryColor = vec4(0.0);
vec4 secondaryColor = vec4(0.0);
)"; )";
compileLights(ret, config);
ret += R"( ret += R"(
vec3 colorOp1 = vec3(0.0); vec3 colorOp1 = vec3(0.0);
vec3 colorOp2 = vec3(0.0); vec3 colorOp2 = vec3(0.0);
@ -160,44 +184,39 @@ std::string FragmentGenerator::generate(const PICARegs& regs) {
float depth = z_over_w * depthScale + depthOffset; float depth = z_over_w * depthScale + depthOffset;
)"; )";
if ((regs[InternalRegs::DepthmapEnable] & 1) == 0) { if (!config.outConfig.depthMapEnable) {
ret += "depth /= gl_FragCoord.w;\n"; ret += "depth /= gl_FragCoord.w;\n";
} }
ret += "gl_FragDepth = depth;\n"; ret += "gl_FragDepth = depth;\n";
textureConfig = regs[InternalRegs::TexUnitCfg];
for (int i = 0; i < 6; i++) { for (int i = 0; i < 6; i++) {
compileTEV(ret, i, regs); compileTEV(ret, i, config);
} }
applyAlphaTest(ret, regs); applyAlphaTest(ret, config);
ret += "fragColor = combinerOutput;\n}"; // End of main function ret += "fragColor = combinerOutput;\n}"; // End of main function
return ret; return ret;
} }
void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICARegs& regs) { void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config) {
// Base address for each TEV stage's configuration const u32* tevValues = config.texConfig.tevConfigs.data() + stage * 4;
static constexpr std::array<u32, 6> ioBases = {
InternalRegs::TexEnv0Source, InternalRegs::TexEnv1Source, InternalRegs::TexEnv2Source,
InternalRegs::TexEnv3Source, InternalRegs::TexEnv4Source, InternalRegs::TexEnv5Source,
};
const u32 ioBase = ioBases[stage]; // Pass a 0 to constColor here, as it doesn't matter for compilation
TexEnvConfig tev(regs[ioBase], regs[ioBase + 1], regs[ioBase + 2], regs[ioBase + 3], regs[ioBase + 4]); TexEnvConfig tev(tevValues[0], tevValues[1], tevValues[2], 0, tevValues[3]);
if (!tev.isPassthroughStage()) { if (!tev.isPassthroughStage()) {
// Get color operands // Get color operands
shader += "colorOp1 = "; shader += "colorOp1 = ";
getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage); getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage, config);
shader += ";\ncolorOp2 = "; shader += ";\ncolorOp2 = ";
getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage); getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage, config);
shader += ";\ncolorOp3 = "; shader += ";\ncolorOp3 = ";
getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage); getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage, config);
shader += ";\nvec3 outputColor" + std::to_string(stage) + " = clamp("; shader += ";\nvec3 outputColor" + std::to_string(stage) + " = clamp(";
getColorOperation(shader, tev.colorOp); getColorOperation(shader, tev.colorOp);
@ -209,13 +228,13 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg
} else { } else {
// Get alpha operands // Get alpha operands
shader += "alphaOp1 = "; shader += "alphaOp1 = ";
getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage); getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage, config);
shader += ";\nalphaOp2 = "; shader += ";\nalphaOp2 = ";
getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage); getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage, config);
shader += ";\nalphaOp3 = "; shader += ";\nalphaOp3 = ";
getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage); getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage, config);
shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = clamp("; shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = clamp(";
getAlphaOperation(shader, tev.alphaOp); getAlphaOperation(shader, tev.alphaOp);
@ -231,7 +250,7 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg
shader += "previousBuffer = tevNextPreviousBuffer;\n\n"; shader += "previousBuffer = tevNextPreviousBuffer;\n\n";
// Update the "next previous buffer" if necessary // Update the "next previous buffer" if necessary
const u32 textureEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; const u32 textureEnvUpdateBuffer = config.texConfig.texEnvUpdateBuffer;
if (stage < 4) { if (stage < 4) {
// Check whether to update rgb // Check whether to update rgb
if ((textureEnvUpdateBuffer & (0x100 << stage))) { if ((textureEnvUpdateBuffer & (0x100 << stage))) {
@ -245,7 +264,7 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg
} }
} }
void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index) { void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config) {
using OperandType = TexEnvConfig::ColorOperand; using OperandType = TexEnvConfig::ColorOperand;
// For inverting operands, add the 1.0 - x subtraction // For inverting operands, add the 1.0 - x subtraction
@ -257,31 +276,31 @@ void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Sourc
switch (color) { switch (color) {
case OperandType::SourceColor: case OperandType::SourceColor:
case OperandType::OneMinusSourceColor: case OperandType::OneMinusSourceColor:
getSource(shader, source, index); getSource(shader, source, index, config);
shader += ".rgb"; shader += ".rgb";
break; break;
case OperandType::SourceRed: case OperandType::SourceRed:
case OperandType::OneMinusSourceRed: case OperandType::OneMinusSourceRed:
getSource(shader, source, index); getSource(shader, source, index, config);
shader += ".rrr"; shader += ".rrr";
break; break;
case OperandType::SourceGreen: case OperandType::SourceGreen:
case OperandType::OneMinusSourceGreen: case OperandType::OneMinusSourceGreen:
getSource(shader, source, index); getSource(shader, source, index, config);
shader += ".ggg"; shader += ".ggg";
break; break;
case OperandType::SourceBlue: case OperandType::SourceBlue:
case OperandType::OneMinusSourceBlue: case OperandType::OneMinusSourceBlue:
getSource(shader, source, index); getSource(shader, source, index, config);
shader += ".bbb"; shader += ".bbb";
break; break;
case OperandType::SourceAlpha: case OperandType::SourceAlpha:
case OperandType::OneMinusSourceAlpha: case OperandType::OneMinusSourceAlpha:
getSource(shader, source, index); getSource(shader, source, index, config);
shader += ".aaa"; shader += ".aaa";
break; break;
@ -292,7 +311,7 @@ void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Sourc
} }
} }
void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index) { void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index, const PICA::FragmentConfig& config) {
using OperandType = TexEnvConfig::AlphaOperand; using OperandType = TexEnvConfig::AlphaOperand;
// For inverting operands, add the 1.0 - x subtraction // For inverting operands, add the 1.0 - x subtraction
@ -304,25 +323,25 @@ void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Sourc
switch (color) { switch (color) {
case OperandType::SourceRed: case OperandType::SourceRed:
case OperandType::OneMinusSourceRed: case OperandType::OneMinusSourceRed:
getSource(shader, source, index); getSource(shader, source, index, config);
shader += ".r"; shader += ".r";
break; break;
case OperandType::SourceGreen: case OperandType::SourceGreen:
case OperandType::OneMinusSourceGreen: case OperandType::OneMinusSourceGreen:
getSource(shader, source, index); getSource(shader, source, index, config);
shader += ".g"; shader += ".g";
break; break;
case OperandType::SourceBlue: case OperandType::SourceBlue:
case OperandType::OneMinusSourceBlue: case OperandType::OneMinusSourceBlue:
getSource(shader, source, index); getSource(shader, source, index, config);
shader += ".b"; shader += ".b";
break; break;
case OperandType::SourceAlpha: case OperandType::SourceAlpha:
case OperandType::OneMinusSourceAlpha: case OperandType::OneMinusSourceAlpha:
getSource(shader, source, index); getSource(shader, source, index, config);
shader += ".a"; shader += ".a";
break; break;
@ -333,14 +352,14 @@ void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Sourc
} }
} }
void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index) { void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config) {
switch (source) { switch (source) {
case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break; case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break;
case TexEnvConfig::Source::Texture0: shader += "texture(u_tex0, v_texcoord0.xy)"; break; case TexEnvConfig::Source::Texture0: shader += "texture(u_tex0, v_texcoord0.xy)"; break;
case TexEnvConfig::Source::Texture1: shader += "texture(u_tex1, v_texcoord1)"; break; case TexEnvConfig::Source::Texture1: shader += "texture(u_tex1, v_texcoord1)"; break;
case TexEnvConfig::Source::Texture2: { case TexEnvConfig::Source::Texture2: {
// If bit 13 in texture config is set then we use the texcoords for texture 1, otherwise for texture 2 // If bit 13 in texture config is set then we use the texcoords for texture 1, otherwise for texture 2
if (Helpers::getBit<13>(textureConfig)) { if (Helpers::getBit<13>(config.texConfig.texUnitConfig)) {
shader += "texture(u_tex2, v_texcoord1)"; shader += "texture(u_tex2, v_texcoord1)";
} else { } else {
shader += "texture(u_tex2, v_texcoord2)"; shader += "texture(u_tex2, v_texcoord2)";
@ -353,8 +372,8 @@ void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source sour
case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break; case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break;
// Lighting // Lighting
case TexEnvConfig::Source::PrimaryFragmentColor: case TexEnvConfig::Source::PrimaryFragmentColor: shader += "primaryColor"; break;
case TexEnvConfig::Source::SecondaryFragmentColor: shader += "vec4(1.0, 1.0, 1.0, 1.0)"; break; case TexEnvConfig::Source::SecondaryFragmentColor: shader += "secondaryColor"; break;
default: default:
Helpers::warn("Unimplemented TEV source: %d", static_cast<int>(source)); Helpers::warn("Unimplemented TEV source: %d", static_cast<int>(source));
@ -401,12 +420,11 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope
} }
} }
void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs) { void FragmentGenerator::applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config) {
const u32 alphaConfig = regs[InternalRegs::AlphaTestConfig]; const CompareFunction function = config.outConfig.alphaTestFunction;
const auto function = static_cast<CompareFunction>(Helpers::getBits<4, 3>(alphaConfig));
// Alpha test disabled // Alpha test disabled
if (Helpers::getBit<0>(alphaConfig) == 0 || function == CompareFunction::Always) { if (function == CompareFunction::Always) {
return; return;
} }
@ -430,3 +448,203 @@ void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs
shader += ") { discard; }\n"; shader += ") { discard; }\n";
} }
// Appends the GLSL that evaluates fragment lighting to `shader`.
//
// Emits nothing when lighting is disabled in `config`. Otherwise the emitted code computes the surface
// normal from v_quaternion, accumulates the diffuse and specular contribution of every active light, and
// finally writes the shader variables `primaryColor` and `secondaryColor`.
// The emitted code relies on the rotateVec3ByQuaternion/lutLookup/regToColor helpers and on the
// primaryColor/secondaryColor declarations that generate() emits before calling this function.
void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentConfig& config) {
	if (!config.lighting.enable) {
		return;
	}

	// Currently ignore bump mode
	shader += "vec3 normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion);\n";
	shader += R"(
		vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
		vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);
		vec3 light_position, light_vector, half_vector, specular0, specular1, reflected_color;

		float light_distance, NdotL, light_factor, geometric_factor, distance_attenuation, distance_att_delta;
		float spotlight_attenuation, specular0_dist, specular1_dist;
		float lut_lookup_result, lut_lookup_delta;
		int lut_lookup_index;
	)";

	for (int i = 0; i < config.lighting.lightNum; i++) {
		const auto& lightConfig = config.lighting.lights[i];
		// `i` is the slot in the active light list, lightID is the light the slot refers to
		const uint lightID = lightConfig.num;
		// GLSL expression for this light's entry in the uniform block
		const std::string lightSource = "lightSources[" + std::to_string(lightID) + "]";

		shader += "light_position = " + lightSource + ".position;\n";

		if (lightConfig.directional) {  // Directional lighting
			shader += "light_vector = light_position;\n";
		} else {  // Positional lighting
			shader += "light_vector = light_position + v_view;\n";
		}

		shader += R"(
			light_distance = length(light_vector);
			light_vector = normalize(light_vector);
			half_vector = light_vector + normalize(v_view);

			distance_attenuation = 1.0;
			NdotL = dot(normal, light_vector);
		)";

		shader += lightConfig.twoSidedDiffuse ? "NdotL = abs(NdotL);\n" : "NdotL = max(NdotL, 0.0);\n";

		// The geometric factor is shared by both specular terms, so only emit it if one of them uses it
		if (lightConfig.geometricFactor0 || lightConfig.geometricFactor1) {
			shader += R"(
				geometric_factor = dot(half_vector, half_vector);
				geometric_factor = (geometric_factor == 0.0) ? 0.0 : min(NdotL / geometric_factor, 1.0);
			)";
		}

		if (lightConfig.distanceAttenuationEnable) {
			shader += "distance_att_delta = clamp(light_distance * " + lightSource + ".distanceAttenuationScale + " + lightSource +
					  ".distanceAttenuationBias, 0.0, 1.0);\n";

			// lutLookup takes a uint LUT index. The literal needs the 'u' suffix because GLSL ES does not
			// implicitly convert int to uint, so an unsuffixed literal fails to compile on GLES
			shader += "distance_attenuation = lutLookup(" + std::to_string(16 + lightID) +
					  "u, int(clamp(floor(distance_att_delta * 256.0), 0.0, 255.0)));\n";
		}

		// Each lookup leaves its value in lut_lookup_result (1.0 when the LUT is unused)
		compileLUTLookup(shader, config, i, spotlightLutIndex);
		shader += "spotlight_attenuation = lut_lookup_result;\n";

		compileLUTLookup(shader, config, i, PICA::Lights::LUT_D0);
		shader += "specular0_dist = lut_lookup_result;\n";

		compileLUTLookup(shader, config, i, PICA::Lights::LUT_D1);
		shader += "specular1_dist = lut_lookup_result;\n";

		compileLUTLookup(shader, config, i, PICA::Lights::LUT_RR);
		shader += "reflected_color.r = lut_lookup_result;\n";

		// When the green/blue reflection samplers are not part of the lighting configuration, reuse the red value
		if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RG)) {
			compileLUTLookup(shader, config, i, PICA::Lights::LUT_RG);
			shader += "reflected_color.g = lut_lookup_result;\n";
		} else {
			shader += "reflected_color.g = reflected_color.r;\n";
		}

		if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RB)) {
			compileLUTLookup(shader, config, i, PICA::Lights::LUT_RB);
			shader += "reflected_color.b = lut_lookup_result;\n";
		} else {
			shader += "reflected_color.b = reflected_color.r;\n";
		}

		shader += "specular0 = " + lightSource + ".specular0 * specular0_dist;\n";
		if (lightConfig.geometricFactor0) {
			shader += "specular0 *= geometric_factor;\n";
		}

		shader += "specular1 = " + lightSource + ".specular1 * specular1_dist * reflected_color;\n";
		if (lightConfig.geometricFactor1) {
			shader += "specular1 *= geometric_factor;\n";
		}

		shader += "light_factor = distance_attenuation * spotlight_attenuation;\n";

		if (config.lighting.clampHighlights) {
			// Drop the specular highlight entirely when the surface faces away from the light
			shader += "specular_sum.rgb += light_factor * (NdotL == 0.0 ? 0.0 : 1.0) * (specular0 + specular1);\n";
		} else {
			shader += "specular_sum.rgb += light_factor * (specular0 + specular1);\n";
		}

		shader += "diffuse_sum.rgb += light_factor * (" + lightSource + ".ambient + " + lightSource + ".diffuse * NdotL);\n";
	}

	// The fresnel lookup is emitted once, after the loop, against the last active light
	// NOTE(review): this indexes lights[lightNum - 1], so it assumes lightNum >= 1 - confirm against LightingConfig
	if (config.lighting.enablePrimaryAlpha || config.lighting.enableSecondaryAlpha) {
		compileLUTLookup(shader, config, config.lighting.lightNum - 1, PICA::Lights::LUT_FR);
		shader += "float fresnel_factor = lut_lookup_result;\n";
	}

	if (config.lighting.enablePrimaryAlpha) {
		shader += "diffuse_sum.a = fresnel_factor;\n";
	}

	if (config.lighting.enableSecondaryAlpha) {
		shader += "specular_sum.a = fresnel_factor;\n";
	}

	shader += R"(
		vec4 global_ambient = vec4(regToColor(globalAmbientLight), 1.0);

		primaryColor = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0));
		secondaryColor = clamp(specular_sum, vec4(0.0), vec4(1.0));
	)";
}
// Returns whether the lighting LUT sampler `lutID` is used by lighting environment configuration `environmentID`.
// The lookup table below has one row per environment configuration (0-8) and one column per LUT, in the order
// D0, D1, SP, FR, RB, RG, RR.
// Arguments outside the table (environmentID > 8 or lutID > 6) report the sampler as disabled rather than
// reading past the end of the table or into a neighbouring row.
bool FragmentGenerator::isSamplerEnabled(u32 environmentID, u32 lutID) {
	static constexpr u32 configCount = 9;
	static constexpr u32 lutCount = 7;

	static constexpr bool samplerEnabled[configCount * lutCount] = {
		// D0     D1     SP     FR     RB     RG     RR
		true,  false, true,  false, false, false, true,   // Configuration 0: D0, SP, RR
		false, false, true,  true,  false, false, true,   // Configuration 1: FR, SP, RR
		true,  true,  false, false, false, false, true,   // Configuration 2: D0, D1, RR
		true,  true,  false, true,  false, false, false,  // Configuration 3: D0, D1, FR
		true,  true,  true,  false, true,  true,  true,   // Configuration 4: All except for FR
		true,  false, true,  true,  true,  true,  true,   // Configuration 5: All except for D1
		true,  true,  true,  true,  false, false, true,   // Configuration 6: All except for RB and RG
		false, false, false, false, false, false, false,  // Configuration 7: Unused
		true,  true,  true,  true,  true,  true,  true,   // Configuration 8: All
	};

	// Guard both coordinates separately: an oversized lutID with a small environmentID would otherwise
	// still land inside the array, just in the wrong row
	if (environmentID >= configCount || lutID >= lutCount) {
		return false;
	}

	return samplerEnabled[environmentID * lutCount + lutID];
}
// Appends GLSL to `shader` that evaluates lighting LUT `lutID` for the light at position `lightIndex` of
// config.lighting.lights, leaving the value in the shader variable lut_lookup_result.
//
// The emitted code is just "lut_lookup_result = 1.0;" when:
//   - lutID is neither the spotlight LUT nor one of the LUTs 0-6 (a warning is logged as well),
//   - the sampler is not used by the current lighting environment (see isSamplerEnabled), or
//   - the LUT itself is disabled (per-light spotAttenuationEnable for the spotlight LUT, lut.enable otherwise).
// Otherwise it emits the LUT input selection (written to lut_lookup_delta), the index computation and the
// lutLookup() call, scaled by the LUT's configured scale.
void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID) {
	const bool isSpotlightLUT = (lutID == spotlightLutIndex);

	// Reject unknown LUT IDs up front, before lutID is used to index config.lighting.luts or the sampler table.
	// The emitted fallback is the same one the disabled-LUT path below produces.
	if (!isSpotlightLUT && lutID > 6) {
		Helpers::warn("Shadergen: Unimplemented LUT value");
		shader += "lut_lookup_result = 1.0;\n";
		return;
	}

	const LightingLUTConfig& lut = config.lighting.luts[lutID];
	const uint lightID = config.lighting.lights[lightIndex].num;
	uint lutIndex = 0;
	bool lutEnabled = false;

	if (isSpotlightLUT) {
		// These are the spotlight attenuation LUTs: one per light, starting at LUT index 8
		lutIndex = 8u + lightID;
		lutEnabled = config.lighting.lights[lightIndex].spotAttenuationEnable;
	} else {
		lutIndex = lutID;
		lutEnabled = lut.enable;
	}

	const bool samplerEnabled = isSamplerEnabled(config.lighting.config, lutID);
	if (!samplerEnabled || !lutEnabled) {
		shader += "lut_lookup_result = 1.0;\n";
		return;
	}

	// NOTE(review): std::to_string formats floats according to the current C locale; a locale with a comma
	// decimal separator would presumably produce invalid GLSL for fractional scales - confirm the locale is pinned.
	const float scale = lut.scale;
	const uint inputID = lut.type;
	const bool absEnabled = lut.absInput;

	// Select the LUT input and store it in lut_lookup_delta
	switch (inputID) {
		case 0: shader += "lut_lookup_delta = dot(normal, normalize(half_vector));\n"; break;
		case 1: shader += "lut_lookup_delta = dot(normalize(v_view), normalize(half_vector));\n"; break;
		case 2: shader += "lut_lookup_delta = dot(normal, normalize(v_view));\n"; break;
		case 3: shader += "lut_lookup_delta = dot(normal, light_vector);\n"; break;
		case 4: shader += "lut_lookup_delta = dot(light_vector, lightSources[" + std::to_string(lightID) + "].spotlightDirection);\n"; break;

		default:
			Helpers::warn("Shadergen: Unimplemented LUT select");
			shader += "lut_lookup_delta = 1.0;\n";
			break;
	}

	if (absEnabled) {
		// Fold the input into [0, 1]: abs() for two-sided diffuse lights, clamp negatives to 0 otherwise
		const bool twoSidedDiffuse = config.lighting.lights[lightIndex].twoSidedDiffuse;
		shader += twoSidedDiffuse ? "lut_lookup_delta = abs(lut_lookup_delta);\n" : "lut_lookup_delta = max(lut_lookup_delta, 0.0);\n";
		// NOTE(review): the ubershader's equivalent path multiplies by 255.0 rather than 256.0 - confirm which is intended
		shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", int(clamp(floor(lut_lookup_delta * 256.0), 0.0, 255.0)));\n";
		if (scale != 1.0) {
			shader += "lut_lookup_result *= " + std::to_string(scale) + ";\n";
		}
	} else {
		// Range is [-1, 1] so we need to map it to [0, 1]
		// Negative inputs produce indices -128..-1, which are wrapped to entries 128..255
		shader += "lut_lookup_index = int(clamp(floor(lut_lookup_delta * 128.0), -128.f, 127.f));\n";
		shader += "if (lut_lookup_index < 0) lut_lookup_index += 256;\n";
		shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", lut_lookup_index) *" + std::to_string(scale) + ";\n";
	}
}

View file

@ -4,6 +4,7 @@
#include <cmrc/cmrc.hpp> #include <cmrc/cmrc.hpp>
#include "config.hpp"
#include "PICA/float_types.hpp" #include "PICA/float_types.hpp"
#include "PICA/pica_frag_uniforms.hpp" #include "PICA/pica_frag_uniforms.hpp"
#include "PICA/gpu.hpp" #include "PICA/gpu.hpp"
@ -117,7 +118,10 @@ void RendererGL::initGraphicsContextInternal() {
const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320
const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall
glGenTextures(1, &lightLUTTextureArray); lightLUTTexture.create(256, Lights::LUT_Count, GL_R32F);
lightLUTTexture.bind();
lightLUTTexture.setMinFilter(OpenGL::Linear);
lightLUTTexture.setMagFilter(OpenGL::Linear);
auto prevTexture = OpenGL::getTex2D(); auto prevTexture = OpenGL::getTex2D();
@ -159,6 +163,10 @@ void RendererGL::initGraphicsContextInternal() {
OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]); OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]);
reset(); reset();
// Initialize the default vertex shader used with shadergen
std::string defaultShadergenVSSource = fragShaderGen.getDefaultVertexShader();
defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex);
} }
// The OpenGL renderer doesn't need to do anything with the GL context (For Qt frontend) or the SDL window (For SDL frontend) // The OpenGL renderer doesn't need to do anything with the GL context (For Qt frontend) or the SDL window (For SDL frontend)
@ -348,26 +356,22 @@ void RendererGL::bindTexturesToSlots() {
} }
glActiveTexture(GL_TEXTURE0 + 3); glActiveTexture(GL_TEXTURE0 + 3);
glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); lightLUTTexture.bind();
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
} }
void RendererGL::updateLightingLUT() { void RendererGL::updateLightingLUT() {
gpu.lightingLUTDirty = false; gpu.lightingLUTDirty = false;
std::array<u16, GPU::LightingLutSize> u16_lightinglut; std::array<float, GPU::LightingLutSize> lightingLut;
for (int i = 0; i < gpu.lightingLUT.size(); i++) { for (int i = 0; i < gpu.lightingLUT.size(); i++) {
uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); uint64_t value = gpu.lightingLUT[i] & 0xFFF;
u16_lightinglut[i] = value * 65535 / 4095; lightingLut[i] = (float)(value << 4) / 65535.0f;
} }
glActiveTexture(GL_TEXTURE0 + 3); glActiveTexture(GL_TEXTURE0 + 3);
glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); lightLUTTexture.bind();
glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, Lights::LUT_Count, GL_RED, GL_FLOAT, lightingLut.data());
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
} }
@ -380,6 +384,18 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
OpenGL::Triangle, OpenGL::Triangle,
}; };
bool usingUbershader = enableUbershader;
if (usingUbershader) {
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1;
// Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using more than N lights via shadergen
// This way we generate fewer shaders overall than with full shadergen, but don't tank performance
if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) {
usingUbershader = false;
}
}
if (usingUbershader) { if (usingUbershader) {
gl.useProgram(triangleProgram); gl.useProgram(triangleProgram);
} else { } else {
@ -780,43 +796,16 @@ std::optional<ColourBuffer> RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt
OpenGL::Program& RendererGL::getSpecializedShader() { OpenGL::Program& RendererGL::getSpecializedShader() {
constexpr uint uboBlockBinding = 2; constexpr uint uboBlockBinding = 2;
PICA::FragmentConfig fsConfig; PICA::FragmentConfig fsConfig(regs);
auto& outConfig = fsConfig.outConfig;
auto& texConfig = fsConfig.texConfig;
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);
outConfig.alphaTestFunction = (alphaTestConfig & 1) ? static_cast<PICA::CompareFunction>(alphaTestFunction) : PICA::CompareFunction::Always;
outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1;
texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
// Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like
// {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO
#define setupTevStage(stage) \
std::memcpy(&texConfig.tevConfigs[stage * 4], &regs[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \
texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 5];
setupTevStage(0);
setupTevStage(1);
setupTevStage(2);
setupTevStage(3);
setupTevStage(4);
setupTevStage(5);
#undef setupTevStage
CachedProgram& programEntry = shaderCache[fsConfig]; CachedProgram& programEntry = shaderCache[fsConfig];
OpenGL::Program& program = programEntry.program; OpenGL::Program& program = programEntry.program;
if (!program.exists()) { if (!program.exists()) {
std::string vs = fragShaderGen.getVertexShader(regs); std::string fs = fragShaderGen.generate(fsConfig);
std::string fs = fragShaderGen.generate(regs);
OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex);
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
program.create({vertShader, fragShader}); program.create({defaultShadergenVs, fragShader});
gl.useProgram(program); gl.useProgram(program);
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
@ -875,6 +864,48 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
vec[3] = float((color >> 24) & 0xFF) / 255.0f; vec[3] = float((color >> 24) & 0xFF) / 255.0f;
} }
// Append lighting uniforms
if (fsConfig.lighting.enable) {
uniforms.globalAmbientLight = regs[InternalRegs::LightGlobalAmbient];
for (int i = 0; i < 8; i++) {
auto& light = uniforms.lightUniforms[i];
const u32 specular0 = regs[InternalRegs::Light0Specular0 + i * 0x10];
const u32 specular1 = regs[InternalRegs::Light0Specular1 + i * 0x10];
const u32 diffuse = regs[InternalRegs::Light0Diffuse + i * 0x10];
const u32 ambient = regs[InternalRegs::Light0Ambient + i * 0x10];
const u32 lightXY = regs[InternalRegs::Light0XY + i * 0x10];
const u32 lightZ = regs[InternalRegs::Light0Z + i * 0x10];
const u32 spotlightXY = regs[InternalRegs::Light0SpotlightXY + i * 0x10];
const u32 spotlightZ = regs[InternalRegs::Light0SpotlightZ + i * 0x10];
const u32 attenuationBias = regs[InternalRegs::Light0AttenuationBias + i * 0x10];
const u32 attenuationScale = regs[InternalRegs::Light0AttenuationScale + i * 0x10];
#define lightColorToVec3(value) \
{ \
float(Helpers::getBits<20, 8>(value)) / 255.0f, \
float(Helpers::getBits<10, 8>(value)) / 255.0f, \
float(Helpers::getBits<0, 8>(value)) / 255.0f, \
}
light.specular0 = lightColorToVec3(specular0);
light.specular1 = lightColorToVec3(specular1);
light.diffuse = lightColorToVec3(diffuse);
light.ambient = lightColorToVec3(ambient);
light.position[0] = Floats::f16::fromRaw(u16(lightXY)).toFloat32();
light.position[1] = Floats::f16::fromRaw(u16(lightXY >> 16)).toFloat32();
light.position[2] = Floats::f16::fromRaw(u16(lightZ)).toFloat32();
// Fixed point 1.1.11 to float, without negation
light.spotlightDirection[0] = float(s32(spotlightXY & 0x1FFF) << 19 >> 19) / 2047.0;
light.spotlightDirection[1] = float(s32((spotlightXY >> 16) & 0x1FFF) << 19 >> 19) / 2047.0;
light.spotlightDirection[2] = float(s32(spotlightZ & 0x1FFF) << 19 >> 19) / 2047.0;
light.distanceAttenuationBias = Floats::f20::fromRaw(attenuationBias & 0xFFFFF).toFloat32();
light.distanceAttenuationScale = Floats::f20::fromRaw(attenuationScale & 0xFFFFF).toFloat32();
#undef lightColorToVec3
}
}
gl.bindUBO(programEntry.uboBinding); gl.bindUBO(programEntry.uboBinding);
glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms); glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms);

View file

@ -1,8 +1,6 @@
#version 410 core #version 410 core
in vec3 v_tangent; in vec4 v_quaternion;
in vec3 v_normal;
in vec3 v_bitangent;
in vec4 v_colour; in vec4 v_colour;
in vec3 v_texcoord0; in vec3 v_texcoord0;
in vec2 v_texcoord1; in vec2 v_texcoord1;
@ -27,7 +25,7 @@ uniform bool u_depthmapEnable;
uniform sampler2D u_tex0; uniform sampler2D u_tex0;
uniform sampler2D u_tex1; uniform sampler2D u_tex1;
uniform sampler2D u_tex2; uniform sampler2D u_tex2;
uniform sampler1DArray u_tex_lighting_lut; uniform sampler2D u_tex_lighting_lut;
uniform uint u_picaRegs[0x200 - 0x48]; uniform uint u_picaRegs[0x200 - 0x48];
@ -37,6 +35,16 @@ uint readPicaReg(uint reg_addr) { return u_picaRegs[reg_addr - 0x48u]; }
vec4 tevSources[16]; vec4 tevSources[16];
vec4 tevNextPreviousBuffer; vec4 tevNextPreviousBuffer;
bool tevUnimplementedSourceFlag = false; bool tevUnimplementedSourceFlag = false;
vec3 normal;
// See docs/lighting.md
const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu);
// Returns whether LUT sampler `lut_id` is used by lighting environment `environment_id`.
// The flags are packed one bit per (environment, LUT) pair, 7 bits per environment, into the
// 64 bits of samplerEnabledBitfields. Anything that falls outside that table is reported as disabled.
bool isSamplerEnabled(uint environment_id, uint lut_id) {
	uint index = 7u * environment_id + lut_id;
	uint arrayIndex = (index >> 5);

	// samplerEnabledBitfields only has 2 words; larger environment IDs would index past its end,
	// and a lut_id above 6 would read a bit belonging to a different environment
	if (lut_id >= 7u || arrayIndex >= 2u) {
		return false;
	}

	return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u;
}
// OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements): // OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements):
// https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml // https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml
@ -110,7 +118,7 @@ vec4 tevCalculateCombiner(int tev_id) {
case 6u: result.rgb = vec3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB case 6u: result.rgb = vec3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB
case 7u: result = vec4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA case 7u: result = vec4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA
case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add
case 9u: result.rgb = min((source0.rgb + source1.rgb) * source2.rgb, 1.0); break; // Add then multiply case 9u: result.rgb = min(source0.rgb + source1.rgb, 1.0) * source2.rgb; break; // Add then multiply
default: break; default: break;
} }
@ -125,7 +133,7 @@ vec4 tevCalculateCombiner(int tev_id) {
case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate
case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract
case 8u: result.a = min(1.0, source0.a * source1.a + source2.a); break; // Multiply then add case 8u: result.a = min(1.0, source0.a * source1.a + source2.a); break; // Multiply then add
case 9u: result.a = min(1.0, (source0.a + source1.a) * source2.a); break; // Add then multiply case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break; // Add then multiply
default: break; default: break;
} }
} }
@ -144,10 +152,16 @@ vec4 tevCalculateCombiner(int tev_id) {
#define RG_LUT 5u #define RG_LUT 5u
#define RR_LUT 6u #define RR_LUT 6u
float lutLookup(uint lut, uint light, float value) { uint GPUREG_LIGHTi_CONFIG;
if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; uint GPUREG_LIGHTING_CONFIG1;
if (lut == SP_LUT) lut = light + 8; uint GPUREG_LIGHTING_LUTINPUT_SELECT;
return texture(u_tex_lighting_lut, vec2(value, lut)).r; uint GPUREG_LIGHTING_LUTINPUT_SCALE;
uint GPUREG_LIGHTING_LUTINPUT_ABS;
bool error_unimpl = false;
vec4 unimpl_color = vec4(1.0, 0.0, 1.0, 1.0);
float lutLookup(uint lut, int index) {
return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r;
} }
vec3 regToColor(uint reg) { vec3 regToColor(uint reg) {
@ -178,136 +192,179 @@ float decodeFP(uint hex, uint E, uint M) {
return uintBitsToFloat(hex); return uintBitsToFloat(hex);
} }
// Evaluates lighting LUT `lut_id` for light `light_id` under lighting environment `environment_id`.
// light_vector and half_vector are the vectors the caller computed for this light.
//
// Returns 1.0 when the LUT ID is unknown (error_unimpl is also set), when the sampler is not used by this
// environment, or when the LUT's bit in GPUREG_LIGHTING_CONFIG1 is set. Otherwise returns the fetched LUT
// entry multiplied by the scale configured in GPUREG_LIGHTING_LUTINPUT_SCALE.
//
// Reads the globals normal, v_view, GPUREG_LIGHTi_CONFIG, GPUREG_LIGHTING_CONFIG1 and the
// GPUREG_LIGHTING_LUTINPUT_* registers, so these must be set up before calling.
// NOTE(review): calcLighting appears to declare its own local `uint GPUREG_LIGHTING_LUTINPUT_SCALE`, which
// would shadow the global read here and leave it unwritten - confirm.
float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light_vector, vec3 half_vector) {
	uint lut_index;
	int bit_in_config1;
	if (lut_id == SP_LUT) {
		// These are the spotlight attenuation LUTs
		bit_in_config1 = 8 + int(light_id & 7u);
		lut_index = 8u + light_id;
	} else if (lut_id <= 6u) {
		bit_in_config1 = 16 + int(lut_id);
		lut_index = lut_id;
	} else {
		// Unknown LUT: bail out here, since lut_index and bit_in_config1 have no meaningful value
		error_unimpl = true;
		return 1.0;
	}

	bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment

	// A set bit in GPUREG_LIGHTING_CONFIG1 means the LUT is disabled
	if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
		return 1.0;
	}

	// Scale IDs 0-5 give 1, 2, 4, ..., 32; IDs 6 and 7 give 64/256 = 0.25 and 128/256 = 0.5
	uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
	float scale = float(1u << scale_id);
	if (scale_id >= 6u) scale /= 256.0;

	float delta = 1.0;
	uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
	switch (input_id) {
		case 0u: {
			delta = dot(normal, normalize(half_vector));
			break;
		}
		case 1u: {
			delta = dot(normalize(v_view), normalize(half_vector));
			break;
		}
		case 2u: {
			delta = dot(normal, normalize(v_view));
			break;
		}
		case 3u: {
			delta = dot(light_vector, normal);
			break;
		}
		case 4u: {
			int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u)));
			int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u)));

			// Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions
			// of GLSL so we do it manually
			int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
			int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
			int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);

			if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000;
			if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000;
			if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000;

			// These are fixed point 1.1.11 values, so we need to convert them to float
			float x = float(se_x) / 2047.0;
			float y = float(se_y) / 2047.0;
			float z = float(se_z) / 2047.0;
			vec3 spotlight_vector = vec3(x, y, z);
			delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector
			break;
		}
		case 5u: {
			delta = 1.0; // TODO: cos <greek symbol> (aka CP);
			error_unimpl = true;
			break;
		}
		default: {
			delta = 1.0;
			error_unimpl = true;
			break;
		}
	}

	// 0 = enabled
	if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
		// Two sided diffuse
		if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
			delta = max(delta, 0.0);
		} else {
			delta = abs(delta);
		}

		int index = int(clamp(floor(delta * 255.0), 0.f, 255.f));
		return lutLookup(lut_index, index) * scale;
	} else {
		// Range is [-1, 1] so we need to map it to [0, 1]
		// Negative inputs produce indices -128..-1, which are wrapped to entries 128..255
		int index = int(clamp(floor(delta * 128.0), -128.f, 127.f));
		if (index < 0) index += 256;
		return lutLookup(lut_index, index) * scale;
	}
}
// Rotates vector v by quaternion q, where q.xyz is the vector part and q.w the scalar part.
// The result is the sum of three terms: the component along the quaternion's vector part,
// a rescaled copy of v, and a cross-product term.
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
	vec3 axis = q.xyz;
	float w = q.w;

	vec3 along_axis = 2.0 * dot(axis, v) * axis;
	vec3 scaled_input = (w * w - dot(axis, axis)) * v;
	vec3 perpendicular = 2.0 * w * cross(axis, v);
	return along_axis + scaled_input + perpendicular;
}
// Implements the following algorithm: https://mathb.in/26766 // Implements the following algorithm: https://mathb.in/26766
void calcLighting(out vec4 primary_color, out vec4 secondary_color) { void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
// Quaternions describe a transformation from surface-local space to eye space.
// In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1),
// the tangent vector is (1,0,0), and the bitangent vector is (0,1,0).
vec3 normal = normalize(v_normal);
vec3 tangent = normalize(v_tangent);
vec3 bitangent = normalize(v_bitangent);
vec3 view = normalize(v_view);
uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu);
if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
primary_color = secondary_color = vec4(1.0); primary_color = secondary_color = vec4(0.0);
return; return;
} }
uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u);
uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2u) & 0x7u) + 1u; uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2u) & 0x7u) + 1u;
uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9u); uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9u);
primary_color = vec4(vec3(0.0), 1.0); primary_color = vec4(vec3(0.0), 1.0);
secondary_color = vec4(vec3(0.0), 1.0); secondary_color = vec4(vec3(0.0), 1.0);
primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT);
uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);
uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u);
uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u);
uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u); uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u);
float d[7]; uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u);
GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u);
GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);
bool error_unimpl = false; uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2);
// Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker
switch (bump_mode) {
default: {
normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion);
break;
}
}
vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);
uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4);
bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
uint light_id;
vec3 light_vector;
vec3 half_vector;
for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) {
uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u));
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u));
uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + 0x10u * light_id); uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + (light_id << 4u));
uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + 0x10u * light_id); uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + (light_id << 4u));
uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + 0x10u * light_id); uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + (light_id << 4u));
uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + 0x10u * light_id); uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + (light_id << 4u));
uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + (light_id << 4u));
vec3 light_vector = normalize(vec3( float light_distance;
vec3 light_position = vec3(
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
)); );
vec3 half_vector;
// Positional Light // Positional Light
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
// error_unimpl = true; light_vector = light_position + v_view;
half_vector = normalize(normalize(light_vector + v_view) + view);
} }
// Directional light // Directional light
else { else {
half_vector = normalize(normalize(light_vector) + view); light_vector = light_position;
} }
for (int c = 0; c < 7; c++) { light_distance = length(light_vector);
if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { light_vector = normalize(light_vector);
uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); half_vector = light_vector + normalize(v_view);
float scale = float(1u << scale_id);
if (scale_id >= 6u) scale /= 256.0;
uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); float NdotL = dot(normal, light_vector); // N dot Li
if (input_id == 0u)
d[c] = dot(normal, half_vector);
else if (input_id == 1u)
d[c] = dot(view, half_vector);
else if (input_id == 2u)
d[c] = dot(normal, view);
else if (input_id == 3u)
d[c] = dot(light_vector, normal);
else if (input_id == 4u) {
uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id);
vec3 spot_light_vector = normalize(vec3(
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
));
d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP);
} else if (input_id == 5u) {
d[c] = 1.0; // TODO: cos <greek symbol> (aka CP);
error_unimpl = true;
} else {
d[c] = 1.0;
}
d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale;
if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
} else {
d[c] = 1.0;
}
}
uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4);
if (lookup_config == 0u) {
d[D1_LUT] = 0.0;
d[FR_LUT] = 0.0;
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
} else if (lookup_config == 1u) {
d[D0_LUT] = 0.0;
d[D1_LUT] = 0.0;
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
} else if (lookup_config == 2u) {
d[FR_LUT] = 0.0;
d[SP_LUT] = 0.0;
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
} else if (lookup_config == 3u) {
d[SP_LUT] = 0.0;
d[RG_LUT] = d[RB_LUT] = d[RR_LUT] = 1.0;
} else if (lookup_config == 4u) {
d[FR_LUT] = 0.0;
} else if (lookup_config == 5u) {
d[D1_LUT] = 0.0;
} else if (lookup_config == 6u) {
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
}
float distance_factor = 1.0; // a
float indirect_factor = 1.0; // fi
float shadow_factor = 1.0; // o
float NdotL = dot(normal, light_vector); // Li dot N
// Two sided diffuse // Two sided diffuse
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u)
@ -315,20 +372,86 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
else else
NdotL = abs(NdotL); NdotL = abs(NdotL);
float light_factor = distance_factor * d[SP_LUT] * indirect_factor * shadow_factor; float geometric_factor;
bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
primary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + if (use_geo_0 || use_geo_1) {
regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); geometric_factor = dot(half_vector, half_vector);
geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0);
} }
float distance_attenuation = 1.0;
if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u);
float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u);
float delta = light_distance * distance_attenuation_scale + distance_attenuation_bias;
delta = clamp(delta, 0.0, 1.0);
int index = int(clamp(floor(delta * 255.0), 0.0, 255.0));
distance_attenuation = lutLookup(16u + light_id, index);
}
float spotlight_attenuation = lightLutLookup(environment_id, SP_LUT, light_id, light_vector, half_vector);
float specular0_distribution = lightLutLookup(environment_id, D0_LUT, light_id, light_vector, half_vector);
float specular1_distribution = lightLutLookup(environment_id, D1_LUT, light_id, light_vector, half_vector);
vec3 reflected_color;
reflected_color.r = lightLutLookup(environment_id, RR_LUT, light_id, light_vector, half_vector);
if (isSamplerEnabled(environment_id, RG_LUT)) {
reflected_color.g = lightLutLookup(environment_id, RG_LUT, light_id, light_vector, half_vector);
} else {
reflected_color.g = reflected_color.r;
}
if (isSamplerEnabled(environment_id, RB_LUT)) {
reflected_color.b = lightLutLookup(environment_id, RB_LUT, light_id, light_vector, half_vector);
} else {
reflected_color.b = reflected_color.r;
}
vec3 specular0 = regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0_distribution;
vec3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1_distribution * reflected_color;
specular0 *= use_geo_0 ? geometric_factor : 1.0;
specular1 *= use_geo_1 ? geometric_factor : 1.0;
float clamp_factor = 1.0;
if (clamp_highlights && NdotL == 0.0) {
clamp_factor = 0.0;
}
float light_factor = distance_attenuation * spotlight_attenuation;
diffuse_sum.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL);
specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1);
}
uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1);
uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1);
// Uses parameters from the last light as Fresnel is only applied to the last light
float fresnel_factor;
if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; if (fresnel_output1 == 1u || fresnel_output2 == 1u) {
if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; fresnel_factor = lightLutLookup(environment_id, FR_LUT, light_id, light_vector, half_vector);
}
if (fresnel_output1 == 1u) {
diffuse_sum.a = fresnel_factor;
}
if (fresnel_output2 == 1u) {
specular_sum.a = fresnel_factor;
}
uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u);
vec4 global_ambient = vec4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0);
primary_color = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0));
secondary_color = clamp(specular_sum, vec4(0.0), vec4(1.0));
if (error_unimpl) { if (error_unimpl) {
// secondary_color = primary_color = vec4(1.0, 0., 1.0, 1.0); // secondary_color = primary_color = unimpl_color;
} }
} }

View file

@ -9,9 +9,7 @@ layout(location = 5) in float a_texcoord0_w;
layout(location = 6) in vec3 a_view; layout(location = 6) in vec3 a_view;
layout(location = 7) in vec2 a_texcoord2; layout(location = 7) in vec2 a_texcoord2;
out vec3 v_normal; out vec4 v_quaternion;
out vec3 v_tangent;
out vec3 v_bitangent;
out vec4 v_colour; out vec4 v_colour;
out vec3 v_texcoord0; out vec3 v_texcoord0;
out vec2 v_texcoord1; out vec2 v_texcoord1;
@ -35,12 +33,6 @@ vec4 abgr8888ToVec4(uint abgr) {
return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24));
} }
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
vec3 u = q.xyz;
float s = q.w;
return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v);
}
// Convert an arbitrary-width floating point literal to an f32 // Convert an arbitrary-width floating point literal to an f32
float decodeFP(uint hex, uint E, uint M) { float decodeFP(uint hex, uint E, uint M) {
uint width = M + E + 1u; uint width = M + E + 1u;
@ -73,10 +65,6 @@ void main() {
v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
v_view = a_view; v_view = a_view;
v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion));
v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion));
v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion));
for (int i = 0; i < 6; i++) { for (int i = 0; i < 6; i++) {
v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]);
} }
@ -95,4 +83,6 @@ void main() {
// There's also another, always-on clipping plane based on vertex z // There's also another, always-on clipping plane based on vertex z
gl_ClipDistance[0] = -a_coords.z; gl_ClipDistance[0] = -a_coords.z;
gl_ClipDistance[1] = dot(clipData, a_coords); gl_ClipDistance[1] = dot(clipData, a_coords);
v_quaternion = a_quaternion;
} }

View file

@ -147,7 +147,8 @@ static void configInit() {
static const retro_variable values[] = { static const retro_variable values[] = {
{"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"}, {"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"},
{"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"}, {"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"},
{"panda3ds_use_ubershader", "Use ubershaders (No stutter, maybe slower); enabled|disabled"}, {"panda3ds_use_ubershader", EmulatorConfig::ubershaderDefault ? "Use ubershaders (No stutter, maybe slower); enabled|disabled"
: "Use ubershaders (No stutter, maybe slower); disabled|enabled"},
{"panda3ds_use_vsync", "Enable VSync; enabled|disabled"}, {"panda3ds_use_vsync", "Enable VSync; enabled|disabled"},
{"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"}, {"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"},
{"panda3ds_use_audio", "Enable audio; disabled|enabled"}, {"panda3ds_use_audio", "Enable audio; disabled|enabled"},
@ -155,6 +156,8 @@ static void configInit() {
{"panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled"}, {"panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled"},
{"panda3ds_battery_level", "Battery percentage; 5|10|20|30|50|70|90|100"}, {"panda3ds_battery_level", "Battery percentage; 5|10|20|30|50|70|90|100"},
{"panda3ds_use_charger", "Charger plugged; enabled|disabled"}, {"panda3ds_use_charger", "Charger plugged; enabled|disabled"},
{"panda3ds_ubershader_lighting_override", "Force shadergen when rendering lights; enabled|disabled"},
{"panda3ds_ubershader_lighting_override_threshold", "Light threshold for forcing shadergen; 1|2|3|4|5|6|7|8"},
{nullptr, nullptr}, {nullptr, nullptr},
}; };
@ -175,6 +178,8 @@ static void configUpdate() {
config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false); config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false);
config.accurateShaderMul = FetchVariableBool("panda3ds_accurate_shader_mul", false); config.accurateShaderMul = FetchVariableBool("panda3ds_accurate_shader_mul", false);
config.useUbershaders = FetchVariableBool("panda3ds_use_ubershader", true); config.useUbershaders = FetchVariableBool("panda3ds_use_ubershader", true);
config.forceShadergenForLights = FetchVariableBool("panda3ds_ubershader_lighting_override", true);
config.lightShadergenThreshold = std::clamp(std::stoi(FetchVariable("panda3ds_ubershader_lighting_override_threshold", "1")), 1, 8);
config.discordRpcEnabled = false; config.discordRpcEnabled = false;
config.save(); config.save();

View file

@ -394,6 +394,17 @@ namespace OpenGL {
GLuint handle() const { return m_handle; } GLuint handle() const { return m_handle; }
bool exists() const { return m_handle != 0; } bool exists() const { return m_handle != 0; }
void free() {
if (exists()) {
glDeleteShader(m_handle);
m_handle = 0;
}
}
#ifdef OPENGL_DESTRUCTORS
~Shader() { free(); }
#endif
}; };
struct Program { struct Program {
@ -431,6 +442,10 @@ namespace OpenGL {
m_handle = 0; m_handle = 0;
} }
} }
#ifdef OPENGL_DESTRUCTORS
~Program() { free(); }
#endif
}; };
static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) { static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) {