mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-06 14:15:41 +12:00
Merge pull request #540 from wheremyfoodat/specialized-shaders-2
Finishing shader generator & mostly fixing lighting (In both shadergen & ubershader)
This commit is contained in:
commit
bbcd21de05
17 changed files with 1136 additions and 432 deletions
280
.github/gles.patch
vendored
280
.github/gles.patch
vendored
|
@ -1,52 +1,3 @@
|
|||
diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp
|
||||
index a11a6ffa..77486a09 100644
|
||||
--- a/src/core/renderer_gl/renderer_gl.cpp
|
||||
+++ b/src/core/renderer_gl/renderer_gl.cpp
|
||||
@@ -357,27 +357,27 @@ void RendererGL::bindTexturesToSlots() {
|
||||
}
|
||||
|
||||
glActiveTexture(GL_TEXTURE0 + 3);
|
||||
- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
|
||||
+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
}
|
||||
|
||||
void RendererGL::updateLightingLUT() {
|
||||
- gpu.lightingLUTDirty = false;
|
||||
- std::array<u16, GPU::LightingLutSize> u16_lightinglut;
|
||||
-
|
||||
- for (int i = 0; i < gpu.lightingLUT.size(); i++) {
|
||||
- uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
|
||||
- u16_lightinglut[i] = value * 65535 / 4095;
|
||||
- }
|
||||
-
|
||||
- glActiveTexture(GL_TEXTURE0 + 3);
|
||||
- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
|
||||
- glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
|
||||
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
- glActiveTexture(GL_TEXTURE0);
|
||||
+ // gpu.lightingLUTDirty = false;
|
||||
+ // std::array<u16, GPU::LightingLutSize> u16_lightinglut;
|
||||
+
|
||||
+ // for (int i = 0; i < gpu.lightingLUT.size(); i++) {
|
||||
+ // uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
|
||||
+ // u16_lightinglut[i] = value * 65535 / 4095;
|
||||
+ // }
|
||||
+
|
||||
+ // glActiveTexture(GL_TEXTURE0 + 3);
|
||||
+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
|
||||
+ // glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
|
||||
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
+ // glActiveTexture(GL_TEXTURE0);
|
||||
}
|
||||
|
||||
void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> vertices) {
|
||||
diff --git a/src/host_shaders/opengl_display.frag b/src/host_shaders/opengl_display.frag
|
||||
index 612671c8..1937f711 100644
|
||||
--- a/src/host_shaders/opengl_display.frag
|
||||
|
@ -70,7 +21,7 @@ index 990e2f80..2e7842ac 100644
|
|||
|
||||
void main() {
|
||||
diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag
|
||||
index f6fa6c55..bb88e278 100644
|
||||
index 9f369e39..b4bb19d3 100644
|
||||
--- a/src/host_shaders/opengl_fragment_shader.frag
|
||||
+++ b/src/host_shaders/opengl_fragment_shader.frag
|
||||
@@ -1,4 +1,5 @@
|
||||
|
@ -78,36 +29,18 @@ index f6fa6c55..bb88e278 100644
|
|||
+#version 300 es
|
||||
+precision mediump float;
|
||||
|
||||
in vec3 v_tangent;
|
||||
in vec3 v_normal;
|
||||
@@ -27,7 +28,7 @@ uniform bool u_depthmapEnable;
|
||||
uniform sampler2D u_tex0;
|
||||
uniform sampler2D u_tex1;
|
||||
uniform sampler2D u_tex2;
|
||||
-uniform sampler1DArray u_tex_lighting_lut;
|
||||
+// uniform sampler1DArray u_tex_lighting_lut;
|
||||
in vec4 v_quaternion;
|
||||
in vec4 v_colour;
|
||||
@@ -164,11 +165,17 @@ float lutLookup(uint lut, int index) {
|
||||
return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r;
|
||||
}
|
||||
|
||||
uniform uint u_picaRegs[0x200 - 0x48];
|
||||
|
||||
@@ -145,16 +146,23 @@ vec4 tevCalculateCombiner(int tev_id) {
|
||||
#define RR_LUT 6u
|
||||
|
||||
float lutLookup(uint lut, uint light, float value) {
|
||||
- if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
|
||||
- if (lut == SP_LUT) lut = light + 8;
|
||||
- return texture(u_tex_lighting_lut, vec2(value, lut)).r;
|
||||
+ // if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
|
||||
+ // if (lut == SP_LUT) lut = light + 8;
|
||||
+ // return texture(u_tex_lighting_lut, vec2(value, lut)).r;
|
||||
+ return 0.0;
|
||||
+}
|
||||
+
|
||||
+// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead
|
||||
+// some gles versions have bitfieldExtractCompat and complain if you redefine it, some don't and compile error, using this instead
|
||||
+uint bitfieldExtractCompat(uint val, int off, int size) {
|
||||
+ uint mask = uint((1 << size) - 1);
|
||||
+ return uint(val >> off) & mask;
|
||||
}
|
||||
|
||||
+}
|
||||
+
|
||||
vec3 regToColor(uint reg) {
|
||||
// Normalization scale to convert from [0...255] to [0.0...1.0]
|
||||
const float scale = 1.0 / 255.0;
|
||||
|
@ -117,89 +50,109 @@ index f6fa6c55..bb88e278 100644
|
|||
}
|
||||
|
||||
// Convert an arbitrary-width floating point literal to an f32
|
||||
@@ -189,7 +197,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
vec3 view = normalize(v_view);
|
||||
@@ -208,16 +215,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
|
||||
|
||||
bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment
|
||||
|
||||
- if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
|
||||
+ if (!current_sampler_enabled || (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
|
||||
+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
|
||||
float scale = float(1u << scale_id);
|
||||
if (scale_id >= 6u) scale /= 256.0;
|
||||
|
||||
float delta = 1.0;
|
||||
- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
|
||||
+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
|
||||
switch (input_id) {
|
||||
case 0u: {
|
||||
delta = dot(normal, normalize(half_vector));
|
||||
@@ -239,11 +246,11 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
|
||||
int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u)));
|
||||
int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u)));
|
||||
|
||||
- // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions
|
||||
+ // Sign extend them. Normally bitfieldExtractCompat would do that but it's missing on some versions
|
||||
// of GLSL so we do it manually
|
||||
- int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
|
||||
- int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
|
||||
- int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);
|
||||
+ int se_x = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
|
||||
+ int se_y = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
|
||||
+ int se_z = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);
|
||||
|
||||
if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000;
|
||||
if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000;
|
||||
@@ -270,9 +277,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
|
||||
}
|
||||
|
||||
// 0 = enabled
|
||||
- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
|
||||
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
|
||||
// Two sided diffuse
|
||||
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
|
||||
+ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
|
||||
delta = max(delta, 0.0);
|
||||
} else {
|
||||
delta = abs(delta);
|
||||
@@ -296,7 +303,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
|
||||
// Implements the following algorthm: https://mathb.in/26766
|
||||
void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu);
|
||||
- if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
|
||||
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
|
||||
primary_color = secondary_color = vec4(1.0);
|
||||
primary_color = secondary_color = vec4(0.0);
|
||||
return;
|
||||
}
|
||||
@@ -213,7 +221,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
bool error_unimpl = false;
|
||||
@@ -313,7 +320,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
|
||||
GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);
|
||||
|
||||
- uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2);
|
||||
+ uint bump_mode = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 28, 2);
|
||||
|
||||
// Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker
|
||||
switch (bump_mode) {
|
||||
@@ -326,15 +333,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
|
||||
vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);
|
||||
|
||||
- uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4);
|
||||
- bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
|
||||
+ uint environment_id = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 4, 4);
|
||||
+ bool clamp_highlights = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
|
||||
|
||||
uint light_id;
|
||||
vec3 light_vector;
|
||||
vec3 half_vector;
|
||||
|
||||
for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) {
|
||||
- uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3);
|
||||
+ uint light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3);
|
||||
- light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
|
||||
+ light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
|
||||
|
||||
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id);
|
||||
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id);
|
||||
@@ -224,14 +232,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id);
|
||||
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u));
|
||||
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u));
|
||||
@@ -346,12 +353,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
|
||||
vec3 light_vector = normalize(vec3(
|
||||
float light_distance;
|
||||
vec3 light_position = vec3(
|
||||
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
|
||||
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
|
||||
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
|
||||
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
|
||||
));
|
||||
|
||||
vec3 half_vector;
|
||||
);
|
||||
|
||||
// Positional Light
|
||||
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
|
||||
+ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
|
||||
// error_unimpl = true;
|
||||
half_vector = normalize(normalize(light_vector + v_view) + view);
|
||||
}
|
||||
@@ -242,12 +250,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
light_vector = light_position + v_view;
|
||||
}
|
||||
|
||||
for (int c = 0; c < 7; c++) {
|
||||
- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
|
||||
- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
|
||||
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
|
||||
+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
|
||||
float scale = float(1u << scale_id);
|
||||
if (scale_id >= 6u) scale /= 256.0;
|
||||
|
||||
- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
|
||||
+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
|
||||
if (input_id == 0u)
|
||||
d[c] = dot(normal, half_vector);
|
||||
else if (input_id == 1u)
|
||||
@@ -260,9 +268,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id);
|
||||
uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id);
|
||||
vec3 spot_light_vector = normalize(vec3(
|
||||
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
|
||||
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
|
||||
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
|
||||
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
|
||||
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
|
||||
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
|
||||
));
|
||||
d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP);
|
||||
} else if (input_id == 5u) {
|
||||
@@ -273,13 +281,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
}
|
||||
|
||||
d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale;
|
||||
- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
|
||||
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
|
||||
} else {
|
||||
d[c] = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
- uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4);
|
||||
+ uint lookup_config = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 4, 4);
|
||||
if (lookup_config == 0u) {
|
||||
d[D1_LUT] = 0.0;
|
||||
d[FR_LUT] = 0.0;
|
||||
@@ -310,7 +318,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
float NdotL = dot(normal, light_vector); // Li dot N
|
||||
@@ -367,23 +374,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
float NdotL = dot(normal, light_vector); // N dot Li
|
||||
|
||||
// Two sided diffuse
|
||||
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u)
|
||||
|
@ -207,19 +160,40 @@ index f6fa6c55..bb88e278 100644
|
|||
NdotL = max(0.0, NdotL);
|
||||
else
|
||||
NdotL = abs(NdotL);
|
||||
@@ -321,8 +329,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] +
|
||||
regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT]));
|
||||
|
||||
float geometric_factor;
|
||||
- bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
|
||||
- bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
|
||||
+ bool use_geo_0 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
|
||||
+ bool use_geo_1 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
|
||||
if (use_geo_0 || use_geo_1) {
|
||||
geometric_factor = dot(half_vector, half_vector);
|
||||
geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0);
|
||||
}
|
||||
|
||||
float distance_attenuation = 1.0;
|
||||
- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
|
||||
- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
|
||||
- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
|
||||
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
|
||||
+ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
|
||||
+ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
|
||||
|
||||
float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u);
|
||||
float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u);
|
||||
@@ -428,8 +435,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1);
|
||||
}
|
||||
|
||||
- uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1);
|
||||
- uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1);
|
||||
+ uint fresnel_output1 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 2, 1);
|
||||
+ uint fresnel_output2 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 3, 1);
|
||||
|
||||
if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT];
|
||||
if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT];
|
||||
// Uses parameters from the last light as Fresnel is only applied to the last light
|
||||
float fresnel_factor;
|
||||
|
||||
diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert
|
||||
index a25d7a6d..7cf40398 100644
|
||||
index 057f9a88..dc735ced 100644
|
||||
--- a/src/host_shaders/opengl_vertex_shader.vert
|
||||
+++ b/src/host_shaders/opengl_vertex_shader.vert
|
||||
@@ -1,4 +1,6 @@
|
||||
|
@ -230,7 +204,7 @@ index a25d7a6d..7cf40398 100644
|
|||
|
||||
layout(location = 0) in vec4 a_coords;
|
||||
layout(location = 1) in vec4 a_quaternion;
|
||||
@@ -20,7 +22,7 @@ out vec2 v_texcoord2;
|
||||
@@ -18,7 +20,7 @@ out vec2 v_texcoord2;
|
||||
flat out vec4 v_textureEnvColor[6];
|
||||
flat out vec4 v_textureEnvBufferColor;
|
||||
|
||||
|
@ -239,7 +213,7 @@ index a25d7a6d..7cf40398 100644
|
|||
|
||||
// TEV uniforms
|
||||
uniform uint u_textureEnvColor[6];
|
||||
@@ -93,6 +95,6 @@ void main() {
|
||||
@@ -81,8 +83,8 @@ void main() {
|
||||
);
|
||||
|
||||
// There's also another, always-on clipping plane based on vertex z
|
||||
|
@ -247,16 +221,20 @@ index a25d7a6d..7cf40398 100644
|
|||
- gl_ClipDistance[1] = dot(clipData, a_coords);
|
||||
+ // gl_ClipDistance[0] = -a_coords.z;
|
||||
+ // gl_ClipDistance[1] = dot(clipData, a_coords);
|
||||
|
||||
v_quaternion = a_quaternion;
|
||||
}
|
||||
diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp
|
||||
index f368f573..5ead7f63 100644
|
||||
index 4a08650a..21af37e3 100644
|
||||
--- a/third_party/opengl/opengl.hpp
|
||||
+++ b/third_party/opengl/opengl.hpp
|
||||
@@ -520,21 +520,21 @@ namespace OpenGL {
|
||||
@@ -583,22 +583,22 @@ namespace OpenGL {
|
||||
static void disableScissor() { glDisable(GL_SCISSOR_TEST); }
|
||||
static void enableBlend() { glEnable(GL_BLEND); }
|
||||
static void disableBlend() { glDisable(GL_BLEND); }
|
||||
static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); }
|
||||
- static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); }
|
||||
- static void disableLogicOp() { glDisable(GL_COLOR_LOGIC_OP); }
|
||||
+ static void enableLogicOp() { /* glEnable(GL_COLOR_LOGIC_OP); */ }
|
||||
+ static void disableLogicOp() { /* glDisable(GL_COLOR_LOGIC_OP); */ }
|
||||
static void enableDepth() { glEnable(GL_DEPTH_TEST); }
|
||||
static void disableDepth() { glDisable(GL_DEPTH_TEST); }
|
||||
|
|
79
docs/3ds/lighting.md
Normal file
79
docs/3ds/lighting.md
Normal file
|
@ -0,0 +1,79 @@
|
|||
## Info on the lighting implementation
|
||||
|
||||
### Missing shadow attenuation
|
||||
Shadow attenuation samples a texture unit, and that likely needs render to texture for most games so that they can construct
|
||||
their shadow map. As such, the colors are not multiplied by the shadow attenuation value, so there are no shadows.
|
||||
|
||||
### Missing bump mapping
|
||||
Bump mapping also samples a texture unit. It most likely doesn't need render to texture, but it may need a better texture sampling
|
||||
implementation (such as GPUREG_TEXUNITi_BORDER_COLOR, GPUREG_TEXUNITi_BORDER_PARAM). Bump mapping would work for some things,
|
||||
namely the 3ds-examples bump mapping demo, but would break others such as Toad Treasure Tracker with a naive `texture` implementation.
|
||||
|
||||
Also the CP configuration is missing, because it needs a tangent map implementation. It is currently marked with error_unimpl.
|
||||
|
||||
### samplerEnabledBitfields
|
||||
Holds the enabled state of the lighting samplers for various PICA configurations
|
||||
As explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0
|
||||
|
||||
```c
|
||||
const bool samplerEnabled[9 * 7] = bool[9 * 7](
|
||||
// D0 D1 SP FR RB RG RR
|
||||
true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR
|
||||
false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR
|
||||
true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR
|
||||
true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR
|
||||
true, true, true, false, true, true, true, // Configuration 4: All except for FR
|
||||
true, false, true, true, true, true, true, // Configuration 5: All except for D1
|
||||
true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG
|
||||
false, false, false, false, false, false, false, // Configuration 7: Unused
|
||||
true, true, true, true, true, true, true // Configuration 8: All
|
||||
);
|
||||
```
|
||||
|
||||
The above has been condensed to two uints for performance reasons.
|
||||
You can confirm they are the same by running the following:
|
||||
```c
|
||||
const uint samplerEnabledBitfields[2] = { 0x7170e645u, 0x7f013fefu };
|
||||
for (int i = 0; i < 9 * 7; i++) {
|
||||
unsigned arrayIndex = (i >> 5);
|
||||
bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u;
|
||||
if (samplerEnabled[i] == b) {
|
||||
printf("%d: happy\n", i);
|
||||
} else {
|
||||
printf("%d: unhappy\n", i);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### lightLutLookup
|
||||
lut_id is one of these values
|
||||
0 D0
|
||||
1 D1
|
||||
2 SP
|
||||
3 FR
|
||||
4 RB
|
||||
5 RG
|
||||
6 RR
|
||||
|
||||
lut_index on the other hand represents the actual index of the LUT in the texture
|
||||
u_tex_lighting_lut has 24 LUTs and they are used like so:
|
||||
0 D0
|
||||
1 D1
|
||||
2 is missing because SP uses LUTs 8-15
|
||||
3 FR
|
||||
4 RB
|
||||
5 RG
|
||||
6 RR
|
||||
8-15 SP0-7
|
||||
16-23 DA0-7, but this is not handled in this function as the lookup is a bit different
|
||||
|
||||
The light environment configuration controls which LUTs are available for use
|
||||
If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1
|
||||
If RR is enabled but RG and RB are not, the output of RR is used for all three components: Red, Green and Blue.
|
||||
|
||||
### Distance attenuation
|
||||
Distance attenuation is computed differently from the other factors, for example
|
||||
it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use
|
||||
GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the
|
||||
fragment and the distance attenuation scale and bias to calculate where in the LUT to look up.
|
||||
See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE
|
|
@ -29,20 +29,204 @@ namespace PICA {
|
|||
std::array<u32, 4 * 6> tevConfigs;
|
||||
};
|
||||
|
||||
struct Light {
|
||||
union {
|
||||
u16 raw;
|
||||
BitField<0, 3, u16> num;
|
||||
BitField<3, 1, u16> directional;
|
||||
BitField<4, 1, u16> twoSidedDiffuse;
|
||||
BitField<5, 1, u16> distanceAttenuationEnable;
|
||||
BitField<6, 1, u16> spotAttenuationEnable;
|
||||
BitField<7, 1, u16> geometricFactor0;
|
||||
BitField<8, 1, u16> geometricFactor1;
|
||||
BitField<9, 1, u16> shadowEnable;
|
||||
};
|
||||
};
|
||||
|
||||
struct LightingLUTConfig {
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> enable;
|
||||
BitField<1, 1, u32> absInput;
|
||||
BitField<2, 3, u32> type;
|
||||
};
|
||||
float scale;
|
||||
};
|
||||
|
||||
struct LightingConfig {
|
||||
union {
|
||||
u32 raw{};
|
||||
BitField<0, 1, u32> enable;
|
||||
BitField<1, 4, u32> lightNum;
|
||||
BitField<5, 2, u32> bumpMode;
|
||||
BitField<7, 2, u32> bumpSelector;
|
||||
BitField<9, 1, u32> bumpRenorm;
|
||||
BitField<10, 1, u32> clampHighlights;
|
||||
BitField<11, 4, u32> config;
|
||||
BitField<15, 1, u32> enablePrimaryAlpha;
|
||||
BitField<16, 1, u32> enableSecondaryAlpha;
|
||||
BitField<17, 1, u32> enableShadow;
|
||||
BitField<18, 1, u32> shadowPrimary;
|
||||
BitField<19, 1, u32> shadowSecondary;
|
||||
BitField<20, 1, u32> shadowInvert;
|
||||
BitField<21, 1, u32> shadowAlpha;
|
||||
BitField<22, 2, u32> shadowSelector;
|
||||
};
|
||||
|
||||
std::array<LightingLUTConfig, 7> luts{};
|
||||
|
||||
std::array<Light, 8> lights{};
|
||||
|
||||
LightingConfig(const std::array<u32, 0x300>& regs) {
|
||||
// Ignore lighting registers if it's disabled
|
||||
if ((regs[InternalRegs::LightingEnable] & 1) == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 config0 = regs[InternalRegs::LightConfig0];
|
||||
const u32 config1 = regs[InternalRegs::LightConfig1];
|
||||
const u32 totalLightCount = Helpers::getBits<0, 3>(regs[InternalRegs::LightNumber]) + 1;
|
||||
|
||||
enable = 1;
|
||||
lightNum = totalLightCount;
|
||||
|
||||
enableShadow = Helpers::getBit<0>(config0);
|
||||
if (enableShadow) [[unlikely]] {
|
||||
shadowPrimary = Helpers::getBit<16>(config0);
|
||||
shadowSecondary = Helpers::getBit<17>(config0);
|
||||
shadowInvert = Helpers::getBit<18>(config0);
|
||||
shadowAlpha = Helpers::getBit<19>(config0);
|
||||
shadowSelector = Helpers::getBits<24, 2>(config0);
|
||||
}
|
||||
|
||||
enablePrimaryAlpha = Helpers::getBit<2>(config0);
|
||||
enableSecondaryAlpha = Helpers::getBit<3>(config0);
|
||||
config = Helpers::getBits<4, 4>(config0);
|
||||
|
||||
bumpSelector = Helpers::getBits<22, 2>(config0);
|
||||
clampHighlights = Helpers::getBit<27>(config0);
|
||||
bumpMode = Helpers::getBits<28, 2>(config0);
|
||||
bumpRenorm = Helpers::getBit<30>(config0) ^ 1; // 0 = enable so flip it with xor
|
||||
|
||||
for (int i = 0; i < totalLightCount; i++) {
|
||||
auto& light = lights[i];
|
||||
light.num = (regs[InternalRegs::LightPermutation] >> (i * 4)) & 0x7;
|
||||
|
||||
const u32 lightConfig = regs[InternalRegs::Light0Config + 0x10 * light.num];
|
||||
light.directional = Helpers::getBit<0>(lightConfig);
|
||||
light.twoSidedDiffuse = Helpers::getBit<1>(lightConfig);
|
||||
light.geometricFactor0 = Helpers::getBit<2>(lightConfig);
|
||||
light.geometricFactor1 = Helpers::getBit<3>(lightConfig);
|
||||
|
||||
light.shadowEnable = ((config1 >> light.num) & 1) ^ 1; // This also does 0 = enabled
|
||||
light.spotAttenuationEnable = ((config1 >> (8 + light.num)) & 1) ^ 1; // Same here
|
||||
light.distanceAttenuationEnable = ((config1 >> (24 + light.num)) & 1) ^ 1; // Of course same here
|
||||
}
|
||||
|
||||
LightingLUTConfig& d0 = luts[Lights::LUT_D0];
|
||||
LightingLUTConfig& d1 = luts[Lights::LUT_D1];
|
||||
LightingLUTConfig& sp = luts[spotlightLutIndex];
|
||||
LightingLUTConfig& fr = luts[Lights::LUT_FR];
|
||||
LightingLUTConfig& rb = luts[Lights::LUT_RB];
|
||||
LightingLUTConfig& rg = luts[Lights::LUT_RG];
|
||||
LightingLUTConfig& rr = luts[Lights::LUT_RR];
|
||||
|
||||
d0.enable = Helpers::getBit<16>(config1) == 0;
|
||||
d1.enable = Helpers::getBit<17>(config1) == 0;
|
||||
fr.enable = Helpers::getBit<19>(config1) == 0;
|
||||
rb.enable = Helpers::getBit<20>(config1) == 0;
|
||||
rg.enable = Helpers::getBit<21>(config1) == 0;
|
||||
rr.enable = Helpers::getBit<22>(config1) == 0;
|
||||
sp.enable = 1;
|
||||
|
||||
const u32 lutAbs = regs[InternalRegs::LightLUTAbs];
|
||||
const u32 lutSelect = regs[InternalRegs::LightLUTSelect];
|
||||
const u32 lutScale = regs[InternalRegs::LightLUTScale];
|
||||
static constexpr float scales[] = {1.0f, 2.0f, 4.0f, 8.0f, 0.0f, 0.0f, 0.25f, 0.5f};
|
||||
|
||||
if (d0.enable) {
|
||||
d0.absInput = Helpers::getBit<1>(lutAbs) == 0;
|
||||
d0.type = Helpers::getBits<0, 3>(lutSelect);
|
||||
d0.scale = scales[Helpers::getBits<0, 3>(lutScale)];
|
||||
}
|
||||
|
||||
if (d1.enable) {
|
||||
d1.absInput = Helpers::getBit<5>(lutAbs) == 0;
|
||||
d1.type = Helpers::getBits<4, 3>(lutSelect);
|
||||
d1.scale = scales[Helpers::getBits<4, 3>(lutScale)];
|
||||
}
|
||||
|
||||
sp.absInput = Helpers::getBit<9>(lutAbs) == 0;
|
||||
sp.type = Helpers::getBits<8, 3>(lutSelect);
|
||||
sp.scale = scales[Helpers::getBits<8, 3>(lutScale)];
|
||||
|
||||
if (fr.enable) {
|
||||
fr.absInput = Helpers::getBit<13>(lutAbs) == 0;
|
||||
fr.type = Helpers::getBits<12, 3>(lutSelect);
|
||||
fr.scale = scales[Helpers::getBits<12, 3>(lutScale)];
|
||||
}
|
||||
|
||||
if (rb.enable) {
|
||||
rb.absInput = Helpers::getBit<17>(lutAbs) == 0;
|
||||
rb.type = Helpers::getBits<16, 3>(lutSelect);
|
||||
rb.scale = scales[Helpers::getBits<16, 3>(lutScale)];
|
||||
}
|
||||
|
||||
if (rg.enable) {
|
||||
rg.absInput = Helpers::getBit<21>(lutAbs) == 0;
|
||||
rg.type = Helpers::getBits<20, 3>(lutSelect);
|
||||
rg.scale = scales[Helpers::getBits<20, 3>(lutScale)];
|
||||
}
|
||||
|
||||
if (rr.enable) {
|
||||
rr.absInput = Helpers::getBit<25>(lutAbs) == 0;
|
||||
rr.type = Helpers::getBits<24, 3>(lutSelect);
|
||||
rr.scale = scales[Helpers::getBits<24, 3>(lutScale)];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Config used for identifying unique fragment pipeline configurations
|
||||
struct FragmentConfig {
|
||||
OutputConfig outConfig;
|
||||
TextureConfig texConfig;
|
||||
LightingConfig lighting;
|
||||
|
||||
bool operator==(const FragmentConfig& config) const {
|
||||
// Hash function and equality operator required by std::unordered_map
|
||||
return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
|
||||
}
|
||||
|
||||
FragmentConfig(const std::array<u32, 0x300>& regs) : lighting(regs) {
|
||||
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
|
||||
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);
|
||||
|
||||
outConfig.alphaTestFunction =
|
||||
(alphaTestConfig & 1) ? static_cast<PICA::CompareFunction>(alphaTestFunction) : PICA::CompareFunction::Always;
|
||||
outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1;
|
||||
|
||||
texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
|
||||
texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
|
||||
|
||||
// Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like
|
||||
// {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO
|
||||
#define setupTevStage(stage) \
|
||||
std::memcpy(&texConfig.tevConfigs[stage * 4], ®s[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \
|
||||
texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 4];
|
||||
|
||||
setupTevStage(0);
|
||||
setupTevStage(1);
|
||||
setupTevStage(2);
|
||||
setupTevStage(3);
|
||||
setupTevStage(4);
|
||||
setupTevStage(5);
|
||||
#undef setupTevStage
|
||||
}
|
||||
};
|
||||
|
||||
// The configs are compared with memcmp, so the asserted types must not contain
// padding or other bits that do not participate in their value
static_assert(
	std::has_unique_object_representations<OutputConfig>() && std::has_unique_object_representations<TextureConfig>() &&
	std::has_unique_object_representations<Light>()
);
|
||||
} // namespace PICA
|
||||
|
||||
|
|
|
@ -1,10 +1,26 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace PICA {
|
||||
// CPU-side mirror of one light source as it is laid out inside the fragment uniform block
struct LightUniform {
	using vec3 = std::array<float, 3>;

	// std140 requires vec3s be aligned to 16 bytes
	alignas(16) vec3 specular0;
	alignas(16) vec3 specular1;
	alignas(16) vec3 diffuse;
	alignas(16) vec3 ambient;
	alignas(16) vec3 position;
	alignas(16) vec3 spotlightDirection;

	// The two scalars are not over-aligned: the first one lands in the 4 bytes left over
	// after spotlightDirection, the second one directly after it
	float distanceAttenuationBias;
	float distanceAttenuationScale;
};

// Lock the layout at compile time so that an accidental member change cannot silently
// desynchronize the uploaded bytes from the shader-side struct
static_assert(sizeof(LightUniform) == 112 && alignof(LightUniform) == 16);
static_assert(offsetof(LightUniform, specular0) == 0 && offsetof(LightUniform, specular1) == 16);
static_assert(offsetof(LightUniform, diffuse) == 32 && offsetof(LightUniform, ambient) == 48);
static_assert(offsetof(LightUniform, position) == 64 && offsetof(LightUniform, spotlightDirection) == 80);
static_assert(offsetof(LightUniform, distanceAttenuationBias) == 92 && offsetof(LightUniform, distanceAttenuationScale) == 96);
|
||||
|
||||
struct FragmentUniforms {
|
||||
using vec3 = std::array<float, 3>;
|
||||
using vec4 = std::array<float, 4>;
|
||||
|
@ -17,5 +33,13 @@ namespace PICA {
|
|||
alignas(16) vec4 constantColors[tevStageCount];
|
||||
alignas(16) vec4 tevBufferColor;
|
||||
alignas(16) vec4 clipCoords;
|
||||
|
||||
// Note: We upload this as a u32 and decode on GPU
|
||||
u32 globalAmbientLight;
|
||||
// NOTE: THIS MUST BE LAST so that if lighting is disabled we can potentially omit uploading it
|
||||
LightUniform lightUniforms[8];
|
||||
};
|
||||
|
||||
// Assert that lightUniforms is the last member of the structure
|
||||
static_assert(offsetof(FragmentUniforms, lightUniforms) + 8 * sizeof(LightUniform) == sizeof(FragmentUniforms));
|
||||
} // namespace PICA
|
|
@ -67,7 +67,29 @@ namespace PICA {
|
|||
ColourBufferLoc = 0x11D,
|
||||
FramebufferSize = 0x11E,
|
||||
|
||||
//LightingRegs
|
||||
// Lighting registers
|
||||
LightingEnable = 0x8F,
|
||||
Light0Specular0 = 0x140,
|
||||
Light0Specular1 = 0x141,
|
||||
Light0Diffuse = 0x142,
|
||||
Light0Ambient = 0x143,
|
||||
Light0XY = 0x144,
|
||||
Light0Z = 0x145,
|
||||
Light0SpotlightXY = 0x146,
|
||||
Light0SpotlightZ = 0x147,
|
||||
Light0Config = 0x149,
|
||||
Light0AttenuationBias = 0x14A,
|
||||
Light0AttenuationScale = 0x14B,
|
||||
|
||||
LightGlobalAmbient = 0x1C0,
|
||||
LightNumber = 0x1C2,
|
||||
LightConfig0 = 0x1C3,
|
||||
LightConfig1 = 0x1C4,
|
||||
LightPermutation = 0x1D9,
|
||||
LightLUTAbs = 0x1D0,
|
||||
LightLUTSelect = 0x1D1,
|
||||
LightLUTScale = 0x1D2,
|
||||
|
||||
LightingLUTIndex = 0x01C5,
|
||||
LightingLUTData0 = 0x01C8,
|
||||
LightingLUTData1 = 0x01C9,
|
||||
|
@ -231,7 +253,8 @@ namespace PICA {
|
|||
enum : u32 {
|
||||
LUT_D0 = 0,
|
||||
LUT_D1,
|
||||
LUT_FR,
|
||||
// LUT 2 is not used, the emulator internally uses it for referring to the current source's spotlight in shaders
|
||||
LUT_FR = 0x3,
|
||||
LUT_RB,
|
||||
LUT_RG,
|
||||
LUT_RR,
|
||||
|
@ -255,6 +278,11 @@ namespace PICA {
|
|||
};
|
||||
}
|
||||
|
||||
// There's actually 8 different LUTs (SP0-SP7), one for each light with different indices (8-15)
|
||||
// We use an unused LUT value for "this light source's spotlight" instead and figure out which light source to use in compileLutLookup
|
||||
// This is particularly intuitive in several places, such as checking if a LUT is enabled
|
||||
static constexpr int spotlightLutIndex = 2;
|
||||
|
||||
enum class TextureFmt : u32 {
|
||||
RGBA8 = 0x0,
|
||||
RGB8 = 0x1,
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#include <string>
|
||||
|
||||
#include "PICA/gpu.hpp"
|
||||
#include "PICA/pica_frag_config.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
|
@ -13,25 +14,25 @@ namespace PICA::ShaderGen {
|
|||
enum class Language { GLSL };
|
||||
|
||||
class FragmentGenerator {
|
||||
using PICARegs = std::array<u32, 0x300>;
|
||||
API api;
|
||||
Language language;
|
||||
|
||||
void compileTEV(std::string& shader, int stage, const PICARegs& regs);
|
||||
void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index);
|
||||
void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index);
|
||||
void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index);
|
||||
void compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config);
|
||||
void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config);
|
||||
void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config);
|
||||
void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index, const PICA::FragmentConfig& config);
|
||||
void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
|
||||
void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
|
||||
|
||||
void applyAlphaTest(std::string& shader, const PICARegs& regs);
|
||||
|
||||
u32 textureConfig = 0;
|
||||
void applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config);
|
||||
void compileLights(std::string& shader, const PICA::FragmentConfig& config);
|
||||
void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID);
|
||||
bool isSamplerEnabled(u32 environmentID, u32 lutID);
|
||||
|
||||
public:
|
||||
FragmentGenerator(API api, Language language) : api(api), language(language) {}
|
||||
std::string generate(const PICARegs& regs);
|
||||
std::string getVertexShader(const PICARegs& regs);
|
||||
std::string generate(const PICA::FragmentConfig& config);
|
||||
std::string getDefaultVertexShader();
|
||||
|
||||
void setTarget(API api, Language language) {
|
||||
this->api = api;
|
||||
|
|
|
@ -13,12 +13,23 @@ struct EmulatorConfig {
|
|||
static constexpr bool shaderJitDefault = false;
|
||||
#endif
|
||||
|
||||
// For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are
|
||||
// horrible. On other platforms we default to ubershader + shadergen fallback for lights
|
||||
#if defined(__ANDROID__) || defined(__APPLE__)
|
||||
static constexpr bool ubershaderDefault = false;
|
||||
#else
|
||||
static constexpr bool ubershaderDefault = true;
|
||||
#endif
|
||||
|
||||
bool shaderJitEnabled = shaderJitDefault;
|
||||
bool discordRpcEnabled = false;
|
||||
bool useUbershaders = ubershaderDefault;
|
||||
bool accurateShaderMul = false;
|
||||
|
||||
// Toggles whether to force shadergen when there's more than N lights active and we're using the ubershader, for better performance
|
||||
bool forceShadergenForLights = true;
|
||||
int lightShadergenThreshold = 1;
|
||||
|
||||
RendererType rendererType = RendererType::OpenGL;
|
||||
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null;
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ enum class RendererType : s8 {
|
|||
Software = 3,
|
||||
};
|
||||
|
||||
struct EmulatorConfig;
|
||||
class GPU;
|
||||
struct SDL_Window;
|
||||
|
||||
|
@ -46,6 +47,8 @@ class Renderer {
|
|||
u32 outputWindowWidth = 400;
|
||||
u32 outputWindowHeight = 240 * 2;
|
||||
|
||||
EmulatorConfig* emulatorConfig = nullptr;
|
||||
|
||||
public:
|
||||
Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
|
||||
virtual ~Renderer();
|
||||
|
@ -101,4 +104,6 @@ class Renderer {
|
|||
outputWindowWidth = width;
|
||||
outputWindowHeight = height;
|
||||
}
|
||||
|
||||
void setConfig(EmulatorConfig* config) { emulatorConfig = config; }
|
||||
};
|
||||
|
|
|
@ -30,7 +30,7 @@ class RendererGL final : public Renderer {
|
|||
|
||||
OpenGL::VertexArray vao;
|
||||
OpenGL::VertexBuffer vbo;
|
||||
bool usingUbershader = true;
|
||||
bool enableUbershader = true;
|
||||
|
||||
// Data
|
||||
struct {
|
||||
|
@ -63,9 +63,12 @@ class RendererGL final : public Renderer {
|
|||
OpenGL::VertexBuffer dummyVBO;
|
||||
|
||||
OpenGL::Texture screenTexture;
|
||||
GLuint lightLUTTextureArray;
|
||||
OpenGL::Texture lightLUTTexture;
|
||||
OpenGL::Framebuffer screenFramebuffer;
|
||||
OpenGL::Texture blankTexture;
|
||||
// The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation
|
||||
// We can compile this once and then link it with all other generated fragment shaders
|
||||
OpenGL::Shader defaultShadergenVs;
|
||||
|
||||
// Cached recompiled fragment shader
|
||||
struct CachedProgram {
|
||||
|
@ -107,7 +110,7 @@ class RendererGL final : public Renderer {
|
|||
virtual std::string getUbershader() override;
|
||||
virtual void setUbershader(const std::string& shader) override;
|
||||
|
||||
virtual void setUbershaderSetting(bool value) override { usingUbershader = value; }
|
||||
virtual void setUbershaderSetting(bool value) override { enableUbershader = value; }
|
||||
|
||||
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);
|
||||
|
||||
|
|
|
@ -64,6 +64,9 @@ void EmulatorConfig::load() {
|
|||
vsyncEnabled = toml::find_or<toml::boolean>(gpu, "EnableVSync", true);
|
||||
useUbershaders = toml::find_or<toml::boolean>(gpu, "UseUbershaders", ubershaderDefault);
|
||||
accurateShaderMul = toml::find_or<toml::boolean>(gpu, "AccurateShaderMultiplication", false);
|
||||
|
||||
forceShadergenForLights = toml::find_or<toml::boolean>(gpu, "ForceShadergenForLighting", true);
|
||||
lightShadergenThreshold = toml::find_or<toml::integer>(gpu, "ShadergenLightThreshold", 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -130,6 +133,8 @@ void EmulatorConfig::save() {
|
|||
data["GPU"]["EnableVSync"] = vsyncEnabled;
|
||||
data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul;
|
||||
data["GPU"]["UseUbershaders"] = useUbershaders;
|
||||
data["GPU"]["ForceShadergenForLighting"] = forceShadergenForLights;
|
||||
data["GPU"]["ShadergenLightThreshold"] = lightShadergenThreshold;
|
||||
|
||||
data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType));
|
||||
data["Audio"]["EnableAudio"] = audioEnabled;
|
||||
|
|
|
@ -58,6 +58,10 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (renderer != nullptr) {
|
||||
renderer->setConfig(&config);
|
||||
}
|
||||
}
|
||||
|
||||
void GPU::reset() {
|
||||
|
|
|
@ -1,8 +1,21 @@
|
|||
#include "PICA/pica_frag_config.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "PICA/shader_gen.hpp"
|
||||
using namespace PICA;
|
||||
using namespace PICA::ShaderGen;
|
||||
|
||||
static constexpr const char* uniformDefinition = R"(
|
||||
struct LightSource {
|
||||
vec3 specular0;
|
||||
vec3 specular1;
|
||||
vec3 diffuse;
|
||||
vec3 ambient;
|
||||
vec3 position;
|
||||
vec3 spotlightDirection;
|
||||
float distanceAttenuationBias;
|
||||
float distanceAttenuationScale;
|
||||
};
|
||||
|
||||
layout(std140) uniform FragmentUniforms {
|
||||
int alphaReference;
|
||||
float depthScale;
|
||||
|
@ -11,10 +24,14 @@ static constexpr const char* uniformDefinition = R"(
|
|||
vec4 constantColors[6];
|
||||
vec4 tevBufferColor;
|
||||
vec4 clipCoords;
|
||||
|
||||
// Note: We upload this as a u32 and decode on GPU
|
||||
uint globalAmbientLight;
|
||||
LightSource lightSources[8];
|
||||
};
|
||||
)";
|
||||
|
||||
std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
|
||||
std::string FragmentGenerator::getDefaultVertexShader() {
|
||||
std::string ret = "";
|
||||
|
||||
switch (api) {
|
||||
|
@ -44,9 +61,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
|
|||
layout(location = 6) in vec3 a_view;
|
||||
layout(location = 7) in vec2 a_texcoord2;
|
||||
|
||||
out vec3 v_normal;
|
||||
out vec3 v_tangent;
|
||||
out vec3 v_bitangent;
|
||||
out vec4 v_quaternion;
|
||||
out vec4 v_colour;
|
||||
out vec3 v_texcoord0;
|
||||
out vec2 v_texcoord1;
|
||||
|
@ -62,12 +77,6 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
|
|||
return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24));
|
||||
}
|
||||
|
||||
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
|
||||
vec3 u = q.xyz;
|
||||
float s = q.w;
|
||||
return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v);
|
||||
}
|
||||
|
||||
void main() {
|
||||
gl_Position = a_coords;
|
||||
vec4 colourAbs = abs(a_vertexColour);
|
||||
|
@ -77,10 +86,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
|
|||
v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
|
||||
v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
|
||||
v_view = a_view;
|
||||
|
||||
v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion));
|
||||
v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion));
|
||||
v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion));
|
||||
v_quaternion = a_quaternion;
|
||||
|
||||
#ifndef USING_GLES
|
||||
gl_ClipDistance[0] = -a_coords.z;
|
||||
|
@ -92,7 +98,7 @@ std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
std::string FragmentGenerator::generate(const PICARegs& regs) {
|
||||
std::string FragmentGenerator::generate(const FragmentConfig& config) {
|
||||
std::string ret = "";
|
||||
|
||||
switch (api) {
|
||||
|
@ -113,9 +119,7 @@ std::string FragmentGenerator::generate(const PICARegs& regs) {
|
|||
|
||||
// Input and output attributes
|
||||
ret += R"(
|
||||
in vec3 v_tangent;
|
||||
in vec3 v_normal;
|
||||
in vec3 v_bitangent;
|
||||
in vec4 v_quaternion;
|
||||
in vec4 v_colour;
|
||||
in vec3 v_texcoord0;
|
||||
in vec2 v_texcoord1;
|
||||
|
@ -126,23 +130,43 @@ std::string FragmentGenerator::generate(const PICARegs& regs) {
|
|||
uniform sampler2D u_tex0;
|
||||
uniform sampler2D u_tex1;
|
||||
uniform sampler2D u_tex2;
|
||||
// GLES doesn't support sampler1DArray, as such we'll have to change how we handle lighting later
|
||||
#ifndef USING_GLES
|
||||
uniform sampler1DArray u_tex_lighting_lut;
|
||||
#endif
|
||||
uniform sampler2D u_tex_lighting_lut;
|
||||
)";
|
||||
|
||||
ret += uniformDefinition;
|
||||
|
||||
if (config.lighting.enable) {
|
||||
ret += R"(
|
||||
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
|
||||
vec3 u = q.xyz;
|
||||
float s = q.w;
|
||||
return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v);
|
||||
}
|
||||
|
||||
float lutLookup(uint lut, int index) {
|
||||
return texelFetch(u_tex_lighting_lut, ivec2(index, int(lut)), 0).r;
|
||||
}
|
||||
|
||||
vec3 regToColor(uint reg) {
|
||||
return (1.0 / 255.0) * vec3(float((reg >> 20u) & 0xFFu), float((reg >> 10u) & 0xFFu), float(reg & 0xFFu));
|
||||
}
|
||||
)";
|
||||
}
|
||||
|
||||
// Emit main function for fragment shader
|
||||
// When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour
|
||||
ret += R"(
|
||||
void main() {
|
||||
vec4 combinerOutput = v_colour;
|
||||
vec4 previousBuffer = vec4(0.0);
|
||||
vec4 tevNextPreviousBuffer = tevBufferColor;
|
||||
vec4 tevNextPreviousBuffer = tevBufferColor;
|
||||
|
||||
vec4 primaryColor = vec4(0.0);
|
||||
vec4 secondaryColor = vec4(0.0);
|
||||
)";
|
||||
|
||||
compileLights(ret, config);
|
||||
|
||||
ret += R"(
|
||||
vec3 colorOp1 = vec3(0.0);
|
||||
vec3 colorOp2 = vec3(0.0);
|
||||
|
@ -160,44 +184,39 @@ std::string FragmentGenerator::generate(const PICARegs& regs) {
|
|||
float depth = z_over_w * depthScale + depthOffset;
|
||||
)";
|
||||
|
||||
if ((regs[InternalRegs::DepthmapEnable] & 1) == 0) {
|
||||
if (!config.outConfig.depthMapEnable) {
|
||||
ret += "depth /= gl_FragCoord.w;\n";
|
||||
}
|
||||
|
||||
ret += "gl_FragDepth = depth;\n";
|
||||
|
||||
textureConfig = regs[InternalRegs::TexUnitCfg];
|
||||
for (int i = 0; i < 6; i++) {
|
||||
compileTEV(ret, i, regs);
|
||||
compileTEV(ret, i, config);
|
||||
}
|
||||
|
||||
applyAlphaTest(ret, regs);
|
||||
applyAlphaTest(ret, config);
|
||||
|
||||
ret += "fragColor = combinerOutput;\n}"; // End of main function
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICARegs& regs) {
|
||||
// Base address for each TEV stage's configuration
|
||||
static constexpr std::array<u32, 6> ioBases = {
|
||||
InternalRegs::TexEnv0Source, InternalRegs::TexEnv1Source, InternalRegs::TexEnv2Source,
|
||||
InternalRegs::TexEnv3Source, InternalRegs::TexEnv4Source, InternalRegs::TexEnv5Source,
|
||||
};
|
||||
void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config) {
|
||||
const u32* tevValues = config.texConfig.tevConfigs.data() + stage * 4;
|
||||
|
||||
const u32 ioBase = ioBases[stage];
|
||||
TexEnvConfig tev(regs[ioBase], regs[ioBase + 1], regs[ioBase + 2], regs[ioBase + 3], regs[ioBase + 4]);
|
||||
// Pass a 0 to constColor here, as it doesn't matter for compilation
|
||||
TexEnvConfig tev(tevValues[0], tevValues[1], tevValues[2], 0, tevValues[3]);
|
||||
|
||||
if (!tev.isPassthroughStage()) {
|
||||
// Get color operands
|
||||
shader += "colorOp1 = ";
|
||||
getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage);
|
||||
getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage, config);
|
||||
|
||||
shader += ";\ncolorOp2 = ";
|
||||
getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage);
|
||||
getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage, config);
|
||||
|
||||
shader += ";\ncolorOp3 = ";
|
||||
getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage);
|
||||
getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage, config);
|
||||
|
||||
shader += ";\nvec3 outputColor" + std::to_string(stage) + " = clamp(";
|
||||
getColorOperation(shader, tev.colorOp);
|
||||
|
@ -209,13 +228,13 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg
|
|||
} else {
|
||||
// Get alpha operands
|
||||
shader += "alphaOp1 = ";
|
||||
getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage);
|
||||
getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage, config);
|
||||
|
||||
shader += ";\nalphaOp2 = ";
|
||||
getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage);
|
||||
getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage, config);
|
||||
|
||||
shader += ";\nalphaOp3 = ";
|
||||
getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage);
|
||||
getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage, config);
|
||||
|
||||
shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = clamp(";
|
||||
getAlphaOperation(shader, tev.alphaOp);
|
||||
|
@ -231,7 +250,7 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg
|
|||
shader += "previousBuffer = tevNextPreviousBuffer;\n\n";
|
||||
|
||||
// Update the "next previous buffer" if necessary
|
||||
const u32 textureEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
|
||||
const u32 textureEnvUpdateBuffer = config.texConfig.texEnvUpdateBuffer;
|
||||
if (stage < 4) {
|
||||
// Check whether to update rgb
|
||||
if ((textureEnvUpdateBuffer & (0x100 << stage))) {
|
||||
|
@ -245,7 +264,7 @@ void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICAReg
|
|||
}
|
||||
}
|
||||
|
||||
void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index) {
|
||||
void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config) {
|
||||
using OperandType = TexEnvConfig::ColorOperand;
|
||||
|
||||
// For inverting operands, add the 1.0 - x subtraction
|
||||
|
@ -257,31 +276,31 @@ void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Sourc
|
|||
switch (color) {
|
||||
case OperandType::SourceColor:
|
||||
case OperandType::OneMinusSourceColor:
|
||||
getSource(shader, source, index);
|
||||
getSource(shader, source, index, config);
|
||||
shader += ".rgb";
|
||||
break;
|
||||
|
||||
case OperandType::SourceRed:
|
||||
case OperandType::OneMinusSourceRed:
|
||||
getSource(shader, source, index);
|
||||
getSource(shader, source, index, config);
|
||||
shader += ".rrr";
|
||||
break;
|
||||
|
||||
case OperandType::SourceGreen:
|
||||
case OperandType::OneMinusSourceGreen:
|
||||
getSource(shader, source, index);
|
||||
getSource(shader, source, index, config);
|
||||
shader += ".ggg";
|
||||
break;
|
||||
|
||||
case OperandType::SourceBlue:
|
||||
case OperandType::OneMinusSourceBlue:
|
||||
getSource(shader, source, index);
|
||||
getSource(shader, source, index, config);
|
||||
shader += ".bbb";
|
||||
break;
|
||||
|
||||
case OperandType::SourceAlpha:
|
||||
case OperandType::OneMinusSourceAlpha:
|
||||
getSource(shader, source, index);
|
||||
getSource(shader, source, index, config);
|
||||
shader += ".aaa";
|
||||
break;
|
||||
|
||||
|
@ -292,7 +311,7 @@ void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Sourc
|
|||
}
|
||||
}
|
||||
|
||||
void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index) {
|
||||
void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index, const PICA::FragmentConfig& config) {
|
||||
using OperandType = TexEnvConfig::AlphaOperand;
|
||||
|
||||
// For inverting operands, add the 1.0 - x subtraction
|
||||
|
@ -304,25 +323,25 @@ void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Sourc
|
|||
switch (color) {
|
||||
case OperandType::SourceRed:
|
||||
case OperandType::OneMinusSourceRed:
|
||||
getSource(shader, source, index);
|
||||
getSource(shader, source, index, config);
|
||||
shader += ".r";
|
||||
break;
|
||||
|
||||
case OperandType::SourceGreen:
|
||||
case OperandType::OneMinusSourceGreen:
|
||||
getSource(shader, source, index);
|
||||
getSource(shader, source, index, config);
|
||||
shader += ".g";
|
||||
break;
|
||||
|
||||
case OperandType::SourceBlue:
|
||||
case OperandType::OneMinusSourceBlue:
|
||||
getSource(shader, source, index);
|
||||
getSource(shader, source, index, config);
|
||||
shader += ".b";
|
||||
break;
|
||||
|
||||
case OperandType::SourceAlpha:
|
||||
case OperandType::OneMinusSourceAlpha:
|
||||
getSource(shader, source, index);
|
||||
getSource(shader, source, index, config);
|
||||
shader += ".a";
|
||||
break;
|
||||
|
||||
|
@ -333,14 +352,14 @@ void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Sourc
|
|||
}
|
||||
}
|
||||
|
||||
void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index) {
|
||||
void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config) {
|
||||
switch (source) {
|
||||
case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break;
|
||||
case TexEnvConfig::Source::Texture0: shader += "texture(u_tex0, v_texcoord0.xy)"; break;
|
||||
case TexEnvConfig::Source::Texture1: shader += "texture(u_tex1, v_texcoord1)"; break;
|
||||
case TexEnvConfig::Source::Texture2: {
|
||||
// If bit 13 in texture config is set then we use the texcoords for texture 1, otherwise for texture 2
|
||||
if (Helpers::getBit<13>(textureConfig)) {
|
||||
if (Helpers::getBit<13>(config.texConfig.texUnitConfig)) {
|
||||
shader += "texture(u_tex2, v_texcoord1)";
|
||||
} else {
|
||||
shader += "texture(u_tex2, v_texcoord2)";
|
||||
|
@ -353,8 +372,8 @@ void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source sour
|
|||
case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break;
|
||||
|
||||
// Lighting
|
||||
case TexEnvConfig::Source::PrimaryFragmentColor:
|
||||
case TexEnvConfig::Source::SecondaryFragmentColor: shader += "vec4(1.0, 1.0, 1.0, 1.0)"; break;
|
||||
case TexEnvConfig::Source::PrimaryFragmentColor: shader += "primaryColor"; break;
|
||||
case TexEnvConfig::Source::SecondaryFragmentColor: shader += "secondaryColor"; break;
|
||||
|
||||
default:
|
||||
Helpers::warn("Unimplemented TEV source: %d", static_cast<int>(source));
|
||||
|
@ -401,12 +420,11 @@ void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Ope
|
|||
}
|
||||
}
|
||||
|
||||
void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs) {
|
||||
const u32 alphaConfig = regs[InternalRegs::AlphaTestConfig];
|
||||
const auto function = static_cast<CompareFunction>(Helpers::getBits<4, 3>(alphaConfig));
|
||||
void FragmentGenerator::applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config) {
|
||||
const CompareFunction function = config.outConfig.alphaTestFunction;
|
||||
|
||||
// Alpha test disabled
|
||||
if (Helpers::getBit<0>(alphaConfig) == 0 || function == CompareFunction::Always) {
|
||||
if (function == CompareFunction::Always) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -430,3 +448,203 @@ void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs
|
|||
|
||||
shader += ") { discard; }\n";
|
||||
}
|
||||
|
||||
// Emits the GLSL that evaluates fragment lighting and writes the results to the shader-side
// "primaryColor" and "secondaryColor" variables. Emits nothing when lighting is disabled in the config.
// The generated code is specialized per light: every branch on the light/lighting configuration is
// resolved here, on the CPU, so the shader only contains the paths that are actually enabled.
void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentConfig& config) {
	if (!config.lighting.enable) {
		return;
	}

	// Currently ignore bump mode
	shader += "vec3 normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion);\n";

	// Accumulators and scratch variables shared by all lights
	shader += R"(
		vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
		vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);
		vec3 light_position, light_vector, half_vector, specular0, specular1, reflected_color;

		float light_distance, NdotL, light_factor, geometric_factor, distance_attenuation, distance_att_delta;
		float spotlight_attenuation, specular0_dist, specular1_dist;
		float lut_lookup_result, lut_lookup_delta;
		int lut_lookup_index;
	)";

	for (int i = 0; i < config.lighting.lightNum; i++) {
		const auto& lightConfig = config.lighting.lights[i];
		// Index of this light in the lightSources uniform array, which may differ from the loop index
		const uint lightID = lightConfig.num;
		const std::string lightSource = "lightSources[" + std::to_string(lightID) + "]";

		shader += "light_position = " + lightSource + ".position;\n";

		if (lightConfig.directional) {  // Directional lighting
			shader += "light_vector = light_position;\n";
		} else {  // Positional lighting
			shader += "light_vector = light_position + v_view;\n";
		}

		shader += R"(
			light_distance = length(light_vector);
			light_vector = normalize(light_vector);
			half_vector = light_vector + normalize(v_view);

			distance_attenuation = 1.0;
			NdotL = dot(normal, light_vector);
		)";

		shader += lightConfig.twoSidedDiffuse ? "NdotL = abs(NdotL);\n" : "NdotL = max(NdotL, 0.0);\n";

		// The geometric factor is only computed if at least one of the specular terms uses it
		if (lightConfig.geometricFactor0 || lightConfig.geometricFactor1) {
			shader += R"(
				geometric_factor = dot(half_vector, half_vector);
				geometric_factor = (geometric_factor == 0.0) ? 0.0 : min(NdotL / geometric_factor, 1.0);
			)";
		}

		if (lightConfig.distanceAttenuationEnable) {
			shader += "distance_att_delta = clamp(light_distance * " + lightSource + ".distanceAttenuationScale + " + lightSource +
					  ".distanceAttenuationBias, 0.0, 1.0);\n";

			// The LUT number must be emitted as an unsigned literal ("16u", not "16"): lutLookup takes a uint
			// and GLSL ES performs no implicit int -> uint conversion, so a signed literal fails to compile there
			shader += "distance_attenuation = lutLookup(" + std::to_string(16 + lightID) +
					  "u, int(clamp(floor(distance_att_delta * 256.0), 0.0, 255.0)));\n";
		}

		compileLUTLookup(shader, config, i, spotlightLutIndex);
		shader += "spotlight_attenuation = lut_lookup_result;\n";

		compileLUTLookup(shader, config, i, PICA::Lights::LUT_D0);
		shader += "specular0_dist = lut_lookup_result;\n";

		compileLUTLookup(shader, config, i, PICA::Lights::LUT_D1);
		shader += "specular1_dist = lut_lookup_result;\n";

		compileLUTLookup(shader, config, i, PICA::Lights::LUT_RR);
		shader += "reflected_color.r = lut_lookup_result;\n";

		// The green and blue reflection channels fall back to the red channel when their sampler is disabled
		if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RG)) {
			compileLUTLookup(shader, config, i, PICA::Lights::LUT_RG);
			shader += "reflected_color.g = lut_lookup_result;\n";
		} else {
			shader += "reflected_color.g = reflected_color.r;\n";
		}

		if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RB)) {
			compileLUTLookup(shader, config, i, PICA::Lights::LUT_RB);
			shader += "reflected_color.b = lut_lookup_result;\n";
		} else {
			shader += "reflected_color.b = reflected_color.r;\n";
		}

		shader += "specular0 = " + lightSource + ".specular0 * specular0_dist;\n";
		if (lightConfig.geometricFactor0) {
			shader += "specular0 *= geometric_factor;\n";
		}

		shader += "specular1 = " + lightSource + ".specular1 * specular1_dist * reflected_color;\n";
		if (lightConfig.geometricFactor1) {
			shader += "specular1 *= geometric_factor;\n";
		}

		shader += "light_factor = distance_attenuation * spotlight_attenuation;\n";

		if (config.lighting.clampHighlights) {
			// With highlight clamping, the specular contribution is dropped entirely when NdotL is 0
			shader += "specular_sum.rgb += light_factor * (NdotL == 0.0 ? 0.0 : 1.0) * (specular0 + specular1);\n";
		} else {
			shader += "specular_sum.rgb += light_factor * (specular0 + specular1);\n";
		}

		shader += "diffuse_sum.rgb += light_factor * (" + lightSource + ".ambient + " + lightSource + ".diffuse * NdotL);\n";
	}

	// The fresnel lookup is emitted once, after the loop, using the last light's index
	if (config.lighting.enablePrimaryAlpha || config.lighting.enableSecondaryAlpha) {
		compileLUTLookup(shader, config, config.lighting.lightNum - 1, PICA::Lights::LUT_FR);
		shader += "float fresnel_factor = lut_lookup_result;\n";
	}

	if (config.lighting.enablePrimaryAlpha) {
		shader += "diffuse_sum.a = fresnel_factor;\n";
	}

	if (config.lighting.enableSecondaryAlpha) {
		shader += "specular_sum.a = fresnel_factor;\n";
	}

	shader += R"(
		vec4 global_ambient = vec4(regToColor(globalAmbientLight), 1.0);

		primaryColor = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0));
		secondaryColor = clamp(specular_sum, vec4(0.0), vec4(1.0));
	)";
}
|
||||
|
||||
// Returns whether the given lighting LUT sampler is active in the given lighting environment configuration.
// environmentID selects a row (configurations 0-8) and lutID selects a column (D0, D1, SP, FR, RB, RG, RR).
// Any ID outside the table is reported as disabled instead of reading out of bounds; callers can reach this
// with an unsupported lutID, since compileLUTLookup only warns about such values before querying the table.
bool FragmentGenerator::isSamplerEnabled(u32 environmentID, u32 lutID) {
	static constexpr u32 configCount = 9;
	static constexpr u32 lutCount = 7;

	static constexpr bool samplerEnabled[configCount * lutCount] = {
		// D0     D1     SP     FR     RB     RG     RR
		true,  false, true,  false, false, false, true,   // Configuration 0: D0, SP, RR
		false, false, true,  true,  false, false, true,   // Configuration 1: FR, SP, RR
		true,  true,  false, false, false, false, true,   // Configuration 2: D0, D1, RR
		true,  true,  false, true,  false, false, false,  // Configuration 3: D0, D1, FR
		true,  true,  true,  false, true,  true,  true,   // Configuration 4: All except for FR
		true,  false, true,  true,  true,  true,  true,   // Configuration 5: All except for D1
		true,  true,  true,  true,  false, false, true,   // Configuration 6: All except for RB and RG
		false, false, false, false, false, false, false,  // Configuration 7: Unused
		true,  true,  true,  true,  true,  true,  true,   // Configuration 8: All
	};

	if (environmentID >= configCount || lutID >= lutCount) {
		return false;
	}

	return samplerEnabled[environmentID * lutCount + lutID];
}
|
||||
|
||||
// Appends GLSL to `shader` that evaluates lighting LUT `lutID` for the light at position `lightIndex` of
// config.lighting.lights, leaving the value in the generated shader's `lut_lookup_result` variable.
// When the LUT is disabled, not sampled by the current lighting configuration, or unimplemented,
// the emitted code simply sets `lut_lookup_result` to 1.0.
void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID) {
	// Reject unimplemented LUT IDs before lutID is used to index any per-LUT table.
	// The emitted code is the same neutral result the disabled path produces.
	if (lutID != spotlightLutIndex && lutID > 6) {
		Helpers::warn("Shadergen: Unimplemented LUT value");
		shader += "lut_lookup_result = 1.0;\n";
		return;
	}

	const LightingLUTConfig& lut = config.lighting.luts[lutID];
	const uint lightID = config.lighting.lights[lightIndex].num;

	uint lutIndex = 0;
	bool lutEnabled = false;

	if (lutID == spotlightLutIndex) {
		// These are the spotlight attenuation LUTs
		lutIndex = 8u + lightID;
		lutEnabled = config.lighting.lights[lightIndex].spotAttenuationEnable;
	} else {
		lutIndex = lutID;
		lutEnabled = lut.enable;
	}

	if (!lutEnabled || !isSamplerEnabled(config.lighting.config, lutID)) {
		shader += "lut_lookup_result = 1.0;\n";
		return;
	}

	// NOTE(review): std::to_string formats floats like printf("%f"), which follows the C locale's decimal
	// separator - confirm the process keeps LC_NUMERIC set to "C" so the generated GLSL stays valid.
	const float scale = lut.scale;
	const uint inputID = lut.type;
	const bool absEnabled = lut.absInput;

	// Emit the dot product selected as the LUT input
	switch (inputID) {
		case 0: shader += "lut_lookup_delta = dot(normal, normalize(half_vector));\n"; break;
		case 1: shader += "lut_lookup_delta = dot(normalize(v_view), normalize(half_vector));\n"; break;
		case 2: shader += "lut_lookup_delta = dot(normal, normalize(v_view));\n"; break;
		case 3: shader += "lut_lookup_delta = dot(normal, light_vector);\n"; break;
		case 4: shader += "lut_lookup_delta = dot(light_vector, lightSources[" + std::to_string(lightID) + "].spotlightDirection);\n"; break;

		default:
			Helpers::warn("Shadergen: Unimplemented LUT select");
			shader += "lut_lookup_delta = 1.0;\n";
			break;
	}

	if (absEnabled) {
		// Unsigned input: fold negative values away (abs for two-sided diffuse, clamp to 0 otherwise), then index 0-255
		// NOTE(review): the ubershader's lightLutLookup multiplies by 255.0 on this path - confirm which factor is intended
		const bool twoSidedDiffuse = config.lighting.lights[lightIndex].twoSidedDiffuse;
		shader += twoSidedDiffuse ? "lut_lookup_delta = abs(lut_lookup_delta);\n" : "lut_lookup_delta = max(lut_lookup_delta, 0.0);\n";
		shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", int(clamp(floor(lut_lookup_delta * 256.0), 0.0, 255.0)));\n";
		if (scale != 1.0) {
			shader += "lut_lookup_result *= " + std::to_string(scale) + ";\n";
		}
	} else {
		// Signed input: delta in [-1, 1] becomes an index in [-128, 127], and negative indices wrap into 128-255
		shader += "lut_lookup_index = int(clamp(floor(lut_lookup_delta * 128.0), -128.f, 127.f));\n";
		shader += "if (lut_lookup_index < 0) lut_lookup_index += 256;\n";
		shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", lut_lookup_index) *" + std::to_string(scale) + ";\n";
	}
}
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include <cmrc/cmrc.hpp>
|
||||
|
||||
#include "config.hpp"
|
||||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/pica_frag_uniforms.hpp"
|
||||
#include "PICA/gpu.hpp"
|
||||
|
@ -117,7 +118,10 @@ void RendererGL::initGraphicsContextInternal() {
|
|||
const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320
|
||||
const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall
|
||||
|
||||
glGenTextures(1, &lightLUTTextureArray);
|
||||
lightLUTTexture.create(256, Lights::LUT_Count, GL_R32F);
|
||||
lightLUTTexture.bind();
|
||||
lightLUTTexture.setMinFilter(OpenGL::Linear);
|
||||
lightLUTTexture.setMagFilter(OpenGL::Linear);
|
||||
|
||||
auto prevTexture = OpenGL::getTex2D();
|
||||
|
||||
|
@ -159,6 +163,10 @@ void RendererGL::initGraphicsContextInternal() {
|
|||
OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]);
|
||||
|
||||
reset();
|
||||
|
||||
// Initialize the default vertex shader used with shadergen
|
||||
std::string defaultShadergenVSSource = fragShaderGen.getDefaultVertexShader();
|
||||
defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex);
|
||||
}
|
||||
|
||||
// The OpenGL renderer doesn't need to do anything with the GL context (For Qt frontend) or the SDL window (For SDL frontend)
|
||||
|
@ -348,26 +356,22 @@ void RendererGL::bindTexturesToSlots() {
|
|||
}
|
||||
|
||||
glActiveTexture(GL_TEXTURE0 + 3);
|
||||
glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
|
||||
lightLUTTexture.bind();
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
}
|
||||
|
||||
void RendererGL::updateLightingLUT() {
|
||||
gpu.lightingLUTDirty = false;
|
||||
std::array<u16, GPU::LightingLutSize> u16_lightinglut;
|
||||
std::array<float, GPU::LightingLutSize> lightingLut;
|
||||
|
||||
for (int i = 0; i < gpu.lightingLUT.size(); i++) {
|
||||
uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
|
||||
u16_lightinglut[i] = value * 65535 / 4095;
|
||||
uint64_t value = gpu.lightingLUT[i] & 0xFFF;
|
||||
lightingLut[i] = (float)(value << 4) / 65535.0f;
|
||||
}
|
||||
|
||||
glActiveTexture(GL_TEXTURE0 + 3);
|
||||
glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
|
||||
glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
|
||||
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
lightLUTTexture.bind();
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, Lights::LUT_Count, GL_RED, GL_FLOAT, lightingLut.data());
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
}
|
||||
|
||||
|
@ -380,6 +384,18 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
|
|||
OpenGL::Triangle,
|
||||
};
|
||||
|
||||
bool usingUbershader = enableUbershader;
|
||||
if (usingUbershader) {
|
||||
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
|
||||
const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1;
|
||||
|
||||
// Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using N or more lights via shadergen
|
||||
// This way we generate fewer shaders overall than with full shadergen, but don't tank performance
|
||||
if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) {
|
||||
usingUbershader = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (usingUbershader) {
|
||||
gl.useProgram(triangleProgram);
|
||||
} else {
|
||||
|
@ -780,43 +796,16 @@ std::optional<ColourBuffer> RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt
|
|||
OpenGL::Program& RendererGL::getSpecializedShader() {
|
||||
constexpr uint uboBlockBinding = 2;
|
||||
|
||||
PICA::FragmentConfig fsConfig;
|
||||
auto& outConfig = fsConfig.outConfig;
|
||||
auto& texConfig = fsConfig.texConfig;
|
||||
|
||||
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
|
||||
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);
|
||||
|
||||
outConfig.alphaTestFunction = (alphaTestConfig & 1) ? static_cast<PICA::CompareFunction>(alphaTestFunction) : PICA::CompareFunction::Always;
|
||||
outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1;
|
||||
|
||||
texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
|
||||
texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
|
||||
|
||||
// Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like
|
||||
// {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO
|
||||
// Copies the Source, Operand and Combiner registers of TEV stage `stage` (3 consecutive words starting at
// TexEnv<stage>Source) into texConfig.tevConfigs[stage * 4 .. stage * 4 + 2], then stores the register at
// TexEnv<stage>Source + 5 into texConfig.tevConfigs[stage * 4 + 3].
// NOTE(review): the comment preceding this macro lists the layout as {Source, Operand, Combiner, Color, Scale},
// which would put Scale at offset 4 rather than 5 - confirm the offset against the register map.
#define setupTevStage(stage) \
	std::memcpy(&texConfig.tevConfigs[stage * 4], &regs[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \
	texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 5];
|
||||
|
||||
setupTevStage(0);
|
||||
setupTevStage(1);
|
||||
setupTevStage(2);
|
||||
setupTevStage(3);
|
||||
setupTevStage(4);
|
||||
setupTevStage(5);
|
||||
#undef setupTevStage
|
||||
PICA::FragmentConfig fsConfig(regs);
|
||||
|
||||
CachedProgram& programEntry = shaderCache[fsConfig];
|
||||
OpenGL::Program& program = programEntry.program;
|
||||
|
||||
if (!program.exists()) {
|
||||
std::string vs = fragShaderGen.getVertexShader(regs);
|
||||
std::string fs = fragShaderGen.generate(regs);
|
||||
std::string fs = fragShaderGen.generate(fsConfig);
|
||||
|
||||
OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex);
|
||||
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
|
||||
program.create({vertShader, fragShader});
|
||||
program.create({defaultShadergenVs, fragShader});
|
||||
gl.useProgram(program);
|
||||
|
||||
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
|
||||
|
@ -875,6 +864,48 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
|
|||
vec[3] = float((color >> 24) & 0xFF) / 255.0f;
|
||||
}
|
||||
|
||||
// Append lighting uniforms
|
||||
if (fsConfig.lighting.enable) {
|
||||
uniforms.globalAmbientLight = regs[InternalRegs::LightGlobalAmbient];
|
||||
for (int i = 0; i < 8; i++) {
|
||||
auto& light = uniforms.lightUniforms[i];
|
||||
const u32 specular0 = regs[InternalRegs::Light0Specular0 + i * 0x10];
|
||||
const u32 specular1 = regs[InternalRegs::Light0Specular1 + i * 0x10];
|
||||
const u32 diffuse = regs[InternalRegs::Light0Diffuse + i * 0x10];
|
||||
const u32 ambient = regs[InternalRegs::Light0Ambient + i * 0x10];
|
||||
const u32 lightXY = regs[InternalRegs::Light0XY + i * 0x10];
|
||||
const u32 lightZ = regs[InternalRegs::Light0Z + i * 0x10];
|
||||
|
||||
const u32 spotlightXY = regs[InternalRegs::Light0SpotlightXY + i * 0x10];
|
||||
const u32 spotlightZ = regs[InternalRegs::Light0SpotlightZ + i * 0x10];
|
||||
const u32 attenuationBias = regs[InternalRegs::Light0AttenuationBias + i * 0x10];
|
||||
const u32 attenuationScale = regs[InternalRegs::Light0AttenuationScale + i * 0x10];
|
||||
|
||||
#define lightColorToVec3(value) \
|
||||
{ \
|
||||
float(Helpers::getBits<20, 8>(value)) / 255.0f, \
|
||||
float(Helpers::getBits<10, 8>(value)) / 255.0f, \
|
||||
float(Helpers::getBits<0, 8>(value)) / 255.0f, \
|
||||
}
|
||||
light.specular0 = lightColorToVec3(specular0);
|
||||
light.specular1 = lightColorToVec3(specular1);
|
||||
light.diffuse = lightColorToVec3(diffuse);
|
||||
light.ambient = lightColorToVec3(ambient);
|
||||
light.position[0] = Floats::f16::fromRaw(u16(lightXY)).toFloat32();
|
||||
light.position[1] = Floats::f16::fromRaw(u16(lightXY >> 16)).toFloat32();
|
||||
light.position[2] = Floats::f16::fromRaw(u16(lightZ)).toFloat32();
|
||||
|
||||
// Fixed point 1.1.11 (sign, 1 integer bit, 11 fractional bits) to float, without negation
|
||||
light.spotlightDirection[0] = float(s32(spotlightXY & 0x1FFF) << 19 >> 19) / 2047.0;
|
||||
light.spotlightDirection[1] = float(s32((spotlightXY >> 16) & 0x1FFF) << 19 >> 19) / 2047.0;
|
||||
light.spotlightDirection[2] = float(s32(spotlightZ & 0x1FFF) << 19 >> 19) / 2047.0;
|
||||
|
||||
light.distanceAttenuationBias = Floats::f20::fromRaw(attenuationBias & 0xFFFFF).toFloat32();
|
||||
light.distanceAttenuationScale = Floats::f20::fromRaw(attenuationScale & 0xFFFFF).toFloat32();
|
||||
#undef lightColorToVec3
|
||||
}
|
||||
}
|
||||
|
||||
gl.bindUBO(programEntry.uboBinding);
|
||||
glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms);
|
||||
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
#version 410 core
|
||||
|
||||
in vec3 v_tangent;
|
||||
in vec3 v_normal;
|
||||
in vec3 v_bitangent;
|
||||
in vec4 v_quaternion;
|
||||
in vec4 v_colour;
|
||||
in vec3 v_texcoord0;
|
||||
in vec2 v_texcoord1;
|
||||
|
@ -27,7 +25,7 @@ uniform bool u_depthmapEnable;
|
|||
uniform sampler2D u_tex0;
|
||||
uniform sampler2D u_tex1;
|
||||
uniform sampler2D u_tex2;
|
||||
uniform sampler1DArray u_tex_lighting_lut;
|
||||
uniform sampler2D u_tex_lighting_lut;
|
||||
|
||||
uniform uint u_picaRegs[0x200 - 0x48];
|
||||
|
||||
|
@ -37,6 +35,16 @@ uint readPicaReg(uint reg_addr) { return u_picaRegs[reg_addr - 0x48u]; }
|
|||
vec4 tevSources[16];
|
||||
vec4 tevNextPreviousBuffer;
|
||||
bool tevUnimplementedSourceFlag = false;
|
||||
vec3 normal;
|
||||
|
||||
// See docs/lighting.md
|
||||
const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu);
|
||||
|
||||
// Returns whether LUT `lut_id` is sampled under lighting environment `environment_id`.
// The answer is bit (7 * environment_id + lut_id) of the 64-bit table held in samplerEnabledBitfields.
bool isSamplerEnabled(uint environment_id, uint lut_id) {
	uint index = 7u * environment_id + lut_id;
	uint arrayIndex = (index >> 5);

	// The table only has 2 words; anything past it counts as disabled instead of indexing out of bounds
	if (arrayIndex >= 2u) return false;

	return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u;
}
|
||||
|
||||
// OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements):
|
||||
// https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml
|
||||
|
@ -110,7 +118,7 @@ vec4 tevCalculateCombiner(int tev_id) {
|
|||
case 6u: result.rgb = vec3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB
|
||||
case 7u: result = vec4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA
|
||||
case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add
|
||||
case 9u: result.rgb = min((source0.rgb + source1.rgb) * source2.rgb, 1.0); break; // Add then multiply
|
||||
case 9u: result.rgb = min(source0.rgb + source1.rgb, 1.0) * source2.rgb; break; // Add then multiply
|
||||
default: break;
|
||||
}
|
||||
|
||||
|
@ -125,7 +133,7 @@ vec4 tevCalculateCombiner(int tev_id) {
|
|||
case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate
|
||||
case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract
|
||||
case 8u: result.a = min(1.0, source0.a * source1.a + source2.a); break; // Multiply then add
|
||||
case 9u: result.a = min(1.0, (source0.a + source1.a) * source2.a); break; // Add then multiply
|
||||
case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break; // Add then multiply
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
@ -144,10 +152,16 @@ vec4 tevCalculateCombiner(int tev_id) {
|
|||
#define RG_LUT 5u
|
||||
#define RR_LUT 6u
|
||||
|
||||
float lutLookup(uint lut, uint light, float value) {
|
||||
if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
|
||||
if (lut == SP_LUT) lut = light + 8;
|
||||
return texture(u_tex_lighting_lut, vec2(value, lut)).r;
|
||||
uint GPUREG_LIGHTi_CONFIG;
|
||||
uint GPUREG_LIGHTING_CONFIG1;
|
||||
uint GPUREG_LIGHTING_LUTINPUT_SELECT;
|
||||
uint GPUREG_LIGHTING_LUTINPUT_SCALE;
|
||||
uint GPUREG_LIGHTING_LUTINPUT_ABS;
|
||||
bool error_unimpl = false;
|
||||
vec4 unimpl_color = vec4(1.0, 0.0, 1.0, 1.0);
|
||||
|
||||
// Fetches entry `index` of lighting LUT number `lut` from the LUT texture (x = entry, y = LUT number)
float lutLookup(uint lut, int index) {
	ivec2 texel = ivec2(index, int(lut));
	return texelFetch(u_tex_lighting_lut, texel, 0).r;
}
|
||||
|
||||
vec3 regToColor(uint reg) {
|
||||
|
@ -178,136 +192,179 @@ float decodeFP(uint hex, uint E, uint M) {
|
|||
return uintBitsToFloat(hex);
|
||||
}
|
||||
|
||||
// Evaluates lighting LUT `lut_id` for light `light_id` and returns the looked-up value multiplied by the LUT's scale.
// Returns 1.0 when the LUT is not sampled in lighting environment `environment_id`, when its disable bit in
// GPUREG_LIGHTING_CONFIG1 is set, or when `lut_id` is not one of the implemented LUTs.
// Reads the GPUREG_* globals, `normal` and `v_view`; sets error_unimpl when it hits an unimplemented input.
float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light_vector, vec3 half_vector) {
	uint lut_index;
	int bit_in_config1;
	if (lut_id == SP_LUT) {
		// These are the spotlight attenuation LUTs
		bit_in_config1 = 8 + int(light_id & 7u);
		lut_index = 8u + light_id;
	} else if (lut_id <= 6u) {
		bit_in_config1 = 16 + int(lut_id);
		lut_index = lut_id;
	} else {
		// Unknown LUT: bail out instead of carrying on with lut_index and bit_in_config1 unset
		error_unimpl = true;
		return 1.0;
	}

	bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id);  // 7 luts per environment

	// A set bit in GPUREG_LIGHTING_CONFIG1 disables the corresponding LUT
	if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
		return 1.0;
	}

	// Scale IDs below 6 give a multiplier of 2^scale_id; IDs 6 and 7 give 2^scale_id / 256 (0.25 and 0.5)
	uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
	float scale = float(1u << scale_id);
	if (scale_id >= 6u) scale /= 256.0;

	// Pick the dot product used as the LUT input
	float delta = 1.0;
	uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
	switch (input_id) {
		case 0u: {
			delta = dot(normal, normalize(half_vector));
			break;
		}
		case 1u: {
			delta = dot(normalize(v_view), normalize(half_vector));
			break;
		}
		case 2u: {
			delta = dot(normal, normalize(v_view));
			break;
		}
		case 3u: {
			delta = dot(light_vector, normal);
			break;
		}
		case 4u: {
			int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u)));
			int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u)));

			// Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions
			// of GLSL so we do it manually
			int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
			int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
			int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);

			if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000;
			if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000;
			if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000;

			// These are fixed point 1.1.11 values, so we need to convert them to float
			float x = float(se_x) / 2047.0;
			float y = float(se_y) / 2047.0;
			float z = float(se_z) / 2047.0;
			vec3 spotlight_vector = vec3(x, y, z);
			delta = dot(light_vector, spotlight_vector);  // spotlight direction is negated so we don't negate light_vector
			break;
		}
		case 5u: {
			delta = 1.0;  // TODO: cos <greek symbol> (aka CP);
			error_unimpl = true;
			break;
		}
		default: {
			delta = 1.0;
			error_unimpl = true;
			break;
		}
	}

	// 0 = enabled
	if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
		// Two sided diffuse
		if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
			delta = max(delta, 0.0);
		} else {
			delta = abs(delta);
		}
		int index = int(clamp(floor(delta * 255.0), 0.f, 255.f));
		return lutLookup(lut_index, index) * scale;
	} else {
		// Signed input: delta in [-1, 1] becomes an index in [-128, 127], and negative indices wrap into 128-255
		int index = int(clamp(floor(delta * 128.0), -128.f, 127.f));
		if (index < 0) index += 256;
		return lutLookup(lut_index, index) * scale;
	}
}
|
||||
|
||||
// Applies quaternion q (xyz = vector part, w = scalar part) to the vector v.
// NOTE(review): presumably q is expected to be unit length; nothing here normalizes it.
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
	vec3 axis = q.xyz;
	float w = q.w;

	vec3 along_axis = 2.0 * dot(axis, v) * axis;
	vec3 scaled_input = (w * w - dot(axis, axis)) * v;
	vec3 perpendicular = 2.0 * w * cross(axis, v);
	return along_axis + scaled_input + perpendicular;
}
|
||||
|
||||
// Implements the following algorithm: https://mathb.in/26766
|
||||
void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
// Quaternions describe a transformation from surface-local space to eye space.
|
||||
// In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1),
|
||||
// the tangent vector is (1,0,0), and the bitangent vector is (0,1,0).
|
||||
vec3 normal = normalize(v_normal);
|
||||
vec3 tangent = normalize(v_tangent);
|
||||
vec3 bitangent = normalize(v_bitangent);
|
||||
vec3 view = normalize(v_view);
|
||||
|
||||
uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu);
|
||||
if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
|
||||
primary_color = secondary_color = vec4(1.0);
|
||||
primary_color = secondary_color = vec4(0.0);
|
||||
return;
|
||||
}
|
||||
|
||||
uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u);
|
||||
uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2u) & 0x7u) + 1u;
|
||||
uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9u);
|
||||
|
||||
primary_color = vec4(vec3(0.0), 1.0);
|
||||
secondary_color = vec4(vec3(0.0), 1.0);
|
||||
|
||||
primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT);
|
||||
|
||||
uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
|
||||
uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);
|
||||
uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u);
|
||||
uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u);
|
||||
uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u);
|
||||
float d[7];
|
||||
uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u);
|
||||
GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u);
|
||||
GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
|
||||
GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);
|
||||
|
||||
bool error_unimpl = false;
|
||||
uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2);
|
||||
|
||||
// Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker
|
||||
switch (bump_mode) {
|
||||
default: {
|
||||
normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
|
||||
vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);
|
||||
|
||||
uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4);
|
||||
bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
|
||||
|
||||
uint light_id;
|
||||
vec3 light_vector;
|
||||
vec3 half_vector;
|
||||
|
||||
for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) {
|
||||
uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3);
|
||||
light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
|
||||
|
||||
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id);
|
||||
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id);
|
||||
uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + 0x10u * light_id);
|
||||
uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + 0x10u * light_id);
|
||||
uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + 0x10u * light_id);
|
||||
uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + 0x10u * light_id);
|
||||
uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id);
|
||||
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u));
|
||||
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u));
|
||||
uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + (light_id << 4u));
|
||||
uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + (light_id << 4u));
|
||||
uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + (light_id << 4u));
|
||||
uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + (light_id << 4u));
|
||||
GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + (light_id << 4u));
|
||||
|
||||
vec3 light_vector = normalize(vec3(
|
||||
float light_distance;
|
||||
vec3 light_position = vec3(
|
||||
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
|
||||
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
|
||||
));
|
||||
|
||||
vec3 half_vector;
|
||||
);
|
||||
|
||||
// Positional Light
|
||||
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
|
||||
// error_unimpl = true;
|
||||
half_vector = normalize(normalize(light_vector + v_view) + view);
|
||||
light_vector = light_position + v_view;
|
||||
}
|
||||
|
||||
// Directional light
|
||||
else {
|
||||
half_vector = normalize(normalize(light_vector) + view);
|
||||
light_vector = light_position;
|
||||
}
|
||||
|
||||
for (int c = 0; c < 7; c++) {
|
||||
if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
|
||||
uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
|
||||
float scale = float(1u << scale_id);
|
||||
if (scale_id >= 6u) scale /= 256.0;
|
||||
light_distance = length(light_vector);
|
||||
light_vector = normalize(light_vector);
|
||||
half_vector = light_vector + normalize(v_view);
|
||||
|
||||
uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
|
||||
if (input_id == 0u)
|
||||
d[c] = dot(normal, half_vector);
|
||||
else if (input_id == 1u)
|
||||
d[c] = dot(view, half_vector);
|
||||
else if (input_id == 2u)
|
||||
d[c] = dot(normal, view);
|
||||
else if (input_id == 3u)
|
||||
d[c] = dot(light_vector, normal);
|
||||
else if (input_id == 4u) {
|
||||
uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id);
|
||||
uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id);
|
||||
vec3 spot_light_vector = normalize(vec3(
|
||||
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
|
||||
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
|
||||
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
|
||||
));
|
||||
d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP);
|
||||
} else if (input_id == 5u) {
|
||||
d[c] = 1.0; // TODO: cos <greek symbol> (aka CP);
|
||||
error_unimpl = true;
|
||||
} else {
|
||||
d[c] = 1.0;
|
||||
}
|
||||
|
||||
d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale;
|
||||
if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
|
||||
} else {
|
||||
d[c] = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4);
|
||||
if (lookup_config == 0u) {
|
||||
d[D1_LUT] = 0.0;
|
||||
d[FR_LUT] = 0.0;
|
||||
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
|
||||
} else if (lookup_config == 1u) {
|
||||
d[D0_LUT] = 0.0;
|
||||
d[D1_LUT] = 0.0;
|
||||
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
|
||||
} else if (lookup_config == 2u) {
|
||||
d[FR_LUT] = 0.0;
|
||||
d[SP_LUT] = 0.0;
|
||||
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
|
||||
} else if (lookup_config == 3u) {
|
||||
d[SP_LUT] = 0.0;
|
||||
d[RG_LUT] = d[RB_LUT] = d[RR_LUT] = 1.0;
|
||||
} else if (lookup_config == 4u) {
|
||||
d[FR_LUT] = 0.0;
|
||||
} else if (lookup_config == 5u) {
|
||||
d[D1_LUT] = 0.0;
|
||||
} else if (lookup_config == 6u) {
|
||||
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
|
||||
}
|
||||
|
||||
float distance_factor = 1.0; // a
|
||||
float indirect_factor = 1.0; // fi
|
||||
float shadow_factor = 1.0; // o
|
||||
|
||||
float NdotL = dot(normal, light_vector); // Li dot N
|
||||
float NdotL = dot(normal, light_vector); // N dot Li
|
||||
|
||||
// Two sided diffuse
|
||||
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u)
|
||||
|
@ -315,20 +372,86 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
|||
else
|
||||
NdotL = abs(NdotL);
|
||||
|
||||
float light_factor = distance_factor * d[SP_LUT] * indirect_factor * shadow_factor;
|
||||
float geometric_factor;
|
||||
bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
|
||||
bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
|
||||
if (use_geo_0 || use_geo_1) {
|
||||
geometric_factor = dot(half_vector, half_vector);
|
||||
geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0);
|
||||
}
|
||||
|
||||
primary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL);
|
||||
secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] +
|
||||
regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT]));
|
||||
float distance_attenuation = 1.0;
|
||||
if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
|
||||
uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
|
||||
uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
|
||||
|
||||
float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u);
|
||||
float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u);
|
||||
|
||||
float delta = light_distance * distance_attenuation_scale + distance_attenuation_bias;
|
||||
delta = clamp(delta, 0.0, 1.0);
|
||||
int index = int(clamp(floor(delta * 255.0), 0.0, 255.0));
|
||||
distance_attenuation = lutLookup(16u + light_id, index);
|
||||
}
|
||||
|
||||
float spotlight_attenuation = lightLutLookup(environment_id, SP_LUT, light_id, light_vector, half_vector);
|
||||
float specular0_distribution = lightLutLookup(environment_id, D0_LUT, light_id, light_vector, half_vector);
|
||||
float specular1_distribution = lightLutLookup(environment_id, D1_LUT, light_id, light_vector, half_vector);
|
||||
vec3 reflected_color;
|
||||
reflected_color.r = lightLutLookup(environment_id, RR_LUT, light_id, light_vector, half_vector);
|
||||
|
||||
if (isSamplerEnabled(environment_id, RG_LUT)) {
|
||||
reflected_color.g = lightLutLookup(environment_id, RG_LUT, light_id, light_vector, half_vector);
|
||||
} else {
|
||||
reflected_color.g = reflected_color.r;
|
||||
}
|
||||
|
||||
if (isSamplerEnabled(environment_id, RB_LUT)) {
|
||||
reflected_color.b = lightLutLookup(environment_id, RB_LUT, light_id, light_vector, half_vector);
|
||||
} else {
|
||||
reflected_color.b = reflected_color.r;
|
||||
}
|
||||
|
||||
vec3 specular0 = regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0_distribution;
|
||||
vec3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1_distribution * reflected_color;
|
||||
|
||||
specular0 *= use_geo_0 ? geometric_factor : 1.0;
|
||||
specular1 *= use_geo_1 ? geometric_factor : 1.0;
|
||||
|
||||
float clamp_factor = 1.0;
|
||||
if (clamp_highlights && NdotL == 0.0) {
|
||||
clamp_factor = 0.0;
|
||||
}
|
||||
|
||||
float light_factor = distance_attenuation * spotlight_attenuation;
|
||||
diffuse_sum.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL);
|
||||
specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1);
|
||||
}
|
||||
|
||||
uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1);
|
||||
uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1);
|
||||
// Uses parameters from the last light as Fresnel is only applied to the last light
|
||||
float fresnel_factor;
|
||||
|
||||
if (fresnel_output1 == 1u || fresnel_output2 == 1u) {
|
||||
fresnel_factor = lightLutLookup(environment_id, FR_LUT, light_id, light_vector, half_vector);
|
||||
}
|
||||
|
||||
if (fresnel_output1 == 1u) {
|
||||
diffuse_sum.a = fresnel_factor;
|
||||
}
|
||||
|
||||
if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT];
|
||||
if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT];
|
||||
if (fresnel_output2 == 1u) {
|
||||
specular_sum.a = fresnel_factor;
|
||||
}
|
||||
|
||||
uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u);
|
||||
vec4 global_ambient = vec4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0);
|
||||
primary_color = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0));
|
||||
secondary_color = clamp(specular_sum, vec4(0.0), vec4(1.0));
|
||||
|
||||
if (error_unimpl) {
|
||||
// secondary_color = primary_color = vec4(1.0, 0., 1.0, 1.0);
|
||||
// secondary_color = primary_color = unimpl_color;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -414,4 +537,4 @@ void main() {
|
|||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -9,9 +9,7 @@ layout(location = 5) in float a_texcoord0_w;
|
|||
layout(location = 6) in vec3 a_view;
|
||||
layout(location = 7) in vec2 a_texcoord2;
|
||||
|
||||
out vec3 v_normal;
|
||||
out vec3 v_tangent;
|
||||
out vec3 v_bitangent;
|
||||
out vec4 v_quaternion;
|
||||
out vec4 v_colour;
|
||||
out vec3 v_texcoord0;
|
||||
out vec2 v_texcoord1;
|
||||
|
@ -35,12 +33,6 @@ vec4 abgr8888ToVec4(uint abgr) {
|
|||
return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24));
|
||||
}
|
||||
|
||||
// Applies quaternion q (xyz = vector part, w = scalar part) to the vector v.
// NOTE(review): presumably q is expected to be unit length; nothing here normalizes it.
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
	vec3 axis = q.xyz;
	float w = q.w;

	vec3 along_axis = 2.0 * dot(axis, v) * axis;
	vec3 scaled_input = (w * w - dot(axis, axis)) * v;
	vec3 perpendicular = 2.0 * w * cross(axis, v);
	return along_axis + scaled_input + perpendicular;
}
|
||||
|
||||
// Convert an arbitrary-width floating point literal to an f32
|
||||
float decodeFP(uint hex, uint E, uint M) {
|
||||
uint width = M + E + 1u;
|
||||
|
@ -73,10 +65,6 @@ void main() {
|
|||
v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
|
||||
v_view = a_view;
|
||||
|
||||
v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion));
|
||||
v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion));
|
||||
v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion));
|
||||
|
||||
for (int i = 0; i < 6; i++) {
|
||||
v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]);
|
||||
}
|
||||
|
@ -95,4 +83,6 @@ void main() {
|
|||
// There's also another, always-on clipping plane based on vertex z
|
||||
gl_ClipDistance[0] = -a_coords.z;
|
||||
gl_ClipDistance[1] = dot(clipData, a_coords);
|
||||
|
||||
v_quaternion = a_quaternion;
|
||||
}
|
||||
|
|
|
@ -147,7 +147,8 @@ static void configInit() {
|
|||
static const retro_variable values[] = {
|
||||
{"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"},
|
||||
{"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"},
|
||||
{"panda3ds_use_ubershader", "Use ubershaders (No stutter, maybe slower); enabled|disabled"},
|
||||
{"panda3ds_use_ubershader", EmulatorConfig::ubershaderDefault ? "Use ubershaders (No stutter, maybe slower); enabled|disabled"
|
||||
: "Use ubershaders (No stutter, maybe slower); disabled|enabled"},
|
||||
{"panda3ds_use_vsync", "Enable VSync; enabled|disabled"},
|
||||
{"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"},
|
||||
{"panda3ds_use_audio", "Enable audio; disabled|enabled"},
|
||||
|
@ -155,6 +156,8 @@ static void configInit() {
|
|||
{"panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled"},
|
||||
{"panda3ds_battery_level", "Battery percentage; 5|10|20|30|50|70|90|100"},
|
||||
{"panda3ds_use_charger", "Charger plugged; enabled|disabled"},
|
||||
{"panda3ds_ubershader_lighting_override", "Force shadergen when rendering lights; enabled|disabled"},
|
||||
{"panda3ds_ubershader_lighting_override_threshold", "Light threshold for forcing shadergen; 1|2|3|4|5|6|7|8"},
|
||||
{nullptr, nullptr},
|
||||
};
|
||||
|
||||
|
@ -175,6 +178,8 @@ static void configUpdate() {
|
|||
config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false);
|
||||
config.accurateShaderMul = FetchVariableBool("panda3ds_accurate_shader_mul", false);
|
||||
config.useUbershaders = FetchVariableBool("panda3ds_use_ubershader", true);
|
||||
config.forceShadergenForLights = FetchVariableBool("panda3ds_ubershader_lighting_override", true);
|
||||
config.lightShadergenThreshold = std::clamp(std::stoi(FetchVariable("panda3ds_ubershader_lighting_override_threshold", "1")), 1, 8);
|
||||
config.discordRpcEnabled = false;
|
||||
|
||||
config.save();
|
||||
|
|
79
third_party/opengl/opengl.hpp
vendored
79
third_party/opengl/opengl.hpp
vendored
|
@ -355,46 +355,57 @@ namespace OpenGL {
|
|||
}
|
||||
};
|
||||
|
||||
enum ShaderType {
|
||||
Fragment = GL_FRAGMENT_SHADER,
|
||||
Vertex = GL_VERTEX_SHADER,
|
||||
Geometry = GL_GEOMETRY_SHADER,
|
||||
Compute = GL_COMPUTE_SHADER,
|
||||
TessControl = GL_TESS_CONTROL_SHADER,
|
||||
TessEvaluation = GL_TESS_EVALUATION_SHADER
|
||||
};
|
||||
enum ShaderType {
|
||||
Fragment = GL_FRAGMENT_SHADER,
|
||||
Vertex = GL_VERTEX_SHADER,
|
||||
Geometry = GL_GEOMETRY_SHADER,
|
||||
Compute = GL_COMPUTE_SHADER,
|
||||
TessControl = GL_TESS_CONTROL_SHADER,
|
||||
TessEvaluation = GL_TESS_EVALUATION_SHADER
|
||||
};
|
||||
|
||||
struct Shader {
|
||||
GLuint m_handle = 0;
|
||||
struct Shader {
|
||||
GLuint m_handle = 0;
|
||||
|
||||
Shader() {}
|
||||
Shader(const std::string_view source, ShaderType type) { create(source, static_cast<GLenum>(type)); }
|
||||
Shader() {}
|
||||
Shader(const std::string_view source, ShaderType type) { create(source, static_cast<GLenum>(type)); }
|
||||
|
||||
// Returns whether compilation failed or not
|
||||
bool create(const std::string_view source, GLenum type) {
|
||||
m_handle = glCreateShader(type);
|
||||
const GLchar* const sources[1] = { source.data() };
|
||||
// Returns whether compilation failed or not
|
||||
bool create(const std::string_view source, GLenum type) {
|
||||
m_handle = glCreateShader(type);
|
||||
const GLchar* const sources[1] = {source.data()};
|
||||
|
||||
glShaderSource(m_handle, 1, sources, nullptr);
|
||||
glCompileShader(m_handle);
|
||||
glShaderSource(m_handle, 1, sources, nullptr);
|
||||
glCompileShader(m_handle);
|
||||
|
||||
GLint success;
|
||||
glGetShaderiv(m_handle, GL_COMPILE_STATUS, &success);
|
||||
if (success == GL_FALSE) {
|
||||
char buf[4096];
|
||||
glGetShaderInfoLog(m_handle, 4096, nullptr, buf);
|
||||
fprintf(stderr, "Failed to compile shader\nError: %s\n", buf);
|
||||
glDeleteShader(m_handle);
|
||||
GLint success;
|
||||
glGetShaderiv(m_handle, GL_COMPILE_STATUS, &success);
|
||||
if (success == GL_FALSE) {
|
||||
char buf[4096];
|
||||
glGetShaderInfoLog(m_handle, 4096, nullptr, buf);
|
||||
fprintf(stderr, "Failed to compile shader\nError: %s\n", buf);
|
||||
glDeleteShader(m_handle);
|
||||
|
||||
m_handle = 0;
|
||||
}
|
||||
m_handle = 0;
|
||||
}
|
||||
|
||||
return m_handle != 0;
|
||||
}
|
||||
return m_handle != 0;
|
||||
}
|
||||
|
||||
GLuint handle() const { return m_handle; }
|
||||
bool exists() const { return m_handle != 0; }
|
||||
};
|
||||
GLuint handle() const { return m_handle; }
|
||||
bool exists() const { return m_handle != 0; }
|
||||
|
||||
void free() {
|
||||
if (exists()) {
|
||||
glDeleteShader(m_handle);
|
||||
m_handle = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef OPENGL_DESTRUCTORS
|
||||
~Shader() { free(); }
|
||||
#endif
|
||||
};
|
||||
|
||||
struct Program {
|
||||
GLuint m_handle = 0;
|
||||
|
@ -431,6 +442,10 @@ namespace OpenGL {
|
|||
m_handle = 0;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef OPENGL_DESTRUCTORS
|
||||
~Program() { free(); }
|
||||
#endif
|
||||
};
|
||||
|
||||
static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) {
|
||||
|
|
Loading…
Add table
Reference in a new issue