Merge branch 'wheremyfoodat:master' into master

This commit is contained in:
SamoZ256 2024-07-23 10:54:46 +02:00 committed by GitHub
commit a716e395ec
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
56 changed files with 7863 additions and 540 deletions

280
.github/gles.patch vendored
View file

@ -1,52 +1,3 @@
diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp
index a11a6ffa..77486a09 100644
--- a/src/core/renderer_gl/renderer_gl.cpp
+++ b/src/core/renderer_gl/renderer_gl.cpp
@@ -357,27 +357,27 @@ void RendererGL::bindTexturesToSlots() {
}
glActiveTexture(GL_TEXTURE0 + 3);
- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
glActiveTexture(GL_TEXTURE0);
}
void RendererGL::updateLightingLUT() {
- gpu.lightingLUTDirty = false;
- std::array<u16, GPU::LightingLutSize> u16_lightinglut;
-
- for (int i = 0; i < gpu.lightingLUT.size(); i++) {
- uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
- u16_lightinglut[i] = value * 65535 / 4095;
- }
-
- glActiveTexture(GL_TEXTURE0 + 3);
- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
- glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- glActiveTexture(GL_TEXTURE0);
+ // gpu.lightingLUTDirty = false;
+ // std::array<u16, GPU::LightingLutSize> u16_lightinglut;
+
+ // for (int i = 0; i < gpu.lightingLUT.size(); i++) {
+ // uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
+ // u16_lightinglut[i] = value * 65535 / 4095;
+ // }
+
+ // glActiveTexture(GL_TEXTURE0 + 3);
+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
+ // glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ // glActiveTexture(GL_TEXTURE0);
}
void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> vertices) {
diff --git a/src/host_shaders/opengl_display.frag b/src/host_shaders/opengl_display.frag
index 612671c8..1937f711 100644
--- a/src/host_shaders/opengl_display.frag
@ -70,7 +21,7 @@ index 990e2f80..2e7842ac 100644
void main() {
diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag
index f6fa6c55..bb88e278 100644
index b9f9fe4c..f1cf286f 100644
--- a/src/host_shaders/opengl_fragment_shader.frag
+++ b/src/host_shaders/opengl_fragment_shader.frag
@@ -1,4 +1,5 @@
@ -78,36 +29,18 @@ index f6fa6c55..bb88e278 100644
+#version 300 es
+precision mediump float;
in vec3 v_tangent;
in vec3 v_normal;
@@ -27,7 +28,7 @@ uniform bool u_depthmapEnable;
uniform sampler2D u_tex0;
uniform sampler2D u_tex1;
uniform sampler2D u_tex2;
-uniform sampler1DArray u_tex_lighting_lut;
+// uniform sampler1DArray u_tex_lighting_lut;
in vec4 v_quaternion;
in vec4 v_colour;
@@ -166,11 +167,17 @@ float lutLookup(uint lut, int index) {
return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r;
}
uniform uint u_picaRegs[0x200 - 0x48];
@@ -145,16 +146,23 @@ vec4 tevCalculateCombiner(int tev_id) {
#define RR_LUT 6u
float lutLookup(uint lut, uint light, float value) {
- if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
- if (lut == SP_LUT) lut = light + 8;
- return texture(u_tex_lighting_lut, vec2(value, lut)).r;
+ // if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
+ // if (lut == SP_LUT) lut = light + 8;
+ // return texture(u_tex_lighting_lut, vec2(value, lut)).r;
+ return 0.0;
+}
+
+// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead
+// some gles versions have bitfieldExtractCompat and complain if you redefine it, some don't and compile error, using this instead
+uint bitfieldExtractCompat(uint val, int off, int size) {
+ uint mask = uint((1 << size) - 1);
+ return uint(val >> off) & mask;
}
+}
+
vec3 regToColor(uint reg) {
// Normalization scale to convert from [0...255] to [0.0...1.0]
const float scale = 1.0 / 255.0;
@ -117,89 +50,109 @@ index f6fa6c55..bb88e278 100644
}
// Convert an arbitrary-width floating point literal to an f32
@@ -189,7 +197,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
vec3 view = normalize(v_view);
@@ -210,16 +217,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment
- if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
+ if (!current_sampler_enabled || (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
return 1.0;
}
- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
float scale = float(1u << scale_id);
if (scale_id >= 6u) scale /= 256.0;
float delta = 1.0;
- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
switch (input_id) {
case 0u: {
delta = dot(normal, normalize(half_vector));
@@ -241,11 +248,11 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u)));
int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u)));
- // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions
+ // Sign extend them. Normally bitfieldExtractCompat would do that but it's missing on some versions
// of GLSL so we do it manually
- int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
- int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
- int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);
+ int se_x = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
+ int se_y = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
+ int se_z = bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);
if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000;
if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000;
@@ -272,9 +279,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
}
// 0 = enabled
- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
// Two sided diffuse
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
delta = max(delta, 0.0);
} else {
delta = abs(delta);
@@ -298,7 +305,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
// Implements the following algorthm: https://mathb.in/26766
void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu);
- if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
primary_color = secondary_color = vec4(1.0);
primary_color = secondary_color = vec4(0.0);
return;
}
@@ -213,7 +221,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
bool error_unimpl = false;
@@ -315,7 +322,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);
- uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2);
+ uint bump_mode = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 28, 2);
// Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker
switch (bump_mode) {
@@ -328,15 +335,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);
- uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4);
- bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
+ uint environment_id = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 4, 4);
+ bool clamp_highlights = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
uint light_id;
vec3 light_vector;
vec3 half_vector;
for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) {
- uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3);
+ uint light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3);
- light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
+ light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id);
@@ -224,14 +232,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u));
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u));
@@ -348,12 +355,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
vec3 light_vector = normalize(vec3(
float light_distance;
vec3 light_position = vec3(
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
));
vec3 half_vector;
);
// Positional Light
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
// error_unimpl = true;
half_vector = normalize(normalize(light_vector + v_view) + view);
}
@@ -242,12 +250,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
light_vector = light_position + v_view;
}
for (int c = 0; c < 7; c++) {
- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
float scale = float(1u << scale_id);
if (scale_id >= 6u) scale /= 256.0;
- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
if (input_id == 0u)
d[c] = dot(normal, half_vector);
else if (input_id == 1u)
@@ -260,9 +268,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id);
vec3 spot_light_vector = normalize(vec3(
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
));
d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP);
} else if (input_id == 5u) {
@@ -273,13 +281,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
}
d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale;
- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
} else {
d[c] = 1.0;
}
}
- uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4);
+ uint lookup_config = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 4, 4);
if (lookup_config == 0u) {
d[D1_LUT] = 0.0;
d[FR_LUT] = 0.0;
@@ -310,7 +318,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
float NdotL = dot(normal, light_vector); // Li dot N
@@ -369,23 +376,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
float NdotL = dot(normal, light_vector); // N dot Li
// Two sided diffuse
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u)
@ -207,19 +160,40 @@ index f6fa6c55..bb88e278 100644
NdotL = max(0.0, NdotL);
else
NdotL = abs(NdotL);
@@ -321,8 +329,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] +
regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT]));
float geometric_factor;
- bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
- bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
+ bool use_geo_0 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
+ bool use_geo_1 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
if (use_geo_0 || use_geo_1) {
geometric_factor = dot(half_vector, half_vector);
geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0);
}
float distance_attenuation = 1.0;
- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
+ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
+ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u);
float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u);
@@ -430,8 +437,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1);
}
- uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1);
- uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1);
+ uint fresnel_output1 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 2, 1);
+ uint fresnel_output2 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 3, 1);
if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT];
if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT];
// Uses parameters from the last light as Fresnel is only applied to the last light
float fresnel_factor;
diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert
index a25d7a6d..7cf40398 100644
index 057f9a88..dc735ced 100644
--- a/src/host_shaders/opengl_vertex_shader.vert
+++ b/src/host_shaders/opengl_vertex_shader.vert
@@ -1,4 +1,6 @@
@ -230,7 +204,7 @@ index a25d7a6d..7cf40398 100644
layout(location = 0) in vec4 a_coords;
layout(location = 1) in vec4 a_quaternion;
@@ -20,7 +22,7 @@ out vec2 v_texcoord2;
@@ -18,7 +20,7 @@ out vec2 v_texcoord2;
flat out vec4 v_textureEnvColor[6];
flat out vec4 v_textureEnvBufferColor;
@ -239,7 +213,7 @@ index a25d7a6d..7cf40398 100644
// TEV uniforms
uniform uint u_textureEnvColor[6];
@@ -93,6 +95,6 @@ void main() {
@@ -81,8 +83,8 @@ void main() {
);
// There's also another, always-on clipping plane based on vertex z
@ -247,16 +221,20 @@ index a25d7a6d..7cf40398 100644
- gl_ClipDistance[1] = dot(clipData, a_coords);
+ // gl_ClipDistance[0] = -a_coords.z;
+ // gl_ClipDistance[1] = dot(clipData, a_coords);
v_quaternion = a_quaternion;
}
diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp
index f368f573..5ead7f63 100644
index 4a08650a..21af37e3 100644
--- a/third_party/opengl/opengl.hpp
+++ b/third_party/opengl/opengl.hpp
@@ -520,21 +520,21 @@ namespace OpenGL {
@@ -583,22 +583,22 @@ namespace OpenGL {
static void disableScissor() { glDisable(GL_SCISSOR_TEST); }
static void enableBlend() { glEnable(GL_BLEND); }
static void disableBlend() { glDisable(GL_BLEND); }
static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); }
- static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); }
- static void disableLogicOp() { glDisable(GL_COLOR_LOGIC_OP); }
+ static void enableLogicOp() { /* glEnable(GL_COLOR_LOGIC_OP); */ }
+ static void disableLogicOp() { /* glDisable(GL_COLOR_LOGIC_OP); */ }
static void enableDepth() { glEnable(GL_DEPTH_TEST); }
static void disableDepth() { glDisable(GL_DEPTH_TEST); }

View file

@ -32,12 +32,27 @@ jobs:
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Upload core
uses: actions/upload-artifact@v2
- name: Upload Hydra core
uses: actions/upload-artifact@v4
with:
name: Windows core
name: Windows Hydra core
path: '${{github.workspace}}/build/${{ env.BUILD_TYPE }}/Alber.dll'
- name: Configure CMake (Again)
run: |
rm -r -fo ${{github.workspace}}/build
cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON
- name: Build (Again)
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Upload Libretro core
uses: actions/upload-artifact@v4
with:
name: Windows Libretro core
path: |
${{github.workspace}}/build/${{ env.BUILD_TYPE }}/panda3ds_libretro.dll
${{github.workspace}}/docs/libretro/panda3ds_libretro.info
MacOS:
runs-on: macos-13
@ -61,11 +76,27 @@ jobs:
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Upload core
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: MacOS core
name: MacOS Hydra core
path: '${{github.workspace}}/build/libAlber.dylib'
- name: Configure CMake (Again)
run: |
rm -rf ${{github.workspace}}/build
cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON
- name: Build (Again)
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} && ls -R ${{github.workspace}}/build
- name: Upload Libretro core
uses: actions/upload-artifact@v4
with:
name: MacOS Libretro core
path: |
${{github.workspace}}/build/panda3ds_libretro.dylib
${{github.workspace}}/docs/libretro/panda3ds_libretro.info
Linux:
runs-on: ubuntu-latest
@ -98,11 +129,27 @@ jobs:
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Upload core
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: Linux core
name: Linux Hydra core
path: '${{github.workspace}}/build/libAlber.so'
- name: Configure CMake (Again)
run: |
rm -rf ${{github.workspace}}/build
cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-17 -DCMAKE_CXX_COMPILER=clang++-17 -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON
- name: Build (Again)
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Upload Libretro core
uses: actions/upload-artifact@v4
with:
name: Linux Libretro core
path: |
${{github.workspace}}/build/panda3ds_libretro.so
${{github.workspace}}/docs/libretro/panda3ds_libretro.info
Android-x64:
runs-on: ubuntu-latest
@ -129,7 +176,7 @@ jobs:
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Upload core
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: Android core
name: Android Hydra core
path: '${{github.workspace}}/build/libAlber.so'

View file

@ -16,7 +16,7 @@ jobs:
# well on Windows or Mac. You can convert this to a matrix build if you need
# cross-platform coverage.
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
runs-on: ubuntu-20.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
@ -33,11 +33,11 @@ jobs:
sudo ./llvm.sh 17
- name: Setup Vulkan SDK
run: |
wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list
sudo apt update
sudo apt install vulkan-sdk
uses: humbletim/setup-vulkan-sdk@v1.2.0
with:
vulkan-query-version: latest
vulkan-use-cache: true
vulkan-components: Vulkan-Headers, Vulkan-Loader, SPIRV-Tools, Glslang
- name: Configure CMake
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.

View file

@ -96,7 +96,7 @@ jobs:
path: 'Alber.zip'
Linux:
runs-on: ubuntu-20.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
@ -105,7 +105,7 @@ jobs:
- name: Install misc packages
run: |
sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev
sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev libgl1-mesa-dev
sudo add-apt-repository -y ppa:savoury1/qt-6-2
sudo apt update
sudo apt install qt6-base-dev qt6-base-private-dev
@ -117,11 +117,11 @@ jobs:
sudo ./llvm.sh 17
- name: Setup Vulkan SDK
run: |
wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list
sudo apt update
sudo apt install vulkan-sdk
uses: humbletim/setup-vulkan-sdk@v1.2.0
with:
vulkan-query-version: latest
vulkan-use-cache: true
vulkan-components: Vulkan-Headers, Vulkan-Loader, SPIRV-Tools, Glslang
- name: Configure CMake
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-17 -DCMAKE_CXX_COMPILER=clang++-17 -DENABLE_USER_BUILD=ON -DENABLE_QT_GUI=ON

View file

@ -28,6 +28,10 @@ if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security")
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-interference-size")
endif()
option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" ON)
option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF)
option(ENABLE_OPENGL "Enable OpenGL rendering backend" ON)
@ -40,11 +44,17 @@ option(ENABLE_DISCORD_RPC "Compile with Discord RPC support (disabled by default
option(ENABLE_LUAJIT "Enable scripting with the Lua programming language" ON)
option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF)
option(BUILD_HYDRA_CORE "Build a Hydra core" OFF)
option(BUILD_LIBRETRO_CORE "Build a Libretro core" OFF)
if(BUILD_HYDRA_CORE)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
endif()
if(BUILD_LIBRETRO_CORE)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_compile_definitions(__LIBRETRO__)
endif()
add_library(AlberCore STATIC)
include_directories(${PROJECT_SOURCE_DIR}/include/)
@ -192,7 +202,8 @@ set(SERVICE_SOURCE_FILES src/core/services/service_manager.cpp src/core/services
set(PICA_SOURCE_FILES src/core/PICA/gpu.cpp src/core/PICA/regs.cpp src/core/PICA/shader_unit.cpp
src/core/PICA/shader_interpreter.cpp src/core/PICA/dynapica/shader_rec.cpp
src/core/PICA/dynapica/shader_rec_emitter_x64.cpp src/core/PICA/pica_hash.cpp
src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp
src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp src/core/PICA/shader_gen_glsl.cpp
src/core/PICA/shader_decompiler.cpp
)
set(LOADER_SOURCE_FILES src/core/loader/elf.cpp src/core/loader/ncsd.cpp src/core/loader/ncch.cpp src/core/loader/3dsx.cpp src/core/loader/lz77.cpp)
@ -239,10 +250,11 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
include/services/news_u.hpp include/applets/software_keyboard.hpp include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp
include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp
include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp
include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp
include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp include/PICA/shader_gen.hpp
include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp
include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp
include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp
include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp
include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp
)
cmrc_add_resource_library(
@ -438,7 +450,7 @@ else()
target_compile_definitions(AlberCore PUBLIC "PANDA3DS_FRONTEND_SDL=1")
endif()
if(NOT BUILD_HYDRA_CORE)
if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE)
add_executable(Alber)
if(ENABLE_QT_GUI)
@ -449,11 +461,11 @@ if(NOT BUILD_HYDRA_CORE)
set(FRONTEND_SOURCE_FILES src/panda_qt/main.cpp src/panda_qt/screen.cpp src/panda_qt/main_window.cpp src/panda_qt/about_window.cpp
src/panda_qt/config_window.cpp src/panda_qt/zep.cpp src/panda_qt/text_editor.cpp src/panda_qt/cheats_window.cpp src/panda_qt/mappings.cpp
src/panda_qt/patch_window.cpp src/panda_qt/elided_label.cpp
src/panda_qt/patch_window.cpp src/panda_qt/elided_label.cpp src/panda_qt/shader_editor.cpp
)
set(FRONTEND_HEADER_FILES include/panda_qt/screen.hpp include/panda_qt/main_window.hpp include/panda_qt/about_window.hpp
include/panda_qt/config_window.hpp include/panda_qt/text_editor.hpp include/panda_qt/cheats_window.hpp
include/panda_qt/patch_window.hpp include/panda_qt/elided_label.hpp
include/panda_qt/patch_window.hpp include/panda_qt/elided_label.hpp include/panda_qt/shader_editor.hpp
)
source_group("Source Files\\Qt" FILES ${FRONTEND_SOURCE_FILES})
@ -500,6 +512,15 @@ elseif(BUILD_HYDRA_CORE)
include_directories(third_party/hydra_core/include)
add_library(Alber SHARED src/hydra_core.cpp)
target_link_libraries(Alber PUBLIC AlberCore)
elseif(BUILD_LIBRETRO_CORE)
include_directories(third_party/libretro/include)
add_library(Alber SHARED src/libretro_core.cpp)
target_link_libraries(Alber PUBLIC AlberCore)
set_target_properties(Alber PROPERTIES
OUTPUT_NAME "panda3ds_libretro"
PREFIX ""
)
endif()
if(ENABLE_LTO OR ENABLE_USER_BUILD)

79
docs/3ds/lighting.md Normal file
View file

@ -0,0 +1,79 @@
## Info on the lighting implementation
### Missing shadow attenuation
Shadow attenuation samples a texture unit, which most games likely need render-to-texture for so that they can construct
their shadow map. As such, the colors are not multiplied by the shadow attenuation value, so there are no shadows.
### Missing bump mapping
Bump mapping also samples a texture unit. It most likely doesn't need render-to-texture, but it may need a better texture sampling
implementation (such as GPUREG_TEXUNITi_BORDER_COLOR, GPUREG_TEXUNITi_BORDER_PARAM). With a naive `texture` implementation, bump mapping
would work for some things, namely the 3ds-examples bump mapping demo, but would break others such as Toad Treasure Tracker.
The CP configuration is also missing, because it needs a tangent map implementation. It is currently marked with error_unimpl.
### samplerEnabledBitfields
Holds the enabled state of the lighting samplers for the various PICA lighting configurations,
as explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0
```c
const bool samplerEnabled[9 * 7] = bool[9 * 7](
// D0 D1 SP FR RB RG RR
true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR
false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR
true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR
true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR
true, true, true, false, true, true, true, // Configuration 4: All except for FR
true, false, true, true, true, true, true, // Configuration 5: All except for D1
true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG
false, false, false, false, false, false, false, // Configuration 7: Unused
true, true, true, true, true, true, true // Configuration 8: All
);
```
The above has been condensed to two uints for performance reasons.
You can confirm they are the same by running the following:
```c
const uint samplerEnabledBitfields[2] = { 0x7170e645u, 0x7f013fefu };
for (int i = 0; i < 9 * 7; i++) {
unsigned arrayIndex = (i >> 5);
bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u;
if (samplerEnabled[i] == b) {
printf("%d: happy\n", i);
} else {
printf("%d: unhappy\n", i);
}
}
```
### lightLutLookup
lut_id is one of these values
0 D0
1 D1
2 SP
3 FR
4 RB
5 RG
6 RR
lut_index, on the other hand, represents the actual index of the LUT in the texture.
u_tex_luts has 24 LUTs for lighting and they are used like so:
0 D0
1 D1
2 is missing because SP uses LUTs 8-15
3 FR
4 RB
5 RG
6 RR
8-15 SP0-7
16-23 DA0-7, but this is not handled in this function as the lookup is a bit different
The light environment configuration controls which LUTs are available for use.
If a LUT is not available in the selected configuration, its value will always read a constant 1.0, regardless of the enable state in GPUREG_LIGHTING_CONFIG1.
If RR is enabled but not RG or RB, the output of RR is used for all three components: red, green and blue.
### Distance attenuation
Distance attenuation is computed differently from the other factors. For example,
it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use
GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the
fragment, together with the distance attenuation scale and bias, to calculate where in the LUT to look up.
See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE

View file

@ -0,0 +1,34 @@
# Software Information
display_name = "Nintendo - 3DS (Panda3DS)"
authors = "Panda3DS Authors (tm)"
supported_extensions = "3ds|3dsx|elf|axf|cci|cxi|app"
corename = "Panda3DS"
categories = "Emulator"
license = "GPLv3"
permissions = ""
display_version = "Git"
# Hardware Information
manufacturer = "Nintendo"
systemname = "3DS"
systemid = "3ds"
# Libretro Information
database = "Nintendo - Nintendo 3DS"
supports_no_game = "false"
savestate = "true"
savestate_features = "basic"
cheats = "false"
input_descriptors = "true"
memory_descriptors = "false"
libretro_saves = "true"
core_options = "true"
core_options_version = "1.0"
load_subsystem = "false"
hw_render = "true"
required_hw_api = "OpenGL Core >= 4.1"
needs_fullpath = "true"
disk_control = "false"
is_experimental = "true"
description = "Panda3DS !"

View file

@ -22,8 +22,11 @@ class ShaderJIT {
ShaderCache cache;
#endif
bool accurateMul = false;
public:
void setAccurateMul(bool value) { accurateMul = value; }
#ifdef PANDA3DS_SHADER_JIT_SUPPORTED
// Call this before starting to process a batch of vertices
// This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader
@ -36,11 +39,11 @@ class ShaderJIT {
static constexpr bool isAvailable() { return true; }
#else
void prepare(PICAShader& shaderUnit) {
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
}
void run(PICAShader& shaderUnit) {
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
}
// Define dummy callback. This should never be called if the shader JIT is not supported

View file

@ -37,6 +37,8 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
// Shows whether the loaded shader has any log2 and exp2 instructions
bool codeHasLog2 = false;
bool codeHasExp2 = false;
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
bool useSafeMUL = false;
oaknut::Label log2Func, exp2Func;
oaknut::Label emitLog2Func();
@ -123,7 +125,7 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
PrologueCallback prologueCb = nullptr;
// Initialize our emitter with "allocSize" bytes of memory allocated for the code buffer
ShaderEmitter() : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {}
ShaderEmitter(bool useSafeMUL) : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()), useSafeMUL(useSafeMUL) {}
// PC must be a valid entrypoint here. It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does
InstructionCallback getInstructionCallback(u32 pc) { return getLabelPointer<InstructionCallback>(instructionLabels.at(pc)); }

View file

@ -32,6 +32,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
Label negateVector;
// Vector value of (1.0, 1.0, 1.0, 1.0) for SLT(i)/SGE(i)
Label onesVector;
// Vector value of (0xFF, 0xFF, 0xFF, 0) for setting the w component to 0 in DP3
Label dp3Vector;
u32 recompilerPC = 0; // PC the recompiler is currently recompiling @
u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop)
@ -43,12 +45,17 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
// Shows whether the loaded shader has any log2 and exp2 instructions
bool codeHasLog2 = false;
bool codeHasExp2 = false;
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
bool useSafeMUL = false;
Xbyak::Label log2Func, exp2Func;
Xbyak::Label emitLog2Func();
Xbyak::Label emitExp2Func();
Xbyak::util::Cpu cpuCaps;
// Emit a PICA200-compliant multiplication that handles "0 * inf = 0"
void emitSafeMUL(Xbyak::Xmm src1, Xbyak::Xmm src2, Xbyak::Xmm scratch);
// Compile all instructions from [current recompiler PC, end)
void compileUntil(const PICAShader& shaderUnit, u32 endPC);
// Compile instruction "instr"
@ -125,7 +132,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
PrologueCallback prologueCb = nullptr;
// Initialize our emitter with "allocSize" bytes of RWX memory
ShaderEmitter() : Xbyak::CodeGenerator(allocSize) {
ShaderEmitter(bool useSafeMUL) : Xbyak::CodeGenerator(allocSize), useSafeMUL(useSafeMUL) {
cpuCaps = Xbyak::util::Cpu();
haveSSE4_1 = cpuCaps.has(Xbyak::util::Cpu::tSSE41);

View file

@ -92,6 +92,9 @@ class GPU {
// Set to false by the renderer when the lighting_lut is uploaded to the GPU
bool lightingLUTDirty = false;
bool fogLUTDirty = false;
std::array<uint32_t, 128> fogLUT;
GPU(Memory& mem, EmulatorConfig& config);
void display() { renderer->display(); }
void screenshot(const std::string& name) { renderer->screenshot(name); }
@ -164,7 +167,8 @@ class GPU {
u32 index = paddr - PhysicalAddrs::VRAM;
return (T*)&vram[index];
} else [[unlikely]] {
Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr);
Helpers::warn("[GPU] Tried to access unknown physical address: %08X", paddr);
return nullptr;
}
}

View file

@ -0,0 +1,258 @@
#pragma once
#include <array>
#include <cstring>
#include <type_traits>
#include <unordered_map>
#include "PICA/pica_hash.hpp"
#include "PICA/regs.hpp"
#include "bitfield.hpp"
#include "helpers.hpp"
namespace PICA {
// Output-related part of the fragment pipeline key: alpha test function and depth map enable, packed into one u32
struct OutputConfig {
union {
// Raw view of the packed fields below. Zero-initialized so that unused bits are deterministic
u32 raw{};
// Merge the enable + compare function into 1 field to avoid duplicate shaders
// enable == off means a CompareFunction of Always
BitField<0, 3, CompareFunction> alphaTestFunction;
// Bit 3: set from the lowest bit of the DepthmapEnable register by FragmentConfig's constructor
BitField<3, 1, u32> depthMapEnable;
};
};
// Texture-related part of the fragment pipeline key. All members are raw register values copied by FragmentConfig's constructor
struct TextureConfig {
// Raw value of the TexUnitCfg register
u32 texUnitConfig;
// Raw value of the TexEnvUpdateBuffer register
u32 texEnvUpdateBuffer;
// There's 6 TEV stages, and each one is configured via 4 word-sized registers
// (+ the constant color register, which we don't include here, otherwise we'd generate too many shaders)
// Stage N occupies indices [N * 4, N * 4 + 3]
std::array<u32, 4 * 6> tevConfigs;
};
// Fog-related part of the fragment pipeline key, packed into one u32
struct FogConfig {
union {
// Raw view of the packed fields below. Zero-initialized; bits 4-7 are not covered by any field and stay 0
u32 raw{};
// Bits 0-2: Fog mode, taken from the low 3 bits of the TexEnvUpdateBuffer register
BitField<0, 3, FogMode> mode;
// Bit 3: Only filled in by FragmentConfig when mode == FogMode::Fog (from bit 16 of TexEnvUpdateBuffer)
BitField<3, 1, u32> flipDepth;
// Bits 8-31: Fog colour, one byte per channel, only filled in by FragmentConfig when mode == FogMode::Fog
BitField<8, 8, u32> fogColorR;
BitField<16, 8, u32> fogColorG;
BitField<24, 8, u32> fogColorB;
};
};
// Per-light part of the lighting key, packed into one u16. Filled in by LightingConfig's constructor
struct Light {
    union {
        // Raw view of the packed fields below
        // Zero-initialized (like OutputConfig/FogConfig/LightingConfig) so that a default-constructed Light has
        // deterministic bytes, which matters because the enclosing config is compared and hashed bytewise
        u16 raw{};

        // Bits 0-2: Hardware light ID, read from the LightPermutation register
        BitField<0, 3, u16> num;
        // Bits 3, 4, 7, 8: Copied from bits 0-3 of this light's config register
        BitField<3, 1, u16> directional;
        BitField<4, 1, u16> twoSidedDiffuse;
        // Bits 5, 6, 9: Derived from LightConfig1, where a 0 bit means "enabled", so these hold the inverted register bit
        BitField<5, 1, u16> distanceAttenuationEnable;
        BitField<6, 1, u16> spotAttenuationEnable;
        BitField<7, 1, u16> geometricFactor0;
        BitField<8, 1, u16> geometricFactor1;
        BitField<9, 1, u16> shadowEnable;
    };
};
// Configuration of a single lighting LUT, packed into one u32. Filled in by LightingConfig's constructor
struct LightingLUTConfig {
    union {
        // Raw view of the packed fields below
        // Zero-initialized (like OutputConfig/FogConfig/LightingConfig) so that a default-constructed config has
        // deterministic bytes, which matters because the enclosing config is compared and hashed bytewise
        u32 raw{};

        // Bit 0: Whether this LUT is enabled
        BitField<0, 1, u32> enable;
        // Bit 1: Set when the corresponding bit in the LightLUTAbs register is 0
        BitField<1, 1, u32> absInput;
        // Bits 2-4: 3-bit input selector taken from the LightLUTSelect register
        BitField<2, 3, u32> type;
        // Bits 5-7: 3-bit scale selector taken from the LightLUTScale register
        BitField<5, 3, u32> scale;
    };
};
// Lighting-related part of the fragment pipeline key
// Consists of one packed u32 of global lighting state, one config per lighting LUT and one config per active light
// If lighting is disabled everything stays zeroed, so all "lighting off" configurations compare equal
struct LightingConfig {
    union {
        u32 raw{};
        BitField<0, 1, u32> enable;
        // Number of active lights (1-8), ie the LightNumber register value + 1
        BitField<1, 4, u32> lightNum;
        BitField<5, 2, u32> bumpMode;
        BitField<7, 2, u32> bumpSelector;
        BitField<9, 1, u32> bumpRenorm;
        BitField<10, 1, u32> clampHighlights;
        // Light environment configuration (bits 4-7 of LightConfig0)
        BitField<11, 4, u32> config;
        BitField<15, 1, u32> enablePrimaryAlpha;
        BitField<16, 1, u32> enableSecondaryAlpha;
        BitField<17, 1, u32> enableShadow;
        BitField<18, 1, u32> shadowPrimary;
        BitField<19, 1, u32> shadowSecondary;
        BitField<20, 1, u32> shadowInvert;
        BitField<21, 1, u32> shadowAlpha;
        BitField<22, 2, u32> shadowSelector;
    };

    // Indexed with the Lights::LUT_* values and spotlightLutIndex
    std::array<LightingLUTConfig, 7> luts{};
    // Only the first lightNum entries are filled in, the rest stay zeroed
    std::array<Light, 8> lights{};

    // Decode the lighting state out of the PICA internal register file "regs"
    LightingConfig(const std::array<u32, 0x300>& regs) {
        // Ignore lighting registers if it's disabled
        if ((regs[InternalRegs::LightingEnable] & 1) == 0) {
            return;
        }

        const u32 config0 = regs[InternalRegs::LightConfig0];
        const u32 config1 = regs[InternalRegs::LightConfig1];
        const u32 totalLightCount = Helpers::getBits<0, 3>(regs[InternalRegs::LightNumber]) + 1;

        enable = 1;
        lightNum = totalLightCount;

        enableShadow = Helpers::getBit<0>(config0);
        // The shadow fields are only decoded when shadows are on, so that they don't create distinct keys otherwise
        if (enableShadow) [[unlikely]] {
            shadowPrimary = Helpers::getBit<16>(config0);
            shadowSecondary = Helpers::getBit<17>(config0);
            shadowInvert = Helpers::getBit<18>(config0);
            shadowAlpha = Helpers::getBit<19>(config0);
            shadowSelector = Helpers::getBits<24, 2>(config0);
        }

        enablePrimaryAlpha = Helpers::getBit<2>(config0);
        enableSecondaryAlpha = Helpers::getBit<3>(config0);
        config = Helpers::getBits<4, 4>(config0);

        bumpSelector = Helpers::getBits<22, 2>(config0);
        clampHighlights = Helpers::getBit<27>(config0);
        bumpMode = Helpers::getBits<28, 2>(config0);
        bumpRenorm = Helpers::getBit<30>(config0) ^ 1;  // 0 = enable so flip it with xor

        // Unsigned index to match totalLightCount and avoid a signed/unsigned comparison
        for (u32 i = 0; i < totalLightCount; i++) {
            auto& light = lights[i];
            // Each nibble of LightPermutation maps active light slot i to a hardware light ID
            light.num = (regs[InternalRegs::LightPermutation] >> (i * 4)) & 0x7;

            // Per-light registers are spaced 0x10 registers apart
            const u32 lightConfig = regs[InternalRegs::Light0Config + 0x10 * light.num];
            light.directional = Helpers::getBit<0>(lightConfig);
            light.twoSidedDiffuse = Helpers::getBit<1>(lightConfig);
            light.geometricFactor0 = Helpers::getBit<2>(lightConfig);
            light.geometricFactor1 = Helpers::getBit<3>(lightConfig);

            light.shadowEnable = ((config1 >> light.num) & 1) ^ 1;                      // This also does 0 = enabled
            light.spotAttenuationEnable = ((config1 >> (8 + light.num)) & 1) ^ 1;       // Same here
            light.distanceAttenuationEnable = ((config1 >> (24 + light.num)) & 1) ^ 1;  // Of course same here
        }

        LightingLUTConfig& d0 = luts[Lights::LUT_D0];
        LightingLUTConfig& d1 = luts[Lights::LUT_D1];
        LightingLUTConfig& sp = luts[spotlightLutIndex];
        LightingLUTConfig& fr = luts[Lights::LUT_FR];
        LightingLUTConfig& rb = luts[Lights::LUT_RB];
        LightingLUTConfig& rg = luts[Lights::LUT_RG];
        LightingLUTConfig& rr = luts[Lights::LUT_RR];

        // The LUT enable bits in LightConfig1 are active-low
        d0.enable = Helpers::getBit<16>(config1) == 0;
        d1.enable = Helpers::getBit<17>(config1) == 0;
        fr.enable = Helpers::getBit<19>(config1) == 0;
        rb.enable = Helpers::getBit<20>(config1) == 0;
        rg.enable = Helpers::getBit<21>(config1) == 0;
        rr.enable = Helpers::getBit<22>(config1) == 0;
        // The spotlight LUT entry is always marked enabled here, spotlights are toggled per light instead
        sp.enable = 1;

        const u32 lutAbs = regs[InternalRegs::LightLUTAbs];
        const u32 lutSelect = regs[InternalRegs::LightLUTSelect];
        const u32 lutScale = regs[InternalRegs::LightLUTScale];

        // Each LUT gets one nibble in the abs/select/scale registers
        // Disabled LUTs keep their fields zeroed so that they don't create distinct keys
        if (d0.enable) {
            d0.absInput = Helpers::getBit<1>(lutAbs) == 0;
            d0.type = Helpers::getBits<0, 3>(lutSelect);
            d0.scale = Helpers::getBits<0, 3>(lutScale);
        }

        if (d1.enable) {
            d1.absInput = Helpers::getBit<5>(lutAbs) == 0;
            d1.type = Helpers::getBits<4, 3>(lutSelect);
            d1.scale = Helpers::getBits<4, 3>(lutScale);
        }

        sp.absInput = Helpers::getBit<9>(lutAbs) == 0;
        sp.type = Helpers::getBits<8, 3>(lutSelect);
        sp.scale = Helpers::getBits<8, 3>(lutScale);

        if (fr.enable) {
            fr.absInput = Helpers::getBit<13>(lutAbs) == 0;
            fr.type = Helpers::getBits<12, 3>(lutSelect);
            fr.scale = Helpers::getBits<12, 3>(lutScale);
        }

        if (rb.enable) {
            rb.absInput = Helpers::getBit<17>(lutAbs) == 0;
            rb.type = Helpers::getBits<16, 3>(lutSelect);
            rb.scale = Helpers::getBits<16, 3>(lutScale);
        }

        if (rg.enable) {
            rg.absInput = Helpers::getBit<21>(lutAbs) == 0;
            rg.type = Helpers::getBits<20, 3>(lutSelect);
            rg.scale = Helpers::getBits<20, 3>(lutScale);
        }

        if (rr.enable) {
            rr.absInput = Helpers::getBit<25>(lutAbs) == 0;
            rr.type = Helpers::getBits<24, 3>(lutSelect);
            rr.scale = Helpers::getBits<24, 3>(lutScale);
        }
    }
};
// Config used for identifying unique fragment pipeline configurations
struct FragmentConfig {
OutputConfig outConfig;
TextureConfig texConfig;
FogConfig fogConfig;
LightingConfig lighting;
bool operator==(const FragmentConfig& config) const {
// Hash function and equality operator required by std::unordered_map
return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
}
FragmentConfig(const std::array<u32, 0x300>& regs) : lighting(regs) {
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);
outConfig.alphaTestFunction =
(alphaTestConfig & 1) ? static_cast<PICA::CompareFunction>(alphaTestFunction) : PICA::CompareFunction::Always;
outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1;
texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
// Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like
// {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO
#define setupTevStage(stage) \
std::memcpy(&texConfig.tevConfigs[stage * 4], &regs[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \
texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 4];
setupTevStage(0);
setupTevStage(1);
setupTevStage(2);
setupTevStage(3);
setupTevStage(4);
setupTevStage(5);
#undef setupTevStage
fogConfig.mode = (FogMode)Helpers::getBits<0, 3>(regs[InternalRegs::TexEnvUpdateBuffer]);
if (fogConfig.mode == FogMode::Fog) {
fogConfig.flipDepth = Helpers::getBit<16>(regs[InternalRegs::TexEnvUpdateBuffer]);
fogConfig.fogColorR = Helpers::getBits<0, 8>(regs[InternalRegs::FogColor]);
fogConfig.fogColorG = Helpers::getBits<8, 8>(regs[InternalRegs::FogColor]);
fogConfig.fogColorB = Helpers::getBits<16, 8>(regs[InternalRegs::FogColor]);
}
}
};
// FragmentConfig is compared with memcmp and hashed over its raw bytes
// So make sure its building blocks have no padding or otherwise non-unique object representations
static_assert(
std::has_unique_object_representations<OutputConfig>() && std::has_unique_object_representations<TextureConfig>() &&
std::has_unique_object_representations<FogConfig>() && std::has_unique_object_representations<Light>()
);
} // namespace PICA
// std::hash specialization for our fragment config class, so that it can be used as a key in unordered containers
// The hash is computed over the raw bytes of the config
template <>
struct std::hash<PICA::FragmentConfig> {
    std::size_t operator()(const PICA::FragmentConfig& config) const noexcept {
        const char* bytes = reinterpret_cast<const char*>(&config);
        return PICAHash::computeHash(bytes, sizeof(config));
    }
};

View file

@ -0,0 +1,45 @@
#pragma once
#include <array>
#include <cstddef>
#include <type_traits>
#include "helpers.hpp"
namespace PICA {
// CPU-side mirror of the per-light uniform data. The member order and alignment are laid out for std140,
// so members must not be reordered or repacked
struct LightUniform {
using vec3 = std::array<float, 3>;
// std140 requires vec3s be aligned to 16 bytes
alignas(16) vec3 specular0;
alignas(16) vec3 specular1;
alignas(16) vec3 diffuse;
alignas(16) vec3 ambient;
alignas(16) vec3 position;
alignas(16) vec3 spotlightDirection;
// The two floats below have no explicit alignment, so the first one sits directly after spotlightDirection's 12 bytes
float distanceAttenuationBias;
float distanceAttenuationScale;
};
// CPU-side mirror of the fragment uniform data. The member order and alignment are layout-sensitive,
// so members must not be reordered or repacked
struct FragmentUniforms {
using vec3 = std::array<float, 3>;
using vec4 = std::array<float, 4>;
// Number of TEV stages, one constant colour is stored per stage
static constexpr usize tevStageCount = 6;
// NOTE(review): presumably the reference value for the alpha test - confirm against the shader
s32 alphaReference;
float depthScale;
float depthOffset;
alignas(16) vec4 constantColors[tevStageCount];
alignas(16) vec4 tevBufferColor;
alignas(16) vec4 clipCoords;
// Note: We upload this as a u32 and decode on GPU
u32 globalAmbientLight;
// NOTE: THIS MUST BE LAST so that if lighting is disabled we can potentially omit uploading it
// One entry per light, 8 in total
LightUniform lightUniforms[8];
};
// Assert that lightUniforms is the last member of the structure
static_assert(offsetof(FragmentUniforms, lightUniforms) + 8 * sizeof(LightUniform) == sizeof(FragmentUniforms));
} // namespace PICA

View file

@ -51,6 +51,18 @@ namespace PICA {
#undef defineTexEnv
// clang-format on
// Fog registers
FogColor = 0xE1,
FogLUTIndex = 0xE6,
FogLUTData0 = 0xE8,
FogLUTData1 = 0xE9,
FogLUTData2 = 0xEA,
FogLUTData3 = 0xEB,
FogLUTData4 = 0xEC,
FogLUTData5 = 0xED,
FogLUTData6 = 0xEE,
FogLUTData7 = 0xEF,
// Framebuffer registers
ColourOperation = 0x100,
BlendFunc = 0x101,
@ -67,7 +79,29 @@ namespace PICA {
ColourBufferLoc = 0x11D,
FramebufferSize = 0x11E,
//LightingRegs
// Lighting registers
LightingEnable = 0x8F,
Light0Specular0 = 0x140,
Light0Specular1 = 0x141,
Light0Diffuse = 0x142,
Light0Ambient = 0x143,
Light0XY = 0x144,
Light0Z = 0x145,
Light0SpotlightXY = 0x146,
Light0SpotlightZ = 0x147,
Light0Config = 0x149,
Light0AttenuationBias = 0x14A,
Light0AttenuationScale = 0x14B,
LightGlobalAmbient = 0x1C0,
LightNumber = 0x1C2,
LightConfig0 = 0x1C3,
LightConfig1 = 0x1C4,
LightPermutation = 0x1D9,
LightLUTAbs = 0x1D0,
LightLUTSelect = 0x1D1,
LightLUTScale = 0x1D2,
LightingLUTIndex = 0x01C5,
LightingLUTData0 = 0x01C8,
LightingLUTData1 = 0x01C9,
@ -231,7 +265,8 @@ namespace PICA {
enum : u32 {
LUT_D0 = 0,
LUT_D1,
LUT_FR,
// LUT 2 is not used, the emulator internally uses it for referring to the current source's spotlight in shaders
LUT_FR = 0x3,
LUT_RB,
LUT_RG,
LUT_RR,
@ -255,6 +290,11 @@ namespace PICA {
};
}
// There's actually 8 different LUTs (SP0-SP7), one for each light with different indices (8-15)
// We use an unused LUT value for "this light source's spotlight" instead and figure out which light source to use in compileLutLookup
// This is particularly intuitive in several places, such as checking if a LUT is enabled
static constexpr int spotlightLutIndex = 2;
enum class TextureFmt : u32 {
RGBA8 = 0x0,
RGB8 = 0x1,
@ -345,4 +385,137 @@ namespace PICA {
GeometryPrimitive = 3,
};
// Comparison function selector. All 8 values fit in a 3-bit register field
enum class CompareFunction : u32 {
Never = 0,
Always = 1,
Equal = 2,
NotEqual = 3,
Less = 4,
LessOrEqual = 5,
Greater = 6,
GreaterOrEqual = 7,
};
// Fog mode selector, stored in a 3-bit register field
// NOTE(review): Only these 3 encodings are named here, the remaining values are presumably unused - confirm
enum class FogMode : u32 {
Disabled = 0,
Fog = 5,
Gas = 7,
};
// Decoded configuration of a single texture environment (TEV) stage
// Built from the stage's raw source, operand, combiner, color and scale register values
struct TexEnvConfig {
    enum class Source : u8 {
        PrimaryColor = 0x0,
        PrimaryFragmentColor = 0x1,
        SecondaryFragmentColor = 0x2,
        Texture0 = 0x3,
        Texture1 = 0x4,
        Texture2 = 0x5,
        Texture3 = 0x6,
        // TODO: Inbetween values are unknown
        PreviousBuffer = 0xD,
        Constant = 0xE,
        Previous = 0xF,
    };

    enum class ColorOperand : u8 {
        SourceColor = 0x0,
        OneMinusSourceColor = 0x1,
        SourceAlpha = 0x2,
        OneMinusSourceAlpha = 0x3,
        SourceRed = 0x4,
        OneMinusSourceRed = 0x5,
        // TODO: Inbetween values are unknown
        SourceGreen = 0x8,
        OneMinusSourceGreen = 0x9,
        // Inbetween values are unknown
        SourceBlue = 0xC,
        OneMinusSourceBlue = 0xD,
    };

    enum class AlphaOperand : u8 {
        SourceAlpha = 0x0,
        OneMinusSourceAlpha = 0x1,
        SourceRed = 0x2,
        OneMinusSourceRed = 0x3,
        SourceGreen = 0x4,
        OneMinusSourceGreen = 0x5,
        SourceBlue = 0x6,
        OneMinusSourceBlue = 0x7,
    };

    enum class Operation : u8 {
        Replace = 0,
        Modulate = 1,
        Add = 2,
        AddSigned = 3,
        Lerp = 4,
        Subtract = 5,
        Dot3RGB = 6,
        Dot3RGBA = 7,
        MultiplyAdd = 8,
        AddMultiply = 9,
    };

    // RGB sources
    Source colorSource1, colorSource2, colorSource3;
    // Alpha sources
    Source alphaSource1, alphaSource2, alphaSource3;

    // RGB operands
    ColorOperand colorOperand1, colorOperand2, colorOperand3;
    // Alpha operands
    AlphaOperand alphaOperand1, alphaOperand2, alphaOperand3;

    // Texture environment operations for this stage
    Operation colorOp, alphaOp;

    u32 constColor;

  private:
    // These are the only private members since their value doesn't actually reflect the scale
    // (they hold the raw 2-bit register fields), so we make them private so we'll always use
    // the appropriate member functions instead
    u8 colorScale;
    u8 alphaScale;

  public:
    // Create texture environment object from TEV registers
    TexEnvConfig(u32 source, u32 operand, u32 combiner, u32 color, u32 scale) : constColor(color) {
        colorSource1 = Helpers::getBits<0, 4, Source>(source);
        colorSource2 = Helpers::getBits<4, 4, Source>(source);
        colorSource3 = Helpers::getBits<8, 4, Source>(source);

        alphaSource1 = Helpers::getBits<16, 4, Source>(source);
        alphaSource2 = Helpers::getBits<20, 4, Source>(source);
        alphaSource3 = Helpers::getBits<24, 4, Source>(source);

        colorOperand1 = Helpers::getBits<0, 4, ColorOperand>(operand);
        colorOperand2 = Helpers::getBits<4, 4, ColorOperand>(operand);
        colorOperand3 = Helpers::getBits<8, 4, ColorOperand>(operand);

        alphaOperand1 = Helpers::getBits<12, 3, AlphaOperand>(operand);
        alphaOperand2 = Helpers::getBits<16, 3, AlphaOperand>(operand);
        alphaOperand3 = Helpers::getBits<20, 3, AlphaOperand>(operand);

        colorOp = Helpers::getBits<0, 4, Operation>(combiner);
        alphaOp = Helpers::getBits<16, 4, Operation>(combiner);

        colorScale = Helpers::getBits<0, 2>(scale);
        alphaScale = Helpers::getBits<16, 2>(scale);
    }

    // The raw scale fields encode a power of two: 0 -> 1x, 1 -> 2x, 2 -> 4x. The remaining value (3) is treated as 1x
    // These are const so that they can be called on const TexEnvConfig objects
    u32 getColorScale() const { return (colorScale <= 2) ? (1 << colorScale) : 1; }
    u32 getAlphaScale() const { return (alphaScale <= 2) ? (1 << alphaScale) : 1; }

    // Returns true if this stage just forwards the previous stage's output unmodified
    bool isPassthroughStage() const {
        // clang-format off
        // Thank you to the Citra dev that wrote this out
        return (
            colorOp == Operation::Replace && alphaOp == Operation::Replace &&
            colorSource1 == Source::Previous && alphaSource1 == Source::Previous &&
            colorOperand1 == ColorOperand::SourceColor && alphaOperand1 == AlphaOperand::SourceAlpha &&
            getColorScale() == 1 && getAlphaScale() == 1
        );
        // clang-format on
    }
};
} // namespace PICA

View file

@ -1,6 +1,8 @@
#pragma once
#include <algorithm>
#include <array>
#include <cassert>
#include <cstddef>
#include <cstring>
#include "PICA/float_types.hpp"
@ -90,9 +92,12 @@ class PICAShader {
public:
// These are placed close to the temp registers and co because it helps the JIT generate better code
u32 entrypoint = 0; // Initial shader PC
u32 boolUniform;
std::array<std::array<u8, 4>, 4> intUniforms;
// We want these registers in this order & with this alignment for uploading them directly to a UBO
// When emulating shaders on the GPU. Plus this alignment for float uniforms is necessary for doing SIMD in the shader->CPU recompilers.
alignas(16) std::array<vec4f, 96> floatUniforms;
alignas(16) std::array<std::array<u8, 4>, 4> intUniforms;
u32 boolUniform;
alignas(16) std::array<vec4f, 16> fixedAttributes; // Fixed vertex attributes
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
@ -220,13 +225,9 @@ class PICAShader {
public:
static constexpr size_t maxInstructionCount = 4096;
std::array<u32, maxInstructionCount> loadedShader; // Currently loaded & active shader
std::array<u32, maxInstructionCount> bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
PICAShader(ShaderType type) : type(type) {}
// These functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them
void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); }
void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; }
void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; }
@ -235,7 +236,7 @@ class PICAShader {
Helpers::panic("o no, shader upload overflew");
}
bufferedShader[bufferIndex++] = word;
loadedShader[bufferIndex++] = word;
bufferIndex &= 0xfff;
codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
@ -295,4 +296,9 @@ class PICAShader {
Hash getCodeHash();
Hash getOpdescHash();
};
};
static_assert(
offsetof(PICAShader, intUniforms) == offsetof(PICAShader, floatUniforms) + 96 * sizeof(float) * 4 &&
offsetof(PICAShader, boolUniform) == offsetof(PICAShader, intUniforms) + 4 * sizeof(u8) * 4
);

View file

@ -0,0 +1,112 @@
#pragma once
#include <set>
#include <string>
#include <tuple>
#include <map>
#include <vector>
#include "PICA/shader.hpp"
#include "PICA/shader_gen_types.hpp"
struct EmulatorConfig;
namespace PICA::ShaderGen {
// Control flow analysis is partially based on
// https://github.com/PabloMK7/citra/blob/d0179559466ff09731d74474322ee880fbb44b00/src/video_core/shader/generator/glsl_shader_decompiler.cpp#L33
struct ControlFlow {
// A continuous range of addresses
struct AddressRange {
u32 start, end;
AddressRange(u32 start, u32 end) : start(start), end(end) {}
// Use lexicographic comparison for functions in order to sort them in a set
bool operator<(const AddressRange& other) const { return std::tie(start, end) < std::tie(other.start, other.end); }
};
struct Function {
using Labels = std::set<u32>;
enum class ExitMode {
Unknown, // Can't guarantee whether we'll exit properly, fall back to CPU shaders (can happen with jmp shenanigans)
AlwaysReturn, // All paths reach the return point.
Conditional, // One or more code paths reach the return point or an END instruction conditionally.
AlwaysEnd, // All paths reach an END instruction.
};
u32 start; // Starting PC of the function
u32 end; // End PC of the function
Labels outLabels{}; // Labels this function can "goto" (jump) to
ExitMode exitMode = ExitMode::Unknown;
explicit Function(u32 start, u32 end) : start(start), end(end) {}
bool operator<(const Function& other) const { return AddressRange(start, end) < AddressRange(other.start, other.end); }
std::string getIdentifier() const { return "func_" + std::to_string(start) + "_to_" + std::to_string(end); }
std::string getForwardDecl() const { return "void " + getIdentifier() + "();\n"; }
std::string getCallStatement() const { return getIdentifier() + "()"; }
};
std::set<Function> functions{};
std::map<AddressRange, Function::ExitMode> exitMap{};
// Tells us whether analysis of the shader we're trying to compile failed, in which case we'll need to fail back to shader emulation
// On the CPU
bool analysisFailed = false;
// This will recursively add all functions called by the function too, as analyzeFunction will call addFunction on control flow instructions
const Function* addFunction(const PICAShader& shader, u32 start, u32 end) {
auto searchIterator = functions.find(Function(start, end));
if (searchIterator != functions.end()) {
return &(*searchIterator);
}
// Add this function and analyze it if it doesn't already exist
Function function(start, end);
function.exitMode = analyzeFunction(shader, start, end, function.outLabels);
// This function could not be fully analyzed, report failure
if (function.exitMode == Function::ExitMode::Unknown) {
analysisFailed = true;
return nullptr;
}
// Add function to our function list
auto [it, added] = functions.insert(std::move(function));
return &(*it);
}
void analyze(const PICAShader& shader, u32 entrypoint);
Function::ExitMode analyzeFunction(const PICAShader& shader, u32 start, u32 end, Function::Labels& labels);
};
// Holds the state used to turn a PICAShader into a source string for a given target API and shading language
// The result is produced by decompile()
class ShaderDecompiler {
    using AddressRange = ControlFlow::AddressRange;
    using Function = ControlFlow::Function;

    ControlFlow controlFlow{};

    PICAShader& shader;
    EmulatorConfig& config;
    // Output buffer for the generated shader source
    std::string decompiledShader;

    u32 entrypoint;

    API api;
    Language language;

    void compileInstruction(u32& pc, bool& finished);
    void compileRange(const AddressRange& range);
    void callFunction(const Function& function);
    const Function* findFunction(const AddressRange& range);

    void writeAttributes();

  public:
    // Note: Members are always initialized in declaration order, so the initializer list is kept in that same order
    ShaderDecompiler(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language)
        : shader(shader), config(config), decompiledShader(""), entrypoint(entrypoint), api(api), language(language) {}

    std::string decompile();
};
// Free-function entry point for shader decompilation, takes the same arguments as ShaderDecompiler's constructor
// NOTE(review): presumably constructs a ShaderDecompiler and returns the result of decompile() - the definition is not in this header, confirm
std::string decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language);
} // namespace PICA::ShaderGen

View file

@ -0,0 +1,39 @@
#pragma once
#include <string>
#include "PICA/gpu.hpp"
#include "PICA/pica_frag_config.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_gen_types.hpp"
#include "helpers.hpp"
namespace PICA::ShaderGen {
    // Generates shader source code out of a PICA::FragmentConfig, for a given target API and shading language
    // All private helpers append their output to the "shader" string that is passed to them
    // NOTE(review): The per-helper descriptions below are inferred from the declarations only - confirm against the implementation
    class FragmentGenerator {
        API api;
        Language language;

        // Presumably emits the code for TEV stage number "stage"
        void compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config);
        // Presumably emit the expression for a TEV source / colour operand / alpha operand
        void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config);
        void getColorOperand(
            std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index,
            const PICA::FragmentConfig& config
        );
        void getAlphaOperand(
            std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index,
            const PICA::FragmentConfig& config
        );
        // Presumably emit the code for a TEV colour / alpha combiner operation
        void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
        void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op);

        void applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config);
        void compileLights(std::string& shader, const PICA::FragmentConfig& config);
        void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID);
        bool isSamplerEnabled(u32 environmentID, u32 lutID);

        void compileFog(std::string& shader, const PICA::FragmentConfig& config);

      public:
        FragmentGenerator(API api, Language language) : api(api), language(language) {}
        // Returns the generated shader source for the given fragment pipeline configuration
        std::string generate(const PICA::FragmentConfig& config);
        std::string getDefaultVertexShader();

        // Change the API and shading language that subsequently generated shaders will target
        void setTarget(API api, Language language) {
            this->api = api;
            this->language = language;
        }
    };
}  // namespace PICA::ShaderGen

View file

@ -0,0 +1,9 @@
#pragma once
// Target selection enums shared by the shader generators
namespace PICA::ShaderGen {
// Graphics API this shader is targeting
enum class API { GL, GLES, Vulkan };
// Shading language to use (Only GLSL for the time being)
enum class Language { GLSL };
} // namespace PICA::ShaderGen

View file

@ -176,6 +176,7 @@ namespace Audio {
// Decode an entire buffer worth of audio
void decodeBuffer(DSPSource& source);
SampleBuffer decodePCM8(const u8* data, usize sampleCount, Source& source);
SampleBuffer decodePCM16(const u8* data, usize sampleCount, Source& source);
SampleBuffer decodeADPCM(const u8* data, usize sampleCount, Source& source);

View file

@ -7,14 +7,29 @@
// Remember to initialize every field here to its default value otherwise bad things will happen
struct EmulatorConfig {
// Only enable the shader JIT by default on platforms where it's completely tested
#ifdef PANDA3DS_X64_HOST
#if defined(PANDA3DS_X64_HOST) || defined(PANDA3DS_ARM64_HOST)
static constexpr bool shaderJitDefault = true;
#else
static constexpr bool shaderJitDefault = false;
#endif
// For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are
// horrible. On other platforms we default to ubershader + shadergen fallback for lights
#if defined(__ANDROID__) || defined(__APPLE__)
static constexpr bool ubershaderDefault = false;
#else
static constexpr bool ubershaderDefault = true;
#endif
bool shaderJitEnabled = shaderJitDefault;
bool discordRpcEnabled = false;
bool useUbershaders = ubershaderDefault;
bool accurateShaderMul = false;
// Toggles whether to force shadergen when there's more than N lights active and we're using the ubershader, for better performance
bool forceShadergenForLights = true;
int lightShadergenThreshold = 1;
RendererType rendererType = RendererType::OpenGL;
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null;
@ -36,4 +51,4 @@ struct EmulatorConfig {
EmulatorConfig(const std::filesystem::path& path);
void load();
void save();
};
};

View file

@ -15,6 +15,7 @@
#include "services/service_manager.hpp"
class CPU;
struct Scheduler;
class Kernel {
std::span<u32, 16> regs;
@ -243,6 +244,7 @@ public:
}
ServiceManager& getServiceManager() { return serviceManager; }
Scheduler& getScheduler();
void sendGPUInterrupt(GPUInterrupt type) { serviceManager.sendGPUInterrupt(type); }
void clearInstructionCache();

View file

@ -50,6 +50,7 @@ struct NCCH {
static constexpr u64 mediaUnit = 0x200;
u64 size = 0; // Size of NCCH converted to bytes
u64 saveDataSize = 0;
u32 stackSize = 0;
u32 bssSize = 0;
u32 exheaderSize = 0;
@ -60,10 +61,10 @@ struct NCCH {
CodeSetInfo text, data, rodata;
FSInfo partitionInfo;
std::optional<Crypto::AESKey> primaryKey, secondaryKey;
// Contents of the .code file in the ExeFS
std::vector<u8> codeFile;
// Contains of the cart's save data
std::vector<u8> saveData;
// The cart region. Only the CXI's region matters to us. Necessary to get past region locking
std::optional<Regions> region = std::nullopt;
std::vector<u8> smdh;
@ -76,7 +77,7 @@ struct NCCH {
bool hasExeFS() { return exeFS.size != 0; }
bool hasRomFS() { return romFS.size != 0; }
bool hasCode() { return codeFile.size() != 0; }
bool hasSaveData() { return saveData.size() != 0; }
bool hasSaveData() { return saveDataSize != 0; }
// Parse SMDH for region info and such. Returns false on failure, true on success
bool parseSMDH(const std::vector<u8> &smdh);

View file

@ -19,6 +19,7 @@
#include "panda_qt/config_window.hpp"
#include "panda_qt/patch_window.hpp"
#include "panda_qt/screen.hpp"
#include "panda_qt/shader_editor.hpp"
#include "panda_qt/text_editor.hpp"
#include "services/hid.hpp"
@ -48,6 +49,7 @@ class MainWindow : public QMainWindow {
EditCheat,
PressTouchscreen,
ReleaseTouchscreen,
ReloadUbershader,
};
// Tagged union representing our message queue messages
@ -99,6 +101,7 @@ class MainWindow : public QMainWindow {
CheatsWindow* cheatsEditor;
TextEditorWindow* luaEditor;
PatchWindow* patchWindow;
ShaderEditorWindow* shaderEditor;
// We use SDL's game controller API since it's the sanest API that supports as many controllers as possible
SDL_GameController* gameController = nullptr;
@ -110,9 +113,6 @@ class MainWindow : public QMainWindow {
void selectROM();
void dumpDspFirmware();
void dumpRomFS();
void openLuaEditor();
void openCheatsEditor();
void openPatchWindow();
void showAboutMenu();
void initControllers();
void pollControllers();
@ -139,5 +139,6 @@ class MainWindow : public QMainWindow {
void mouseReleaseEvent(QMouseEvent* event) override;
void loadLuaScript(const std::string& code);
void reloadShader(const std::string& shader);
void editCheat(u32 handle, const std::vector<uint8_t>& cheat, const std::function<void(u32)>& callback);
};

View file

@ -0,0 +1,27 @@
#pragma once
#include <QApplication>
#include <QDialog>
#include <QWidget>
#include <string>
#include "zep.h"
#include "zep/mode_repl.h"
#include "zep/regress.h"
// Dialog window that hosts a Zep text editor widget, used by the Qt frontend as a shader editor
class ShaderEditorWindow : public QDialog {
Q_OBJECT
private:
// The embedded Zep editor widget and the REPL provider object kept alongside it
Zep::ZepWidget_Qt zepWidget;
Zep::IZepReplProvider replProvider;
// NOTE(review): presumably the editor font size in points - confirm in the constructor
static constexpr float fontSize = 14.0f;
public:
// Whether this backend supports shader editor
bool supported = true;
ShaderEditorWindow(QWidget* parent, const std::string& filename, const std::string& initialText);
// Replaces the text of the buffer returned by the editor's GetMRUBuffer() with "text"
void setText(const std::string& text) { zepWidget.GetEditor().GetMRUBuffer()->SetText(text); }
// Defined out of line. NOTE(review): presumably enables or disables the editor window - confirm
void setEnable(bool enable);
};

View file

@ -1,6 +1,7 @@
#pragma once
#include <array>
#include <span>
#include <string>
#include <optional>
#include "PICA/pica_vertex.hpp"
@ -19,6 +20,7 @@ enum class RendererType : s8 {
Software = 3,
};
struct EmulatorConfig;
class GPU;
struct SDL_Window;
@ -45,6 +47,8 @@ class Renderer {
u32 outputWindowWidth = 400;
u32 outputWindowHeight = 240 * 2;
EmulatorConfig* emulatorConfig = nullptr;
public:
Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
virtual ~Renderer();
@ -66,6 +70,15 @@ class Renderer {
// This function does things like write back or cache necessary state before we delete our context
virtual void deinitGraphicsContext() = 0;
// Functions for hooking up the renderer core to the frontend's shader editor for editing ubershaders in real time
// SupportsShaderReload: Indicates whether the backend offers ubershader reload support or not
// GetUbershader/SetUbershader: Gets or sets the renderer's current ubershader
// SetUbershaderSetting: Passes the "use ubershaders" on/off setting to the backend
// The defaults below describe a backend without ubershader support: no reload, an empty shader string, and setters that do nothing
virtual bool supportsShaderReload() { return false; }
virtual std::string getUbershader() { return ""; }
virtual void setUbershader(const std::string& shader) {}
virtual void setUbershaderSetting(bool value) {}
// Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window
#ifdef PANDA3DS_FRONTEND_QT
virtual void initGraphicsContext(GL::Context* context) { Helpers::panic("Tried to initialize incompatible renderer with GL context"); }
@ -91,4 +104,6 @@ class Renderer {
outputWindowWidth = width;
outputWindowHeight = height;
}
void setConfig(EmulatorConfig* config) { emulatorConfig = config; }
};

View file

@ -40,9 +40,13 @@ struct GLStateManager {
GLuint boundVAO;
GLuint boundVBO;
GLuint currentProgram;
GLuint boundUBO;
GLenum depthFunc;
GLenum logicOp;
GLenum blendEquationRGB, blendEquationAlpha;
GLenum blendFuncSourceRGB, blendFuncSourceAlpha;
GLenum blendFuncDestRGB, blendFuncDestAlpha;
void reset();
void resetBlend();
@ -51,7 +55,7 @@ struct GLStateManager {
void resetColourMask();
void resetDepth();
void resetVAO();
void resetVBO();
void resetBuffers();
void resetProgram();
void resetScissor();
void resetStencil();
@ -183,6 +187,13 @@ struct GLStateManager {
}
}
// Bind a uniform buffer object, skipping the GL call when that handle is already the tracked UBO
void bindUBO(GLuint handle) {
	if (boundUBO == handle) {
		return;
	}

	boundUBO = handle;
	glBindBuffer(GL_UNIFORM_BUFFER, handle);
}
void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); }
void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); }
void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); }
@ -224,6 +235,41 @@ struct GLStateManager {
}
void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast<GLenum>(func)); }
// Cached counterpart to glBlendEquationSeparate: GL is only called when either equation differs from the tracked state
void setBlendEquation(GLenum modeRGB, GLenum modeAlpha) {
	const bool alreadySet = (blendEquationRGB == modeRGB) && (blendEquationAlpha == modeAlpha);
	if (alreadySet) {
		return;
	}

	blendEquationRGB = modeRGB;
	blendEquationAlpha = modeAlpha;
	glBlendEquationSeparate(modeRGB, modeAlpha);
}
// Cached counterpart to glBlendFuncSeparate: GL is only called when at least one of the four factors differs from the tracked state
void setBlendFunc(GLenum sourceRGB, GLenum destRGB, GLenum sourceAlpha, GLenum destAlpha) {
	const bool rgbUnchanged = (blendFuncSourceRGB == sourceRGB) && (blendFuncDestRGB == destRGB);
	const bool alphaUnchanged = (blendFuncSourceAlpha == sourceAlpha) && (blendFuncDestAlpha == destAlpha);
	if (rgbUnchanged && alphaUnchanged) {
		return;
	}

	blendFuncSourceRGB = sourceRGB;
	blendFuncDestRGB = destRGB;
	blendFuncSourceAlpha = sourceAlpha;
	blendFuncDestAlpha = destAlpha;
	glBlendFuncSeparate(sourceRGB, destRGB, sourceAlpha, destAlpha);
}
// Counterpart to regular glBlendEquation: uses the same equation for both the RGB and alpha channels
void setBlendEquation(GLenum mode) { setBlendEquation(mode, mode); }

// Typed overloads: convert the OpenGL::BlendEquation values to raw GLenums and forward to the cached setters above
void setBlendEquation(OpenGL::BlendEquation modeRGB, OpenGL::BlendEquation modeAlpha) {
	const GLenum rawRGB = static_cast<GLenum>(modeRGB);
	const GLenum rawAlpha = static_cast<GLenum>(modeAlpha);
	setBlendEquation(rawRGB, rawAlpha);
}

void setBlendEquation(OpenGL::BlendEquation mode) {
	const GLenum rawMode = static_cast<GLenum>(mode);
	setBlendEquation(rawMode);
}
};
static_assert(std::is_trivially_constructible<GLStateManager>(), "OpenGL State Manager class is not trivially constructible!");

View file

@ -1,11 +1,17 @@
#pragma once
#include <array>
#include <cstring>
#include <functional>
#include <span>
#include <unordered_map>
#include "PICA/float_types.hpp"
#include "PICA/pica_frag_config.hpp"
#include "PICA/pica_hash.hpp"
#include "PICA/pica_vertex.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_gen.hpp"
#include "gl_state.hpp"
#include "helpers.hpp"
#include "logger.hpp"
@ -24,21 +30,25 @@ class RendererGL final : public Renderer {
OpenGL::VertexArray vao;
OpenGL::VertexBuffer vbo;
bool enableUbershader = true;
// TEV configuration uniform locations
GLint textureEnvSourceLoc = -1;
GLint textureEnvOperandLoc = -1;
GLint textureEnvCombinerLoc = -1;
GLint textureEnvColorLoc = -1;
GLint textureEnvScaleLoc = -1;
// Data
struct {
// TEV configuration uniform locations
GLint textureEnvSourceLoc = -1;
GLint textureEnvOperandLoc = -1;
GLint textureEnvCombinerLoc = -1;
GLint textureEnvColorLoc = -1;
GLint textureEnvScaleLoc = -1;
// Uniform of PICA registers
GLint picaRegLoc = -1;
// Uniform of PICA registers
GLint picaRegLoc = -1;
// Depth configuration uniform locations
GLint depthOffsetLoc = -1;
GLint depthScaleLoc = -1;
GLint depthmapEnableLoc = -1;
// Depth configuration uniform locations
GLint depthOffsetLoc = -1;
GLint depthScaleLoc = -1;
GLint depthmapEnableLoc = -1;
} ubershaderData;
float oldDepthScale = -1.0;
float oldDepthOffset = 0.0;
@ -53,25 +63,39 @@ class RendererGL final : public Renderer {
OpenGL::VertexBuffer dummyVBO;
OpenGL::Texture screenTexture;
GLuint lightLUTTextureArray;
OpenGL::Texture LUTTexture;
OpenGL::Framebuffer screenFramebuffer;
OpenGL::Texture blankTexture;
// The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation
// We can compile this once and then link it with all other generated fragment shaders
OpenGL::Shader defaultShadergenVs;
// Cached recompiled fragment shader
struct CachedProgram {
OpenGL::Program program;
uint uboBinding;
};
std::unordered_map<PICA::FragmentConfig, CachedProgram> shaderCache;
OpenGL::Framebuffer getColourFBO();
OpenGL::Texture getTexture(Texture& tex);
OpenGL::Program& getSpecializedShader();
PICA::ShaderGen::FragmentGenerator fragShaderGen;
MAKE_LOG_FUNCTION(log, rendererLogger)
void setupBlending();
void setupStencilTest(bool stencilEnable);
void bindDepthBuffer();
void setupTextureEnvState();
void setupUbershaderTexEnv();
void bindTexturesToSlots();
void updateLightingLUT();
void updateFogLUT();
void initGraphicsContextInternal();
public:
RendererGL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs)
: Renderer(gpu, internalRegs, externalRegs) {}
: Renderer(gpu, internalRegs, externalRegs), fragShaderGen(PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL) {}
~RendererGL() override;
void reset() override;
@ -82,12 +106,20 @@ class RendererGL final : public Renderer {
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override; // Draw the given vertices
void deinitGraphicsContext() override;
virtual bool supportsShaderReload() override { return true; }
virtual std::string getUbershader() override;
virtual void setUbershader(const std::string& shader) override;
virtual void setUbershaderSetting(bool value) override { enableUbershader = value; }
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);
// Note: The caller is responsible for deleting the currently bound FBO before calling this
void setFBO(uint handle) { screenFramebuffer.m_handle = handle; }
void resetStateManager() { gl.reset(); }
void clearShaderCache();
void initUbershader(OpenGL::Program& program);
#ifdef PANDA3DS_FRONTEND_QT
virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override { initGraphicsContextInternal(); }
@ -95,4 +127,4 @@ class RendererGL final : public Renderer {
// Take a screenshot of the screen and store it in a file
void screenshot(const std::string& name) override;
};
};

View file

@ -11,7 +11,8 @@ struct Scheduler {
VBlank = 0, // End of frame event
UpdateTimers = 1, // Update kernel timer objects
RunDSP = 2, // Make the emulated DSP run for one audio frame
Panic = 3, // Dummy event that is always pending and should never be triggered (Timestamp = UINT64_MAX)
SignalY2R = 3, // Signal that a Y2R conversion has finished
Panic = 4, // Dummy event that is always pending and should never be triggered (Timestamp = UINT64_MAX)
TotalNumberOfEvents // How many event types do we have in total?
};
static constexpr usize totalNumberOfEvents = static_cast<usize>(EventType::TotalNumberOfEvents);

View file

@ -109,4 +109,5 @@ class ServiceManager {
HIDService& getHID() { return hid; }
NFCService& getNFC() { return nfc; }
DSPService& getDSP() { return dsp; }
Y2RService& getY2R() { return y2r; }
};

View file

@ -113,8 +113,12 @@ class Y2RService {
void startConversion(u32 messagePointer);
void stopConversion(u32 messagePointer);
public:
bool isBusy;
public:
Y2RService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {}
void reset();
void handleSyncRequest(u32 messagePointer);
void signalConversionDone();
};

View file

@ -62,6 +62,11 @@ void EmulatorConfig::load() {
shaderJitEnabled = toml::find_or<toml::boolean>(gpu, "EnableShaderJIT", shaderJitDefault);
vsyncEnabled = toml::find_or<toml::boolean>(gpu, "EnableVSync", true);
useUbershaders = toml::find_or<toml::boolean>(gpu, "UseUbershaders", ubershaderDefault);
accurateShaderMul = toml::find_or<toml::boolean>(gpu, "AccurateShaderMultiplication", false);
forceShadergenForLights = toml::find_or<toml::boolean>(gpu, "ForceShadergenForLighting", true);
lightShadergenThreshold = toml::find_or<toml::integer>(gpu, "ShadergenLightThreshold", 1);
}
}
@ -122,9 +127,15 @@ void EmulatorConfig::save() {
data["General"]["EnableDiscordRPC"] = discordRpcEnabled;
data["General"]["UsePortableBuild"] = usePortableBuild;
data["General"]["DefaultRomPath"] = defaultRomPath.string();
data["GPU"]["EnableShaderJIT"] = shaderJitEnabled;
data["GPU"]["Renderer"] = std::string(Renderer::typeToString(rendererType));
data["GPU"]["EnableVSync"] = vsyncEnabled;
data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul;
data["GPU"]["UseUbershaders"] = useUbershaders;
data["GPU"]["ForceShadergenForLighting"] = forceShadergenForLights;
data["GPU"]["ShadergenLightThreshold"] = lightShadergenThreshold;
data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType));
data["Audio"]["EnableAudio"] = audioEnabled;

View file

@ -16,7 +16,7 @@ void ShaderJIT::prepare(PICAShader& shaderUnit) {
auto it = cache.find(hash);
if (it == cache.end()) { // Block has not been compiled yet
auto emitter = std::make_unique<ShaderEmitter>();
auto emitter = std::make_unique<ShaderEmitter>(accurateMul);
emitter->compile(shaderUnit);
// Get pointer to callbacks
entrypointCallback = emitter->getInstructionCallback(shaderUnit.entrypoint);

View file

@ -7,9 +7,6 @@ using namespace Helpers;
using namespace oaknut;
using namespace oaknut::util;
// TODO: Expose safe/unsafe optimizations to the user
constexpr bool useSafeMUL = true;
// Similar to the x64 recompiler, we use an odd internal ABI, which abuses the fact that we'll very rarely be calling C++ functions
// So to avoid pushing and popping, we'll be making use of volatile registers as much as possible
static constexpr QReg src1Vec = Q1;
@ -144,8 +141,8 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
case ShaderOpcodes::CMP2: recCMP(shaderUnit, instruction); break;
case ShaderOpcodes::DP3: recDP3(shaderUnit, instruction); break;
case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break;
// case ShaderOpcodes::DPH:
// case ShaderOpcodes::DPHI: recDPH(shaderUnit, instruction); break;
case ShaderOpcodes::DPH:
case ShaderOpcodes::DPHI: recDPH(shaderUnit, instruction); break;
case ShaderOpcodes::END: recEND(shaderUnit, instruction); break;
case ShaderOpcodes::EX2: recEX2(shaderUnit, instruction); break;
case ShaderOpcodes::FLR: recFLR(shaderUnit, instruction); break;
@ -491,7 +488,7 @@ void ShaderEmitter::recDP3(const PICAShader& shader, u32 instruction) {
// Now do a full DP4
// Do a piecewise multiplication of the vectors first
if constexpr (useSafeMUL) {
if (useSafeMUL) {
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
} else {
FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4());
@ -518,7 +515,40 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) {
loadRegister<2>(src2Vec, shader, src2, 0, operandDescriptor);
// Do a piecewise multiplication of the vectors first
if constexpr (useSafeMUL) {
if (useSafeMUL) {
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
} else {
FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4());
}
FADDP(src1Vec.S4(), src1Vec.S4(), src1Vec.S4()); // Now add the adjacent components together
FADDP(src1Vec.toS(), src1Vec.toD().S2()); // Again for the bottom 2 lanes. Now the bottom lane contains the dot product
if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x
DUP(src1Vec.S4(), src1Vec.Selem()[0]); // src1Vec = src1Vec.xxxx
}
storeRegister(src1Vec, shader, dest, operandDescriptor);
}
void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) {
const bool isDPHI = (instruction >> 26) == ShaderOpcodes::DPHI;
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
const u32 src1 = isDPHI ? getBits<14, 5>(instruction) : getBits<12, 7>(instruction);
const u32 src2 = isDPHI ? getBits<7, 7>(instruction) : getBits<7, 5>(instruction);
const u32 idx = getBits<19, 2>(instruction);
const u32 dest = getBits<21, 5>(instruction);
const u32 writeMask = getBits<0, 4>(operandDescriptor);
// TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA)
loadRegister<1>(src1Vec, shader, src1, isDPHI ? 0 : idx, operandDescriptor);
loadRegister<2>(src2Vec, shader, src2, isDPHI ? idx : 0, operandDescriptor);
// // Attach 1.0 to the w component of src1
MOV(src1Vec.Selem()[3], onesVector.Selem()[0]);
// Now perform a DP4
// Do a piecewise multiplication of the vectors first
if (useSafeMUL) {
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
} else {
FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4());
@ -801,7 +831,7 @@ void ShaderEmitter::recMUL(const PICAShader& shader, u32 instruction) {
loadRegister<1>(src1Vec, shader, src1, idx, operandDescriptor);
loadRegister<2>(src2Vec, shader, src2, 0, operandDescriptor);
if constexpr (useSafeMUL) {
if (useSafeMUL) {
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
} else {
FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4());
@ -874,7 +904,7 @@ void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) {
loadRegister<2>(src2Vec, shader, src2, isMADI ? 0 : idx, operandDescriptor);
loadRegister<3>(src3Vec, shader, src3, isMADI ? idx : 0, operandDescriptor);
if constexpr (useSafeMUL) {
if (useSafeMUL) {
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
FADD(src3Vec.S4(), src3Vec.S4(), src1Vec.S4());
} else {

View file

@ -45,6 +45,16 @@ void ShaderEmitter::compile(const PICAShader& shaderUnit) {
L(onesVector);
dd(0x3f800000); dd(0x3f800000); dd(0x3f800000); dd(0x3f800000); // 1.0 4 times
if (useSafeMUL) {
// When doing safe mul, we need a vector to set only the w component to 0 for DP3
L(dp3Vector);
dd(0xFFFFFFFF);
dd(0xFFFFFFFF);
dd(0xFFFFFFFF);
dd(0);
}
// Emit prologue first
align(16);
prologueCb = getCurr<PrologueCallback>();
@ -523,24 +533,60 @@ void ShaderEmitter::recDP3(const PICAShader& shader, u32 instruction) {
const u32 idx = getBits<19, 2>(instruction);
const u32 dest = getBits<21, 5>(instruction);
// TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA)
loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor);
loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor);
dpps(src1_xmm, src2_xmm, 0b01111111); // 3-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA
if (!useSafeMUL) {
dpps(src1_xmm, src2_xmm, 0b01111111);
} else {
const u32 writeMask = operandDescriptor & 0xf;
// Set w component to 0 and do a DP4
andps(src1_xmm, xword[rip + dp3Vector]);
// Set src1 to src1 * src2, then get the dot product by doing 2 horizontal adds
emitSafeMUL(src1_xmm, src2_xmm, scratch1);
haddps(src1_xmm, src1_xmm);
haddps(src1_xmm, src1_xmm);
// If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx
// Otherwise we do
if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x
shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx
}
}
storeRegister(src1_xmm, shader, dest, operandDescriptor);
}
void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) {
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
const u32 src1 = getBits<12, 7>(instruction);
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
const u32 idx = getBits<19, 2>(instruction);
const u32 dest = getBits<21, 5>(instruction);
// TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA)
loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor);
loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor);
dpps(src1_xmm, src2_xmm, 0b11111111); // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA
if (!useSafeMUL) {
// 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA
dpps(src1_xmm, src2_xmm, 0b11111111);
} else {
const u32 writeMask = operandDescriptor & 0xf;
// Set src1 to src1 * src2, then get the dot product by doing 2 horizontal adds
emitSafeMUL(src1_xmm, src2_xmm, scratch1);
haddps(src1_xmm, src1_xmm);
haddps(src1_xmm, src1_xmm);
// If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx
// Otherwise we do
if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x
shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx
}
}
storeRegister(src1_xmm, shader, dest, operandDescriptor);
}
@ -553,7 +599,6 @@ void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) {
const u32 idx = getBits<19, 2>(instruction);
const u32 dest = getBits<21, 5>(instruction);
// TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA)
loadRegister<1>(src1_xmm, shader, src1, isDPHI ? 0 : idx, operandDescriptor);
loadRegister<2>(src2_xmm, shader, src2, isDPHI ? idx : 0, operandDescriptor);
@ -566,7 +611,25 @@ void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) {
unpcklpd(src1_xmm, scratch1);
}
dpps(src1_xmm, src2_xmm, 0b11111111); // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA
// Now perform a DP4
if (!useSafeMUL) {
// 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA
dpps(src1_xmm, src2_xmm, 0b11111111);
} else {
const u32 writeMask = operandDescriptor & 0xf;
// Set src1 to src1 * src2, then get the dot product by doing 2 horizontal adds
emitSafeMUL(src1_xmm, src2_xmm, scratch1);
haddps(src1_xmm, src1_xmm);
haddps(src1_xmm, src1_xmm);
// If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx
// Otherwise we do
if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x
shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx
}
}
storeRegister(src1_xmm, shader, dest, operandDescriptor);
}
@ -603,10 +666,15 @@ void ShaderEmitter::recMUL(const PICAShader& shader, u32 instruction) {
const u32 idx = getBits<19, 2>(instruction);
const u32 dest = getBits<21, 5>(instruction);
// TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA)
loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor);
loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor);
mulps(src1_xmm, src2_xmm);
if (!useSafeMUL) {
mulps(src1_xmm, src2_xmm);
} else {
emitSafeMUL(src1_xmm, src2_xmm, scratch1);
}
storeRegister(src1_xmm, shader, dest, operandDescriptor);
}
@ -662,23 +730,31 @@ void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) {
loadRegister<2>(src2_xmm, shader, src2, isMADI ? 0 : idx, operandDescriptor);
loadRegister<3>(src3_xmm, shader, src3, isMADI ? idx : 0, operandDescriptor);
// TODO: Implement safe PICA mul
// If we have FMA3, optimize MAD to use FMA
if (haveFMA3) {
vfmadd213ps(src1_xmm, src2_xmm, src3_xmm);
storeRegister(src1_xmm, shader, dest, operandDescriptor);
}
// If we don't have FMA3, do a multiplication and addition
else {
// Multiply src1 * src2
if (haveAVX) {
vmulps(scratch1, src1_xmm, src2_xmm);
} else {
movaps(scratch1, src1_xmm);
mulps(scratch1, src2_xmm);
if (!useSafeMUL) {
if (haveFMA3) {
vfmadd213ps(src1_xmm, src2_xmm, src3_xmm);
storeRegister(src1_xmm, shader, dest, operandDescriptor);
}
// If we don't have FMA3, do a multiplication and addition
else {
// Multiply src1 * src2
if (haveAVX) {
vmulps(scratch1, src1_xmm, src2_xmm);
} else {
movaps(scratch1, src1_xmm);
mulps(scratch1, src2_xmm);
}
// Add src3
addps(scratch1, src3_xmm);
storeRegister(scratch1, shader, dest, operandDescriptor);
}
} else {
movaps(scratch1, src1_xmm);
emitSafeMUL(scratch1, src2_xmm, src1_xmm);
// Add src3
addps(scratch1, src3_xmm);
storeRegister(scratch1, shader, dest, operandDescriptor);
@ -1115,6 +1191,41 @@ Xbyak::Label ShaderEmitter::emitLog2Func() {
return subroutine;
}
// Emits code computing src1 = src1 * src2 with PICA semantics, where 0 * inf yields 0 instead of NaN.
// src1: first operand, receives the result
// src2: second operand. Overwritten on the non-AVX-512 path, so callers must not rely on its value afterwards
// scratch: temporary register, overwritten on every path. The AVX-512 path additionally uses opmask register k1
void ShaderEmitter::emitSafeMUL(Xmm src1, Xmm src2, Xmm scratch) {
// 0 * inf and inf * 0 in the PICA should return 0 instead of NaN
// This can be done by checking for NaNs before and after a multiplication
// To do this we can create a mask of which components of src1/src2 are NOT NaN using cmpordps (cmpps with imm = 7)
// Then we multiply src1 and src2 and create a mask of which components of the result ARE NaN using cmpunordps
// If the NaNs didn't exist (ie they were created by 0 * inf) before then we set them to 0 by XORing the 2 masks and ANDing the multiplication
// result with the xor result
// Based on Citra implementation, particularly the AVX-512 version
if (cpuCaps.has(Cpu::tAVX512F | Cpu::tAVX512VL)) {
const Xbyak::Opmask zeroMask = k1;
vmulps(scratch, src1, src2);
// Mask of any NaN values found in the result
vcmpunordps(zeroMask, scratch, scratch);
// Mask of any non-NaN inputs producing NaN results
vcmpordps(zeroMask | zeroMask, src1, src2);
// Invert the mask so it selects the lanes to keep, then copy those lanes of the product into src1
// T_z requests zero-masking, so the lanes that are not selected end up as 0
knotb(zeroMask, zeroMask);
vmovaps(src1 | zeroMask | T_z, scratch);
} else {
// scratch = per-lane mask of "neither input is NaN"
if (haveAVX) {
vcmpordps(scratch, src1, src2);
} else {
movaps(scratch, src1);
cmpordps(scratch, src2);
}
mulps(src1, src2);
// src2 = per-lane mask of "result is NaN". From this point on src2 no longer holds the second operand
cmpunordps(src2, src1);
// The XOR is 0 only in lanes where the inputs were not NaN but the result is, ie NaNs created by the multiplication itself
xorps(src2, scratch);
// Clear exactly those lanes to 0 and keep every other lane of the product
andps(src1, src2);
}
}
Xbyak::Label ShaderEmitter::emitExp2Func() {
Xbyak::Label subroutine;

View file

@ -58,16 +58,25 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) {
break;
}
}
if (renderer != nullptr) {
renderer->setConfig(&config);
}
}
void GPU::reset() {
regs.fill(0);
shaderUnit.reset();
shaderJIT.reset();
shaderJIT.setAccurateMul(config.accurateShaderMul);
std::memset(vram, 0, vramSize);
lightingLUT.fill(0);
lightingLUTDirty = true;
fogLUT.fill(0);
fogLUTDirty = true;
totalAttribCount = 0;
fixedAttribMask = 0;
fixedAttribIndex = 0;
@ -108,6 +117,7 @@ void GPU::reset() {
externalRegs[Framebuffer1Config] = static_cast<u32>(PICA::ColorFmt::RGB8);
externalRegs[Framebuffer1Select] = 0;
renderer->setUbershaderSetting(config.useUbershaders);
renderer->reset();
}

View file

@ -135,6 +135,21 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
break;
}
case FogLUTData0:
case FogLUTData1:
case FogLUTData2:
case FogLUTData3:
case FogLUTData4:
case FogLUTData5:
case FogLUTData6:
case FogLUTData7: {
const uint32_t index = regs[FogLUTIndex] & 0x7F;
fogLUT[index] = value;
fogLUTDirty = true;
regs[FogLUTIndex] = (index + 1) & 0x7F;
break;
}
case LightingLUTData0:
case LightingLUTData1:
case LightingLUTData2:
@ -314,9 +329,11 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
break;
}
/* TODO: Find out if this actually does anything
case VertexShaderTransferEnd:
if (value != 0) shaderUnit.vs.finalize();
break;
*/
case VertexShaderTransferIndex: shaderUnit.vs.setBufferIndex(value); break;

View file

@ -0,0 +1,153 @@
#include "PICA/shader_decompiler.hpp"
#include "config.hpp"
using namespace PICA;
using namespace PICA::ShaderGen;
using Function = ControlFlow::Function;
using ExitMode = Function::ExitMode;
void ControlFlow::analyze(const PICAShader& shader, u32 entrypoint) {
analysisFailed = false;
const Function* function = addFunction(shader, entrypoint, PICAShader::maxInstructionCount);
if (function == nullptr) {
analysisFailed = true;
}
}
// Determines how the instruction range [start, end) exits, and memoises the answer in exitMap
// Returns the cached exit mode if this exact range was analysed (or is currently being analysed) before
// labels: currently unused by this function, since no branching control flow is implemented yet
ExitMode ControlFlow::analyzeFunction(const PICAShader& shader, u32 start, u32 end, Function::Labels& labels) {
	// Initialize exit mode to unknown by default, in order to detect things like unending loops
	auto [it, inserted] = exitMap.emplace(AddressRange(start, end), ExitMode::Unknown);
	// Function has already been analyzed and is in the map so it wasn't added, don't analyze again
	if (!inserted) {
		return it->second;
	}

	// Make sure not to go out of bounds on the shader
	for (u32 pc = start; pc < PICAShader::maxInstructionCount && pc != end; pc++) {
		const u32 instruction = shader.loadedShader[pc];
		const u32 opcode = instruction >> 26;

		switch (opcode) {
			case ShaderOpcodes::JMPC: Helpers::panic("Unimplemented control flow operation (JMPC)");
			case ShaderOpcodes::JMPU: Helpers::panic("Unimplemented control flow operation (JMPU)");
			case ShaderOpcodes::IFU: Helpers::panic("Unimplemented control flow operation (IFU)");
			case ShaderOpcodes::IFC: Helpers::panic("Unimplemented control flow operation (IFC)");
			case ShaderOpcodes::CALL: Helpers::panic("Unimplemented control flow operation (CALL)");
			case ShaderOpcodes::CALLC: Helpers::panic("Unimplemented control flow operation (CALLC)");
			case ShaderOpcodes::CALLU: Helpers::panic("Unimplemented control flow operation (CALLU)");
			case ShaderOpcodes::LOOP: Helpers::panic("Unimplemented control flow operation (LOOP)");
			case ShaderOpcodes::END: it->second = ExitMode::AlwaysEnd; return it->second;

			default: break;
		}
	}

	// A function without control flow instructions will always reach its "return point" and return
	// Store the result in the map as well: otherwise the entry would be left as Unknown, and any later
	// query for the same range would get the placeholder back instead of the real exit mode
	it->second = ExitMode::AlwaysReturn;
	return it->second;
}
void ShaderDecompiler::compileRange(const AddressRange& range) {
u32 pc = range.start;
const u32 end = range.end >= range.start ? range.end : PICAShader::maxInstructionCount;
bool finished = false;
while (pc < end && !finished) {
compileInstruction(pc, finished);
}
}
// Looks up the analysed function covering exactly the given address range
// Returns a pointer to it, or nullptr if control flow analysis did not record such a function
const Function* ShaderDecompiler::findFunction(const AddressRange& range) {
	for (auto it = controlFlow.functions.begin(); it != controlFlow.functions.end(); ++it) {
		const Function& candidate = *it;
		const bool sameRange = (candidate.start == range.start) && (candidate.end == range.end);

		if (sameRange) {
			return &candidate;
		}
	}

	return nullptr;
}
// Appends the declaration of the PICAShaderUniforms uniform block to the decompiled shader
void ShaderDecompiler::writeAttributes() {
	static constexpr const char* uniformBlock = R"(
layout(std140) uniform PICAShaderUniforms {
vec4 uniform_float[96];
uvec4 uniform_int;
uint uniform_bool;
};
)";

	decompiledShader.append(uniformBlock);
	decompiledShader.append("\n");
}
std::string ShaderDecompiler::decompile() {
controlFlow.analyze(shader, entrypoint);
if (controlFlow.analysisFailed) {
return "";
}
decompiledShader = "";
switch (api) {
case API::GL: decompiledShader += "#version 410 core\n"; break;
case API::GLES: decompiledShader += "#version 300 es\n"; break;
default: break;
}
writeAttributes();
if (config.accurateShaderMul) {
// Safe multiplication handler from Citra: Handles the PICA's 0 * inf = 0 edge case
decompiledShader += R"(
vec4 safe_mul(vec4 a, vec4 b) {
vec4 res = a * b;
return mix(res, mix(mix(vec4(0.0), res, isnan(rhs)), product, isnan(lhs)), isnan(res));
}
)";
}
// Forward declare every generated function first so that we can easily call anything from anywhere.
for (auto& func : controlFlow.functions) {
decompiledShader += func.getForwardDecl();
}
decompiledShader += "void pica_shader_main() {\n";
AddressRange mainFunctionRange(entrypoint, PICAShader::maxInstructionCount);
callFunction(*findFunction(mainFunctionRange));
decompiledShader += "}\n";
for (auto& func : controlFlow.functions) {
if (func.outLabels.size() > 0) {
Helpers::panic("Function with out labels");
}
decompiledShader += "void " + func.getIdentifier() + "() {\n";
compileRange(AddressRange(func.start, func.end));
decompiledShader += "}\n";
}
return decompiledShader;
}
// Compiles the instruction at pc and advances pc past it
// An END instruction sets finished instead and leaves pc untouched
void ShaderDecompiler::compileInstruction(u32& pc, bool& finished) {
	const u32 word = shader.loadedShader[pc];
	const u32 op = word >> 26;

	if (op == ShaderOpcodes::END) {
		finished = true;
		return;
	}

	if (op == ShaderOpcodes::DP4) {
		decompiledShader += "dp4\n";
	} else if (op == ShaderOpcodes::MOV) {
		decompiledShader += "mov\n";
	} else {
		Helpers::warn("GLSL recompiler: Unknown opcode: %X", op);
	}

	pc++;
}
// Appends a statement calling the given function to the decompiled shader
void ShaderDecompiler::callFunction(const Function& function) {
	decompiledShader += function.getCallStatement();
	decompiledShader += ";\n";
}
// Convenience entry point: builds a one-off ShaderDecompiler for the given shader and settings and returns its output
std::string ShaderGen::decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language) {
	return ShaderDecompiler(shader, config, entrypoint, api, language).decompile();
}

View file

@ -0,0 +1,680 @@
#include "PICA/pica_frag_config.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_gen.hpp"
using namespace PICA;
using namespace PICA::ShaderGen;
// GLSL source for the LightSource struct and the std140 FragmentUniforms uniform block
// This text is appended verbatim to both the default vertex shader and every generated fragment shader
static constexpr const char* uniformDefinition = R"(
struct LightSource {
vec3 specular0;
vec3 specular1;
vec3 diffuse;
vec3 ambient;
vec3 position;
vec3 spotlightDirection;
float distanceAttenuationBias;
float distanceAttenuationScale;
};
layout(std140) uniform FragmentUniforms {
int alphaReference;
float depthScale;
float depthOffset;
vec4 constantColors[6];
vec4 tevBufferColor;
vec4 clipCoords;
// Note: We upload this as a u32 and decode on GPU
uint globalAmbientLight;
LightSource lightSources[8];
};
)";
// Builds the source of the fixed vertex shader that gets paired with generated fragment shaders
// The output is: version directive, GLES preamble (GLES only), shared uniform definitions, then the shader body
std::string FragmentGenerator::getDefaultVertexShader() {
	std::string source;

	if (api == API::GL) {
		source += "#version 410 core";
	} else if (api == API::GLES) {
		source += "#version 300 es";
		// GLES needs default precision qualifiers, and USING_GLES disables the gl_ClipDistance code in the body
		source += R"(
#define USING_GLES 1
precision mediump int;
precision mediump float;
)";
	}

	source += uniformDefinition;

	source += R"(
layout(location = 0) in vec4 a_coords;
layout(location = 1) in vec4 a_quaternion;
layout(location = 2) in vec4 a_vertexColour;
layout(location = 3) in vec2 a_texcoord0;
layout(location = 4) in vec2 a_texcoord1;
layout(location = 5) in float a_texcoord0_w;
layout(location = 6) in vec3 a_view;
layout(location = 7) in vec2 a_texcoord2;
out vec4 v_quaternion;
out vec4 v_colour;
out vec3 v_texcoord0;
out vec2 v_texcoord1;
out vec3 v_view;
out vec2 v_texcoord2;
#ifndef USING_GLES
out float gl_ClipDistance[2];
#endif
vec4 abgr8888ToVec4(uint abgr) {
const float scale = 1.0 / 255.0;
return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24));
}
void main() {
gl_Position = a_coords;
vec4 colourAbs = abs(a_vertexColour);
v_colour = min(colourAbs, vec4(1.f));
v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w);
v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
v_view = a_view;
v_quaternion = a_quaternion;
#ifndef USING_GLES
gl_ClipDistance[0] = -a_coords.z;
gl_ClipDistance[1] = dot(clipCoords, a_coords);
#endif
}
)";

	return source;
}
// Generates the source of a fragment shader specialised for the given fragment pipeline configuration
// The output is: version directive, GLES preamble (GLES only), inputs/outputs/samplers, shared uniform definitions,
// lighting helper functions (only when lighting is enabled), then main(): lighting, depth, the 6 TEV stages, fog and the alpha test
std::string FragmentGenerator::generate(const FragmentConfig& config) {
	std::string ret = "";

	switch (api) {
		case API::GL: ret += "#version 410 core"; break;
		case API::GLES: ret += "#version 300 es"; break;
		default: break;
	}

	if (api == API::GLES) {
		ret += R"(
#define USING_GLES 1
precision mediump int;
precision mediump float;
)";
	}

	// Input and output attributes
	ret += R"(
in vec4 v_quaternion;
in vec4 v_colour;
in vec3 v_texcoord0;
in vec2 v_texcoord1;
in vec3 v_view;
in vec2 v_texcoord2;
out vec4 fragColor;
uniform sampler2D u_tex0;
uniform sampler2D u_tex1;
uniform sampler2D u_tex2;
uniform sampler2D u_tex_luts;
)";

	ret += uniformDefinition;

	// The lighting helpers are only emitted when lighting is enabled for this configuration
	if (config.lighting.enable) {
		ret += R"(
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
vec3 u = q.xyz;
float s = q.w;
return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v);
}
float lutLookup(uint lut, int index) {
return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r;
}
vec3 regToColor(uint reg) {
return (1.0 / 255.0) * vec3(float((reg >> 20u) & 0xFFu), float((reg >> 10u) & 0xFFu), float(reg & 0xFFu));
}
)";
	}

	// Emit main function for fragment shader
	// When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour
	ret += R"(
void main() {
vec4 combinerOutput = v_colour;
vec4 previousBuffer = vec4(0.0);
vec4 tevNextPreviousBuffer = tevBufferColor;
vec4 primaryColor = vec4(0.0);
vec4 secondaryColor = vec4(0.0);
)";

	compileLights(ret, config);

	ret += R"(
vec3 colorOp1 = vec3(0.0);
vec3 colorOp2 = vec3(0.0);
vec3 colorOp3 = vec3(0.0);
float alphaOp1 = 0.0;
float alphaOp2 = 0.0;
float alphaOp3 = 0.0;
)";

	// Get original depth value by converting from [near, far] = [0, 1] to [-1, 1]
	// We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1]
	ret += R"(
float z_over_w = gl_FragCoord.z * 2.0f - 1.0f;
float depth = z_over_w * depthScale + depthOffset;
)";

	// The division by gl_FragCoord.w is only emitted when the depth map is disabled
	if (!config.outConfig.depthMapEnable) {
		ret += "depth /= gl_FragCoord.w;\n";
	}

	ret += "gl_FragDepth = depth;\n";

	// Emit code for each of the 6 TEV stages in order
	for (int i = 0; i < 6; i++) {
		compileTEV(ret, i, config);
	}

	compileFog(ret, config);
	applyAlphaTest(ret, config);

	ret += "fragColor = combinerOutput;\n}";  // End of main function

	return ret;
}
// Appends the GLSL code for one TEV (texture environment) stage to `shader`.
// For a non-passthrough stage this emits the three colour operands, the clamped colour operation,
// the alpha equivalent (or a copy of the colour result for Dot3RGBA), and the scaled + clamped write
// to combinerOutput. Every stage, passthrough or not, then advances previousBuffer, and stages 0-3
// may additionally update the "next previous buffer" depending on the update-buffer bits.
void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config) {
	// Each stage owns 4 consecutive words of the TEV configuration
	const u32* stageRegs = config.texConfig.tevConfigs.data() + stage * 4;
	// Pass a 0 to constColor here, as it doesn't matter for compilation
	TexEnvConfig tev(stageRegs[0], stageRegs[1], stageRegs[2], 0, stageRegs[3]);

	if (!tev.isPassthroughStage()) {
		const std::string stageSuffix = std::to_string(stage);
		const std::string colorVar = "outputColor" + stageSuffix;
		const std::string alphaVar = "outputAlpha" + stageSuffix;

		// Colour operands followed by the clamped colour operation
		shader += "colorOp1 = ";
		getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage, config);
		shader += ";\ncolorOp2 = ";
		getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage, config);
		shader += ";\ncolorOp3 = ";
		getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage, config);
		shader += ";\nvec3 " + colorVar + " = clamp(";
		getColorOperation(shader, tev.colorOp);
		shader += ", vec3(0.0), vec3(1.0));\n";

		if (tev.colorOp != TexEnvConfig::Operation::Dot3RGBA) {
			// Regular case: alpha has its own operands and operation, clamped to [0.0, 1.0]
			shader += "alphaOp1 = ";
			getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage, config);
			shader += ";\nalphaOp2 = ";
			getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage, config);
			shader += ";\nalphaOp3 = ";
			getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage, config);
			shader += ";\nfloat " + alphaVar + " = clamp(";
			getAlphaOperation(shader, tev.alphaOp);
			shader += ", 0.0, 1.0);\n";
		} else {
			// Dot3 RGBA also writes to the alpha component, so alpha simply reuses the colour result
			shader += "float " + alphaVar + " = " + colorVar + ".x;\n";
		}

		// Apply the per-stage scales and clamp once more before writing the combiner output
		shader += "combinerOutput = vec4(clamp(" + colorVar + " * " + std::to_string(tev.getColorScale()) +
				  ".0, vec3(0.0), vec3(1.0)), clamp(" + alphaVar + " * " + std::to_string(tev.getAlphaScale()) +
				  ".0, 0.0, 1.0));\n";
	}

	shader += "previousBuffer = tevNextPreviousBuffer;\n\n";

	// Only the first 4 stages can update the "next previous buffer"
	if (stage >= 4) {
		return;
	}

	const u32 updateBits = config.texConfig.texEnvUpdateBuffer;
	const bool updateRGB = ((updateBits >> (8 + stage)) & 1u) != 0;     // Bits 8-11: RGB update for stages 0-3
	const bool updateAlpha = ((updateBits >> (12 + stage)) & 1u) != 0;  // Bits 12-15: alpha update for stages 0-3

	if (updateRGB) {
		shader += "tevNextPreviousBuffer.rgb = combinerOutput.rgb;\n";
	}

	if (updateAlpha) {
		shader += "tevNextPreviousBuffer.a = combinerOutput.a;\n";
	}
}
// Appends the GLSL expression for one colour operand of a TEV stage to `shader`.
// The expression is the selected source with a swizzle picking the requested channel(s), optionally
// prefixed by "vec3(1.0, 1.0, 1.0) - " for the OneMinus* variants. An unknown operand produces
// a constant white vec3 and a warning.
void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config) {
	using OperandType = TexEnvConfig::ColorOperand;

	const char* swizzle = nullptr;  // Stays null for operands we do not recognize
	bool invert = false;            // Whether the operand is one of the "1.0 - x" variants

	switch (color) {
		case OperandType::OneMinusSourceColor: invert = true; [[fallthrough]];
		case OperandType::SourceColor: swizzle = ".rgb"; break;

		case OperandType::OneMinusSourceRed: invert = true; [[fallthrough]];
		case OperandType::SourceRed: swizzle = ".rrr"; break;

		case OperandType::OneMinusSourceGreen: invert = true; [[fallthrough]];
		case OperandType::SourceGreen: swizzle = ".ggg"; break;

		case OperandType::OneMinusSourceBlue: invert = true; [[fallthrough]];
		case OperandType::SourceBlue: swizzle = ".bbb"; break;

		case OperandType::OneMinusSourceAlpha: invert = true; [[fallthrough]];
		case OperandType::SourceAlpha: swizzle = ".aaa"; break;

		default: break;
	}

	if (swizzle == nullptr) {
		shader += "vec3(1.0, 1.0, 1.0)";
		Helpers::warn("FragmentGenerator: Invalid TEV color operand");
		return;
	}

	// For inverting operands, add the 1.0 - x subtraction
	if (invert) {
		shader += "vec3(1.0, 1.0, 1.0) - ";
	}

	getSource(shader, source, index, config);
	shader += swizzle;
}
// Appends the GLSL expression for one alpha operand of a TEV stage to `shader`.
// The expression is a single channel of the selected source, optionally prefixed by "1.0 - " for
// the OneMinus* variants. An unknown operand produces the constant 1.0 and a warning.
void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index, const PICA::FragmentConfig& config) {
	using OperandType = TexEnvConfig::AlphaOperand;

	// For inverting operands, add the 1.0 - x subtraction
	if (color == OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen || color == OperandType::OneMinusSourceBlue ||
		color == OperandType::OneMinusSourceAlpha) {
		shader += "1.0 - ";
	}

	switch (color) {
		case OperandType::SourceRed:
		case OperandType::OneMinusSourceRed:
			getSource(shader, source, index, config);
			shader += ".r";
			break;

		case OperandType::SourceGreen:
		case OperandType::OneMinusSourceGreen:
			getSource(shader, source, index, config);
			shader += ".g";
			break;

		case OperandType::SourceBlue:
		case OperandType::OneMinusSourceBlue:
			getSource(shader, source, index, config);
			shader += ".b";
			break;

		case OperandType::SourceAlpha:
		case OperandType::OneMinusSourceAlpha:
			getSource(shader, source, index, config);
			shader += ".a";
			break;

		default:
			shader += "1.0";
			// Report this as an alpha operand so the log distinguishes it from getColorOperand's warning
			Helpers::warn("FragmentGenerator: Invalid TEV alpha operand");
			break;
	}
}
void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config) {
switch (source) {
case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break;
case TexEnvConfig::Source::Texture0: shader += "texture(u_tex0, v_texcoord0.xy)"; break;
case TexEnvConfig::Source::Texture1: shader += "texture(u_tex1, v_texcoord1)"; break;
case TexEnvConfig::Source::Texture2: {
// If bit 13 in texture config is set then we use the texcoords for texture 1, otherwise for texture 2
if (Helpers::getBit<13>(config.texConfig.texUnitConfig)) {
shader += "texture(u_tex2, v_texcoord1)";
} else {
shader += "texture(u_tex2, v_texcoord2)";
}
break;
}
case TexEnvConfig::Source::Previous: shader += "combinerOutput"; break;
case TexEnvConfig::Source::Constant: shader += "constantColors[" + std::to_string(index) + "]"; break;
case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break;
// Lighting
case TexEnvConfig::Source::PrimaryFragmentColor: shader += "primaryColor"; break;
case TexEnvConfig::Source::SecondaryFragmentColor: shader += "secondaryColor"; break;
default:
Helpers::warn("Unimplemented TEV source: %d", static_cast<int>(source));
shader += "vec4(1.0, 1.0, 1.0, 1.0)";
break;
}
}
void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Operation op) {
switch (op) {
case TexEnvConfig::Operation::Replace: shader += "colorOp1"; break;
case TexEnvConfig::Operation::Add: shader += "colorOp1 + colorOp2"; break;
case TexEnvConfig::Operation::AddSigned: shader += "colorOp1 + colorOp2 - vec3(0.5)"; break;
case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break;
case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break;
case TexEnvConfig::Operation::Lerp: shader += "mix(colorOp2, colorOp1, colorOp3)"; break;
case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break;
case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(colorOp1, colorOp2, colorOp3)"; break;
case TexEnvConfig::Operation::Dot3RGB:
case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(colorOp1 - vec3(0.5), colorOp2 - vec3(0.5)))"; break;
default:
Helpers::warn("FragmentGenerator: Unimplemented color op");
shader += "vec3(1.0)";
break;
}
}
void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Operation op) {
switch (op) {
case TexEnvConfig::Operation::Replace: shader += "alphaOp1"; break;
case TexEnvConfig::Operation::Add: shader += "alphaOp1 + alphaOp2"; break;
case TexEnvConfig::Operation::AddSigned: shader += "alphaOp1 + alphaOp2 - 0.5"; break;
case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break;
case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break;
case TexEnvConfig::Operation::Lerp: shader += "mix(alphaOp2, alphaOp1, alphaOp3)"; break;
case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break;
case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(alphaOp1, alphaOp2, alphaOp3)"; break;
default:
Helpers::warn("FragmentGenerator: Unimplemented alpha op");
shader += "1.0";
break;
}
}
// Appends the alpha test to `shader`: combinerOutput.a is converted to an integer in 0-255 and the
// fragment is discarded when the configured comparison against alphaReference FAILS. The emitted
// condition is therefore the negation of the compare function. Nothing is emitted when the
// function is Always, since the test can never fail.
void FragmentGenerator::applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config) {
	const CompareFunction function = config.outConfig.alphaTestFunction;

	// Alpha test disabled
	if (function == CompareFunction::Always) {
		return;
	}

	// Condition under which the fragment gets discarded
	const char* discardCondition = "false";

	switch (function) {
		case CompareFunction::Never: discardCondition = "true"; break;
		case CompareFunction::Always: discardCondition = "false"; break;
		case CompareFunction::Equal: discardCondition = "testingAlpha != alphaReference"; break;
		case CompareFunction::NotEqual: discardCondition = "testingAlpha == alphaReference"; break;
		case CompareFunction::Less: discardCondition = "testingAlpha >= alphaReference"; break;
		case CompareFunction::LessOrEqual: discardCondition = "testingAlpha > alphaReference"; break;
		case CompareFunction::Greater: discardCondition = "testingAlpha <= alphaReference"; break;
		case CompareFunction::GreaterOrEqual: discardCondition = "testingAlpha < alphaReference"; break;

		default: Helpers::warn("Unimplemented alpha test function"); break;
	}

	shader += "int testingAlpha = int(combinerOutput.a * 255.0);\n";
	shader += "if (";
	shader += discardCondition;
	shader += ") { discard; }\n";
}
// Appends the fragment lighting code to `shader`. Emits nothing when lighting is disabled.
//
// The generated code computes the surface normal from the interpolated quaternion, then for each
// configured light accumulates a diffuse and a specular contribution (including optional distance
// attenuation, spotlight attenuation and the D0/D1/RR/RG/RB LUT terms). After the loop it optionally
// writes the fresnel factor to the alpha channels and stores the clamped results in
// primaryColor/secondaryColor, which the generated main() declares beforehand.
void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentConfig& config) {
	if (!config.lighting.enable) {
		return;
	}

	// Currently ignore bump mode
	shader += "vec3 normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion);\n";
	shader += R"(
vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);
vec3 light_position, light_vector, half_vector, specular0, specular1, reflected_color;
float light_distance, NdotL, light_factor, geometric_factor, distance_attenuation, distance_att_delta;
float spotlight_attenuation, specular0_dist, specular1_dist;
float lut_lookup_result, lut_lookup_delta;
int lut_lookup_index;
)";

	for (int i = 0; i < config.lighting.lightNum; i++) {
		const auto& lightConfig = config.lighting.lights[i];
		// The i-th enabled light refers to one of the entries of the lightSources uniform array
		const uint lightID = lightConfig.num;
		const std::string lightSource = "lightSources[" + std::to_string(lightID) + "]";

		shader += "light_position = " + lightSource + ".position;\n";

		if (lightConfig.directional) {  // Directional lighting
			shader += "light_vector = light_position;\n";
		} else {  // Positional lighting
			shader += "light_vector = light_position + v_view;\n";
		}

		shader += R"(
light_distance = length(light_vector);
light_vector = normalize(light_vector);
half_vector = light_vector + normalize(v_view);
distance_attenuation = 1.0;
NdotL = dot(normal, light_vector);
)";

		shader += lightConfig.twoSidedDiffuse ? "NdotL = abs(NdotL);\n" : "NdotL = max(NdotL, 0.0);\n";

		// The geometric factor is only computed if at least one specular term uses it
		if (lightConfig.geometricFactor0 || lightConfig.geometricFactor1) {
			shader += R"(
geometric_factor = dot(half_vector, half_vector);
geometric_factor = (geometric_factor == 0.0) ? 0.0 : min(NdotL / geometric_factor, 1.0);
)";
		}

		if (lightConfig.distanceAttenuationEnable) {
			shader += "distance_att_delta = clamp(light_distance * " + lightSource + ".distanceAttenuationScale + " + lightSource +
					  ".distanceAttenuationBias, 0.0, 1.0);\n";

			// Distance attenuation LUTs start at row 16 of the LUT texture.
			// The literal needs the "u" suffix: lutLookup takes a uint and GLSL ES has no implicit int -> uint conversion
			shader += "distance_attenuation = lutLookup(" + std::to_string(16 + lightID) +
					  "u, int(clamp(floor(distance_att_delta * 256.0), 0.0, 255.0)));\n";
		}

		// Each lookup leaves its value in lut_lookup_result (1.0 if the LUT is not in use)
		compileLUTLookup(shader, config, i, spotlightLutIndex);
		shader += "spotlight_attenuation = lut_lookup_result;\n";

		compileLUTLookup(shader, config, i, PICA::Lights::LUT_D0);
		shader += "specular0_dist = lut_lookup_result;\n";

		compileLUTLookup(shader, config, i, PICA::Lights::LUT_D1);
		shader += "specular1_dist = lut_lookup_result;\n";

		compileLUTLookup(shader, config, i, PICA::Lights::LUT_RR);
		shader += "reflected_color.r = lut_lookup_result;\n";

		// When the green/blue reflection samplers are disabled, they reuse the red reflection value
		if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RG)) {
			compileLUTLookup(shader, config, i, PICA::Lights::LUT_RG);
			shader += "reflected_color.g = lut_lookup_result;\n";
		} else {
			shader += "reflected_color.g = reflected_color.r;\n";
		}

		if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RB)) {
			compileLUTLookup(shader, config, i, PICA::Lights::LUT_RB);
			shader += "reflected_color.b = lut_lookup_result;\n";
		} else {
			shader += "reflected_color.b = reflected_color.r;\n";
		}

		shader += "specular0 = " + lightSource + ".specular0 * specular0_dist;\n";
		if (lightConfig.geometricFactor0) {
			shader += "specular0 *= geometric_factor;\n";
		}

		shader += "specular1 = " + lightSource + ".specular1 * specular1_dist * reflected_color;\n";
		if (lightConfig.geometricFactor1) {
			shader += "specular1 *= geometric_factor;\n";
		}

		shader += "light_factor = distance_attenuation * spotlight_attenuation;\n";

		if (config.lighting.clampHighlights) {
			// With highlight clamping, the specular term is dropped entirely where NdotL is 0
			shader += "specular_sum.rgb += light_factor * (NdotL == 0.0 ? 0.0 : 1.0) * (specular0 + specular1);\n";
		} else {
			shader += "specular_sum.rgb += light_factor * (specular0 + specular1);\n";
		}

		shader += "diffuse_sum.rgb += light_factor * (" + lightSource + ".ambient + " + lightSource + ".diffuse * NdotL);\n";
	}

	// The fresnel lookup uses the last configured light
	if (config.lighting.enablePrimaryAlpha || config.lighting.enableSecondaryAlpha) {
		compileLUTLookup(shader, config, config.lighting.lightNum - 1, PICA::Lights::LUT_FR);
		shader += "float fresnel_factor = lut_lookup_result;\n";
	}

	if (config.lighting.enablePrimaryAlpha) {
		shader += "diffuse_sum.a = fresnel_factor;\n";
	}

	if (config.lighting.enableSecondaryAlpha) {
		shader += "specular_sum.a = fresnel_factor;\n";
	}

	shader += R"(
vec4 global_ambient = vec4(regToColor(globalAmbientLight), 1.0);
primaryColor = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0));
secondaryColor = clamp(specular_sum, vec4(0.0), vec4(1.0));
)";
}
// Returns whether the lighting LUT sampler `lutID` is active in lighting configuration `environmentID`.
// The table below has one row per configuration (0-8) and one column per LUT in the order
// D0, D1, SP, FR, RB, RG, RR. IDs that fall outside the table are reported as disabled rather than
// indexing out of bounds.
// NOTE(review): environmentID presumably comes straight from a guest-written register field, so
// values above 8 may be reachable - confirm against the code that fills config.lighting.config.
bool FragmentGenerator::isSamplerEnabled(u32 environmentID, u32 lutID) {
	static constexpr u32 configurationCount = 9;
	static constexpr u32 lutsPerConfiguration = 7;

	static constexpr bool samplerEnabled[configurationCount * lutsPerConfiguration] = {
		// D0   D1     SP     FR     RB     RG     RR
		true,  false, true,  false, false, false, true,   // Configuration 0: D0, SP, RR
		false, false, true,  true,  false, false, true,   // Configuration 1: FR, SP, RR
		true,  true,  false, false, false, false, true,   // Configuration 2: D0, D1, RR
		true,  true,  false, true,  false, false, false,  // Configuration 3: D0, D1, FR
		true,  true,  true,  false, true,  true,  true,   // Configuration 4: All except for FR
		true,  false, true,  true,  true,  true,  true,   // Configuration 5: All except for D1
		true,  true,  true,  true,  false, false, true,   // Configuration 6: All except for RB and RG
		false, false, false, false, false, false, false,  // Configuration 7: Unused
		true,  true,  true,  true,  true,  true,  true,   // Configuration 8: All
	};

	// Both checks are needed: an oversized lutID would otherwise silently read from a neighbouring row
	if (environmentID >= configurationCount || lutID >= lutsPerConfiguration) {
		return false;
	}

	return samplerEnabled[environmentID * lutsPerConfiguration + lutID];
}
// Appends the code for a single lighting LUT lookup to `shader`. The generated code always leaves
// its result in lut_lookup_result: 1.0 if the LUT is unknown, disabled, or not sampled in the current
// lighting configuration, and the (optionally scaled) LUT value otherwise.
//
// lightIndex: Index into config.lighting.lights of the light the lookup is performed for
// lutID: Which LUT to sample. The spotlight LUT is per-light and lives at row 8 + light ID of the
//        LUT texture, LUTs 0-6 use the row matching their ID
void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID) {
	uint lightID = config.lighting.lights[lightIndex].num;
	uint lutIndex = 0;
	bool lutEnabled = false;

	if (lutID == spotlightLutIndex) {
		// These are the spotlight attenuation LUTs
		lutIndex = 8u + lightID;
		lutEnabled = config.lighting.lights[lightIndex].spotAttenuationEnable;
	} else if (lutID <= 6) {
		lutIndex = lutID;
		lutEnabled = config.lighting.luts[lutID].enable;
	} else {
		// Bail out before lutID is used to index any table
		Helpers::warn("Shadergen: Unimplemented LUT value");
		shader += "lut_lookup_result = 1.0;\n";
		return;
	}

	if (!lutEnabled || !isSamplerEnabled(config.lighting.config, lutID)) {
		shader += "lut_lookup_result = 1.0;\n";
		return;
	}

	const LightingLUTConfig& lut = config.lighting.luts[lutID];
	uint scale = lut.scale;
	uint inputID = lut.type;
	bool absEnabled = lut.absInput;

	// Select the value that is used to index the LUT
	switch (inputID) {
		case 0: shader += "lut_lookup_delta = dot(normal, normalize(half_vector));\n"; break;
		case 1: shader += "lut_lookup_delta = dot(normalize(v_view), normalize(half_vector));\n"; break;
		case 2: shader += "lut_lookup_delta = dot(normal, normalize(v_view));\n"; break;
		case 3: shader += "lut_lookup_delta = dot(normal, light_vector);\n"; break;
		case 4: shader += "lut_lookup_delta = dot(light_vector, lightSources[" + std::to_string(lightID) + "].spotlightDirection);\n"; break;

		default:
			Helpers::warn("Shadergen: Unimplemented LUT select");
			shader += "lut_lookup_delta = 1.0;\n";
			break;
	}

	// The row index is emitted with a "u" suffix: lutLookup takes a uint and GLSL ES has no implicit int -> uint conversion
	const std::string lutRow = std::to_string(lutIndex) + "u";

	if (absEnabled) {
		bool twoSidedDiffuse = config.lighting.lights[lightIndex].twoSidedDiffuse;
		shader += twoSidedDiffuse ? "lut_lookup_delta = abs(lut_lookup_delta);\n" : "lut_lookup_delta = max(lut_lookup_delta, 0.0);\n";
		shader += "lut_lookup_result = lutLookup(" + lutRow + ", int(clamp(floor(lut_lookup_delta * 256.0), 0.0, 255.0)));\n";
	} else {
		// Range is [-1, 1] so we need to map it to [0, 1]
		shader += "lut_lookup_index = int(clamp(floor(lut_lookup_delta * 128.0), -128.f, 127.f));\n";
		shader += "if (lut_lookup_index < 0) lut_lookup_index += 256;\n";
		shader += "lut_lookup_result = lutLookup(" + lutRow + ", lut_lookup_index);\n";
	}

	// Scale factors are kept as GLSL literals instead of being formatted from floats at runtime, as
	// std::to_string(float) follows the C locale and could emit a decimal comma.
	// Scale 0 is a multiplication by 1.0, so no code is emitted for it.
	static constexpr const char* scales[] = {"1.0", "2.0", "4.0", "8.0", "0.0", "0.0", "0.25", "0.5"};
	if (scale != 0) {
		shader += "lut_lookup_result *= ";
		shader += scales[scale];
		shader += ";\n";
	}
}
// Appends the fog code to `shader`. Emits nothing unless the fog mode is FogMode::Fog.
//
// The generated code derives a fog LUT index from the `depth` variable computed earlier in main()
// (optionally flipped), fetches the matching entry from row 24 of the LUT texture, interpolates using
// the fractional part of the index and blends combinerOutput.rgb with the fog colour.
void FragmentGenerator::compileFog(std::string& shader, const PICA::FragmentConfig& config) {
	if (config.fogConfig.mode != FogMode::Fog) {
		return;
	}

	// The fog colour is baked into the shader. The channels are emitted as integers and normalized by
	// the GLSL code itself: formatting floats with std::to_string follows the C locale and could emit a
	// decimal comma, which is not valid GLSL.
	// NOTE(review): assumes the fogColor fields are integer channel values in 0-255 - confirm against FogConfig
	const std::string red = std::to_string(static_cast<u32>(config.fogConfig.fogColorR));
	const std::string green = std::to_string(static_cast<u32>(config.fogConfig.fogColorG));
	const std::string blue = std::to_string(static_cast<u32>(config.fogConfig.fogColorB));

	if (config.fogConfig.flipDepth) {
		shader += "float fog_index = (1.0 - depth) * 128.0;\n";
	} else {
		shader += "float fog_index = depth * 128.0;\n";
	}

	shader += "float clamped_index = clamp(floor(fog_index), 0.0, 127.0);";
	shader += "float delta = fog_index - clamped_index;";
	shader += "vec3 fog_color = vec3(" + red + ".0, " + green + ".0, " + blue + ".0) / 255.0;";
	shader += "vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), 24), 0).rg;";  // fog LUT is past the light LUTs
	shader += "float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);";
	shader += "combinerOutput.rgb = mix(fog_color, combinerOutput.rgb, fog_factor);";
}

View file

@ -9,7 +9,6 @@ void ShaderUnit::reset() {
void PICAShader::reset() {
loadedShader.fill(0);
bufferedShader.fill(0);
operandDescriptors.fill(0);
boolUniform = 0;

View file

@ -355,7 +355,7 @@ namespace Audio {
}
switch (buffer.format) {
case SampleFormat::PCM8: Helpers::warn("Unimplemented sample format!"); break;
case SampleFormat::PCM8: source.currentSamples = decodePCM8(data, buffer.sampleCount, source); break;
case SampleFormat::PCM16: source.currentSamples = decodePCM16(data, buffer.sampleCount, source); break;
case SampleFormat::ADPCM: source.currentSamples = decodeADPCM(data, buffer.sampleCount, source); break;
@ -406,6 +406,26 @@ namespace Audio {
}
}
// Decodes `sampleCount` 8-bit PCM samples starting at `data` into a buffer of 16-bit stereo samples.
// Every input byte becomes the upper 8 bits of a 16-bit sample. Stereo sources consume two bytes
// per output sample (left, then right); any other source type is treated as mono and the single
// decoded value is used for both channels.
HLE_DSP::SampleBuffer HLE_DSP::decodePCM8(const u8* data, usize sampleCount, Source& source) {
	// Moves an 8-bit sample into the high byte of a 16-bit sample
	const auto expand = [](u8 raw) -> s16 { return s16(u16(raw) << 8); };
	const bool isStereo = source.sourceType == SourceType::Stereo;

	SampleBuffer decodedSamples(sampleCount);
	const u8* cursor = data;

	for (usize i = 0; i < sampleCount; i++) {
		const s16 left = expand(*cursor++);
		// Mono sources have no second byte to read, the left value is simply duplicated
		const s16 right = isStereo ? expand(*cursor++) : left;
		decodedSamples[i] = {left, right};
	}

	return decodedSamples;
}
HLE_DSP::SampleBuffer HLE_DSP::decodePCM16(const u8* data, usize sampleCount, Source& source) {
SampleBuffer decodedSamples(sampleCount);
const s16* data16 = reinterpret_cast<const s16*>(data);

View file

@ -39,7 +39,35 @@ HorizonResult SDMCArchive::createFile(const FSPath& path, u64 size) {
}
HorizonResult SDMCArchive::deleteFile(const FSPath& path) {
Helpers::panic("[SDMC] Unimplemented DeleteFile");
if (path.type == PathType::UTF16) {
if (!isPathSafe<PathType::UTF16>(path)) {
Helpers::panic("Unsafe path in SDMC::DeleteFile");
}
fs::path p = IOFile::getAppData() / "SDMC";
p += fs::path(path.utf16_string).make_preferred();
if (fs::is_directory(p)) {
Helpers::panic("SDMC::DeleteFile: Tried to delete directory");
}
if (!fs::is_regular_file(p)) {
return Result::FS::FileNotFoundAlt;
}
std::error_code ec;
bool success = fs::remove(p, ec);
// It might still be possible for fs::remove to fail, if there's eg an open handle to a file being deleted
// In this case, print a warning, but still return success for now
if (!success) {
Helpers::warn("SDMC::DeleteFile: fs::remove failed\n");
}
return Result::Success;
}
Helpers::panic("SDMCArchive::DeleteFile: Unknown path type");
return Result::Success;
}
@ -145,7 +173,7 @@ Rust::Result<DirectorySession, HorizonResult> SDMCArchive::openDirectory(const F
if (path.type == PathType::UTF16) {
if (!isPathSafe<PathType::UTF16>(path)) {
Helpers::panic("Unsafe path in SaveData::OpenDirectory");
Helpers::panic("Unsafe path in SDMC::OpenDirectory");
}
fs::path p = IOFile::getAppData() / "SDMC";

View file

@ -184,7 +184,8 @@ void Kernel::setFileSize(u32 messagePointer, Handle fileHandle) {
if (success) {
mem.write32(messagePointer + 4, Result::Success);
} else {
Helpers::panic("FileOp::SetFileSize failed");
Helpers::warn("FileOp::SetFileSize failed");
mem.write32(messagePointer + 4, Result::FailurePlaceholder);
}
} else {
Helpers::panic("Tried to set file size of file without file descriptor");

View file

@ -399,3 +399,5 @@ std::string Kernel::getProcessName(u32 pid) {
Helpers::panic("Attempted to name non-current process");
}
}
// Gives access to the scheduler, which is owned by the CPU object and merely forwarded by the kernel
Scheduler& Kernel::getScheduler() {
	Scheduler& scheduler = cpu.getScheduler();
	return scheduler;
}

View file

@ -25,10 +25,12 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
}
codeFile.clear();
saveData.clear();
smdh.clear();
partitionInfo = info;
primaryKey = {};
secondaryKey = {};
size = u64(*(u32*)&header[0x104]) * mediaUnit; // TODO: Maybe don't type pun because big endian will break
exheaderSize = *(u32*)&header[0x180];
@ -78,15 +80,15 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
if (!primaryResult.first || !secondaryResult.first) {
gotCryptoKeys = false;
} else {
Crypto::AESKey primaryKey = primaryResult.second;
Crypto::AESKey secondaryKey = secondaryResult.second;
primaryKey = primaryResult.second;
secondaryKey = secondaryResult.second;
EncryptionInfo encryptionInfoTmp;
encryptionInfoTmp.normalKey = primaryKey;
encryptionInfoTmp.normalKey = *primaryKey;
encryptionInfoTmp.initialCounter.fill(0);
for (std::size_t i = 1; i <= sizeof(std::uint64_t) - 1; i++) {
encryptionInfoTmp.initialCounter[i] = header[0x108 + sizeof(std::uint64_t) - 1 - i];
for (usize i = 0; i < 8; i++) {
encryptionInfoTmp.initialCounter[i] = header[0x108 + 7 - i];
}
encryptionInfoTmp.initialCounter[8] = 1;
exheaderInfo.encryptionInfo = encryptionInfoTmp;
@ -94,7 +96,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
encryptionInfoTmp.initialCounter[8] = 2;
exeFS.encryptionInfo = encryptionInfoTmp;
encryptionInfoTmp.normalKey = secondaryKey;
encryptionInfoTmp.normalKey = *secondaryKey;
encryptionInfoTmp.initialCounter[8] = 3;
romFS.encryptionInfo = encryptionInfoTmp;
}
@ -152,8 +154,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
}
}
const u64 saveDataSize = *(u64*)&exheader[0x1C0 + 0x0]; // Size of save data in bytes
saveData.resize(saveDataSize, 0xff);
saveDataSize = *(u64*)&exheader[0x1C0 + 0x0]; // Size of save data in bytes
compressCode = (exheader[0xD] & 1) != 0;
stackSize = *(u32*)&exheader[0x1C];
@ -201,13 +202,20 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
Helpers::panic("Second code file in a single NCCH partition. What should this do?\n");
}
// All files in ExeFS use the same IV, though .code uses the secondary key for decryption
// whereas .icon/.banner use the primary key.
FSInfo info = exeFS;
if (encrypted && secondaryKey.has_value() && info.encryptionInfo.has_value()) {
info.encryptionInfo->normalKey = *secondaryKey;
}
if (compressCode) {
std::vector<u8> tmp;
tmp.resize(fileSize);
// A file offset of 0 means our file is located right after the ExeFS header
// So in the ROM, files are located at (file offset + exeFS offset + exeFS header size)
readFromFile(file, exeFS, tmp.data(), fileOffset + exeFSHeaderSize, fileSize);
readFromFile(file, info, tmp.data(), fileOffset + exeFSHeaderSize, fileSize);
// Decompress .code file from the tmp vector to the "code" vector
if (!CartLZ77::decompress(codeFile, tmp)) {
@ -216,7 +224,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
}
} else {
codeFile.resize(fileSize);
readFromFile(file, exeFS, codeFile.data(), fileOffset + exeFSHeaderSize, fileSize);
readFromFile(file, info, codeFile.data(), fileOffset + exeFSHeaderSize, fileSize);
}
} else if (std::strcmp(name, "icon") == 0) {
// Parse icon file to extract region info and more in the future (logo, etc)
@ -295,6 +303,7 @@ std::pair<bool, Crypto::AESKey> NCCH::getPrimaryKey(Crypto::AESEngine &aesEngine
if (encrypted) {
if (fixedCryptoKey) {
result.fill(0);
return {true, result};
}
@ -316,6 +325,7 @@ std::pair<bool, Crypto::AESKey> NCCH::getSecondaryKey(Crypto::AESEngine &aesEngi
if (encrypted) {
if (fixedCryptoKey) {
result.fill(0);
return {true, result};
}

View file

@ -5,9 +5,20 @@ void GLStateManager::resetBlend() {
logicOpEnabled = false;
logicOp = GL_COPY;
blendEquationRGB = GL_FUNC_ADD;
blendEquationAlpha = GL_FUNC_ADD;
blendFuncSourceRGB = GL_SRC_COLOR;
blendFuncDestRGB = GL_DST_COLOR;
blendFuncSourceAlpha = GL_SRC_ALPHA;
blendFuncDestAlpha = GL_DST_ALPHA;
OpenGL::disableBlend();
OpenGL::disableLogicOp();
OpenGL::setLogicOp(GL_COPY);
glBlendEquationSeparate(blendEquationRGB, blendEquationAlpha);
glBlendFuncSeparate(blendFuncSourceRGB, blendFuncDestRGB, blendFuncSourceAlpha, blendFuncDestAlpha);
}
void GLStateManager::resetClearing() {
@ -61,9 +72,12 @@ void GLStateManager::resetVAO() {
glBindVertexArray(0);
}
void GLStateManager::resetVBO() {
void GLStateManager::resetBuffers() {
boundVBO = 0;
boundUBO = 0;
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
}
void GLStateManager::resetProgram() {
@ -79,7 +93,7 @@ void GLStateManager::reset() {
resetDepth();
resetVAO();
resetVBO();
resetBuffers();
resetProgram();
resetScissor();
resetStencil();

View file

@ -4,7 +4,9 @@
#include <cmrc/cmrc.hpp>
#include "config.hpp"
#include "PICA/float_types.hpp"
#include "PICA/pica_frag_uniforms.hpp"
#include "PICA/gpu.hpp"
#include "PICA/regs.hpp"
#include "math_util.hpp"
@ -22,6 +24,8 @@ void RendererGL::reset() {
colourBufferCache.reset();
textureCache.reset();
clearShaderCache();
// Init the colour/depth buffer settings to some random defaults on reset
colourBufferLoc = 0;
colourBufferFormat = PICA::ColorFmt::RGBA8;
@ -38,12 +42,16 @@ void RendererGL::reset() {
oldDepthOffset = 0.0; // Default depth offset to 0
oldDepthmapEnable = false; // Enable w buffering
glUniform1f(depthScaleLoc, oldDepthScale);
glUniform1f(depthOffsetLoc, oldDepthOffset);
glUniform1i(depthmapEnableLoc, oldDepthmapEnable);
glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale);
glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset);
glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable);
gl.useProgram(oldProgram); // Switch to old GL program
}
#ifdef __ANDROID__
fragShaderGen.setTarget(PICA::ShaderGen::API::GLES, PICA::ShaderGen::Language::GLSL);
#endif
}
void RendererGL::initGraphicsContextInternal() {
@ -57,24 +65,7 @@ void RendererGL::initGraphicsContextInternal() {
OpenGL::Shader vert({vertexShaderSource.begin(), vertexShaderSource.size()}, OpenGL::Vertex);
OpenGL::Shader frag({fragmentShaderSource.begin(), fragmentShaderSource.size()}, OpenGL::Fragment);
triangleProgram.create({vert, frag});
gl.useProgram(triangleProgram);
textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource");
textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand");
textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner");
textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor");
textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale");
depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale");
depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset");
depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable");
picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs");
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0);
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex1"), 1);
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2);
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex_lighting_lut"), 3);
initUbershader(triangleProgram);
auto displayVertexShaderSource = gl_resources.open("opengl_display.vert");
auto displayFragmentShaderSource = gl_resources.open("opengl_display.frag");
@ -124,7 +115,11 @@ void RendererGL::initGraphicsContextInternal() {
const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320
const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall
glGenTextures(1, &lightLUTTextureArray);
// 24 rows for light, 1 for fog
LUTTexture.create(256, Lights::LUT_Count + 1, GL_RG32F);
LUTTexture.bind();
LUTTexture.setMinFilter(OpenGL::Linear);
LUTTexture.setMagFilter(OpenGL::Linear);
auto prevTexture = OpenGL::getTex2D();
@ -166,6 +161,10 @@ void RendererGL::initGraphicsContextInternal() {
OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]);
reset();
// Initialize the default vertex shader used with shadergen
std::string defaultShadergenVSSource = fragShaderGen.getDefaultVertexShader();
defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex);
}
// The OpenGL renderer doesn't need to do anything with the GL context (For Qt frontend) or the SDL window (For SDL frontend)
@ -236,8 +235,8 @@ void RendererGL::setupBlending() {
OpenGL::setBlendColor(float(r) / 255.f, float(g) / 255.f, float(b) / 255.f, float(a) / 255.f);
// Translate equations and funcs to their GL equivalents and set them
glBlendEquationSeparate(blendingEquations[rgbEquation], blendingEquations[alphaEquation]);
glBlendFuncSeparate(blendingFuncs[rgbSourceFunc], blendingFuncs[rgbDestFunc], blendingFuncs[alphaSourceFunc], blendingFuncs[alphaDestFunc]);
gl.setBlendEquation(blendingEquations[rgbEquation], blendingEquations[alphaEquation]);
gl.setBlendFunc(blendingFuncs[rgbSourceFunc], blendingFuncs[rgbDestFunc], blendingFuncs[alphaSourceFunc], blendingFuncs[alphaDestFunc]);
}
}
@ -289,10 +288,8 @@ void RendererGL::setupStencilTest(bool stencilEnable) {
glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]);
}
void RendererGL::setupTextureEnvState() {
void RendererGL::setupUbershaderTexEnv() {
// TODO: Only update uniforms when the TEV config changed. Use an UBO potentially.
static constexpr std::array<u32, 6> ioBases = {
PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source,
PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source,
@ -314,11 +311,11 @@ void RendererGL::setupTextureEnvState() {
textureEnvScaleRegs[i] = regs[ioBase + 4];
}
glUniform1uiv(textureEnvSourceLoc, 6, textureEnvSourceRegs);
glUniform1uiv(textureEnvOperandLoc, 6, textureEnvOperandRegs);
glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs);
glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs);
glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs);
glUniform1uiv(ubershaderData.textureEnvSourceLoc, 6, textureEnvSourceRegs);
glUniform1uiv(ubershaderData.textureEnvOperandLoc, 6, textureEnvOperandRegs);
glUniform1uiv(ubershaderData.textureEnvCombinerLoc, 6, textureEnvCombinerRegs);
glUniform1uiv(ubershaderData.textureEnvColorLoc, 6, textureEnvColourRegs);
glUniform1uiv(ubershaderData.textureEnvScaleLoc, 6, textureEnvScaleRegs);
}
void RendererGL::bindTexturesToSlots() {
@ -357,26 +354,49 @@ void RendererGL::bindTexturesToSlots() {
}
glActiveTexture(GL_TEXTURE0 + 3);
glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
LUTTexture.bind();
glActiveTexture(GL_TEXTURE0);
}
void RendererGL::updateLightingLUT() {
gpu.lightingLUTDirty = false;
std::array<u16, GPU::LightingLutSize> u16_lightinglut;
std::array<float, GPU::LightingLutSize * 2> lightingLut;
for (int i = 0; i < gpu.lightingLUT.size(); i++) {
uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
u16_lightinglut[i] = value * 65535 / 4095;
for (int i = 0; i < lightingLut.size(); i += 2) {
uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF;
lightingLut[i] = (float)(value << 4) / 65535.0f;
}
glActiveTexture(GL_TEXTURE0 + 3);
glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
LUTTexture.bind();
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, Lights::LUT_Count, GL_RG, GL_FLOAT, lightingLut.data());
glActiveTexture(GL_TEXTURE0);
}
// Converts the raw fog LUT into floats and uploads it into the row of the shared LUT texture
// that follows the lighting LUT rows, then clears the fog LUT dirty flag.
void RendererGL::updateFogLUT() {
	// Each raw fog LUT entry packs two fields:
	//   bits 0-12:  fixed1.1.11, difference from the next element
	//   bits 13-23: fixed0.0.11, value
	// They are uploaded as a 128x1 RG row, with R holding the value and G holding the difference
	constexpr int entryCount = 128;
	std::array<float, entryCount * 2> texels;

	for (int entry = 0; entry < entryCount; entry++) {
		const uint32_t raw = gpu.fogLUT[entry];

		// Sign extend the 13-bit difference field to 32 bits
		int32_t delta = raw & 0x1fff;
		delta = (delta << 19) >> 19;

		texels[entry * 2] = float((raw >> 13) & 0x7ff) / 2048.0f;
		texels[entry * 2 + 1] = float(delta) / 2048.0f;
	}
	gpu.fogLUTDirty = false;

	// The LUT texture lives in texture unit 3; restore unit 0 as the active unit afterwards
	glActiveTexture(GL_TEXTURE0 + 3);
	LUTTexture.bind();
	// The fog LUT exists at the end of the lighting LUT
	glTexSubImage2D(GL_TEXTURE_2D, 0, 0, Lights::LUT_Count, 128, 1, GL_RG, GL_FLOAT, texels.data());
	glActiveTexture(GL_TEXTURE0);
}
@ -389,11 +409,29 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
OpenGL::Triangle,
};
bool usingUbershader = enableUbershader;
if (usingUbershader) {
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1;
// Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using more than N lights via shadergen
// This way we generate fewer shaders overall than with full shadergen, but don't tank performance
if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) {
usingUbershader = false;
}
}
if (usingUbershader) {
gl.useProgram(triangleProgram);
} else {
OpenGL::Program& program = getSpecializedShader();
gl.useProgram(program);
}
const auto primitiveTopology = primTypes[static_cast<usize>(primType)];
gl.disableScissor();
gl.bindVBO(vbo);
gl.bindVAO(vao);
gl.useProgram(triangleProgram);
gl.enableClipPlane(0); // Clipping plane 0 is always enabled
if (regs[PICA::InternalRegs::ClipEnable] & 1) {
@ -414,32 +452,38 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
static constexpr std::array<GLenum, 8> depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL};
const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
// Update ubershader uniforms
if (usingUbershader) {
const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
// Update depth uniforms
if (oldDepthScale != depthScale) {
oldDepthScale = depthScale;
glUniform1f(depthScaleLoc, depthScale);
if (oldDepthScale != depthScale) {
oldDepthScale = depthScale;
glUniform1f(ubershaderData.depthScaleLoc, depthScale);
}
if (oldDepthOffset != depthOffset) {
oldDepthOffset = depthOffset;
glUniform1f(ubershaderData.depthOffsetLoc, depthOffset);
}
if (oldDepthmapEnable != depthMapEnable) {
oldDepthmapEnable = depthMapEnable;
glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable);
}
// Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48)
// The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]);
setupUbershaderTexEnv();
}
if (oldDepthOffset != depthOffset) {
oldDepthOffset = depthOffset;
glUniform1f(depthOffsetLoc, depthOffset);
}
if (oldDepthmapEnable != depthMapEnable) {
oldDepthmapEnable = depthMapEnable;
glUniform1i(depthmapEnableLoc, depthMapEnable);
}
setupTextureEnvState();
bindTexturesToSlots();
// Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48)
// The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
glUniform1uiv(picaRegLoc, 0x200 - 0x48, &regs[0x48]);
if (gpu.fogLUTDirty) {
updateFogLUT();
}
if (gpu.lightingLUTDirty) {
updateLightingLUT();
@ -487,7 +531,6 @@ void RendererGL::display() {
gl.disableScissor();
gl.disableBlend();
gl.disableDepth();
gl.disableScissor();
// This will work fine whether or not logic ops are enabled. We set logic op to copy instead of disabling to avoid state changes
gl.setLogicOp(GL_COPY);
gl.setColourMask(true, true, true, true);
@ -616,7 +659,15 @@ OpenGL::Texture RendererGL::getTexture(Texture& tex) {
if (buffer.has_value()) {
return buffer.value().get().texture;
} else {
const auto textureData = std::span{gpu.getPointerPhys<u8>(tex.location), tex.sizeInBytes()}; // Get pointer to the texture data in 3DS memory
const u8* startPointer = gpu.getPointerPhys<u8>(tex.location);
const usize sizeInBytes = tex.sizeInBytes();
if (startPointer == nullptr || (sizeInBytes > 0 && gpu.getPointerPhys<u8>(tex.location + sizeInBytes - 1) == nullptr)) [[unlikely]] {
Helpers::warn("Out-of-bounds texture fetch");
return blankTexture;
}
const auto textureData = std::span{startPointer, tex.sizeInBytes()}; // Get pointer to the texture data in 3DS memory
Texture& newTex = textureCache.add(tex);
newTex.decodeTexture(textureData);
@ -727,7 +778,8 @@ void RendererGL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32
if (inputWidth != 0) [[likely]] {
copyHeight = (copySize / inputWidth) * 8;
} else {
copyHeight = 0;
Helpers::warn("Zero-width texture copy");
return;
}
// Find the source surface.
@ -778,6 +830,127 @@ std::optional<ColourBuffer> RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt
return colourBufferCache.add(sampleBuffer);
}
// Returns the specialized (shadergen) program matching the current PICA register state.
// On a cache miss the fragment shader is generated, linked with the default shadergen vertex shader,
// made the active program, and given its own uniform buffer. On every call the program's uniform
// buffer is bound to the FragmentUniforms binding point and refreshed from the current registers.
OpenGL::Program& RendererGL::getSpecializedShader() {
// Binding point shared by every specialized program's "FragmentUniforms" block
constexpr uint uboBlockBinding = 2;
PICA::FragmentConfig fsConfig(regs);
// Look the config up in the cache; an entry whose program does not exist yet is treated as a miss below
CachedProgram& programEntry = shaderCache[fsConfig];
OpenGL::Program& program = programEntry.program;
if (!program.exists()) {
std::string fs = fragShaderGen.generate(fsConfig);
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
program.create({defaultShadergenVs, fragShader});
// The program must be active for the glUniform1i sampler setup below
gl.useProgram(program);
// The fragment shader object is released right after the program has been created
fragShader.free();
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3);
// Allocate memory for the program UBO
glGenBuffers(1, &programEntry.uboBinding);
gl.bindUBO(programEntry.uboBinding);
glBufferData(GL_UNIFORM_BUFFER, sizeof(PICA::FragmentUniforms), nullptr, GL_DYNAMIC_DRAW);
// Set up the binding for our UBO. Sadly we can't specify it in the shader like normal people,
// As it's an OpenGL 4.2 feature that MacOS doesn't support...
uint uboIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms");
glUniformBlockBinding(program.handle(), uboIndex, uboBlockBinding);
}
// Attach this program's buffer to the shared binding point (done on every call, not only on creation)
glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, programEntry.uboBinding);
// Upload uniform data to our shader's UBO
// NOTE(review): `uniforms` is not value-initialized here, and clipCoords / the lighting fields are only
// written when the corresponding feature is enabled. Unless FragmentUniforms has default member
// initializers, the unused fields are uploaded with indeterminate contents - confirm the generated
// shader never reads them in that case.
PICA::FragmentUniforms uniforms;
uniforms.alphaReference = Helpers::getBits<8, 8>(regs[InternalRegs::AlphaTestConfig]);
// Set up the texenv buffer color
// Each byte of the register, from least to most significant, becomes one component normalized to [0, 1]
const u32 texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor];
uniforms.tevBufferColor[0] = float(texEnvBufferColor & 0xFF) / 255.0f;
uniforms.tevBufferColor[1] = float((texEnvBufferColor >> 8) & 0xFF) / 255.0f;
uniforms.tevBufferColor[2] = float((texEnvBufferColor >> 16) & 0xFF) / 255.0f;
uniforms.tevBufferColor[3] = float((texEnvBufferColor >> 24) & 0xFF) / 255.0f;
// Depth scale/offset are stored in the low 24 bits of their registers as f24 values
uniforms.depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
uniforms.depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
// Clip plane coefficients are only filled in when clipping is enabled
if (regs[InternalRegs::ClipEnable] & 1) {
uniforms.clipCoords[0] = f24::fromRaw(regs[PICA::InternalRegs::ClipData0] & 0xffffff).toFloat32();
uniforms.clipCoords[1] = f24::fromRaw(regs[PICA::InternalRegs::ClipData1] & 0xffffff).toFloat32();
uniforms.clipCoords[2] = f24::fromRaw(regs[PICA::InternalRegs::ClipData2] & 0xffffff).toFloat32();
uniforms.clipCoords[3] = f24::fromRaw(regs[PICA::InternalRegs::ClipData3] & 0xffffff).toFloat32();
}
// Set up the constant color for the 6 TEV stages
for (int i = 0; i < 6; i++) {
static constexpr std::array<u32, 6> ioBases = {
PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source,
PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source,
};
auto& vec = uniforms.constantColors[i];
u32 base = ioBases[i];
// The stage's constant color register sits 3 registers after its source register
u32 color = regs[base + 3];
vec[0] = float(color & 0xFF) / 255.0f;
vec[1] = float((color >> 8) & 0xFF) / 255.0f;
vec[2] = float((color >> 16) & 0xFF) / 255.0f;
vec[3] = float((color >> 24) & 0xFF) / 255.0f;
}
// Append lighting uniforms
if (fsConfig.lighting.enable) {
uniforms.globalAmbientLight = regs[InternalRegs::LightGlobalAmbient];
// All 8 lights are uploaded; each light's register block is 0x10 registers after the previous one
for (int i = 0; i < 8; i++) {
auto& light = uniforms.lightUniforms[i];
const u32 specular0 = regs[InternalRegs::Light0Specular0 + i * 0x10];
const u32 specular1 = regs[InternalRegs::Light0Specular1 + i * 0x10];
const u32 diffuse = regs[InternalRegs::Light0Diffuse + i * 0x10];
const u32 ambient = regs[InternalRegs::Light0Ambient + i * 0x10];
const u32 lightXY = regs[InternalRegs::Light0XY + i * 0x10];
const u32 lightZ = regs[InternalRegs::Light0Z + i * 0x10];
const u32 spotlightXY = regs[InternalRegs::Light0SpotlightXY + i * 0x10];
const u32 spotlightZ = regs[InternalRegs::Light0SpotlightZ + i * 0x10];
const u32 attenuationBias = regs[InternalRegs::Light0AttenuationBias + i * 0x10];
const u32 attenuationScale = regs[InternalRegs::Light0AttenuationScale + i * 0x10];
// Unpacks a light color register into three normalized components, taken from the 8-bit fields
// at bit offsets 20, 10 and 0 in that order (assuming Helpers::getBits<offset, width> - TODO confirm)
#define lightColorToVec3(value) \
{ \
float(Helpers::getBits<20, 8>(value)) / 255.0f, \
float(Helpers::getBits<10, 8>(value)) / 255.0f, \
float(Helpers::getBits<0, 8>(value)) / 255.0f, \
}
light.specular0 = lightColorToVec3(specular0);
light.specular1 = lightColorToVec3(specular1);
light.diffuse = lightColorToVec3(diffuse);
light.ambient = lightColorToVec3(ambient);
// Position components are 16-bit floats: X in the low half and Y in the high half of one register, Z in the next
light.position[0] = Floats::f16::fromRaw(u16(lightXY)).toFloat32();
light.position[1] = Floats::f16::fromRaw(u16(lightXY >> 16)).toFloat32();
light.position[2] = Floats::f16::fromRaw(u16(lightZ)).toFloat32();
// 13-bit signed fixed point (1.1.11) to float, without negation
// The shift pair sign-extends the 13-bit field before the division
light.spotlightDirection[0] = float(s32(spotlightXY & 0x1FFF) << 19 >> 19) / 2047.0;
light.spotlightDirection[1] = float(s32((spotlightXY >> 16) & 0x1FFF) << 19 >> 19) / 2047.0;
light.spotlightDirection[2] = float(s32(spotlightZ & 0x1FFF) << 19 >> 19) / 2047.0;
// Attenuation bias/scale are stored in the low 20 bits of their registers as f20 values
light.distanceAttenuationBias = Floats::f20::fromRaw(attenuationBias & 0xFFFFF).toFloat32();
light.distanceAttenuationScale = Floats::f20::fromRaw(attenuationScale & 0xFFFFF).toFloat32();
#undef lightColorToVec3
}
}
// Upload the whole uniform struct into this program's buffer
gl.bindUBO(programEntry.uboBinding);
glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms);
return program;
}
void RendererGL::screenshot(const std::string& name) {
constexpr uint width = 400;
constexpr uint height = 2 * 240;
@ -803,13 +976,67 @@ void RendererGL::screenshot(const std::string& name) {
stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0);
}
// Releases the GL program and uniform buffer owned by every cached specialized shader,
// then empties the cache.
void RendererGL::clearShaderCache() {
	for (auto& [config, entry] : shaderCache) {
		entry.program.free();
		glDeleteBuffers(1, &entry.uboBinding);
	}

	shaderCache.clear();
}
void RendererGL::deinitGraphicsContext() {
// Invalidate all surface caches since they'll no longer be valid
textureCache.reset();
depthBufferCache.reset();
colourBufferCache.reset();
clearShaderCache();
// All other GL objects should be invalidated automatically and be recreated by the next call to initGraphicsContext
// TODO: Make it so that depth and colour buffers get written back to 3DS memory
printf("RendererGL::DeinitGraphicsContext called\n");
}
}
// Returns the source text of the fragment ubershader embedded in the renderer's resource filesystem.
std::string RendererGL::getUbershader() {
	auto shaderFile = cmrc::RendererGL::get_filesystem().open("opengl_fragment_shader.frag");
	return std::string(shaderFile.begin(), shaderFile.end());
}
// Replaces the fragment ubershader with the given GLSL source.
// The embedded vertex shader is recompiled, linked with `shader` into triangleProgram, and the
// ubershader uniform locations and sampler bindings are re-initialized. The cached depth uniforms
// are then re-uploaded so the new program starts with the values the renderer believes are current.
void RendererGL::setUbershader(const std::string& shader) {
	auto gl_resources = cmrc::RendererGL::get_filesystem();
	auto vertexShaderSource = gl_resources.open("opengl_vertex_shader.vert");

	OpenGL::Shader vert({vertexShaderSource.begin(), vertexShaderSource.size()}, OpenGL::Vertex);
	OpenGL::Shader frag(shader, OpenGL::Fragment);
	triangleProgram.create({vert, frag});

	// The shader objects are only needed to build the program. Release them here, the same way
	// getSpecializedShader frees its fragment shader, so repeated ubershader swaps don't leak them
	vert.free();
	frag.free();

	// Makes the new program current and refreshes ubershaderData's uniform locations
	initUbershader(triangleProgram);

	// A fresh program has default uniform values, so push the cached depth state into it
	glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale);
	glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset);
	glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable);
}
// Makes `program` the active program, caches the ubershader uniform locations in ubershaderData
// and assigns the texture samplers to their texture units.
void RendererGL::initUbershader(OpenGL::Program& program) {
	gl.useProgram(program);

	const auto locate = [&program](const char* uniformName) { return OpenGL::uniformLocation(program, uniformName); };

	// Texture environment (TEV) uniforms
	auto& locations = ubershaderData;
	locations.textureEnvSourceLoc = locate("u_textureEnvSource");
	locations.textureEnvOperandLoc = locate("u_textureEnvOperand");
	locations.textureEnvCombinerLoc = locate("u_textureEnvCombiner");
	locations.textureEnvColorLoc = locate("u_textureEnvColor");
	locations.textureEnvScaleLoc = locate("u_textureEnvScale");

	// Depth uniforms and the PICA register array
	locations.depthScaleLoc = locate("u_depthScale");
	locations.depthOffsetLoc = locate("u_depthOffset");
	locations.depthmapEnableLoc = locate("u_depthmapEnable");
	locations.picaRegLoc = locate("u_picaRegs");

	// Sampler setup: textures 0-2 use texture units 0-2 and the LUT texture uses texture unit 3
	glUniform1i(locate("u_tex0"), 0);
	glUniform1i(locate("u_tex1"), 1);
	glUniform1i(locate("u_tex2"), 2);
	glUniform1i(locate("u_tex_luts"), 3);
}

View file

@ -61,6 +61,7 @@ void Y2RService::reset() {
inputLineWidth = 420;
conversionCoefficients.fill(0);
isBusy = false;
}
void Y2RService::handleSyncRequest(u32 messagePointer) {
@ -156,6 +157,11 @@ void Y2RService::setTransferEndInterrupt(u32 messagePointer) {
// Handles Y2R::StopConversion: cancels any conversion in progress and replies with success.
void Y2RService::stopConversion(u32 messagePointer) {
	log("Y2R::StopConversion\n");

	// If a conversion was running, mark it finished and drop its pending completion event
	const bool wasBusy = isBusy;
	isBusy = false;
	if (wasBusy) {
		kernel.getScheduler().removeEvent(Scheduler::EventType::SignalY2R);
	}

	mem.write32(messagePointer, IPC::responseHeader(0x27, 1, 0));
	mem.write32(messagePointer + 4, Result::Success);
}
@ -167,7 +173,7 @@ void Y2RService::isBusyConversion(u32 messagePointer) {
mem.write32(messagePointer, IPC::responseHeader(0x28, 2, 0));
mem.write32(messagePointer + 4, Result::Success);
mem.write32(messagePointer + 8, static_cast<u32>(BusyStatus::NotBusy));
mem.write32(messagePointer + 8, static_cast<u32>(isBusy ? BusyStatus::Busy : BusyStatus::NotBusy));
}
void Y2RService::setBlockAlignment(u32 messagePointer) {
@ -434,11 +440,15 @@ void Y2RService::startConversion(u32 messagePointer) {
mem.write32(messagePointer, IPC::responseHeader(0x26, 1, 0));
mem.write32(messagePointer + 4, Result::Success);
// Make Y2R conversion end instantly.
// Signal the transfer end event if it's been created. TODO: Is this affected by SetTransferEndInterrupt?
if (transferEndEvent.has_value()) {
kernel.signalEvent(transferEndEvent.value());
}
// Schedule Y2R conversion end event.
// The tick value is tweaked based on the minimum delay needed to get FIFA 15 to not hang due to a race condition on its title screen
static constexpr u64 delayTicks = 1'350'000;
isBusy = true;
// Remove any potential pending Y2R event and schedule a new one
Scheduler& scheduler = kernel.getScheduler();
scheduler.removeEvent(Scheduler::EventType::SignalY2R);
scheduler.addEvent(Scheduler::EventType::SignalY2R, scheduler.currentTimestamp + delayTicks);
}
void Y2RService::isFinishedSendingYUV(u32 messagePointer) {
@ -484,4 +494,15 @@ void Y2RService::isFinishedReceiving(u32 messagePointer) {
mem.write32(messagePointer, IPC::responseHeader(0x17, 2, 0));
mem.write32(messagePointer + 4, Result::Success);
mem.write32(messagePointer + 8, finished ? 1 : 0);
}
// Marks the running conversion as finished and signals the transfer end event.
// Does nothing when no conversion is in progress.
void Y2RService::signalConversionDone() {
	if (!isBusy) {
		return;
	}
	isBusy = false;

	// Signal the transfer end event if it's been created. TODO: Is this affected by SetTransferEndInterrupt?
	if (transferEndEvent) {
		kernel.signalEvent(*transferEndEvent);
	}
}

View file

@ -84,6 +84,7 @@ void Emulator::reset(ReloadOption reload) {
}
}
#ifndef __LIBRETRO__
std::filesystem::path Emulator::getAndroidAppPath() {
// SDL_GetPrefPath fails to get the path due to no JNI environment
std::ifstream cmdline("/proc/self/cmdline");
@ -100,6 +101,7 @@ std::filesystem::path Emulator::getConfigPath() {
return std::filesystem::current_path() / "config.toml";
}
}
#endif
void Emulator::step() {}
void Emulator::render() {}
@ -169,6 +171,8 @@ void Emulator::pollScheduler() {
break;
}
case Scheduler::EventType::SignalY2R: kernel.getServiceManager().getY2R().signalConversionDone(); break;
default: {
Helpers::panic("Scheduler: Unimplemented event type received: %d\n", static_cast<int>(eventType));
break;
@ -177,6 +181,7 @@ void Emulator::pollScheduler() {
}
}
#ifndef __LIBRETRO__
// Get path for saving files (AppData on Windows, /home/user/.local/share/ApplicationName on Linux, etc)
// Inside that path, we use a game-specific folder as well. Eg if we were loading a ROM called PenguinDemo.3ds, the savedata would be in
// %APPDATA%/Alber/PenguinDemo/SaveData on Windows, and so on. We do this because games save data in their own filesystem on the cart.
@ -200,6 +205,7 @@ std::filesystem::path Emulator::getAppDataRoot() {
return appDataPath;
}
#endif
bool Emulator::loadROM(const std::filesystem::path& path) {
// Reset the emulator if we've already loaded a ROM

View file

@ -1,8 +1,6 @@
#version 410 core
in vec3 v_tangent;
in vec3 v_normal;
in vec3 v_bitangent;
in vec4 v_quaternion;
in vec4 v_colour;
in vec3 v_texcoord0;
in vec2 v_texcoord1;
@ -27,7 +25,7 @@ uniform bool u_depthmapEnable;
uniform sampler2D u_tex0;
uniform sampler2D u_tex1;
uniform sampler2D u_tex2;
uniform sampler1DArray u_tex_lighting_lut;
uniform sampler2D u_tex_luts;
uniform uint u_picaRegs[0x200 - 0x48];
@ -37,6 +35,16 @@ uint readPicaReg(uint reg_addr) { return u_picaRegs[reg_addr - 0x48u]; }
vec4 tevSources[16];
vec4 tevNextPreviousBuffer;
bool tevUnimplementedSourceFlag = false;
vec3 normal;
// See docs/lighting.md
// Packed table with 7 bits per lighting environment: bit (7 * environment_id + lut_id) is set
// when the LUT `lut_id` is enabled for that environment
const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu);

// Returns whether the given LUT is enabled in the given lighting environment.
// Bit positions that fall outside the 64-bit table are reported as disabled instead of
// indexing past the end of the array, which can happen because environment_id comes from a 4-bit field.
bool isSamplerEnabled(uint environment_id, uint lut_id) {
	uint index = 7u * environment_id + lut_id;
	uint arrayIndex = (index >> 5);
	if (arrayIndex >= 2u) {
		return false;
	}

	return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u;
}
// OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements):
// https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml
@ -110,7 +118,7 @@ vec4 tevCalculateCombiner(int tev_id) {
case 6u: result.rgb = vec3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB
case 7u: result = vec4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA
case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add
case 9u: result.rgb = min((source0.rgb + source1.rgb) * source2.rgb, 1.0); break; // Add then multiply
case 9u: result.rgb = min(source0.rgb + source1.rgb, 1.0) * source2.rgb; break; // Add then multiply
default: break;
}
@ -125,7 +133,7 @@ vec4 tevCalculateCombiner(int tev_id) {
case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate
case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract
case 8u: result.a = min(1.0, source0.a * source1.a + source2.a); break; // Multiply then add
case 9u: result.a = min(1.0, (source0.a + source1.a) * source2.a); break; // Add then multiply
case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break; // Add then multiply
default: break;
}
}
@ -144,10 +152,18 @@ vec4 tevCalculateCombiner(int tev_id) {
#define RG_LUT 5u
#define RR_LUT 6u
float lutLookup(uint lut, uint light, float value) {
if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
if (lut == SP_LUT) lut = light + 8;
return texture(u_tex_lighting_lut, vec2(value, lut)).r;
#define FOG_INDEX 24
uint GPUREG_LIGHTi_CONFIG;
uint GPUREG_LIGHTING_CONFIG1;
uint GPUREG_LIGHTING_LUTINPUT_SELECT;
uint GPUREG_LIGHTING_LUTINPUT_SCALE;
uint GPUREG_LIGHTING_LUTINPUT_ABS;
bool error_unimpl = false;
vec4 unimpl_color = vec4(1.0, 0.0, 1.0, 1.0);
float lutLookup(uint lut, int index) {
return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r;
}
vec3 regToColor(uint reg) {
@ -178,136 +194,179 @@ float decodeFP(uint hex, uint E, uint M) {
return uintBitsToFloat(hex);
}
// Evaluates one lighting LUT for the current light and returns the scaled LUT value.
//
// environment_id: lighting environment, used to check whether the LUT is enabled at all
// lut_id:         which LUT to sample (D0/D1/SP/FR/RB/RG/RR)
// light_id:       hardware light index, used for the per-light spotlight LUT and spotlight direction
// light_vector:   light vector for this light
// half_vector:    half vector for this light (normalized here before use)
//
// Returns 1.0 when the LUT is not enabled for the environment, when its disable bit is set in
// GPUREG_LIGHTING_CONFIG1, or when lut_id is not a known LUT (error_unimpl is also raised then).
// Relies on the globals `normal`, GPUREG_LIGHTi_CONFIG, GPUREG_LIGHTING_CONFIG1 and the
// GPUREG_LIGHTING_LUTINPUT_* registers having been loaded before the call.
float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light_vector, vec3 half_vector) {
	uint lut_index;
	int bit_in_config1;

	if (lut_id == SP_LUT) {
		// These are the spotlight attenuation LUTs
		bit_in_config1 = 8 + int(light_id & 7u);
		lut_index = 8u + light_id;
	} else if (lut_id <= 6u) {
		bit_in_config1 = 16 + int(lut_id);
		lut_index = lut_id;
	} else {
		// Unknown LUT: flag it and bail out with the neutral value rather than continuing
		// with an uninitialized LUT index and config bit
		error_unimpl = true;
		return 1.0;
	}

	bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id);  // 7 luts per environment

	// A set bit in GPUREG_LIGHTING_CONFIG1 disables the LUT
	if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
		return 1.0;
	}

	// Scale is 2^scale_id, except that ids 6 and 7 encode 0.25 and 0.5
	uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
	float scale = float(1u << scale_id);
	if (scale_id >= 6u) scale /= 256.0;

	// Select the dot product that is used as the LUT input
	float delta = 1.0;
	uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
	switch (input_id) {
		case 0u: {
			delta = dot(normal, normalize(half_vector));
			break;
		}
		case 1u: {
			delta = dot(normalize(v_view), normalize(half_vector));
			break;
		}
		case 2u: {
			delta = dot(normal, normalize(v_view));
			break;
		}
		case 3u: {
			delta = dot(light_vector, normal);
			break;
		}
		case 4u: {
			int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u)));
			int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u)));

			// Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions
			// of GLSL so we do it manually
			int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
			int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
			int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);

			if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000;
			if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000;
			if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000;

			// These are fixed point 1.1.11 values, so we need to convert them to float
			float x = float(se_x) / 2047.0;
			float y = float(se_y) / 2047.0;
			float z = float(se_z) / 2047.0;
			vec3 spotlight_vector = vec3(x, y, z);
			delta = dot(light_vector, spotlight_vector);  // spotlight direction is negated so we don't negate light_vector
			break;
		}
		case 5u: {
			delta = 1.0;  // TODO: cos <greek symbol> (aka CP);
			error_unimpl = true;
			break;
		}
		default: {
			delta = 1.0;
			error_unimpl = true;
			break;
		}
	}

	// 0 = enabled
	if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
		// Two sided diffuse
		if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
			delta = max(delta, 0.0);
		} else {
			delta = abs(delta);
		}

		// Input is in [0, 1] here, map it onto the 256 LUT entries
		int index = int(clamp(floor(delta * 255.0), 0.f, 255.f));
		return lutLookup(lut_index, index) * scale;
	} else {
		// Range is [-1, 1] so we need to map it to [0, 1]
		// Negative inputs use the upper half of the table (entries 128-255)
		int index = int(clamp(floor(delta * 128.0), -128.f, 127.f));
		if (index < 0) index += 256;
		return lutLookup(lut_index, index) * scale;
	}
}
// Rotates vector v by quaternion q, where q.xyz is the vector part and q.w the scalar part.
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
	vec3 axis = q.xyz;
	float w = q.w;

	vec3 along_axis = 2.0 * dot(axis, v) * axis;
	vec3 scaled_input = (w * w - dot(axis, axis)) * v;
	vec3 perpendicular = 2.0 * w * cross(axis, v);
	return along_axis + scaled_input + perpendicular;
}
// Implements the following algorithm: https://mathb.in/26766
void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
// Quaternions describe a transformation from surface-local space to eye space.
// In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1),
// the tangent vector is (1,0,0), and the bitangent vector is (0,1,0).
vec3 normal = normalize(v_normal);
vec3 tangent = normalize(v_tangent);
vec3 bitangent = normalize(v_bitangent);
vec3 view = normalize(v_view);
uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu);
if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
primary_color = secondary_color = vec4(1.0);
primary_color = secondary_color = vec4(0.0);
return;
}
uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u);
uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2u) & 0x7u) + 1u;
uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9u);
primary_color = vec4(vec3(0.0), 1.0);
secondary_color = vec4(vec3(0.0), 1.0);
primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT);
uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);
uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u);
uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u);
uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u);
float d[7];
GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u);
GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u);
GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);
bool error_unimpl = false;
uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2);
// Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker
switch (bump_mode) {
default: {
normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion);
break;
}
}
vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);
uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4);
bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
uint light_id;
vec3 light_vector;
vec3 half_vector;
for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) {
uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3);
light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id);
uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + 0x10u * light_id);
uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + 0x10u * light_id);
uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + 0x10u * light_id);
uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + 0x10u * light_id);
uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u));
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u));
uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + (light_id << 4u));
uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + (light_id << 4u));
uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + (light_id << 4u));
uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + (light_id << 4u));
GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + (light_id << 4u));
vec3 light_vector = normalize(vec3(
float light_distance;
vec3 light_position = vec3(
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
));
vec3 half_vector;
);
// Positional Light
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
// error_unimpl = true;
half_vector = normalize(normalize(light_vector + v_view) + view);
light_vector = light_position + v_view;
}
// Directional light
else {
half_vector = normalize(normalize(light_vector) + view);
light_vector = light_position;
}
for (int c = 0; c < 7; c++) {
if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
float scale = float(1u << scale_id);
if (scale_id >= 6u) scale /= 256.0;
light_distance = length(light_vector);
light_vector = normalize(light_vector);
half_vector = light_vector + normalize(v_view);
uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
if (input_id == 0u)
d[c] = dot(normal, half_vector);
else if (input_id == 1u)
d[c] = dot(view, half_vector);
else if (input_id == 2u)
d[c] = dot(normal, view);
else if (input_id == 3u)
d[c] = dot(light_vector, normal);
else if (input_id == 4u) {
uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id);
vec3 spot_light_vector = normalize(vec3(
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
));
d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP);
} else if (input_id == 5u) {
d[c] = 1.0; // TODO: cos <greek symbol> (aka CP);
error_unimpl = true;
} else {
d[c] = 1.0;
}
d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale;
if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
} else {
d[c] = 1.0;
}
}
uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4);
if (lookup_config == 0u) {
d[D1_LUT] = 0.0;
d[FR_LUT] = 0.0;
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
} else if (lookup_config == 1u) {
d[D0_LUT] = 0.0;
d[D1_LUT] = 0.0;
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
} else if (lookup_config == 2u) {
d[FR_LUT] = 0.0;
d[SP_LUT] = 0.0;
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
} else if (lookup_config == 3u) {
d[SP_LUT] = 0.0;
d[RG_LUT] = d[RB_LUT] = d[RR_LUT] = 1.0;
} else if (lookup_config == 4u) {
d[FR_LUT] = 0.0;
} else if (lookup_config == 5u) {
d[D1_LUT] = 0.0;
} else if (lookup_config == 6u) {
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
}
float distance_factor = 1.0; // a
float indirect_factor = 1.0; // fi
float shadow_factor = 1.0; // o
float NdotL = dot(normal, light_vector); // Li dot N
float NdotL = dot(normal, light_vector); // N dot Li
// Two sided diffuse
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u)
@ -315,20 +374,86 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
else
NdotL = abs(NdotL);
float light_factor = distance_factor * d[SP_LUT] * indirect_factor * shadow_factor;
float geometric_factor;
bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
if (use_geo_0 || use_geo_1) {
geometric_factor = dot(half_vector, half_vector);
geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0);
}
primary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL);
secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] +
regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT]));
float distance_attenuation = 1.0;
if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u);
float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u);
float delta = light_distance * distance_attenuation_scale + distance_attenuation_bias;
delta = clamp(delta, 0.0, 1.0);
int index = int(clamp(floor(delta * 255.0), 0.0, 255.0));
distance_attenuation = lutLookup(16u + light_id, index);
}
float spotlight_attenuation = lightLutLookup(environment_id, SP_LUT, light_id, light_vector, half_vector);
float specular0_distribution = lightLutLookup(environment_id, D0_LUT, light_id, light_vector, half_vector);
float specular1_distribution = lightLutLookup(environment_id, D1_LUT, light_id, light_vector, half_vector);
vec3 reflected_color;
reflected_color.r = lightLutLookup(environment_id, RR_LUT, light_id, light_vector, half_vector);
if (isSamplerEnabled(environment_id, RG_LUT)) {
reflected_color.g = lightLutLookup(environment_id, RG_LUT, light_id, light_vector, half_vector);
} else {
reflected_color.g = reflected_color.r;
}
if (isSamplerEnabled(environment_id, RB_LUT)) {
reflected_color.b = lightLutLookup(environment_id, RB_LUT, light_id, light_vector, half_vector);
} else {
reflected_color.b = reflected_color.r;
}
vec3 specular0 = regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0_distribution;
vec3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1_distribution * reflected_color;
specular0 *= use_geo_0 ? geometric_factor : 1.0;
specular1 *= use_geo_1 ? geometric_factor : 1.0;
float clamp_factor = 1.0;
if (clamp_highlights && NdotL == 0.0) {
clamp_factor = 0.0;
}
float light_factor = distance_attenuation * spotlight_attenuation;
diffuse_sum.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL);
specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1);
}
uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1);
uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1);
// Uses parameters from the last light as Fresnel is only applied to the last light
float fresnel_factor;
if (fresnel_output1 == 1u || fresnel_output2 == 1u) {
fresnel_factor = lightLutLookup(environment_id, FR_LUT, light_id, light_vector, half_vector);
}
if (fresnel_output1 == 1u) {
diffuse_sum.a = fresnel_factor;
}
if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT];
if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT];
if (fresnel_output2 == 1u) {
specular_sum.a = fresnel_factor;
}
uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u);
vec4 global_ambient = vec4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0);
primary_color = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0));
secondary_color = clamp(specular_sum, vec4(0.0), vec4(1.0));
if (error_unimpl) {
// secondary_color = primary_color = vec4(1.0, 0., 1.0, 1.0);
// secondary_color = primary_color = unimpl_color;
}
}
@ -371,7 +496,7 @@ void main() {
if (tevUnimplementedSourceFlag) {
// fragColour = vec4(1.0, 0.0, 1.0, 1.0);
}
// fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr;
// fragColour.rg = texture(u_tex_luts,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr;
// Get original depth value by converting from [near, far] = [0, 1] to [-1, 1]
// We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1]
@ -384,6 +509,28 @@ void main() {
// Write final fragment depth
gl_FragDepth = depth;
bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u;
if (enable_fog) {
bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u;
float fog_index = flip_depth ? 1.0 - depth : depth;
fog_index *= 128.0;
float clamped_index = clamp(floor(fog_index), 0.0, 127.0);
float delta = fog_index - clamped_index;
vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), FOG_INDEX), 0).rg;
float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);
uint GPUREG_FOG_COLOR = readPicaReg(0x00E1u);
// Annoyingly color is not encoded in the same way as light color
float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0;
float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0;
float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0;
vec3 fog_color = vec3(r, g, b);
fragColour.rgb = mix(fog_color, fragColour.rgb, fog_factor);
}
// Perform alpha test
uint alphaControl = readPicaReg(0x104u);
if ((alphaControl & 1u) != 0u) { // Check if alpha test is on
@ -414,4 +561,4 @@ void main() {
break;
}
}
}
}

View file

@ -9,9 +9,7 @@ layout(location = 5) in float a_texcoord0_w;
layout(location = 6) in vec3 a_view;
layout(location = 7) in vec2 a_texcoord2;
out vec3 v_normal;
out vec3 v_tangent;
out vec3 v_bitangent;
out vec4 v_quaternion;
out vec4 v_colour;
out vec3 v_texcoord0;
out vec2 v_texcoord1;
@ -35,12 +33,6 @@ vec4 abgr8888ToVec4(uint abgr) {
return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24));
}
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
vec3 u = q.xyz;
float s = q.w;
return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v);
}
// Convert an arbitrary-width floating point literal to an f32
float decodeFP(uint hex, uint E, uint M) {
uint width = M + E + 1u;
@ -73,10 +65,6 @@ void main() {
v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
v_view = a_view;
v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion));
v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion));
v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion));
for (int i = 0; i < 6; i++) {
v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]);
}
@ -95,4 +83,6 @@ void main() {
// There's also another, always-on clipping plane based on vertex z
gl_ClipDistance[0] = -a_coords.z;
gl_ClipDistance[1] = dot(clipData, a_coords);
v_quaternion = a_quaternion;
}

385
src/libretro_core.cpp Normal file
View file

@ -0,0 +1,385 @@
#include <stdexcept>
#include <cstdio>
#include <libretro.h>
#include <emulator.hpp>
#include <renderer_gl/renderer_gl.hpp>
// Callbacks handed to the core by the libretro frontend through the retro_set_* entry points
static retro_environment_t envCallbacks;
static retro_video_refresh_t videoCallbacks;
static retro_audio_sample_batch_t audioBatchCallback;
static retro_input_poll_t inputPollCallback;
static retro_input_state_t inputStateCallback;
// Hardware rendering request: filled in by SetHWRender, then queried for GL entry points and the current framebuffer
static retro_hw_render_callback hw_render;
// Root directory for config.toml and the "Emulator Files" folder; assigned in retro_init
static std::filesystem::path savePath;
// True while a touchscreen press is being reported to HID, so that retro_run releases it once the press ends
static bool screenTouched;
// Emulator instance: created in retro_init, destroyed in retro_deinit
std::unique_ptr<Emulator> emulator;
// Non-owning pointer to the emulator's renderer: set in retro_load_game, cleared in retro_unload_game
RendererGL* renderer;
// Path of the configuration file, which lives directly inside the save directory
std::filesystem::path Emulator::getConfigPath() {
	std::filesystem::path configFile = savePath;
	configFile /= "config.toml";
	return configFile;
}
// Root folder for the emulator's own data files, placed inside the save directory
std::filesystem::path Emulator::getAppDataRoot() {
	std::filesystem::path appData = savePath;
	appData /= "Emulator Files";
	return appData;
}
// Looks up a GL entry point by name through the frontend's get_proc_address; passed to glad as its loader callback
static void* GetGLProcAddress(const char* name) {
	const auto entryPoint = hw_render.get_proc_address(name);
	return reinterpret_cast<void*>(entryPoint);
}
// context_reset callback: loads the GL (or GLES) function pointers via glad, then initialises the emulator's graphics context
static void VideoResetContext() {
#ifdef USING_GLES
	const bool glLoaded = gladLoadGLES2Loader(reinterpret_cast<GLADloadproc>(GetGLProcAddress)) != 0;
	if (!glLoaded) {
		Helpers::panic("OpenGL ES init failed");
	}
#else
	const bool glLoaded = gladLoadGLLoader(reinterpret_cast<GLADloadproc>(GetGLProcAddress)) != 0;
	if (!glLoaded) {
		Helpers::panic("OpenGL init failed");
	}
#endif

	emulator->initGraphicsContext(nullptr);
}
// context_destroy callback registered in SetHWRender: tears down the emulator's graphics context
static void VideoDestroyContext() {
emulator->deinitGraphicsContext();
}
// Fills in hw_render for the requested context type and asks the frontend to provide it.
// Returns true if the frontend accepted the request; unsupported context types return false without asking.
static bool SetHWRender(retro_hw_context_type type) {
	hw_render.context_type = type;
	hw_render.context_reset = VideoResetContext;
	hw_render.context_destroy = VideoDestroyContext;
	hw_render.bottom_left_origin = true;

	// Pick the context version to request for this context type
	switch (type) {
		case RETRO_HW_CONTEXT_OPENGL_CORE:
			hw_render.version_major = 4;
			hw_render.version_minor = 1;
			break;

		case RETRO_HW_CONTEXT_OPENGLES3:
		case RETRO_HW_CONTEXT_OPENGL:
			hw_render.version_major = 3;
			hw_render.version_minor = 1;
			break;

		default: return false;
	}

	return envCallbacks(RETRO_ENVIRONMENT_SET_HW_RENDER, &hw_render);
}
static void videoInit() {
retro_hw_context_type preferred = RETRO_HW_CONTEXT_NONE;
envCallbacks(RETRO_ENVIRONMENT_GET_PREFERRED_HW_RENDER, &preferred);
if (preferred && SetHWRender(preferred)) return;
if (SetHWRender(RETRO_HW_CONTEXT_OPENGL_CORE)) return;
if (SetHWRender(RETRO_HW_CONTEXT_OPENGL)) return;
if (SetHWRender(RETRO_HW_CONTEXT_OPENGLES3)) return;
hw_render.context_type = RETRO_HW_CONTEXT_NONE;
}
// Reads joypad button `id` on port 0; any non-zero state counts as pressed
static bool GetButtonState(uint id) {
	const auto state = inputStateCallback(0, RETRO_DEVICE_JOYPAD, 0, id);
	return state != 0;
}
// Reads analog axis `id` of stick `index` on port 0 and returns the raw value converted to float
static float GetAxisState(uint index, uint id) {
	const auto raw = inputStateCallback(0, RETRO_DEVICE_ANALOG, index, id);
	return static_cast<float>(raw);
}
static void inputInit() {
static const retro_controller_description controllers[] = {
{"Nintendo 3DS", RETRO_DEVICE_JOYPAD},
{NULL, 0},
};
static const retro_controller_info ports[] = {
{controllers, 1},
{NULL, 0},
};
envCallbacks(RETRO_ENVIRONMENT_SET_CONTROLLER_INFO, (void*)ports);
retro_input_descriptor desc[] = {
{0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "Left"},
{0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "Up"},
{0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "Down"},
{0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "Right"},
{0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "A"},
{0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "B"},
{0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT, "Select"},
{0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start"},
{0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "R"},
{0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "L"},
{0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "X"},
{0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "Y"},
{0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Circle Pad X"},
{0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Circle Pad Y"},
{0},
};
envCallbacks(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, &desc);
}
static std::string FetchVariable(std::string key, std::string def) {
retro_variable var = {nullptr};
var.key = key.c_str();
if (!envCallbacks(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value == nullptr) {
Helpers::warn("Fetching variable %s failed.", key.c_str());
return def;
}
return std::string(var.value);
}
// Boolean view of a core option: true only when its value is exactly "enabled"; `def` is used if the option can't be fetched
static bool FetchVariableBool(std::string key, bool def) {
	const std::string fallback = def ? "enabled" : "disabled";
	const std::string value = FetchVariable(key, fallback);
	return value == "enabled";
}
static void configInit() {
static const retro_variable values[] = {
{"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"},
{"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"},
{"panda3ds_use_ubershader", EmulatorConfig::ubershaderDefault ? "Use ubershaders (No stutter, maybe slower); enabled|disabled"
: "Use ubershaders (No stutter, maybe slower); disabled|enabled"},
{"panda3ds_use_vsync", "Enable VSync; enabled|disabled"},
{"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"},
{"panda3ds_use_audio", "Enable audio; disabled|enabled"},
{"panda3ds_use_virtual_sd", "Enable virtual SD card; enabled|disabled"},
{"panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled"},
{"panda3ds_battery_level", "Battery percentage; 5|10|20|30|50|70|90|100"},
{"panda3ds_use_charger", "Charger plugged; enabled|disabled"},
{"panda3ds_ubershader_lighting_override", "Force shadergen when rendering lights; enabled|disabled"},
{"panda3ds_ubershader_lighting_override_threshold", "Light threshold for forcing shadergen; 1|2|3|4|5|6|7|8"},
{nullptr, nullptr},
};
envCallbacks(RETRO_ENVIRONMENT_SET_VARIABLES, (void*)values);
}
static void configUpdate() {
EmulatorConfig& config = emulator->getConfig();
config.rendererType = RendererType::OpenGL;
config.vsyncEnabled = FetchVariableBool("panda3ds_use_vsync", true);
config.shaderJitEnabled = FetchVariableBool("panda3ds_use_shader_jit", true);
config.chargerPlugged = FetchVariableBool("panda3ds_use_charger", true);
config.batteryPercentage = std::clamp(std::stoi(FetchVariable("panda3ds_battery_level", "5")), 0, 100);
config.dspType = Audio::DSPCore::typeFromString(FetchVariable("panda3ds_dsp_emulation", "null"));
config.audioEnabled = FetchVariableBool("panda3ds_use_audio", false);
config.sdCardInserted = FetchVariableBool("panda3ds_use_virtual_sd", true);
config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false);
config.accurateShaderMul = FetchVariableBool("panda3ds_accurate_shader_mul", false);
config.useUbershaders = FetchVariableBool("panda3ds_use_ubershader", true);
config.forceShadergenForLights = FetchVariableBool("panda3ds_ubershader_lighting_override", true);
config.lightShadergenThreshold = std::clamp(std::stoi(FetchVariable("panda3ds_ubershader_lighting_override_threshold", "1")), 1, 8);
config.discordRpcEnabled = false;
config.save();
}
// Re-reads the core options only when the frontend reports that at least one of them changed
static void ConfigCheckVariables() {
	bool optionsChanged = false;
	envCallbacks(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &optionsChanged);

	if (!optionsChanged) {
		return;
	}

	configUpdate();
}
// Reports static core information: name, version, accepted file extensions and how content should be handed over
void retro_get_system_info(retro_system_info* info) {
	info->library_name = "Panda3DS";
	info->library_version = "0.8";
	info->valid_extensions = "3ds|3dsx|elf|axf|cci|cxi|app";

	// Content is loaded by path (see retro_load_game), so request a full path and no archive extraction
	info->need_fullpath = true;
	info->block_extract = true;
}
// Reports the output geometry (fixed to the emulator's output size) and the video/audio timing
void retro_get_system_av_info(retro_system_av_info* info) {
	auto& geometry = info->geometry;
	geometry.base_width = emulator->width;
	geometry.base_height = emulator->height;

	// The output never grows beyond its base size
	geometry.max_width = geometry.base_width;
	geometry.max_height = geometry.base_height;
	geometry.aspect_ratio = static_cast<float>(5.0 / 6.0);

	auto& timing = info->timing;
	timing.fps = 60.0;
	timing.sample_rate = 32768;
}
// Frontend callback registration. Each setter stores the callback in the matching file-level variable.

// Environment callback: used for every RETRO_ENVIRONMENT_* query/command in this file
void retro_set_environment(retro_environment_t cb) {
envCallbacks = cb;
}
// Video refresh callback: called at the end of retro_run to present the frame
void retro_set_video_refresh(retro_video_refresh_t cb) {
videoCallbacks = cb;
}
// Batched audio callback: stored here, not called anywhere in this file
void retro_set_audio_sample_batch(retro_audio_sample_batch_t cb) {
audioBatchCallback = cb;
}
// Single-sample audio callback: intentionally ignored
void retro_set_audio_sample(retro_audio_sample_t cb) {}
// Input poll callback: called once per retro_run before reading input state
void retro_set_input_poll(retro_input_poll_t cb) {
inputPollCallback = cb;
}
// Input state callback: used to read buttons, analog axes, pointer and mouse state
void retro_set_input_state(retro_input_state_t cb) {
inputStateCallback = cb;
}
void retro_init() {
enum retro_pixel_format xrgb888 = RETRO_PIXEL_FORMAT_XRGB8888;
envCallbacks(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &xrgb888);
char* save_dir = nullptr;
if (!envCallbacks(RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY, &save_dir) || save_dir == nullptr) {
Helpers::warn("No save directory provided by LibRetro.");
savePath = std::filesystem::current_path();
} else {
savePath = std::filesystem::path(save_dir);
}
emulator = std::make_unique<Emulator>();
}
// Core shutdown: destroys the emulator instance created in retro_init
void retro_deinit() {
	emulator.reset();
}
// Loads the content at game->path. Returns whatever Emulator::loadROM returns.
// The call order matters: options are declared and applied first, then the renderer pointer is cached,
// then input and video are negotiated with the frontend, and only then is the ROM loaded.
bool retro_load_game(const retro_game_info* game) {
// Declare the core options, then pull their current values into the emulator config
configInit();
configUpdate();
// configUpdate forces the OpenGL renderer type; anything else means the static_cast below would be invalid
if (emulator->getRendererType() != RendererType::OpenGL) {
Helpers::panic("Libretro: Renderer is not OpenGL");
}
renderer = static_cast<RendererGL*>(emulator->getRenderer());
emulator->setOutputSize(emulator->width, emulator->height);
inputInit();
videoInit();
// NOTE(review): game and game->path are dereferenced unchecked; retro_get_system_info sets need_fullpath — confirm the frontend always supplies a path
return emulator->loadROM(game->path);
}
// Special content types are not supported: always reports failure
bool retro_load_game_special(uint type, const retro_game_info* info, usize num) { return false; }
// Points the renderer back at framebuffer 0 and drops the cached renderer pointer.
// NOTE(review): dereferences `renderer` unchecked — presumably only called after a retro_load_game that reached the renderer assignment; confirm
void retro_unload_game() {
renderer->setFBO(0);
renderer = nullptr;
}
// Resets the emulator using the Reload option
void retro_reset() {
emulator->reset(Emulator::ReloadOption::Reload);
}
// Runs one frame: refreshes core options, binds the frontend's framebuffer, forwards joypad, circle pad
// and touch input to the HID service, emulates a frame and presents it through the video callback.
void retro_run() {
	ConfigCheckVariables();

	// The frontend may hand out a different framebuffer each frame, so rebind it and reset cached GL state
	renderer->setFBO(hw_render.get_current_framebuffer());
	renderer->resetStateManager();

	inputPollCallback();

	HIDService& hid = emulator->getServiceManager().getHID();

	hid.setKey(HID::Keys::A, GetButtonState(RETRO_DEVICE_ID_JOYPAD_A));
	hid.setKey(HID::Keys::B, GetButtonState(RETRO_DEVICE_ID_JOYPAD_B));
	hid.setKey(HID::Keys::X, GetButtonState(RETRO_DEVICE_ID_JOYPAD_X));
	hid.setKey(HID::Keys::Y, GetButtonState(RETRO_DEVICE_ID_JOYPAD_Y));
	hid.setKey(HID::Keys::L, GetButtonState(RETRO_DEVICE_ID_JOYPAD_L));
	hid.setKey(HID::Keys::R, GetButtonState(RETRO_DEVICE_ID_JOYPAD_R));
	hid.setKey(HID::Keys::Start, GetButtonState(RETRO_DEVICE_ID_JOYPAD_START));
	hid.setKey(HID::Keys::Select, GetButtonState(RETRO_DEVICE_ID_JOYPAD_SELECT));
	hid.setKey(HID::Keys::Up, GetButtonState(RETRO_DEVICE_ID_JOYPAD_UP));
	hid.setKey(HID::Keys::Down, GetButtonState(RETRO_DEVICE_ID_JOYPAD_DOWN));
	hid.setKey(HID::Keys::Left, GetButtonState(RETRO_DEVICE_ID_JOYPAD_LEFT));
	hid.setKey(HID::Keys::Right, GetButtonState(RETRO_DEVICE_ID_JOYPAD_RIGHT));

	// Get analog values for the left analog stick (Right analog stick is N3DS-only and unimplemented)
	float xLeft = GetAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X);
	float yLeft = GetAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y);

	// Rescale from the libretro axis range to the circle pad range; the Y axis is inverted
	hid.setCirclepadX((xLeft / +32767) * 0x9C);
	hid.setCirclepadY((yLeft / -32767) * 0x9C);

	bool touchScreen = false;

	// Pointer coordinates arrive in [-0x7fff, 0x7fff]; map them onto output pixel coordinates
	const int posX = inputStateCallback(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_X);
	const int posY = inputStateCallback(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_Y);

	const int newX = static_cast<int>((posX + 0x7fff) / (float)(0x7fff * 2) * emulator->width);
	const int newY = static_cast<int>((posY + 0x7fff) / (float)(0x7fff * 2) * emulator->height);

	// Touch area: the lower half of the output, inset by offsetX on each side
	// (presumably the bottom screen — confirm against the renderer's screen layout)
	const int offsetX = 40;
	const int offsetY = emulator->height / 2;

	// The upper bounds are exclusive: the area is (width - 2 * offsetX) by (height - offsetY) pixels, so the
	// largest valid touch coordinate is one less than each size. An inclusive bound would report a point
	// one pixel past the right/bottom edge of the touchscreen.
	const bool inScreenX = newX >= offsetX && newX < emulator->width - offsetX;
	const bool inScreenY = newY >= offsetY && newY < emulator->height;

	if (inScreenX && inScreenY) {
		touchScreen |= inputStateCallback(0, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_LEFT);
		touchScreen |= inputStateCallback(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_PRESSED);
	}

	if (touchScreen) {
		// Convert from output coordinates to coordinates relative to the touch area's top-left corner
		u16 x = static_cast<u16>(newX - offsetX);
		u16 y = static_cast<u16>(newY - offsetY);

		hid.setTouchScreenPress(x, y);
		screenTouched = true;
	} else if (screenTouched) {
		// Only send a release if a press was reported earlier
		hid.releaseTouchScreen();
		screenTouched = false;
	}

	hid.updateInputs(emulator->getTicks());
	emulator->runFrame();

	videoCallbacks(RETRO_HW_FRAME_BUFFER_VALID, emulator->width, emulator->height, 0);
}
// Controller port changes are ignored
void retro_set_controller_port_device(uint port, uint device) {}
// Save states are not implemented: the reported state size is 0 and both (un)serialize calls fail
usize retro_serialize_size() {
usize size = 0;
return size;
}
bool retro_serialize(void* data, usize size) { return false; }
bool retro_unserialize(const void* data, usize size) { return false; }
// The core always reports the NTSC region
uint retro_get_region() { return RETRO_REGION_NTSC; }
// libretro API version this core was built against
uint retro_api_version() { return RETRO_API_VERSION; }
// No memory region is exposed to the frontend: every id, RETRO_MEMORY_SYSTEM_RAM included, reports 0 bytes
usize retro_get_memory_size(uint id) {
	static_cast<void>(id);
	return 0;
}
// No memory region is exposed to the frontend: every id, RETRO_MEMORY_SYSTEM_RAM included, yields a null pointer
void* retro_get_memory_data(uint id) {
	static_cast<void>(id);
	return nullptr;
}
// Cheats are not wired up in the libretro core: both entry points are no-ops
void retro_cheat_set(uint index, bool enabled, const char* code) {}
void retro_cheat_reset() {}

View file

@ -55,12 +55,14 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent)
auto luaEditorAction = toolsMenu->addAction(tr("Open Lua Editor"));
auto cheatsEditorAction = toolsMenu->addAction(tr("Open Cheats Editor"));
auto patchWindowAction = toolsMenu->addAction(tr("Open Patch Window"));
auto shaderEditorAction = toolsMenu->addAction(tr("Open Shader Editor"));
auto dumpDspFirmware = toolsMenu->addAction(tr("Dump loaded DSP firmware"));
connect(dumpRomFSAction, &QAction::triggered, this, &MainWindow::dumpRomFS);
connect(luaEditorAction, &QAction::triggered, this, &MainWindow::openLuaEditor);
connect(cheatsEditorAction, &QAction::triggered, this, &MainWindow::openCheatsEditor);
connect(patchWindowAction, &QAction::triggered, this, &MainWindow::openPatchWindow);
connect(luaEditorAction, &QAction::triggered, this, [this]() { luaEditor->show(); });
connect(shaderEditorAction, &QAction::triggered, this, [this]() { shaderEditor->show(); });
connect(cheatsEditorAction, &QAction::triggered, this, [this]() { cheatsEditor->show(); });
connect(patchWindowAction, &QAction::triggered, this, [this]() { patchWindow->show(); });
connect(dumpDspFirmware, &QAction::triggered, this, &MainWindow::dumpDspFirmware);
auto aboutAction = aboutMenu->addAction(tr("About Panda3DS"));
@ -75,6 +77,12 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent)
cheatsEditor = new CheatsWindow(emu, {}, this);
patchWindow = new PatchWindow(this);
luaEditor = new TextEditorWindow(this, "script.lua", "");
shaderEditor = new ShaderEditorWindow(this, "shader.glsl", "");
shaderEditor->setEnable(emu->getRenderer()->supportsShaderReload());
if (shaderEditor->supported) {
shaderEditor->setText(emu->getRenderer()->getUbershader());
}
auto args = QCoreApplication::arguments();
if (args.size() > 1) {
@ -294,10 +302,6 @@ void MainWindow::showAboutMenu() {
about.exec();
}
void MainWindow::openLuaEditor() { luaEditor->show(); }
void MainWindow::openCheatsEditor() { cheatsEditor->show(); }
void MainWindow::openPatchWindow() { patchWindow->show(); }
void MainWindow::dispatchMessage(const EmulatorMessage& message) {
switch (message.type) {
case MessageType::LoadROM:
@ -351,6 +355,11 @@ void MainWindow::dispatchMessage(const EmulatorMessage& message) {
emu->getServiceManager().getHID().setTouchScreenPress(message.touchscreen.x, message.touchscreen.y);
break;
case MessageType::ReleaseTouchscreen: emu->getServiceManager().getHID().releaseTouchScreen(); break;
case MessageType::ReloadUbershader:
emu->getRenderer()->setUbershader(*message.string.str);
delete message.string.str;
break;
}
}
@ -453,6 +462,14 @@ void MainWindow::loadLuaScript(const std::string& code) {
sendMessage(message);
}
void MainWindow::reloadShader(const std::string& shader) {
EmulatorMessage message{.type = MessageType::ReloadUbershader};
// Make a copy of the code on the heap to send via the message queue
message.string.str = new std::string(shader);
sendMessage(message);
}
void MainWindow::editCheat(u32 handle, const std::vector<uint8_t>& cheat, const std::function<void(u32)>& callback) {
EmulatorMessage message{.type = MessageType::EditCheat};

View file

@ -0,0 +1,54 @@
#include <QPushButton>
#include <QVBoxLayout>
#include "panda_qt/main_window.hpp"
#include "panda_qt/shader_editor.hpp"
using namespace Zep;
// Builds the shader editor dialog: a Zep text editor initialised with `initialText` under the buffer name `filename`,
// plus a "Reload shader" button that sends the editor's text to the parent MainWindow.
// `parent` is expected to be the MainWindow, since the button handler static_casts parentWidget() to MainWindow*.
ShaderEditorWindow::ShaderEditorWindow(QWidget* parent, const std::string& filename, const std::string& initialText)
: QDialog(parent), zepWidget(this, qApp->applicationDirPath().toStdString(), fontSize) {
resize(600, 600);
// Register our extensions
ZepRegressExCommand::Register(zepWidget.GetEditor());
ZepReplExCommand::Register(zepWidget.GetEditor(), &replProvider);
// Default to standard mode instead of vim mode, initialize text box
zepWidget.GetEditor().InitWithText(filename, initialText);
zepWidget.GetEditor().SetGlobalMode(Zep::ZepMode_Standard::StaticName());
// Layout for widgets
QVBoxLayout* mainLayout = new QVBoxLayout();
setLayout(mainLayout);
QPushButton* button = new QPushButton(tr("Reload shader"), this);
button->setFixedSize(100, 20);
// When the Reload shader button is pressed, send the current text to the MainWindow through MainWindow::reloadShader
connect(button, &QPushButton::pressed, this, [this]() {
if (parentWidget()) {
// Grab the full contents of the most recently used editor buffer
auto buffer = zepWidget.GetEditor().GetMRUBuffer();
const std::string text = buffer->GetBufferText(buffer->Begin(), buffer->End());
static_cast<MainWindow*>(parentWidget())->reloadShader(text);
} else {
// This should be unreachable, only here for safety purposes
printf("Text editor does not have any parent widget, click doesn't work :(\n");
}
});
// Button on top, editor below
mainLayout->addWidget(button);
mainLayout->addWidget(&zepWidget);
}
// Records whether shader editing is supported and enables or disables the window to match.
// When unsupported, the editor text is replaced with an explanatory message.
void ShaderEditorWindow::setEnable(bool enable) {
	supported = enable;
	setDisabled(!enable);

	if (!enable) {
		setText("Shader editor window is not available for this renderer backend");
	}
}

View file

@ -21,7 +21,7 @@ public class GlobalConfig {
public static DataModel data;
public static final Key<Boolean> KEY_SHADER_JIT = new Key<>("emu.shader_jit", false);
public static final Key<Boolean> KEY_SHADER_JIT = new Key<>("emu.shader_jit", true);
public static final Key<Boolean> KEY_PICTURE_IN_PICTURE = new Key<>("app.behavior.pictureInPicture", false);
public static final Key<Boolean> KEY_SHOW_PERFORMANCE_OVERLAY = new Key<>("dev.performanceOverlay", false);
public static final Key<Boolean> KEY_LOGGER_SERVICE = new Key<>("dev.loggerService", false);

4405
third_party/libretro/include/libretro.h vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -355,76 +355,98 @@ namespace OpenGL {
}
};
enum ShaderType {
Fragment = GL_FRAGMENT_SHADER,
Vertex = GL_VERTEX_SHADER,
Geometry = GL_GEOMETRY_SHADER,
Compute = GL_COMPUTE_SHADER,
TessControl = GL_TESS_CONTROL_SHADER,
TessEvaluation = GL_TESS_EVALUATION_SHADER
};
enum ShaderType {
Fragment = GL_FRAGMENT_SHADER,
Vertex = GL_VERTEX_SHADER,
Geometry = GL_GEOMETRY_SHADER,
Compute = GL_COMPUTE_SHADER,
TessControl = GL_TESS_CONTROL_SHADER,
TessEvaluation = GL_TESS_EVALUATION_SHADER
};
struct Shader {
GLuint m_handle = 0;
struct Shader {
GLuint m_handle = 0;
Shader() {}
Shader(const std::string_view source, ShaderType type) { create(source, static_cast<GLenum>(type)); }
Shader() {}
Shader(const std::string_view source, ShaderType type) { create(source, static_cast<GLenum>(type)); }
// Returns whether compilation failed or not
bool create(const std::string_view source, GLenum type) {
m_handle = glCreateShader(type);
const GLchar* const sources[1] = { source.data() };
// Returns whether compilation failed or not
bool create(const std::string_view source, GLenum type) {
m_handle = glCreateShader(type);
const GLchar* const sources[1] = {source.data()};
glShaderSource(m_handle, 1, sources, nullptr);
glCompileShader(m_handle);
glShaderSource(m_handle, 1, sources, nullptr);
glCompileShader(m_handle);
GLint success;
glGetShaderiv(m_handle, GL_COMPILE_STATUS, &success);
if (success == GL_FALSE) {
char buf[4096];
glGetShaderInfoLog(m_handle, 4096, nullptr, buf);
fprintf(stderr, "Failed to compile shader\nError: %s\n", buf);
glDeleteShader(m_handle);
GLint success;
glGetShaderiv(m_handle, GL_COMPILE_STATUS, &success);
if (success == GL_FALSE) {
char buf[4096];
glGetShaderInfoLog(m_handle, 4096, nullptr, buf);
fprintf(stderr, "Failed to compile shader\nError: %s\n", buf);
glDeleteShader(m_handle);
m_handle = 0;
}
m_handle = 0;
}
return m_handle != 0;
}
return m_handle != 0;
}
GLuint handle() const { return m_handle; }
bool exists() const { return m_handle != 0; }
};
GLuint handle() const { return m_handle; }
bool exists() const { return m_handle != 0; }
void free() {
if (exists()) {
glDeleteShader(m_handle);
m_handle = 0;
}
}
#ifdef OPENGL_DESTRUCTORS
~Shader() { free(); }
#endif
};
struct Program {
GLuint m_handle = 0;
GLuint m_handle = 0;
bool create(std::initializer_list<std::reference_wrapper<Shader>> shaders) {
m_handle = glCreateProgram();
for (const auto& shader : shaders) {
glAttachShader(m_handle, shader.get().handle());
}
bool create(std::initializer_list<std::reference_wrapper<Shader>> shaders) {
m_handle = glCreateProgram();
for (const auto& shader : shaders) {
glAttachShader(m_handle, shader.get().handle());
}
glLinkProgram(m_handle);
GLint success;
glGetProgramiv(m_handle, GL_LINK_STATUS, &success);
glLinkProgram(m_handle);
GLint success;
glGetProgramiv(m_handle, GL_LINK_STATUS, &success);
if (!success) {
char buf[4096];
glGetProgramInfoLog(m_handle, 4096, nullptr, buf);
fprintf(stderr, "Failed to link program\nError: %s\n", buf);
glDeleteProgram(m_handle);
if (!success) {
char buf[4096];
glGetProgramInfoLog(m_handle, 4096, nullptr, buf);
fprintf(stderr, "Failed to link program\nError: %s\n", buf);
glDeleteProgram(m_handle);
m_handle = 0;
}
m_handle = 0;
}
return m_handle != 0;
}
return m_handle != 0;
}
GLuint handle() const { return m_handle; }
bool exists() const { return m_handle != 0; }
void use() const { glUseProgram(m_handle); }
};
GLuint handle() const { return m_handle; }
bool exists() const { return m_handle != 0; }
void use() const { glUseProgram(m_handle); }
void free() {
if (exists()) {
glDeleteProgram(m_handle);
m_handle = 0;
}
}
#ifdef OPENGL_DESTRUCTORS
~Program() { free(); }
#endif
};
static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) {
glDispatchCompute(groupsX, groupsY, groupsZ);